R/findFiles.R

Defines functions findFiles

Documented in findFiles

########################################################################/**
# @RdocFunction findFiles
#
# @title "Finds one or several files in multiple directories"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#  \item{pattern}{A regular expression file name pattern to match.}
#  \item{paths}{A @character @vector of paths to be searched.}
#  \item{recursive}{If @TRUE, the directory structure is searched
#    breath-first, in lexicographic order.}
#  \item{firstOnly}{If @TRUE, the method returns as soon as a matching
#    file is found, otherwise not.}
#  \item{allFiles}{If @FALSE, files and directories starting with
#    a period will be skipped, otherwise not.}
#  \item{...}{Arguments passed to @see "base::list.files".}
# }
#
# \value{
#  Returns a @vector of the full pathnames of the files found.
# }
#
# \section{Paths}{
#   The \code{paths} argument may also contain paths specified as
#   semi-colon (\code{";"}) separated paths, e.g.
#   \code{"/usr/;usr/bin/;.;"}.
# }
#
# \section{Windows Shortcut links}{
#   If package \pkg{R.utils} is available and loaded , Windows Shortcut links (*.lnk)
#   are recognized and can be used to imitate links to directories
#   elsewhere.  For more details, see @see "R.utils::filePath".
# }
#
# @author "HB"
#
# @keyword file
# @keyword IO
# @keyword internal
#**/#######################################################################
findFiles <- function(pattern=NULL, paths=NULL, recursive=FALSE, firstOnly=TRUE, allFiles=TRUE, ...) {
  # To please R CMD check
  filePath <- NULL; rm(list="filePath");

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  splitPaths <- function(paths, ...) {
    if (length(paths) == 0)
      return(NULL);
    # If in format "path1; path2;path3", split it to multiple strings.
    paths <- unlist(strsplit(paths, split=";"));
    paths <- gsub("[ \t]*$", "", gsub("^[ \t]*", "", paths));
    paths <- paths[nchar(paths) > 0];
    if (length(paths) == 0)
      return(NULL);
    paths;
  } # splitPaths()


  # Checks if a package is loaded or not (cut'n'paste from R.utils)
  isPackageLoaded <- function(package, version=NULL, ...) {
    s <- search();
    if (is.null(version)) {
      s <- sub("_[0-9.-]*", "", s);
    } else {
      package <- paste(package, version, sep="_");
    }
    pattern <- sprintf("package:%s", package);
   (pattern %in% s);
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'paths':
  paths <- splitPaths(paths);
  if (is.null(paths))
    paths <- ".";

  # Argument 'pattern':
  if (!is.null(pattern))
    pattern <- as.character(pattern);

  # Argument 'recursive':
  recursive <- as.logical(recursive);

  # Argument 'firstOnly':
  firstOnly <- as.logical(firstOnly);


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Prepare list of paths to be scanned
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  hasRutilsLoaded <- isPackageLoaded("R.utils");
  ## hasRutils <- suppressWarnings(require(R.utils, quietly=TRUE));

  # Don't search the same path twice
  paths <- unique(paths);

  # Don't search non-existing paths
  for (kk in seq_along(paths)) {
    path <- paths[kk];
    # Example any '~':s
    path <- file.path(dirname(path), basename(path));
    path <- gsub("^[.][/\\]", "", path);

    # Follow Windows shortcut links?
    if (hasRutilsLoaded)
      path <- filePath(path, expandLinks="any");

    # Does the path exist and is it a directory
    # Note, isdir is TRUE for directories, FALSE for files,
    # *and* NA for non-existing files, e.g. items found by
    # list.files() but are broken Unix links.
    isDirectory <- identical(file.info(path)$isdir, TRUE);
    if (!file.exists(path) || !isDirectory)
      path <- NA;

    paths[kk] <- path;
  }
  if (length(paths) > 0)
    paths <- paths[!is.na(paths)];


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Search for files
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  pathnames <- c();
  for (path in paths) {
    files <- list.files(path, all.files=allFiles, full.names=TRUE);

    # Exclude listings that are neither files nor directories
    files <- gsub("^[.][/\\]", "", files);
    files <- files[nchar(files) > 0];
    if (length(files) > 0) {
      excl <- (basename(files) %in% c(".", "..", "/", "\\"));
      files <- files[!excl];
    }

    # Nothing to do?
    if (length(files) == 0)
      next;

    # Expand Windows shortcut links?
    files0 <- files;
    if (hasRutilsLoaded) {
      # Remember these
      files <- sapply(files, FUN=filePath, expandLinks="any", USE.NAMES=FALSE);
    }

    # Keep only existing files and directories
    ok <- sapply(files, FUN=function(file) {
      (file.exists(path) && !is.na(file.info(file)$isdir));
    }, USE.NAMES=FALSE);
    files <- files[ok];
    files0 <- files0[ok];

    # Nothing to do?
    if (length(files) == 0)
      next;

    # First search the files, then the directories, so...
    # Note, isdir is TRUE for directories, FALSE for files,
    # *and* NA for non-existing files, e.g. items found by
    # list.files() but are broken Unix links.
    isDir <- sapply(files, FUN=function(file) {
      identical(file.info(file)$isdir, TRUE);
      file.info(file)$isdir;
    }, USE.NAMES=FALSE);

    # In case some files are non-accessible, exclude them
    ok <- (!is.na(isDir));
    files <- files[ok];
    files0 <- files0[ok];
    isDir <- isDir[ok];

    # Nothing to do?
    if (length(files) == 0)
      next;

    # Directories and files in lexicographic order
    dirs <- files[isDir];
    files <- files[!isDir];
    files0 <- files0[!isDir];

    # Keep only files that match the filename pattern
    # of the non-expanded filename.
    if (!is.null(pattern)) {
      keep <- grep(pattern, basename(files0));
      files <- files[keep];
    }

    if (length(files) > 0) {
      files <- sort(files);
      if (firstOnly)
        return(files[1]);

      # Store results
      pathnames <- c(pathnames, files);
    }

    # Search directories recursively?
    if (recursive) {
      if (length(dirs) == 0)
        next;

      for (dir in sort(dirs)) {
        files <- findFiles(pattern=pattern, paths=dir, recursive=recursive,
                                                 firstOnly=firstOnly, ...);
        if (length(files) > 0 && firstOnly)
          return(files[1]);

        pathnames <- c(pathnames, files);
      }
    }
  } # for (path ...)

  pathnames;
} # findFiles()


############################################################################
# HISTORY:
# 2013-03-18 [HB]
# o Internal isPackageLoaded() no longer uses defunct manglePackageName().
# 2008-02-21 [HB]
# o Added an internal generic isPackageLoaded() function.
# 2008-02-20 [KH]
# o Replaced require("R.utils") with a "isLoaded()" feature.
# 2008-02-14
# o Added argument 'allFiles=TRUE' to findFiles().
# 2007-09-17
# o ROBUSTNESS: Now findFiles() are robust against broken Unix links.
# 2007-08-30
# o BUG FIX: Pattern matching was done on expanded filenames, whereas they
#   should really be done on the non-expanded ones.  This, only applies to
#   Windows shortcuts, but it is not the destination file that is of
#   interest, but the name of the shortcut file.
# o BUG FIX: The recent update was not grep():ing correctly; forgot to
#   extract the basename().
# 2007-08-27
# o Now findFiles(..., recursive=TRUE) does a breath-first search in
#   lexicographic order.
# o Now findFiles() don't search replicated directories.
# 2006-11-01
# o Removed usage of R.utils for now.
# 2006-03-14
# o Created from findCdf.R.
############################################################################

Try the affxparser package in your browser

Any scripts or data that you put into this service are public.

affxparser documentation built on Nov. 8, 2020, 7:26 p.m.