R/list_files.R

Defines functions list_files

Documented in list_files

#' List files in a directory
#'
#' Wrapper to base::list.files with additional options for ignoring temporary
#' files (whose names on some systems begin with "~$"), and for selecting among
#' multiple files based on a date- or time-stamp extracted from the filename.
#'
#' Note that, unlike base::list.files, arguments `full.names` and `ignore.case`
#' default to `TRUE` here.
#'
#' @inheritParams base::list.files
#'
#' @param ignore_temp Logical indicating whether to ignore temporary files
#'   (files whose base name starts with "~$"). Defaults to `TRUE`.
#' @param select Select all matching files ("all") or only the single most
#'   recent file ("latest") as determined by a date or date-time stamp extracted
#'   from the file name using functions \link{extract_date} and
#'   \link{extract_datetime}, respectively. When both a date and a date-time
#'   are extracted from a name, the date-time is preferred.
#'
#'   When select is "latest" and there is:
#'
#'   - a single matching file name, the single match is returned regardless of
#'   whether it contains a date/datetime
#'   - multiple matching file names none of which contain a date/datetime,
#'   an error is thrown
#'   - multiple matching file names, where some contain a date/datetime and some
#'   do not, the names lacking a date/datetime are excluded, and a warning
#'   listing them is given if argument `warn_no_date` is `TRUE`
#'   - multiple matching file names where there is a tie for latest
#'   date/datetime, an error is thrown
#'
#' @param warn_no_date Logical indicating whether to give a warning when
#'   `select` is "latest" and some (but not all) of the matching file names
#'   lack a date/datetime. Such names are excluded from the selection whether
#'   or not the warning is given. Defaults to `TRUE`.
#'
#' @return
#' A character vector containing the names of the files in the specified
#' directories, that match the given set of arguments (an empty character vector
#' if no matching files are found)
#'
#' @examples
#' \dontrun{
#' list_files("~/Documents", pattern = "^exported_data_", select = "latest")
#' }
#'
#' @export list_files
list_files <- function(path = ".",
                       pattern = NULL,
                       full.names = TRUE,
                       recursive = FALSE,
                       ignore.case = TRUE,
                       include.dirs = FALSE,
                       ignore_temp = TRUE,
                       select = c("all", "latest"),
                       warn_no_date = TRUE) {

  select <- match.arg(select)

  files <- list.files(
    path = path,
    pattern = pattern,
    full.names = full.names,
    recursive = recursive,
    ignore.case = ignore.case,
    include.dirs = include.dirs
  )

  if (ignore_temp) {
    # Only a literal "~$" prefix on the base name marks a temporary file; a
    # "~$" elsewhere in the name (or in a parent directory) must not exclude it
    is_temp <- startsWith(basename(files), "~$")
    files <- files[!is_temp]
  }

  # A single match is returned as-is, so date extraction is only needed when
  # there is more than one candidate to choose between
  if (select == "latest" && length(files) > 1L) {

    file_date <- extract_date(files)
    file_datetime <- extract_datetime(files)
    # Prefer the date-time stamp, falling back to the date where it is missing
    file_dt <- coalesce_dates(file_datetime, file_date, prefer = "x")

    lacks_dt <- is.na(file_dt)

    if (all(lacks_dt)) {
      files_missing_date <- paste(files[lacks_dt], collapse = "\n- ")
      stop("Argument select is 'latest' but no dates/datetimes were found ",
           "within any of the matching filenames:\n- ",
           files_missing_date, call. = FALSE)
    } else if (any(lacks_dt) && warn_no_date) {
      files_missing_date <- paste(files[lacks_dt], collapse = "\n- ")
      warning("Argument select is 'latest' but no dates/datetimes were found ",
              "within the following filenames:\n- ",
              files_missing_date, call. = FALSE)
    }

    # Names without a date/datetime can never be the latest; at least one
    # non-missing value exists here, so max() is well defined
    is_latest <- !lacks_dt & file_dt == max(file_dt, na.rm = TRUE)

    if (sum(is_latest) > 1L) {
      tied_for_latest <- paste(files[is_latest], collapse = "\n- ")
      stop("There's a tie for latest date among the following files:\n- ",
           tied_for_latest, call. = FALSE)
    }

    files <- files[is_latest]
  }

  files
}
epicentre-msf/llutils documentation built on Nov. 9, 2020, 8:24 p.m.