R/path_by_date.R

Defines functions path_by_date

Documented in path_by_date

#' Return the Path to a NBS Investigations File
#'
#' `path_*` functions return paths to one or more files of interest
#' containing the supplied date in their names and ending with the supplied
#' file extension. These are convenience wrappers around `path_by_date()`, which
#' returns path that contain a specified date given additional parameters.
#'
#' \itemize{
#'   \item{`path_inv()` returns path(s) to NBS investigations files}
#'   \item{`path_pcr()` returns path(s) to NBS PCR files}
#'   \item{`path_vac()` returns path(s) to TennIIS vaccination files}
#'   \item{`path_nit()` returns path(s) to saved NIT records}
#'   \item{`path_nca()` returns path(s) to saved NCA records}
#' }
#'
#' This will eventually replace \code{\link[coviData:find_file]{find_file()}}
#' for finding specific files.
#'
#' @param date A `Date` or string in the format "YYYY-MM-DD"
#'
#' @param dir The directory holding the files of interest
#'
#' @param ext The file type to return. Files may be saved as "fst", "csv", or
#'   "xlsx".
#'
#' @param date_format The format of the date in the file name; see
#'   \code{\link[base:format]{format()}} for more information
#'
#' @param date_regex A regular expression matching the date in the file name.
#'   This will hopefully be deprecated in the future, but is currently needed
#'   for extracting dates from the file path strings.
#'
#' @param file_regex A regular expression matching the file names of interest.
#'   The location of the date in the file name is specified using `"{date}"`, as
#'   in the glue package.
#'
#' @param type The file type(s) to return, one or more of `"any"`, `"file"`,
#' `"directory"`, `"symlink"`, `"FIFO"`, `"socket"`, `"character_device"` or
#' `"block_device"`.
#'
#' @param force_latest If multiple files with the given `date` are found,
#'   should the function return only the file with the latest creation date
#'   (`TRUE`, the default), or should it return all file paths (`FALSE`)?
#'
#' @return An `fs_path` character vector
#'
#' @aliases path_inv path_pcr path_vac
#'
#' @export
path_by_date <- function(
  dir,
  date_format,
  date_regex,
  date = NULL,
  file_regex = ".*{date}.*",
  type = "any",
  force_latest = TRUE
) {

  # Create/clean `dir` path
  dir <- path_create(dir)

  # Format date - works if `NULL` as well by matching everything (".*")
  date_formatted <- date %>%
    lubridate::as_date() %>%
    format(format = date_format) %>%
    purrr::when(rlang::is_empty(.) ~ ".*", ~ .)

  # Create regex pattern - get rid of duplicate wildcards
  file_regex <- file_regex %>%
    stringr::str_replace(
      pattern = "[{]date[}]",
      replacement = date_formatted
    ) %>%
    stringr::str_replace_all(
      pattern = "([.][*]){2,}",
      replacement = ".*"
    )

  # Get list of files matching date, or all files if `date = NULL`
  files <- dir %>%
    fs::dir_ls(type = type, regexp = file_regex) %>%
    vec_sort()

  # If `date = NULL`, return the file with the latest date in the file name
  # Otherwise just sort `files` and return
  if (date_formatted == ".*") {
    files <- files %>%
      tibble::as_tibble_col("file") %>%
      dplyr::mutate(
        date = .data[["file"]] %>%
          fs::path_file() %>%
          stringr::str_extract(pattern = date_regex) %>%
          lubridate::fast_strptime(format = date_format, lt = FALSE) %>%
          lubridate::as_date()
      ) %>%
      dplyr::filter(.data[["date"]] == max(.data[["date"]], na.rm = TRUE)) %>%
      dplyr::pull(.data[["file"]])
  }

  if (vec_size(files) > 1L && force_latest) {
    files <- files %>%
      fs::file_info() %>%
      dplyr::select("path", "birth_time") %>%
      dplyr::filter(
        .data[["birth_time"]] == max(.data[["birth_time"]], na.rm = TRUE)
      ) %>%
      dplyr::pull(.data[["path"]])
  }

  files
}

#' @rdname path_by_date
#'
#' @export
path_inv <- function(
  date = NULL,
  dir = path_create(
    "V:/EPI DATA ANALYTICS TEAM/COVID SANDBOX REDCAP DATA/",
    "Sandbox data pull Final/"
  ),
  ext = c("csv", "xlsx", "fst")
) {

  # Check `ext`
  ext <- rlang::arg_match(ext)[[1L]]

  path_by_date(
    dir = dir,
    date_format = "%Y-%m-%d",
    date_regex = "[0-9]{4}-[0-9]{2}-[0-9]{2}",
    date = date,
    file_regex = paste0(".*{date} Final Data Pull.*[.]", ext),
    type = "file"
  )
}

#' @rdname path_by_date
#'
#' @export
path_pcr <- function(
  date = NULL,
  dir = "V:/EPI DATA ANALYTICS TEAM/COVID SANDBOX REDCAP DATA/MSR PCR/",
  ext = c("csv", "xlsx", "fst")
) {

  # Check `ext`
  ext <- rlang::arg_match(ext)[[1L]]

  path_by_date(
    dir = dir,
    date_format = "%m%d%Y",
    date_regex = "[0-9]{8}",
    date = date,
    file_regex = paste0("MSR - All PCRs_{date}.*[.]", ext),
    type = "file"
  )
}

#' @rdname path_by_date
#'
#' @export
path_vac <- function(
  date = NULL,
  dir = path_create(
    "V:/EPI DATA ANALYTICS TEAM/COVID SANDBOX REDCAP DATA/",
    "COVID-19 Vaccine Reporting/data/COVID-19 vaccine data pull/"
  ),
  ext = c("csv", "xlsx", "fst")
) {

  # Check `ext`
  ext <- rlang::arg_match(ext)[[1L]]

  path_by_date(
    dir = dir,
    date_format = "%Y%m%d",
    date_regex = "[0-9]{8}",
    date = date,
    file_regex = paste0("COVID_VACC_MSR_{date}.*[.]", ext),
    type = "file"
  )
}

#' @rdname path_by_date
#'
#' @export
path_nit <- function(date = NULL, force_latest = TRUE) {
  path_by_date(
    dir ="V:/EPI DATA ANALYTICS TEAM/COVID SANDBOX REDCAP DATA/Data for R/nit/",
    date_format = "%Y-%m-%d",
    date_regex = "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,2}",
    date = date,
    file_regex = ".*/nit_data_{date}[.]csv$",
    type = "file",
    force_latest = force_latest
  )
}

#' @rdname path_by_date
#'
#' @export
path_nca <- function(date = NULL, force_latest = TRUE) {
  path_by_date(
    dir ="V:/EPI DATA ANALYTICS TEAM/COVID SANDBOX REDCAP DATA/Data for R/nca/",
    date_format = "%Y-%m-%d",
    date_regex = "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,2}",
    date = date,
    file_regex = ".*/nca_data_{date}[.]csv$",
    type = "file",
    force_latest = force_latest
  )
}

#' Path to Processed Investigation Local IDs
#'
#' @inheritParams path_by_date
#'
#' @param status Should the path be to `"positive"` or `"negative"` IDs?
#'
#' @return An `fs_path`
#'
#' @keywords internal
path_inv_id <- function(
  date = NULL,
  status = c("+", "-"),
  dir = path_create(
    "V:/EPI DATA ANALYTICS TEAM/COVID SANDBOX REDCAP DATA/Data for R/",
    "inv_local_id"
  )
) {
  status <- rlang::arg_match(status)[[1L]]
  flag <- if (status == "+") "pos_" else "neg_"
  path_by_date(
    dir = dir,
    date_format = "%Y-%m-%d",
    date_regex = "[0-9]{4}-[0-9]{2}-[0-9]{2}",
    date = date,
    file_regex = paste0("inv_local_id_", flag, "{date}.fst"),
    type = "file"
  )
}
jesse-smith/coviData documentation built on Jan. 14, 2023, 11:08 a.m.