R/file-helpers.R

Defines functions parse_dataset is_data_file is_weekly_file

Documented in is_weekly_file parse_dataset

#' Test whether file names follow the weekly data file naming scheme
#'
#' Only the base name of each element is inspected, so directory components
#' are ignored. A name matches when it starts with eight digits, continues
#' with one or more digits, lowercase letters or hyphens, and ends in ".tab"
#' or ".zip".
#'
#' @param x character vector of file names or paths
#'
#' @return A logical vector the same length as x.
#'
#' @keywords internal
is_weekly_file <- function(x) {
  weekly_pattern <- "^[0-9]{8}[0-9a-z\\-]+\\.(tab|zip)$"
  file_names <- basename(x)
  grepl(weekly_pattern, file_names)
}


# Flag which file names look like event data files.
#
# The check is done on the base name of each element of `x`, so full paths
# are accepted. A name qualifies when it is one of:
#   - a yearly file, events.YYYY.<suffix>.tab / .zip / .tab.zip (the leading
#     "e" may be upper or lower case);
#   - a weekly file, as recognized by is_weekly_file(), e.g.
#     YYYYMMDD-icews-events.zip;
#   - the January 2022 file, "202201-icews-events.tab" (#80).
#
# Returns a logical vector the same length as `x`. Missing file names give
# FALSE rather than NA.
is_data_file <- function(x) {
  file_names <- basename(x)

  # the "." in "tab.zip" is escaped so that it only matches a literal dot
  yearly <- grepl(
    "^[Ee]{1}vents\\.[0-9]{4}\\.[0-9a-z]+\\.(tab|zip|tab\\.zip)$",
    file_names
  )
  weekly <- is_weekly_file(x)

  # exception for the January 2022 file (#80); %in% instead of == so that a
  # missing name yields FALSE, like the grepl() based checks, and not NA
  jan_2022 <- file_names %in% "202201-icews-events.tab"

  yearly | weekly | jan_2022
}

#' Identify dataset contained in file
#'
#' Identify which time period is nominally covered by a file. This is kept
#' around from a prior version of the package, in case it becomes useful again.
#' E.g. to allow for time range specific downloading.
#'
#' The label is what remains of the file's base name after removing the
#' "events." / "Events." prefix, the "-icews-events" part, any dot followed by
#' eight or more digits, and the ".sample", ".tab" and ".zip" parts. For
#' example, "events.1995.20150313082510.tab" gives "1995" and
#' "20181004-icews-events.zip" gives "20181004".
#'
#' @param x a normalized file name; a full path is also accepted, in which
#'   case only its base name is used
#'
#' @return A character vector the same length as x, with NA for elements that
#'   is_data_file() does not recognize as a data file.
#'
#' @keywords internal
parse_dataset <- function(x) {
  data_set <- rep(NA_character_, length(x))

  # which() drops NA as well as FALSE, so a missing file name can never turn
  # into an NA subscript in the assignment below
  data_idx <- which(is_data_file(x))

  # Parts of the file name that are not part of the label. Every "." is
  # escaped so that it only matches a literal dot; ".tab.zip" is covered
  # because gsub() removes ".tab" and ".zip" one after the other.
  strip_pattern <- paste(
    c(
      "(\\.[0-9]{8,})",
      "([Ee]vents\\.)",
      "(-icews-events)",
      "(\\.tab|\\.zip)",
      "(\\.sample)"
    ),
    collapse = "|"
  )

  # is_data_file() decides on the base name, so the label is derived from the
  # base name too; otherwise directory components would leak into the result
  data_set[data_idx] <- gsub(strip_pattern, "", basename(x[data_idx]))
  data_set
}
andybega/icews documentation built on July 7, 2023, 1:29 p.m.