R/inspect_folder.R

Defines functions inspect_all_in_folder

Documented in inspect_all_in_folder

#' Run inspect_all() on all csv files in a folder (and optionally its subfolders)
#'
#' @param source_dir folder to search for csv files that inspect_all() should be run on
#' @param pattern a regex pattern selecting which files in the folder(s) to inspect. The default is "csv$"; this should be kept at the end of the pattern
#' @param recursive logical: if TRUE (default), also searches all subfolders of `source_dir`
#' @param write.to.csv logical: whether or not to write csv files with the issue tables to files (folder can be specified with target_dir)
#' @param target_dir path to the folder where the issue tables should be saved
#' @return (invisibly) a named list with one element per matched file, named by the file's path relative to `source_dir`. Each element is the output of inspect_all() for that file, the result of empty_issues_table() if the file could not be read or has no rows, or NULL if inspect_all() failed on it
inspect_all_in_folder <- function(source_dir = "./", pattern = "csv$", recursive = TRUE,
                                  write.to.csv = FALSE, target_dir = "./") {

  # fail if.. wrong input types
  assertthat::assert_that(assertthat::is.string(source_dir))
  assertthat::assert_that(assertthat::is.string(pattern))
  assertthat::assert_that(assertthat::is.string(target_dir))
  assertthat::assert_that(assertthat::is.flag(recursive))
  assertthat::assert_that(assertthat::is.flag(write.to.csv))

  # fail if.. directories not found
  if (!dir.exists(source_dir)) {
    stop("source directory does not exist")
  }
  if (!dir.exists(target_dir)) {
    stop("target directory does not exist")
  }

  # search directories for pattern; paths are returned relative to source_dir
  files <- list.files(path = source_dir, pattern = pattern, recursive = recursive)

  # fail if.. no files found
  if (length(files) < 1) {
    stop(
      "found no files matching the regex pattern '", pattern,
      "' in directory '", source_dir, "'"
    )
  }

  # load files, run checks
  all_issues <- purrr::map(files, function(csvfile) {
    # list.files() gave a path relative to source_dir, so anchor it there;
    # otherwise reading only works when source_dir is the working directory
    csv_path <- file.path(source_dir, csvfile)

    # a read error is captured (not thrown) so one bad file does not abort the run
    df <- tryCatch(
      data.table::fread(file = csv_path),
      error = function(e) e
    )

    # skip files that didn't read correctly
    if (!is.data.frame(df)) {
      reason <- if (inherits(df, "condition")) paste0(": ", conditionMessage(df)) else ""
      warning(
        "file '", csvfile, "' not read correctly - returning empty table", reason,
        call. = FALSE
      )
      return(empty_issues_table())
    }
    if (nrow(df) < 1) {
      warning("file '", csvfile, "' has no data - returning empty table", call. = FALSE)
      return(empty_issues_table())
    }

    tryCatch(
      inspect_all(df),
      error = function(e) {
        # downgrade the failure to a warning that says which file caused it
        warning(
          "inspect_all() failed for file '", csvfile, "': ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
  })

  names(all_issues) <- files

  # save all files
  if (write.to.csv) {
    # flatten sub-folder structure into the file name so everything lands in target_dir
    flat_names <- gsub("/", "_____", files, fixed = TRUE)
    # replace a trailing ".csv" (or append, if absent) so the output can never
    # have the same name as the input file
    flat_names <- sub("(\\.csv)?$", "_issues.csv", flat_names)
    target_files <- file.path(target_dir, flat_names)

    # files for which inspect_all() failed have no table to write
    has_result <- !vapply(all_issues, is.null, logical(1))
    Map(
      function(issues, path) utils::write.csv(issues, file = path),
      all_issues[has_result],
      target_files[has_result]
    )
  }

  invisible(all_issues)
}
ellieallien/cleaninginspectoR documentation built on July 18, 2019, 12:30 p.m.