R/util_bind.R

Defines functions util_bind

Documented in util_bind

#' Bind External Files
#'
#' Binds `csv`, `Excel`, `RData`, or `Rds` files
#' in a given directory by row (`rbind`).
#'
#' The function requires that files have the same data structure.
#' For `R` object files.
#' only vectors, matrices or data frames
#' are allowed.
#' By default, `stringsAsFactors = FALSE`
#' when `format = "csv"`.
#'
#' @author Ivan Jacob Agaloos Pesigan
#' @inheritParams util_lapply
#' @param dir Character string.
#' Directory which contains target files.
#' @param recursive Logical.
#' Search files recursively under `dir`.
#' @param format Character string.
#' `"csv"` for comma separated files.
#' `"xls"` or `"xlsx"` for `Excel` files.
#' `"Rda"`, `"rda"`, `"RDA"`, `"RData"`,
#' `"Rdata"`, `"rdata"`, `"RDATA"`
#' for `R` Data Format.
#' `"Rds"`, `"rds"`, `"RDS"` for serialized `R` object.
#' @param pattern Character string.
#' Regular expression.
#' Pattern of file names.
#' `format` is appended as an extension.
#' For example, if `pattern = "^filename.*"`,
#' and `format = "csv"`,
#' the pattern used to load files will be
#' `"^filename.*\\.csv$"`.
#' @param fn_column Logical.
#' Save file name of source data file as a new column.
#' The new column will have `"fn"` as its column name.
#' @param fn_column_full_path Logical.
#' Save the full path of source data file as a new column.
#' @param save Logical.
#' Save concatenated files in `csv` format.
#' @param fn_output Character string.
#' Filename to use when `save = TRUE`.
#' Uses the basename of `dir` if not provided.
#' @param save_dir Character string.
#' Directory used for `fn_output`.
#' @param ... Optional arguments to pass to
#' [`readxl::read_excel()`] when `format = "xls"` or `"xlsx"`,
#' [`read.csv()`] when `format = "csv"`,
#' [`load()`] when `format = `
#' any of the following `"Rda"`, `"rda"`, `"RDA"`, `"RData"`,
#' `"Rdata"`, `"rdata"`, `"RDATA"`, or [`readRDS()`]
#' when `format = `
#' any of the following `"Rds"`, `"rds"`, `"RDS"`.
#' @examples
#' \dontrun{
#' util_bind(
#'   dir = getwd(),
#'   format = "csv",
#'   pattern = "^filename.*",
#'   fn_column = TRUE,
#'   save = TRUE,
#'   fn_output = NULL,
#'   save_dir = getwd(),
#'   par = FALSE
#' )
#' }
#' @importFrom utils glob2rx
#' @importFrom utils read.csv
#' @importFrom utils write.csv
#' @importFrom readxl read_excel
#' @export
util_bind <- function(dir = getwd(),
                      recursive = FALSE,
                      format = "csv",
                      pattern = "^filename.*",
                      fn_column = TRUE,
                      fn_column_full_path = FALSE,
                      save = FALSE,
                      fn_output = NULL,
                      save_dir = getwd(),
                      par = TRUE,
                      ncores = NULL,
                      ...) {
  foo <- function(file,
                  format,
                  ...) {
    tryCatch(
      {
        if (format == "csv") {
          options(
            stringsAsFactors = FALSE
          )
          x <- read.csv(
            file = file,
            ...
          )
          return(x)
        }
        if (format %in% c("xls", "xlsx")) {
          x <- read_excel(
            path = file,
            ...
          )
          return(x)
        }
        if (format %in% c(
          "Rda",
          "rda",
          "RDA",
          "RData",
          "Rdata",
          "rdata",
          "RDATA"
        )) {
          x <- load(
            file = file,
            ...
          )
        }
        if (format %in% c("Rds", "rds", "RDS")) {
          x <- readRDS(
            file = file,
            ...
          )
        }
        # These lines are covered when
        # format %in% c(
        #  "Rda",
        #  "rda",
        #  "RDA",
        #  "RData",
        #  "Rdata",
        #  "rdata",
        #  "RDATA"
        #  )
        # or
        # format %in% c("Rds", "rds", "RDS")
        # write tests for these
        if (is.vector(x)) {
          x <- as.data.frame(t(x))
        } else if (is.matrix(x)) {
          x <- as.data.frame(x)
        } else if (is.data.frame(x)) {
          x
        } else {
          stop(
            paste0(
              "The function only accepts vectors, matrices, or data frames.\n"
            )
          )
        }
        return(x)
      },
      error = function(err) {
        stop(
          paste(
            "Error reading in",
            file,
            "\n"
          )
        )
      }
    )
  }
  dir <- normalizePath(dir)
  save_dir <- normalizePath(save_dir)
  root <- basename(dir)
  if (is.null(fn_output)) {
    fn_output <- file.path(
      save_dir,
      paste0(
        root,
        ".csv"
      )
    )
  }
  pattern <- paste0(
    pattern,
    "\\.",
    format,
    "$"
  )
  files <- util_search_pattern(
    dir = dir,
    pattern = pattern,
    all.files = FALSE,
    full.names = TRUE,
    recursive = recursive,
    ignore.case = TRUE,
    no.. = FALSE
  )
  if (length(files) > 0) {
    input <- vector(
      mode = "list",
      length(files)
    )
    input <- util_lapply(
      FUN = foo,
      args = list(
        file = files,
        format = format,
        ...
      ),
      par = par,
      ncores = ncores
    )
    if (fn_column) {
      for (i in seq_along(files)) {
        if (!fn_column_full_path) {
          files[i] <- basename(files[i])
        }
        input[[i]]["fn"] <- files[i]
      }
    }
    output <- as.data.frame(
      do.call(
        what = "rbind",
        args = input
      )
    )
    if (save) {
      write.csv(
        x = output,
        file = fn_output,
        row.names = FALSE
      )
      message(
        paste(
          fn_output,
          "saved.",
          "\n"
        )
      )
    }
    return(output)
  } else {
    stop("No files to bind.\n")
  }
}
jeksterslabds/jeksterslabRutils documentation built on Jan. 18, 2021, 11:41 p.m.