#' Bind External Files
#'
#' Binds `csv`, `Excel`, `RData`, or `Rds` files
#' in a given directory by row (`rbind`).
#'
#' The function requires that files have the same data structure.
#' For `R` object files.
#' only vectors, matrices or data frames
#' are allowed.
#' By default, `stringsAsFactors = FALSE`
#' when `format = "csv"`.
#'
#' @author Ivan Jacob Agaloos Pesigan
#' @inheritParams util_lapply
#' @param dir Character string.
#' Directory which contains target files.
#' @param recursive Logical.
#' Search files recursively under `dir`.
#' @param format Character string.
#' `"csv"` for comma separated files.
#' `"xls"` or `"xlsx"` for `Excel` files.
#' `"Rda"`, `"rda"`, `"RDA"`, `"RData"`,
#' `"Rdata"`, `"rdata"`, `"RDATA"`
#' for `R` Data Format.
#' `"Rds"`, `"rds"`, `"RDS"` for serialized `R` object.
#' @param pattern Character string.
#' Regular expression.
#' Pattern of file names.
#' `format` is appended as an extension.
#' For example, if `pattern = "^filename.*"`,
#' and `format = "csv"`,
#' the pattern used to load files will be
#' `"^filename.*\\.csv$"`.
#' @param fn_column Logical.
#' Save file name of source data file as a new column.
#' The new column will have `"fn"` as its column name.
#' @param fn_column_full_path Logical.
#' Save the full path of source data file as a new column.
#' @param save Logical.
#' Save concatenated files in `csv` format.
#' @param fn_output Character string.
#' Filename to use when `save = TRUE`.
#' Uses the basename of `dir` if not provided.
#' @param save_dir Character string.
#' Directory used for `fn_output`.
#' @param ... Optional arguments to pass to
#' [`readxl::read_excel()`] when `format = "xls"` or `"xlsx"`,
#' [`read.csv()`] when `format = "csv"`,
#' [`load()`] when `format = `
#' any of the following `"Rda"`, `"rda"`, `"RDA"`, `"RData"`,
#' `"Rdata"`, `"rdata"`, `"RDATA"`, or [`readRDS()`]
#' when `format = `
#' any of the following `"Rds"`, `"rds"`, `"RDS"`.
#' @examples
#' \dontrun{
#' util_bind(
#' dir = getwd(),
#' format = "csv",
#' pattern = "^filename.*",
#' fn_column = TRUE,
#' save = TRUE,
#' fn_output = NULL,
#' save_dir = getwd(),
#' par = FALSE
#' )
#' }
#' @importFrom utils glob2rx
#' @importFrom utils read.csv
#' @importFrom utils write.csv
#' @importFrom readxl read_excel
#' @export
util_bind <- function(dir = getwd(),
recursive = FALSE,
format = "csv",
pattern = "^filename.*",
fn_column = TRUE,
fn_column_full_path = FALSE,
save = FALSE,
fn_output = NULL,
save_dir = getwd(),
par = TRUE,
ncores = NULL,
...) {
foo <- function(file,
format,
...) {
tryCatch(
{
if (format == "csv") {
options(
stringsAsFactors = FALSE
)
x <- read.csv(
file = file,
...
)
return(x)
}
if (format %in% c("xls", "xlsx")) {
x <- read_excel(
path = file,
...
)
return(x)
}
if (format %in% c(
"Rda",
"rda",
"RDA",
"RData",
"Rdata",
"rdata",
"RDATA"
)) {
x <- load(
file = file,
...
)
}
if (format %in% c("Rds", "rds", "RDS")) {
x <- readRDS(
file = file,
...
)
}
# These lines are covered when
# format %in% c(
# "Rda",
# "rda",
# "RDA",
# "RData",
# "Rdata",
# "rdata",
# "RDATA"
# )
# or
# format %in% c("Rds", "rds", "RDS")
# write tests for these
if (is.vector(x)) {
x <- as.data.frame(t(x))
} else if (is.matrix(x)) {
x <- as.data.frame(x)
} else if (is.data.frame(x)) {
x
} else {
stop(
paste0(
"The function only accepts vectors, matrices, or data frames.\n"
)
)
}
return(x)
},
error = function(err) {
stop(
paste(
"Error reading in",
file,
"\n"
)
)
}
)
}
dir <- normalizePath(dir)
save_dir <- normalizePath(save_dir)
root <- basename(dir)
if (is.null(fn_output)) {
fn_output <- file.path(
save_dir,
paste0(
root,
".csv"
)
)
}
pattern <- paste0(
pattern,
"\\.",
format,
"$"
)
files <- util_search_pattern(
dir = dir,
pattern = pattern,
all.files = FALSE,
full.names = TRUE,
recursive = recursive,
ignore.case = TRUE,
no.. = FALSE
)
if (length(files) > 0) {
input <- vector(
mode = "list",
length(files)
)
input <- util_lapply(
FUN = foo,
args = list(
file = files,
format = format,
...
),
par = par,
ncores = ncores
)
if (fn_column) {
for (i in seq_along(files)) {
if (!fn_column_full_path) {
files[i] <- basename(files[i])
}
input[[i]]["fn"] <- files[i]
}
}
output <- as.data.frame(
do.call(
what = "rbind",
args = input
)
)
if (save) {
write.csv(
x = output,
file = fn_output,
row.names = FALSE
)
message(
paste(
fn_output,
"saved.",
"\n"
)
)
}
return(output)
} else {
stop("No files to bind.\n")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.