#' @title Import list of data frames
#' @description Use [import()] to import a list of data frames from a vector of file names or from a multi-object file (Excel workbook, .Rdata file, zipped directory in a zip file, or HTML file)
#' @param file A character string containing a single file name for a multi-object file (e.g., Excel workbook, zip file, or HTML file), or a vector of file paths for multiple files to be imported.
#' @param which If `file` is a single file path, this specifies which objects should be extracted (passed to [import()]'s `which` argument). Ignored otherwise.
#' @param rbind A logical indicating whether to pass the import list of data frames through [data.table::rbindlist()].
#' @param rbind_label If `rbind = TRUE`, a character string specifying the name of a column to add to the data frame indicating its source file.
#' @param rbind_fill If `rbind = TRUE`, a logical indicating whether to set the `fill = TRUE` (and fill missing columns with `NA`).
#' @param \dots Additional arguments passed to [import()]. Behavior may be unexpected if files are of different formats.
#' @inheritParams import
#' @return If `rbind=FALSE` (the default), a list of a data frames. Otherwise, that list is passed to [data.table::rbindlist()] with `fill = TRUE` and returns a data frame object of class set by the `setclass` argument; if this operation fails, the list is returned.
#' @details When file is a vector of file paths and any files are missing, those files are ignored (with warnings) and this function will not raise any error.
#' @examples
#' ## For demo, a temp. file path is created with the file extension .xlsx
#' xlsx_file <- tempfile(fileext = ".xlsx")
#' export(
#' list(
#' mtcars1 = mtcars[1:10, ],
#' mtcars2 = mtcars[11:20, ],
#' mtcars3 = mtcars[21:32, ]
#' ),
#' xlsx_file
#' )
#'
#' # import a single file from multi-object workbook
#' import(xlsx_file, sheet = "mtcars1")
#' # import all worksheets, the return value is a list
#' import_list(xlsx_file)
#'
#' # import and rbind all worksheets, the return value is a data frame
#' import_list(xlsx_file, rbind = TRUE)
#' @seealso [import()], [export_list()], [export()]
#' @export
import_list <- function(file, setclass = getOption("rio.import.class", "data.frame"), which, rbind = FALSE,
rbind_label = "_file", rbind_fill = TRUE, ...) {
.check_file(file, single_only = FALSE)
## special cases
if (length(file) == 1) {
x <- .read_file_as_list(file = file, which = which, setclass = setclass, rbind = rbind, rbind_label = rbind_label, ...)
} else {
## note the plural
x <- .read_multiple_files_as_list(files = file, setclass = setclass, rbind = rbind, rbind_label = rbind_label, ...)
}
## optionally rbind
if (isTRUE(rbind)) {
if (length(x) == 1) {
x <- x[[1L]]
} else {
x2 <- try(data.table::rbindlist(x, fill = rbind_fill), silent = TRUE)
if (inherits(x2, "try-error")) {
warning("Attempt to rbindlist() the data did not succeed. List returned instead.", call. = FALSE)
return(x)
} else {
x <- x2
}
}
x <- set_class(x, class = setclass)
}
return(x)
}
.strip_exts <- function(file) {
vapply(file, function(x) tools::file_path_sans_ext(basename(x)), character(1))
}
.read_multiple_files_as_list <- function(files, setclass, rbind, rbind_label, ...) {
names(files) <- .strip_exts(files)
x <- lapply(files, function(thisfile) {
out <- try(import(thisfile, setclass = setclass, ...), silent = TRUE)
if (inherits(out, "try-error")) {
warning(sprintf("Import failed for %s", thisfile), call. = FALSE)
##out <- NULL
return(NULL)
} else if (isTRUE(rbind)) {
out[[rbind_label]] <- thisfile
}
structure(out, filename = thisfile)
})
names(x) <- names(files)
return(x)
}
.read_file_as_list <- function(file, which, setclass, rbind, rbind_label, ...) {
if (R.utils::isUrl(file)) {
file <- remote_to_local(file)
}
if (get_info(file)$format == "rdata") {
e <- new.env()
load(file, envir = e)
return(as.list(e))
}
if (!get_info(file)$format %in% c("html", "xlsx", "xls", "zip")) {
which <- 1
whichnames <- NULL
}
## getting list of `whichnames`
if (get_info(file)$format == "html") {
.check_pkg_availability("xml2")
tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table")
if (missing(which)) {
which <- seq_along(tables)
}
whichnames <- vapply(xml2::xml_attrs(tables[which]),
function(x) if ("class" %in% names(x)) x["class"] else "",
FUN.VALUE = character(1)
)
names(which) <- whichnames
}
if (get_info(file)$format %in% c("xls", "xlsx")) {
## .check_pkg_availability("readxl")
whichnames <- readxl::excel_sheets(path = file)
if (missing(which)) {
which <- seq_along(whichnames)
names(which) <- whichnames
} else if (is.character(which)) {
whichnames <- which
} else {
whichnames <- whichnames[which]
}
}
if (get_info(file)$format %in% c("zip")) {
if (missing(which)) {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
which <- seq_along(whichnames)
names(which) <- .strip_exts(whichnames)
} else if (is.character(which)) {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
whichnames <- whichnames[whichnames %in% which]
} else {
whichnames <- utils::unzip(file, list = TRUE)[, "Name"]
names(which) <- .strip_exts(whichnames)
}
}
## reading all `whichnames`
x <- lapply(which, function(thiswhich) {
out <- try(import(file, setclass = setclass, which = thiswhich, ...), silent = TRUE)
if (inherits(out, "try-error")) {
warning(sprintf("Import failed for %s from %s", thiswhich, file))
out <- NULL
} else if (isTRUE(rbind) && length(which) > 1) {
out[[rbind_label]] <- thiswhich
}
out
})
names(x) <- whichnames
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.