Nothing
# WARNING - Generated by {fusen} from dev/flat_teaching.Rmd: do not edit by hand
#' Import Data from `XLSX` Files with Advanced Handling
#'
#' @description
#' A robust and flexible function for importing data from one or multiple
#' `XLSX` files, offering comprehensive options for sheet selection,
#' data combination, and source tracking.
#'
#' @param file A `character` vector of file paths to `Excel` files.
#' Must point to existing `.xlsx` or `.xls` files.
#' @param rbind A `logical` value controlling data combination strategy:
#' - `TRUE`: Combines all data into a single `data.table`
#' - `FALSE`: Returns a list of `data.table`s
#' Default is `TRUE`.
#' @param sheet A `numeric` vector or `NULL` specifying sheet import strategy:
#' - `NULL` (default): Imports all sheets
#' - `numeric`: Imports only specified sheet indices
#' @param ... Additional arguments passed to [`readxl::read_excel()`],
#' such as `col_types`, `skip`, or `na`.
#'
#' @details
#' The function provides a comprehensive solution for importing Excel data with the
#' following features:
#' \itemize{
#' \item Supports multiple files and sheets
#' \item Automatic source tracking for files and sheets
#' \item Flexible combining options
#' \item Handles missing columns across sheets when combining
#' \item Preserves original data types through readxl
#' }
#'
#' @return
#' Depends on the `rbind` parameter:
#' \itemize{
#' \item If `rbind = TRUE`: A single `data.table` with additional tracking columns:
#' - `excel_name`: Source file name (without extension)
#' - `sheet_name`: Source sheet name
#' \item If `rbind = FALSE`: A named list of `data.table`s with format
#' `"filename_sheetname"`
#' }
#'
#' @note
#' Critical Import Considerations:
#' \itemize{
#' \item Requires all specified files to be accessible `Excel` files
#' \item Sheet indices must be valid across input files
#' \item `rbind = TRUE` assumes compatible data structures
#' \item Missing columns are automatically filled with `NA`
#' \item File extensions are automatically removed in tracking columns
#' }
#'
#' @seealso
#' \itemize{
#' \item [`readxl::read_excel()`] for underlying Excel reading
#' \item [`data.table::rbindlist()`] for data combination
#' }
#'
#' @importFrom data.table ":="
#' @importFrom readxl read_excel excel_sheets
#'
#' @export
#' @examples
#' # Example: Excel file import demonstrations
#'
#' # Setup test files
#' xlsx_files <- mintyr_example(
#' mintyr_examples("xlsx_test") # Get example Excel files
#' )
#'
#' # Example 1: Import and combine all sheets from all files
#' import_xlsx(
#' xlsx_files, # Input Excel file paths
#' rbind = TRUE # Combine all sheets into one data.table
#' )
#'
#' # Example 2: Import specific sheets separately
#' import_xlsx(
#' xlsx_files, # Input Excel file paths
#' rbind = FALSE, # Keep sheets as separate data.tables
#' sheet = 2 # Only import first sheet
#' )
import_xlsx <- function(file, rbind = TRUE, sheet = NULL, ...) {
excel_name <- NULL
# Parameter checks
if (!is.character(file) || !all(file.exists(file))) {
stop("file must be a vector of existing file paths.")
}
if (!is.logical(rbind)) {
stop("Parameter 'rbind' should be logical (TRUE or FALSE).")
}
# Function to remove file extension
remove_extension <- function(filename) {
sub("\\.[^.]*$", "", basename(filename))
}
# Reads selected sheets from a single Excel file and converts them into a data.table
read_selected_sheets <- function(file_path, merge, sheet_indices, ...) {
all_sheets <- readxl::excel_sheets(file_path)
# Validate sheet indices
if (!is.null(sheet_indices)) {
if (is.numeric(sheet_indices)) {
if (any(sheet_indices > length(all_sheets)) || any(sheet_indices < 1)) {
stop("sheet index out of range for file: ", file_path)
}
} else {
stop("sheet parameter must be a numeric vector or NULL.")
}
}
selected_sheets <- if (is.null(sheet_indices)) all_sheets else all_sheets[sheet_indices]
sheet_data <- lapply(selected_sheets, function(s) {
dt <- data.table::as.data.table(readxl::read_excel(file_path, sheet = s, ...))
if (!merge) {
return(list(data = dt)) # Return each sheet as an independent list item if not merging
} else {
return(dt)
}
})
if (merge) {
names(sheet_data) <- selected_sheets
data.table::rbindlist(sheet_data, use.names = TRUE, fill = TRUE, idcol = "sheet_name")
} else {
names(sheet_data) <- selected_sheets
return(sheet_data)
}
}
# Finding minimum sheet count across all Excel files
min_sheet_count <- min(sapply(file, function(f) length(readxl::excel_sheets(f))))
# Sheet parameter validation
if (!is.null(sheet)) {
if (is.numeric(sheet) && (max(sheet) > min_sheet_count || min(sheet) < 1)) {
stop("sheet parameter contains indices out of range across files.")
}
}
# Applies the modified function across all files
all_data <- lapply(file, read_selected_sheets, merge = rbind, sheet_indices = sheet, ...)
if (rbind) {
# If merging, use rbindlist to combine all files' data into one data.table
combined_data <- data.table::rbindlist(all_data, use.names = TRUE, fill = TRUE, idcol = "excel_name")
xlsx_sheets_names <- sapply(file, remove_extension) # 使用remove_extension替代tools::file_path_sans_ext
# Set 'excel_name' column's value to the corresponding file names
combined_data[, excel_name := rep(xlsx_sheets_names, sapply(all_data, nrow))][]
return(combined_data)
} else {
# If not merging, create a new list to store all sheets' data
result_list <- list()
xlsx_sheets_names <- sapply(file, remove_extension) # 使用remove_extension替代tools::file_path_sans_ext
for (i in seq_along(file)) {
file_name <- xlsx_sheets_names[i]
file_data <- all_data[[i]]
# For each file's sheets, set list item names as "file_name_sheet_name"
for (sheet_name in names(file_data)) {
list_name <- paste(file_name, sheet_name, sep = "_")
result_list[[list_name]] <- file_data[[sheet_name]][["data"]]
}
}
return(result_list)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.