Nothing
# WARNING - Generated by {fusen} from dev/flat_teaching.Rmd: do not edit by hand
#' Export Nested Data with Advanced Grouping and Flexible Handling
#'
#' @description
#' The `export_list` function exports nested data from a `data.frame` or `data.table` with sophisticated grouping
#' capabilities, supporting multiple nested column types and flexible file export options.
#'
#' @param nest_dt A `data.frame` or `data.table` containing nested columns of `data.frame`s,
#' `data.table`s, or lists to be exported.
#' @param group_cols Optional character vector specifying grouping columns.
#' If `NULL`, uses all non-nested columns as grouping variables.
#' @param nest_col Optional character string indicating the nested column to export.
#' If `NULL`, automatically selects the first nested column.
#' @param export_path Base directory path for file export. Defaults to a temporary directory
#' created by `tempdir()`.
#' @param file_type File export format, either `"txt"` (tab-separated) or `"csv"`.
#' Defaults to `"txt"`.
#'
#' @details
#' Comprehensive Nested Data Export Features:
#' \itemize{
#' \item Automatic detection and handling of different nested column types
#' \item Flexible grouping strategies with intelligent column selection
#' \item Hierarchical directory structure generation based on grouping columns
#' \item Support for mixed nested column types (`data.frame`, `data.table`, `list`)
#' \item Multi-threaded file writing for enhanced performance
#' \item Informative messaging and warning system
#' }
#'
#' Nested Column Detection Hierarchy:
#' \enumerate{
#' \item Prioritizes `data.frame`/`data.table` nested columns
#' \item Falls back to regular `list` columns if no `data.frame` columns exist
#' }
#'
#' Grouping Column Selection Strategy:
#' \enumerate{
#' \item When `group_cols` is `NULL`, uses all non-nested columns
#' \item Provides warnings about unused non-nested columns
#' \item Validates provided group columns
#' }
#'
#' File Export Characteristics:
#' \itemize{
#' \item Supports `"txt"` (tab-separated) and `"csv"` formats
#' \item Uses multi-threading via `parallel::detectCores()`
#' \item Creates nested directory structure based on grouping variables
#' }
#'
#' @return
#' An `integer` representing the total number of files exported successfully.
#'
#' @note
#' Key Capabilities:
#' \itemize{
#' \item Handles complex nested data structures
#' \item Performs type conversion for nested content
#' \item Utilizes multi-threaded file export for optimal performance
#' \item Provides comprehensive column selection feedback
#' }
#'
#' @importFrom data.table as.data.table fwrite
#' @importFrom parallel detectCores
#' @export
#' @examples
#' # Example 1: Basic nested data export workflow
#' # Step 1: Create nested data structure
#' dt_nest <- w2l_nest(
#' data = iris, # Input iris dataset
#' cols2l = 1:2, # Columns to be nested
#' by = "Species" # Grouping variable
#' )
#'
#' # Step 2: Export nested data to files
#' export_nest(
#' nest_dt = dt_nest, # Input nested data.table
#' nest_col = "data", # Column containing nested data
#' group_cols = c("name", "Species") # Columns to create directory structure
#' )
#' # Returns the number of files created
#' # Creates directory structure: tempdir()/name/Species/data.txt
#'
#' # Check exported files
#' list.files(
#' path = tempdir(), # Default export directory
#' pattern = "txt", # File type pattern to search
#' recursive = TRUE # Search in subdirectories
#' )
#' # Returns list of created files and their paths
#'
#' # Clean up exported files
#' files <- list.files(
#' path = tempdir(), # Default export directory
#' pattern = "txt", # File type pattern to search
#' recursive = TRUE, # Search in subdirectories
#' full.names = TRUE # Return full file paths
#' )
#' file.remove(files) # Remove all exported files
export_nest <- function(nest_dt, group_cols = NULL, nest_col = NULL,
export_path = tempdir(), file_type = "txt") {
# Basic input validation
if (nrow(nest_dt) == 0) {
stop("The input nest_dt cannot be empty")
}
# Check and get nested columns
# 1. Check for data.frame/data.table nested columns
df_nested_cols <- names(nest_dt)[sapply(nest_dt, function(x) {
is.list(x) && all(sapply(x, function(y) {
inherits(y, c("data.frame", "data.table"))
}))
})]
# 2. Check for regular list columns
list_cols <- names(nest_dt)[vapply(nest_dt, is.list, logical(1))]
# Combine both types of nested columns
nested_cols <- unique(c(df_nested_cols, list_cols))
if (length(nested_cols) == 0) {
stop("The input nest_dt must contain at least one nested column")
}
# If nest_col is NULL, prioritize using data.frame/data.table nested columns
if (is.null(nest_col)) {
if (length(df_nested_cols) > 0) {
nest_col <- df_nested_cols[1]
message("Using first nested data.frame/data.table column: ", nest_col)
} else {
nest_col <- list_cols[1]
message("Using first list column: ", nest_col)
}
} else if (!nest_col %in% nested_cols) {
stop("Specified nest_col is not a valid nested column")
}
# If group_cols is NULL, use all non-nested columns
if (is.null(group_cols)) {
group_cols <- setdiff(names(nest_dt), nested_cols)
message("Using all non-nested columns as groups: ", paste(group_cols, collapse = ", "))
} else {
# Validate user-provided group columns
if (!is.character(group_cols)) {
stop("group_cols must be a character vector")
}
missing_cols <- setdiff(group_cols, names(nest_dt))
if (length(missing_cols) > 0) {
stop("The following group columns are missing: ", paste(missing_cols, collapse = ", "))
}
# Check if all non-nested columns are used as group columns
all_non_nested_cols <- setdiff(names(nest_dt), nested_cols)
unused_cols <- setdiff(all_non_nested_cols, group_cols)
if (length(unused_cols) > 0) {
warning("Not all non-nested columns are used as group columns. ",
"The exported data may be incomplete without the following columns: ",
paste(unused_cols, collapse = ", "))
}
}
# Parameter validation
file_type <- tolower(file_type)
if (!(file_type %in% c("txt", "csv"))) {
stop("file_type must be either 'txt' or 'csv'")
}
if (!is.character(export_path) || length(export_path) != 1) {
stop("export_path must be a single character string")
}
# Create export directory
dir.create(export_path, showWarnings = FALSE, recursive = TRUE)
# Export processing
tryCatch({
# Process and expand nested data
expanded_dt <- nest_dt[, {
processed_nests <- lapply(get(nest_col), function(x) {
if (inherits(x, c("data.frame", "data.table"))) {
x_dt <- if (!is.data.table(x)) as.data.table(x) else x
setattr(x_dt, "row.names", .set_row_names(nrow(x_dt)))
return(x_dt)
} else {
# Try to convert non-data.frame/data.table to data.table
tryCatch({
x_dt <- as.data.table(as.list(x))
setattr(x_dt, "row.names", .set_row_names(nrow(x_dt)))
return(x_dt)
}, error = function(e) {
stop(sprintf("Cannot convert nested content to data.table: %s", e$message))
})
}
})
data.table::rbindlist(processed_nests, fill = TRUE)
}, by = group_cols]
expanded_dt <- copy(expanded_dt)
# Create required subdirectories
unique_paths <- unique(expanded_dt[, do.call(file.path,
c(list(export_path), lapply(group_cols, function(col) get(col))))])
lapply(unique_paths, dir.create, showWarnings = FALSE, recursive = TRUE)
# Export files
file_count <- 0L
expanded_dt[, {
dir_path <- do.call(file.path, c(list(export_path),
lapply(group_cols, function(col) get(col))))
sep <- if (file_type == "txt") "\t" else ","
filename <- paste0(nest_col, ".", file_type)
fwrite(.SD, file = file.path(dir_path, filename),
sep = sep, nThread = parallel::detectCores() - 1, buffMB = 32)
file_count <<- file_count + 1L
NULL
}, by = group_cols]
return(file_count)
}, error = function(e) {
stop("Failed to export nested data: ", e$message)
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.