# Nothing
# WARNING - Generated by {fusen} from dev/flat_teaching.Rmd: do not edit by hand
#' Reshape Wide Data to Long Format and Split into List
#'
#' @description
#' The `w2l_split` function reshapes wide-format data into long-format and splits it into a list
#' by variable names and optional grouping columns. It handles both `data.frame` and `data.table` objects.
#'
#' @param data `data.frame` or `data.table`
#' - Input dataset in wide format
#' - Automatically converted to `data.table` if necessary
#'
#' @param cols2l `numeric` or `character` columns to transform
#' - Specifies columns for wide-to-long conversion
#' - Can be column indices or column names
#' - Default is `NULL`; the data is then only split by `by`, so `by` must be provided
#'
#' @param by `numeric` or `character` grouping variables
#' - Optional columns for data splitting
#' - Can be column indices or column names
#' - Used to create hierarchical split structure
#' - Default is `NULL`
#'
#' @param split_type `character` output data type
#' - Defines split data object type
#' - Possible values:
#' - `"dt"`: split `data.table` objects
#' - `"df"`: split `data.frame` objects
#' - Default is `"dt"`
#'
#' @param sep `character` separator
#' - Used for combining split names
#' - Default is `"_"`
#'
#' @details
#' The function melts the specified wide columns into long format and splits the resulting data
#' into a list based on the variable names and any additional grouping variables specified in `by`.
#' The split data can be in the form of `data.table` or `data.frame` objects, controlled by the
#' `split_type` parameter.
#'
#' Both `cols2l` and `by` parameters accept either column indices or column names, providing flexible ways
#' to specify the columns for transformation and splitting.
#'
#' @return A list of `data.table` or `data.frame` objects (depending on `split_type`), split by variable
#' names and optional grouping columns.
#' \itemize{
#' \item If `by` is `NULL`, returns a list split by variable names only.
#' \item If `by` is specified, returns a list split by both variable names and grouping variables.
#' }
#'
#' @note
#' \itemize{
#' \item Both `cols2l` and `by` parameters can be specified using either numeric indices or character column names.
#' \item When using numeric indices, they must be valid column positions in the data (1 to ncol(data)).
#' \item When using character names, all specified columns must exist in the data.
#' \item The function converts `data.frame` to `data.table` if necessary.
#' \item The `split_type` parameter controls whether split data are `data.table` (`"dt"`) or `data.frame` (`"df"`) objects.
#' \item If `split_type` is not `"dt"` or `"df"`, the function will stop with an error.
#' }
#'
#' @seealso
#' Related functions and packages:
#' \itemize{
#' \item [`tidytable::group_split()`] Split data frame by groups
#' }
#'
#' @import data.table
#' @export
#' @examples
#' # Example: Wide to long format splitting demonstrations
#'
#' # Example 1: Basic splitting by Species
#' w2l_split(
#' data = iris, # Input dataset
#' by = "Species" # Split by Species column
#' ) |>
#' lapply(head) # Show first 6 rows of each split
#'
#' # Example 2: Split specific columns using numeric indices
#' w2l_split(
#' data = iris, # Input dataset
#' cols2l = 1:3, # Select first 3 columns to split
#' by = 5 # Split by column index 5 (Species)
#' ) |>
#' lapply(head) # Show first 6 rows of each split
#'
#' # Example 3: Split specific columns using column names
#' list_res <- w2l_split(
#' data = iris, # Input dataset
#' cols2l = c("Sepal.Length", # Select columns by name
#' "Sepal.Width"),
#' by = "Species" # Split by Species column
#' )
#' lapply(list_res, head) # Show first 6 rows of each split
#' # Returns similar structure to Example 2
w2l_split <- function(data, cols2l = NULL, by = NULL, split_type = "dt", sep = "_") {
  # Reject an unsupported output type before doing any reshaping work, and
  # give non-scalar / NA values the same clear message instead of an opaque
  # `if` condition error.
  if (length(split_type) != 1L || !(split_type %in% c("dt", "df"))) {
    stop("Invalid split_type provided. It must be either 'dt' or 'df'.",
         call. = FALSE)
  }

  # Work on a data.table: data.frames are converted, anything else is rejected.
  if (!data.table::is.data.table(data)) {
    if (!is.data.frame(data)) {
      stop("data must be a data.frame or data.table.", call. = FALSE)
    }
    data <- data.table::as.data.table(data)
  }

  # Turn a column specification (numeric positions or character names) into
  # column names of `data`. `arg` is the argument name used in the messages and
  # `missing_prefix` is the message shown before the list of unknown names.
  resolve_cols <- function(spec, arg, missing_prefix) {
    if (is.numeric(spec)) {
      # anyNA() guard: an NA index would otherwise make the `if` condition NA.
      if (anyNA(spec) || any(spec < 1 | spec > ncol(data))) {
        stop("Numeric indices in ", arg, " are out of bounds.", call. = FALSE)
      }
      return(names(data)[spec])
    }
    if (is.character(spec)) {
      absent <- spec[!spec %in% names(data)]
      if (length(absent) > 0L) {
        stop(missing_prefix, paste(absent, collapse = ", "), call. = FALSE)
      }
      return(spec)
    }
    stop(arg, " should be either numeric indices or character vector of column names.",
         call. = FALSE)
  }

  # Build one label per group by pasting the key values with `sep`.
  # unique(dt, by = vars) keeps the first row of every key combination in
  # order of appearance, which is the order split(..., by = vars) uses when
  # sorted = FALSE, so label i always belongs to list element i (even when two
  # different key combinations happen to paste to the same string).
  group_labels <- function(dt, vars) {
    firsts <- unique(dt, by = vars)
    # Columns are passed unnamed so a column called "sep" or "collapse"
    # cannot be mistaken for an argument of paste().
    do.call(paste, c(lapply(vars, function(v) firsts[[v]]), list(sep = sep)))
  }

  if (!is.null(by)) {
    by <- resolve_cols(by, "by", "Some 'by' columns are not present in the data: ")
  }

  if (is.null(cols2l)) {
    # Nothing to melt: the data is only split by the grouping columns.
    if (is.null(by)) {
      stop("When cols2l is NULL, by parameter must be provided.", call. = FALSE)
    }
    dt_list <- split(data, by = by, keep.by = FALSE, drop = TRUE)
    names(dt_list) <- group_labels(data, by)
  } else {
    cols2l_names <- resolve_cols(
      cols2l, "cols2l",
      "Some columns specified in cols2l are not present in the data: "
    )

    # Every column that is not melted is carried along as an id variable;
    # the grouping columns are always kept so they can be split on.
    id_vars <- setdiff(names(data), cols2l_names)
    if (!is.null(by)) {
      id_vars <- unique(c(id_vars, by))
    }

    dt_long <- data.table::melt(
      data,
      id.vars = id_vars,
      measure.vars = cols2l_names,
      variable.name = "variable",
      value.name = "value"
    )

    # Split by the melted variable name first, then by the grouping columns.
    split_vars <- c("variable", by)
    dt_list <- split(dt_long, by = split_vars, keep.by = FALSE, drop = TRUE)

    # Without `by`, split() already names the elements after the variables;
    # with `by`, rebuild the names using the requested separator.
    if (!is.null(by)) {
      names(dt_list) <- group_labels(dt_long, split_vars)
    }
  }

  # split() yields data.tables; convert only when data.frames were requested.
  if (split_type == "df") {
    dt_list <- lapply(dt_list, as.data.frame)
  }
  dt_list
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.