R/convert_nest.R

Defines functions convert_nest

Documented in convert_nest

# WARNING - Generated by {fusen} from dev/flat_teaching.Rmd: do not edit by hand

#' Convert Nested Columns Between `data.frame` and `data.table`
#'
#' @description
#' The `convert_nest` function transforms a `data.frame` or `data.table` by converting nested columns 
#' to either `data.frame` or `data.table` format while preserving the original data structure.
#'
#' @param data A `data.frame` or `data.table` containing nested columns
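#' @param to A `character` string specifying the target format. 
#'   Options are `"df"` (data frame; `data.table` and non-data-frame objects are
#'   converted to tibbles, existing data frames are kept as they are) or `"dt"`
#'   (data table). Defaults to `"df"`.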
#' @param nest_cols A `character` vector of column names containing nested data. 
#'   If `NULL`, the function automatically detects list columns.
#'
#' @details
#' Advanced Nested Column Conversion Features:
#' \itemize{
#'   \item Intelligent automatic detection of nested columns
#'   \item Comprehensive conversion of entire data structure
#'   \item Selective conversion of specified nested columns
#'   \item Non-destructive transformation with data copying
#' }
#' 
#' Input Validation and Error Handling:
#' \itemize{
#'   \item Validates existence of specified nested columns
#'   \item Verifies that specified columns are actually list columns
#'   \item Provides informative error messages for invalid inputs
#'   \item Ensures data integrity through comprehensive checks
#' }
#' 
#' Conversion Strategies:
#' \enumerate{
#'   \item Nested column identification based on `is.list()` detection
#'   \item Preservation of original data integrity
#'   \item Flexible handling of mixed data structures
#'   \item Consistent type conversion across nested elements
#' }
#'
#' Nested Column Handling:
#' \itemize{
#'   \item Supports conversion of `list` columns
#'   \item Handles `data.table`, `data.frame`, and generic `list` inputs
#'   \item Maintains original column structure and order
#'   \item Prevents in-place modification of source data
#' }
#'
#' @return 
#' A transformed `data.frame` or `data.table` with nested columns converted to the specified format.
#'
#' @note
#' Conversion Characteristics:
#' \itemize{
#'   \item Non-destructive transformation of nested columns
#'   \item Supports flexible input and output formats
#'   \item Intelligent type detection and conversion
#'   \item Minimal performance overhead
#' }
#'
#' Error Conditions:
#' \itemize{
#'   \item Throws error if specified columns don't exist in the input data
#'   \item Throws error if specified columns are not list columns
#'   \item Provides clear error messages for troubleshooting
#'   \item Validates input parameters before processing
#' }
#'
#' @importFrom data.table as.data.table copy
#' @importFrom tibble as_tibble
#' 
#' @export
#' @examples
#' # Example 1: Create nested data structures
#' # Create single nested column
#' df_nest1 <- iris |> 
#'   dplyr::group_nest(Species)     # Group and nest by Species
#'
#' # Create multiple nested columns
#' df_nest2 <- iris |>
#'   dplyr::group_nest(Species) |>  # Group and nest by Species
#'   dplyr::mutate(
#'     data2 = purrr::map(          # Create second nested column
#'       data,
#'       dplyr::mutate, 
#'       c = 2
#'     )
#'   )
#'
#' # Example 2: Convert nested structures
#' # Convert data frame to data table
#' convert_nest(
#'   df_nest1,                      # Input nested data frame
#'   to = "dt"                      # Convert to data.table
#' )
#'
#' # Convert specific nested columns
#' convert_nest(
#'   df_nest2,                      # Input nested data frame
#'   to = "dt",                     # Convert to data.table
#'   nest_cols = "data"             # Only convert 'data' column
#' )
#'
#' # Example 3: Convert data table to data frame
#' dt_nest <- mintyr::w2l_nest(
#'   data = iris,                   # Input dataset
#'   cols2l = 1:2                   # Columns to nest
#' )
#' convert_nest(
#'   dt_nest,                       # Input nested data table
#'   to = "df"                      # Convert to data frame
#' )
convert_nest <- function(data, to = c("df", "dt"), nest_cols = NULL) {
  # Resolve `to` to exactly one of "df" / "dt" (errors on anything else)
  to <- match.arg(to)

  # Automatically detect nested columns (list columns) if not specified.
  # vapply() always yields a logical vector, even when `data` has no columns,
  # whereas sapply() would return list() and break the subscript.
  if (is.null(nest_cols)) {
    nest_cols <- names(data)[vapply(data, is.list, logical(1))]
  }

  # Validate that every requested column exists in `data`
  invalid_cols <- setdiff(nest_cols, names(data))
  if (length(invalid_cols) > 0) {
    stop("Column(s) not found in data: ", paste(invalid_cols, collapse = ", "))
  }

  # Check that the requested columns are actually list columns.
  # Columns are extracted one by one with `[[` because the `with = FALSE`
  # subsetting form is data.table-specific and fails on a plain data.frame.
  is_nested <- vapply(
    nest_cols,
    function(col) is.list(data[[col]]),
    logical(1)
  )
  non_list_cols <- nest_cols[!is_nested]
  if (length(non_list_cols) > 0) {
    stop(
      "Column(s) are not nested (list) columns: ",
      paste(non_list_cols, collapse = ", ")
    )
  }

  if (to == "df") {
    # A data.table is copied before conversion so the caller's object is not
    # touched; any other non-data.frame input is converted directly.
    # Existing data frames / tibbles are kept as they are.
    if (inherits(data, "data.table")) {
      data <- as_tibble(copy(data))
    } else if (!inherits(data, "data.frame")) {
      data <- as_tibble(data)
    }

    # Apply the same rule to every element of each nested column
    to_df <- function(x) {
      if (inherits(x, "data.table")) {
        as_tibble(copy(x))
      } else if (!inherits(x, "data.frame")) {
        as_tibble(x)
      } else {
        x
      }
    }
    for (col in nest_cols) {
      data[[col]] <- lapply(data[[col]], to_df)
    }
  } else if (to == "dt") {
    # Anything that is not yet a data.table is copied and converted
    if (!inherits(data, "data.table")) {
      data <- as.data.table(copy(data))
    }

    # Convert every element of each nested column to data.table;
    # elements that already are data.tables are passed through unchanged
    to_dt <- function(x) {
      if (!inherits(x, "data.table")) {
        as.data.table(copy(x))
      } else {
        x
      }
    }
    for (col in nest_cols) {
      data[[col]] <- lapply(data[[col]], to_dt)
    }
  }

  data
}

Try the mintyr package in your browser

Any scripts or data that you put into this service are public.

mintyr documentation built on April 4, 2025, 2:56 a.m.