R/import_csv.R

# WARNING - Generated by {fusen} from dev/flat_teaching.Rmd: do not edit by hand

#' Flexible `CSV`/`TXT` File Import with Multiple Backend Support
#'
#' @description
#' Reads one or more delimited text files (`CSV`/`TXT`) with either
#' `data.table` or `arrow`, and optionally stacks the results into a single
#' table that records which file each row came from.
#'
#' @param file A `character` vector of file paths to `CSV`/`TXT` files.
#'   Every path must exist, otherwise an error is raised before anything
#'   is read.
#'
#' @param package A single `character` string selecting the reading backend:
#'   - `"data.table"`: Uses [`data.table::fread()`] (default)
#'   - `"arrow"`: Uses [`arrow::read_csv_arrow()`]
#'
#' @param rbind A `logical` value controlling data combination strategy:
#'   - `TRUE`: Combines the files into a single table (only takes effect
#'     when more than one file is supplied)
#'   - `FALSE`: Returns a list of individual data objects
#'   Default is `TRUE`.
#'
#' @param rbind_label A single `character` string or `NULL`, used only when
#'   the files are combined:
#'   - `character`: Name of the leading column holding the source file name
#'   - `NULL`: Disables source file tracking
#'   Default is `"_file"`.
#'
#' @param ... Additional arguments passed unchanged to the backend reading
#'   function (e.g., `col_types`, `na.strings`, `skip`).
#'
#' @details
#' Each file is read independently with the selected backend. When `rbind`
#' is `TRUE` and more than one file is given, the tables are stacked with
#' [`data.table::rbindlist()`] using `use.names = TRUE` and `fill = TRUE`,
#' so columns are matched by name and columns absent from a file are filled
#' with `NA`. The tracking column is produced through the `idcol` argument
#' of `rbindlist()`, so files that contain no data rows are handled without
#' error.
#'
#' @return
#' Depends on `rbind` and on the number of files:
#' \itemize{
#'   \item If `rbind = TRUE` and more than one file is supplied: a single
#'     `data.table` containing all imported rows (for either backend),
#'     with `rbind_label` as its first column unless it is `NULL`
#'   \item Otherwise (`rbind = FALSE`, or only one file): a named list of
#'     data objects as returned by the chosen backend, with names derived
#'     from the input file names (without their last extension)
#' }
#'
#' @note
#' Critical Import Considerations:
#' \itemize{
#'   \item Requires all specified files to be accessible `CSV/TXT` files
#'   \item Supports flexible backend selection
#'   \item `rbind = TRUE` assumes compatible data structures
#'   \item Missing columns are automatically aligned
#'   \item File extensions are automatically removed in tracking columns
#'     and in list names
#' }
#'
#' @seealso
#' \itemize{
#'   \item [`data.table::fread()`] for `data.table` backend
#'   \item [`arrow::read_csv_arrow()`] for `arrow` backend
#'   \item [`data.table::rbindlist()`] for data combination
#' }
#'
#' @import data.table
#' @import arrow
#'
#' @export
#' @examples
#' # Example: CSV file import demonstrations
#'
#' # Setup test files
#' csv_files <- mintyr_example(
#'   mintyr_examples("csv_test")     # Get example CSV files
#' )
#'
#' # Example 1: Import and combine CSV files using data.table
#' import_csv(
#'   csv_files,                      # Input CSV file paths
#'   package = "data.table",         # Use data.table for reading
#'   rbind = TRUE,                   # Combine all files into one data.table
#'   rbind_label = "_file"           # Column name for file source
#' )
#'
#' # Example 2: Import files separately using arrow
#' import_csv(
#'   csv_files,                      # Input CSV file paths
#'   package = "arrow",              # Use arrow for reading
#'   rbind = FALSE                   # Keep files as a named list of tables
#' )
import_csv <- function (file, package = "data.table", rbind = TRUE, rbind_label = "_file", ...) {
  # ---- Argument validation (all checks run before any file is read) ----
  if (!is.character(file) || !all(file.exists(file))) {
    stop("file must be a vector of existing file paths.")
  }

  # Type and length are checked first so that a vector-valued `package`
  # yields this message rather than an `if` condition-length failure.
  if (!is.character(package) || length(package) != 1L ||
      !package %in% c("data.table", "arrow")) {
    stop("package must be one of 'data.table', 'arrow'.")
  }

  # `rbind_label` is handed to rbindlist(idcol = ), where TRUE/FALSE would
  # have a different meaning; accept only NULL or one non-missing string.
  if (!is.null(rbind_label) &&
      (!is.character(rbind_label) || length(rbind_label) != 1L ||
         is.na(rbind_label))) {
    stop("rbind_label must be NULL or a single character string.")
  }

  # ---- Read every file with the selected backend ----
  reader <- switch(
    package,
    "data.table" = data.table::fread,
    "arrow" = arrow::read_csv_arrow
  )
  tables <- lapply(file, function(file_path) reader(file_path, ...))

  # Name each table after its file: base name with the last extension removed.
  names(tables) <- sub("\\.[^.]*$", "", basename(file))

  # ---- Combine or return the list ----
  if (rbind && length(file) > 1L) {
    # The list names become the values of the leading `rbind_label` column;
    # idcol = NULL (label disabled) adds no column. Unlike cbind()-ing a
    # one-row label onto each table, this also works for zero-row tables.
    data.table::rbindlist(
      tables,
      use.names = TRUE,
      fill = TRUE,
      idcol = rbind_label
    )
  } else {
    # A single file, or rbind = FALSE: return the named list as is.
    tables
  }
}

Try the mintyr package in your browser

Any scripts or data that you put into this service are public.

mintyr documentation built on April 4, 2025, 2:56 a.m.