R/get_path_segment.R
In mintyr: Streamlined Data Processing Tools for Genomic Selection

Documented in get_path_segment

# WARNING - Generated by {fusen} from dev/flat_teaching.Rmd: do not edit by hand

#' Extract Specific Segments from File Paths
#'
#' @description
#' The `get_path_segment` function extracts specific segments from file paths provided as character strings. Segments can be extracted from either the beginning or the end of the path, depending on the value of `n`.
#'
#' @param paths A 'character vector' containing file system paths
#'   - Must be non-empty
#'   - Path segments separated by forward slash `'/'`
#'   - Supports absolute and relative paths
#'   - Handles cross-platform path representations
#'   - Supports paths with mixed separators (`'\\'` and `'/'`)
#'
#' @param n Numeric index for segment selection
#'   - Positive values: Select from path start
#'   - Negative values: Select from path end
#'   - Supports single index or range extraction
#'   - Cannot be `0`
#'   - Default is `1` (first segment)
#'
#' @details
#' Sophisticated Path Segment Extraction Mechanism:
#' \enumerate{
#'   \item Comprehensive input validation
#'   \item Path normalization and preprocessing
#'   \item Robust cross-platform path segmentation
#'   \item Flexible indexing with forward and backward navigation
#'   \item Intelligent segment retrieval
#'   \item Graceful handling of edge cases
#' }
#'
#' Indexing Behavior:
#' \itemize{
#'   \item Positive `n`: Forward indexing from path start
#'     - `n = 1`: First segment
#'     - `n = 2`: Second segment
#'   \item Negative `n`: Reverse indexing from path end
#'     - `n = -1`: Last segment
#'     - `n = -2`: Second-to-last segment
#'   \item Range extraction: Supports `c(start, end)` index specification
#' }
#'
#' Path Parsing Characteristics:
#' \itemize{
#'   \item Standardizes path separators to `'/'`
#'   \item Removes drive letters (e.g., `'C:'`)
#'   \item Ignores consecutive `'/'` delimiters
#'   \item Removes leading and trailing separators
#'   \item Returns `NA_character_` for non-existent segments
#'   \item Supports complex path structures
#' }
#'
#' @return 'character vector' with extracted path segments
#'   - Matching segments for valid indices
#'   - `NA_character_` for segments beyond path length
#'
#' @note Critical Operational Constraints:
#' \itemize{
#'   \item Requires non-empty 'paths' input
#'   \item `n` must be non-zero numeric value
#'   \item Supports cross-platform path representations
#'   \item Minimal computational overhead
#'   \item Preserves path segment order
#' }
#'
#' @seealso
#' \itemize{
#'   \item [`tools::file_path_sans_ext()`] File extension manipulation
#' }
#'
#' @export
#' @examples
#' # Example: Path segment extraction demonstrations
#'
#' # Setup test paths
#' paths <- c(
#'   "C:/home/user/documents",   # Windows style path
#'   "/var/log/system",          # Unix system path
#'   "/usr/local/bin"            # Unix binary path
#' )
#'
#' # Example 1: Extract first segment
#' get_path_segment(
#'   paths,                      # Input paths
#'   1                           # Get first segment
#' )
#' # Returns: c("home", "var", "usr")
#'
#' # Example 2: Extract second-to-last segment
#' get_path_segment(
#'   paths,                      # Input paths
#'   -2                          # Get second-to-last segment
#' )
#' # Returns: c("user", "log", "local")
#'
#' # Example 3: Extract from first to last segment
#' get_path_segment(
#'   paths,                      # Input paths
#'   c(1,-1)                     # Range from first to last
#' )
#' # Returns full paths without drive letters
#'
#' # Example 4: Extract first three segments
#' get_path_segment(
#'   paths,                      # Input paths
#'   c(1,3)                      # Range from first to third
#' )
#' # Returns: c("home/user/documents", "var/log/system", "usr/local/bin")
#'
#' # Example 5: Extract last two segments (reverse order)
#' get_path_segment(
#'   paths,                      # Input paths
#'   c(-1,-2)                    # Range from last to second-to-last
#' )
#' # Returns: c("documents/user", "system/log", "bin/local")
#'
#' # Example 6: Extract first two segments
#' get_path_segment(
#'   paths,                      # Input paths
#'   c(1,2)                      # Range from first to second
#' )
#' # Returns: c("home/user", "var/log", "usr/local")
get_path_segment <- function(paths, n = 1) {
  # Input validation for 'paths' parameter
  if (missing(paths)) stop("Parameter 'paths' cannot be empty")
  if (!is.character(paths)) stop("'paths' must be character")
  if (length(paths) == 0) return(character(0))
  
  # Input validation for 'n' parameter
  if (!is.numeric(n)) stop("'n' must be numeric")
  if (length(n) > 2) stop("'n' must be a single number or a vector of length 2")
  if (any(n == 0)) stop("'n' cannot contain 0")
  
  # Preprocessing: Standardize path separators
  # 1. Replace all '\' and '//' with single '/'
  paths <- gsub("\\\\|//", "/", paths)
  
  # 2. Handle multiple consecutive separators
  paths <- gsub("/+", "/", paths)
  
  # 3. Remove leading and trailing separators
  paths <- gsub("^/+|/+$", "", paths)
  
  # 4. Remove drive letters (e.g., C:)
  paths <- sub("^[A-Za-z]:/", "", paths)
  
  # Split paths into segments
  segments <- strsplit(paths, "/")
  
  # Segment extraction logic
  if (length(n) == 1) {
    result <- sapply(segments, function(x) {
      if (n > 0) {
        # Forward indexing
        if (length(x) >= n) x[n] else NA_character_
      } else {
        # Backward indexing
        pos <- length(x) + n + 1
        if (pos > 0 && pos <= length(x)) x[pos] else NA_character_
      }
    })
  } else {
    # Range extraction
    result <- sapply(segments, function(x) {
      # Convert negative indices
      pos1 <- if (n[1] > 0) n[1] else length(x) + n[1] + 1
      pos2 <- if (n[2] > 0) n[2] else length(x) + n[2] + 1
      
      # Ensure pos1 is not greater than pos2
      if (pos1 > pos2) {
        tmp <- pos1
        pos1 <- pos2
        pos2 <- tmp
      }
      
      # Check position validity and extract range
      if (pos1 > 0 && pos2 <= length(x)) {
        paste(x[pos1:pos2], collapse = "/")
      } else {
        NA_character_
      }
    })
  }
  
  return(result)
}