R/format_parse.R

Defines functions .format_date_bound .format_range_bound .invalue_to_text .stratified_format_to_text .datetime_format_to_text .format_to_text .block_to_ks_invalue .block_to_ks_datetime_format .block_to_value_type_format .block_to_stratified_range_format .block_to_ks_format .block_to_format .unquote .parse_range_bound .parse_mapping_line .parse_blocks .cntlout_to_ks_invalue .cntlout_to_ks_format fimport fexport fparse

Documented in .block_to_format .block_to_ks_datetime_format .block_to_ks_format .block_to_ks_invalue .cntlout_to_ks_format .cntlout_to_ks_invalue .datetime_format_to_text fexport fimport .format_date_bound .format_range_bound .format_to_text fparse .invalue_to_text .parse_blocks .parse_mapping_line .parse_range_bound .unquote

#' Parse Format Definitions from 'SAS'-like Text
#'
#' Reads format definitions written in a human-friendly 'SAS'-like syntax
#' and returns a list of \code{ks_format} and/or \code{ks_invalue} objects.
#' All parsed formats are automatically stored in the global format library.
#'
#' @param text Character string or character vector containing format definitions.
#'   If a character vector, lines are concatenated with newlines.
#' @param file Path to a text file containing format definitions.
#'   Exactly one of \code{text} or \code{file} must be provided.
#' @param verbose Logical. If \code{TRUE}, the parsed formats are
#'   printed to the console.  Default is \code{FALSE} to suppress output
#'   (the result is returned invisibly).
#'
#' @return A named list of \code{ks_format} and/or \code{ks_invalue} objects.
#'   Names correspond to the format names defined in the text.
#'   All formats are automatically registered in the global format library.
#'
#' @details
#' The syntax supports two block types:
#'
#' \strong{VALUE} blocks define formats (value -> label):
#' \preformatted{
#' VALUE name (type)
#'   "value1" = "Label 1"
#'   "value2" = "Label 2"
#'   [low, high) = "Range Label (half-open)"
#'   (low, high] = "Range Label (open-low, closed-high)"
#'   .missing = "Missing Label"
#'   .other = "Other Label"
#' ;
#' }
#'
#' \strong{INVALUE} blocks define reverse formats (label -> numeric value):
#' \preformatted{
#' INVALUE name
#'   "Label 1" = 1
#'   "Label 2" = 2
#' ;
#' }
#'
#' \strong{Syntax rules:}
#' \itemize{
#'   \item Blocks start with \code{VALUE} or \code{INVALUE} keyword and end with \code{;}
#'   \item The type in parentheses is optional; defaults to \code{"auto"} for VALUE,
#'         \code{"numeric"} for INVALUE
#'   \item Values can be quoted or unquoted
#'   \item Ranges use interval notation with explicit bounds
#'   \item Legacy range syntax \code{low - high} is also supported
#'   \item Special range keywords: \code{LOW} (-Inf) and \code{HIGH} (Inf)
#'   \item \code{.missing} and \code{.other} are special directives
#'   \item Lines starting with \code{/*}, \code{*}, \code{//}, or \code{#} are comments
#' }
#'
#' \strong{Block options:}
#'
#' Comma-separated options can be placed inside the parentheses after the type:
#' \itemize{
#'   \item \code{nocase} — enables case-insensitive key matching (equivalent to
#'         \code{ignore_case = TRUE} in \code{\link{fnew}}).
#'   \item \code{multilabel} — allows overlapping ranges where a single value
#'         matches multiple labels (used with \code{\link{fput_all}}).
#' }
#' Options can be combined: \code{VALUE name (character, nocase, multilabel)}.
#'
#' @export
#' @examples
#' # Parse multiple format definitions from text
#' fparse(text = '
#' VALUE sex (character)
#'   "M" = "Male"
#'   "F" = "Female"
#'   .missing = "Unknown"
#' ;
#'
#' VALUE age (numeric)
#'   [0, 18)    = "Child"
#'   [18, 65)   = "Adult"
#'   [65, HIGH]  = "Senior"
#'   .missing   = "Age Unknown"
#' ;
#'
#' // Invalue block
#' INVALUE race_inv
#'   "White" = 1
#'   "Black" = 2
#'   "Asian" = 3
#' ;
#' ')
#'
#' fput(c("M", "F", NA), "sex")
#' fputn(c(5, 25, 70, NA), "age")
#' finputn(c("White", "Black"), "race_inv")
#' flist()
#' fprint()
#' fclear()
#'
#' # Parse date/time/datetime format definitions
#' fparse(text = '
#' VALUE enrldt (date)
#'   pattern = "DATE9."
#'   .missing = "Not Enrolled"
#' ;
#'
#' VALUE visit_time (time)
#'   pattern = "TIME8."
#' ;
#'
#' VALUE stamp (datetime)
#'   pattern = "DATETIME20."
#' ;
#' ')
#'
#' fput(as.Date("2025-03-01"), "enrldt")
#' fput(36000, "visit_time")
#' fput(as.POSIXct("2025-03-01 10:00:00", tz = "UTC"), "stamp")
#' fclear()
#'
#' # Case-insensitive format (nocase option)
#' fparse(text = '
#' VALUE yesno (character, nocase)
#'   "Y" = "Yes"
#'   "N" = "No"
#'   .other = "Unknown"
#' ;
#' ')
#' fput(c("y", "N", "YES"), "yesno")
#' # [1] "Yes" "No" "Unknown"
#' fclear()
#'
#' # Parse multilabel format
#' fparse(text = '
#' VALUE risk (numeric, multilabel)
#'   [0, 3]  = "Low Risk"
#'   [0, 7]  = "Monitored"
#'   (3, 7]  = "Medium Risk"
#'   (7, 10] = "High Risk"
#' ;
#' ')
#' fput_all(c(2, 5, 9), "risk")
#' fclear()
fparse <- function(text = NULL, file = NULL, verbose = FALSE) {
  if (is.null(text) && is.null(file)) {
    cli_abort("Either {.arg text} or {.arg file} must be provided.")
  }
  if (!is.null(text) && !is.null(file)) {
    cli_abort("Only one of {.arg text} or {.arg file} should be provided, not both.")
  }
  if (!is.null(text) && !is.character(text)) {
    cli_abort("{.arg text} must be a character string or character vector.")
  }
  if (!is.null(file) && (!is.character(file) || length(file) != 1L)) {
    cli_abort("{.arg file} must be a single file path string.")
  }

  if (!is.null(file)) {
    if (!file.exists(file)) {
      cli_abort("File not found: {.file {file}}")
    }
    lines <- readLines(file, warn = FALSE)
  } else {
    if (length(text) > 1) {
      text <- paste(text, collapse = "\n")
    }
    lines <- strsplit(text, "\n")[[1]]
  }

  # Parse into blocks
  blocks <- .parse_blocks(lines)

  # Convert blocks to format objects and auto-register
  result <- list()
  for (block in blocks) {
    obj <- .block_to_format(block)

    # Validate
    .format_validate(obj)

    # Auto-register in library
    .format_register(obj, name = block$name, overwrite = TRUE)

    result[[block$name]] <- obj
  }

  if (verbose) result else invisible(result)
}


#' Export Formats to 'SAS'-like Text
#'
#' Converts \code{ks_format} and/or \code{ks_invalue} objects to
#' human-readable 'SAS'-like text representation.
#'
#' @param ... Named \code{ks_format} or \code{ks_invalue} objects to export.
#' @param formats A named list of format objects. Alternative to \code{...}.
#' @param file Optional file path to write the output to. If \code{NULL},
#'   returns the text as a character string.
#'
#' @return If \code{file} is \code{NULL}, returns a character string with the
#'   'SAS'-like text. If \code{file} is specified, writes to the file and returns
#'   the path invisibly.
#'
#' @export
#' @examples
#' # Export a character format
#' sex_fmt <- fnew("M" = "Male", "F" = "Female",
#'                 .missing = "Unknown", name = "sex")
#' cat(fexport(sex = sex_fmt))
#'
#' # Export a numeric range format
#' fparse(text = '
#' VALUE bmi (numeric)
#'   [0, 18.5)  = "Underweight"
#'   [18.5, 25) = "Normal"
#'   [25, 30)   = "Overweight"
#'   [30, HIGH] = "Obese"
#'   .missing   = "No data"
#' ;
#' ')
#' bmi_fmt <- format_get("bmi")
#' cat(fexport(bmi = bmi_fmt))
#'
#' # Export a multilabel format
#' risk_fmt <- fnew(
#'   "0,3,TRUE,TRUE" = "Low Risk",
#'   "0,7,TRUE,TRUE" = "Monitored",
#'   "3,7,FALSE,TRUE" = "Medium Risk",
#'   "7,10,FALSE,TRUE" = "High Risk",
#'   name = "risk", type = "numeric", multilabel = TRUE
#' )
#' cat(fexport(risk = risk_fmt))
#'
#' # Export a date format
#' enrl_fmt <- fnew_date("DATE9.", name = "enrldt", .missing = "Not Enrolled")
#' cat(fexport(enrldt = enrl_fmt))
#' fclear()
fexport <- function(..., formats = NULL, file = NULL) {
  if (is.null(formats)) {
    formats <- list(...)
  }

  if (length(formats) == 0) {
    cli_abort("At least one format object must be provided.")
  }

  # Ensure all items are named
  fmt_names <- names(formats)
  if (is.null(fmt_names)) {
    fmt_names <- rep("", length(formats))
  }
  if (any(fmt_names == "")) {
    for (i in seq_along(formats)) {
      if (fmt_names[i] == "") {
        obj_name <- formats[[i]]$name
        if (!is.null(obj_name)) {
          fmt_names[i] <- obj_name
        } else {
          fmt_names[i] <- paste0("unnamed_", i)
        }
      }
    }
    names(formats) <- fmt_names
  }

  text_blocks <- character(0)

  for (nm in names(formats)) {
    obj <- formats[[nm]]
    if (inherits(obj, "ks_format")) {
      text_blocks <- c(text_blocks, .format_to_text(obj, nm))
    } else if (inherits(obj, "ks_invalue")) {
      text_blocks <- c(text_blocks, .invalue_to_text(obj, nm))
    } else {
      cli_warn("Skipping {.val {nm}}: not a {.cls ks_format} or {.cls ks_invalue} object.")
    }
  }

  output <- paste(text_blocks, collapse = "\n\n")

  if (!is.null(file)) {
    writeLines(output, file)
    cli_inform("Formats written to: {.file {file}}")
    return(invisible(file))
  }

  return(output)
}


#' Import Formats from 'SAS' PROC FORMAT CNTLOUT CSV
#'
#' Reads a CSV file produced by 'SAS' \code{PROC FORMAT} with
#' \code{CNTLOUT=} option (typically exported via \code{PROC EXPORT})
#' and converts compatible format definitions into \code{ks_format} and
#' \code{ks_invalue} objects.
#'
#' @details
#' The 'SAS' format catalogue CSV is expected to contain the standard CNTLOUT
#' columns: \code{FMTNAME}, \code{START}, \code{END}, \code{LABEL},
#' \code{TYPE}, \code{HLO}, \code{SEXCL}, \code{EEXCL}.
#'
#' \strong{Supported SAS format types:}
#' \describe{
#'   \item{\code{N}}{Numeric VALUE format \eqn{\to} \code{ks_format} with
#'     \code{type = "numeric"}}
#'   \item{\code{C}}{Character VALUE format \eqn{\to} \code{ks_format} with
#'     \code{type = "character"}}
#'   \item{\code{I}}{Numeric INVALUE (informat) \eqn{\to} \code{ks_invalue}
#'     with \code{target_type = "numeric"}}
#'   \item{\code{J}}{Character INVALUE (informat) \eqn{\to} \code{ks_invalue}
#'     with \code{target_type = "character"}}
#' }
#'
#' \strong{Incompatible types (logged with a warning):}
#' \describe{
#'   \item{\code{P}}{PICTURE formats \eqn{-} no equivalent in ksformat}
#' }
#'
#' Rows with SAS special missing values (\code{.A}\eqn{-}\code{.Z},
#' \code{._}) in the HLO field are logged as incompatible entries and skipped
#' because R has no equivalent concept.
#'
#' @param file Path to the CSV file exported from a SAS format catalogue.
#' @param register Logical; if \code{TRUE} (default), each imported format is
#'   registered in the global format library.
#' @param overwrite Logical; if \code{TRUE} (default), existing library entries
#'   with the same name are overwritten.
#'
#' @return A named list of \code{ks_format} and \code{ks_invalue} objects that
#'   were successfully imported. Returned invisibly.
#'
#' @export
#' @examples
#' # In SAS:
#' # proc format library=work cntlout=fmts; run;
#' # proc export data=fmts outfile="formats.csv" dbms=csv replace; run;
#'
#' csv_file <- system.file("extdata", "test_cntlout.csv", package = "ksformat")
#' imported <- fimport(csv_file)
#' flist()
#' fprint()
#' fclear()
fimport <- function(file, register = TRUE, overwrite = TRUE) {
  if (!file.exists(file)) {
    cli_abort("File not found: {.file {file}}")
  }

  data <- utils::read.csv(file, stringsAsFactors = FALSE, na.strings = "")

  # Normalize column names to uppercase
  names(data) <- toupper(names(data))

  # Validate required columns
  required <- c("FMTNAME", "START", "END", "LABEL", "TYPE")
  missing_cols <- setdiff(required, names(data))
  if (length(missing_cols) > 0) {
    cli_abort(c(
      "Required column{?s} missing from CSV: {.val {missing_cols}}",
      "i" = "Expected a SAS {.code PROC FORMAT CNTLOUT=} export."
    ))
  }

  # Supply defaults for optional columns
  if (!"HLO"   %in% names(data)) data$HLO   <- ""
  if (!"SEXCL" %in% names(data)) data$SEXCL <- "N"
  if (!"EEXCL" %in% names(data)) data$EEXCL <- "N"

  # Ensure character types for key fields
  data$FMTNAME <- trimws(as.character(data$FMTNAME))
  data$START   <- as.character(ifelse(is.na(data$START), "", data$START))
  data$END     <- as.character(ifelse(is.na(data$END),   "", data$END))
  data$LABEL   <- as.character(ifelse(is.na(data$LABEL), "", data$LABEL))
  data$TYPE    <- trimws(toupper(as.character(data$TYPE)))
  data$HLO     <- toupper(as.character(ifelse(is.na(data$HLO), "", data$HLO)))
  data$SEXCL   <- toupper(as.character(ifelse(is.na(data$SEXCL), "N", data$SEXCL)))
  data$EEXCL   <- toupper(as.character(ifelse(is.na(data$EEXCL), "N", data$EEXCL)))

  # ---- Classify format types ----
  supported_types <- c("N", "C", "I", "J")
  all_types <- unique(data$TYPE)
  unsupported <- setdiff(all_types, supported_types)

  if (length(unsupported) > 0) {
    # Identify formats with unsupported types
    for (utype in unsupported) {
      affected <- unique(data$FMTNAME[data$TYPE == utype])
      type_desc <- switch(utype,
        "P" = "PICTURE",
        paste0("unknown (", utype, ")")
      )
      cli_warn(c(
        "Skipping {type_desc} format{?s}: {.val {affected}}",
        "i" = "TYPE={.val {utype}} is not supported by ksformat."
      ))
    }
    data <- data[data$TYPE %in% supported_types, , drop = FALSE]
  }

  if (nrow(data) == 0) {
    cli_inform("No compatible formats found in {.file {file}}.")
    return(invisible(list()))
  }

  # ---- Group by FMTNAME + TYPE and build format objects ----
  format_key <- paste(data$FMTNAME, data$TYPE, sep = "|")
  groups <- split(seq_len(nrow(data)), format_key)

  result <- list()
  skipped_entries <- list()

  for (gname in names(groups)) {
    idx <- groups[[gname]]
    rows <- data[idx, , drop = FALSE]
    fmt_name   <- rows$FMTNAME[1]
    fmt_type   <- rows$TYPE[1]

    if (fmt_type %in% c("N", "C")) {
      obj <- .cntlout_to_ks_format(rows, fmt_name, fmt_type, skipped_entries)
      skipped_entries <- obj$skipped
      obj <- obj$format
    } else {
      obj <- .cntlout_to_ks_invalue(rows, fmt_name, fmt_type, skipped_entries)
      skipped_entries <- obj$skipped
      obj <- obj$invalue
    }

    if (is.null(obj)) next

    # Validate
    tryCatch(
      .format_validate(obj),
      error = function(e) {
        cli_warn(c(
          "Format {.val {fmt_name}} failed validation and was skipped.",
          "x" = conditionMessage(e)
        ))
        obj <<- NULL
      }
    )
    if (is.null(obj)) next

    # Register
    if (register) {
      .format_register(obj, name = fmt_name, overwrite = overwrite)
    }

    result[[fmt_name]] <- obj
  }

  # ---- Report skipped entries ----
  if (length(skipped_entries) > 0) {
    for (entry in skipped_entries) {
      cli_warn(c(
        "Skipped incompatible entry in format {.val {entry$format}}:",
        "x" = entry$reason
      ))
    }
  }

  n_fmt <- sum(vapply(result, inherits, logical(1), "ks_format"))
  n_inv <- sum(vapply(result, inherits, logical(1), "ks_invalue"))
  cli_inform(c(
    "v" = "Imported {n_fmt} format{?s} and {n_inv} invalue{?s} from {.file {file}}."
  ))

  return(invisible(result))
}


#' Convert CNTLOUT rows to ks_format
#' @keywords internal
.cntlout_to_ks_format <- function(rows, fmt_name, fmt_type, skipped) {
  type <- if (fmt_type == "N") "numeric" else "character"
  mappings <- list()
  missing_label <- NULL
  other_label   <- NULL
  multilabel    <- FALSE

  for (i in seq_len(nrow(rows))) {
    hlo   <- rows$HLO[i]
    start <- rows$START[i]
    end   <- rows$END[i]
    label <- rows$LABEL[i]
    sexcl <- rows$SEXCL[i]
    eexcl <- rows$EEXCL[i]

    # Check for multilabel flag
    if (grepl("T", hlo, fixed = TRUE)) {
      multilabel <- TRUE
    }

    # OTHER entry
    if (grepl("O", hlo, fixed = TRUE)) {
      other_label <- label
      next
    }

    # Special missing values (.A-.Z, ._) — no R equivalent
    if (grepl("S", hlo, fixed = TRUE)) {
      skipped <- c(skipped, list(list(
        format = fmt_name,
        reason = paste0(
          "SAS special missing value '", start,
          "' (HLO='", hlo, "') has no R equivalent."
        )
      )))
      next
    }

    # Standard missing (numeric: START is ".", character: blank)
    if (type == "numeric" && start == ".") {
      missing_label <- label
      next
    }
    if (type == "character" && start == "" && end == "" &&
        !grepl("[LH]", hlo)) {
      missing_label <- label
      next
    }

    # ---- Range vs discrete ----
    has_low  <- grepl("L", hlo, fixed = TRUE)
    has_high <- grepl("H", hlo, fixed = TRUE)

    is_range <- FALSE
    if (has_low || has_high) {
      is_range <- TRUE
    } else if (type == "numeric" && start != end) {
      is_range <- TRUE
    }

    if (is_range && type == "numeric") {
      low  <- if (has_low)  -Inf else suppressWarnings(as.numeric(start))
      high <- if (has_high)  Inf else suppressWarnings(as.numeric(end))

      if (is.na(low) || is.na(high)) {
        skipped <- c(skipped, list(list(
          format = fmt_name,
          reason = paste0(
            "Could not parse range bounds: START='", start,
            "', END='", end, "'."
          )
        )))
        next
      }

      inc_low  <- (sexcl != "Y")
      inc_high <- (eexcl != "Y")

      range_key <- paste0(low, ",", high, ",", toupper(inc_low), ",",
                          toupper(inc_high))
      mappings[[range_key]] <- label
    } else {
      mappings[[trimws(start)]] <- label
    }
  }

  if (length(mappings) == 0 && is.null(missing_label) && is.null(other_label)) {
    skipped <- c(skipped, list(list(
      format = fmt_name,
      reason = "No valid mappings could be extracted."
    )))
    return(list(format = NULL, skipped = skipped))
  }

  format_obj <- structure(
    list(
      name         = fmt_name,
      type         = type,
      mappings     = mappings,
      missing_label = missing_label,
      other_label  = other_label,
      multilabel   = multilabel,
      ignore_case  = FALSE,
      range_table  = .build_range_table(mappings, type),
      created      = Sys.time()
    ),
    class = "ks_format"
  )

  list(format = format_obj, skipped = skipped)
}


#' Convert CNTLOUT rows to ks_invalue
#' @keywords internal
.cntlout_to_ks_invalue <- function(rows, fmt_name, fmt_type, skipped) {
  target_type   <- if (fmt_type == "I") "numeric" else "character"
  mappings      <- list()
  missing_value <- NA

  for (i in seq_len(nrow(rows))) {
    hlo   <- rows$HLO[i]
    start <- rows$START[i]
    label <- rows$LABEL[i]

    # OTHER — ks_invalue doesn't support .other, skip
    if (grepl("O", hlo, fixed = TRUE)) {
      skipped <- c(skipped, list(list(
        format = fmt_name,
        reason = "OTHER directive in INVALUE is not supported; entry skipped."
      )))
      next
    }

    # Special missing
    if (grepl("S", hlo, fixed = TRUE)) {
      skipped <- c(skipped, list(list(
        format = fmt_name,
        reason = paste0(
          "SAS special missing value '", start,
          "' in INVALUE has no R equivalent."
        )
      )))
      next
    }

    # Range entries — ks_invalue doesn't support ranges
    has_low  <- grepl("L", hlo, fixed = TRUE)
    has_high <- grepl("H", hlo, fixed = TRUE)
    if (has_low || has_high || (start != rows$END[i] && start != "")) {
      skipped <- c(skipped, list(list(
        format = fmt_name,
        reason = paste0(
          "Range entry '", start, "'-'", rows$END[i],
          "' in INVALUE is not supported."
        )
      )))
      next
    }

    # Standard missing value
    if (start == "." || start == "") {
      # Convert label to the appropriate target type
      if (target_type == "numeric") {
        missing_value <- suppressWarnings(as.numeric(label))
      } else {
        missing_value <- label
      }
      next
    }

    # Discrete mapping: key = input text, value = output value
    val <- if (target_type == "numeric") {
      suppressWarnings(as.numeric(label))
    } else {
      label
    }

    if (target_type == "numeric" && is.na(val) && label != "") {
      skipped <- c(skipped, list(list(
        format = fmt_name,
        reason = paste0(
          "INVALUE label '", label, "' for key '", start,
          "' could not be converted to numeric."
        )
      )))
      next
    }

    mappings[[start]] <- val
  }

  if (length(mappings) == 0 && is.na(missing_value)) {
    skipped <- c(skipped, list(list(
      format = fmt_name,
      reason = "No valid INVALUE mappings could be extracted."
    )))
    return(list(invalue = NULL, skipped = skipped))
  }

  invalue_obj <- structure(
    list(
      name          = fmt_name,
      target_type   = target_type,
      mappings      = mappings,
      missing_value = missing_value,
      created       = Sys.time()
    ),
    class = "ks_invalue"
  )

  list(invalue = invalue_obj, skipped = skipped)
}


# ---------------------------------------------------------------------------
# Internal parser helpers
# ---------------------------------------------------------------------------

#' Parse text lines into block structures
#' @keywords internal
.parse_blocks <- function(lines) {
  blocks <- list()
  current_block <- NULL
  in_block <- FALSE

  for (i in seq_along(lines)) {
    line <- trimws(lines[i])

    # Skip empty lines and comments
    if (line == "" || grepl("^(/\\*|\\*|//|#)", line)) {
      next
    }

    # Check for block start: VALUE or INVALUE
    block_match <- regmatches(line, regexec(
      "^(VALUE|INVALUE)\\s+([\\w.-]+)\\s*(?:\\(([^)]+)\\))?\\s*$",
      line, ignore.case = TRUE, perl = TRUE
    ))[[1]]

    if (length(block_match) >= 3) {
      if (in_block) {
        cli_warn(c(
          "Line {i}: New block started before previous block ended with {.val ;}.",
          "i" = "Closing previous block {.val {current_block$name}}."
        ))
        blocks <- c(blocks, list(current_block))
      }

      block_type <- toupper(block_match[2])
      block_name <- block_match[3]

      block_subtype <- if (length(block_match) >= 4 && block_match[4] != "") {
        trimws(block_match[4])
      } else {
        # Default: "auto" for VALUE, "numeric" for INVALUE
        if (block_type == "INVALUE") "numeric" else "auto"
      }

      # Parse multilabel flag from subtype
      block_multilabel <- FALSE
      if (grepl("multilabel", block_subtype, ignore.case = TRUE)) {
        block_multilabel <- TRUE
        block_subtype <- gsub(",?\\s*multilabel", "", block_subtype,
                              ignore.case = TRUE)
        block_subtype <- gsub("multilabel\\s*,?", "", block_subtype,
                              ignore.case = TRUE)
        block_subtype <- trimws(block_subtype)
        if (block_subtype == "") {
          block_subtype <- if (block_type == "INVALUE") "numeric" else "auto"
        }
      }

      # Parse nocase flag from subtype
      block_nocase <- FALSE
      if (grepl("nocase", block_subtype, ignore.case = TRUE)) {
        block_nocase <- TRUE
        block_subtype <- gsub(",?\\s*nocase", "", block_subtype,
                              ignore.case = TRUE)
        block_subtype <- gsub("nocase\\s*,?", "", block_subtype,
                              ignore.case = TRUE)
        block_subtype <- trimws(block_subtype)
        if (block_subtype == "") {
          block_subtype <- if (block_type == "INVALUE") "numeric" else "auto"
        }
      }

      # Parse format: spec from subtype (e.g. "format: %Y-%m-%d")
      block_date_format <- NULL
      fmt_match <- regmatches(block_subtype, regexec(
        "format:\\s*([^,)]+)", block_subtype, ignore.case = TRUE
      ))[[1]]
      if (length(fmt_match) >= 2 && fmt_match[1] != "") {
        block_date_format <- trimws(fmt_match[2])
        block_subtype <- gsub(",?\\s*format:\\s*[^,)]+", "", block_subtype,
                              ignore.case = TRUE)
        block_subtype <- gsub("format:\\s*[^,)]+\\s*,?", "", block_subtype,
                              ignore.case = TRUE)
        block_subtype <- trimws(block_subtype)
        if (block_subtype == "") {
          block_subtype <- if (block_type == "INVALUE") "numeric" else "auto"
        }
      }

      # Parse range_subtype: option for stratified_range
      block_range_subtype <- NULL
      rs_match <- regmatches(block_subtype, regexec(
        "range_subtype:\\s*([A-Za-z]+)", block_subtype, ignore.case = TRUE
      ))[[1]]
      if (length(rs_match) >= 2 && rs_match[1] != "") {
        block_range_subtype <- tolower(trimws(rs_match[2]))
        block_subtype <- gsub(",?\\s*range_subtype:\\s*[A-Za-z]+", "",
                              block_subtype, ignore.case = TRUE)
        block_subtype <- gsub("range_subtype:\\s*[A-Za-z]+\\s*,?", "",
                              block_subtype, ignore.case = TRUE)
        block_subtype <- trimws(block_subtype)
        if (block_subtype == "") block_subtype <- "auto"
      }

      # Parse strata_sep: option for stratified_range
      block_strata_sep <- NULL
      ss_match <- regmatches(block_subtype, regexec(
        "strata_sep:\\s*([^,)\\s]+)", block_subtype, ignore.case = TRUE
      ))[[1]]
      if (length(ss_match) >= 2 && ss_match[1] != "") {
        block_strata_sep <- trimws(ss_match[2])
        block_subtype <- gsub(",?\\s*strata_sep:\\s*[^,)\\s]+", "",
                              block_subtype, ignore.case = TRUE)
        block_subtype <- gsub("strata_sep:\\s*[^,)\\s]+\\s*,?", "",
                              block_subtype, ignore.case = TRUE)
        block_subtype <- trimws(block_subtype)
        if (block_subtype == "") block_subtype <- "auto"
      }

      current_block <- list(
        type = block_type,
        name = block_name,
        subtype = block_subtype,
        multilabel = block_multilabel,
        nocase = block_nocase,
        date_format = block_date_format,
        range_subtype = block_range_subtype,
        strata_sep = block_strata_sep,
        entries = list(),
        line_start = i
      )
      in_block <- TRUE
      next
    }

    # Warn if line looks like a block header but didn't match
    if (!in_block && grepl("^(VALUE|INVALUE)\\b", line, ignore.case = TRUE)) {
      cli_warn(c(
        "Line {i}: Looks like a block header but could not be parsed: {.val {line}}",
        "i" = "Check the format name or syntax."
      ))
      next
    }

    # Check for block end (standalone ; or trailing ;)
    ends_with_semi <- grepl(";\\s*$", line)
    if (grepl("^;\\s*$", line)) {
      if (in_block && !is.null(current_block)) {
        blocks <- c(blocks, list(current_block))
        current_block <- NULL
        in_block <- FALSE
      }
      next
    }

    # Warn about lines outside any block
    if (!in_block) {
      cli_warn("Line {i}: Ignoring line outside of any block: {.val {line}}")
      next
    }

    # Parse mapping line within a block
    if (!is.null(current_block)) {
      # Strip trailing comma and/or semicolon from mapping lines
      mapping_line <- sub("[,;]+\\s*$", "", line)
      entry <- .parse_mapping_line(mapping_line, i)
      if (!is.null(entry)) {
        current_block$entries <- c(current_block$entries, list(entry))
      }
      # Close block if line ended with ;
      if (ends_with_semi) {
        blocks <- c(blocks, list(current_block))
        current_block <- NULL
        in_block <- FALSE
      }
    }
  }

  # Handle unclosed block
  if (in_block && !is.null(current_block)) {
    cli_warn("Block {.val {current_block$name}} was not closed with {.val ;}. Closing automatically.")
    blocks <- c(blocks, list(current_block))
  }

  return(blocks)
}


#' Parse a single mapping line
#' @keywords internal
.parse_mapping_line <- function(line, line_num) {
  # Remove inline comments
  line <- sub("\\s*//.*$", "", line)
  line <- sub("\\s*/\\*.*\\*/\\s*$", "", line)
  line <- trimws(line)

  if (line == "") return(NULL)

  # Split on '='
  eq_pos <- regexpr("=", line)
  if (eq_pos < 0) {
    cli_warn("Line {line_num}: Could not parse mapping (no {.val =} found): {.val {line}}")
    return(NULL)
  }

  lhs <- trimws(substring(line, 1, eq_pos - 1))
  rhs <- trimws(substring(line, eq_pos + 1))

  # Check for (eval) marker before unquoting
  has_eval <- grepl("\\(eval\\)\\s*$", rhs)
  if (has_eval) {
    rhs <- trimws(sub("\\s*\\(eval\\)\\s*$", "", rhs))
  }

  # Handle unquoted NA/NaN literals before unquoting (e.g. .other = NA)
  is_quoted <- grepl("^([\"']).*\\1$", rhs)
  if (!is_quoted && rhs %in% c("NA", "NaN")) {
    rhs <- NA_character_
  } else {
    rhs <- .unquote(rhs)
  }

  # Set eval attribute if (eval) marker was found
  if (has_eval) {
    attr(rhs, "eval") <- TRUE
  }

  # Check for .missing / .other directives
  if (lhs == ".missing") {
    return(list(type = "missing", value = rhs))
  }
  if (lhs == ".other") {
    return(list(type = "other", value = rhs))
  }

  # Check for interval notation: [low, high) or (low, high] etc.
  interval_match <- regmatches(lhs, regexec(
    "^(\\[|\\()\\s*(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)\\s*,\\s*(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)\\s*(\\]|\\))$",
    lhs, ignore.case = TRUE
  ))[[1]]

  if (length(interval_match) == 5) {
    left_bracket <- interval_match[2]
    low_str <- trimws(interval_match[3])
    high_str <- trimws(interval_match[4])
    right_bracket <- interval_match[5]

    low_val <- .parse_range_bound(low_str, is_low = TRUE)
    high_val <- .parse_range_bound(high_str, is_low = FALSE)

    inc_low <- (left_bracket == "[")
    inc_high <- (right_bracket == "]")

    if (!is.na(low_val) && !is.na(high_val)) {
      return(list(type = "range", low = low_val, high = high_val,
                  inc_low = inc_low, inc_high = inc_high, label = rhs))
    }
  }

  # Check for date / datetime interval notation: [YYYY-MM-DD, YYYY-MM-DD)
  # or [YYYY-MM-DD HH:MM[:SS], ...]. Bounds are kept as strings so the
  # range_table builder can dispatch to .parse_date_range_key() /
  # .parse_datetime_range_key() using the format's date_format.
  date_bound <- "(\\d{4}-\\d{2}-\\d{2}(?:[ T]\\d{2}:\\d{2}(?::\\d{2})?)?|LOW|HIGH)"
  date_iv_re <- paste0("^(\\[|\\()\\s*", date_bound, "\\s*,\\s*",
                       date_bound, "\\s*(\\]|\\))$")
  date_iv_match <- regmatches(lhs, regexec(date_iv_re, lhs,
                                           ignore.case = TRUE))[[1]]

  if (length(date_iv_match) == 5) {
    left_bracket  <- date_iv_match[2]
    low_str       <- trimws(date_iv_match[3])
    high_str      <- trimws(date_iv_match[4])
    right_bracket <- date_iv_match[5]

    inc_low  <- (left_bracket == "[")
    inc_high <- (right_bracket == "]")

    return(list(type = "range", low = low_str, high = high_str,
                inc_low = inc_low, inc_high = inc_high, label = rhs,
                bound_kind = "date"))
  }

  # Check for legacy range: low - high pattern (no brackets)
  range_match <- regmatches(lhs, regexec(
    "^(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)\\s*-\\s*(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)$",
    lhs, ignore.case = TRUE
  ))[[1]]

  if (length(range_match) == 3) {
    low_str <- trimws(range_match[2])
    high_str <- trimws(range_match[3])

    low_val <- .parse_range_bound(low_str, is_low = TRUE)
    high_val <- .parse_range_bound(high_str, is_low = FALSE)

    if (!is.na(low_val) && !is.na(high_val)) {
      # Legacy syntax defaults to [low, high)
      return(list(type = "range", low = low_val, high = high_val,
                  inc_low = TRUE, inc_high = FALSE, label = rhs))
    }
  }

  # Discrete mapping
  lhs <- .unquote(lhs)
  return(list(type = "discrete", key = lhs, label = rhs))
}


#' Parse a range bound value
#' @keywords internal
.parse_range_bound <- function(s, is_low = TRUE) {
  s_upper <- toupper(s)

  if (s_upper == "LOW") return(-Inf)
  if (s_upper == "HIGH") return(Inf)

  val <- suppressWarnings(as.numeric(s))
  return(val)
}


#' Remove surrounding quotes from a string
#' @keywords internal
.unquote <- function(s) {
  if (is.na(s) || nchar(s) < 2L) return(s)
  if (grepl("^([\"']).*\\1$", s)) {
    return(substring(s, 2, nchar(s) - 1))
  }
  s
}


#' Convert a parsed block to a ks_format or ks_invalue object
#' @keywords internal
.block_to_format <- function(block) {
  if (block$type == "VALUE") {
    return(.block_to_ks_format(block))
  } else if (block$type == "INVALUE") {
    return(.block_to_ks_invalue(block))
  } else {
    cli_abort("Unknown block type: {.val {block$type}}.")
  }
}


#' Convert VALUE block to ks_format
#' @keywords internal
.block_to_ks_format <- function(block) {
  # Value types (case-sensitive: "Date", "POSIXct", "logical")
  if (block$subtype %in% .value_types) {
    return(.block_to_value_type_format(block))
  }

  # Handle date/time/datetime blocks (case-insensitive)
  if (tolower(block$subtype) %in% c("date", "time", "datetime")) {
    return(.block_to_ks_datetime_format(block))
  }

  # Stratified range blocks: build mappings then delegate to fnew()
  if (identical(tolower(block$subtype), "stratified_range")) {
    return(.block_to_stratified_range_format(block))
  }

  mappings <- list()
  missing_label <- NULL
  other_label <- NULL

  for (entry in block$entries) {
    if (entry$type == "missing") {
      missing_label <- entry$value
    } else if (entry$type == "other") {
      other_label <- entry$value
    } else if (entry$type == "discrete") {
      mappings[[entry$key]] <- entry$label
    } else if (entry$type == "range") {
      inc_low <- if (!is.null(entry$inc_low)) entry$inc_low else TRUE
      inc_high <- if (!is.null(entry$inc_high)) entry$inc_high else FALSE
      range_key <- paste0(entry$low, ",", entry$high, ",",
                          toupper(inc_low), ",", toupper(inc_high))
      mappings[[range_key]] <- entry$label
    }
  }

  # Build the format
  type <- block$subtype

  format_obj <- structure(
    list(
      name = block$name,
      type = type,
      mappings = mappings,
      missing_label = missing_label,
      other_label = other_label,
      multilabel = isTRUE(block$multilabel),
      ignore_case = isTRUE(block$nocase),
      date_format = block$date_format,
      created = Sys.time()
    ),
    class = "ks_format"
  )

  # Auto-detect type if needed
  if (type == "auto") {
    has_ranges <- any(vapply(block$entries, function(e) identical(e$type, "range"), logical(1L)))
    has_date_bounds <- any(vapply(block$entries, function(e) {
      identical(e$type, "range") && identical(e$bound_kind, "date")
    }, logical(1L)))
    if (has_date_bounds) {
      # Choose date_range vs datetime_range based on whether any bound
      # carries a time component.
      has_time <- any(vapply(block$entries, function(e) {
        identical(e$type, "range") && identical(e$bound_kind, "date") &&
          (grepl("[ T]\\d{2}:\\d{2}", as.character(e$low)) ||
           grepl("[ T]\\d{2}:\\d{2}", as.character(e$high)))
      }, logical(1L)))
      format_obj$type <- if (has_time) "datetime_range" else "date_range"
    } else if (has_ranges) {
      format_obj$type <- "numeric"
    } else {
      format_obj$type <- detect_format_type(names(mappings))
    }
  }

  format_obj$range_table <- .build_range_table(mappings, format_obj$type,
                                               format_obj$date_format)

  return(format_obj)
}


#' Convert VALUE block with stratified_range type to ks_format
#'
#' Stratified blocks support both canonical keys
#' (\code{"STRATUM<sep>low,high,inc_low,inc_high"}) supplied as quoted
#' discrete LHS, and the friendly interval form
#' (\code{STRATUM <sep> [low, high)}). Per-stratum directives appear as
#' \code{"STRATUM<sep>.missing" = "..."} or, with the \code{.missing|S}
#' shorthand, as plain discrete entries that are recognised here.
#' @keywords internal
#' @noRd
.block_to_stratified_range_format <- function(block) {
  strata_sep <- if (!is.null(block$strata_sep)) block$strata_sep else "|"
  range_subtype <- if (!is.null(block$range_subtype)) block$range_subtype else "numeric"
  date_format <- block$date_format

  # Escape strata_sep for regex use (special chars: . | ( ) [ ] { } * + ? ^ $ \)
  sep_re <- gsub("([\\.|()\\[\\]{}*+?^$\\\\])", "\\\\\\1", strata_sep,
                 perl = TRUE)

  num_token <- "(?:-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)"
  date_token <- "(?:\\d{4}-\\d{2}-\\d{2}(?:[ T]\\d{2}:\\d{2}(?::\\d{2})?)?|LOW|HIGH)"

  mappings <- list()
  missing_label <- NULL
  other_label <- NULL

  canonicalise <- function(key) {
    if (startsWith(key, paste0(".missing", strata_sep))) {
      return(list(kind = "missing_stratum",
                  stratum = substr(key,
                                   nchar(".missing") + nchar(strata_sep) + 1L,
                                   nchar(key))))
    }
    if (startsWith(key, paste0(".other", strata_sep))) {
      return(list(kind = "other_stratum",
                  stratum = substr(key,
                                   nchar(".other") + nchar(strata_sep) + 1L,
                                   nchar(key))))
    }
    # Already canonical?
    split_try <- .split_stratified_key(key, strata_sep, range_subtype,
                                        date_format)
    if (!is.null(split_try)) {
      return(list(kind = "canonical", key = key))
    }
    # Friendly interval: stratum<sep>[low,high)
    tok <- if (range_subtype == "numeric") num_token else date_token
    pat <- paste0("^(.+?)", sep_re, "\\s*(\\[|\\()\\s*(",
                  tok, ")\\s*,\\s*(", tok, ")\\s*(\\]|\\))\\s*$")
    m <- regmatches(key, regexec(pat, key, perl = TRUE))[[1]]
    if (length(m) == 6L) {
      stratum <- .unquote(trimws(m[2]))
      inc_low <- (m[3] == "[")
      low_str <- m[4]
      high_str <- m[5]
      inc_high <- (m[6] == "]")
      norm_bound <- function(s) {
        su <- toupper(s)
        if (range_subtype == "numeric") {
          if (su %in% c("LOW", "-INF")) return("-Inf")
          if (su %in% c("HIGH", "INF")) return("Inf")
          return(s)
        }
        # date / datetime
        if (su %in% c("LOW", "-INF")) return("LOW")
        if (su %in% c("HIGH", "INF")) return("HIGH")
        s
      }
      low_norm <- norm_bound(low_str)
      high_norm <- norm_bound(high_str)
      canon <- paste(low_norm, high_norm, toupper(inc_low),
                     toupper(inc_high), sep = ",")
      return(list(kind = "canonical",
                  key = paste0(stratum, strata_sep, canon)))
    }
    NULL
  }

  for (entry in block$entries) {
    if (entry$type == "missing") {
      missing_label <- entry$value
      next
    }
    if (entry$type == "other") {
      other_label <- entry$value
      next
    }
    if (entry$type == "range") {
      cli_warn(c(
        "Bare range entry in stratified_range block has no stratum; ignoring.",
        "i" = "Use {.val STRATUM{strata_sep}[low, high)} syntax."
      ))
      next
    }
    if (entry$type != "discrete") next

    info <- canonicalise(entry$key)
    if (is.null(info)) {
      cli_warn(c(
        "Could not parse stratified entry {.val {entry$key}}; treating as discrete.",
        "i" = "Expected {.val STRATUM{strata_sep}RANGE} key."
      ))
      mappings[[entry$key]] <- entry$label
      next
    }
    if (info$kind == "canonical") {
      mappings[[info$key]] <- entry$label
    } else if (info$kind == "missing_stratum") {
      mappings[[paste0(".missing", strata_sep, info$stratum)]] <- entry$label
    } else if (info$kind == "other_stratum") {
      mappings[[paste0(".other", strata_sep, info$stratum)]] <- entry$label
    }
  }

  # Assemble extras for fnew()
  extras <- list()
  if (!is.null(missing_label)) extras[[".missing"]] <- missing_label
  if (!is.null(other_label)) extras[[".other"]] <- other_label

  do.call(fnew, c(
    mappings, extras,
    list(
      name = block$name,
      type = "stratified_range",
      range_subtype = range_subtype,
      strata_sep = strata_sep,
      date_format = date_format,
      multilabel = isTRUE(block$multilabel),
      ignore_case = isTRUE(block$nocase)
    )
  ))
}


#' Convert VALUE block with a value type (Date/POSIXct/logical) to ks_format
#' @keywords internal
#' @noRd
.block_to_value_type_format <- function(block) {
  vtype <- block$subtype
  date_format <- block$date_format

  mappings <- list()

  for (entry in block$entries) {
    if (entry$type == "missing" || entry$type == "other") {
      # Silently ignore — value type formats always use typed NA
    } else if (entry$type == "discrete") {
      mappings[[entry$key]] <- .parse_typed_value(entry$label, vtype, date_format)
    } else if (entry$type == "range") {
      inc_low <- if (!is.null(entry$inc_low)) entry$inc_low else TRUE
      inc_high <- if (!is.null(entry$inc_high)) entry$inc_high else FALSE
      range_key <- paste0(entry$low, ",", entry$high, ",",
                          toupper(inc_low), ",", toupper(inc_high))
      mappings[[range_key]] <- .parse_typed_value(entry$label, vtype, date_format)
    }
  }

  format_obj <- structure(
    list(
      name = block$name,
      type = vtype,
      mappings = mappings,
      missing_label = NULL,
      other_label = NULL,
      multilabel = FALSE,
      ignore_case = isTRUE(block$nocase),
      date_format = date_format,
      created = Sys.time()
    ),
    class = "ks_format"
  )

  format_obj
}


#' Convert VALUE block with date/time/datetime type to ks_format
#' @keywords internal
.block_to_ks_datetime_format <- function(block) {
  # Look for a pattern entry (discrete mapping with key "pattern")
  pattern <- NULL
  missing_label <- NULL

  for (entry in block$entries) {
    if (entry$type == "discrete" && tolower(entry$key) == "pattern") {
      pattern <- entry$label
    } else if (entry$type == "missing") {
      missing_label <- entry$value
    }
  }

  if (is.null(pattern)) {
    cli_abort(c(
      "Date/time format {.val {block$name}} must have a {.val pattern} entry.",
      "i" = 'e.g., pattern = "DATE9."'
    ))
  }

  fmt <- fnew_date(
    pattern = pattern,
    name = block$name,
    type = tolower(block$subtype)
  )

  if (!is.null(missing_label)) {
    fmt$missing_label <- missing_label
  }

  return(fmt)
}


#' Convert INVALUE block to ks_invalue
#' @keywords internal
.block_to_ks_invalue <- function(block) {
  mappings <- list()

  for (entry in block$entries) {
    if (entry$type == "discrete") {
      mappings[[entry$key]] <- entry$label
    } else if (entry$type == "missing") {
      mappings[[".missing_marker"]] <- entry$value
    }
  }

  # Extract missing_value if set
  missing_value <- NA
  if (".missing_marker" %in% names(mappings)) {
    mv <- mappings[[".missing_marker"]]
    if (toupper(mv) == "NA" || mv == ".") {
      missing_value <- NA
    } else {
      missing_value <- mv
    }
    mappings[[".missing_marker"]] <- NULL
  }

  # Use block subtype; for INVALUE blocks, default is "numeric"
  target_type <- block$subtype

  invalue_obj <- structure(
    list(
      name = block$name,
      target_type = target_type,
      mappings = mappings,
      missing_value = missing_value,
      created = Sys.time()
    ),
    class = "ks_invalue"
  )

  return(invalue_obj)
}


# ---------------------------------------------------------------------------
# Internal export helpers
# ---------------------------------------------------------------------------

#' Convert ks_format to SAS-like text
#' @keywords internal
.format_to_text <- function(fmt, name) {
  if (fmt$type %in% c("date", "time", "datetime")) {
    return(.datetime_format_to_text(fmt, name))
  }
  if (.is_stratified_type(fmt$type)) {
    return(.stratified_format_to_text(fmt, name))
  }

  parts <- vector("list", length(fmt$mappings) + 4L)
  idx <- 1L

  type_str <- if (!is.null(fmt$type) && fmt$type != "auto") fmt$type else NULL
  ml_str <- if (isTRUE(fmt$multilabel)) "multilabel" else NULL
  nc_str <- if (isTRUE(fmt$ignore_case)) "nocase" else NULL
  df_str <- if (!is.null(fmt$date_format)) paste0("format: ", fmt$date_format) else NULL
  annot_parts <- c(type_str, ml_str, nc_str, df_str)
  type_part <- if (length(annot_parts) > 0L) {
    paste0(" (", paste(annot_parts, collapse = ", "), ")")
  } else {
    ""
  }
  parts[[idx]] <- paste0("VALUE ", name, type_part); idx <- idx + 1L

  is_vtype <- .is_value_type(fmt$type)
  is_date_rng <- .is_date_range_type(fmt$type)

  for (i in seq_along(fmt$mappings)) {
    key <- names(fmt$mappings)[i]
    label <- fmt$mappings[[i]]

    # Convert native values to strings for text output
    label_str <- if (is_vtype) {
      .typed_value_to_string(label, fmt$type, fmt$date_format)
    } else {
      as.character(label)
    }

    eval_suffix <- if (!is_vtype && .has_eval_attr(label)) " (eval)" else ""

    # Try to display range keys in interval notation
    parsed <- if (is_vtype && fmt$type %in% c("Date", "POSIXct")) {
      .parse_date_range_key(key, fmt$date_format)
    } else if (fmt$type == "date_range") {
      .parse_date_range_key(key, fmt$date_format)
    } else if (fmt$type == "datetime_range") {
      .parse_datetime_range_key(key, fmt$date_format)
    } else {
      .parse_range_key(key)
    }
    if (!is.null(parsed)) {
      left_bracket <- if (parsed$inc_low) "[" else "("
      right_bracket <- if (parsed$inc_high) "]" else ")"
      low <- if (is_vtype || is_date_rng) {
        .format_date_bound(parsed$low, fmt$date_format, fmt$type, is_low = TRUE)
      } else {
        .format_range_bound(parsed$low, is_low = TRUE)
      }
      high <- if (is_vtype || is_date_rng) {
        .format_date_bound(parsed$high, fmt$date_format, fmt$type, is_low = FALSE)
      } else {
        .format_range_bound(parsed$high, is_low = FALSE)
      }
      parts[[idx]] <- paste0("  ", left_bracket, low, ", ", high,
                             right_bracket, " = \"", label_str, "\"", eval_suffix)
    } else {
      parts[[idx]] <- paste0("  \"", key, "\" = \"", label_str, "\"", eval_suffix)
    }
    idx <- idx + 1L
  }

  if (!is.null(fmt$missing_label)) {
    miss_eval <- if (.has_eval_attr(fmt$missing_label)) " (eval)" else ""
    parts[[idx]] <- paste0("  .missing = \"", fmt$missing_label, "\"", miss_eval); idx <- idx + 1L
  }
  if (!is.null(fmt$other_label)) {
    other_eval <- if (.has_eval_attr(fmt$other_label)) " (eval)" else ""
    parts[[idx]] <- paste0("  .other = \"", fmt$other_label, "\"", other_eval); idx <- idx + 1L
  }
  parts[[idx]] <- ";"
  paste(unlist(parts[seq_len(idx)]), collapse = "\n")
}


#' Convert datetime ks_format to SAS-like text
#' @keywords internal
.datetime_format_to_text <- function(fmt, name) {
  parts <- vector("list", 4L)
  idx <- 1L

  parts[[idx]] <- paste0("VALUE ", name, " (", fmt$type, ")"); idx <- idx + 1L

  pattern_str <- if (!is.null(fmt$sas_name)) {
    paste0(fmt$sas_name, ".")
  } else {
    fmt$dt_pattern
  }
  parts[[idx]] <- paste0("  pattern = \"", pattern_str, "\""); idx <- idx + 1L

  if (!is.null(fmt$missing_label)) {
    parts[[idx]] <- paste0("  .missing = \"", fmt$missing_label, "\""); idx <- idx + 1L
  }
  parts[[idx]] <- ";"
  paste(unlist(parts[seq_len(idx)]), collapse = "\n")
}


#' Convert stratified_range ks_format to SAS-like text
#' @keywords internal
#' @noRd
.stratified_format_to_text <- function(fmt, name) {
  strata_sep <- if (!is.null(fmt$strata_sep)) fmt$strata_sep else "|"
  range_subtype <- if (!is.null(fmt$range_subtype)) fmt$range_subtype else "numeric"

  header_opts <- c("stratified_range",
                   paste0("range_subtype: ", range_subtype),
                   paste0("strata_sep: ", strata_sep))
  if (isTRUE(fmt$multilabel)) header_opts <- c(header_opts, "multilabel")
  if (isTRUE(fmt$ignore_case)) header_opts <- c(header_opts, "nocase")
  if (!is.null(fmt$date_format)) {
    header_opts <- c(header_opts, paste0("format: ", fmt$date_format))
  }
  header <- paste0("VALUE ", name, " (", paste(header_opts, collapse = ", "), ")")

  # Group mappings by stratum, first-seen order
  strata_order <- character(0)
  grouped <- list()
  parser <- switch(range_subtype,
    numeric  = function(k) .parse_range_key(k),
    date     = function(k) .parse_date_range_key(k, fmt$date_format),
    datetime = function(k) .parse_datetime_range_key(k, fmt$date_format)
  )
  for (i in seq_along(fmt$mappings)) {
    key <- names(fmt$mappings)[i]
    sp <- .split_stratified_key(key, strata_sep, range_subtype,
                                fmt$date_format)
    s <- if (is.null(sp)) "" else sp$stratum
    rk <- if (is.null(sp)) key else sp$range_key
    if (!s %in% strata_order) {
      strata_order <- c(strata_order, s)
      grouped[[s]] <- list()
    }
    grouped[[s]][[length(grouped[[s]]) + 1L]] <- list(
      range_key = rk, label = fmt$mappings[[i]]
    )
  }

  is_date_rng <- range_subtype %in% c("date", "datetime")
  inner_type <- .stratified_subtype_to_range_type(range_subtype)
  out <- character(0)
  out <- c(out, header)
  for (s in strata_order) {
    for (entry in grouped[[s]]) {
      parsed <- parser(entry$range_key)
      label_str <- as.character(entry$label)
      eval_suffix <- if (.has_eval_attr(entry$label)) " (eval)" else ""
      if (!is.null(parsed)) {
        lb <- if (parsed$inc_low) "[" else "("
        rb <- if (parsed$inc_high) "]" else ")"
        low <- if (is_date_rng) {
          .format_date_bound(parsed$low, fmt$date_format, inner_type, is_low = TRUE)
        } else {
          .format_range_bound(parsed$low, is_low = TRUE)
        }
        high <- if (is_date_rng) {
          .format_date_bound(parsed$high, fmt$date_format, inner_type, is_low = FALSE)
        } else {
          .format_range_bound(parsed$high, is_low = FALSE)
        }
        out <- c(out, paste0("  \"", s, "\"", strata_sep, lb, low, ", ",
                             high, rb, " = \"", label_str, "\"", eval_suffix))
      } else {
        out <- c(out, paste0("  \"", s, strata_sep, entry$range_key,
                             "\" = \"", label_str, "\"", eval_suffix))
      }
    }
    if (!is.null(fmt$missing_by_stratum) && s %in% names(fmt$missing_by_stratum)) {
      out <- c(out, paste0("  \".missing", strata_sep, s, "\" = \"",
                           fmt$missing_by_stratum[[s]], "\""))
    }
    if (!is.null(fmt$other_by_stratum) && s %in% names(fmt$other_by_stratum)) {
      out <- c(out, paste0("  \".other", strata_sep, s, "\" = \"",
                           fmt$other_by_stratum[[s]], "\""))
    }
  }
  if (!is.null(fmt$missing_label)) {
    out <- c(out, paste0("  .missing = \"", fmt$missing_label, "\""))
  }
  if (!is.null(fmt$other_label)) {
    out <- c(out, paste0("  .other = \"", fmt$other_label, "\""))
  }
  out <- c(out, ";")
  paste(out, collapse = "\n")
}


#' Convert ks_invalue to SAS-like text
#' @keywords internal
.invalue_to_text <- function(inv, name) {
  parts <- vector("list", length(inv$mappings) + 3L)
  idx <- 1L

  type_part <- if (!is.null(inv$target_type) && inv$target_type != "numeric") {
    paste0(" (", inv$target_type, ")")
  } else {
    ""
  }
  parts[[idx]] <- paste0("INVALUE ", name, type_part); idx <- idx + 1L

  for (i in seq_along(inv$mappings)) {
    key <- names(inv$mappings)[i]
    value <- inv$mappings[[i]]
    eval_suffix <- if (.has_eval_attr(value)) " (eval)" else ""
    parts[[idx]] <- paste0("  \"", key, "\" = \"", value, "\"", eval_suffix); idx <- idx + 1L
  }

  if (!is.null(inv$missing_value) && !identical(inv$missing_value, NA)) {
    parts[[idx]] <- paste0("  .missing = \"", inv$missing_value, "\""); idx <- idx + 1L
  }
  parts[[idx]] <- ";"
  paste(unlist(parts[seq_len(idx)]), collapse = "\n")
}


#' Format a numeric range bound for text output
#' @keywords internal
.format_range_bound <- function(val, is_low = TRUE) {
  if (is.infinite(val)) {
    if (val < 0) return("LOW")
    return("HIGH")
  }
  return(as.character(val))
}

#' Format a Date / POSIXct range bound for text output
#'
#' Renders infinite bounds as \code{LOW}/\code{HIGH}; otherwise formats the
#' Date/POSIXct value using \code{date_format} when supplied, else ISO 8601.
#' Used for value-type (Date/POSIXct) formats and the date_range /
#' datetime_range types.
#' @keywords internal
.format_date_bound <- function(val, date_format = NULL, type = NULL,
                               is_low = TRUE) {
  num <- as.numeric(val)
  if (is.na(num)) return("")
  if (is.infinite(num)) return(if (num < 0) "LOW" else "HIGH")
  if (!is.null(date_format)) return(format(val, date_format))
  as.character(val)
}

Try the ksformat package in your browser

Any scripts or data that you put into this service are public.

ksformat documentation built on May 21, 2026, 9:07 a.m.