Nothing
#' Parse Format Definitions from 'SAS'-like Text
#'
#' Reads format definitions written in a human-friendly 'SAS'-like syntax
#' and returns a list of \code{ks_format} and/or \code{ks_invalue} objects.
#' All parsed formats are automatically stored in the global format library.
#'
#' @param text Character string or character vector containing format definitions.
#' If a character vector, lines are concatenated with newlines.
#' @param file Path to a text file containing format definitions.
#' Exactly one of \code{text} or \code{file} must be provided.
#' @param verbose Logical. If \code{TRUE}, the parsed formats are
#' printed to the console. Default is \code{FALSE} to suppress output
#' (the result is returned invisibly).
#'
#' @return A named list of \code{ks_format} and/or \code{ks_invalue} objects.
#' Names correspond to the format names defined in the text.
#' All formats are automatically registered in the global format library.
#'
#' @details
#' The syntax supports two block types:
#'
#' \strong{VALUE} blocks define formats (value -> label):
#' \preformatted{
#' VALUE name (type)
#' "value1" = "Label 1"
#' "value2" = "Label 2"
#' [low, high) = "Range Label (half-open)"
#' (low, high] = "Range Label (open-low, closed-high)"
#' .missing = "Missing Label"
#' .other = "Other Label"
#' ;
#' }
#'
#' \strong{INVALUE} blocks define reverse formats (label -> numeric value):
#' \preformatted{
#' INVALUE name
#' "Label 1" = 1
#' "Label 2" = 2
#' ;
#' }
#'
#' \strong{Syntax rules:}
#' \itemize{
#' \item Blocks start with \code{VALUE} or \code{INVALUE} keyword and end with \code{;}
#' \item The type in parentheses is optional; defaults to \code{"auto"} for VALUE,
#' \code{"numeric"} for INVALUE
#' \item Values can be quoted or unquoted
#' \item Ranges use interval notation with explicit bounds
#' \item Legacy range syntax \code{low - high} is also supported
#' \item Special range keywords: \code{LOW} (-Inf) and \code{HIGH} (Inf)
#' \item \code{.missing} and \code{.other} are special directives
#' \item Lines starting with \code{/*}, \code{*}, \code{//}, or \code{#} are comments
#' }
#'
#' \strong{Block options:}
#'
#' Comma-separated options can be placed inside the parentheses after the type:
#' \itemize{
#' \item \code{nocase} — enables case-insensitive key matching (equivalent to
#' \code{ignore_case = TRUE} in \code{\link{fnew}}).
#' \item \code{multilabel} — allows overlapping ranges where a single value
#' matches multiple labels (used with \code{\link{fput_all}}).
#' }
#' Options can be combined: \code{VALUE name (character, nocase, multilabel)}.
#'
#' @export
#' @examples
#' # Parse multiple format definitions from text
#' fparse(text = '
#' VALUE sex (character)
#' "M" = "Male"
#' "F" = "Female"
#' .missing = "Unknown"
#' ;
#'
#' VALUE age (numeric)
#' [0, 18) = "Child"
#' [18, 65) = "Adult"
#' [65, HIGH] = "Senior"
#' .missing = "Age Unknown"
#' ;
#'
#' // Invalue block
#' INVALUE race_inv
#' "White" = 1
#' "Black" = 2
#' "Asian" = 3
#' ;
#' ')
#'
#' fput(c("M", "F", NA), "sex")
#' fputn(c(5, 25, 70, NA), "age")
#' finputn(c("White", "Black"), "race_inv")
#' flist()
#' fprint()
#' fclear()
#'
#' # Parse date/time/datetime format definitions
#' fparse(text = '
#' VALUE enrldt (date)
#' pattern = "DATE9."
#' .missing = "Not Enrolled"
#' ;
#'
#' VALUE visit_time (time)
#' pattern = "TIME8."
#' ;
#'
#' VALUE stamp (datetime)
#' pattern = "DATETIME20."
#' ;
#' ')
#'
#' fput(as.Date("2025-03-01"), "enrldt")
#' fput(36000, "visit_time")
#' fput(as.POSIXct("2025-03-01 10:00:00", tz = "UTC"), "stamp")
#' fclear()
#'
#' # Case-insensitive format (nocase option)
#' fparse(text = '
#' VALUE yesno (character, nocase)
#' "Y" = "Yes"
#' "N" = "No"
#' .other = "Unknown"
#' ;
#' ')
#' fput(c("y", "N", "YES"), "yesno")
#' # [1] "Yes" "No" "Unknown"
#' fclear()
#'
#' # Parse multilabel format
#' fparse(text = '
#' VALUE risk (numeric, multilabel)
#' [0, 3] = "Low Risk"
#' [0, 7] = "Monitored"
#' (3, 7] = "Medium Risk"
#' (7, 10] = "High Risk"
#' ;
#' ')
#' fput_all(c(2, 5, 9), "risk")
#' fclear()
fparse <- function(text = NULL, file = NULL, verbose = FALSE) {
if (is.null(text) && is.null(file)) {
cli_abort("Either {.arg text} or {.arg file} must be provided.")
}
if (!is.null(text) && !is.null(file)) {
cli_abort("Only one of {.arg text} or {.arg file} should be provided, not both.")
}
if (!is.null(text) && !is.character(text)) {
cli_abort("{.arg text} must be a character string or character vector.")
}
if (!is.null(file) && (!is.character(file) || length(file) != 1L)) {
cli_abort("{.arg file} must be a single file path string.")
}
if (!is.null(file)) {
if (!file.exists(file)) {
cli_abort("File not found: {.file {file}}")
}
lines <- readLines(file, warn = FALSE)
} else {
if (length(text) > 1) {
text <- paste(text, collapse = "\n")
}
lines <- strsplit(text, "\n")[[1]]
}
# Parse into blocks
blocks <- .parse_blocks(lines)
# Convert blocks to format objects and auto-register
result <- list()
for (block in blocks) {
obj <- .block_to_format(block)
# Validate
.format_validate(obj)
# Auto-register in library
.format_register(obj, name = block$name, overwrite = TRUE)
result[[block$name]] <- obj
}
if (verbose) result else invisible(result)
}
#' Export Formats to 'SAS'-like Text
#'
#' Converts \code{ks_format} and/or \code{ks_invalue} objects to
#' human-readable 'SAS'-like text representation.
#'
#' @param ... Named \code{ks_format} or \code{ks_invalue} objects to export.
#' @param formats A named list of format objects. Alternative to \code{...}.
#' @param file Optional file path to write the output to. If \code{NULL},
#' returns the text as a character string.
#'
#' @return If \code{file} is \code{NULL}, returns a character string with the
#' 'SAS'-like text. If \code{file} is specified, writes to the file and returns
#' the path invisibly.
#'
#' @export
#' @examples
#' # Export a character format
#' sex_fmt <- fnew("M" = "Male", "F" = "Female",
#' .missing = "Unknown", name = "sex")
#' cat(fexport(sex = sex_fmt))
#'
#' # Export a numeric range format
#' fparse(text = '
#' VALUE bmi (numeric)
#' [0, 18.5) = "Underweight"
#' [18.5, 25) = "Normal"
#' [25, 30) = "Overweight"
#' [30, HIGH] = "Obese"
#' .missing = "No data"
#' ;
#' ')
#' bmi_fmt <- format_get("bmi")
#' cat(fexport(bmi = bmi_fmt))
#'
#' # Export a multilabel format
#' risk_fmt <- fnew(
#' "0,3,TRUE,TRUE" = "Low Risk",
#' "0,7,TRUE,TRUE" = "Monitored",
#' "3,7,FALSE,TRUE" = "Medium Risk",
#' "7,10,FALSE,TRUE" = "High Risk",
#' name = "risk", type = "numeric", multilabel = TRUE
#' )
#' cat(fexport(risk = risk_fmt))
#'
#' # Export a date format
#' enrl_fmt <- fnew_date("DATE9.", name = "enrldt", .missing = "Not Enrolled")
#' cat(fexport(enrldt = enrl_fmt))
#' fclear()
fexport <- function(..., formats = NULL, file = NULL) {
if (is.null(formats)) {
formats <- list(...)
}
if (length(formats) == 0) {
cli_abort("At least one format object must be provided.")
}
# Ensure all items are named
fmt_names <- names(formats)
if (is.null(fmt_names)) {
fmt_names <- rep("", length(formats))
}
if (any(fmt_names == "")) {
for (i in seq_along(formats)) {
if (fmt_names[i] == "") {
obj_name <- formats[[i]]$name
if (!is.null(obj_name)) {
fmt_names[i] <- obj_name
} else {
fmt_names[i] <- paste0("unnamed_", i)
}
}
}
names(formats) <- fmt_names
}
text_blocks <- character(0)
for (nm in names(formats)) {
obj <- formats[[nm]]
if (inherits(obj, "ks_format")) {
text_blocks <- c(text_blocks, .format_to_text(obj, nm))
} else if (inherits(obj, "ks_invalue")) {
text_blocks <- c(text_blocks, .invalue_to_text(obj, nm))
} else {
cli_warn("Skipping {.val {nm}}: not a {.cls ks_format} or {.cls ks_invalue} object.")
}
}
output <- paste(text_blocks, collapse = "\n\n")
if (!is.null(file)) {
writeLines(output, file)
cli_inform("Formats written to: {.file {file}}")
return(invisible(file))
}
return(output)
}
#' Import Formats from 'SAS' PROC FORMAT CNTLOUT CSV
#'
#' Reads a CSV file produced by 'SAS' \code{PROC FORMAT} with
#' \code{CNTLOUT=} option (typically exported via \code{PROC EXPORT})
#' and converts compatible format definitions into \code{ks_format} and
#' \code{ks_invalue} objects.
#'
#' @details
#' The 'SAS' format catalogue CSV is expected to contain the standard CNTLOUT
#' columns: \code{FMTNAME}, \code{START}, \code{END}, \code{LABEL},
#' \code{TYPE}, \code{HLO}, \code{SEXCL}, \code{EEXCL}.
#'
#' \strong{Supported SAS format types:}
#' \describe{
#' \item{\code{N}}{Numeric VALUE format \eqn{\to} \code{ks_format} with
#' \code{type = "numeric"}}
#' \item{\code{C}}{Character VALUE format \eqn{\to} \code{ks_format} with
#' \code{type = "character"}}
#' \item{\code{I}}{Numeric INVALUE (informat) \eqn{\to} \code{ks_invalue}
#' with \code{target_type = "numeric"}}
#' \item{\code{J}}{Character INVALUE (informat) \eqn{\to} \code{ks_invalue}
#' with \code{target_type = "character"}}
#' }
#'
#' \strong{Incompatible types (logged with a warning):}
#' \describe{
#' \item{\code{P}}{PICTURE formats \eqn{-} no equivalent in ksformat}
#' }
#'
#' Rows with SAS special missing values (\code{.A}\eqn{-}\code{.Z},
#' \code{._}) in the HLO field are logged as incompatible entries and skipped
#' because R has no equivalent concept.
#'
#' @param file Path to the CSV file exported from a SAS format catalogue.
#' @param register Logical; if \code{TRUE} (default), each imported format is
#' registered in the global format library.
#' @param overwrite Logical; if \code{TRUE} (default), existing library entries
#' with the same name are overwritten.
#'
#' @return A named list of \code{ks_format} and \code{ks_invalue} objects that
#' were successfully imported. Returned invisibly.
#'
#' @export
#' @examples
#' # In SAS:
#' # proc format library=work cntlout=fmts; run;
#' # proc export data=fmts outfile="formats.csv" dbms=csv replace; run;
#'
#' csv_file <- system.file("extdata", "test_cntlout.csv", package = "ksformat")
#' imported <- fimport(csv_file)
#' flist()
#' fprint()
#' fclear()
fimport <- function(file, register = TRUE, overwrite = TRUE) {
if (!file.exists(file)) {
cli_abort("File not found: {.file {file}}")
}
data <- utils::read.csv(file, stringsAsFactors = FALSE, na.strings = "")
# Normalize column names to uppercase
names(data) <- toupper(names(data))
# Validate required columns
required <- c("FMTNAME", "START", "END", "LABEL", "TYPE")
missing_cols <- setdiff(required, names(data))
if (length(missing_cols) > 0) {
cli_abort(c(
"Required column{?s} missing from CSV: {.val {missing_cols}}",
"i" = "Expected a SAS {.code PROC FORMAT CNTLOUT=} export."
))
}
# Supply defaults for optional columns
if (!"HLO" %in% names(data)) data$HLO <- ""
if (!"SEXCL" %in% names(data)) data$SEXCL <- "N"
if (!"EEXCL" %in% names(data)) data$EEXCL <- "N"
# Ensure character types for key fields
data$FMTNAME <- trimws(as.character(data$FMTNAME))
data$START <- as.character(ifelse(is.na(data$START), "", data$START))
data$END <- as.character(ifelse(is.na(data$END), "", data$END))
data$LABEL <- as.character(ifelse(is.na(data$LABEL), "", data$LABEL))
data$TYPE <- trimws(toupper(as.character(data$TYPE)))
data$HLO <- toupper(as.character(ifelse(is.na(data$HLO), "", data$HLO)))
data$SEXCL <- toupper(as.character(ifelse(is.na(data$SEXCL), "N", data$SEXCL)))
data$EEXCL <- toupper(as.character(ifelse(is.na(data$EEXCL), "N", data$EEXCL)))
# ---- Classify format types ----
supported_types <- c("N", "C", "I", "J")
all_types <- unique(data$TYPE)
unsupported <- setdiff(all_types, supported_types)
if (length(unsupported) > 0) {
# Identify formats with unsupported types
for (utype in unsupported) {
affected <- unique(data$FMTNAME[data$TYPE == utype])
type_desc <- switch(utype,
"P" = "PICTURE",
paste0("unknown (", utype, ")")
)
cli_warn(c(
"Skipping {type_desc} format{?s}: {.val {affected}}",
"i" = "TYPE={.val {utype}} is not supported by ksformat."
))
}
data <- data[data$TYPE %in% supported_types, , drop = FALSE]
}
if (nrow(data) == 0) {
cli_inform("No compatible formats found in {.file {file}}.")
return(invisible(list()))
}
# ---- Group by FMTNAME + TYPE and build format objects ----
format_key <- paste(data$FMTNAME, data$TYPE, sep = "|")
groups <- split(seq_len(nrow(data)), format_key)
result <- list()
skipped_entries <- list()
for (gname in names(groups)) {
idx <- groups[[gname]]
rows <- data[idx, , drop = FALSE]
fmt_name <- rows$FMTNAME[1]
fmt_type <- rows$TYPE[1]
if (fmt_type %in% c("N", "C")) {
obj <- .cntlout_to_ks_format(rows, fmt_name, fmt_type, skipped_entries)
skipped_entries <- obj$skipped
obj <- obj$format
} else {
obj <- .cntlout_to_ks_invalue(rows, fmt_name, fmt_type, skipped_entries)
skipped_entries <- obj$skipped
obj <- obj$invalue
}
if (is.null(obj)) next
# Validate
tryCatch(
.format_validate(obj),
error = function(e) {
cli_warn(c(
"Format {.val {fmt_name}} failed validation and was skipped.",
"x" = conditionMessage(e)
))
obj <<- NULL
}
)
if (is.null(obj)) next
# Register
if (register) {
.format_register(obj, name = fmt_name, overwrite = overwrite)
}
result[[fmt_name]] <- obj
}
# ---- Report skipped entries ----
if (length(skipped_entries) > 0) {
for (entry in skipped_entries) {
cli_warn(c(
"Skipped incompatible entry in format {.val {entry$format}}:",
"x" = entry$reason
))
}
}
n_fmt <- sum(vapply(result, inherits, logical(1), "ks_format"))
n_inv <- sum(vapply(result, inherits, logical(1), "ks_invalue"))
cli_inform(c(
"v" = "Imported {n_fmt} format{?s} and {n_inv} invalue{?s} from {.file {file}}."
))
return(invisible(result))
}
#' Convert CNTLOUT rows to ks_format
#' @keywords internal
.cntlout_to_ks_format <- function(rows, fmt_name, fmt_type, skipped) {
type <- if (fmt_type == "N") "numeric" else "character"
mappings <- list()
missing_label <- NULL
other_label <- NULL
multilabel <- FALSE
for (i in seq_len(nrow(rows))) {
hlo <- rows$HLO[i]
start <- rows$START[i]
end <- rows$END[i]
label <- rows$LABEL[i]
sexcl <- rows$SEXCL[i]
eexcl <- rows$EEXCL[i]
# Check for multilabel flag
if (grepl("T", hlo, fixed = TRUE)) {
multilabel <- TRUE
}
# OTHER entry
if (grepl("O", hlo, fixed = TRUE)) {
other_label <- label
next
}
# Special missing values (.A-.Z, ._) — no R equivalent
if (grepl("S", hlo, fixed = TRUE)) {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = paste0(
"SAS special missing value '", start,
"' (HLO='", hlo, "') has no R equivalent."
)
)))
next
}
# Standard missing (numeric: START is ".", character: blank)
if (type == "numeric" && start == ".") {
missing_label <- label
next
}
if (type == "character" && start == "" && end == "" &&
!grepl("[LH]", hlo)) {
missing_label <- label
next
}
# ---- Range vs discrete ----
has_low <- grepl("L", hlo, fixed = TRUE)
has_high <- grepl("H", hlo, fixed = TRUE)
is_range <- FALSE
if (has_low || has_high) {
is_range <- TRUE
} else if (type == "numeric" && start != end) {
is_range <- TRUE
}
if (is_range && type == "numeric") {
low <- if (has_low) -Inf else suppressWarnings(as.numeric(start))
high <- if (has_high) Inf else suppressWarnings(as.numeric(end))
if (is.na(low) || is.na(high)) {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = paste0(
"Could not parse range bounds: START='", start,
"', END='", end, "'."
)
)))
next
}
inc_low <- (sexcl != "Y")
inc_high <- (eexcl != "Y")
range_key <- paste0(low, ",", high, ",", toupper(inc_low), ",",
toupper(inc_high))
mappings[[range_key]] <- label
} else {
mappings[[trimws(start)]] <- label
}
}
if (length(mappings) == 0 && is.null(missing_label) && is.null(other_label)) {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = "No valid mappings could be extracted."
)))
return(list(format = NULL, skipped = skipped))
}
format_obj <- structure(
list(
name = fmt_name,
type = type,
mappings = mappings,
missing_label = missing_label,
other_label = other_label,
multilabel = multilabel,
ignore_case = FALSE,
range_table = .build_range_table(mappings, type),
created = Sys.time()
),
class = "ks_format"
)
list(format = format_obj, skipped = skipped)
}
#' Convert CNTLOUT rows to ks_invalue
#' @keywords internal
.cntlout_to_ks_invalue <- function(rows, fmt_name, fmt_type, skipped) {
target_type <- if (fmt_type == "I") "numeric" else "character"
mappings <- list()
missing_value <- NA
for (i in seq_len(nrow(rows))) {
hlo <- rows$HLO[i]
start <- rows$START[i]
label <- rows$LABEL[i]
# OTHER — ks_invalue doesn't support .other, skip
if (grepl("O", hlo, fixed = TRUE)) {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = "OTHER directive in INVALUE is not supported; entry skipped."
)))
next
}
# Special missing
if (grepl("S", hlo, fixed = TRUE)) {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = paste0(
"SAS special missing value '", start,
"' in INVALUE has no R equivalent."
)
)))
next
}
# Range entries — ks_invalue doesn't support ranges
has_low <- grepl("L", hlo, fixed = TRUE)
has_high <- grepl("H", hlo, fixed = TRUE)
if (has_low || has_high || (start != rows$END[i] && start != "")) {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = paste0(
"Range entry '", start, "'-'", rows$END[i],
"' in INVALUE is not supported."
)
)))
next
}
# Standard missing value
if (start == "." || start == "") {
# Convert label to the appropriate target type
if (target_type == "numeric") {
missing_value <- suppressWarnings(as.numeric(label))
} else {
missing_value <- label
}
next
}
# Discrete mapping: key = input text, value = output value
val <- if (target_type == "numeric") {
suppressWarnings(as.numeric(label))
} else {
label
}
if (target_type == "numeric" && is.na(val) && label != "") {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = paste0(
"INVALUE label '", label, "' for key '", start,
"' could not be converted to numeric."
)
)))
next
}
mappings[[start]] <- val
}
if (length(mappings) == 0 && is.na(missing_value)) {
skipped <- c(skipped, list(list(
format = fmt_name,
reason = "No valid INVALUE mappings could be extracted."
)))
return(list(invalue = NULL, skipped = skipped))
}
invalue_obj <- structure(
list(
name = fmt_name,
target_type = target_type,
mappings = mappings,
missing_value = missing_value,
created = Sys.time()
),
class = "ks_invalue"
)
list(invalue = invalue_obj, skipped = skipped)
}
# ---------------------------------------------------------------------------
# Internal parser helpers
# ---------------------------------------------------------------------------
#' Parse text lines into block structures
#' @keywords internal
.parse_blocks <- function(lines) {
blocks <- list()
current_block <- NULL
in_block <- FALSE
for (i in seq_along(lines)) {
line <- trimws(lines[i])
# Skip empty lines and comments
if (line == "" || grepl("^(/\\*|\\*|//|#)", line)) {
next
}
# Check for block start: VALUE or INVALUE
block_match <- regmatches(line, regexec(
"^(VALUE|INVALUE)\\s+([\\w.-]+)\\s*(?:\\(([^)]+)\\))?\\s*$",
line, ignore.case = TRUE, perl = TRUE
))[[1]]
if (length(block_match) >= 3) {
if (in_block) {
cli_warn(c(
"Line {i}: New block started before previous block ended with {.val ;}.",
"i" = "Closing previous block {.val {current_block$name}}."
))
blocks <- c(blocks, list(current_block))
}
block_type <- toupper(block_match[2])
block_name <- block_match[3]
block_subtype <- if (length(block_match) >= 4 && block_match[4] != "") {
trimws(block_match[4])
} else {
# Default: "auto" for VALUE, "numeric" for INVALUE
if (block_type == "INVALUE") "numeric" else "auto"
}
# Parse multilabel flag from subtype
block_multilabel <- FALSE
if (grepl("multilabel", block_subtype, ignore.case = TRUE)) {
block_multilabel <- TRUE
block_subtype <- gsub(",?\\s*multilabel", "", block_subtype,
ignore.case = TRUE)
block_subtype <- gsub("multilabel\\s*,?", "", block_subtype,
ignore.case = TRUE)
block_subtype <- trimws(block_subtype)
if (block_subtype == "") {
block_subtype <- if (block_type == "INVALUE") "numeric" else "auto"
}
}
# Parse nocase flag from subtype
block_nocase <- FALSE
if (grepl("nocase", block_subtype, ignore.case = TRUE)) {
block_nocase <- TRUE
block_subtype <- gsub(",?\\s*nocase", "", block_subtype,
ignore.case = TRUE)
block_subtype <- gsub("nocase\\s*,?", "", block_subtype,
ignore.case = TRUE)
block_subtype <- trimws(block_subtype)
if (block_subtype == "") {
block_subtype <- if (block_type == "INVALUE") "numeric" else "auto"
}
}
# Parse format: spec from subtype (e.g. "format: %Y-%m-%d")
block_date_format <- NULL
fmt_match <- regmatches(block_subtype, regexec(
"format:\\s*([^,)]+)", block_subtype, ignore.case = TRUE
))[[1]]
if (length(fmt_match) >= 2 && fmt_match[1] != "") {
block_date_format <- trimws(fmt_match[2])
block_subtype <- gsub(",?\\s*format:\\s*[^,)]+", "", block_subtype,
ignore.case = TRUE)
block_subtype <- gsub("format:\\s*[^,)]+\\s*,?", "", block_subtype,
ignore.case = TRUE)
block_subtype <- trimws(block_subtype)
if (block_subtype == "") {
block_subtype <- if (block_type == "INVALUE") "numeric" else "auto"
}
}
# Parse range_subtype: option for stratified_range
block_range_subtype <- NULL
rs_match <- regmatches(block_subtype, regexec(
"range_subtype:\\s*([A-Za-z]+)", block_subtype, ignore.case = TRUE
))[[1]]
if (length(rs_match) >= 2 && rs_match[1] != "") {
block_range_subtype <- tolower(trimws(rs_match[2]))
block_subtype <- gsub(",?\\s*range_subtype:\\s*[A-Za-z]+", "",
block_subtype, ignore.case = TRUE)
block_subtype <- gsub("range_subtype:\\s*[A-Za-z]+\\s*,?", "",
block_subtype, ignore.case = TRUE)
block_subtype <- trimws(block_subtype)
if (block_subtype == "") block_subtype <- "auto"
}
# Parse strata_sep: option for stratified_range
block_strata_sep <- NULL
ss_match <- regmatches(block_subtype, regexec(
"strata_sep:\\s*([^,)\\s]+)", block_subtype, ignore.case = TRUE
))[[1]]
if (length(ss_match) >= 2 && ss_match[1] != "") {
block_strata_sep <- trimws(ss_match[2])
block_subtype <- gsub(",?\\s*strata_sep:\\s*[^,)\\s]+", "",
block_subtype, ignore.case = TRUE)
block_subtype <- gsub("strata_sep:\\s*[^,)\\s]+\\s*,?", "",
block_subtype, ignore.case = TRUE)
block_subtype <- trimws(block_subtype)
if (block_subtype == "") block_subtype <- "auto"
}
current_block <- list(
type = block_type,
name = block_name,
subtype = block_subtype,
multilabel = block_multilabel,
nocase = block_nocase,
date_format = block_date_format,
range_subtype = block_range_subtype,
strata_sep = block_strata_sep,
entries = list(),
line_start = i
)
in_block <- TRUE
next
}
# Warn if line looks like a block header but didn't match
if (!in_block && grepl("^(VALUE|INVALUE)\\b", line, ignore.case = TRUE)) {
cli_warn(c(
"Line {i}: Looks like a block header but could not be parsed: {.val {line}}",
"i" = "Check the format name or syntax."
))
next
}
# Check for block end (standalone ; or trailing ;)
ends_with_semi <- grepl(";\\s*$", line)
if (grepl("^;\\s*$", line)) {
if (in_block && !is.null(current_block)) {
blocks <- c(blocks, list(current_block))
current_block <- NULL
in_block <- FALSE
}
next
}
# Warn about lines outside any block
if (!in_block) {
cli_warn("Line {i}: Ignoring line outside of any block: {.val {line}}")
next
}
# Parse mapping line within a block
if (!is.null(current_block)) {
# Strip trailing comma and/or semicolon from mapping lines
mapping_line <- sub("[,;]+\\s*$", "", line)
entry <- .parse_mapping_line(mapping_line, i)
if (!is.null(entry)) {
current_block$entries <- c(current_block$entries, list(entry))
}
# Close block if line ended with ;
if (ends_with_semi) {
blocks <- c(blocks, list(current_block))
current_block <- NULL
in_block <- FALSE
}
}
}
# Handle unclosed block
if (in_block && !is.null(current_block)) {
cli_warn("Block {.val {current_block$name}} was not closed with {.val ;}. Closing automatically.")
blocks <- c(blocks, list(current_block))
}
return(blocks)
}
#' Parse a single mapping line
#' @keywords internal
.parse_mapping_line <- function(line, line_num) {
# Remove inline comments
line <- sub("\\s*//.*$", "", line)
line <- sub("\\s*/\\*.*\\*/\\s*$", "", line)
line <- trimws(line)
if (line == "") return(NULL)
# Split on '='
eq_pos <- regexpr("=", line)
if (eq_pos < 0) {
cli_warn("Line {line_num}: Could not parse mapping (no {.val =} found): {.val {line}}")
return(NULL)
}
lhs <- trimws(substring(line, 1, eq_pos - 1))
rhs <- trimws(substring(line, eq_pos + 1))
# Check for (eval) marker before unquoting
has_eval <- grepl("\\(eval\\)\\s*$", rhs)
if (has_eval) {
rhs <- trimws(sub("\\s*\\(eval\\)\\s*$", "", rhs))
}
# Handle unquoted NA/NaN literals before unquoting (e.g. .other = NA)
is_quoted <- grepl("^([\"']).*\\1$", rhs)
if (!is_quoted && rhs %in% c("NA", "NaN")) {
rhs <- NA_character_
} else {
rhs <- .unquote(rhs)
}
# Set eval attribute if (eval) marker was found
if (has_eval) {
attr(rhs, "eval") <- TRUE
}
# Check for .missing / .other directives
if (lhs == ".missing") {
return(list(type = "missing", value = rhs))
}
if (lhs == ".other") {
return(list(type = "other", value = rhs))
}
# Check for interval notation: [low, high) or (low, high] etc.
interval_match <- regmatches(lhs, regexec(
"^(\\[|\\()\\s*(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)\\s*,\\s*(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)\\s*(\\]|\\))$",
lhs, ignore.case = TRUE
))[[1]]
if (length(interval_match) == 5) {
left_bracket <- interval_match[2]
low_str <- trimws(interval_match[3])
high_str <- trimws(interval_match[4])
right_bracket <- interval_match[5]
low_val <- .parse_range_bound(low_str, is_low = TRUE)
high_val <- .parse_range_bound(high_str, is_low = FALSE)
inc_low <- (left_bracket == "[")
inc_high <- (right_bracket == "]")
if (!is.na(low_val) && !is.na(high_val)) {
return(list(type = "range", low = low_val, high = high_val,
inc_low = inc_low, inc_high = inc_high, label = rhs))
}
}
# Check for date / datetime interval notation: [YYYY-MM-DD, YYYY-MM-DD)
# or [YYYY-MM-DD HH:MM[:SS], ...]. Bounds are kept as strings so the
# range_table builder can dispatch to .parse_date_range_key() /
# .parse_datetime_range_key() using the format's date_format.
date_bound <- "(\\d{4}-\\d{2}-\\d{2}(?:[ T]\\d{2}:\\d{2}(?::\\d{2})?)?|LOW|HIGH)"
date_iv_re <- paste0("^(\\[|\\()\\s*", date_bound, "\\s*,\\s*",
date_bound, "\\s*(\\]|\\))$")
date_iv_match <- regmatches(lhs, regexec(date_iv_re, lhs,
ignore.case = TRUE))[[1]]
if (length(date_iv_match) == 5) {
left_bracket <- date_iv_match[2]
low_str <- trimws(date_iv_match[3])
high_str <- trimws(date_iv_match[4])
right_bracket <- date_iv_match[5]
inc_low <- (left_bracket == "[")
inc_high <- (right_bracket == "]")
return(list(type = "range", low = low_str, high = high_str,
inc_low = inc_low, inc_high = inc_high, label = rhs,
bound_kind = "date"))
}
# Check for legacy range: low - high pattern (no brackets)
range_match <- regmatches(lhs, regexec(
"^(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)\\s*-\\s*(-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)$",
lhs, ignore.case = TRUE
))[[1]]
if (length(range_match) == 3) {
low_str <- trimws(range_match[2])
high_str <- trimws(range_match[3])
low_val <- .parse_range_bound(low_str, is_low = TRUE)
high_val <- .parse_range_bound(high_str, is_low = FALSE)
if (!is.na(low_val) && !is.na(high_val)) {
# Legacy syntax defaults to [low, high)
return(list(type = "range", low = low_val, high = high_val,
inc_low = TRUE, inc_high = FALSE, label = rhs))
}
}
# Discrete mapping
lhs <- .unquote(lhs)
return(list(type = "discrete", key = lhs, label = rhs))
}
#' Parse a range bound value
#' @keywords internal
.parse_range_bound <- function(s, is_low = TRUE) {
s_upper <- toupper(s)
if (s_upper == "LOW") return(-Inf)
if (s_upper == "HIGH") return(Inf)
val <- suppressWarnings(as.numeric(s))
return(val)
}
#' Remove surrounding quotes from a string
#' @keywords internal
.unquote <- function(s) {
if (is.na(s) || nchar(s) < 2L) return(s)
if (grepl("^([\"']).*\\1$", s)) {
return(substring(s, 2, nchar(s) - 1))
}
s
}
#' Convert a parsed block to a ks_format or ks_invalue object
#' @keywords internal
.block_to_format <- function(block) {
if (block$type == "VALUE") {
return(.block_to_ks_format(block))
} else if (block$type == "INVALUE") {
return(.block_to_ks_invalue(block))
} else {
cli_abort("Unknown block type: {.val {block$type}}.")
}
}
#' Convert VALUE block to ks_format
#' @keywords internal
.block_to_ks_format <- function(block) {
# Value types (case-sensitive: "Date", "POSIXct", "logical")
if (block$subtype %in% .value_types) {
return(.block_to_value_type_format(block))
}
# Handle date/time/datetime blocks (case-insensitive)
if (tolower(block$subtype) %in% c("date", "time", "datetime")) {
return(.block_to_ks_datetime_format(block))
}
# Stratified range blocks: build mappings then delegate to fnew()
if (identical(tolower(block$subtype), "stratified_range")) {
return(.block_to_stratified_range_format(block))
}
mappings <- list()
missing_label <- NULL
other_label <- NULL
for (entry in block$entries) {
if (entry$type == "missing") {
missing_label <- entry$value
} else if (entry$type == "other") {
other_label <- entry$value
} else if (entry$type == "discrete") {
mappings[[entry$key]] <- entry$label
} else if (entry$type == "range") {
inc_low <- if (!is.null(entry$inc_low)) entry$inc_low else TRUE
inc_high <- if (!is.null(entry$inc_high)) entry$inc_high else FALSE
range_key <- paste0(entry$low, ",", entry$high, ",",
toupper(inc_low), ",", toupper(inc_high))
mappings[[range_key]] <- entry$label
}
}
# Build the format
type <- block$subtype
format_obj <- structure(
list(
name = block$name,
type = type,
mappings = mappings,
missing_label = missing_label,
other_label = other_label,
multilabel = isTRUE(block$multilabel),
ignore_case = isTRUE(block$nocase),
date_format = block$date_format,
created = Sys.time()
),
class = "ks_format"
)
# Auto-detect type if needed
if (type == "auto") {
has_ranges <- any(vapply(block$entries, function(e) identical(e$type, "range"), logical(1L)))
has_date_bounds <- any(vapply(block$entries, function(e) {
identical(e$type, "range") && identical(e$bound_kind, "date")
}, logical(1L)))
if (has_date_bounds) {
# Choose date_range vs datetime_range based on whether any bound
# carries a time component.
has_time <- any(vapply(block$entries, function(e) {
identical(e$type, "range") && identical(e$bound_kind, "date") &&
(grepl("[ T]\\d{2}:\\d{2}", as.character(e$low)) ||
grepl("[ T]\\d{2}:\\d{2}", as.character(e$high)))
}, logical(1L)))
format_obj$type <- if (has_time) "datetime_range" else "date_range"
} else if (has_ranges) {
format_obj$type <- "numeric"
} else {
format_obj$type <- detect_format_type(names(mappings))
}
}
format_obj$range_table <- .build_range_table(mappings, format_obj$type,
format_obj$date_format)
return(format_obj)
}
#' Convert VALUE block with stratified_range type to ks_format
#'
#' Stratified blocks support both canonical keys
#' (\code{"STRATUM<sep>low,high,inc_low,inc_high"}) supplied as quoted
#' discrete LHS, and the friendly interval form
#' (\code{STRATUM <sep> [low, high)}). Per-stratum directives appear as
#' \code{"STRATUM<sep>.missing" = "..."} or, with the \code{.missing|S}
#' shorthand, as plain discrete entries that are recognised here.
#' @keywords internal
#' @noRd
.block_to_stratified_range_format <- function(block) {
strata_sep <- if (!is.null(block$strata_sep)) block$strata_sep else "|"
range_subtype <- if (!is.null(block$range_subtype)) block$range_subtype else "numeric"
date_format <- block$date_format
# Escape strata_sep for regex use (special chars: . | ( ) [ ] { } * + ? ^ $ \)
sep_re <- gsub("([\\.|()\\[\\]{}*+?^$\\\\])", "\\\\\\1", strata_sep,
perl = TRUE)
num_token <- "(?:-?[0-9]+(?:\\.[0-9]+)?|LOW|HIGH|Inf|-Inf)"
date_token <- "(?:\\d{4}-\\d{2}-\\d{2}(?:[ T]\\d{2}:\\d{2}(?::\\d{2})?)?|LOW|HIGH)"
mappings <- list()
missing_label <- NULL
other_label <- NULL
canonicalise <- function(key) {
if (startsWith(key, paste0(".missing", strata_sep))) {
return(list(kind = "missing_stratum",
stratum = substr(key,
nchar(".missing") + nchar(strata_sep) + 1L,
nchar(key))))
}
if (startsWith(key, paste0(".other", strata_sep))) {
return(list(kind = "other_stratum",
stratum = substr(key,
nchar(".other") + nchar(strata_sep) + 1L,
nchar(key))))
}
# Already canonical?
split_try <- .split_stratified_key(key, strata_sep, range_subtype,
date_format)
if (!is.null(split_try)) {
return(list(kind = "canonical", key = key))
}
# Friendly interval: stratum<sep>[low,high)
tok <- if (range_subtype == "numeric") num_token else date_token
pat <- paste0("^(.+?)", sep_re, "\\s*(\\[|\\()\\s*(",
tok, ")\\s*,\\s*(", tok, ")\\s*(\\]|\\))\\s*$")
m <- regmatches(key, regexec(pat, key, perl = TRUE))[[1]]
if (length(m) == 6L) {
stratum <- .unquote(trimws(m[2]))
inc_low <- (m[3] == "[")
low_str <- m[4]
high_str <- m[5]
inc_high <- (m[6] == "]")
norm_bound <- function(s) {
su <- toupper(s)
if (range_subtype == "numeric") {
if (su %in% c("LOW", "-INF")) return("-Inf")
if (su %in% c("HIGH", "INF")) return("Inf")
return(s)
}
# date / datetime
if (su %in% c("LOW", "-INF")) return("LOW")
if (su %in% c("HIGH", "INF")) return("HIGH")
s
}
low_norm <- norm_bound(low_str)
high_norm <- norm_bound(high_str)
canon <- paste(low_norm, high_norm, toupper(inc_low),
toupper(inc_high), sep = ",")
return(list(kind = "canonical",
key = paste0(stratum, strata_sep, canon)))
}
NULL
}
for (entry in block$entries) {
if (entry$type == "missing") {
missing_label <- entry$value
next
}
if (entry$type == "other") {
other_label <- entry$value
next
}
if (entry$type == "range") {
cli_warn(c(
"Bare range entry in stratified_range block has no stratum; ignoring.",
"i" = "Use {.val STRATUM{strata_sep}[low, high)} syntax."
))
next
}
if (entry$type != "discrete") next
info <- canonicalise(entry$key)
if (is.null(info)) {
cli_warn(c(
"Could not parse stratified entry {.val {entry$key}}; treating as discrete.",
"i" = "Expected {.val STRATUM{strata_sep}RANGE} key."
))
mappings[[entry$key]] <- entry$label
next
}
if (info$kind == "canonical") {
mappings[[info$key]] <- entry$label
} else if (info$kind == "missing_stratum") {
mappings[[paste0(".missing", strata_sep, info$stratum)]] <- entry$label
} else if (info$kind == "other_stratum") {
mappings[[paste0(".other", strata_sep, info$stratum)]] <- entry$label
}
}
# Assemble extras for fnew()
extras <- list()
if (!is.null(missing_label)) extras[[".missing"]] <- missing_label
if (!is.null(other_label)) extras[[".other"]] <- other_label
do.call(fnew, c(
mappings, extras,
list(
name = block$name,
type = "stratified_range",
range_subtype = range_subtype,
strata_sep = strata_sep,
date_format = date_format,
multilabel = isTRUE(block$multilabel),
ignore_case = isTRUE(block$nocase)
)
))
}
#' Convert VALUE block with a value type (Date/POSIXct/logical) to ks_format
#' @keywords internal
#' @noRd
.block_to_value_type_format <- function(block) {
vtype <- block$subtype
date_format <- block$date_format
mappings <- list()
for (entry in block$entries) {
if (entry$type == "missing" || entry$type == "other") {
# Silently ignore — value type formats always use typed NA
} else if (entry$type == "discrete") {
mappings[[entry$key]] <- .parse_typed_value(entry$label, vtype, date_format)
} else if (entry$type == "range") {
inc_low <- if (!is.null(entry$inc_low)) entry$inc_low else TRUE
inc_high <- if (!is.null(entry$inc_high)) entry$inc_high else FALSE
range_key <- paste0(entry$low, ",", entry$high, ",",
toupper(inc_low), ",", toupper(inc_high))
mappings[[range_key]] <- .parse_typed_value(entry$label, vtype, date_format)
}
}
format_obj <- structure(
list(
name = block$name,
type = vtype,
mappings = mappings,
missing_label = NULL,
other_label = NULL,
multilabel = FALSE,
ignore_case = isTRUE(block$nocase),
date_format = date_format,
created = Sys.time()
),
class = "ks_format"
)
format_obj
}
#' Convert VALUE block with date/time/datetime type to ks_format
#' @keywords internal
.block_to_ks_datetime_format <- function(block) {
# Look for a pattern entry (discrete mapping with key "pattern")
pattern <- NULL
missing_label <- NULL
for (entry in block$entries) {
if (entry$type == "discrete" && tolower(entry$key) == "pattern") {
pattern <- entry$label
} else if (entry$type == "missing") {
missing_label <- entry$value
}
}
if (is.null(pattern)) {
cli_abort(c(
"Date/time format {.val {block$name}} must have a {.val pattern} entry.",
"i" = 'e.g., pattern = "DATE9."'
))
}
fmt <- fnew_date(
pattern = pattern,
name = block$name,
type = tolower(block$subtype)
)
if (!is.null(missing_label)) {
fmt$missing_label <- missing_label
}
return(fmt)
}
#' Convert INVALUE block to ks_invalue
#' @keywords internal
.block_to_ks_invalue <- function(block) {
mappings <- list()
for (entry in block$entries) {
if (entry$type == "discrete") {
mappings[[entry$key]] <- entry$label
} else if (entry$type == "missing") {
mappings[[".missing_marker"]] <- entry$value
}
}
# Extract missing_value if set
missing_value <- NA
if (".missing_marker" %in% names(mappings)) {
mv <- mappings[[".missing_marker"]]
if (toupper(mv) == "NA" || mv == ".") {
missing_value <- NA
} else {
missing_value <- mv
}
mappings[[".missing_marker"]] <- NULL
}
# Use block subtype; for INVALUE blocks, default is "numeric"
target_type <- block$subtype
invalue_obj <- structure(
list(
name = block$name,
target_type = target_type,
mappings = mappings,
missing_value = missing_value,
created = Sys.time()
),
class = "ks_invalue"
)
return(invalue_obj)
}
# ---------------------------------------------------------------------------
# Internal export helpers
# ---------------------------------------------------------------------------
#' Convert ks_format to SAS-like text
#' @keywords internal
.format_to_text <- function(fmt, name) {
if (fmt$type %in% c("date", "time", "datetime")) {
return(.datetime_format_to_text(fmt, name))
}
if (.is_stratified_type(fmt$type)) {
return(.stratified_format_to_text(fmt, name))
}
parts <- vector("list", length(fmt$mappings) + 4L)
idx <- 1L
type_str <- if (!is.null(fmt$type) && fmt$type != "auto") fmt$type else NULL
ml_str <- if (isTRUE(fmt$multilabel)) "multilabel" else NULL
nc_str <- if (isTRUE(fmt$ignore_case)) "nocase" else NULL
df_str <- if (!is.null(fmt$date_format)) paste0("format: ", fmt$date_format) else NULL
annot_parts <- c(type_str, ml_str, nc_str, df_str)
type_part <- if (length(annot_parts) > 0L) {
paste0(" (", paste(annot_parts, collapse = ", "), ")")
} else {
""
}
parts[[idx]] <- paste0("VALUE ", name, type_part); idx <- idx + 1L
is_vtype <- .is_value_type(fmt$type)
is_date_rng <- .is_date_range_type(fmt$type)
for (i in seq_along(fmt$mappings)) {
key <- names(fmt$mappings)[i]
label <- fmt$mappings[[i]]
# Convert native values to strings for text output
label_str <- if (is_vtype) {
.typed_value_to_string(label, fmt$type, fmt$date_format)
} else {
as.character(label)
}
eval_suffix <- if (!is_vtype && .has_eval_attr(label)) " (eval)" else ""
# Try to display range keys in interval notation
parsed <- if (is_vtype && fmt$type %in% c("Date", "POSIXct")) {
.parse_date_range_key(key, fmt$date_format)
} else if (fmt$type == "date_range") {
.parse_date_range_key(key, fmt$date_format)
} else if (fmt$type == "datetime_range") {
.parse_datetime_range_key(key, fmt$date_format)
} else {
.parse_range_key(key)
}
if (!is.null(parsed)) {
left_bracket <- if (parsed$inc_low) "[" else "("
right_bracket <- if (parsed$inc_high) "]" else ")"
low <- if (is_vtype || is_date_rng) {
.format_date_bound(parsed$low, fmt$date_format, fmt$type, is_low = TRUE)
} else {
.format_range_bound(parsed$low, is_low = TRUE)
}
high <- if (is_vtype || is_date_rng) {
.format_date_bound(parsed$high, fmt$date_format, fmt$type, is_low = FALSE)
} else {
.format_range_bound(parsed$high, is_low = FALSE)
}
parts[[idx]] <- paste0(" ", left_bracket, low, ", ", high,
right_bracket, " = \"", label_str, "\"", eval_suffix)
} else {
parts[[idx]] <- paste0(" \"", key, "\" = \"", label_str, "\"", eval_suffix)
}
idx <- idx + 1L
}
if (!is.null(fmt$missing_label)) {
miss_eval <- if (.has_eval_attr(fmt$missing_label)) " (eval)" else ""
parts[[idx]] <- paste0(" .missing = \"", fmt$missing_label, "\"", miss_eval); idx <- idx + 1L
}
if (!is.null(fmt$other_label)) {
other_eval <- if (.has_eval_attr(fmt$other_label)) " (eval)" else ""
parts[[idx]] <- paste0(" .other = \"", fmt$other_label, "\"", other_eval); idx <- idx + 1L
}
parts[[idx]] <- ";"
paste(unlist(parts[seq_len(idx)]), collapse = "\n")
}
#' Convert datetime ks_format to SAS-like text
#' @keywords internal
.datetime_format_to_text <- function(fmt, name) {
parts <- vector("list", 4L)
idx <- 1L
parts[[idx]] <- paste0("VALUE ", name, " (", fmt$type, ")"); idx <- idx + 1L
pattern_str <- if (!is.null(fmt$sas_name)) {
paste0(fmt$sas_name, ".")
} else {
fmt$dt_pattern
}
parts[[idx]] <- paste0(" pattern = \"", pattern_str, "\""); idx <- idx + 1L
if (!is.null(fmt$missing_label)) {
parts[[idx]] <- paste0(" .missing = \"", fmt$missing_label, "\""); idx <- idx + 1L
}
parts[[idx]] <- ";"
paste(unlist(parts[seq_len(idx)]), collapse = "\n")
}
#' Convert stratified_range ks_format to SAS-like text
#' @keywords internal
#' @noRd
.stratified_format_to_text <- function(fmt, name) {
strata_sep <- if (!is.null(fmt$strata_sep)) fmt$strata_sep else "|"
range_subtype <- if (!is.null(fmt$range_subtype)) fmt$range_subtype else "numeric"
header_opts <- c("stratified_range",
paste0("range_subtype: ", range_subtype),
paste0("strata_sep: ", strata_sep))
if (isTRUE(fmt$multilabel)) header_opts <- c(header_opts, "multilabel")
if (isTRUE(fmt$ignore_case)) header_opts <- c(header_opts, "nocase")
if (!is.null(fmt$date_format)) {
header_opts <- c(header_opts, paste0("format: ", fmt$date_format))
}
header <- paste0("VALUE ", name, " (", paste(header_opts, collapse = ", "), ")")
# Group mappings by stratum, first-seen order
strata_order <- character(0)
grouped <- list()
parser <- switch(range_subtype,
numeric = function(k) .parse_range_key(k),
date = function(k) .parse_date_range_key(k, fmt$date_format),
datetime = function(k) .parse_datetime_range_key(k, fmt$date_format)
)
for (i in seq_along(fmt$mappings)) {
key <- names(fmt$mappings)[i]
sp <- .split_stratified_key(key, strata_sep, range_subtype,
fmt$date_format)
s <- if (is.null(sp)) "" else sp$stratum
rk <- if (is.null(sp)) key else sp$range_key
if (!s %in% strata_order) {
strata_order <- c(strata_order, s)
grouped[[s]] <- list()
}
grouped[[s]][[length(grouped[[s]]) + 1L]] <- list(
range_key = rk, label = fmt$mappings[[i]]
)
}
is_date_rng <- range_subtype %in% c("date", "datetime")
inner_type <- .stratified_subtype_to_range_type(range_subtype)
out <- character(0)
out <- c(out, header)
for (s in strata_order) {
for (entry in grouped[[s]]) {
parsed <- parser(entry$range_key)
label_str <- as.character(entry$label)
eval_suffix <- if (.has_eval_attr(entry$label)) " (eval)" else ""
if (!is.null(parsed)) {
lb <- if (parsed$inc_low) "[" else "("
rb <- if (parsed$inc_high) "]" else ")"
low <- if (is_date_rng) {
.format_date_bound(parsed$low, fmt$date_format, inner_type, is_low = TRUE)
} else {
.format_range_bound(parsed$low, is_low = TRUE)
}
high <- if (is_date_rng) {
.format_date_bound(parsed$high, fmt$date_format, inner_type, is_low = FALSE)
} else {
.format_range_bound(parsed$high, is_low = FALSE)
}
out <- c(out, paste0(" \"", s, "\"", strata_sep, lb, low, ", ",
high, rb, " = \"", label_str, "\"", eval_suffix))
} else {
out <- c(out, paste0(" \"", s, strata_sep, entry$range_key,
"\" = \"", label_str, "\"", eval_suffix))
}
}
if (!is.null(fmt$missing_by_stratum) && s %in% names(fmt$missing_by_stratum)) {
out <- c(out, paste0(" \".missing", strata_sep, s, "\" = \"",
fmt$missing_by_stratum[[s]], "\""))
}
if (!is.null(fmt$other_by_stratum) && s %in% names(fmt$other_by_stratum)) {
out <- c(out, paste0(" \".other", strata_sep, s, "\" = \"",
fmt$other_by_stratum[[s]], "\""))
}
}
if (!is.null(fmt$missing_label)) {
out <- c(out, paste0(" .missing = \"", fmt$missing_label, "\""))
}
if (!is.null(fmt$other_label)) {
out <- c(out, paste0(" .other = \"", fmt$other_label, "\""))
}
out <- c(out, ";")
paste(out, collapse = "\n")
}
#' Convert ks_invalue to SAS-like text
#' @keywords internal
.invalue_to_text <- function(inv, name) {
parts <- vector("list", length(inv$mappings) + 3L)
idx <- 1L
type_part <- if (!is.null(inv$target_type) && inv$target_type != "numeric") {
paste0(" (", inv$target_type, ")")
} else {
""
}
parts[[idx]] <- paste0("INVALUE ", name, type_part); idx <- idx + 1L
for (i in seq_along(inv$mappings)) {
key <- names(inv$mappings)[i]
value <- inv$mappings[[i]]
eval_suffix <- if (.has_eval_attr(value)) " (eval)" else ""
parts[[idx]] <- paste0(" \"", key, "\" = \"", value, "\"", eval_suffix); idx <- idx + 1L
}
if (!is.null(inv$missing_value) && !identical(inv$missing_value, NA)) {
parts[[idx]] <- paste0(" .missing = \"", inv$missing_value, "\""); idx <- idx + 1L
}
parts[[idx]] <- ";"
paste(unlist(parts[seq_len(idx)]), collapse = "\n")
}
#' Format a numeric range bound for text output
#' @keywords internal
.format_range_bound <- function(val, is_low = TRUE) {
if (is.infinite(val)) {
if (val < 0) return("LOW")
return("HIGH")
}
return(as.character(val))
}
#' Format a Date / POSIXct range bound for text output
#'
#' Renders infinite bounds as \code{LOW}/\code{HIGH}; otherwise formats the
#' Date/POSIXct value using \code{date_format} when supplied, else ISO 8601.
#' Used for value-type (Date/POSIXct) formats and the date_range /
#' datetime_range types.
#' @keywords internal
.format_date_bound <- function(val, date_format = NULL, type = NULL,
is_low = TRUE) {
num <- as.numeric(val)
if (is.na(num)) return("")
if (is.infinite(num)) return(if (num < 0) "LOW" else "HIGH")
if (!is.null(date_format)) return(format(val, date_format))
as.character(val)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.