Nothing
#' Create a Format Definition (like 'SAS' PROC FORMAT)
#'
#' Creates a format object that maps values to labels, similar to 'SAS' PROC FORMAT.
#' Supports discrete value mapping, ranges, and special handling of missing values.
#' The format is automatically stored in the global format library if \code{name}
#' is provided.
#'
#' @param ... Named arguments defining value-label mappings, or one or more
#' named vectors/lists using the R convention \code{c(Label = "Code")}.
#' Can be:
#' \itemize{
#' \item Discrete values: \code{"M" = "Male", "F" = "Female"}
#' \item Named vector: \code{c(Male = "M", Female = "F")}
#' \item Named list: \code{list(Male = "M", Female = "F")}
#' \item \code{\link{fmap}} vector: \code{fmap(keys, values)} for
#' data-driven formats (no reversal)
#' \item Special values: \code{.missing = "Missing"}, \code{.other = "Other"}
#' }
#' Named vectors use the R idiom where names are labels and values are codes,
#' which is the reverse of the \code{...} argument convention.
#'
#' \strong{Named-vector reversal:} For character and numeric formats, named
#' vectors are automatically reversed so that \code{c(Male = "M")} becomes
#' the mapping \code{"M" -> "Male"} (following the \code{factor()} convention).
#' For value types (\code{"Date"}, \code{"POSIXct"}, \code{"logical"}), no
#' reversal occurs — the named vector is used as-is with names as input keys
#' and values as output objects, because non-character objects cannot serve
#' as vector names.
#'
#' \strong{Data-driven formats:} For formats built programmatically from
#' data, wrap your data in \code{\link{fmap}(keys, values)} to suppress
#' automatic reversal for all types. See
#' \code{vignette("usage_examples")} Example 21 for a detailed walkthrough.
#' @param name Character. Optional name for the format. If provided, the format
#' is automatically registered in the global format library.
#' @param type Character. Type of format: \code{"character"}, \code{"numeric"},
#' \code{"Date"}, \code{"POSIXct"}, \code{"logical"},
#' \code{"date_range"}, \code{"datetime_range"},
#' or \code{"auto"} (default) for auto-detection.
#' Value types (\code{"Date"}, \code{"POSIXct"}, \code{"logical"}) store
#' native R objects instead of character labels. For value types,
#' \code{.missing} and \code{.other} are always typed NA.
#' Range-bucketing types (\code{"date_range"}, \code{"datetime_range"})
#' bucket \code{Date}/\code{POSIXct} input into character labels using
#' ISO date/datetime strings as range bounds.
#' @param default Character. Default label for unmatched values (overrides .other)
#' @param multilabel Logical. If \code{TRUE}, the format supports overlapping
#' ranges where a single value can match multiple labels. Used with
#' \code{\link{fput_all}} to retrieve all matching labels. Default \code{FALSE}.
#' @param ignore_case Logical. If \code{TRUE}, key matching for character formats
#' is case-insensitive. Default \code{FALSE}.
#' @param date_format Character. Optional strptime-style format string used
#' when parsing date/datetime range keys (e.g. \code{"\%d/\%m/\%Y"}). When
#' \code{NULL}, ISO 8601 parsing is used. Applies to \code{"date_range"},
#' \code{"datetime_range"}, and the value types \code{"Date"} /
#' \code{"POSIXct"} when keys are ranges.
#' @param range_subtype Character. For \code{type = "stratified_range"} only.
#' One of \code{"numeric"} (default), \code{"date"}, or \code{"datetime"}.
#' Determines how the range part of each stratified key is parsed and compared.
#' @param strata_sep Character. For \code{type = "stratified_range"} only.
#' Single-character (or multi-character) separator between the stratum
#' identifier and the range key in each mapping key. Default \code{"|"}.
#' @param verbose Logical. If \code{TRUE}, returns the format object visibly;
#' otherwise returns it invisibly. Default \code{FALSE}.
#'
#' @return An object of class \code{"ks_format"} containing the format definition.
#' The object is also stored in the format library if \code{name} is given.
#'
#' @details
#' Special directives:
#' \itemize{
#' \item \code{.missing}: Label for NA, NULL, NaN values
#' \item \code{.other}: Label for values not matching any rule
#' }
#'
#' \strong{Named-vector direction (reverse convention):}
#'
#' When a named vector or list is passed as an unnamed argument (e.g.,
#' \code{fnew(c(Male = "M"))}), the direction of the name-to-value mapping
#' depends on the output \code{type}:
#'
#' \itemize{
#' \item For \strong{character / numeric} types, names are \emph{labels} and
#' values are \emph{codes}. The pairs are reversed internally so that the
#' format maps \code{code -> label}. This follows the standard R idiom used
#' by \code{factor()}, where \code{c(Label = "Code")}.
#' \item For \strong{value types} (\code{Date}, \code{POSIXct},
#' \code{logical}), names are \emph{input keys} and values are the native R
#' objects returned by the format. No reversal is applied, because
#' non-character objects cannot be used as vector names.
#' }
#'
#' This means the \emph{same data} may need to be arranged differently
#' depending on the target type. To avoid this inconsistency for data-driven
#' formats, use \code{\link{fmap}(keys, values)} which works identically
#' for all types:
#' \preformatted{
#' fnew(fmap(ids, dates), type = "Date")
#' fnew(fmap(ids, date_strings), type = "character")
#' }
#'
#' When in doubt, use explicit \code{key = "label"} arguments — these are
#' never reversed regardless of type.
#'
#' \strong{Expression labels:} If a label contains \code{.x1}, \code{.x2}, etc.,
#' it is treated as an R expression that is evaluated at apply-time. Extra arguments
#' are passed positionally via \code{...} in \code{\link{fput}}:
#' \preformatted{
#' stat_fmt <- fnew("n" = "sprintf('\%s', .x1)",
#' "pct" = "sprintf('\%.1f\%\%', .x1 * 100)")
#' fput(c("n", "pct"), stat_fmt, c(42, 0.15))
#' # Returns: "42" "15.0\%"
#' }
#'
#' @export
#'
#' @examples
#' # Discrete value format (auto-stored as "sex")
#' fnew(
#' "M" = "Male",
#' "F" = "Female",
#' .missing = "Unknown",
#' .other = "Other Gender",
#' name = "sex"
#' )
#'
#' # Apply immediately
#' fput(c("M", "F", NA, "X"), "sex")
#' # [1] "Male" "Female" "Unknown" "Other Gender"
#' fclear()
#'
#' # Multilabel format: a value can match multiple labels
#' fnew(
#' "0,5,TRUE,TRUE" = "Infant",
#' "6,11,TRUE,TRUE" = "Child",
#' "12,17,TRUE,TRUE" = "Adolescent",
#' "0,17,TRUE,TRUE" = "Pediatric",
#' "18,64,TRUE,TRUE" = "Adult",
#' "65,Inf,TRUE,TRUE" = "Elderly",
#' "18,Inf,TRUE,TRUE" = "Non-Pediatric",
#' name = "age_categories",
#' type = "numeric",
#' multilabel = TRUE
#' )
#'
#' # fput returns first match; fput_all returns all matches
#' fput(c(3, 14, 25, 70), "age_categories")
#' fput_all(c(3, 14, 25, 70), "age_categories")
#' fclear()
#'
#' # From a named vector (Label = Code convention)
#' sex_vec <- fnew(c(Male = "M", Female = "F"), .missing = "Unknown",
#' name = "sex_vec")
#' fput(c("M", "F", NA), sex_vec)
#' # [1] "Male" "Female" "Unknown"
#' fclear()
fnew <- function(..., name = NULL, type = "auto", default = NULL,
multilabel = FALSE, ignore_case = FALSE,
date_format = NULL,
range_subtype = c("numeric", "date", "datetime"),
strata_sep = "|",
verbose = FALSE) {
type <- match.arg(type, c("auto", "character", "numeric", .value_types,
"date_range", "datetime_range",
"stratified_range"))
range_subtype <- match.arg(range_subtype)
is_vtype <- .is_value_type(type)
is_strat <- .is_stratified_type(type)
if (!is.character(strata_sep) || length(strata_sep) != 1L ||
is.na(strata_sep) || !nzchar(strata_sep)) {
cli_abort("{.arg strata_sep} must be a single non-empty character string.")
}
if (!is.null(name)) {
if (!is.character(name) || length(name) != 1L || is.na(name) || !nzchar(name)) {
cli_abort("{.arg name} must be a single non-empty character string.")
}
}
if (!is.null(default) && (!is.character(default) || length(default) != 1L)) {
cli_abort("{.arg default} must be a single character string.")
}
if (!is.logical(multilabel) || length(multilabel) != 1L) {
cli_abort("{.arg multilabel} must be TRUE or FALSE.")
}
if (!is.logical(ignore_case) || length(ignore_case) != 1L) {
cli_abort("{.arg ignore_case} must be TRUE or FALSE.")
}
mappings <- list(...)
# Detect if any unnamed argument uses ks_fmap (suppresses reversal)
has_fmap <- FALSE
arg_names <- names(mappings)
for (i in seq_along(mappings)) {
nm <- if (!is.null(arg_names)) arg_names[i] else ""
if ((is.na(nm) || !nzchar(nm)) && inherits(mappings[[i]], "ks_fmap")) {
has_fmap <- TRUE
# Inherit strata_sep from fmap_strata() when caller did not
# explicitly override it.
fmap_sep <- attr(mappings[[i]], "strata_sep")
if (!is.null(fmap_sep) && missing(strata_sep)) {
strata_sep <- fmap_sep
}
# Strip ks_fmap class before expansion
cls <- class(mappings[[i]])
class(mappings[[i]]) <- cls[cls != "ks_fmap"]
}
}
# Pre-expansion value type auto-detection (must happen before reverse)
if (type == "auto") {
arg_names <- names(mappings)
arg_classes <- character(0)
for (i in seq_along(mappings)) {
nm <- if (!is.null(arg_names)) arg_names[i] else ""
if (nm %in% c(".missing", ".other")) next
arg_classes <- c(arg_classes, class(mappings[[i]])[1L])
}
if (length(arg_classes) > 0L) {
unique_cls <- unique(arg_classes)
if (length(unique_cls) == 1L && unique_cls %in% .value_types) {
type <- unique_cls
is_vtype <- TRUE
}
}
}
# Determine reversal: auto (reverse for char/numeric, not for value types)
# fmap() vectors suppress reversal for all types. Stratified ranges always
# use keys-as-LHS semantics (never reverse).
do_reverse <- if (has_fmap || is_strat) FALSE else !is_vtype
mappings <- .expand_named_vectors(mappings, reverse = do_reverse)
if (length(mappings) == 0L) {
cli_abort("At least one value-label mapping must be provided.")
}
# Extract special directives
missing_label <- mappings[[".missing"]]
other_label <- mappings[[".other"]]
# Remove special directives from mappings
mappings[[".missing"]] <- NULL
mappings[[".other"]] <- NULL
# For value types, .missing and .other must be NA (can't mix types)
if (is_vtype) {
if (!is.null(missing_label) && !is.na(missing_label)) {
cli_warn("{.arg .missing} is ignored for {.val {type}} formats (always NA).")
}
missing_label <- NULL
if (!is.null(other_label) && !is.na(other_label)) {
cli_warn("{.arg .other} is ignored for {.val {type}} formats (always NA).")
}
other_label <- NULL
}
# Override .other with default if provided
if (!is.null(default) && !is_vtype) {
other_label <- default
}
# Determine format type (auto-detect for character/numeric when not value type)
if (identical(type, "auto")) {
type <- detect_format_type(names(mappings))
}
# Per-stratum directives for stratified_range: pick up keys named like
# ".missing<sep>STRATUM" / ".other<sep>STRATUM" and split them off the
# main mappings list.
missing_by_stratum <- NULL
other_by_stratum <- NULL
if (is_strat) {
keys <- names(mappings)
miss_prefix <- paste0(".missing", strata_sep)
other_prefix <- paste0(".other", strata_sep)
miss_hit <- startsWith(keys, miss_prefix)
other_hit <- startsWith(keys, other_prefix)
if (any(miss_hit)) {
strata <- substr(keys[miss_hit],
nchar(miss_prefix) + 1L, nchar(keys[miss_hit]))
vals <- vapply(mappings[miss_hit], as.character, character(1L))
missing_by_stratum <- stats::setNames(as.list(vals), strata)
}
if (any(other_hit)) {
strata <- substr(keys[other_hit],
nchar(other_prefix) + 1L, nchar(keys[other_hit]))
vals <- vapply(mappings[other_hit], as.character, character(1L))
other_by_stratum <- stats::setNames(as.list(vals), strata)
}
mappings <- mappings[!(miss_hit | other_hit)]
}
# Build range_table(s)
range_table <- NULL
range_tables <- NULL
if (is_strat) {
range_tables <- .build_stratified_range_tables(
mappings, range_subtype, strata_sep, date_format
)
# Detect malformed keys: anything that didn't end up in any stratum.
accounted <- sum(vapply(range_tables, function(rt) {
length(rt$range_idx) + length(rt$discrete_idx)
}, integer(1L)))
if (length(mappings) > 0L && accounted < length(mappings)) {
bad <- character(0)
for (k in names(mappings)) {
sp <- .split_stratified_key(k, strata_sep, range_subtype, date_format)
if (is.null(sp)) bad <- c(bad, k)
}
if (length(bad) > 0L) {
cli_abort(c(
"Stratified mapping keys could not be parsed as STRATUM{strata_sep}RANGE.",
"x" = "Bad key: {.val {bad[1]}}",
"i" = "Expected e.g. {.val ARM_A{strata_sep}0,7,TRUE,FALSE} for range_subtype = {.val {range_subtype}}."
))
}
}
} else {
range_table <- .build_range_table(mappings, type, date_format)
}
# Create format object
format_obj <- structure(
list(
name = name,
type = type,
range_subtype = if (is_strat) range_subtype else NULL,
strata_sep = if (is_strat) strata_sep else NULL,
mappings = mappings,
missing_label = missing_label,
other_label = other_label,
missing_by_stratum = missing_by_stratum,
other_by_stratum = other_by_stratum,
multilabel = multilabel,
ignore_case = ignore_case,
date_format = date_format,
range_table = range_table,
range_tables = range_tables,
created = Sys.time()
),
class = "ks_format"
)
# Validate format structure
.format_validate(format_obj)
# Auto-register in library if named
.format_register(format_obj)
if (verbose) format_obj else invisible(format_obj)
}
#' Mark a Label for Expression Evaluation
#'
#' Marks a format label string so it will be evaluated as an R expression
#' at apply-time (\code{\link{fput}}), even when it does not contain
#' \code{.x1}, \code{.x2}, etc. placeholders.
#'
#' @param expr Character string. The R expression to evaluate.
#' @return The same character string with an \code{"eval"} attribute set to
#' \code{TRUE}.
#'
#' @details
#' This is useful when a label should call a function that does not need
#' positional \code{.xN} arguments.
#' The expression is evaluated in the caller's environment of
#' \code{\link{fput}}, so user-defined functions are accessible.
#'
#' Labels containing \code{.x1}, \code{.x2}, etc. are still evaluated
#' automatically without needing \code{e()}.
#'
#' @export
#' @examples
#' # Mark an expression for evaluation at apply-time
#' fmt <- fnew(
#' "timestamp" = e("format(Sys.time(), '%Y-%m-%d')"),
#' "static" = "Hello",
#' name = "demo_eval"
#' )
#' fput(c("timestamp", "static"), fmt)
#' fclear()
e <- function(expr) {
if (!is.character(expr) || length(expr) != 1L) {
cli_abort("{.arg expr} must be a single character string.")
}
attr(expr, "eval") <- TRUE
expr
}
#' Detect Format Type
#'
#' @param keys Character vector of mapping key names
#' @return Character: "character" or "numeric"
#' @keywords internal
detect_format_type <- function(keys) {
if (length(keys) == 0L) return("character")
non_empty <- keys[!is.na(keys) & nzchar(keys)]
if (length(non_empty) == 0L) {
cli_warn("All mapping keys are empty or NA; defaulting to {.val numeric} type.")
return("numeric")
}
if (all(!is.na(suppressWarnings(as.numeric(non_empty))))) {
return("numeric")
}
if (any(grepl(",", keys, fixed = TRUE))) {
return("numeric")
}
if (any(keys == "" | is.na(keys))) {
return("numeric")
}
"character"
}
#' Print Format Object
#'
#' @param x A ks_format object
#' @param ... Additional arguments (unused)
#' @return The input \code{x}, returned invisibly.
#' @export
print.ks_format <- function(x, ...) {
flags <- character(0)
if (isTRUE(x$multilabel)) flags <- c(flags, "multilabel")
if (isTRUE(x$ignore_case)) flags <- c(flags, "nocase")
flags_str <- if (length(flags) > 0) paste0(" (", paste(flags, collapse = ", "), ")") else ""
cat("KS Format:", if (!is.null(x$name)) x$name else "(unnamed)",
flags_str, "\n", sep = "")
cat("Type:", x$type, "\n")
# Date/time formats show pattern instead of mappings
if (x$type %in% c("date", "time", "datetime")) {
pattern_str <- x$dt_pattern
if (!is.null(x$sas_name)) {
pattern_str <- paste0(pattern_str, " (", x$sas_name, ".)")
}
cat("Pattern:", pattern_str, "\n")
} else if (.is_stratified_type(x$type)) {
cat("Range subtype:", x$range_subtype, "\n")
cat("Strata separator:", x$strata_sep, "\n")
cat("Mappings:\n")
sep <- x$strata_sep
rs <- x$range_subtype
.render_bound <- function(b, kind) {
bn <- suppressWarnings(as.numeric(b))
if (is.na(bn)) return(as.character(b))
if (kind == "low" && is.infinite(bn) && bn < 0) return("LOW")
if (kind == "high" && is.infinite(bn) && bn > 0) return("HIGH")
as.character(b)
}
parser <- switch(rs,
numeric = function(k) .parse_range_key(k),
date = function(k) .parse_date_range_key(k, x$date_format),
datetime = function(k) .parse_datetime_range_key(k, x$date_format)
)
# Group mappings by stratum, preserving first-seen order
strata_seen <- character(0)
groups <- list()
for (i in seq_along(x$mappings)) {
key <- names(x$mappings)[i]
sp <- .split_stratified_key(key, sep, rs, x$date_format)
s <- if (is.null(sp)) "(unparsed)" else sp$stratum
rk <- if (is.null(sp)) key else sp$range_key
if (!s %in% strata_seen) {
strata_seen <- c(strata_seen, s)
groups[[s]] <- list()
}
groups[[s]][[length(groups[[s]]) + 1L]] <- list(
range_key = rk, value = as.character(x$mappings[[i]])
)
}
for (s in strata_seen) {
cat(" Stratum \"", s, "\":\n", sep = "")
for (entry in groups[[s]]) {
parsed <- parser(entry$range_key)
if (!is.null(parsed)) {
lb <- if (parsed$inc_low) "[" else "("
rb <- if (parsed$inc_high) "]" else ")"
low_s <- .render_bound(parsed$low, "low")
high_s <- .render_bound(parsed$high, "high")
cat(" ", lb, low_s, ", ", high_s, rb,
" => ", entry$value, "\n", sep = "")
} else {
cat(" ", entry$range_key, " => ", entry$value, "\n", sep = "")
}
}
if (!is.null(x$missing_by_stratum) && s %in% names(x$missing_by_stratum)) {
cat(" .missing => ", x$missing_by_stratum[[s]], "\n", sep = "")
}
if (!is.null(x$other_by_stratum) && s %in% names(x$other_by_stratum)) {
cat(" .other => ", x$other_by_stratum[[s]], "\n", sep = "")
}
}
} else {
is_vtype <- .is_value_type(x$type)
cat("Mappings:\n")
for (i in seq_along(x$mappings)) {
key <- names(x$mappings)[i]
value <- x$mappings[[i]]
# Format the value for display
value_str <- if (is_vtype) {
.typed_value_to_string(value, x$type, x$date_format)
} else {
as.character(value)
}
# Try to display range keys in interval notation
parsed <- if (is_vtype && x$type %in% c("Date", "POSIXct")) {
.parse_date_range_key(key, x$date_format)
} else if (x$type == "date_range") {
.parse_date_range_key(key, x$date_format)
} else if (x$type == "datetime_range") {
.parse_datetime_range_key(key, x$date_format)
} else {
.parse_range_key(key)
}
if (!is.null(parsed)) {
left_bracket <- if (parsed$inc_low) "[" else "("
right_bracket <- if (parsed$inc_high) "]" else ")"
low_num <- as.numeric(parsed$low)
high_num <- as.numeric(parsed$high)
low_str <- if (is.infinite(low_num) && low_num < 0) "LOW" else as.character(parsed$low)
high_str <- if (is.infinite(high_num) && high_num > 0) "HIGH" else as.character(parsed$high)
cat(" ", left_bracket, low_str, ", ", high_str, right_bracket,
" => ", value_str, "\n", sep = "")
} else {
cat(" ", key, " => ", value_str, "\n", sep = "")
}
}
}
if (!is.null(x$missing_label)) {
cat(" .missing => ", x$missing_label, "\n", sep = "")
}
if (!is.null(x$other_label)) {
cat(" .other => ", x$other_label, "\n", sep = "")
}
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.