R/format_invalue.R

Defines functions print.ks_invalue fnew_bid finputk finputc finputn .invalue_apply finput

Documented in finput finputc finputk finputn fnew_bid .invalue_apply print.ks_invalue

#' Create Invalue Format (Reverse Formatting like 'SAS' INVALUE)
#'
#' Creates an invalue format that converts formatted labels back to values.
#' This is similar to 'SAS' PROC FORMAT with INVALUE statement.
#' The invalue is automatically stored in the global format library if \code{name}
#' is provided.
#'
#' @param ... Named arguments defining label-value mappings (reverse of \code{\link{fnew}}),
#'   or one or more named vectors/lists using \code{c(Label = value)}.
#'   Example: \code{"Male" = 1, "Female" = 2} or \code{c(Male = 1, Female = 2)}.
#' @param name Character. Optional name for the invalue format. If provided,
#'   the invalue is automatically registered in the global format library.
#' @param target_type Character. Type to convert to: \code{"numeric"} (default),
#'   \code{"integer"}, \code{"character"}, or \code{"logical"}.
#'   INVALUE formats produce numeric output by default; character-to-character
#'   conversion should use a regular VALUE format (\code{\link{fnew}}) instead.
#' @param missing_value Value to use for missing inputs (default: \code{NA})
#'
#' @return An object of class \code{"ks_invalue"} containing the invalue definition.
#'   The object is also stored in the format library if \code{name} is given.
#'
#' @export
#'
#' @examples
#' # Convert text labels to numeric codes
#' finput(
#'   "Male" = 1,
#'   "Female" = 2,
#'   name = "sex_inv"
#' )
#'
#' # Apply using finputn (numeric invalue by name)
#' finputn(c("Male", "Female", "Unknown"), "sex_inv")
#' # [1]  1  2 NA
#' fclear()
#'
#' # From a named vector
#' finput(c(Male = 1, Female = 2), name = "sex_inv2")
#' finputn(c("Male", "Female"), "sex_inv2")
#' # [1] 1 2
#' fclear()
finput <- function(..., name = NULL, target_type = "numeric", missing_value = NA) {
  target_type <- match.arg(target_type, c("numeric", "integer", "character", "logical"))
  if (!is.null(name)) {
    if (!is.character(name) || length(name) != 1L || is.na(name) || !nzchar(name)) {
      cli_abort("{.arg name} must be a single non-empty character string.")
    }
  }

  mappings <- list(...)

  # Expand named vectors (c(Label = value) -> individual mappings)
  mappings <- .expand_named_vectors(mappings, reverse = FALSE)

  if (length(mappings) == 0L) {
    cli_abort("At least one label-value mapping must be provided.")
  }

  # Validate mapping values are scalar
  for (i in seq_along(mappings)) {
    val <- mappings[[i]]
    if (length(val) != 1L) {
      cli_abort("Mapping value for label {.val {names(mappings)[i]}} must be scalar (length 1), got length {length(val)}.")
    }
  }

  # Create invalue object
  invalue_obj <- structure(
    list(
      name = name,
      target_type = target_type,
      mappings = mappings,
      missing_value = missing_value,
      created = Sys.time()
    ),
    class = "ks_invalue"
  )

  # Validate
  .format_validate(invalue_obj)

  # Auto-register in library if named
  .format_register(invalue_obj)

  invalue_obj
}


#' Apply Invalue Format (Reverse Formatting)
#'
#' Applies an invalue format to convert formatted labels back to values.
#'
#' @param x Character vector of labels to convert
#' @param invalue A \code{ks_invalue} object or a character string naming an
#'   invalue format in the global format library.
#' @param na_if Character vector. Additional values to treat as NA
#'
#' @return Vector with values (type depends on invalue's \code{target_type})
#'
#' @keywords internal
.invalue_apply <- function(x, invalue, na_if = NULL) {
  # Resolve invalue by name if string provided
  if (is.character(invalue) && length(invalue) == 1L) {
    invalue <- .format_get(invalue)
  }

  if (!inherits(invalue, "ks_invalue")) {
    cli_abort("{.arg invalue} must be a {.cls ks_invalue} object or a registered invalue name.")
  }

  # Handle NULL input
  if (is.null(x)) {
    return(vector(invalue$target_type, 0L))
  }

  n <- length(x)
  result <- vector(invalue$target_type, n)
  result[] <- invalue$missing_value

  # Identify missing values
  is_miss <- is.na(x)
  if (!is.null(na_if)) {
    is_miss <- is_miss | x %in% na_if
  }

  non_miss <- which(!is_miss)
  if (length(non_miss) == 0L) return(result)

  # Vectorized lookup using match()
  labels <- as.character(x[non_miss])
  map_keys <- names(invalue$mappings)
  pos <- match(labels, map_keys)
  found <- !is.na(pos)

  if (any(found)) {
    values <- unlist(invalue$mappings[pos[found]], use.names = FALSE)
    not_na_vals <- !is.na(values)

    converter <- switch(
      invalue$target_type,
      "numeric" = as.numeric,
      "integer" = as.integer,
      "character" = as.character,
      "logical" = as.logical,
      identity
    )

    target_idx <- non_miss[found]
    if (any(not_na_vals)) {
      result[target_idx[not_na_vals]] <- converter(values[not_na_vals])
    }
    # Explicitly assign NA for mappings that map to NA
    if (any(!not_na_vals)) {
      result[target_idx[!not_na_vals]] <- NA
    }
  }

  # Unfound: try direct numeric conversion for numeric/integer types
  not_found <- non_miss[!found]
  if (length(not_found) > 0L &&
      invalue$target_type %in% c("numeric", "integer")) {
    converted <- suppressWarnings(as.numeric(labels[!found]))
    valid <- !is.na(converted)
    if (any(valid)) {
      result[not_found[valid]] <- converted[valid]
    }
  }

  result
}

#' Apply Numeric Invalue by Name (like 'SAS' INPUTN)
#'
#' Looks up a numeric INVALUE format by name from the global format library
#' and applies it to convert labels to numeric values.
#'
#' @param x Character vector of labels to convert
#' @param invalue_name Character. Name of a registered INVALUE format.
#'
#' @return Numeric vector
#'
#' @export
#'
#' @examples
#' # Create numeric invalue and apply
#' finput(
#'   "Male" = 1,
#'   "Female" = 2,
#'   name = "sex_inv"
#' )
#' finputn(c("Male", "Female", "Male", "Unknown", "Female"), "sex_inv")
#' # [1]  1  2  1 NA  2
#' fclear()
#'
#' # Parse invalue from text and apply
#' fparse(text = '
#' INVALUE race_inv
#'   "White" = 1
#'   "Black" = 2
#'   "Asian" = 3
#' ;
#' ')
#' finputn(c("White", "Black"), "race_inv")
#' # [1] 1 2
#' fclear()
finputn <- function(x, invalue_name) {
  inv_obj <- .format_get(invalue_name)
  if (!inherits(inv_obj, "ks_invalue")) {
    cli_abort("{.val {invalue_name}} is not an INVALUE format ({.cls ks_invalue}).")
  }
  if (!inv_obj$target_type %in% c("numeric", "integer")) {
    cli_warn("{.val {invalue_name}} has target_type {.val {inv_obj$target_type}}, not numeric. Coercing result to numeric.")
  }
  result <- .invalue_apply(x, inv_obj)
  as.numeric(result)
}

#' Apply Character Invalue by Name (like 'SAS' INPUTC)
#'
#' Looks up an INVALUE format by name from the global format library
#' and applies it to convert labels to character values.
#'
#' @param x Character vector of labels to convert
#' @param invalue_name Character. Name of a registered INVALUE format.
#'
#' @return Character vector
#'
#' @export
#'
#' @examples
#' # Bidirectional: use finputc for reverse direction
#' fnew_bid(
#'   "A" = "Active",
#'   "I" = "Inactive",
#'   "P" = "Pending",
#'   name = "status"
#' )
#'
#' # Forward: code -> label
#' fputc(c("A", "I", "P"), "status")
#' # [1] "Active" "Inactive" "Pending"
#'
#' # Reverse: label -> code
#' finputc(c("Active", "Pending", "Inactive"), "status_inv")
#' # [1] "A" "P" "I"
#' fclear()
finputc <- function(x, invalue_name) {
  inv_obj <- .format_get(invalue_name)
  if (!inherits(inv_obj, "ks_invalue")) {
    cli_abort("{.val {invalue_name}} is not an INVALUE format ({.cls ks_invalue}).")
  }
  result <- .invalue_apply(x, inv_obj)
  as.character(result)
}

#' Apply Invalue Using a Composite Label
#'
#' Convenience wrapper around an INVALUE lookup that pastes multiple vectors
#' together into a composite label before reverse lookup. Mirrors
#' [fputk()] on the invalue side, for INVALUE formats built with composite
#' labels such as `fmap(paste(col1, col2, sep = "|"), codes)`.
#'
#' The output type is determined by the stored invalue's `target_type`
#' (numeric / integer → numeric, character → character, logical →
#' logical).
#'
#' @param ... Vectors to paste together into a composite label.
#'   All vectors are recycled to a common length by [paste()].
#' @param invalue_name Character. Name of a registered INVALUE format.
#' @param sep Separator inserted between the pasted components
#'   (default `"|"`).
#' @param na_as_string If `FALSE` (default), an `NA` in any component
#'   propagates to the composite label (restored to `NA_character_` after
#'   the [paste()] step) so the invalue's `missing_value` applies.
#'   If `TRUE`, the literal string `"NA"` produced by [paste()] is kept,
#'   which is useful when the invalue was built with composite labels via
#'   `fmap(paste(..., sep = "|"), values)`.
#'
#' @return A vector whose type depends on the invalue's `target_type`.
#'
#' @examples
#' # Build an INVALUE keyed on two columns via paste()
#' finput(
#'   fmap(paste(c("A", "A", "B"), c(1, 2, 1), sep = "|"), c(10, 20, 30)),
#'   name = "ab_inv"
#' )
#'
#' finputk(c("A", "A", "B"), c(1, 2, 1), invalue_name = "ab_inv")
#' # -> 10 20 30
#'
#' fclear()
#'
#' @seealso [finput()], [finputn()], [finputc()], [fputk()]
#' @export
finputk <- function(..., invalue_name, sep = "|", na_as_string = FALSE) {
  args <- list(...)
  if (length(args) < 1L) {
    cli_abort("At least one label component must be provided in {.code ...}.")
  }

  inv_obj <- .format_get(invalue_name)
  if (!inherits(inv_obj, "ks_invalue")) {
    cli_abort("{.val {invalue_name}} is not an INVALUE format ({.cls ks_invalue}).")
  }

  keys <- do.call(paste, c(args, list(sep = sep)))
  # Propagate NA: paste() coerces NA to "NA" — restore real NA so the
  # invalue's missing_value applies. When na_as_string = TRUE the literal
  # "NA" is kept so paste()/fmap()-built labels round-trip.
  if (!na_as_string) {
    any_na <- Reduce(`|`, lapply(args, is.na))
    keys[any_na] <- NA_character_
  }

  result <- .invalue_apply(keys, inv_obj)
  switch(
    inv_obj$target_type,
    "numeric"   = as.numeric(result),
    "integer"   = as.integer(result),
    "character" = as.character(result),
    "logical"   = as.logical(result),
    result
  )
}

#' Create Bidirectional Format
#'
#' Creates both a format and its corresponding invalue for bidirectional conversion.
#' Both are automatically stored in the global format library if \code{name}
#' is provided.
#'
#' @param ... Named arguments for format mappings
#' @param name Character. Base name for both formats. The invalue will be
#'   named \code{paste0(name, "_inv")}.
#' @param type Character. Format type
#'
#' @return List with \code{format} (ks_format) and \code{invalue} (ks_invalue)
#'   components.
#'
#' @export
#'
#' @examples
#' # Bidirectional status format
#' status_bi <- fnew_bid(
#'   "A" = "Active",
#'   "I" = "Inactive",
#'   "P" = "Pending",
#'   name = "status"
#' )
#'
#' # Forward: code -> label
#' fputc(c("A", "I", "P", "A"), "status")
#' # [1] "Active" "Inactive" "Pending" "Active"
#'
#' # Reverse: label -> code
#' finputc(c("Active", "Pending", "Inactive"), "status_inv")
#' # [1] "A" "P" "I"
#' fclear()
#'
#' # From a named vector (Label = Code convention, same as fnew)
#' fnew_bid(c(Male = "M", Female = "F"), name = "sex_bid")
#' fputc(c("M", "F"), "sex_bid")
#' finputc(c("Male", "Female"), "sex_bid_inv")
#' fclear()
fnew_bid <- function(..., name = NULL, type = "auto") {
  mappings <- list(...)

  # Expand named vectors (c(Label = "Code") -> individual mappings, reversed)
  mappings <- .expand_named_vectors(mappings, reverse = TRUE)

  # Warn if multiple keys map to the same value (ambiguous reverse mapping)
  values <- unlist(mappings, use.names = FALSE)
  if (anyDuplicated(values)) {
    dupes <- unique(values[duplicated(values)])
    cli_warn(c(
      "Multiple keys map to the same value{?s}: {.val {dupes}}",
      "i" = "The reverse (invalue) mapping is ambiguous; only the first key per value will be used."
    ))
  }

  # Create format (value -> label)
  format_obj <- do.call(fnew, c(mappings, list(name = name, type = type)))

  # Create invalue (label -> value)
  # Reverse the mappings
  reversed_mappings <- lapply(names(mappings), function(key) {
    value <- mappings[[key]]
    stats::setNames(list(key), value)
  })
  reversed_mappings <- do.call(c, reversed_mappings)

  inv_name <- if (!is.null(name)) paste0(name, "_inv") else NULL
  invalue_obj <- do.call(
    finput,
    c(reversed_mappings, list(
      name = inv_name,
      target_type = "character"
    ))
  )

  list(format = format_obj, invalue = invalue_obj)
}

#' Print Invalue Object
#'
#' @param x A ks_invalue object
#' @param ... Additional arguments (unused)
#' @return The input \code{x}, returned invisibly.
#' @export
print.ks_invalue <- function(x, ...) {
  cat("KS Invalue:", if (!is.null(x$name)) x$name else "(unnamed)", "\n")
  cat("Target Type:", x$target_type, "\n")
  cat("Mappings:\n")

  for (i in seq_along(x$mappings)) {
    key <- names(x$mappings)[i]
    value <- x$mappings[[i]]
    cat("  ", key, " => ", value, "\n", sep = "")
  }

  if (!is.null(x$missing_value) && !identical(x$missing_value, NA)) {
    cat("  Missing value: ", x$missing_value, "\n", sep = "")
  }

  invisible(x)
}

Try the ksformat package in your browser

Any scripts or data that you put into this service are public.

ksformat documentation built on May 21, 2026, 9:07 a.m.