R/use_val_lab1.R

Defines functions use_val_lab1

Documented in use_val_lab1

#' Replace a Single Data Frame Column's Values with Its Value Labels
#'
#' @description
#' For a single value-labeled column of a data.frame, replace
#' all of its values with the corresponding value labels and return the modified
#' data.frame.
#'
#' @details
#' Note 1: `use_val_lab1` is a variant of `use_val_labs` that allows you to
#' specify only one variable at a time but that allows you to pass its name
#' without quoting it (compare use_val_lab1(mtcars, am) to
#' use_val_labs(mtcars, "am")).
#'
#' Note 2: `uvl1` is a compact alias for `use_val_lab1`: they do the same thing,
#' and the former is easier to type.
#'
#' Note 3: This command is intended exclusively for interactive use. In
#' particular, the var argument must be the literal name of a single variable
#' (column) found in the supplied data.frame and may NOT be, e.g., the name of a
#' character vector that contains the variable (column name) of interest. If you
#' wish to supply a character vector with the names of variables (columns) of
#' interest, use `use_val_labs()`.
#'
#' `use_val_lab1` replaces a single, value-labeled data.frame column with a
#' "value labels-on" version of that column. Here, "labels-on" means that the
#' column's original values are replaced with the corresponding value labels.
#' Note that the modified column is a simple, self-contained character variable
#' that cannot itself be converted or reverted back to the original ("labels-off")
#' values of its parent/source column.
#'
#' @param data the data.frame.
#' @param var the unquoted name of the column (variable) whose values you wish
#' to replace with the corresponding value labels.
#'
#' @return A data.frame consisting of the originally supplied data.frame, with
#' the var argument variable's values replaced with its value labels.
#' @export
#' @examples
#' # swap in "am" value labels for values in mtcars
#' df <- mtcars # copy of mtcars
#'
#' # now, add value labels
#' df <- add_val1(
#'   data = df,
#'   var = am,
#'   vals = c(0, 1),
#'   labs = c("automatic", "manual")
#' )
#'
#' # switch out "am" values for value labels, assign to df_plus
#' df_plus <- use_val_lab1(df, am)
#' head(df_plus[c("am")])
use_val_lab1 <- function(data, var) {
  # use numeric range labs for numeric variables
  use_q_labsv <- function(data, var) {
    x <- data[[var]]
    x <- irregular2v(x, to = NA, nan.include = TRUE, inf.include = TRUE)
    this_val_label_var <- paste0("val.labs.", var)
    char_q <- attributes(data)[[this_val_label_var]]
    char_q <- char_q[char_q != "NA"]
    qvals <- as.numeric(names(char_q))
    names(qvals) <- as.character(char_q)
    qvals <- rev(qvals)

    x_out <- rep("Other", length(x))

    for (i in seq_along(qvals)) {
      this_val <- qvals[i]
      this_lab <- names(qvals)[i]
      x_out[!is.na(x) & x <= this_val] <- this_lab
    }

    x_out[is.na(x)] <- "NA"
    x_out <- as_numv(x_out)
    return(x_out)
  }

  # make var character value
  vars <- deparse(substitute(var))
  test_quote <- any(grepl("\"", vars))
  if (test_quote && is.character(vars)) vars <- gsub("\"", "", vars)
  vars <- gsub("c\\(", "", vars)
  vars <- gsub("\\(", "", vars)
  vars <- gsub("\\)", "", vars)

  # test for presence of var in data.frame
  if (!all(vars %in% names(data)) || length(vars) != 1) {
    stop("
\nInvalid var argument specification: var arg should be a single, unquoted
name of a variable that is present in the data.frame.
         ")
  }

  # make this a Base R data.frame
  data <- as_base_data_frame(data)

  # ensure value labels are sorted
  data <- sort_val_labs(data)

  if (nrow(data) > 300000) {
    warning("
\nNote: labelr is not optimized for data.frames this large.")
  }

  # get label attributes, to restore when we're done
  initial_lab_atts <- get_all_lab_atts(data)

  # get value labs
  val.labs <- get_val_labs(data)

  # capture variable names
  if (is.null(vars)) {
    vars <- gsub("val.labs.", "", names(get_all_lab_atts(data, "val.labs")))
    if (length(vars) == 0) {
      stop("
\nNo value-labeled variables found in data.frame./n")
    }
  }

  if (!all(vars %in% names(data))) {
    stop("
\nInvalid var argument specification: var arg should be a single, unquoted
name of a value-labeled variable present in the data.frame.
         ")
  }

  # use the labels (recode from vals to labels)
  for (i in seq_along(vars)) {
    var_name <- vars[i]
    val_lab_name <- paste0("val.labs.", var_name)

    if (!check_labs_att(data, val_lab_name)) {
      stop(sprintf(
        "
No value labels found for supplied var --%s--.",
        var_name
      ))
    }

    # test for whether variable could be numeric
    num_test1 <- is_numable(names(attributes(data)[[val_lab_name]]))
    num_test2 <- is_numable(data[[var_name]]) || is.numeric(data[[var_name]])
    num_test <- all(num_test1, num_test2)

    # test for presence of many-to-one (m1) labels
    this_var_val_lab <- get_labs_att(data, val_lab_name)[[1]]
    not_m1_test <- length(unique(names(this_var_val_lab))) == length(unique(unname(this_var_val_lab)))

    # if not m1 and is numable, use use_q_labsv() vals-to-labs conversion
    if (num_test && not_m1_test) {
      var_new <- use_q_labsv(data, var_name)
      data[[var_name]] <- var_new

      # handle other nominal value-labeled variables
      # these are the add_val_labs() and add_val1() value labels
    } else if (var_name %in% val.labs$var) {
      var_old <- data[[var_name]]
      var_old <- as.character(var_old)
      var_old <- irregular2v(var_old, NA)
      val_labv <- unlist(attributes(data)[val_lab_name])
      names(val_labv) <- gsub(paste0(val_lab_name, "."), "", names(val_labv))
      var_new <- val_labv[var_old]
      var_new <- unname(var_new)
      var_new <- as_numv(var_new)
      data[[var_name]] <- var_new
      vals_to_fix <- which(is.na(var_new) & !is.na(var_old))
      data[vals_to_fix, var_name] <- var_old[vals_to_fix]
    }
  }

  # to restore label attributes information
  data <- add_lab_atts(data, initial_lab_atts, num.convert = FALSE)
  return(data)
}

#' @export
#' @rdname use_val_lab1
uvl1 <- use_val_lab1

Try the labelr package in your browser

Any scripts or data that you put into this service are public.

labelr documentation built on Sept. 11, 2024, 9:05 p.m.