Nothing
#' Swap Variable Value Labels for Variable Values
#'
#' @description
#' Replace the actual values of data.frame variables with the corresponding
#' value labels (previous assigned using `add_val_labs` or a related function).
#'
#' @details
#' Note: `uvl` is a compact alias for `use_val_labs`: they do the same thing,
#' and the former is easier to type.
#'
#' `use_val_labs` takes a variable value-labeled data.frame and substitutes each
#' (labeled) variable's value labels for its values, returning a data.frame whose
#' dimensions, names, and members are the same as the inputted data.frame. This
#' may be useful if one wishes to view data.frame information using the
#' (potentially) more intuitively meaningful value labels (e.g., gender=1 values
#' displayed as "Male" instead of 1).
#'
#' Warning: `use_val_labs` will replace existing variable values with value
#' labels and cannot be undone. If you wish to preserve variable values, be sure
#' to assign the result of `use_val_labs` to a new object. For other ways to
#' leverage value labels for common data management or inspection tasks, while
#' preserving raw data values in returned object, see `add_lab_cols`,
#' `add_lab_dummies`, `flab`, `slab`, `tabl`, `headl`, `taill`, and `somel`.
#'
#' @param data a data.frame.
#' @param vars the names of the columns (variables) for which value labels will
#' will replace original values in the returned data.frame.
#'
#' @return A data.frame, with (all or the select) variable value labels
#' substituted for original variable values and any affected variables coerced
#' to character if they were not already.
#' @export
#' @examples
#' # Example #1 - mtcars example, one variable at a time
#' # one variable at a time, mtcars
#' df <- mtcars
#' # now, add value labels
#' df <- add_val_labs(
#' data = df,
#' vars = "am",
#' vals = c(0, 1),
#' labs = c("automatic", "manual")
#' )
#'
#' df <- add_val_labs(
#' data = df,
#' vars = "carb",
#' vals = c(1, 2, 3, 4, 6, 8),
#' labs = c(
#' "1-carb", "2-carbs",
#' "3-carbs", "4-carbs",
#' "6-carbs", "8-carbs"
#' )
#' )
#'
#' # var arg can be unquoted if using add_val1()
#' # note that this is not add_val_labs(); add_val1() has "var" (not "vars) arg
#' df <- add_val1(
#' data = df,
#' var = cyl, # note, "var," not "vars" arg
#' vals = c(4, 6, 8),
#' labs = c(
#' "four-cyl",
#' "six-cyl",
#' "eight-cyl"
#' )
#' )
#'
#' df <- add_val_labs(
#' data = df,
#' vars = "gear",
#' vals = c(3, 4),
#' labs = c(
#' "3-speed",
#' "4-speed"
#' )
#' )
#'
#' # Oops, we forgot 5-speeds; let's finish the job.
#' df <- add_val_labs(
#' data = df,
#' vars = "gear",
#' vals = 5,
#' labs = "5-speed"
#' )
#'
#' head(use_val_labs(df), 3) # they're there
#'
#' # Example #2 - (Fake) Likert Data
#' # add val labs to multiple variables at once
#' # make a "Likert"-type fake data set to demo
#' # note, by default, add_val_labs() "vars" arg will do partial matching
#' # in this case, we catch all vars with "x" in their name
#' set.seed(272)
#' dflik <- make_likert_data(scale = 1:7)
#' vals2label <- 1:7
#' labs2use <- c(
#' "VSD",
#' "SD",
#' "D",
#' "N",
#' "A",
#' "SA",
#' "VSA"
#' )
#'
#' dflik <- add_val_labs(
#' data = dflik, vars = c("x", "y3"), # note the vars args
#' vals = vals2label,
#' labs = labs2use,
#' partial = TRUE
#' )
#'
#' # note, all "x" vars get the labs, as does "y3"
#' # see vars = args above
#' lik1 <- use_val_labs(dflik)
#' head(lik1)
#' # keep a copy
#' dflik_conv <- use_val_labs(dflik)
#' head(dflik_conv, 3)
use_val_labs <- function(data, vars = NULL) {
# use numeric range labs for numeric variables
use_q_labs <- function(data, var) {
x <- data[[var]]
x <- irregular2v(x, to = NA, nan.include = TRUE, inf.include = TRUE)
this_val_label_var <- paste0("val.labs.", var)
char_q <- attributes(data)[[this_val_label_var]]
char_q <- char_q[char_q != "NA"]
qvals <- as.numeric(names(char_q))
names(qvals) <- as.character(char_q)
qvals <- rev(qvals)
x_out <- rep("Other", length(x))
for (i in seq_along(qvals)) {
this_val <- qvals[i]
this_lab <- names(qvals)[i]
x_out[!is.na(x) & x <= this_val] <- this_lab
}
x_out[is.na(x)] <- "NA"
x_out <- as_numv(x_out)
data[[var]] <- x_out
return(data)
}
# make this a Base R data.frame
data <- as_base_data_frame(data)
# ensure value labels are sorted
data <- sort_val_labs(data)
if (nrow(data) > 300000) {
warning("
Note: labelr is not optimized for data.frames this large.")
}
# # check systematically for all found values being NA
# size <- 5000
# if (nrow(data) < size) size <- nrow(data)
# inds2check <- unique(floor(seq(1, nrow(data), length.out = size)))
# any_all_na_init <- any(sapply(data[inds2check, ], function(x) all(is.na(x))))
#
# get value labs
val.labs <- get_val_labs(data)
# capture variable names
if (is.null(vars)) vars <- unique(get_val_labs(data)[["var"]])
if (!all(vars %in% names(data)) && !all(is.na(vars))) {
stop("\n
One or more vars supplied to use_val_labs() not found in the supplied data.frame.
Did you drop or rename a column (var) after value-labeling it? If so, use_val_labs()
may be trying to modify a variable (column) that no longer exists -- at least not by
the column name it had when you labeled it.
Explore commands like get_val_labs(), drop_val_labs(), and srename() for tools to
prevent and troubleshoot these sorts of issues.
")
} else if (all(is.na(vars))) {
warning("
\nNo value-labeled vars found. Run get_val_labs() on your data.frame to see which,
if any, variables have value labels.
")
} else {
# use the labels (recode from vals to labels)
for (i in seq_along(vars)) {
var_name <- vars[i]
# handle any labeled numerical values
val_lab_name <- paste0("val.labs.", var_name)
if (!check_labs_att(data, val_lab_name)) {
stop(sprintf(
"
No value labels found for supplied var --%s--.",
var_name
))
}
# handle value-labeled numerical variables
# test for whether variable could be numeric
num_test1 <- is_numable(names(attributes(data)[[val_lab_name]]))
num_test2 <- is_numable(data[[var_name]]) || is.numeric(data[[var_name]])
num_test <- all(num_test1, num_test2)
# test for presence of many-to-one (m1) labels
this_var_val_lab <- get_labs_att(data, val_lab_name)[[1]]
not_m1_test <- length(unique(names(this_var_val_lab))) == length(unique(unname(this_var_val_lab)))
# if not m1 and is numable, use use_q_labs() vals-to-labs conversion
if (num_test && not_m1_test) {
data <- use_q_labs(data, var_name)
# handle other nominal value-labeled variables
} else if (var_name %in% val.labs$var) {
val_labv <- unlist(attributes(data)[val_lab_name])
names(val_labv) <- gsub(paste0(val_lab_name, "."), "", names(val_labv))
var_old <- data[[var_name]]
var_old <- as.character(var_old)
var_old <- irregular2v(var_old, NA)
var_new <- val_labv[var_old]
var_new <- unname(var_new)
var_new <- as_numv(var_new)
data[[var_name]] <- var_new
vals_to_fix <- which(is.na(var_new) & !is.na(var_old))
data[vals_to_fix, var_name] <- var_old[vals_to_fix]
} else {
next
}
attributes(data)[[val_lab_name]] <- NULL
}
# # check systematically for columns that lost many values to NA
# inds2check <- unique(floor(seq(1, nrow(data), length.out = size)))
# any_all_na_end <- any(sapply(data[inds2check, ], function(x) all(is.na(x))))
#
# # throw an error if some column acquired new NA values based on
# # non-comprehensive but systematic test
# if (!any_all_na_init && any_all_na_end) {
# stop("
# \nThis application of use_val_labs() would lead a column to be coerced to all NA values,
# which is not allowed. This may result from attempting multiple nested or redundant
# calls to use_val_labs() and/or related functions.
# ")
# }
#
data <- clean_data_atts(data)
}
return(data)
}
#' @export
#' @rdname use_val_labs
uvl <- use_val_labs
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.