Nothing
#' Apply One Label to Multiple Values for a Single Variable
#'
#' @description
#' Apply a single variable value label to multiple values of a variable
#' ("m1" is shorthand for "many values get one label").
#'
#' @details
#' Note 1: `add1m1` is a variant of `add_m1_lab` that allows you to specify
#' only one var to label but allows you to pass its name without quoting it
#' (compare `add1m1`(mtcars, am, ...) to `add_m1_lab`(mtcars, "carb", ...).
#'
#' Note 2: `add1m1` (and `add_m1_lab`) allows the user to assign the same value
#' label to multiple distinct values of a variable ("m1" is short for
#' "many-to-one"). This is in contrast to `add_val1` (and `add_val_labs`), which
#' requires a strict one-to-one mapping of distinct variable values and distinct
#' value labels.
#'
#' Note 3: This command is intended exclusively for interactive use. In
#' particular, the var argument must be the literal name of a single variable
#' (column) found in the supplied data.frame and may NOT be, e.g., the name of a
#' character vector that contains the variable (column name) of interest. If you
#' wish to supply a character vector with the names of variables (columns) of
#' interest, use `add_m1_lab()`.
#'
#' @param data a data.frame.
#' @param var the unquoted name of the variable (column) to which value labels
#' will be added.
#' @param vals a vector of distinct values of the actual variable, each of which
#' is to be associated with the label supplied to the lab argument. Note: NA and
#' other "irregular" (e.g., NaN, Inf) values all are automatically assigned the
#' label "NA", and this cannot be overridden. Note that you do not need to
#' specify all unique vals of var, and you can supply value labels
#' incrementally, one (or a few, or all) unique vals of var at a time. However,
#' if you do this, do not re-use a value label or repeat a value-label
#' assignment you have already made: Once you've added the value label, it is
#' bound to those values until you drop the label (see `drop_val_labs`) or some
#' other action (intentional or otherwise) strips the value label attributes
#' from your data.frame (see, e.g. `strip_labs`).
#' @param lab a single distinct label that will be associated with all values
#' specified in your vals argument. Note: NA and other "irregular" (e.g.,
#' NaN, Inf) values are automatically assigned the label "NA" and may not be
#' assigned another label.
#' @param max.unique.vals `add1m1`() will not assign value labels to non-
#' integer (i.e., decimal-having) numeric variables. The max.unique.vals
#' argument further constrains the variables that may receive value labels to
#' those whose total unique values do not exceed the integer value supplied to
#' this argument.
#' @param init assign placeholder labels for variables that lack decimals
#' and meet the max.unique.vals threshold.
#'
#' @return A data.frame, with new variable value labels added (call
#' `get_val_labs` to see them), other provisional/default labelr label
#' information added, and previous user-added labelr label information
#' preserved.
#' @export
#' @examples
#' df <- mtcars
#'
#' df <- add1m1(df,
#' var = carb,
#' vals = 1:3,
#' lab = "<=3",
#' max.unique.vals = 10
#' )
#'
#' df <- add1m1(df,
#' var = carb,
#' vals = c(4, 6, 8),
#' lab = ">=4",
#' max.unique.vals = 10
#' )
#'
#' head(use_val_labs(df), 8) # they're there
add1m1 <- function(data, var, vals, lab,
max.unique.vals = 10,
init = FALSE) {
# function to streamline a data.frame while preserving prior labelr attributes
sunique <- function(data, vars = NULL) {
lab_atts <- get_all_lab_atts(data)
if (!is.null(vars)) {
data <- data[vars]
data <- as.data.frame(data)
names(data) <- vars
}
data_unique <- unique(data)
data_unique <- add_lab_atts(data_unique, lab_atts,
num.convert = FALSE,
clean = FALSE
)
return(data_unique)
}
# function to recode many to one
recode_m21 <- function(x, bef, aft, unique = FALSE) {
x <- as.character(x)
xuni <- sort(unique(x))
oldxuni <- xuni
bef <- as.character(bef)
aft <- as.character(aft)
if (!length(bef) >= 1) stop("vals argument must contain at least one element.")
if (length(aft) != 1) stop("lab argument must contain exactly one element.")
for (i in seq_along(bef)) {
xuni[oldxuni == bef[i]] <- aft
}
names(xuni) <- oldxuni
if (!unique) {
x <- xuni[x]
na_start <- sum(is.na(x))
na_end <- sum(is.na(suppressWarnings(as.numeric(as.character(x)))))
numable <- na_end == na_start
if (numable) x <- as.numeric(x)
x <- unname(x)
} else {
x <- xuni
}
return(x)
}
# check for one lab
if (length(lab) != 1) stop("lab argument must contain exactly one element.")
# check max vals
if (max.unique.vals > 5000) {
stop("
\n max.unique.vals may not exceed 5000.")
}
# remove any excess quotes in var
vars <- deparse(substitute(var))
test_quote <- any(grepl("\"", vars))
if (test_quote && is.character(vars)) vars <- gsub("\"", "", vars)
vars <- gsub("c\\(", "", vars)
vars <- gsub("\\(", "", vars)
vars <- gsub("\\)", "", vars)
# test for presence of var in data.frame
if (!all(vars %in% names(data)) || length(vars) != 1) {
stop("
\nInvalid var argument specification: var arg should be a single, unquoted
name of a variable that is present in the data.frame.
")
}
# capture data.frame name and coerce to Base R data.frame
dfname <- deparse(substitute(data))
data <- as_base_data_frame(data)
# check again for no valid vars found
if (!any(vars %in% names(data))) {
stop("
Taken together, your inputs do not identify any vars to value-label. Possibilities include:
1. you are using the var arg to specify multiple vars (add1m1() cannot do that; try add_m1_lab());
2. your var arg input requests a variable that simply does not exist in your data.frame, because
you've previously dropped it or you've specified its name incorrectly.\n")
}
# check for incompatible vars
if (any(!sapply(data[vars], function(x) check_class(x)))) {
incomp_vars <- names(which(!sapply(data[vars], function(x) check_class(x))))[1]
stop(sprintf("
Var --%s-- is of class() that is not supported by labelr.
variable (column) vector classes must be numeric, integer, character, logical, or factor.", incomp_vars))
}
# vars that exceed max.unique.vals limit
vars_exceed <- sapply(
data[vars],
function(x) length(unique(x)) > max.unique.vals
)
vars_exceed <- names(vars_exceed)[vars_exceed]
if (length(vars_exceed) != 0) {
for (i in seq_along(vars_exceed)) {
this_val <- names(vars_exceed)[i]
warning(sprintf("
\n Var --%s-- exceeds your max.unique.vals limit and will not be labeled.\n", this_val))
}
}
# keep only variables that stay within max.unique.vals
elig_vars <- names(data)[sapply(
data,
function(x) length(unique(x)) <= max.unique.vals
)]
if (!is.null(vars)) {
if (any(!vars %in% elig_vars)) {
stop("
\nOne or more of the vars supplied exceeds the max.unique.vals limit.
1. Increase your max.unique.vals argument and/or
2. If var is numerical:
a. Round your var values or
b. Use add_quant_labs() or add_quant1() to apply numerical range
labels to the var.
")
}
}
### streamline your data.frame
data_unique <- sunique(data, vars = elig_vars)
# check again for no valid vars found
if (!any(vars %in% names(data_unique))) {
stop("
Taken together, your inputs do not identify any vars to value-label. Possibilities include:
1. you are using the var arg to specify multiple vars (add_val1() cannot do that; try add_val_labs());
2. your var arg input requests a variable that simply does not exist in your data.frame, because
you've previously dropped it or you've specified its name incorrectly.\n")
}
if (nrow(data) > 300000) {
warning("
\nNote: labelr is not optimized for data.frames this large.")
}
na.test <- check_irregular(c(vals, lab), any = TRUE)
if (na.test) {
stop("
Cannot supply NA, NaN, Inf, or character variants as a val or lab arg.
These are handled automatically.")
}
dupe_vars_test <- length(vars) != length(unique(vars))
if (dupe_vars_test) {
stop("
\nThe same var name appears multiple times in your vars argument (not allowed). Try again.\n")
}
dupe_vals_test <- length(vals) != length(unique(vals))
if (dupe_vals_test) {
stop("
\nThe same val appears multiple times in your vals argument (not allowed). Try again.\n")
}
dupe_labs_test <- length(lab) != length(unique(lab))
if (dupe_labs_test) {
stop("
\nThe same lab appears multiple times in your lab argument (not allowed). Try again.\n")
}
# initialize value labels for eligible variables if requested
if (init) {
data_unique <- init_labs(data_unique,
max.unique.vals = max.unique.vals
)
}
# begin main loop
for (i in seq_along((vars))) {
var <- vars[i]
# get var
if (!var %in% vars) {
stop(sprintf(
"\n variable name %s not found in your data.frame\n", var
))
}
# see if this variable already has any val.labs
# if so, check for already-assigned labels: each label can have only one value
this_val_label <- paste0("val.labs.", var)
this_var_have_val_labs <- check_labs_att(data_unique, this_val_label)
if (this_var_have_val_labs) {
used_lab_test <- any(lab %in% unname(get_labs_att(data_unique, this_val_label)[[1]]))
if (used_lab_test) {
# free up val lab(s) to be re-applied to other vals
labs_to_overwrite <- lab[which(lab %in% attributes(data_unique)[[this_val_label]])]
var_val_labs <- get_labs_att(data_unique, this_val_label)[[1]]
var_val_labs[var_val_labs %in% labs_to_overwrite] <- names(var_val_labs)[var_val_labs %in% labs_to_overwrite]
attributes(data_unique)[[this_val_label]] <- var_val_labs
warning(sprintf(
"
You are re-assigning at least one value label(s) previously applied to other values of -- %s --.\n",
var
))
}
}
x <- data_unique[[var]]
# handle factors and misc tests
x <- data_unique[[var]]
if (is.factor(x)) {
data_unique <- add_factor_info(data_unique)
x <- as.character(x)
}
if (!check_class(x)) {
stop(sprintf(
"\n\nVar --%s-- is of class() that is not supported by labelr. Its class
must be one of: numeric, integer, character, logical, or factor.", var
))
}
if (has_decv(x)) {
stop(sprintf(
"\n\nVar --%s-- is numeric with decimal values.\n
Round to whole number and/or coerce to character and try again.\n
Alternatively, use add_quant_labs() or add_quant1() to apply
numerical range labels to the variable in its current form.", var
))
}
if (length(unique(x)) > max.unique.vals) {
stop(sprintf(
"\nVar --%s-- has more unique vals than allowed.
Adjust max.unique.vals arg?", var
))
}
all_in <- all(unique(vals) %in% unique(x))
if (!all_in) warning(sprintf("\n Var --%s-- does not currently possess all of the vals you have specified.\n", var))
# NA, Inf, NAN handling
x <- as.character(x)
x <- irregular2v(x, to = "NA", inf.include = TRUE, nan.include = TRUE)
this_var_val_label <- paste0("val.labs", ".", var)
if (check_labs_att(data_unique, this_var_val_label)) {
vals_vec <- recode_m21(vals,
bef = vals,
aft = lab,
unique = TRUE
)
for (i in seq_along(vals_vec)) {
name_to_change <- names(attr(data_unique, this_var_val_label)) %in% names(vals_vec)[i]
if (!any(name_to_change)) {
current_val_labs <- attr(data_unique, this_var_val_label)
new_val_label_to_add <- vals_vec[i]
names(new_val_label_to_add) <- names(vals_vec)[i]
current_plus_new <- attr(data_unique, this_var_val_label) <- c(current_val_labs, new_val_label_to_add)
attr(data_unique, this_var_val_label) <- current_plus_new
}
attr(data_unique, this_var_val_label)[name_to_change] <- vals_vec[i]
}
} else {
vals_vec <- recode_m21(x,
bef = vals,
aft = lab,
unique = TRUE
)
na_element <- "NA"
names(na_element) <- "NA"
vals_vec <- c(vals_vec, na_element)
attr(data_unique, this_var_val_label) <- vals_vec
}
# ensure no literal NA values as val.labs
na_names_lab_att <- any(is.na(names(attributes(data_unique)[[this_var_val_label]])))
if (na_names_lab_att) {
na_names <- which(is.na(names(attributes(data_unique)[[this_var_val_label]])))
attributes(data_unique)[[this_var_val_label]] <- attributes(data_unique)[[this_var_val_label]][-na_names]
}
# de-duplicate as needed
final_names <- names(get_labs_att(data_unique, this_var_val_label)[[1]])
final_vals <- unname(get_labs_att(data_unique, this_var_val_label)[[1]])
labs_changed <- rep(lab, length(vals))
names(labs_changed) <- vals
final_names <- final_names[!final_names %in% vals]
final_vals <- final_vals[!final_vals %in% c(lab)]
names(final_vals) <- final_names
final_vals <- c(final_vals, labs_changed)
final_vals <- final_vals[sort(names(final_vals))]
final_vals <- final_vals[unique(names(final_vals))]
attributes(data_unique)[[this_var_val_label]] <- final_vals
}
# end main loop
lab_atts <- get_all_lab_atts(data_unique)
data <- add_lab_atts(data, lab_atts, num.convert = FALSE)
return(data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.