Nothing
#' @title Combine Checkbox Fields into a Single Column
#'
#' @description
#' [combine_checkboxes()] consolidates multiple checkbox fields in a REDCap data
#' tibble into a single column. This transformation simplifies analysis by
#' merging several binary columns into one labeled factor column, making the
#' data more interpretable and easier to analyze.
#'
#' @details
#' [combine_checkboxes()] operates on the data and metadata tibbles produced by
#' the [read_redcap()] function. Since it relies on the checkbox field naming
#' conventions used by REDCap, changes to the checkbox variable names or their
#' associated metadata `field_name`s could lead to errors.
#'
#' REDCap checkbox fields are typically expanded into separate variables for each
#' checkbox option, with names formatted as `checkbox_var___1`, `checkbox_var___2`,
#' etc. [combine_checkboxes()] detects these variables and combines them into a
#' single column. If the expected variables are not found, an error is returned.
#'
#' @param supertbl A supertibble generated by [read_redcap()]. Required.
#' @param tbl The `redcap_form_name` of the data tibble to extract. Required.
#' @param cols Checkbox columns to combine to single column. Required.
#' @param names_prefix String added to the start of every variable name.
#' @param names_sep String to separate new column names from `names_prefix`.
#' @param names_glue Instead of `names_sep` and `names_prefix`, you can supply
#' a glue specification and the unique `.value` to create custom column names.
#' @param names_repair What happens if the output has invalid column names?
#' The default, "check_unique" is to error if the columns are duplicated.
#' Use "minimal" to allow duplicates in the output, or "unique" to de-duplicated
#' by adding numeric suffixes. See [vctrs::vec_as_names()] for more options.
#' @param multi_value_label A string specifying the value to be used when multiple
#' checkbox fields are selected. Default "Multiple".
#' @param values_fill Value to use when no checkboxes are selected. Default `NA`.
#' @param raw_or_label Either 'raw' or 'label' to specify whether to use raw coded
#' values or labels for the options. Default 'label'.
#' @param keep Logical indicating whether to keep the original checkbox fields in
#' the output. Default `TRUE`.
#'
#' @return A modified supertibble.
#'
#' @examples
#' library(dplyr)
#' # Set up sample data tibble
#' data_tbl <- tibble::tribble(
#' ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3",
#' 1, TRUE, FALSE, FALSE,
#' 2, TRUE, TRUE, FALSE,
#' 3, FALSE, FALSE, FALSE
#' )
#'
#' # Set up sample metadata tibble
#' metadata_tbl <- tibble::tribble(
#' ~"field_name", ~"field_type", ~"select_choices_or_calculations",
#' "study_id", "text", NA,
#' "multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue",
#' "multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue",
#' "multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue"
#' )
#'
#' # Create sample supertibble
#' supertbl <- tibble::tribble(
#' ~"redcap_form_name", ~"redcap_data", ~"redcap_metadata",
#' "tbl", data_tbl, metadata_tbl
#' )
#'
#' class(supertbl) <- c("redcap_supertbl", class(supertbl))
#'
#' # Combine checkboxes under column "multi"
#' combine_checkboxes(
#' supertbl = supertbl,
#' tbl = "tbl",
#' cols = starts_with("multi")
#' ) %>%
#' dplyr::pull(redcap_data) %>%
#' dplyr::first()
#'
#' \dontrun{
#'
#' redcap_uri <- Sys.getenv("REDCAP_URI")
#' token <- Sys.getenv("REDCAP_TOKEN")
#'
#' supertbl <- read_redcap(redcap_uri, token)
#' combine_checkboxes(
#' supertbl = supertbl,
#' tbl = "tbl",
#' cols = starts_with("col"),
#' multi_value_label = "Multiple",
#' values_fill = NA
#' )
#' }
#'
#' @export
combine_checkboxes <- function(supertbl,
tbl,
cols,
names_prefix = "",
names_sep = "_",
names_glue = NULL,
names_repair = "check_unique",
multi_value_label = "Multiple",
values_fill = NA,
raw_or_label = "label",
keep = TRUE) {
# Check args ---
check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata"))
check_arg_is_character(names_prefix, len = 1)
check_arg_is_character(names_sep, len = 1, any.missing = TRUE)
check_arg_is_character(names_repair, len = 1, any.missing = FALSE)
check_arg_is_character(names_glue, len = 1, any.missing = FALSE, null.ok = TRUE)
check_arg_is_character(tbl, len = 1, any.missing = FALSE)
check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE)
check_arg_is_character(values_fill, len = 1, any.missing = TRUE)
check_arg_choices(raw_or_label, choices = c("label", "raw"))
check_arg_is_logical(keep, len = 1, any.missing = FALSE)
# Extract tbl from supertbl
data_tbl <- supertbl %>%
extract_tibble(tbl)
# Save user cols to quo
cols_exp <- enquo(cols)
# Evaluate the cols expression to get the selected column names
selected_cols <- names(eval_select(cols_exp, data = data_tbl))
check_fields_exist(fields = selected_cols, expr = cols_exp) # Check supplied fields exist
# Get metadata reference table, check that chosen fields are checkboxes
metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]
metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep, names_glue)
# Copy data_tbl to mod, data_tbl to be referenced later
data_tbl_mod <- data_tbl
data_tbl_mod <- data_tbl_mod %>%
mutate(
across(
all_of(selected_cols),
~ replace_true(.x,
cur_column(),
metadata = metadata_spec,
raw_or_label = raw_or_label
)
),
across(all_of(selected_cols), as.character) # enforce to character strings
)
new_cols <- metadata_spec %>%
nest(.by = ".new_value", .key = "metadata") %>%
pmap(convert_checkbox_vals,
data_tbl = data_tbl_mod,
raw_or_label = raw_or_label, multi_value_label = multi_value_label, values_fill = values_fill
)
final_tbl <- combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair = names_repair)
# Keep or remove original multi columns
if (!keep) {
final_tbl <- final_tbl %>%
select(!all_of(selected_cols))
}
# Update the supertbl data tibble
supertbl$redcap_data[supertbl$redcap_form_name == tbl][[1]] <- final_tbl
supertbl
}
#' @title Get metadata specification table
#'
#' @inheritParams combine_checkboxes
#' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()].
#' @param selected_cols Character string vector of field names for checkbox combination
#'
#' @returns a tibble
#'
#' @keywords internal
get_metadata_spec <- function(metadata_tbl,
selected_cols,
names_prefix,
names_sep,
names_glue) {
check_metadata_fields_exist(metadata_tbl, selected_cols)
# Create a metadata reference table linking field name to raw and label values
out <- metadata_tbl %>%
filter(.data$field_name %in% selected_cols) %>%
mutate(
.value = sub("___.*$", "", .data$field_name)
)
if (!is.null(names_glue)) {
# Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep
glue_env <- out %>%
select(".value")
glue_env$.new_value <- as.character(glue::glue_data(glue_env, names_glue))
glue_env <- glue_env %>%
select(".new_value")
out <- cbind(out, glue_env)
} else {
out <- out %>%
mutate(
.new_value = case_when(names_prefix != "" ~ paste(names_prefix, .data$.value, sep = names_sep),
.default = paste(names_prefix, .data$.value, sep = "")
)
)
}
# Check that for each unique value of .value there is one unique value of .new_value
# May be removed in the future
check_equal_col_summaries(out, ".value", ".new_value")
# Make sure selection is checkbox metadata field type
check_fields_are_checkboxes(out)
# Bind raw/label values per original field grouping
parsed_vals <- tibble()
for (i in seq_along(unique(out$.value))) {
index <- unique(out$.value)[i]
out_filtered <- out %>% filter(.data$.value == index)
parsed_vals <- rbind(parsed_vals, parse_labels(first(out_filtered$select_choices_or_calculations)))
}
bind_cols(out, parsed_vals) %>%
select("field_name", "raw", "label", ".value", ".new_value") %>%
relocate(".value", ".new_value", .after = "field_name")
}
#' @title Replace checkbox TRUEs with raw_or_label values
#'
#' @inheritParams combine_checkboxes
#' @param col A vector
#' @param col_name A string
#' @param metadata A metadata tibble from the original supertibble
#'
#' @description
#' Simple utility function for replacing checkbox field values.
#'
#' @returns A character string
#'
#' @keywords internal
replace_true <- function(col, col_name, metadata, raw_or_label) {
# Replace TRUEs/1s with the appropriate raw or label value from the metadata
replacement <- metadata %>%
filter(.data$field_name == col_name) %>%
pull(raw_or_label)
col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 & 1 == TRUE
# Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values
col
}
#' @title Convert a new checkbox column's values
#'
#' @description This function takes a single column of data and converts the values
#' based on the overall data tibble cross referenced with a nested section of the
#' metadata tibble.
#'
#' `case_when` logic helps determine whether the value is a coalesced singular
#' value or a user-specified one via `multi_value_label` or `values_fill`.
#'
#' @details
#' This function is used in conjunction with `pmap()`.
#'
#' @keywords internal
#'
#' @param metadata A nested portion of the overall metadata tibble
#' @param data_tbl The data tibble from the original supertibble
#' @param .new_value The new column values made by [combine_checkboxes()]
#' @inheritParams combine_checkboxes
convert_checkbox_vals <- function(metadata, .new_value, data_tbl, raw_or_label, multi_value_label, values_fill) {
tibble(
!!.new_value := rowSums(!is.na(data_tbl[names(data_tbl) %in% metadata$field_name]))
) %>%
mutate(
!!.new_value := case_when(. > 1 ~ multi_value_label,
. == 1 ~ coalesce(!!!data_tbl[, names(data_tbl) %in% metadata$field_name]),
.default = values_fill
),
!!.new_value := factor(!!sym(.new_value),
levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)
)
)
}
#' @title Combine checkbox fields with respect to repaired outputs
#'
#' @description
#' This function seeks to preserve the original data columns and types from the
#' originally supplied data_tbl and add on the new columns from data_tbl_mod.
#'
#' If `names_repair` presents a repair strategy, the output columns will be
#' captured and updated here while dropping the original columns.
#'
#' @param data_tbl The original data table given to [combine_checkboxes()]
#' @param data_tbl_mod A modified data table from `data_tbl`
#' @param new_cols The new columns created for checkbox combination
#' @inheritParams combine_checkboxes
#'
#' @keywords internal
#'
#' @returns a tibble
combine_and_repair_tbls <- function(data_tbl, data_tbl_mod, new_cols, names_repair) {
# Perform initial column bind with repair strategy
data_tbl_mod <- bind_cols(data_tbl_mod, new_cols, .name_repair = names_repair)
# Get the column names of each table
cols_data_tbl <- names(data_tbl)
cols_data_tbl_mod <- names(data_tbl_mod)
# Identify common columns
common_cols <- intersect(cols_data_tbl, cols_data_tbl_mod)
# Identify unique columns in data_tbl_mod
unique_cols_mod <- setdiff(cols_data_tbl_mod, cols_data_tbl)
# Select common columns from data_tbl
common_data <- data_tbl %>%
select(all_of(common_cols))
# Select unique columns from data_tbl_mod
unique_data_mod <- data_tbl_mod %>%
select(all_of(unique_cols_mod))
# Combine the selected columns
combined_data <- bind_cols(common_data, unique_data_mod)
combined_data
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.