R/combine_checkboxes.R

Defines functions combine_and_repair_tbls convert_checkbox_vals replace_true get_metadata_spec combine_checkboxes

Documented in combine_and_repair_tbls combine_checkboxes convert_checkbox_vals get_metadata_spec replace_true

#' @title Combine Checkbox Fields into a Single Column
#'
#' @description
#' [combine_checkboxes()] consolidates multiple checkbox fields in a REDCap data
#' tibble into a single column. This transformation simplifies analysis by
#' merging several binary columns into one labeled factor column, making the
#' data more interpretable and easier to analyze.
#'
#' @details
#' [combine_checkboxes()] operates on the data and metadata tibbles produced by
#' the [read_redcap()] function. Since it relies on the checkbox field naming
#' conventions used by REDCap, changes to the checkbox variable names or their
#' associated metadata `field_name`s could lead to errors.
#'
#' REDCap checkbox fields are typically expanded into separate variables for each
#' checkbox option, with names formatted as `checkbox_var___1`, `checkbox_var___2`,
#' etc. [combine_checkboxes()] detects these variables and combines them into a
#' single column. If the expected variables are not found, an error is returned.
#'
#' @param supertbl A supertibble generated by [read_redcap()]. Required.
#' @param tbl The `redcap_form_name` of the data tibble to extract. Required.
#' @param cols Checkbox columns to combine to single column. Required.
#' @param names_prefix String added to the start of every variable name.
#' @param names_sep String to separate new column names from `names_prefix`.
#' @param names_glue Instead of `names_sep` and `names_prefix`, you can supply
#' a glue specification and the unique `.value` to create custom column names.
#' @param names_repair What happens if the output has invalid column names?
#' The default, "check_unique" is to error if the columns are duplicated.
#' Use "minimal" to allow duplicates in the output, or "unique" to de-duplicated
#' by adding numeric suffixes. See [vctrs::vec_as_names()] for more options.
#' @param multi_value_label A string specifying the value to be used when multiple
#' checkbox fields are selected. Default "Multiple".
#' @param values_fill Value to use when no checkboxes are selected. Default `NA`.
#' @param raw_or_label Either 'raw' or 'label' to specify whether to use raw coded
#' values or labels for the options. Default 'label'.
#' @param keep Logical indicating whether to keep the original checkbox fields in
#' the output. Default `TRUE`.
#'
#' @return A modified supertibble.
#'
#' @examples
#' library(dplyr)
#' # Set up sample data tibble
#' data_tbl <- tibble::tribble(
#'   ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3",
#'   1, TRUE, FALSE, FALSE,
#'   2, TRUE, TRUE, FALSE,
#'   3, FALSE, FALSE, FALSE
#' )
#'
#' # Set up sample metadata tibble
#' metadata_tbl <- tibble::tribble(
#'   ~"field_name", ~"field_type", ~"select_choices_or_calculations",
#'   "study_id", "text", NA,
#'   "multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue",
#'   "multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue",
#'   "multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue"
#' )
#'
#' # Create sample supertibble
#' supertbl <- tibble::tribble(
#'   ~"redcap_form_name", ~"redcap_data", ~"redcap_metadata",
#'   "tbl", data_tbl, metadata_tbl
#' )
#'
#' class(supertbl) <- c("redcap_supertbl", class(supertbl))
#'
#' # Combine checkboxes under column "multi"
#' combine_checkboxes(
#'   supertbl = supertbl,
#'   tbl = "tbl",
#'   cols = starts_with("multi")
#' ) %>%
#'   dplyr::pull(redcap_data) %>%
#'   dplyr::first()
#'
#' \dontrun{
#'
#' redcap_uri <- Sys.getenv("REDCAP_URI")
#' token <- Sys.getenv("REDCAP_TOKEN")
#'
#' supertbl <- read_redcap(redcap_uri, token)
#' combine_checkboxes(
#'   supertbl = supertbl,
#'   tbl = "tbl",
#'   cols = starts_with("col"),
#'   multi_value_label = "Multiple",
#'   values_fill = NA
#' )
#' }
#'
#' @export

combine_checkboxes <- function(supertbl,
                               tbl,
                               cols,
                               names_prefix = "",
                               names_sep = "_",
                               names_glue = NULL,
                               names_repair = "check_unique",
                               multi_value_label = "Multiple",
                               values_fill = NA,
                               raw_or_label = "label",
                               keep = TRUE) {
  # Check args ---
  check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata"))
  check_arg_is_character(names_prefix, len = 1)
  check_arg_is_character(names_sep, len = 1, any.missing = TRUE)
  check_arg_is_character(names_repair, len = 1, any.missing = FALSE)
  check_arg_is_character(names_glue, len = 1, any.missing = FALSE, null.ok = TRUE)
  check_arg_is_character(tbl, len = 1, any.missing = FALSE)
  check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE)
  check_arg_is_character(values_fill, len = 1, any.missing = TRUE)
  check_arg_choices(raw_or_label, choices = c("label", "raw"))
  check_arg_is_logical(keep, len = 1, any.missing = FALSE)

  # Extract tbl from supertbl
  data_tbl <- supertbl %>%
    extract_tibble(tbl)

  # Save user cols to quo
  cols_exp <- enquo(cols)

  # Evaluate the cols expression to get the selected column names
  selected_cols <- names(eval_select(cols_exp, data = data_tbl))
  check_fields_exist(fields = selected_cols, expr = cols_exp) # Check supplied fields exist

  # Get metadata reference table, check that chosen fields are checkboxes
  metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]
  metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep, names_glue)

  # Copy data_tbl to mod, data_tbl to be referenced later
  data_tbl_mod <- data_tbl

  data_tbl_mod <- data_tbl_mod %>%
    mutate(
      across(
        all_of(selected_cols),
        ~ replace_true(.x,
          cur_column(),
          metadata = metadata_spec,
          raw_or_label = raw_or_label
        )
      ),
      across(all_of(selected_cols), as.character) # enforce to character strings
    )

  new_cols <- metadata_spec %>%
    nest(.by = ".new_value", .key = "metadata") %>%
    pmap(convert_checkbox_vals,
      data_tbl = data_tbl_mod,
      raw_or_label = raw_or_label, multi_value_label = multi_value_label, values_fill = values_fill
    )

  final_tbl <- combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair = names_repair)

  # Keep or remove original multi columns
  if (!keep) {
    final_tbl <- final_tbl %>%
      select(!all_of(selected_cols))
  }

  # Update the supertbl data tibble
  supertbl$redcap_data[supertbl$redcap_form_name == tbl][[1]] <- final_tbl

  supertbl
}

#' @title Get metadata specification table
#'
#' @inheritParams combine_checkboxes
#' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()].
#' @param selected_cols Character string vector of field names for checkbox combination
#'
#' @returns a tibble
#'
#' @keywords internal
get_metadata_spec <- function(metadata_tbl,
                              selected_cols,
                              names_prefix,
                              names_sep,
                              names_glue) {
  check_metadata_fields_exist(metadata_tbl, selected_cols)

  # Create a metadata reference table linking field name to raw and label values
  out <- metadata_tbl %>%
    filter(.data$field_name %in% selected_cols) %>%
    mutate(
      .value = sub("___.*$", "", .data$field_name)
    )

  if (!is.null(names_glue)) {
    # Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep
    glue_env <- out %>%
      select(".value")

    glue_env$.new_value <- as.character(glue::glue_data(glue_env, names_glue))

    glue_env <- glue_env %>%
      select(".new_value")

    out <- cbind(out, glue_env)
  } else {
    out <- out %>%
      mutate(
        .new_value = case_when(names_prefix != "" ~ paste(names_prefix, .data$.value, sep = names_sep),
          .default = paste(names_prefix, .data$.value, sep = "")
        )
      )
  }

  # Check that for each unique value of .value there is one unique value of .new_value
  # May be removed in the future
  check_equal_col_summaries(out, ".value", ".new_value")

  # Make sure selection is checkbox metadata field type
  check_fields_are_checkboxes(out)

  # Bind raw/label values per original field grouping
  parsed_vals <- tibble()

  for (i in seq_along(unique(out$.value))) {
    index <- unique(out$.value)[i]
    out_filtered <- out %>% filter(.data$.value == index)

    parsed_vals <- rbind(parsed_vals, parse_labels(first(out_filtered$select_choices_or_calculations)))
  }

  bind_cols(out, parsed_vals) %>%
    select("field_name", "raw", "label", ".value", ".new_value") %>%
    relocate(".value", ".new_value", .after = "field_name")
}

#' @title Replace checkbox TRUEs with raw_or_label values
#'
#' @inheritParams combine_checkboxes
#' @param col A vector
#' @param col_name A string
#' @param metadata A metadata tibble from the original supertibble
#'
#' @description
#' Simple utility function for replacing checkbox field values.
#'
#' @returns A character string
#'
#' @keywords internal
replace_true <- function(col, col_name, metadata, raw_or_label) {
  # Replace TRUEs/1s with the appropriate raw or label value from the metadata
  replacement <- metadata %>%
    filter(.data$field_name == col_name) %>%
    pull(raw_or_label)
  col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 & 1 == TRUE
  # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values
  col
}

#' @title Convert a new checkbox column's values
#'
#' @description This function takes a single column of data and converts the values
#' based on the overall data tibble cross referenced with a nested section of the
#' metadata tibble.
#'
#' `case_when` logic helps determine whether the value is a coalesced singular
#' value or a user-specified one via `multi_value_label` or `values_fill`.
#'
#' @details
#' This function is used in conjunction with `pmap()`.
#'
#' @keywords internal
#'
#' @param metadata A nested portion of the overall metadata tibble
#' @param data_tbl The data tibble from the original supertibble
#' @param .new_value The new column values made by [combine_checkboxes()]
#' @inheritParams combine_checkboxes
convert_checkbox_vals <- function(metadata, .new_value, data_tbl, raw_or_label, multi_value_label, values_fill) {
  tibble(
    !!.new_value := rowSums(!is.na(data_tbl[names(data_tbl) %in% metadata$field_name]))
  ) %>%
    mutate(
      !!.new_value := case_when(. > 1 ~ multi_value_label,
        . == 1 ~ coalesce(!!!data_tbl[, names(data_tbl) %in% metadata$field_name]),
        .default = values_fill
      ),
      !!.new_value := factor(!!sym(.new_value),
        levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)
      )
    )
}

#' @title Combine checkbox fields with respect to repaired outputs
#'
#' @description
#' This function seeks to preserve the original data columns and types from the
#' originally supplied data_tbl and add on the new columns from data_tbl_mod.
#'
#' If `names_repair` presents a repair strategy, the output columns will be
#' captured and updated here while dropping the original columns.
#'
#' @param data_tbl The original data table given to [combine_checkboxes()]
#' @param data_tbl_mod A modified data table from `data_tbl`
#' @param new_cols The new columns created for checkbox combination
#' @inheritParams combine_checkboxes
#'
#' @keywords internal
#'
#' @returns a tibble
combine_and_repair_tbls <- function(data_tbl, data_tbl_mod, new_cols, names_repair) {
  # Perform initial column bind with repair strategy
  data_tbl_mod <- bind_cols(data_tbl_mod, new_cols, .name_repair = names_repair)

  # Get the column names of each table
  cols_data_tbl <- names(data_tbl)
  cols_data_tbl_mod <- names(data_tbl_mod)

  # Identify common columns
  common_cols <- intersect(cols_data_tbl, cols_data_tbl_mod)

  # Identify unique columns in data_tbl_mod
  unique_cols_mod <- setdiff(cols_data_tbl_mod, cols_data_tbl)

  # Select common columns from data_tbl
  common_data <- data_tbl %>%
    select(all_of(common_cols))

  # Select unique columns from data_tbl_mod
  unique_data_mod <- data_tbl_mod %>%
    select(all_of(unique_cols_mod))

  # Combine the selected columns
  combined_data <- bind_cols(common_data, unique_data_mod)

  combined_data
}

Try the REDCapTidieR package in your browser

Any scripts or data that you put into this service are public.

REDCapTidieR documentation built on April 3, 2025, 10:50 p.m.