combine_checkboxes: Combine checkbox style variables

View source: R/combine_checkboxes.R

combine_checkboxes    R Documentation

Combine checkbox style variables

Description

In REDCap, checkbox style variables are ones where more than one choice is allowed. The responses appear over multiple columns whose names share a common prefix and whose values are "Checked" or "Unchecked". This function takes the columns selected by the prefix and combines them into a single column.

Usage

combine_checkboxes(
  data,
  id_var,
  prefix,
  check_text = "Checked",
  sep = ", ",
  data_labels = NULL
)

Arguments

data

A tbl_df or data.frame

id_var

Name of the column for each subject/participant/observational unit

prefix

Character string to select variables that start with this pattern

check_text

Character string of the response to count. Default is "Checked"

sep

Character string to collapse the responses by. Default is ", "

data_labels

A data frame or tibble with columns, 'field_name' and 'checkbox_choice'. Used to replace variable names with checkbox labels.

Value

a tbl_df

Examples

#### Example 1 --------------------------------

library(dplyr)
library(tibble)
library(tidyr)

# Simulated wide export: 100 participants, a grouping factor, and five
# checkbox columns that share the "preferred_color___" prefix.
df <- tibble::tibble(
  id = 1:100,
  group = factor(
    sample(c("A", "B", "C"), size = 100, replace = TRUE),
    levels = c("A", "B", "C")
  ),
  preferred_color___1 = sample(c("Checked", "Unchecked"), size = 100, replace = TRUE),
  preferred_color___2 = sample(c("Checked", "Unchecked"), size = 100, replace = TRUE),
  preferred_color___3 = sample(c("Checked", "Unchecked"), size = 100, replace = TRUE),
  preferred_color___4 = sample(c("Checked", "Unchecked"), size = 100, replace = TRUE),
  preferred_color___5 = sample(c("Checked", "Unchecked"), size = 100, replace = TRUE)
)


# Lookup table mapping each checkbox column to its human-readable choice.
var_labels <- tibble::tibble(
  field_name = c(
    "preferred_color___1",
    "preferred_color___2",
    "preferred_color___3",
    "preferred_color___4",
    "preferred_color___5"
  ),
  checkbox_choice = c("Blue", "Green", "Orange", "Red", "Yellow")
)

var_labels


# Without a label table.
combine_checkboxes(
  data = df,
  id_var = id,
  prefix = "preferred_color___"
)

# With `data_labels` supplied.
combine_checkboxes(
  data = df,
  id_var = id,
  prefix = "preferred_color___",
  data_labels = var_labels
)

# Same as above, but with a custom separator passed via `sep`.
combine_checkboxes(
  data = df,
  id_var = id,
  prefix = "preferred_color___",
  data_labels = var_labels,
  sep = "-"
)





#### Example 2 --------------------------------

library(dplyr)
library(tibble)
library(tidyr)

# Simulated survey data. Each participant starts with exactly one selected
# race option stored in long form (`key` names the checkbox column, `value`
# is always "Checked"); the pipeline below widens it into one column per
# option.
df2 <- tibble::tibble(
  record_id = 1:100,
  gender = sample(c("Female", "Male"),
                  size = 100,
                  replace = TRUE),
  gender_other = NA,
  age = sample(18:85,
               size = 100,
               replace = TRUE),
  education = sample(c("High-school", "College", "Graduate school"),
                     size = 100,
                     replace = TRUE),
  ethnicity = sample(c("Hispanic", "Non-hispanic"),
                     size = 100,
                     replace = TRUE),
  key = sample(c("race___1", "race___2", "race___3",
                 "race___4", "race___5", "race___6", "race___98"),
               size = 100,
               replace = TRUE),
  value = "Checked",
  income = sample(20000:120000,
                  size = 100,
                  replace = TRUE),
  marital_status = sample(c("Married", "Single"),
                          size = 100,
                          replace = TRUE),
  survey_complete = sample(c("Complete", "Not complete"),
                           size = 100,
                           replace = TRUE)) %>%
  # Factor levels fix the order in which the checkbox columns are created.
  mutate(key = factor(key,
                      levels = c("race___1", "race___2", "race___3",
                                 "race___4", "race___5", "race___6",
                                 "race___98"))) %>%
  # pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps
  # the new columns in factor-level order, and values_fill marks every option
  # a participant did not select as "Unchecked" instead of NA (previously done
  # with a separate mutate_at() + replace_na() step).
  tidyr::pivot_wider(names_from = key,
                     values_from = value,
                     names_sort = TRUE,
                     values_fill = "Unchecked")


# Data-dictionary style lookup: one row per field, with a checkbox label only
# for the race___ columns (all other fields have NA).
data_labels <- tibble::tribble(
        ~field_name,                      ~checkbox_choice,
        "record_id",                                    NA,
           "gender",                                    NA,
     "gender_other",                                    NA,
              "age",                                    NA,
        "education",                                    NA,
        "ethnicity",                                    NA,
         "race___1",                               "White",
         "race___2",           "Black or African-American",
         "race___3",    "American Indian or Alaska Native",
         "race___4",                               "Asian",
         "race___5", "Native Hawaiian or Pacific Islander",
         "race___6",                               "Other",
        "race___98",                "Prefer not to answer",
       "race_other",                                    NA,
           "income",                                    NA,
   "marital_status",                                    NA,
  "survey_complete",                                    NA
  )


df2

# Without a label table.
combine_checkboxes(data = df2,
                   id_var = record_id,
                   prefix = "race___")

# With `data_labels` supplied.
combine_checkboxes(data = df2,
                   id_var = record_id,
                   prefix = "race___",
                   data_labels = data_labels)

emilelatour/redcaptools documentation built on March 21, 2023, 3:35 p.m.