Nothing
#multiple choice
#' Compute summary statistics for a multiple choice questions
#'
#' This function generates summary statistics, including frequencies, based on
#' the provided question. It allows for optional grouping and weighting of data.
#'
#' @inheritParams single_summary
#' @param question The columns that contain each of the response options for a
#' question, can be selected by using **tidyselect** semanatics or providing a
#' vector of column names or numbers
#' @importFrom dplyr if_all
#' @return A data frame containing summary statistics, including frequencies,
#' for the specified question.
#' @export
#'
#' @family multiple-choice questions
#'
multi_summary <- function(dataset,
question,
group_by = NULL,
subgroups_to_exclude = NULL,
weights = NULL,
na.rm = FALSE){
try(group_by <- rlang::ensym(group_by), silent = TRUE) # try function is here since if is null, then it will fail
try(weights <- rlang::ensym(weights), silent = TRUE) # try function is here since if is null, then it will fail
response <- NULL #created useing NSE, necessary to avoid visible binding note
#function to identify what to exclude rather than what to include
`%ni%` <- Negate(`%in%`)
## Data Selection ##
#Evaluate users selection and select from data
data.question <-
tidyselect::eval_select(
expr = rlang::enquo(question),
data = dataset)
#Select data from dataframe with optional group_by/weights if provided
data.summary <- dplyr::case_when(
#Both subgroup and weights
!is.null(group_by) & !is.null(weights) ~ dataset %>% dplyr::select(all_of(data.question),!!group_by, !!weights),
#Only group_by
!is.null(group_by) & is.null(weights) ~ dataset %>% dplyr::select(all_of(data.question),!!group_by),
#only weights
is.null(group_by) & !is.null(weights) ~ dataset %>% dplyr::select(all_of(data.question),!!weights),
#neither group_by nor weights
TRUE ~ dataset %>% dplyr::select(all_of(data.question))
)
## Data pre-processing ##
#remove NAs if specified
if(isTRUE(na.rm)){
data.summary <- data.summary %>% dplyr::filter(if_all(-!!group_by, ~ !is.na(.x)))
}
if(is.null(group_by) & !is.null(subgroups_to_exclude)){
stop('Cannot specify `subgroups_to_exclude` without `group_by`.
Please remove `subgroups_to_exclude` or specify a grouping variable')
}
#if weights are not specified create vector of 1s
if(is.null(weights)){
data.summary$weights <- 1
#if weights are specified, rename column to weights
#necessary since weights column will always be selected
} else {
data.summary <- data.summary %>% dplyr::rename(weights = !!weights)
}
#filter variables to exclude, if specified
if(!is.null(subgroups_to_exclude)){
#Check that provided levels are contained within group_by variable
group_by_levels <- data.summary %>%
dplyr::select(!!group_by) %>%
unique %>%
dplyr::pull(1) %>%
as.character
for(i in 1:length(subgroups_to_exclude)){ #vector has length at least 1
if(subgroups_to_exclude[[i]] %ni% group_by_levels){
stop('Levels to exclude not found within group_by variable')
}
}
data.summary <- data.summary %>%
dplyr::filter(!!group_by %ni% subgroups_to_exclude)}
## Compute Frequencies ##
#transform all cols that aren't weights to character for count
weight_column <- match('weights', colnames(data.summary))
data.summary[,-weight_column] <- purrr::map_df(data.summary[,-weight_column], as.character)
if(is.null(group_by)){
#no subgroup specified
data.summary <- data.summary %>%
tidyr::pivot_longer(-weights, names_to = 'question', values_to = 'response') %>%
dplyr::group_by(question) %>%
dplyr::count(response, wt = weights) %>%
dplyr::mutate(freq = n/sum(n)) %>%
dplyr::ungroup()
colnames(data.summary) <- c('question', 'response', 'n', 'freq')
} else {
#subgroup specified
data.summary <- data.summary %>%
tidyr::pivot_longer(-c(weights, !!group_by), names_to = 'question', values_to = 'response') %>%
dplyr::group_by(question, !!group_by) %>%
dplyr::count(response, wt = weights) %>%
dplyr::mutate(freq = n/sum(n)) %>%
dplyr::ungroup()
colnames(data.summary) <- c('question', 'group_by', 'response', 'n', 'freq')
}
return(data.summary)
}
#' Generate an UpSet plot for multiple-choice questions
#'
#' Visualize multiple-choice question responses with an upset plot, a visual
#' tool for exploring the overlap and distribution of multiple-choice question
#' responses. The function supports optional subgrouping of data using the
#' `group_by` variable, exclusion of specific subgroups with
#' 'subgroups_to_exclude,' and data weighting with the 'weights' parameter.
#' Users can also choose to exclude NA values from the questions prior to
#' analysis using the 'na.rm' parameter.
#'
#' @inheritParams multi_summary
#' @inheritParams single_summary
#'
#'
#' @examples
#'
#' #Use dplyr to select questions
#' library(dplyr)
#'
#' #Basic Upset plot
#'
#' #Use `group_by` to partition the question into several groups
#' multi_freq(berlinbears, question = dplyr::starts_with('will_eat'), group_by
#' = gender)
#'
#' #to ignore a subgroup, use `subgroups_to_exclude`
#' multi_freq(berlinbears, question = dplyr::starts_with('will_eat'), group_by =
#' gender, subgroups_to_exclude = NA)
#'
#' #Specifiy survey weights with `weights`
#' multi_freq(berlinbears, question = dplyr::starts_with('will_eat'), group_by
#' = gender, weights = weights)
#'
#'
#'
#' @return An upset plot visualizing the distribution of responses to the multiple-choice question.
#'
#' @export
#'
#' @family multiple-choice questions
#'
multi_freq <- function(dataset,
question,
group_by = NULL,
subgroups_to_exclude = NULL,
weights = NULL,
na.rm = FALSE){
try(group_by <- rlang::ensym(group_by), silent = TRUE) # try function is here since if is null, then it will fail
try(weights <- rlang::ensym(weights), silent = TRUE) # try function is here since if is null, then it will fail
value <- count <- respondent <- NULL #created useing NSE, necessary to avoid visible binding note
#function to identify what to exclude rather than what to include
`%ni%` <- Negate(`%in%`)
## Data Selection ##
#Evaluate users selection and select from data
data.question <-
tidyselect::eval_select(
expr = rlang::enquo(question),
data = dataset)
#Select data from dataframe with optional group_by/weights if provided
data.summary <- dplyr::case_when(
#Both subgroup and weights
!is.null(group_by) & !is.null(weights) ~ dataset %>% dplyr::select(all_of(data.question),!!group_by, !!weights),
#Only group_by
!is.null(group_by) & is.null(weights) ~ dataset %>% dplyr::select(all_of(data.question),!!group_by),
#only weights
is.null(group_by) & !is.null(weights) ~ dataset %>% dplyr::select(all_of(data.question),!!weights),
#neither group_by nor weights
TRUE ~ dataset %>% dplyr::select(all_of(data.question))
)
## Data Validation ##
#Make sure data is in correct format (only 1s and 0s)
#create list with all unique responses for each column
responses <- data.summary %>%
dplyr::select(-c(!!group_by, !!weights)) %>%
purrr::map(.f = unique) %>%
unique
#test to make sure each column only contains 1s, 0s or NAs
for(i in 1:length(responses)){
if(!all(responses[[i]] %in% c(1,0,NA))){
stop('Multiple choice question data should consist of only 1s, 0s, or NA values')
}
}
## Data pre-processing ##
#remove NAs if specified
if(isTRUE(na.rm)){
data.summary <- data.summary %>% dplyr::filter(if_all(-!!group_by, ~ !is.na(.x)))
}
#if weights are not specified create vector of 1s
if(is.null(weights)){
data.summary$weights <- 1
#if weights are specified, rename column to weights
#necessary since weights column will always be selected
} else {
data.summary <- data.summary %>% dplyr::rename(weights = !!weights)
message('Estimes are only preciese to one significant digit, weights may have been rounded')
}
#filter variables to exclude, if specified
if(!is.null(subgroups_to_exclude)){
#Check that provided levels are contained within group_by variable
group_by_levels <- data.summary %>%
dplyr::select(!!group_by) %>%
unique %>%
dplyr::pull(1) %>%
as.character
for(i in 1:length(subgroups_to_exclude)){ #vector has length at least 1
if(subgroups_to_exclude[[i]] %ni% group_by_levels){
stop('Levels to exclude not found within group_by variable')
}
}
data.summary <- data.summary %>%
dplyr::filter(!!group_by %ni% subgroups_to_exclude)}
if(is.null(group_by)){
#no subgroup
mc_upset <- data.summary %>%
dplyr::as_tibble(rownames = "respondent") %>%
tidyr::pivot_longer(-c(respondent, weights), names_to = 'question') %>%
dplyr::filter(value != 0) %>%
dplyr::group_by(respondent, weights) %>%
dplyr::summarize(question = list(question)) %>%
dplyr::group_by(question) %>%
dplyr::summarise(count = round(sum(weights),0)) %>%
tidyr::uncount(count) %>%
ggplot2::ggplot(aes(x = question)) +
ggplot2::geom_bar() +
ggupset::scale_x_upset(order_by = 'freq') +
labs(x = "", y = "", fill = "")
#if weights are specified, transform y axis to percentage (absoulte values loose meaning)
if(length(unique(data.summary$weights)) > 1){
mc_upset <- mc_upset +
ggplot2::scale_y_continuous(labels = scales::percent_format(scale = 1))
}
} else {
#with subgroup
mc_upset <- data.summary %>%
dplyr::as_tibble(rownames = "respondent") %>%
tidyr::pivot_longer(-c(respondent, !!group_by, weights) , names_to = 'question') %>%
dplyr::filter(value != 0) %>%
dplyr::group_by(respondent, !!group_by, weights) %>%
dplyr::summarize(question = list(question)) %>%
dplyr::group_by(question, !!group_by) %>%
dplyr::summarise(count = round(sum(weights),0)) %>%
tidyr::uncount(count) %>%
ggplot2::ggplot(aes(x = question, fill = !!group_by)) +
ggplot2::geom_bar() +
ggupset::scale_x_upset(order_by = 'freq') +
labs(x = "", y = "", fill = "")
#if weights are specified, transform y axis to percentage (absoulte values loose meaning
if(length(unique(data.summary$weights)) > 1){
mc_upset <- mc_upset +
ggplot2::scale_y_continuous(labels = scales::percent_format(scale = 1))
}
}
return(mc_upset)
}
#' Create a table of frequencies and counts for multiple-choice questions
#'
#' Generates a table presenting the distribution of responses for a specified
#' multiple-choice question. If a grouping variable, `group_by`, is provided,
#' the table extends to include row and column totals, along with additional count and
#' frequency columns for each level of `group_by` (excluding specified subgroups, if any).
#' When survey weights are specified with `weights`, the counts reflect the weighted values,
#' and a note is appended at the bottom of the table.
#'
#'
#'
#' @inheritParams multi_summary
#'
#' @return A gt table displaying frequencies and counts for the specified multiple-choice question.
#' If a grouping variable is provided, the table includes subgroups for a comprehensive analysis.
#' If survey weights are specified, the table notes that frequencies and counts are weighted.
#'
#'
#' @examples
#' #Basic Table
#' multi_table(berlinbears, question = dplyr::starts_with('will_eat'))
#'
#' #Use `group_by` to partition the question into several groups
#' multi_table(berlinbears, question = dplyr::starts_with('will_eat'), group_by
#' = gender)
#'
#' #to ignore a subgroup, use `subgroups_to_exclude`
#' multi_table(berlinbears, question = dplyr::starts_with('will_eat'), group_by
#' = gender, subgroups_to_exclude = NA)
#'
#' #Specifiy survey weights with `weights`
#' multi_table(berlinbears, question = dplyr::starts_with('will_eat'), group_by
#' = gender, weights = weights)
#'
#'
#' @export
#'
#' @family multiple-choice questions
#'
multi_table <- function(dataset,
question,
group_by = NULL,
subgroups_to_exclude = NULL,
weights = NULL,
na.rm = FALSE){
#save user input for name of table
question_name <- deparse(substitute(question))
try(group_by <- rlang::ensym(group_by), silent = TRUE) # try function is here since if is null, then it will fail
try(weights <- rlang::ensym(weights), silent = TRUE) # try function is here since if is null, then it will fail
contains <- response <- freq <- NULL #created useing NSE, necessary to avoid visible binding note
data.table <- multi_summary(dataset = dataset,
question = all_of(question),
group_by = if(!is.null(group_by)){group_by},
subgroups_to_exclude = subgroups_to_exclude,
weights = if(!is.null(weights)){weights},
na.rm)
## Data Validation ##
#Make sure data is in correct format (only 1s and 0s)
responses <- data.table %>%
dplyr::pull(response) %>%
unique
#if all of the responses are correct format
if( !all(responses %in% c(1,0,NA)) ){
stop('Multiple choice question data should consist of only 1s, 0s, or NA values')
}
#only include 'selected' counts
data.table <- data.table %>%
dplyr::filter(response != 0) %>%
dplyr::select(-response) %>%
#0.2.0 Add default arrange
dplyr::arrange(dplyr::desc(freq), .by_group = ifelse(is.null(group_by), FALSE, TRUE))
#create base of table
gt.table <- frequency_table(data.table = data.table,
group_by =if(!is.null(group_by)){group_by})
#Add names to the table
if(is.null(group_by)){
gt.table <- gt.table %>%
gt::tab_header(
title = paste0("Question: ", question_name))
} else {
gt.table <- gt.table %>%
gt::tab_header(
title = paste0("Question: ", question_name),
subtitle = paste0("grouped by: ", group_by))
}
#if therey're weights, add a note
if(!is.null(weights)){
gt.table <- gt.table %>%
gt::tab_footnote(
footnote = "Frequencies and counts are weighted") %>%
gt::fmt_number(columns = contains('n'), decimals = 1)
}
return(gt.table)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.