# Public function ---------------------------------------------------------
### footer_y2
#' Create a string with question wording and n-size
#'
#' Pipe in the dataset (responses) and specify variables. Save out the nicely formatted string to an object to pass to the footer of an ms chart.
#'
#' @keywords n question n-size footer
#' @param dataset Haven labelled dataset from which to pull the variable(s)
#' @param ... Unquoted variables
#' @param q_type DEFAULT = 'NULL'; When 'NULL', footer_y2() will automatically choose the variable type when standard Y2 naming conventions are used. When variables have non-standard names, accepts list of any of c('s', 'm', 'oe', 'r', 'sl', 'md', 'n') used to specify question type. s = single select, m = multiple select, etc.
#' @param label_length DEFAULT = 100; Truncates question labels to specified length and adds a '...' where truncation has occurred.
#' @param after_symbol DEFAULT = 'Select all that apply'; Removes all question text after and including the provided symbol. Must match upper/lower case of question wording.
#' @param prompt_rm DEFAULT TRUE; Attempts to better format the question by removing hard returns, white space, and anything after " - " or the input of the "after_symbol" argument. TRUE is needed in order to identify common stems.
#' @export
#' @examples
#' \dontrun{
#' responses %>%
#' footer_y2(
#' s_income
#' )
#'
#' mtcars %>%
#' footer_y2(
#' m_aware,
#' prompt_rm = TRUE
#' )
#' }
footer_y2 <- function(
dataset,
...,
q_type = 'NULL',
label_length = 100,
prompt_rm = TRUE,
after_symbol = 'Select all that apply'
) {
# Error: Allow only once grouped data ------------------------------------
if (
dplyr::group_vars(dataset) %>% length() > 1
) {
stop('footer_y2 can currently only handle one grouping. Your data has multiple groups')
}
# Error: Grouping data has missing data -----------------------------------
if (dplyr::is_grouped_df(dataset)) {
if (
dataset %>%
dplyr::select(
tidyselect::all_of(
dplyr::group_vars(dataset)
)
) %>%
anyNA()
) {
stop('Grouping variable has missingness (NA\'s). Please manually filter out NA\'s or replace NA\'s with an explicit variable level.')
}
}
# Make variable List ----------------------------------------------------
variable_list <-
dataset %>%
dplyr::ungroup() %>%
dplyr::select(
...
) %>%
names()
if (length(variable_list) == 0) {
variable_list <-
dataset %>%
dplyr::ungroup() %>%
names()
}
# Check qtypes -------------------------------------------------------------
q_type_original_specification <- q_type
q_type <-
check_qtypes(
df = dataset,
q.type = q_type,
variables = variable_list
)
# Error: non standard naming ----------------------------------------------
if (!all(q_type %in% c('s', 'm', 'r', 'n', 'sl', 'md', 'oe'))) {
stop(
"Variable names do not match standardized format. Please specify q_types for all questions."
)
} else {
if ('NULL' %in% q_type_original_specification) {
message(
'Note: Variable names match standardized format. Assuming question types.'
)
}
}
# Get the question text ---------------------------------------------------
question <-
get_questions(
df = dataset,
prompt.rm = prompt_rm,
variables = variable_list,
prompt.stem = after_symbol
)
# Error: Non-labelled data ------------------------------------------------
for (i in 1:length(question)) {
if ( question[i] == 'NULL' ) {
stop('variable "', variable_list[i], '" is not haven labelled, please use labelled data with this function.')
}
}
# Get common stems --------------------------------------------------
stems <-
get_stems(
variables = variable_list,
q.text = question,
q.type = q_type,
label_length
)
variable_list <-
stems %>%
dplyr::pull(.data$q_name)
question <-
stems %>%
dplyr::mutate(
q_text = stringr::str_c(
.data$q_text %>%
stringr::str_trunc(
width = label_length,
side = 'right',
ellipsis = '...'
) %>%
stringr::str_trim()
)
) %>%
dplyr::pull(.data$q_text)
q_type <-
stems %>%
dplyr::pull(.data$q_type)
q_multi_stem <-
stems %>%
dplyr::pull(.data$q_multi_stem)
q_stem <-
stems %>%
dplyr::pull(.data$q_stem)
# Calculate n sizes -------------------------------------------------------
# First loop --------------------------------------------------------------
for (i in 1:length(variable_list)) {
if (i == 1) {
if ( q_type[i] %in% c('s','n','oe','sl') & !dplyr::is_grouped_df(dataset)) {
n <-
singles_not_grouped(
data = dataset,
variable = variable_list[i],
label.length = label_length,
q.stem = q_stem[i]
)
} else if ( q_type[i] %in% c('m', 'r', 'md') & !dplyr::is_grouped_df(dataset)) {
n <-
multi_not_grouped(
data = dataset,
variable = variable_list[i],
q.stem = q_stem[i]
)
} else if ( q_type[i] %in% c('s','n','oe','sl') & dplyr::is_grouped_df(dataset)) {
n <-
singles_grouped(
data = dataset,
variable = variable_list[i],
label.length = label_length,
q.stem = q_stem[i]
)
} else if ( q_type[i] %in% c('m', 'r', 'md') & dplyr::is_grouped_df(dataset)) {
n <-
multi_grouped(
data = dataset,
variable = variable_list[i],
q.stem = q_stem[i],
label.length = label_length
)
}
footer <-
stringr::str_c(
'Q: ',
question[i],
n,
collapse = ''
)
# Second loop -------------------------------------------------------------
} else {
if ( q_type[i] %in% c('s','n','oe','sl') & !dplyr::is_grouped_df(dataset)) {
n <-
singles_not_grouped(
data = dataset,
variable = variable_list[i],
label.length = label_length,
q.stem = q_stem[i]
)
} else if ( q_type[i] %in% c('m', 'r', 'md') & !dplyr::is_grouped_df(dataset)) {
n <-
multi_not_grouped(
data = dataset,
variable = variable_list[i],
q.stem = q_stem[i]
)
} else if ( q_type[i] %in% c('s','n','oe','sl') & dplyr::is_grouped_df(dataset)) {
n <-
singles_grouped(
data = dataset,
variable = variable_list[i],
label.length = label_length,
q.stem = q_stem[i]
)
} else if ( q_type[i] %in% c('m', 'r', 'md') & dplyr::is_grouped_df(dataset)) {
n <-
multi_grouped(
data = dataset,
variable = variable_list[i],
q.stem = q_stem[i],
label.length = label_length
)
}
footer <-
stringr::str_c(
footer,
'\nQ: ',
question[i],
n
)
}
}
footer
}
# Private Functions -------------------------------------------------------
### Singles not grouped
singles_not_grouped <- function(
data,
variable,
label.length,
q.stem
){
if (stringr::str_detect(variable, '^oe_|_TEXT$')) {
data <-
data %>%
dplyr::filter(
.data[[variable]] != '',
.data[[variable]] != 'NA'
)
}
n <-
data %>%
dplyr::filter(
!is.na(
.data[[variable]]
)
) %>%
dplyr::count() %>%
dplyr::pull(.data$n) %>%
stringr::str_c(
' (n = ',
.,
')'
)
n
}
### Multi not grouped
multi_not_grouped <- function(
data,
variable,
q.stem
) {
n <-
data %>%
dplyr::select(
tidyselect::starts_with(
q.stem
),
-tidyselect::ends_with('_TEXT')
) %>%
dplyr::mutate(
ns = rowSums(
dplyr::across(
.cols = tidyselect::starts_with(q.stem),
.fns = ~!is.na(.x)
)
)
) %>%
dplyr::filter(
.data$ns > 0
) %>%
dplyr::count() %>%
dplyr::pull(.data$n) %>%
stringr::str_c(
' (n = ',
.,
')'
)
message(
stringr::str_c(
'Note: Stem "',
q.stem,
'" was used to find n size.'
)
)
n
}
### Singles grouped
singles_grouped <- function(
data,
variable,
label.length,
q.stem
) {
if (stringr::str_detect(variable, '^oe_|_TEXT$')) {
data <-
data %>%
dplyr::filter(
.data[[variable]] != '',
.data[[variable]] != 'NA'
)
}
n <-
data %>%
dplyr::filter(
!is.na(
.data[[variable]]
)
) %>%
dplyr::count() %>%
forcats::as_factor() %>%
dplyr::mutate(
new_n = .data[[dplyr::group_vars(data)]] %>%
as.character() %>%
stringr::str_trunc(
width = label.length,
side = 'right',
ellipsis = '...'
) %>%
stringr::str_trim() %>%
stringr::str_c(
'(',
.,
': ',
n,
')'
)
) %>%
dplyr::pull(.data$new_n) %>%
stringr::str_flatten() %>%
stringr::str_replace_all(
'\\)\\(',
', '
) %>%
stringr::str_c(
' n = ',
.
)
n
}
### Multi grouped
multi_grouped <- function(
data,
variable,
q.stem,
label.length
) {
n <-
data %>%
dplyr::ungroup() %>%
dplyr::select(
tidyselect::starts_with(
q.stem
),
tidyselect::all_of(dplyr::group_vars(data)),
-tidyselect::ends_with('_TEXT')
) %>%
dplyr::mutate(
ns = rowSums(
dplyr::across(
.cols = tidyselect::starts_with(q.stem),
.fns = ~!is.na(.x)
)
)
) %>%
dplyr::filter(
.data$ns > 0
) %>%
dplyr::count(
.data[[dplyr::group_vars(data)]]
) %>%
forcats::as_factor() %>%
dplyr::mutate(
new_n = stringr::str_c(
.data[[dplyr::group_vars(data)]] %>%
as.character() %>%
stringr::str_trunc(
width = label.length,
side = 'right',
ellipsis = '...'
) %>%
stringr::str_trim(),
': ',
n
)
) %>%
dplyr::pull(.data$new_n) %>%
stringr::str_c(
.,
collapse = ', '
) %>%
stringr::str_c(
' n = (',
.,
')'
)
message(
stringr::str_c(
'Note: Stem "',
q.stem,
'" was used to find n size.'
)
)
n
}
check_qtypes <- function(
df,
q.type,
variables
) {
if ('NULL' %in% q.type) {
q_type_check <-
df %>%
dplyr::ungroup() %>%
dplyr::select(
tidyselect::all_of(variables)
) %>%
names() %>%
stringr::str_split('_') %>%
purrr::map(
purrr::pluck(1)
) %>%
as.vector()
q.type <-
q_type_check
}
q.type
}
get_questions <- function(
df,
prompt.rm,
variables,
prompt.stem
) {
if (prompt.rm == FALSE) {
question <-
df %>%
dplyr::ungroup() %>%
dplyr::select(
tidyselect::all_of(variables)
) %>%
labelled::var_label() %>%
as.vector(mode = 'character') %>%
stringr::str_replace_all('[\\r\\n\\t\\v\\f]', ' ') %>%
stringr::str_squish()
} else {
question <-
df %>%
dplyr::ungroup() %>%
dplyr::select(
tidyselect::all_of(variables)
) %>%
labelled::var_label() %>%
as.vector(mode = 'character') %>%
stringr::str_replace_all('[\\r\\n\\t\\v\\f]', ' ') %>%
stringr::str_remove(' - .+') %>%
stringr::str_remove(
stringr::str_c(
prompt.stem,
'.*')
) %>%
stringr::str_trim() %>%
stringr::str_squish()
}
question
}
get_stems <- function(
variables,
q.text,
q.type,
label_length
) {
possible_stems <-
tibble::tibble(
q_stem = variables %>%
stringr::str_remove(
'_[0-9]+$'
),
q_num = variables %>%
stringr::str_extract(
'_[0-9]+$'
),
q_text = q.text,
q_type = q.type
)
known_stems <-
dplyr::full_join(
possible_stems %>%
dplyr::filter(
!is.na(.data$q_num)
) %>%
dplyr::add_count(
.data$q_stem,
.data$q_text,
name = 'stem_count'
) %>%
dplyr::distinct(
.data$q_stem,
.data$q_text,
.keep_all = TRUE
) %>%
dplyr::group_by(.data$q_stem) %>%
dplyr::add_count(
name = 'false_stems'
) %>%
dplyr::mutate(
q_multi_stem = dplyr::case_when(
false_stems == 1 & stem_count > 1~ TRUE,
TRUE ~ FALSE
)
),
possible_stems %>%
dplyr::filter(
is.na(.data$q_num)
) %>%
dplyr::mutate(
q_multi_stem = FALSE
),
by = c("q_stem", "q_num", "q_text", 'q_type', 'q_multi_stem')
) %>%
dplyr::mutate(
q_type = .data$q_type %>% as.character(),
q_name = dplyr::case_when(
!is.na(.data$q_num) ~ stringr::str_c(.data$q_stem, .data$q_num),
TRUE ~ .data$q_stem
),
q_type = dplyr::if_else(
stringr::str_detect(.data$q_stem, '_TEXT$'),
'oe',
.data$q_type
)
)
known_stems
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.