Nothing
get_prop_for_highest_categories <- function(
plot_data,
var,
selected_categories
) {
data.frame(
var = var,
value = plot_data |>
dplyr::filter(
.data$.variable_label == var,
as.character(.data$.category) %in% selected_categories
) |>
dplyr::pull(.data$.proportion) |>
as.numeric() |>
sum(na.rm = TRUE)
)
}
#' Extract Text Summary from Categorical Mesos Plots
#'
#' @description
#' Generates text summaries comparing two groups from categorical mesos plot data.
#' The function identifies meaningful differences between groups based on proportions
#' of respondents selecting specific categories and produces narrative text descriptions.
#'
#' @param plots A list of two plot objects (or data frames with plot data) to compare.
#' Each must contain columns: `.variable_label`, `.category`, `.category_order`, `.proportion`.
#' @param min_prop_diff Numeric. Minimum proportion difference (default 0.10) required
#' between groups to generate text. Differences below this threshold are ignored.
#' @param n_highest_categories Integer. Number of top categories to include in the
#' comparison (default 1). Categories are selected based on `.category_order`.
#' @param flip_to_lowest_categories Logical. If TRUE, compare lowest categories instead
#' of highest (default FALSE).
#' @param digits Integer. Number of decimal places for rounding proportions (default 2).
#' @param selected_categories_last_split Character. Separator for the last item when
#' listing multiple categories (default " or ").
#' @param fallback_string Character. String to return when validation fails (default `character()`).
#' @param glue_str_pos Character vector. Templates for positive differences (group_1 > group_2).
#' Available placeholders: `{var}`, `{group_1}`, `{group_2}`, `{selected_categories}`.
#' @param glue_str_neg Character vector. Templates for negative differences (group_2 > group_1).
#' Same placeholders as `glue_str_pos`.
#'
#' @return A character vector of text summaries, one per variable with meaningful differences.
#' Returns empty character vector if no plots provided or no meaningful differences found.
#'
#' @details
#' The function compares proportions between two groups for each variable in the plot data.
#' One template is randomly selected from the provided vectors for variety in output text.
#'
#' @examples
#' \dontrun{
#' # Create sample plot data
#' plot_data_1 <- data.frame(
#' .variable_label = rep("Job satisfaction", 3),
#' .category = factor(c("Low", "Medium", "High"), levels = c("Low", "Medium", "High")),
#' .category_order = 1:3,
#' .proportion = c(0.2, 0.3, 0.5)
#' )
#'
#' plot_data_2 <- data.frame(
#' .variable_label = rep("Job satisfaction", 3),
#' .category = factor(c("Low", "Medium", "High"), levels = c("Low", "Medium", "High")),
#' .category_order = 1:3,
#' .proportion = c(0.3, 0.4, 0.3)
#' )
#'
#' plots <- list(
#' list(data = plot_data_1),
#' list(data = plot_data_2)
#' )
#'
#' # Generate text summaries
#' txt_from_cat_mesos_plots(plots, min_prop_diff = 0.10)
#'
#' # Compare lowest categories instead
#' txt_from_cat_mesos_plots(
#' plots,
#' flip_to_lowest_categories = TRUE,
#' min_prop_diff = 0.05
#' )
#' }
#'
#' @export
txt_from_cat_mesos_plots <- function(
plots,
min_prop_diff = .10,
n_highest_categories = 1,
flip_to_lowest_categories = FALSE,
digits = 2,
selected_categories_last_split = " or ",
fallback_string = character(),
glue_str_pos = c(
paste0(
"For {var}, the target group has a higher proportion of respondents ",
"({group_1}) than all others ({group_2}) who answered {selected_categories}."
),
paste0(
"More respondents answered {selected_categories} for {var} in the ",
"target group ({group_1}) than in other groups ({group_2})."
),
paste0(
"The statement {var} shows {selected_categories} responses are more ",
"common in the target group ({group_1}) compared to others ({group_2})."
)
),
glue_str_neg = c(
paste0(
"For {var}, the target group has a lower proportion of respondents ",
"({group_1}) than all others ({group_2}) who answered {selected_categories}."
),
paste0(
"Fewer respondents answered {selected_categories} for {var} in the ",
"target group ({group_1}) than in other groups ({group_2})."
),
paste0(
"The statement {var} shows {selected_categories} responses are less ",
"common in the target group ({group_1}) compared to others ({group_2})."
)
)
) {
args <- check_options(
call = match.call(),
ignore_args = .saros.env$ignore_args,
defaults_env = global_settings_get(fn_name = "txt_from_cat_mesos_plots"),
default_values = formals(txt_from_cat_mesos_plots)
)
# Re-insert plots after check_options (like data in other functions)
args$plots <- plots
# Validate plots argument
if (!is.list(args$plots)) {
cli::cli_warn(
c(
"{.arg plots} must be a list, not {.cls {class(args$plots)}}.",
"i" = "Returning {.val {args$fallback_string}}."
)
)
return(args$fallback_string)
}
if (length(args$plots) < 2) {
cli::cli_warn(
c(
"{.arg plots} must contain at least 2 elements, not {length(args$plots)}.",
"i" = "Returning {.val {args$fallback_string}}."
)
)
return(args$fallback_string)
}
# Check that each element has a data component
has_data <- vapply(
args$plots,
function(x) {
!is.null(x) && (inherits(x, "data.frame") || !is.null(x$data))
},
logical(1)
)
if (!all(has_data)) {
missing_data <- which(!has_data)
cli::cli_warn(
c(
"{.arg plots} elements {missing_data} do not contain plot data.",
"i" = "Each element must be a data frame or have a {.field data} component.",
"i" = "Returning {.val {args$fallback_string}}."
)
)
return(args$fallback_string)
}
# Extract data - handle both data frames and plot objects with $data
dat_1 <- if (inherits(args$plots[[1]], "data.frame")) {
args$plots[[1]]
} else {
args$plots[[1]]$data
}
dat_2 <- if (inherits(args$plots[[2]], "data.frame")) {
args$plots[[2]]
} else {
args$plots[[2]]$data
}
selected_categories <-
dat_1 |>
dplyr::distinct(.data$.category, .keep_all = TRUE) |>
dplyr::filter(
.data$.category_order %in%
if (isFALSE(args$flip_to_lowest_categories)) {
(max(c(
1,
max(.data$.category_order) - args$n_highest_categories + 1
)):max(.data$.category_order))
} else if (isTRUE(args$flip_to_lowest_categories)) {
min(.data$.category_order):(min(c(
max(.data$.category_order),
args$n_highest_categories
)))
}
) |>
dplyr::pull(.data$.category) |>
as.character() |>
unique()
out <-
unique(as.character(dat_1$.variable_label)) |>
lapply(function(var) {
list(group_1 = dat_1, group_2 = dat_2) |>
lapply(function(.x) {
get_prop_for_highest_categories(
plot_data = .x,
var = var,
selected_categories = selected_categories
)
}) |>
dplyr::bind_rows(.id = "group")
}) |>
dplyr::bind_rows() |>
tidyr::pivot_wider(names_from = "group", values_from = "value")
out[["txt"]] <- dplyr::case_when(
out[[2]] > out[[3]] + args$min_prop_diff ~
sample(args$glue_str_pos, size = nrow(out), replace = TRUE),
out[[3]] > out[[2]] + args$min_prop_diff ~
sample(args$glue_str_neg, size = nrow(out), replace = TRUE),
.default = ""
)
out[[2]] <- round(out[[2]], digits = args$digits)
out[[3]] <- round(out[[3]], digits = args$digits)
out[["selected_categories"]] <- cli::ansi_collapse(
selected_categories,
sep = "; ",
last = args$selected_categories_last_split,
trunc = 10,
sep2 = args$selected_categories_last_split
)
for (i in seq_len(nrow(out))) {
out[i, "txt"] <- glue::glue_data(.x = out[i, ], out[i, "txt"][[1]])
}
stringi::stri_omit_empty_na(out$txt)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.