#' Plot standard Schola likert-like barplot with groupwise comparison per items
#'
#' @description `r lifecycle::badge("maturing")`
#'
#' @param .data data with items and group variable
#' @param vars vector of items, supports `{tidyselect}` syntax (i.e,
#' non-standard evaluation)
#' @param group group variable used to split the results, have to be logical,
#' where `TRUE` is gonna be considered as "focal" group and displayed as upper
#' group Supports `{tidyselect}` syntax.
#' @param n_breaks number of breaks displayed at x-axis, outer labels are
#' automatically aligned to face inward. Defaults to 11, which results in 10%
#' wide breaks.
#' @param dict item code-label dictionary, if none provided, those are derived
#' from the data.
#' @param escape_level *character*, level of item response considered as NA
#' @param desc sor items in descending order?
#' @param labels draw labels?
#' @param min_label_width smallest percentage (0-1) to display in the plot,
#' proportions larger than this value are shown, smaller are not.
#' @param absolute_counts draw labels and absolute counts in parentheses?
#' @param fill_labels character vector or function taking breaks and returning
#' labels for fill aesthetic
#' @param order_by how to order the items. chi-square differences (default)
#' computes chi-square test for every item and sort them by largest X2
#' statistic to smallest (if desc = TRUE)
#' @param reverse if TRUE, reverse colors
#' @param facet_label_wrap width of facet label to wrap
#' @param fill_cols colors to be used for item categories, defaults to NULL,
#' meaning standard RdYlBu palette will be used
#' @param drop Drop unobserved levels form the legend? Defaults to `FALSE`. See
#' [ggplot2::discrete_scale] for more details. To always show the legend key,
#' make sure you have `show.legend` set to `TRUE` as well (as done by
#' default).
#' @param drop_na Drop `NA`s from every item (a.k.a. "pairwise")? Defaults to
#' `TRUE`. Note that the number of observations per item may differ, because
#' `NA` in one item does not mean the respondent row is discarded completely
#' (listwise).
#'
#' @inheritDotParams fct_nanify -f -level
#' @inheritParams ggplot2::layer
#'
#' @family Making charts
#'
#' @return Object of class "gg", "ggplot".
#' @export
#'
#' @importFrom ggtext element_markdown
#' @importFrom rlang abort eval_tidy
#' @importFrom forcats fct_relevel fct_rev
#' @importFrom dplyr pull group_by arrange if_else summarise n ungroup select
#' @importFrom stringr str_wrap
#' @importFrom stats median
#' @importFrom purrr pluck map_dbl
#' @importFrom ggplot2 waiver
#' @importFrom tidyr pivot_longer nest drop_na
#' @importFrom scales percent number
#' @importFrom RColorBrewer brewer.pal
#'
schola_barplot <- function(.data, vars, group, dict = dict_from_data(.data),
escape_level = "nev\u00edm", n_breaks = 11,
desc = TRUE, labels = TRUE, min_label_width = .09,
absolute_counts = TRUE, fill_cols = NULL,
fill_labels = waiver(), facet_label_wrap = 115,
reverse = FALSE, order_by = "chi-square differences",
drop = FALSE, drop_na = TRUE, show.legend = TRUE, ...) {
if (!is.logical(eval_tidy(enquo(group), .data))) {
abort("`group` variable have to be logical.")
}
order_by <- match.arg(
order_by,
c("chi-square differences", "weighted total scores")
)
# data --------------------------------------------------------------------
long_data <- .data %>%
pivot_longer({{ vars }}, names_to = ".item", values_to = ".resp")
# drop NAs column-wise, not excludig rows with just ANY NA present
if (drop_na) {
long_data <- long_data %>% drop_na(.data$.resp)
}
if (order_by == "weighted total scores") {
# get counts for each response category, multiply by its .resp to get
# "weight" of some sort
# -- higher usage of higher categories results in higher weight
item_order <- long_data %>%
mutate(resp_num = fct_nanify(.data$.resp, escape_level, ...) %>%
as.integer()) %>%
group_by({{ group }}, .data$.item) %>%
summarise(ts = sum(.data$resp_num, na.rm = TRUE)) %>%
filter({{ group }}) %>%
arrange(desc(.data$ts)) %>%
pull(.data$.item)
}
plt_data <- long_data %>%
mutate(.resp = fct_rev(.data$.resp)) %>%
group_by({{ group }}, .data$.item, .data$.resp) %>%
summarise(n = n(), .groups = "drop_last")
if (order_by == "chi-square differences") {
item_order <- plt_data %>%
pivot_wider(names_from = .data$.resp, values_from = .data$n, values_fill = 0) %>%
group_by(.data$.item) %>%
select(-{{ group }}) %>%
nest() %>%
mutate(chsq = map_dbl(.data$data, ~ suppressWarnings(chisq.test(.x)) %>%
pluck("statistic"))) %>%
arrange(desc(.data$chsq)) %>%
pull(.data$.item)
}
if (!desc) item_order <- rev(item_order)
plt_data <- plt_data %>%
mutate(
prop = .data$n / sum(.data$n),
label = percent(.data$prop, 1, suffix = " %") # category size threshold for label to display
) %>%
ungroup() %>%
mutate(.item = fct_relevel(as.factor(.data$.item), item_order)) # sort facets according order table
if (absolute_counts) {
plt_data <- plt_data %>% mutate(label = paste0(.data$label, " (", number(.data$n, 1), ")"))
}
plt_data <- plt_data %>% mutate(
label = if_else(.data$prop > min_label_width, .data$label, NA_character_)
)
# helper values -----------------------------------------------------------
axis_x_breaks <- seq(0, 1, length.out = n_breaks)
axis_x_hjust <- c(0, rep(.5, n_breaks - 2), 1)
n_cats <- plt_data %>%
pull(.data$.resp) %>%
levels() %>%
length()
n_fill_cols <- if (escape_level == FALSE) n_cats else n_cats - 1L
legend_cols <- c(
if (escape_level != FALSE) "#dadada" else NULL,
if (reverse) {
brewer.pal(n_fill_cols, "RdYlBu")
} else {
rev(brewer.pal(n_fill_cols, "RdYlBu"))
}
)
# overwrite the palette if custom cols are provided
if (!is.null(fill_cols)) {
legend_cols <- if (reverse) {
fill_cols
} else {
rev(fill_cols)
}
}
# plot --------------------------------------------------------------------
# conditional labels geom
labels <- if (labels) {
geom_text(
aes(
label = .data$label,
col = after_scale(if_else(get_lightness(.data$fill) > .5, "grey30", "white"))
),
position = position_fill(vjust = .5), size = 3, na.rm = TRUE
)
}
plt_data %>%
ggplot(aes(
y = {{ group }}, x = .data$prop,
fill = .data$.resp
)) +
geom_col(width = .75, position = "fill", col = "white", size = .4, show.legend = show.legend) +
labels +
facet_wrap(
~ .data$.item,
ncol = 1, drop = FALSE, labeller = schola_labeller(dict, width = facet_label_wrap)
) +
scale_x_percent_cz(
limits = c(0, 1), breaks = axis_x_breaks, expand = expansion()
) +
scale_y_discrete(limits = c(FALSE, TRUE), labels = c("", "*")) +
scale_fill_manual(values = legend_cols, labels = fill_labels, drop = drop) +
guides(fill = guide_legend(
title = NULL, nrow = 1, reverse = TRUE,
override.aes = list(size = .75, col = NULL)
)) +
theme_schola("x") +
theme(
axis.text.x = element_markdown(hjust = axis_x_hjust, colour = "grey30"), # element_text does not support vectorised input, see https://github.com/tidyverse/ggplot2/issues/3492
axis.text.y = element_text(size = 18, face = "bold", vjust = .73), # not much appealing, but asterisk suites the plot best according to our focus group
axis.title = element_blank(),
panel.spacing = unit(11, "pt"),
strip.text = element_text(
colour = "grey30", size = 10, hjust = 0, vjust = 0,
margin = margin(0, 0, 3, 1.5)
),
panel.grid.major.x = element_line(colour = "grey88"),
legend.position = "bottom"
)
# FIXME - set xlim to accommodate strlen of labels
}
#' Labeller with width wrapping and item code-label dictionary
#'
#' Turns item codes into labels by looking up provided dictionary and applies
#' them as facet labels.
#'
#' @param dict code-label dictionary as named character vector, see
#' `dict_from_data()`
#' @param width maximal string length per line, if `NULL`, this is off
#'
#' @return object of class `labeller`, to be used in `facet_*` {ggplot2}
#' functions.
#' @export
#' @importFrom rlang set_names
#'
schola_labeller <- function(dict, width = 80) {
fun <- function(item_codes) {
item_codes <- lapply(item_codes, as.character)
labels <- lapply(item_codes, function(label) {
unmatched_labels <- set_names(label[!label %in% names(dict)])
dict <- c(dict, unmatched_labels)
out <- dict[label]
if (length(unmatched_labels) != 0) {
warning(
"There were unmatched labels for item codes:\n",
paste(unmatched_labels, collapse = ", "), ".",
call. = FALSE
)
}
out
})
if (!is.null(width)) {
labels <- lapply(labels, function(label) {
str_wrap(label, width = width)
})
}
labels
}
structure(fun, class = "labeller")
}
#' Extract perceived lightness from colour
#'
#' @param col colour .item or hex
#'
#' @return L .resp from HSL-converted model
#' @keywords internal
#' @importFrom grDevices col2rgb
#'
get_lightness <- function(col) {
# convert color to RGB, scale to 0-1, get min/max and compute L of HSL color
rgb_col <- col2rgb(col)
rgb_col_scaled <- rgb_col / 255
cmin <- apply(rgb_col_scaled, 2, min)
cmax <- apply(rgb_col_scaled, 2, max)
(cmax + cmin) / 2
}
#' Get item code-label dictionary from data
#'
#' Item labels have to be stored as variable attribute `label`. Exported by
#' default by `ls_export()`, but many data wrangling operations strip them out,
#' unfortunately.
#'
#' @param .data data.frame or tibble
#'
#' @return named character vector of items' labels, if no label found, NULL is
#' introduced and is omitted in the output
#' @export
#'
#' @importFrom purrr map
#'
dict_from_data <- function(.data) {
# try parent.frame()
out <- .data %>%
map(~ attr(.x, "label")) %>%
unlist()
if (is.null(out)) warning("No labels were retrieved from the data. Please, provide the dictionary manually.", call. = FALSE)
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.