#' Wrapper for `(grouped_)ggbetweenstats` in `ggstatsplot`
#'
#' @description
#' A combination of box and violin plots along with jittered data points for between-subjects designs with statistical details included in the plot as a subtitle.
#'
#' * Remove outliers from violin plot for clarity, while using all of the data for pairwise test and boxplot.
#' * Remove useless parameters for outliers.
#'
#' @param data A dataframe (or a tibble) from which variables specified are to be taken. Other data types (e.g., matrix,table, array, etc.) will **not** be accepted.
#' @param x The grouping (or independent) variable from the dataframe `data`.
#' @param y The response (or outcome or dependent) variable from the dataframe `data`.
#' @param facet whether to use `grouping.var`, default `TRUE`.
#' @param grouping.var A single grouping variable (can be entered either as a bare name `x` or as a string `"x"`).
#' @param plotgrid.args A `list` of additional arguments passed to `patchwork::wrap_plots`, except for `guides` argument which is already separately specified here.
#' @param annotation.args A `list` of additional arguments passed to `patchwork::plot_annotation`.
#' @param plot.type Character describing the *type* of plot. Currently supported
#' plots are `"box"` (for only boxplots), `"violin"` (for only violin plots),
#' and `"boxviolin"` (for a combination of box and violin plots; default).
#' @param xlab,ylab Labels for `x` and `y` axis variables. If `NULL` (default),
#' variable names for `x` and `y` will be used.
#' @param pairwise.comparisons Logical that decides whether pairwise comparisons
#' are to be displayed (default: `TRUE`). Please note that only
#' **significant** comparisons will be shown by default. To change this
#' behavior, select appropriate option with `pairwise.display` argument. The
#' pairwise comparison dataframes are prepared using the
#' `ggstatsplot::pairwise_comparisons` function. For more details
#' about pairwise comparisons, see the documentation for that function.
#' @param p.adjust.method Adjustment method for *p*-values for multiple
#' comparisons. Possible methods are: `"holm"` (default), `"hochberg"`,
#' `"hommel"`, `"bonferroni"`, `"BH"`, `"BY"`, `"fdr"`, `"none"`.
#' @param pairwise.display Decides *which* pairwise comparisons to display.
#' Available options are:
#' - `"significant"` (abbreviation accepted: `"s"`)
#' - `"non-significant"` (abbreviation accepted: `"ns"`)
#' - `"all"`
#'
#' You can use this argument to make sure that your plot is not uber-cluttered
#' when you have multiple groups being compared and scores of pairwise
#' comparisons being displayed.
#' @param bf.prior A number between `0.5` and `2` (default `0.707`), the prior
#' width to use in calculating Bayes factors.
#' @param bf.message Logical that decides whether to display Bayes Factor in
#' favor of the *null* hypothesis. This argument is relevant only **for
#' parametric test** (Default: `TRUE`).
#' @param results.subtitle Decides whether the results of statistical tests are
#' to be displayed as a subtitle (Default: `TRUE`). If set to `FALSE`, only
#' the plot will be returned.
#' @param title The text for the plot title.
#' @param subtitle The text for the plot subtitle. Will work only if
#' `results.subtitle = FALSE`.
#' @param caption The text for the plot caption.
#' @param outlier.coef Coefficient for outlier detection using Tukey's method.
#' With Tukey's method, outliers are below (1st Quartile) or above (3rd
#' Quartile) `outlier.coef` times the Inter-Quartile Range (IQR) (Default:
#' `1.5`).
#' @param centrality.plotting Logical that decides whether centrality tendency
#' measure is to be displayed as a point with a label (Default: `TRUE`).
#' Function decides which central tendency measure to show depending on the
#' `type` argument.
#' - **mean** for parametric statistics
#' - **median** for non-parametric statistics
#' - **trimmed mean** for robust statistics
#' - **MAP estimator** for Bayesian statistics
#'
#' If you want default centrality parameter, you can specify this using
#' `centrality.type` argument.
#' @param centrality.type Decides which centrality parameter is to be displayed.
#' The default is to choose the same as `type` argument. You can specify this
#' to be:
#' - `"parameteric"` (for **mean**)
#' - `"nonparametric"` (for **median**)
#' - `robust` (for **trimmed mean**)
#' - `bayes` (for **MAP estimator**)
#'
#' Just as `type` argument, abbreviations are also accepted.
#' @param point.args A list of additional aesthetic arguments to be passed to
#' the `geom_point` displaying the raw data.
#' @param violin.args A list of additional aesthetic arguments to be passed to
#' the `geom_violin`.
#' @param ggplot.component A `ggplot` component to be added to the plot prepared
#' by `ggstatsplot`. This argument is primarily helpful for `grouped_`
#' variants of all primary functions. Default is `NULL`. The argument should
#' be entered as a `ggplot2` function or a list of `ggplot2` functions.
#' @param package,palette Name of the package from which the given palette is to
#' be extracted. The available palettes and packages can be checked by running
#' `View(paletteer::palettes_d_names)`.
#' @param centrality.point.args,centrality.label.args A list of additional aesthetic
#' arguments to be passed to `ggplot2::geom_point` and
#' `ggrepel::geom_label_repel` geoms, which are involved in mean plotting.
#' @param ggsignif.args A list of additional aesthetic
#' arguments to be passed to `ggsignif::geom_signif`.
#'
#' @import ggplot2
#' @import ggstatsplot
#'
#' @importFrom dplyr select group_by arrange mutate
#' @importFrom ggrepel geom_label_repel
#' @importFrom stats t.test oneway.test quantile
#' @importFrom rlang enquo as_name !! as_string ensym
#' @importFrom paletteer scale_color_paletteer_d scale_fill_paletteer_d
#' @importFrom ggsignif geom_signif
#' @importFrom purrr pmap
#' @importFrom ipmisc stats_type_switch
#' @importFrom tidyr drop_na
#'
#' @seealso [ggstatsplot::ggbetweenstats()] and [ggstatsplot::grouped_ggbetweenstats()]
#'
#' @references
#' \url{https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggbetweenstats.html}
#'
#' @author
#' * Originally by Indrajeet Patil
#' * Wrapped by Yujie Liu
################################################################
#' @export ggbetweenstats_wrapper
ggbetweenstats_wrapper <- function(data,
x,
y,
facet = TRUE,
grouping.var = NULL,
plotgrid.args = list(),
annotation.args = list(),
...) {
# ======================== preparing dataframe ==========================
if (facet) {
# grouped data
df <-
data %>%
dplyr::select({{ grouping.var }}, {{ x }}, {{ y }}) %>%
ggstatsplot:::grouped_list(grouping.var = {{ grouping.var }})
} else {
# ungrouped data
df <- list(dplyr::select(data, {{ x }}, {{ y }}))
names(df) <- "data"
}
# ============== creating a list of plots using `pmap`=======================
plotlist <-
purrr::pmap(
.l = list(data = df, title = names(df)),
.f = ggbetweenstats_wrapper_main,
# put common parameters here
x = {{ x }},
y = {{ y }},
...
)
# combining the list of plots into a single plot
return(
ggstatsplot::combine_plots(
plotlist,
plotgrid.args = plotgrid.args,
annotation.args = annotation.args
)
)
}
################################################################
ggbetweenstats_wrapper_main <-
function(data,
x,
y,
plot.type = "boxviolin",
type = "parametric",
pairwise.comparisons = TRUE,
pairwise.display = "significant",
p.adjust.method = "holm",
effsize.type = "unbiased",
bf.prior = 0.707,
bf.message = TRUE,
results.subtitle = TRUE,
xlab = NULL,
ylab = NULL,
caption = NULL,
title = NULL,
subtitle = NULL,
k = 2L,
var.equal = FALSE,
conf.level = 0.95,
nboot = 100L,
tr = 0.2,
centrality.plotting = TRUE,
centrality.type = type,
centrality.point.args = list(size = 5, color = "darkred"),
centrality.label.args = list(size = 3,
nudge_x = 0.4,
segment.linetype = 4),
outlier.coef = 1.5,
point.args = list(
position = ggplot2::position_jitterdodge(dodge.width = 0.60),
alpha = 0.4,
size = 3,
stroke = 0
),
violin.args = list(width = 0.5, alpha = 0.2),
ggsignif.args = list(textsize = 3),
ggtheme = ggplot2::theme_bw(),
ggstatsplot.layer = TRUE,
package = "RColorBrewer",
palette = "Dark2",
ggplot.component = NULL
) {
# convert entered stats type to a standard notation
type <- ipmisc::stats_type_switch(type)
# make sure both quoted and unquoted arguments are allowed
x <- rlang::ensym(x)
y <- rlang::ensym(y)
# --------------------------------- data -----------------------------------
# creating a dataframe
data %<>%
dplyr::select({{ x }}, {{ y }}) %>%
tidyr::drop_na(.) %>%
dplyr::mutate({{ x }} := droplevels(as.factor({{ x }})))
# no outliers data for each group defined by `x`
no_outliers <- data.frame()
for (g in levels(data %>% dplyr::select({{ x }}) %>% .[[1]])) {
tmp <- data %>% dplyr::filter({{ x }} == g)
qt <- quantile(tmp[[2]], probs = c(0.25, 0.75))
min <- max(min(tmp[[2]]), qt[1] - outlier.coef * (qt[2] - qt[1]))
max <- min(max(tmp[[2]]), qt[2] + outlier.coef * (qt[2] - qt[1]))
no_outliers <- rbind(no_outliers,
dplyr::filter(tmp, {{ y }} >= min & {{ y }} <= max))
}
# --------------------- subtitle/caption preparation ------------------------
# figure out which test to run based on the no. of levels of the independent variable
test <- ifelse(nlevels(data %>% dplyr::pull({{ x }}))[[1]] < 3, "t", "anova")
if (isTRUE(results.subtitle)) {
# preparing the Bayes factor message
if (type == "parametric" && isTRUE(bf.message)) {
caption_df <- tryCatch(
ggstatsplot:::function_switch(
test = test,
# arguments relevant for expression helper functions
data = data,
x = rlang::as_string(x),
y = rlang::as_string(y),
type = "bayes",
bf.prior = bf.prior,
top.text = caption,
paired = FALSE,
k = k
),
error = function(e) NULL
)
caption <- if (!is.null(caption_df)) caption_df$expression[[1]]
}
# extracting the subtitle using the switch function
subtitle_df <- tryCatch(
ggstatsplot:::function_switch(
test = test,
# arguments relevant for expression helper functions
data = data,
x = rlang::as_string(x),
y = rlang::as_string(y),
paired = FALSE,
type = type,
effsize.type = effsize.type,
var.equal = var.equal,
bf.prior = bf.prior,
tr = tr,
nboot = nboot,
conf.level = conf.level,
k = k
),
error = function(e) NULL
)
subtitle <- if (!is.null(subtitle_df)) subtitle_df$expression[[1]]
}
# -------------------------- basic plot -----------------------------------
# add only the points which are not outliers
plot <-
ggplot2::ggplot() +
rlang::exec(
.fn = ggplot2::geom_point,
data = no_outliers,
mapping = ggplot2::aes(x = {{ x }}, y = {{ y }}, color = {{ x }}),
!!!point.args
)
# add boxplot
if (plot.type %in% c("box", "boxviolin")) {
plot <- plot +
rlang::exec(
.fn = ggplot2::stat_boxplot,
data = data,
mapping = ggplot2::aes(x = {{ x }}, y = {{ y }}),
width = 0.3,
alpha = 0.2,
fill = "white",
coef = outlier.coef,
outlier.shape = NA,
geom = "boxplot",
position = ggplot2::position_dodge(width = NULL)
)
}
# add violin plot
if (plot.type %in% c("violin", "boxviolin")) {
plot <- plot +
rlang::exec(
.fn = ggplot2::geom_violin,
data = no_outliers,
mapping = ggplot2::aes(x = {{ x }}, y = {{ y }}),
fill = "white",
!!!violin.args
)
}
# ---------------- centrality tagging -------------------------------------
# add labels for centrality measure
if (isTRUE(centrality.plotting)) {
plot <-
ggstatsplot:::centrality_ggrepel(
plot = plot,
data = data,
x = {{ x }},
y = {{ y }},
k = k,
type = ipmisc::stats_type_switch(centrality.type),
tr = tr,
centrality.point.args = centrality.point.args,
centrality.label.args = centrality.label.args
)
}
# ggsignif labels -----------------------------------------------------------
if (isTRUE(pairwise.comparisons) && test == "anova") {
# creating dataframe with pairwise comparison results
df_pairwise <-
ggstatsplot::pairwise_comparisons(
data = data,
x = {{ x }},
y = {{ y }},
type = type,
tr = tr,
paired = FALSE,
var.equal = var.equal,
p.adjust.method = p.adjust.method,
k = k
)
# preparing the caption for pairwise comparisons test
if (type != "bayes") {
caption <-
ggstatsplot:::pairwise_caption(
caption,
unique(df_pairwise$test.details),
pairwise.display
)
}
# check whether to add signif layer
df_pairwise %<>%
dplyr::mutate(groups =
purrr::pmap(.l = list(group1, group2), .f = c))
if (pairwise.display %in% c("s", "significant")) {
df_pairwise %<>%
dplyr::filter(p.value < 0.05) %>%
dplyr::arrange(group1, group2)
}
if (pairwise.display %in% c("ns",
"nonsignificant",
"non-significant")) {
df_pairwise %<>%
dplyr::filter(p.value >= 0.05) %>%
dplyr::arrange(group1, group2)
}
# calc scale values
min_y_data <- min(no_outliers[[{{ y }}]])
max_y_data <- max(no_outliers[[{{ y }}]])
range_y_data <- max_y_data - min_y_data
# calc init plot scale
max_y_plot <- max_y_data + range_y_data * 0.02
min_y_plot <- min_y_data
# adding the layer for pairwise comparisons
if (dim(df_pairwise)[[1]] != 0) {
plot <-
plot + rlang::exec(
.f = geom_signif_wrapper,
data = no_outliers,
mapping = ggplot2::aes(x = {{ x }}, y = {{ y }}),
comparisons = df_pairwise$groups,
map_signif_level = TRUE,
y_position = max_y_data + range_y_data * 0.1,
margin_top = 0,
step_increase = range_y_data * 0.06,
tip_length = range_y_data * 0.01,
annotations = df_pairwise$label,
test = NULL,
parse = TRUE,
vjust = 0,
!!!ggsignif.args
)
max_y_plot <-
max_y_plot + range_y_data * 0.1 + range_y_data * 0.06 * (nrow(df_pairwise) - 1)
}
}
# # ------------------------ annotations and themes -------------------------
# add y lim
plot <- plot + coord_cartesian(ylim = c(min_y_plot, max_y_plot))
# using `coord_cartesian` instead of `scale_y_continuous(limits = c(0,100))` will only zoom the plot, without changing boxplot's shape!
# specifying annotations and other aesthetic aspects for the plot
ggstatsplot:::aesthetic_addon(
plot = plot,
x = data %>% dplyr::pull({{ x }}),
xlab = xlab,
ylab = ylab,
title = title,
subtitle = subtitle,
caption = caption,
ggtheme = ggtheme,
ggstatsplot.layer = ggstatsplot.layer,
package = package,
palette = palette,
ggplot.component = ggplot.component
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.