Nothing
#' Barplot of means
#'
#' Plots means, with confidence intervals, and (optionally) p-values for differences of means and interactions
#'
#' @param formula A formula, e.g., \code{y ~ x1+x2}, where x1 & x2 are
#' grouping variables (e.g., condition indicators in a 2x2 experiment).
#' The formula can include up to three grouping variables. The plot will
#' show contiguous bars for x1, and sets of bars for x2 and x3
#' @param data Optional data frame containing variables in the formula.
#' @param cluster Optional clustering variable when there are repeated
#' observations per cluster
#' e.g., \code{cluster="participant_ID"}.
#' When provided, inference for reported tests is based on regressions with
#' clustered standard errors (via \code{lm2(..., clusters=...)}).
#' @param tests specifies which comparisons of means to report. Syntax involves
#' putting column numbers in a character string, with a - to symbolize a
#' comparison and a + symbolizing combination. For example tests="1-2" reports
#' t-test comparing columns 1 and 2. tests="1-2,3-4" t-tests comparing columns
#' 1 & 2 and another 3 & 4. To run an interaction use parentheses:
#' tests=\code{"(4-3)-(2-1)"} and can also be combined with simple tests.
#' Main effects can be specified using `+`, for example (1+2)-(3+4) compares
#' all observations in the first two columns with all observations in
#' the next two columns.
#' @param quiet Logical. When \code{TRUE}, suppresses console messages from \code{plot_means()}.
#' @param order Controls the order of \code{x1} groups (bar order and colors).
#' Use \code{-1} to reverse the default order (e.g., if plot shows 'male' first and 'female' second, order=-1 will flip that).
#' @param legend.title Character string. Title for the legend. If \code{NULL},
#' no title is shown.
#' @param col Color(s) for \code{x1} bars. If \code{NULL}, colors are chosen
#' automatically.
#' @param col.text Color for confidence intervals and other non-bar annotations.
#' If \code{NULL}, defaults to a dark gray.
#' @param values.cex Numeric scalar controlling text size for mean value labels
#' (and related annotations).
#' @param values.align Where within the bars to put the mean value labels: \code{"top"}, \code{"middle"},
#' \code{"bottom"}, or \code{"none"}.
#' @param values.round Non-negative integer. Number of decimal places for mean
#' value labels.
#' @param pvalue.cex Numeric scalar controlling p-value label size.
#' @param pvalue.col Color for p-value brackets/labels.
#' @param ci.level Confidence interval level for \code{ciL}/\code{ciH} in \code{$means}.
#' Default is \code{95}. You can also pass a proportion (e.g., \code{0.95}).
#' @param buffer.top Either \code{"auto"} (default) or a numeric value. Extra
#' vertical headroom (as a fraction of the data y-range) added above the
#' maximum y value to make room for annotations. When \code{"auto"}, uses 0.35
#' when an interaction p-value is shown (scenario 2) and 0.25 otherwise.
#' @param ... Additional arguments passed to \code{plot()} (e.g., \code{main},
#' \code{ylim}, \code{ylab}).
#'
#' @details
#' When \code{tests="auto"}, the function reports a small default set of
#' differences-of-means tests (when applicable) and, in 2x2 designs, an
#' interaction test:
#' \describe{
#' \item{Differences in means}{If \code{cluster} is \code{NULL}, these are Welch
#' two-sample t-tests computed with \code{t.test(..., var.equal=FALSE)}. If
#' \code{cluster} is provided, these comparisons are computed from a
#' regression using \code{lm2()} with clustered standard errors.}
#' \item{Interaction}{The interaction is tested using a linear regression fit
#' with \code{lm2()}, even when \code{cluster} is \code{NULL}; when
#' \code{cluster} is provided, the interaction test uses clustered standard
#' errors.}
#' }
#' The regression-based tests use heteroskedasticity-robust inference (HC3) when
#' \code{cluster} is \code{NULL}. HC3 is a common small-sample adjustment to
#' White-type robust standard errors and is used to reduce sensitivity to
#' heteroskedasticity. When \code{cluster} is provided, \code{plot_means()}
#' instead uses clustered standard errors (robust to within-cluster correlation).
#'
#' In the returned \code{$means} table, \code{ciL} and \code{ciH} are the lower and
#' upper bounds of a \code{ci.level}\% confidence interval for the mean (when
#' available). The same confidence level is used for the confidence-interval
#' whiskers drawn in the figure.
#'
#' @return A minimal list returned invisibly with two elements:
#' \describe{
#' \item{\code{means}}{A data frame of means (and, when available, confidence intervals)
#' aligned to the plotting grid.}
#' \item{\code{tests}}{A data frame of comparisons used for p-value
#' annotation (or \code{NULL} if not applicable).}
#' }
#'
#' @examples
#' df <- data.frame(y = rnorm(100), group = rep(c("A", "B"), 50))
#' plot_means(y ~ group, data = df)
#'
#' df2 <- data.frame(
#' y = rnorm(200),
#' x1 = rep(c("A", "B"), 100),
#' x2 = rep(c("X", "Y"), each = 100)
#' )
#' plot_means(y ~ x1 + x2, data = df2)
#'
#' df3 <- data.frame(
#' y = rnorm(600),
#' x1 = rep(c("control", "treatment"), times = 300),
#' x2 = rep(rep(c("low", "high"), each = 150), times = 2),
#' x3 = rep(c("online", "lab"), each = 300)
#' )
#' plot_means(y ~ x1 + x2 + x3, data = df3)
#'
#' @export
#' @importFrom grDevices recordPlot replayPlot
#' @importFrom stats na.pass
#'
#: 1 plot_means: validate -> descriptives -> params -> compute -> draw -> output/export
#: 2 plot_means_validate: NSE-safe arg evaluation + input validation/normalization
#: 3 plot_means_params: derive factor levels/order, colors, legend layout, buffer.top
#: 4 plot_means_compute: build full grid, compute CIs, and bar/layout vectors used for plotting
#: 5 plot_means_draw: render the plot in base graphics (bars, CIs, labels, legend, p-values)
#: 6 plot_means_compute_pvalues: compute and format p-values shown on the plot (scenario-specific)
#----------------------------------
# plot_means (exported) ----
plot_means <- function(formula,
data = NULL,
cluster = NULL,
tests = "auto",
quiet = FALSE,
order = NULL,
# Graphics / annotation options
legend.title = NULL,
col = NULL,
col.text = NULL,
values.cex = 1,
values.align = "top",
values.round = 1,
pvalue.cex = 0.9,
pvalue.col = "gray50",
ci.level = 95,
buffer.top = "auto",
...) {
# 2. Validate + normalize inputs (NSE-safe)
# (mc must be captured before anything is evaluated)
mc <- match.call()
calling_env <- parent.frame()
# Track whether any console output was emitted by plot_means()
printed_any <- FALSE
msg2 <- function(...) {
if (isTRUE(quiet)) return(invisible(NULL))
printed_any <<- TRUE
message2(...)
}
# 1. Validate and normalize inputs
# Convert user-facing arguments into a single normalized list (`v`) used throughout.
# This is where we resolve NSE inputs (formula/data/cluster), validate types and
# lengths, and set defaults so downstream helpers can assume consistent fields.
v <- plot_means_validate(
mc = mc,
data = data,
order = order,
legend.title = legend.title,
col = col,
col.text = col.text,
cluster = cluster,
values.cex = values.cex,
values.align = values.align,
values.round = values.round,
tests = tests,
pvalue.cex = pvalue.cex,
pvalue.col = pvalue.col,
ci.level = ci.level,
buffer.top = buffer.top,
calling_env = calling_env
)
formula <- v$formula_eval
y_name <- v$y_name
x_names <- v$x_names
x1_name <- v$x1_name
x2_name <- v$x2_name
x3_name <- v$x3_name
values.cex <- v$values.cex
values.align <- v$values.align
values.round <- v$values.round
tests <- v$tests
pvalue.cex <- v$pvalue.cex
pvalue.col <- v$pvalue.col
ci.level <- v$ci.level
buffer.top <- v$buffer.top
# 2. Compute descriptives (means) using statuser::desc_var()
result <- eval(call("desc_var", v$mc$formula, data = v$data), envir = v$calling_env)
# 2.1 Normalize desc_var output for single grouping variable
result_plot <- result
if (length(x_names) == 1 && "group" %in% names(result_plot) && !(x1_name %in% names(result_plot))) {
result_plot[[x1_name]] <- as.character(result_plot$group)
}
# 2.2 CI settings (always computed)
ci_level <- as.numeric(ci.level) / 100
# 3. Levels/order/colors/buffer/legend layout (derived from data + args)
params <- plot_means_params(v, result = result, result_plot = result_plot)
result <- params$result
x1_levels <- params$x1_levels
x2_levels <- params$x2_levels
x3_levels <- params$x3_levels
k <- params$k
col <- params$col_vec
legend_horiz <- params$legend_horiz
buffer_top_effective <- params$buffer_top_effective
format_level_label <- params$format_level_label
# 4. Results table with CI computation and bar layout
comp <- plot_means_compute(v, params = params, result_plot = result_plot, ci_level = ci_level)
merged <- comp$merged
ci_map <- comp$ci_map
x_lefts <- comp$x_lefts
x_rights <- comp$x_rights
x_centers_drawn <- comp$x_centers_drawn
cell_keys_drawn <- comp$cell_keys_drawn
n_total_drawn <- comp$n_total_drawn
n_missing_drawn <- comp$n_missing_drawn
heights <- comp$heights
cols <- comp$cols
block_centers <- comp$block_centers
block_labels <- comp$block_labels
x3_section_centers <- comp$x3_section_centers
x3_section_labels <- comp$x3_section_labels
x_pos <- comp$x_pos
bar_width <- comp$bar_width
bar_step <- comp$bar_step
# 5. Prepare comparison table used for p-value drawing
# Build one output table aligned with the plotting grid (keeps missing combos)
means <- merged
means$ciL <- NA_real_
means$ciH <- NA_real_
if (nrow(ci_map) > 0) {
# Join CIs onto the grid via the same stable cell key used in CI model
x2_col <- if (is.null(x2_name)) ".x2" else x2_name
x3_col <- if (is.null(x3_name)) ".x3" else x3_name
if (!(x2_col %in% names(means))) means[[x2_col]] <- "All"
if (!(x3_col %in% names(means))) means[[x3_col]] <- "All"
means$cell_key <- paste(
as.character(means[[x1_name]]),
as.character(means[[x2_col]]),
as.character(means[[x3_col]]),
sep = "|"
)
ci_idx <- match(means$cell_key, ci_map$cell_key)
means$ciL <- ifelse(is.na(ci_idx), NA_real_, ci_map$lwr[ci_idx])
means$ciH <- ifelse(is.na(ci_idx), NA_real_, ci_map$upr[ci_idx])
}
group_cols <- character(0)
if (length(x_names) == 1) {
group_cols <- x1_name
} else {
group_cols <- c(x1_name, x2_name, x3_name)
}
group_cols <- group_cols[!is.null(group_cols) & nzchar(group_cols)]
group_cols <- intersect(group_cols, names(means))
keep_cols <- c(group_cols, "mean", "sd", "n.total", "n.missing", "ciL", "ciH")
keep_cols <- keep_cols[keep_cols %in% names(means)]
means <- means[, keep_cols, drop = FALSE]
rownames(means) <- NULL
means_comparisons <- plot_means_compute_pvalues(v = v, params = params, mean_results = means, comp = comp)
means_comparisons__out <- means_comparisons
if (!is.null(means_comparisons__out) && is.data.frame(means_comparisons__out)) {
drop_cols <- intersect(c("col1", "col2", "col3", "col4"), names(means_comparisons__out))
if (length(drop_cols)) means_comparisons__out[drop_cols] <- NULL
}
if (!isTRUE(quiet) && !is.null(means_comparisons) && is.data.frame(means_comparisons) && nrow(means_comparisons) > 0) {
fmt_p <- function(p) {
if (!is.finite(p)) return("NA")
if (p < 0.001) return("<0.001")
format(round(p, 3), nsmall = 3, scientific = FALSE)
}
fmt_df <- function(df) {
if (!is.finite(df)) return("NA")
format(round(df, 1), nsmall = 1, scientific = FALSE)
}
fmt_df_int <- function(df) {
if (!is.finite(df)) return("NA")
format(round(df, 0), nsmall = 0, scientific = FALSE)
}
fmt_t <- function(t) {
if (!is.finite(t)) return("NA")
format(round(t, 2), nsmall = 2, scientific = FALSE)
}
method_fallback <- function(row) {
# When not provided by compute_pvalues(), infer from cluster + test type.
if (!is.null(v$cluster_vec)) return("regression (clustered SE)")
if ("test_type" %in% names(row) && identical(as.character(row$test_type[1]), "interaction")) return("regression interaction")
"Welch t-test"
}
lines <- character(0)
any_welch <- FALSE
any_interaction <- FALSE
for (i in seq_len(nrow(means_comparisons))) {
r <- means_comparisons[i, , drop = FALSE]
label <- ""
if (all(c("test_type", "col1", "col2") %in% names(r))) {
tt <- as.character(r$test_type[1])
if (identical(tt, "simple")) {
label <- paste0(r$col1[1], " vs ", r$col2[1])
} else if (identical(tt, "pooled") && all(c("cols_left", "cols_right") %in% names(r))) {
left <- gsub(",", ",", as.character(r$cols_left[1]), fixed = TRUE)
right <- gsub(",", ",", as.character(r$cols_right[1]), fixed = TRUE)
label <- paste0(left, " vs ", right)
} else if (identical(tt, "interaction") && all(c("col3", "col4") %in% names(r))) {
label <- paste0("(", r$col1[1], "-", r$col2[1], ")-(", r$col3[1], "-", r$col4[1], ")")
} else {
label <- as.character(r$group1[1])
}
} else if (all(c("group1", "group2") %in% names(r)) && nzchar(as.character(r$group2[1]))) {
label <- paste0(as.character(r$group1[1]), " vs ", as.character(r$group2[1]))
} else if ("group1" %in% names(r)) {
label <- as.character(r$group1[1])
} else {
label <- paste0("test ", i)
}
is_interaction_row <- (("test_type" %in% names(r) && identical(as.character(r$test_type[1]), "interaction")) ||
("group1" %in% names(r) && grepl("^interaction\\(", as.character(r$group1[1]))))
method <- if ("method" %in% names(r) && nzchar(as.character(r$method[1]))) as.character(r$method[1]) else method_fallback(r)
df_txt <- if ("df" %in% names(r)) {
if (isTRUE(is_interaction_row)) fmt_df_int(as.numeric(r$df[1])) else fmt_df(as.numeric(r$df[1]))
} else {
"NA"
}
t_txt <- fmt_t(as.numeric(r$t.value[1]))
p_txt <- fmt_p(as.numeric(r$p.value[1]))
any_welch <- any_welch || identical(method, "Welch t-test")
any_interaction <- any_interaction || isTRUE(is_interaction_row)
if (identical(method, "Welch t-test")) {
lines <- c(lines, paste0(i, ") ", label, ": t(", df_txt, ")=", t_txt, ", p=", p_txt))
} else {
lines <- c(lines, paste0(i, ") ", label, ": ", method, ": t(", df_txt, ")=", t_txt, ", p=", p_txt))
}
}
footer <- character(0)
if (identical(tests, "auto")) {
footer <- c(footer, "You can specify which statistical tests to report with the `tests` argument.")
}
msg2(paste0("tests:\n", paste(lines, collapse = "\n"), if (length(footer)) paste0("\n", paste(footer, collapse = "\n")) else ""), col = "gray")
}
# 6. Draw plot
plot_means_draw(
v = v,
params = params,
comp = comp,
y_name = y_name,
x1_levels = x1_levels,
x2_name = x2_name,
x3_name = x3_name,
x1_name = x1_name,
col = col,
legend.title = legend.title,
col.text = col.text,
values.cex = values.cex,
values.align = values.align,
values.round = values.round,
tests = tests,
means_comparisons = means_comparisons,
pvalue.cex = pvalue.cex,
pvalue.col = pvalue.col,
buffer_top_effective = buffer_top_effective,
...
)
# 7. Prepare minimal output (returned invisibly)
tests_out <- means_comparisons__out
if (is.null(tests_out)) {
tests_out <- "Use `tests` argument to request tests contrasting specific means"
}
out <- list(
means = means,
tests = tests_out
)
if (isTRUE(printed_any) && !isTRUE(quiet)) {
msg2("\nSet `quiet=TRUE` to suppress this output.", col = "gray")
}
invisible(out)
}
#----------------------------------
# plot_means_validate: evaluate NSE inputs, validate args, and normalize to a single list `v` ----
plot_means_validate <- function(mc,
data,
order,
legend.title,
col,
col.text,
cluster,
values.cex,
values.align,
values.round,
tests,
pvalue.cex,
pvalue.col,
ci.level,
buffer.top,
calling_env) {
# 1. Resolve and validate formula input (NSE-safe)
# \"Resolve\" here means: take what the user typed for `formula` (which may be a name in the
# calling environment) and evaluate it into an actual `formula` object we can inspect and use.
formula_resolved <- evaluate_variable_arguments(
arg_expr = mc$formula,
arg_name = "formula",
data = data,
calling_env = calling_env,
func_name = "plot_means",
allow_null = FALSE
)
formula_eval <- formula_resolved$value
validate_formula(formula_eval, data, func_name = "plot_means", calling_env = calling_env)
if (!inherits(formula_eval, "formula")) {
stop("plot_means(): First argument must be a formula like y ~ x1 + x2", call. = FALSE)
}
# 2. Determine grouping variables (up to 3)
vars <- all.vars(formula_eval)
if (length(vars) < 2) {
stop("plot_means(): Formula must have at least one grouping variable: y ~ x1", call. = FALSE)
}
y_name <- vars[1]
x_names <- vars[-1]
if (length(x_names) > 3) {
stop("plot_means(): Currently supports up to 3 grouping variables: y ~ x1 + x2 + x3", call. = FALSE)
}
x1_name <- x_names[1]
x2_name <- if (length(x_names) >= 2) x_names[2] else NULL
x3_name <- if (length(x_names) >= 3) x_names[3] else NULL
# 3. Validate display and testing options
# 3.1 Validate label sizing argument
if (!is.numeric(values.cex) || length(values.cex) != 1 || is.na(values.cex) || values.cex <= 0) {
stop("plot_means(): 'values.cex' must be a single positive number", call. = FALSE)
}
# 3.2 Validate value label position
if (!is.null(mc$values.pos) && !identical(mc$values.pos, quote(NULL))) {
stop("plot_means(): `values.pos` was renamed to `values.align`", call. = FALSE)
}
values_align_effective <- match.arg(values.align, c("top", "middle", "bottom", "none"))
# 3.3 Validate rounding argument for mean labels
if (!is.numeric(values.round) || length(values.round) != 1 || is.na(values.round) || values.round < 0) {
stop("plot_means(): 'values.round' must be a single non-negative number", call. = FALSE)
}
values.round <- as.integer(values.round)
# 3.4 Validate tests argument
# Accept "auto", "none", or a custom comparison string like "3-1,4-2,(4-3)-(2-1)".
if (is.character(tests) && length(tests) == 1 && !is.na(tests) && nzchar(tests)) {
if (!(tests %in% c("auto", "none"))) {
tests <- as.character(tests)
} else {
tests <- match.arg(tests, c("auto", "none"))
}
} else {
stop("plot_means(): 'tests' must be a single string: \"auto\", \"none\", or custom comparisons like \"3-1, 4-2\"", call. = FALSE)
}
# 3.5 Validate p-value styling arguments
if (!is.numeric(pvalue.cex) || length(pvalue.cex) != 1 || is.na(pvalue.cex) || pvalue.cex <= 0) {
stop("plot_means(): 'pvalue.cex' must be a single positive number", call. = FALSE)
}
if (!is.character(pvalue.col) || length(pvalue.col) != 1 || is.na(pvalue.col) || !nzchar(pvalue.col)) {
stop("plot_means(): 'pvalue.col' must be a single color name", call. = FALSE)
}
# 3.6 Validate ci.level argument (confidence interval level for ciL/ciH)
if (!is.numeric(ci.level) || length(ci.level) != 1 || is.na(ci.level) || ci.level <= 0) {
stop("plot_means(): 'ci.level' must be a single positive number (e.g., 95 or 0.95)", call. = FALSE)
}
if (ci.level < 1) ci.level <- ci.level * 100
if (!is.finite(ci.level) || ci.level <= 0) {
stop("plot_means(): 'ci.level' must be a single positive number (e.g., 95 or 0.95)", call. = FALSE)
}
if (ci.level >= 100) {
stop("plot_means(): confidence can never be equal to or greater than 100%", call. = FALSE)
}
# 3.6 Validate buffer.top argument
if (is.character(buffer.top) && length(buffer.top) == 1 && identical(buffer.top, "auto")) {
buffer.top <- "auto"
} else {
if (!is.numeric(buffer.top) || length(buffer.top) != 1 || is.na(buffer.top) || buffer.top < 0) {
stop("plot_means(): 'buffer.top' must be 'auto' or a single non-negative number (e.g., 0.3)", call. = FALSE)
}
}
# 4. Resolve cluster argument (optional; for lm2(..., clusters=))
# Accept both `cluster=` (documented) and `clusters=` (alias) in the user call.
# If both are provided, error to avoid ambiguity.
cluster_vec <- NULL
has_cluster_expr <- !is.null(mc$cluster) && !identical(mc$cluster, quote(NULL))
has_clusters_expr <- !is.null(mc$clusters) && !identical(mc$clusters, quote(NULL))
if (isTRUE(has_cluster_expr) && isTRUE(has_clusters_expr)) {
stop("plot_means(): use only one of `cluster=` or `clusters=` (not both)", call. = FALSE)
}
cluster_expr <- if (isTRUE(has_cluster_expr)) mc$cluster else if (isTRUE(has_clusters_expr)) mc$clusters else NULL
if (!is.null(cluster_expr)) {
cluster_resolved <- evaluate_variable_arguments(
arg_expr = cluster_expr,
arg_name = if (isTRUE(has_clusters_expr)) "clusters" else "cluster",
data = data,
calling_env = calling_env,
func_name = "plot_means",
allow_null = TRUE
)
cluster_vec <- cluster_resolved$value
if (!is.null(cluster_vec)) {
if (!is.atomic(cluster_vec) || length(cluster_vec) == 0) {
stop("plot_means(): 'cluster' must be a vector (or a column name in 'data')", call. = FALSE)
}
}
}
list2(
mc,
formula_eval,
y_name,
x_names,
x1_name,
x2_name,
x3_name,
data,
order,
legend.title,
col,
col.text,
cluster,
cluster_vec,
values.cex,
values.align = values_align_effective,
values.round,
tests,
pvalue.cex,
pvalue.col,
ci.level,
buffer.top,
calling_env
)
}
#----------------------------------
# plot_means_params: derive plotting parameters (levels/order, colors, legend layout, buffer.top) ----
plot_means_params <- function(v, result, result_plot) {
get_levels <- function(var_name, fallback_values) {
if (!is.null(v$data) && is.data.frame(v$data) && var_name %in% names(v$data) && is.factor(v$data[[var_name]])) {
return(levels(v$data[[var_name]]))
}
vals <- fallback_values
if (is.null(vals) || !length(vals)) return(character(0))
vals_chr <- as.character(vals)
is_numeric_like <- grepl("^\\s*-?\\d+(\\.\\d+)?\\s*$", vals_chr)
if (all(is_numeric_like)) {
return(as.character(sort(unique(as.numeric(trimws(vals_chr))))))
}
sort(unique(vals_chr))
}
x1_levels <- get_levels(v$x1_name, if (v$x1_name %in% names(result_plot)) result_plot[[v$x1_name]] else NULL)
x2_levels <- if (is.null(v$x2_name)) "All" else get_levels(v$x2_name, if (v$x2_name %in% names(result_plot)) result_plot[[v$x2_name]] else NULL)
x3_levels <- if (is.null(v$x3_name)) "All" else get_levels(v$x3_name, if (v$x3_name %in% names(result_plot)) result_plot[[v$x3_name]] else NULL)
# Apply ordering to x1 only (controls bar order/colors)
# `order` is interpreted as an instruction for the *within-block* series (x1):
# it changes the left-to-right bar order and, by extension, the mapping of
# `col_vec[i]` to the i-th x1 level. We do not reorder x2/x3 here because
# those define block layout on the x-axis (reordering them would change the
# overall panel/block arrangement rather than the series inside each block).
if (!is.null(v$order)) {
if (length(v$order) == 1 && is.numeric(v$order) && v$order == -1) {
x1_levels <- rev(x1_levels)
} else {
missing_groups <- setdiff(x1_levels, v$order)
extra_groups <- setdiff(v$order, x1_levels)
if (length(missing_groups) > 0) {
stop(
sprintf("plot_means(): 'order' is missing x1 group(s): %s", paste(missing_groups, collapse = ", ")),
call. = FALSE
)
}
if (length(extra_groups) > 0) {
warning(
sprintf("plot_means(): 'order' contains x1 group(s) not in data: %s", paste(extra_groups, collapse = ", ")),
call. = FALSE
)
}
x1_levels <- v$order[v$order %in% x1_levels]
}
}
# If there is a single grouping variable, reorder the returned table too
if (length(v$x_names) == 1) {
x1_in_result <- if (v$x1_name %in% names(result)) {
as.character(result[[v$x1_name]])
} else if ("group" %in% names(result)) {
as.character(result$group)
} else {
NULL
}
if (!is.null(x1_in_result)) {
row_idx <- match(x1_levels, x1_in_result)
if (all(!is.na(row_idx))) {
result <- result[row_idx, , drop = FALSE]
}
}
}
# Resolve colors for x1 levels
k <- length(x1_levels)
col_vec <- v$col
if (is.null(col_vec)) {
col_vec <- get.colors(k)
} else {
if (!is.character(col_vec)) stop("plot_means(): 'col' must be a character vector (color name(s))", call. = FALSE)
if (length(col_vec) == 1) col_vec <- rep(col_vec, k)
if (length(col_vec) != k) {
stop(sprintf("plot_means(): 'col' must have length 1 or %d (number of x1 levels)", k), call. = FALSE)
}
}
# Legend layout (used for auto buffer sizing too)
max_legend_chars <- max(nchar(as.character(x1_levels)))
if (!is.finite(max_legend_chars)) max_legend_chars <- 0
legend_horiz <- isTRUE(max_legend_chars <= 10)
# Resolve buffer.top when set to 'auto'
is_default_interaction_scenario <- length(v$x_names) == 2 &&
length(x1_levels) == 2 &&
!is.null(v$x2_name) &&
length(x2_levels) == 2 &&
is.null(v$x3_name)
# Treat custom diff-in-diff requests like the default interaction scenario for auto buffer sizing.
# This ensures identical tests produce identical headroom regardless of whether they are requested
# via tests="auto" or an explicit string like "(4-3)-(2-1),2-1,4-3".
tests_str <- if (is.character(v$tests) && length(v$tests) == 1 && !is.na(v$tests)) gsub("\\s+", "", v$tests) else ""
has_interaction_test <- nzchar(tests_str) && grepl("\\([0-9]+-[0-9]+\\)-\\([0-9]+-[0-9]+\\)", tests_str)
show_interaction <- (identical(v$tests, "auto") && is_default_interaction_scenario) || has_interaction_test
buffer_top_effective <- if (identical(v$buffer.top, "auto")) {
base <- if (show_interaction) 0.3 else 0.2
if (!legend_horiz) base <- base + 0.05
base
} else {
v$buffer.top
}
# Label disambiguation overlap set and formatter
# When x2 and/or x3 are present, the same label text can appear in more than
# one role (e.g., an x1 level "A" and an x2 level "A"). If we print raw levels
# everywhere, axis labels become ambiguous. We precompute the set of labels
# that overlap across x1/x2/x3 and, only for those, format as "var=level"
# (e.g., "x2=A") to make the plot self-explanatory without being verbose.
label_overlap_set <- character(0)
if (!is.null(v$x2_name) || !is.null(v$x3_name)) {
x1_lab <- as.character(x1_levels)
x2_lab <- if (is.null(v$x2_name)) character(0) else as.character(x2_levels)
x3_lab <- if (is.null(v$x3_name)) character(0) else as.character(x3_levels)
present <- list(x1 = unique(x1_lab), x2 = unique(x2_lab), x3 = unique(x3_lab))
all_labels <- unique(c(present$x1, present$x2, present$x3))
label_overlap_set <- all_labels[sapply(all_labels, function(lbl) {
sum(c(lbl %in% present$x1, lbl %in% present$x2, lbl %in% present$x3)) >= 2
})]
}
format_level_label <- function(var_name, level) {
level_chr <- as.character(level)
if (!is.null(var_name) && nzchar(var_name) && level_chr %in% label_overlap_set) {
return(paste0(var_name, "=", level_chr))
}
level_chr
}
list2(
result = result,
x1_levels,
x2_levels,
x3_levels,
k,
col_vec,
legend_horiz,
buffer_top_effective,
format_level_label
)
}
#----------------------------------
# Internal helpers used by multiple plot_means_* functions ----
plot_means_model_frame <- function(formula_eval, data, na.action, context = "model.frame") {
mf <- tryCatch(
model.frame(formula_eval, data = data, na.action = na.action),
error = function(e) e
)
if (inherits(mf, "error")) {
stop("plot_means(): failed to build model frame for ", context, ": ", conditionMessage(mf), call. = FALSE)
}
mf
}
plot_means_align_cluster <- function(v, mf) {
if (is.null(v$cluster_vec)) return(NULL)
if (length(v$cluster_vec) == nrow(mf)) return(v$cluster_vec)
if (!is.null(v$data) && is.data.frame(v$data) && length(v$cluster_vec) == nrow(v$data)) {
idx <- as.integer(rownames(mf))
if (anyNA(idx)) idx <- seq_len(nrow(mf))
return(v$cluster_vec[idx])
}
stop("plot_means(): 'cluster' length must match rows in 'data'", call. = FALSE)
}
plot_means_span <- function(y_min, y_max) {
span <- (y_max - y_min)
if (!is.finite(span) || span <= 0) span <- abs(y_max)
if (!is.finite(span) || span <= 0) span <- 1
span
}
plot_means_parse_tests <- function(tests_str) {
if (!is.character(tests_str) || length(tests_str) != 1 || is.na(tests_str) || !nzchar(tests_str)) {
stop("plot_means(): 'tests' must be a single non-empty string", call. = FALSE)
}
tests_str <- gsub("\\s+", "", tests_str)
parts <- strsplit(tests_str, ",", fixed = TRUE)[[1]]
parts <- parts[nzchar(parts)]
if (!length(parts)) {
stop("plot_means(): 'tests' is empty. Example: tests=\"3-1,4-2,(1+2)-(3+4),(4-3)-(2-1)\"", call. = FALSE)
}
parse_pair <- function(s) {
m <- regexec("^([0-9]+)-([0-9]+)$", s)
mm <- regmatches(s, m)[[1]]
if (!length(mm)) return(NULL)
a <- as.integer(mm[2])
b <- as.integer(mm[3])
if (!is.finite(a) || !is.finite(b) || a <= 0L || b <= 0L) return(NULL)
list(a = a, b = b)
}
parse_sum <- function(s) {
# one-or-more indices joined by '+', e.g. "1+3" or "2+1+4"
if (!grepl("^[0-9]+(\\+[0-9]+)+$", s)) return(NULL)
ss <- strsplit(s, "\\+", fixed = FALSE)[[1]]
idx <- suppressWarnings(as.integer(ss))
if (!length(idx) || any(!is.finite(idx)) || any(idx <= 0L)) return(NULL)
idx
}
out <- vector("list", length(parts))
for (i in seq_along(parts)) {
p <- parts[i]
# interaction: (a-b)-(c-d)
m_int <- regexec("^\\(([0-9]+)-([0-9]+)\\)-\\(([0-9]+)-([0-9]+)\\)$", p)
mm <- regmatches(p, m_int)[[1]]
if (length(mm)) {
a <- as.integer(mm[2]); b <- as.integer(mm[3]); c <- as.integer(mm[4]); d <- as.integer(mm[5])
if (any(!is.finite(c(a, b, c, d))) || any(c(a, b, c, d) <= 0L)) {
stop("plot_means(): invalid interaction test in 'tests': ", parts[i], call. = FALSE)
}
out[[i]] <- list(type = "interaction", a = a, b = b, c = c, d = d, expr = parts[i])
next
}
# pooled: (a+b+...)-(c+d+...)
m_pool <- regexec("^\\(([0-9\\+]+)\\)-\\(([0-9\\+]+)\\)$", p)
mm <- regmatches(p, m_pool)[[1]]
if (length(mm)) {
left <- parse_sum(mm[2])
right <- parse_sum(mm[3])
if (is.null(left) || is.null(right)) {
stop("plot_means(): invalid pooled test in 'tests': ", parts[i], call. = FALSE)
}
out[[i]] <- list(type = "pooled", left = left, right = right, expr = parts[i])
next
}
pr <- parse_pair(p)
if (!is.null(pr)) {
out[[i]] <- list(type = "simple", a = pr$a, b = pr$b, expr = parts[i])
next
}
stop("plot_means(): could not parse 'tests' entry: ", parts[i], ". Example: tests=\"3-1,4-2,(1+2)-(3+4),(4-3)-(2-1)\"", call. = FALSE)
}
out
}
#----------------------------------
# plot_means_compute: expand to a full x1/x2/x3 grid, compute CIs, and build bar/layout vectors ----
plot_means_compute <- function(v, params, result_plot, ci_level = 0.95) {
x1_name <- v$x1_name
x2_name <- v$x2_name
x3_name <- v$x3_name
x1_levels <- params$x1_levels
x2_levels <- params$x2_levels
x3_levels <- params$x3_levels
k <- params$k
col <- params$col_vec
format_level_label <- params$format_level_label
# Build complete grid of combinations, keeping empty slots
# Intent: preserve (x1,x2,x3) combinations with no observations so spacing and axes remain stable.
result_key <- result_plot
if (x1_name %in% names(result_key)) result_key[[x1_name]] <- as.character(result_key[[x1_name]])
if (!is.null(x2_name) && x2_name %in% names(result_key)) result_key[[x2_name]] <- as.character(result_key[[x2_name]])
if (!is.null(x3_name) && x3_name %in% names(result_key)) result_key[[x3_name]] <- as.character(result_key[[x3_name]])
grid_df <- expand.grid(
x1 = x1_levels,
x2 = x2_levels,
x3 = x3_levels,
stringsAsFactors = FALSE
)
names(grid_df) <- c(x1_name, if (is.null(x2_name)) ".x2" else x2_name, if (is.null(x3_name)) ".x3" else x3_name)
if (is.null(x2_name)) grid_df$.x2 <- "All"
if (is.null(x3_name)) grid_df$.x3 <- "All"
merge_by <- c(x1_name, if (is.null(x2_name)) ".x2" else x2_name, if (is.null(x3_name)) ".x3" else x3_name)
if (is.null(x2_name)) result_key$.x2 <- "All"
if (is.null(x3_name)) result_key$.x3 <- "All"
merged <- merge(grid_df, result_key, by = merge_by, all.x = TRUE, sort = FALSE)
# Confidence intervals for each plotted mean
# Default: per-cell t interval (one-sample t CI within each cell).
# Exceptions:
# - When clustering is used: regression-based CI via lm2(..., clusters=...)
# - 2x2 interaction scenario (tests="auto", x1/x2 both binary, no x3): regression-based CI
ci_map <- data.frame(
cell_key = character(0),
lwr = numeric(0),
upr = numeric(0),
stringsAsFactors = FALSE
)
mf <- plot_means_model_frame(v$formula_eval, data = v$data, na.action = na.pass, context = "confidence intervals")
if (nrow(mf) > 0) {
df_m <- mf
names(df_m)[1] <- ".__y"
df_m[[x1_name]] <- as.character(df_m[[x1_name]])
if (is.null(x2_name)) df_m$.x2 <- "All" else df_m[[x2_name]] <- as.character(df_m[[x2_name]])
if (is.null(x3_name)) df_m$.x3 <- "All" else df_m[[x3_name]] <- as.character(df_m[[x3_name]])
cluster_mf <- plot_means_align_cluster(v, df_m)
if (!is.null(cluster_mf)) df_m$.__cluster <- cluster_mf
x2_col <- if (is.null(x2_name)) ".x2" else x2_name
x3_col <- if (is.null(x3_name)) ".x3" else x3_name
complete_mask <- !is.na(df_m$.__y) &
!is.na(df_m[[x1_name]]) &
!is.na(df_m[[x2_col]]) &
!is.na(df_m[[x3_col]])
df_m <- df_m[complete_mask, , drop = FALSE]
if (nrow(df_m) > 0) {
df_m$cell_key <- paste(df_m[[x1_name]], df_m[[x2_col]], df_m[[x3_col]], sep = "|")
df_m$cell_factor <- factor(df_m$cell_key)
# Determine when to use regression-based CIs for the bars.
is_2x2_interaction_scenario <- identical(v$tests, "auto") &&
length(v$x_names) == 2 &&
length(params$x1_levels) == 2 &&
!is.null(v$x2_name) &&
length(params$x2_levels) == 2 &&
is.null(v$x3_name)
use_regression_ci <- !is.null(v$cluster_vec) || isTRUE(is_2x2_interaction_scenario)
if (!isTRUE(use_regression_ci)) {
# Per-cell t interval for the mean (one-sample t CI within each cell)
levels_cf <- levels(df_m$cell_factor)
lwr <- rep(NA_real_, length(levels_cf))
upr <- rep(NA_real_, length(levels_cf))
alpha <- 1 - ci_level
for (j in seq_along(levels_cf)) {
key <- levels_cf[j]
yy <- df_m$.__y[df_m$cell_key == key]
yy <- yy[is.finite(yy)]
n <- length(yy)
if (n < 2) next
m <- mean(yy)
s <- stats::sd(yy)
if (!is.finite(m) || !is.finite(s) || s < 0) next
tcrit <- stats::qt(1 - alpha / 2, df = n - 1)
half <- tcrit * s / sqrt(n)
lwr[j] <- m - half
upr[j] <- m + half
}
ci_map <- data.frame(
cell_key = levels_cf,
lwr = lwr,
upr = upr,
stringsAsFactors = FALSE
)
}
if (isTRUE(use_regression_ci)) {
# Regression-based CI from lm2 coef/vcov (robust or clustered SE)
fit <- if (is.null(v$cluster_vec)) {
lm2(.__y ~ 0 + cell_factor, data = df_m)
} else {
lm2(.__y ~ 0 + cell_factor, data = df_m, clusters = df_m$.__cluster)
}
beta <- stats::coef(fit)
V <- stats::vcov(fit)
if (!is.null(beta) && length(beta) > 0 && is.matrix(V) && nrow(V) == length(beta)) {
levels_cf <- levels(df_m$cell_factor)
newdata <- data.frame(cell_factor = factor(levels_cf, levels = levels_cf))
X <- stats::model.matrix(~ 0 + cell_factor, data = newdata)
b_idx <- match(colnames(X), names(beta))
if (anyNA(b_idx)) {
X <- X[, !is.na(b_idx), drop = FALSE]
b_idx <- b_idx[!is.na(b_idx)]
}
beta_use <- beta[b_idx]
V_use <- V[b_idx, b_idx, drop = FALSE]
fit_hat <- as.numeric(X %*% beta_use)
se_hat <- sqrt(pmax(0, diag(X %*% V_use %*% t(X))))
df_fit <- fit$df
if (length(df_fit) != 1) df_fit <- suppressWarnings(min(df_fit, na.rm = TRUE))
if (!is.finite(df_fit) || df_fit <= 0) df_fit <- 1
alpha <- 1 - ci_level
tcrit <- stats::qt(1 - alpha / 2, df = df_fit)
lwr <- fit_hat - tcrit * se_hat
upr <- fit_hat + tcrit * se_hat
ci_map <- data.frame(
cell_key = levels_cf,
lwr = lwr,
upr = upr,
stringsAsFactors = FALSE
)
}
}
}
}
# Bar positions and block labels
# Intent: compute x-positions once and return vectors used by the draw step.
# Performance: use a pre-keyed `merged` lookup + pre-allocation to avoid O(n^2) scans and repeated vector growth.
gap_x2 <- 1
gap_x3 <- 2
bar_width <- 1
bar_step <- 1
x2_col <- if (is.null(x2_name)) ".x2" else x2_name
x3_col <- if (is.null(x3_name)) ".x3" else x3_name
# Pre-key merged once by "x1|x2|x3" for O(1) lookups inside the loop
merged$cell_key <- paste(
as.character(merged[[x1_name]]),
as.character(merged[[x2_col]]),
as.character(merged[[x3_col]]),
sep = "|"
)
rownames(merged) <- merged$cell_key
n_x2 <- length(x2_levels)
n_x3 <- length(x3_levels)
n_blocks <- n_x2 * n_x3
n_cells <- k * n_blocks
block_centers <- numeric(n_blocks)
block_labels <- character(n_blocks)
x3_section_centers <- numeric(n_x3)
x3_section_labels <- character(n_x3)
# Pre-allocate per-bar vectors to max possible size; trim after
x_lefts <- numeric(n_cells)
x_rights <- numeric(n_cells)
x_centers_drawn <- numeric(n_cells)
cell_keys_drawn <- character(n_cells)
n_total_drawn <- numeric(n_cells)
n_missing_drawn <- numeric(n_cells)
heights <- numeric(n_cells)
cols <- character(n_cells)
drawn_idx <- 0L
x_pos <- 1
blk_idx <- 0L
for (x3_idx in seq_along(x3_levels)) {
x3_val <- x3_levels[x3_idx]
x3_start <- x_pos
for (x2_idx in seq_along(x2_levels)) {
x2_val <- x2_levels[x2_idx]
blk_idx <- blk_idx + 1L
centers_block <- numeric(k)
for (i in seq_len(k)) {
x1_val <- x1_levels[i]
key <- paste(as.character(x1_val), as.character(x2_val), as.character(x3_val), sep = "|")
row_sel <- merged[key, , drop = FALSE]
mean_val <- if (!is.na(row_sel$mean[1])) row_sel$mean[1] else NA_real_
n_total <- if ("n.total" %in% names(row_sel)) row_sel$n.total[1] else NA_real_
n_missing <- if ("n.missing" %in% names(row_sel)) row_sel$n.missing[1] else NA_real_
x_center <- x_pos + (i - 1L) * bar_step
centers_block[i] <- x_center
if (!is.na(mean_val)) {
drawn_idx <- drawn_idx + 1L
cell_keys_drawn[drawn_idx] <- key
x_centers_drawn[drawn_idx] <- x_center
n_total_drawn[drawn_idx] <- n_total
n_missing_drawn[drawn_idx] <- n_missing
x_lefts[drawn_idx] <- x_center - bar_width / 2
x_rights[drawn_idx] <- x_center + bar_width / 2
heights[drawn_idx] <- mean_val
cols[drawn_idx] <- col[i]
}
}
block_centers[blk_idx] <- mean(centers_block)
block_labels[blk_idx] <- if (is.null(x2_name)) "" else format_level_label(x2_name, x2_val)
x_pos <- x_pos + k * bar_step
if (x2_idx < n_x2) x_pos <- x_pos + gap_x2
}
x3_end <- x_pos - bar_step
x3_section_centers[x3_idx] <- (x3_start + x3_end) / 2
x3_section_labels[x3_idx] <- if (is.null(x3_name)) "" else format_level_label(x3_name, x3_val)
if (x3_idx < n_x3) x_pos <- x_pos + gap_x3
}
# Trim pre-allocated vectors to actually drawn bars
if (drawn_idx < n_cells) {
keep <- seq_len(drawn_idx)
cell_keys_drawn <- cell_keys_drawn[keep]
x_centers_drawn <- x_centers_drawn[keep]
n_total_drawn <- n_total_drawn[keep]
n_missing_drawn <- n_missing_drawn[keep]
x_lefts <- x_lefts[keep]
x_rights <- x_rights[keep]
heights <- heights[keep]
cols <- cols[keep]
}
list2(
merged,
ci_map,
x_lefts,
x_rights,
x_centers_drawn,
cell_keys_drawn,
n_total_drawn,
n_missing_drawn,
heights,
cols,
block_centers,
block_labels,
x3_section_centers,
x3_section_labels,
x_pos,
bar_width,
bar_step
)
}
#----------------------------------
# plot_means_draw: render bars, CIs, labels, legend, and p-value annotations in base graphics ----
plot_means_draw <- function(v,
params,
comp,
y_name,
x1_levels,
x2_name,
x3_name,
x1_name,
col,
legend.title,
col.text,
values.cex,
values.align,
values.round,
tests,
means_comparisons = NULL,
pvalue.cex,
pvalue.col,
buffer_top_effective,
...) {
heights <- comp$heights
cols <- comp$cols
x_lefts <- comp$x_lefts
x_rights <- comp$x_rights
x_centers_drawn <- comp$x_centers_drawn
cell_keys_drawn <- comp$cell_keys_drawn
n_total_drawn <- comp$n_total_drawn
block_centers <- comp$block_centers
block_labels <- comp$block_labels
x3_section_centers <- comp$x3_section_centers
x3_section_labels <- comp$x3_section_labels
x_pos <- comp$x_pos
bar_width <- comp$bar_width
ci_map <- comp$ci_map
legend_horiz <- params$legend_horiz
dots <- list(...)
# Establish plotting range.
# Bars are drawn from y=0 to `heights`, so we always include 0 in the y-limits.
y_max_raw <- max(heights, na.rm = TRUE)
if (!is.finite(y_max_raw)) y_max_raw <- 1
y_max <- max(0, y_max_raw)
y_min <- min(0, min(heights, na.rm = TRUE))
if (!is.finite(y_min)) y_min <- 0
if (nrow(ci_map) > 0) {
lwr_f <- ci_map$lwr[is.finite(ci_map$lwr)]
upr_f <- ci_map$upr[is.finite(ci_map$upr)]
if (length(lwr_f)) y_min <- min(y_min, min(lwr_f))
if (length(upr_f)) y_max <- max(y_max, max(upr_f), 0)
}
if (!"xlab" %in% names(dots)) dots$xlab <- ""
# When x2 is present but x3 is not, show x2 name as x-axis label (centered)
if (is.null(x3_name) && !is.null(x2_name) && nzchar(x2_name) && identical(dots$xlab, "")) {
dots$xlab <- x2_name
}
if (!"ylab" %in% names(dots)) dots$ylab <- "Mean"
if (!"main" %in% names(dots)) dots$main <- paste0("Means of ", y_name)
y_span_data <- plot_means_span(y_min, y_max)
# If everything is below 0, p-value brackets should be drawn "from below".
# Reserve extra bottom space so brackets/labels don't overlap bars (and so the
# interaction bracket, when present, isn't clipped at the bottom).
all_below_zero <- (is.finite(y_max_raw) && y_max_raw <= 0)
if (isTRUE(all_below_zero) && !identical(tests, "none") && !"ylim" %in% names(dots)) {
is_interaction_scenario <- length(v$x_names) == 2 &&
length(params$x1_levels) == 2 &&
!is.null(v$x2_name) &&
length(params$x2_levels) == 2 &&
is.null(v$x3_name)
extra_bottom <- if (isTRUE(is_interaction_scenario)) 0.28 else 0.18
y_min <- y_min - extra_bottom * y_span_data
y_span_data <- plot_means_span(y_min, y_max)
}
# Reserve a band below the lowest error whisker for n= labels.
if (length(x_centers_drawn) && !"yaxt" %in% names(dots) && !"ylim" %in% names(dots)) {
min_whisker <- y_min
if (nrow(ci_map) > 0 && length(cell_keys_drawn) == length(x_centers_drawn)) {
mi <- match(cell_keys_drawn, ci_map$cell_key)
ok <- !is.na(mi)
if (any(ok)) {
lwr <- ci_map$lwr[mi[ok]]
lwr <- lwr[is.finite(lwr)]
if (length(lwr)) min_whisker <- min(min_whisker, min(lwr))
}
}
# Minimal expansion: just enough room to place n= slightly below the lowest whisker.
# If all CIs are above 0, we still want n= below the bars (below 0), not inside them.
min_anchor <- min(0, min_whisker)
pad_n <- 0.04 * y_span_data
y_n_target <- min_anchor - pad_n
y_min_target <- y_n_target - pad_n
y_min <- min(y_min, y_min_target)
y_span_data <- plot_means_span(y_min, y_max)
}
# Extra headroom only when stacked p-value brackets require it.
extra_top <- 0
if (!identical(tests, "none") && !is.null(means_comparisons) && is.data.frame(means_comparisons) && nrow(means_comparisons) > 0) {
assign_overlap_tier <- function(x0, x1) {
n <- length(x0)
if (!n) return(integer(0))
left <- pmin(x0, x1)
right <- pmax(x0, x1)
ord <- order(left, right)
tiers <- integer(n)
tier_rights <- numeric(0)
for (ii in ord) {
placed <- FALSE
if (length(tier_rights)) {
for (t in seq_along(tier_rights)) {
if (left[ii] > tier_rights[t]) {
tiers[ii] <- t
tier_rights[t] <- right[ii]
placed <- TRUE
break
}
}
}
if (!placed) {
tiers[ii] <- length(tier_rights) + 1L
tier_rights <- c(tier_rights, right[ii])
}
}
tiers
}
assign_overlap_tier_in_order <- function(x0, x1) {
# Greedy tier assignment in input order (stable), checking overlap against
# all intervals already assigned to each tier.
n <- length(x0)
if (!n) return(integer(0))
left <- pmin(x0, x1)
right <- pmax(x0, x1)
tiers <- integer(n)
tier_lefts <- list()
tier_rights <- list()
overlaps_any <- function(l, r, L, R) {
if (!length(L)) return(FALSE)
# Treat touching endpoints as non-overlap so adjacent brackets can share a tier.
any(!(r <= L | l >= R))
}
for (i in seq_len(n)) {
l <- left[i]
r <- right[i]
if (!is.finite(l) || !is.finite(r)) next
placed <- FALSE
if (length(tier_lefts)) {
for (t in seq_along(tier_lefts)) {
if (!isTRUE(overlaps_any(l, r, tier_lefts[[t]], tier_rights[[t]]))) {
tiers[i] <- t
tier_lefts[[t]] <- c(tier_lefts[[t]], l)
tier_rights[[t]] <- c(tier_rights[[t]], r)
placed <- TRUE
break
}
}
}
if (!placed) {
tiers[i] <- length(tier_lefts) + 1L
tier_lefts[[length(tier_lefts) + 1L]] <- l
tier_rights[[length(tier_rights) + 1L]] <- r
}
}
tiers
}
if (all(c("col1", "col2") %in% names(means_comparisons))) {
simple_rows <- means_comparisons[is.na(means_comparisons$col3) & is.finite(means_comparisons$p.value), , drop = FALSE]
x0 <- rep(NA_real_, nrow(simple_rows))
x1 <- rep(NA_real_, nrow(simple_rows))
for (r in seq_len(nrow(simple_rows))) {
is_pooled <- ("test_type" %in% names(simple_rows)) && identical(as.character(simple_rows$test_type[r]), "pooled")
if (isTRUE(is_pooled) && all(c("cols_left", "cols_right") %in% names(simple_rows))) {
left <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_left[r]), ",", fixed = TRUE)[[1]]))
right <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_right[r]), ",", fixed = TRUE)[[1]]))
idx <- c(left, right)
idx <- idx[is.finite(idx)]
if (length(idx)) {
xx <- x_centers_drawn[idx]
x0[r] <- min(xx, na.rm = TRUE)
x1[r] <- max(xx, na.rm = TRUE)
}
} else {
i1 <- as.integer(simple_rows$col1[r])
i2 <- as.integer(simple_rows$col2[r])
if (is.finite(i1) && is.finite(i2)) {
x0[r] <- x_centers_drawn[i1]
x1[r] <- x_centers_drawn[i2]
}
}
}
} else {
simple_rows <- means_comparisons[is.character(means_comparisons$group2) & nzchar(means_comparisons$group2) & is.finite(means_comparisons$p.value), , drop = FALSE]
# Build a temporary label -> bar index map (idx_by_label is created later for drawing).
group_label_tmp <- function(x1, x2 = NULL, x3 = NULL) {
parts <- character(0)
if (!is.null(x3) && nzchar(x3)) parts <- c(parts, x3)
if (!is.null(x2) && nzchar(x2)) parts <- c(parts, x2)
parts <- c(parts, x1)
paste(parts, collapse = "_")
}
key_parts <- strsplit(cell_keys_drawn, "\\|")
x1_vals <- vapply(key_parts, function(z) z[1], character(1))
x2_vals <- vapply(key_parts, function(z) z[2], character(1))
x3_vals <- vapply(key_parts, function(z) z[3], character(1))
x2_use <- if (is.null(v$x2_name)) rep(list(NULL), length(x1_vals)) else as.list(x2_vals)
x3_use <- if (is.null(v$x3_name)) rep(list(NULL), length(x1_vals)) else as.list(x3_vals)
bar_labels <- mapply(group_label_tmp, x1 = x1_vals, x2 = x2_use, x3 = x3_use, USE.NAMES = FALSE)
idx_by_label_tmp <- seq_along(bar_labels)
names(idx_by_label_tmp) <- bar_labels
i1 <- idx_by_label_tmp[as.character(simple_rows$group1)]
i2 <- idx_by_label_tmp[as.character(simple_rows$group2)]
x0 <- x_centers_drawn[as.integer(i1)]
x1 <- x_centers_drawn[as.integer(i2)]
}
ok <- is.finite(x0) & is.finite(x1)
if (any(ok)) {
tiers <- assign_overlap_tier(x0[ok], x1[ok])
step <- 0.05 * y_span_data
extra_top <- (max(tiers, na.rm = TRUE) - 1L) * step
if (!is.finite(extra_top) || extra_top < 0) extra_top <- 0
}
}
ylim_top <- y_max + buffer_top_effective * y_span_data + extra_top
if (!"ylim" %in% names(dots)) dots$ylim <- c(y_min, ylim_top)
if (!"xlim" %in% names(dots)) dots$xlim <- c(0, x_pos)
if (!"las" %in% names(dots)) dots$las <- 1
if (!"font.lab" %in% names(dots)) dots$font.lab <- 2
if (!"cex.lab" %in% names(dots)) dots$cex.lab <- 1.2
if (!"cex.main" %in% names(dots)) dots$cex.main <- 1.38
dots$type <- "n"
dots$xaxt <- "n"
user_provided_yaxt <- "yaxt" %in% names(dots)
if (!user_provided_yaxt) dots$yaxt <- "n"
plot_args <- c(list(x = 0, y = 0), dots)
do.call(plot, plot_args)
abline(h = 0, col = "gray80")
if (!user_provided_yaxt && (is.null(dots$axes) || isTRUE(dots$axes))) {
y_ticks <- pretty(c(y_min, y_max), n = 5)
y_ticks <- y_ticks[y_ticks >= y_min & y_ticks <= y_max + 1e-9]
axis(2, at = y_ticks, las = 1)
}
if (length(heights)) rect(x_lefts, 0, x_rights, heights, col = cols, border = cols)
lum <- function(col_one) {
rgb <- grDevices::col2rgb(col_one)
as.numeric((0.299 * rgb[1, ] + 0.587 * rgb[2, ] + 0.114 * rgb[3, ]) / 255)
}
text_cols <- ifelse(sapply(cols, lum) < 0.5, "white", "black")
if (nrow(ci_map) > 0 && length(x_centers_drawn) == length(cell_keys_drawn)) {
eb_col <- "black"
cap <- bar_width * 0.08
match_idx <- match(cell_keys_drawn, ci_map$cell_key)
ok <- !is.na(match_idx)
if (any(ok)) {
lwr <- ci_map$lwr[match_idx[ok]]
upr <- ci_map$upr[match_idx[ok]]
x_ok <- x_centers_drawn[ok]
ok2 <- is.finite(lwr) & is.finite(upr)
if (any(ok2)) {
x_ok <- x_ok[ok2]
lwr <- lwr[ok2]
upr <- upr[ok2]
segments(x_ok, lwr, x_ok, upr, col = eb_col, lwd = 1)
segments(x_ok - cap, lwr, x_ok + cap, lwr, col = eb_col, lwd = 1)
segments(x_ok - cap, upr, x_ok + cap, upr, col = eb_col, lwd = 1)
}
}
}
if (!identical(tests, "none") && !is.null(means_comparisons) && is.data.frame(means_comparisons) && nrow(means_comparisons) > 0) {
pvalue_line <- function(x0, x1, y, p, from_below = FALSE) {
n <- max(length(x0), length(x1), length(y), length(p))
x0 <- rep_len(x0, n)
x1 <- rep_len(x1, n)
y <- rep_len(y, n)
p <- rep_len(p, n)
y_span <- diff(par("usr")[3:4])
tick <- 0.015 * y_span
segments(x0, y, x1, y, col = pvalue.col)
if (isTRUE(from_below)) {
segments(x0, y, x0, y + tick, col = pvalue.col)
segments(x1, y, x1, y + tick, col = pvalue.col)
} else {
segments(x0, y - tick, x0, y, col = pvalue.col)
segments(x1, y - tick, x1, y, col = pvalue.col)
}
x_mid <- (x0 + x1) / 2
for (i in seq_len(n)) {
p_txt <- format_p_expr(p[i], digits = 3)
if (!is.null(p_txt)) {
y_txt <- if (isTRUE(from_below)) y[i] - 0.03 * y_span else y[i] + 0.03 * y_span
text2(x_mid[i], y_txt, p_txt, bg = "white", cex = pvalue.cex, col = pvalue.col, pad = 0, pad_v = 0)
}
}
invisible(NULL)
}
pvalue_line_pooled <- function(xL0, xL1, xR0, xR1, y, p, from_below = FALSE) {
# Draw a two-level bracket for pooled comparisons:
# - a small bracket over the pooled-left bars, and one over pooled-right bars (y_sub)
# - a main bracket connecting the two pooled groups (y)
y_span <- diff(par("usr")[3:4])
sub_drop <- 0.03 * y_span
y_sub <- if (isTRUE(from_below)) y + sub_drop else y - sub_drop
# Small grouping lines for each pooled group (flat: no end ticks)
segments(xL0, y_sub, xL1, y_sub, col = pvalue.col)
segments(xR0, y_sub, xR1, y_sub, col = pvalue.col)
# Connect pooled-group midpoints up to the main bracket
xLm <- (xL0 + xL1) / 2
xRm <- (xR0 + xR1) / 2
if (isTRUE(from_below)) {
segments(xLm, y_sub, xLm, y, col = pvalue.col)
segments(xRm, y_sub, xRm, y, col = pvalue.col)
} else {
segments(xLm, y, xLm, y_sub, col = pvalue.col)
segments(xRm, y, xRm, y_sub, col = pvalue.col)
}
# Main bracket + p-value label
pvalue_line(x0 = xLm, x1 = xRm, y = y, p = p, from_below = from_below)
invisible(NULL)
}
format_p_expr <- function(p, digits = 3) {
if (!is.finite(p)) return(NULL)
min_threshold <- 10^(-digits)
max_threshold <- 1 - 10^(-digits)
min_str <- format(min_threshold, nsmall = digits, scientific = FALSE)
max_str <- format(max_threshold, nsmall = digits, scientific = FALSE)
if (p < min_threshold) return(as.expression(parse(text = paste0("italic(p) < ", min_str))))
if (p > max_threshold) return(as.expression(parse(text = paste0("italic(p) > ", max_str))))
p_clean <- round(p, digits)
p_str <- format(p_clean, nsmall = digits, scientific = FALSE)
as.expression(parse(text = paste0("italic(p) == ", p_str)))
}
# Map drawn bars to group labels used by plot_means_compute_pvalues().
group_label <- function(x1, x2 = NULL, x3 = NULL) {
parts <- character(0)
if (!is.null(x3) && nzchar(x3)) parts <- c(parts, x3)
if (!is.null(x2) && nzchar(x2)) parts <- c(parts, x2)
parts <- c(parts, x1)
paste(parts, collapse = "_")
}
if (!length(cell_keys_drawn)) return(invisible(NULL))
key_parts <- strsplit(cell_keys_drawn, "\\|")
x1_vals <- vapply(key_parts, function(z) z[1], character(1))
x2_vals <- vapply(key_parts, function(z) z[2], character(1))
x3_vals <- vapply(key_parts, function(z) z[3], character(1))
x2_use <- if (is.null(v$x2_name)) rep(list(NULL), length(x1_vals)) else as.list(x2_vals)
x3_use <- if (is.null(v$x3_name)) rep(list(NULL), length(x1_vals)) else as.list(x3_vals)
bar_labels <- mapply(group_label, x1 = x1_vals, x2 = x2_use, x3 = x3_use, USE.NAMES = FALSE)
idx_by_label <- seq_along(bar_labels)
names(idx_by_label) <- bar_labels
from_below <- (is.finite(max(heights, na.rm = TRUE)) && max(heights, na.rm = TRUE) <= 0)
y_span <- diff(par("usr")[3:4])
assign_overlap_tier <- function(x0, x1) {
n <- length(x0)
if (!n) return(integer(0))
left <- pmin(x0, x1)
right <- pmax(x0, x1)
ord <- order(left, right)
tiers <- integer(n)
tier_rights <- numeric(0)
for (ii in ord) {
placed <- FALSE
if (length(tier_rights)) {
for (t in seq_along(tier_rights)) {
if (left[ii] > tier_rights[t]) {
tiers[ii] <- t
tier_rights[t] <- right[ii]
placed <- TRUE
break
}
}
}
if (!placed) {
tiers[ii] <- length(tier_rights) + 1L
tier_rights <- c(tier_rights, right[ii])
}
}
tiers
}
has_custom_cols <- all(c("col1", "col2") %in% names(means_comparisons))
if (isTRUE(has_custom_cols)) {
simple_rows <- means_comparisons[is.na(means_comparisons$col3) & is.finite(means_comparisons$p.value), , drop = FALSE]
int_rows <- means_comparisons[is.finite(means_comparisons$col3) & is.finite(means_comparisons$col4) & is.finite(means_comparisons$p.value), , drop = FALSE]
# 1) Compute a shared baseline y-position for all custom tests.
involved_idx <- integer(0)
if (nrow(simple_rows) > 0) {
involved_idx <- unique(c(involved_idx, as.integer(simple_rows$col1), as.integer(simple_rows$col2)))
if ("test_type" %in% names(simple_rows) && "cols_left" %in% names(simple_rows) && "cols_right" %in% names(simple_rows)) {
pooled_idx <- which(as.character(simple_rows$test_type) == "pooled")
if (length(pooled_idx)) {
for (rr in pooled_idx) {
left <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_left[rr]), ",", fixed = TRUE)[[1]]))
right <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_right[rr]), ",", fixed = TRUE)[[1]]))
involved_idx <- unique(c(involved_idx, left, right))
}
}
}
}
if (nrow(int_rows) > 0) {
involved_idx <- unique(c(involved_idx, as.integer(int_rows$col1), as.integer(int_rows$col2), as.integer(int_rows$col3), as.integer(int_rows$col4)))
}
involved_idx <- involved_idx[is.finite(involved_idx)]
if (!length(involved_idx)) return(invisible(NULL))
if (any(involved_idx < 1L) || any(involved_idx > length(x_centers_drawn))) {
stop("plot_means(): invalid column index in custom tests (out of range)", call. = FALSE)
}
y_extreme <- if (isTRUE(from_below)) min(heights[involved_idx], na.rm = TRUE) else max(heights[involved_idx], na.rm = TRUE)
if (nrow(ci_map) > 0) {
k_all <- cell_keys_drawn[involved_idx]
mi_all <- match(k_all, ci_map$cell_key)
mi_all <- mi_all[!is.na(mi_all)]
if (length(mi_all)) {
if (isTRUE(from_below)) {
y_extreme <- min(y_extreme, ci_map$lwr[mi_all], na.rm = TRUE)
} else {
y_extreme <- max(y_extreme, ci_map$upr[mi_all], na.rm = TRUE)
}
}
}
y_base <- if (isTRUE(from_below)) y_extreme - 0.02 * y_span else y_extreme + 0.02 * y_span
# 2) Tier all custom tests together based on their x-spans (keep current x definitions).
x0 <- rep(NA_real_, nrow(simple_rows))
x1 <- rep(NA_real_, nrow(simple_rows))
if (nrow(simple_rows) > 0) {
for (r in seq_len(nrow(simple_rows))) {
is_pooled <- ("test_type" %in% names(simple_rows)) && identical(as.character(simple_rows$test_type[r]), "pooled")
if (isTRUE(is_pooled) && all(c("cols_left", "cols_right") %in% names(simple_rows))) {
left <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_left[r]), ",", fixed = TRUE)[[1]]))
right <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_right[r]), ",", fixed = TRUE)[[1]]))
idx <- c(left, right)
idx <- idx[is.finite(idx)]
if (length(idx)) {
xx <- x_centers_drawn[idx]
x0[r] <- min(xx, na.rm = TRUE)
x1[r] <- max(xx, na.rm = TRUE)
}
} else {
i1 <- as.integer(simple_rows$col1[r])
i2 <- as.integer(simple_rows$col2[r])
if (is.finite(i1) && is.finite(i2)) {
x0[r] <- x_centers_drawn[i1]
x1[r] <- x_centers_drawn[i2]
}
}
}
}
x0_int <- rep(NA_real_, nrow(int_rows))
x1_int <- rep(NA_real_, nrow(int_rows))
if (nrow(int_rows) > 0) {
for (r in seq_len(nrow(int_rows))) {
a <- as.integer(int_rows$col1[r]); b <- as.integer(int_rows$col2[r])
c <- as.integer(int_rows$col3[r]); d <- as.integer(int_rows$col4[r])
if (any(!is.finite(c(a, b, c, d)))) next
x_ab <- mean(c(x_centers_drawn[a], x_centers_drawn[b]))
x_cd <- mean(c(x_centers_drawn[c], x_centers_drawn[d]))
x0_int[r] <- min(x_ab, x_cd, na.rm = TRUE)
x1_int[r] <- max(x_ab, x_cd, na.rm = TRUE)
}
}
x0_all <- c(x0, x0_int)
x1_all <- c(x1, x1_int)
tiers_all <- assign_overlap_tier_in_order(x0_all, x1_all)
step <- 0.05 * y_span
# 3) Draw simple/pooled tests at y determined solely by tier.
if (nrow(simple_rows) > 0) {
for (r in seq_len(nrow(simple_rows))) {
off <- (tiers_all[r] - 1L) * step
y <- if (isTRUE(from_below)) y_base - off else y_base + off
if (!is.finite(x0[r]) || !is.finite(x1[r])) next
is_pooled <- ("test_type" %in% names(simple_rows)) && identical(as.character(simple_rows$test_type[r]), "pooled")
if (isTRUE(is_pooled) && all(c("cols_left", "cols_right") %in% names(simple_rows))) {
left <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_left[r]), ",", fixed = TRUE)[[1]]))
right <- suppressWarnings(as.integer(strsplit(as.character(simple_rows$cols_right[r]), ",", fixed = TRUE)[[1]]))
left <- left[is.finite(left)]
right <- right[is.finite(right)]
if (!length(left) || !length(right)) next
xL <- x_centers_drawn[left]
xR <- x_centers_drawn[right]
xL0 <- min(xL, na.rm = TRUE); xL1 <- max(xL, na.rm = TRUE)
xR0 <- min(xR, na.rm = TRUE); xR1 <- max(xR, na.rm = TRUE)
pvalue_line_pooled(xL0 = xL0, xL1 = xL1, xR0 = xR0, xR1 = xR1, y = y, p = simple_rows$p.value[r], from_below = from_below)
} else {
pvalue_line(x0 = x0[r], x1 = x1[r], y = y, p = simple_rows$p.value[r], from_below = from_below)
}
}
}
# 4) Draw interaction tests at y determined solely by tier (aligned with simple/pooled).
if (nrow(int_rows) > 0) {
n_simple <- nrow(simple_rows)
for (r in seq_len(nrow(int_rows))) {
a <- as.integer(int_rows$col1[r]); b <- as.integer(int_rows$col2[r])
c <- as.integer(int_rows$col3[r]); d <- as.integer(int_rows$col4[r])
if (any(!is.finite(c(a, b, c, d)))) next
x_ab <- mean(c(x_centers_drawn[a], x_centers_drawn[b]))
x_cd <- mean(c(x_centers_drawn[c], x_centers_drawn[d]))
p_txt <- format_p_expr(int_rows$p.value[r], digits = 3)
if (is.null(p_txt)) next
tier_idx <- n_simple + r
off <- (tiers_all[tier_idx] - 1L) * step
y_anchor <- if (isTRUE(from_below)) y_base - off else y_base + off
if (isTRUE(from_below)) {
y_line <- y_anchor - 0.06 * y_span
y_lab <- y_line - 0.025 * y_span
y_line_from <- y_anchor - 0.045 * y_span
segments(x_ab, y_line, x_ab, y_line_from, col = pvalue.col)
segments(x_cd, y_line, x_cd, y_line_from, col = pvalue.col)
} else {
y_line <- y_anchor + 0.06 * y_span
y_lab <- y_line + 0.025 * y_span
y_line_from <- y_anchor + 0.045 * y_span
segments(x_ab, y_line_from, x_ab, y_line, col = pvalue.col)
segments(x_cd, y_line_from, x_cd, y_line, col = pvalue.col)
}
segments(x_ab, y_line, x_cd, y_line, col = pvalue.col)
text2(mean(c(x_ab, x_cd)), y_lab, p_txt, bg = "white", cex = pvalue.cex, col = pvalue.col, pad = 0, pad_v = 0)
}
}
invisible(NULL)
}
# Simple-effect comparisons: rows with a non-empty group2 and non-missing p.value
simple_rows <- means_comparisons[is.character(means_comparisons$group2) & nzchar(means_comparisons$group2), , drop = FALSE]
simple_rows <- simple_rows[is.finite(simple_rows$p.value), , drop = FALSE]
if (nrow(simple_rows) > 0) {
# Shared y-position: 2% beyond the most extreme CI among all involved bars
involved <- unique(c(as.character(simple_rows$group1), as.character(simple_rows$group2)))
idx_all <- idx_by_label[involved]
idx_all <- idx_all[is.finite(idx_all)]
y_simple <- NA_real_
if (length(idx_all)) {
y_extreme <- if (isTRUE(from_below)) min(heights[idx_all], na.rm = TRUE) else max(heights[idx_all], na.rm = TRUE)
if (nrow(ci_map) > 0) {
k_all <- cell_keys_drawn[idx_all]
mi_all <- match(k_all, ci_map$cell_key)
mi_all <- mi_all[!is.na(mi_all)]
if (length(mi_all)) {
if (isTRUE(from_below)) {
y_extreme <- min(y_extreme, ci_map$lwr[mi_all], na.rm = TRUE)
} else {
y_extreme <- max(y_extreme, ci_map$upr[mi_all], na.rm = TRUE)
}
}
}
y_simple <- if (isTRUE(from_below)) y_extreme - 0.02 * y_span else y_extreme + 0.02 * y_span
}
if (is.finite(y_simple)) {
x0 <- x_centers_drawn[as.integer(idx_by_label[as.character(simple_rows$group1)])]
x1 <- x_centers_drawn[as.integer(idx_by_label[as.character(simple_rows$group2)])]
tiers <- assign_overlap_tier(x0, x1)
step <- 0.05 * y_span
for (r in seq_len(nrow(simple_rows))) {
i1 <- idx_by_label[as.character(simple_rows$group1[r])]
i2 <- idx_by_label[as.character(simple_rows$group2[r])]
if (!is.finite(i1) || !is.finite(i2)) next
off <- (tiers[r] - 1L) * step
y <- if (isTRUE(from_below)) y_simple - off else y_simple + off
pvalue_line(x0 = x_centers_drawn[i1], x1 = x_centers_drawn[i2], y = y, p = simple_rows$p.value[r], from_below = from_below)
}
}
}
# Interaction comparison: a single row labeled interaction(x1:x2)
int_rows <- means_comparisons[is.character(means_comparisons$group1) & grepl("^interaction\\(", means_comparisons$group1), , drop = FALSE]
int_rows <- int_rows[is.finite(int_rows$p.value), , drop = FALSE]
if (nrow(int_rows) > 0 && !is.null(v$x2_name) && length(params$x2_levels) == 2 && length(x1_levels) == 2) {
p_int_txt <- format_p_expr(int_rows$p.value[1], digits = 3)
if (!is.null(p_int_txt)) {
x2_levels <- params$x2_levels
x_mids <- rep(NA_real_, length(x2_levels))
for (j in seq_along(x2_levels)) {
g1 <- group_label(x1_levels[1], x2 = x2_levels[j])
g2 <- group_label(x1_levels[2], x2 = x2_levels[j])
i1 <- idx_by_label[g1]
i2 <- idx_by_label[g2]
if (!is.finite(i1) || !is.finite(i2)) next
x_mids[j] <- mean(c(x_centers_drawn[i1], x_centers_drawn[i2]))
}
if (all(is.finite(x_mids))) {
# Place interaction bracket offset from the simple-effect line, if present; otherwise anchor on CI extreme
y_anchor <- if (exists("y_simple", inherits = FALSE) && is.finite(y_simple)) y_simple else {
if (nrow(ci_map) > 0) {
y_extreme <- if (isTRUE(from_below)) min(ci_map$lwr, na.rm = TRUE) else max(ci_map$upr, na.rm = TRUE)
if (is.finite(y_extreme)) {
if (isTRUE(from_below)) y_extreme - 0.02 * y_span else y_extreme + 0.02 * y_span
} else {
0
}
} else {
0
}
}
if (exists("tiers", inherits = FALSE) && length(tiers)) {
max_off <- (max(tiers, na.rm = TRUE) - 1L) * (0.05 * y_span)
y_anchor <- if (isTRUE(from_below)) y_anchor - max_off else y_anchor + max_off
}
if (isTRUE(from_below)) {
y_line <- y_anchor - 0.06 * y_span
y_lab <- y_line - 0.025 * y_span
y_line_from <- y_anchor - 0.045 * y_span
segments(x_mids[1], y_line, x_mids[1], y_line_from, col = pvalue.col)
segments(x_mids[2], y_line, x_mids[2], y_line_from, col = pvalue.col)
} else {
y_line <- y_anchor + 0.06 * y_span
y_lab <- y_line + 0.025 * y_span
y_line_from <- y_anchor + 0.045 * y_span
segments(x_mids[1], y_line_from, x_mids[1], y_line, col = pvalue.col)
segments(x_mids[2], y_line_from, x_mids[2], y_line, col = pvalue.col)
}
segments(x_mids[1], y_line, x_mids[2], y_line, col = pvalue.col)
text2(mean(x_mids), y_lab, p_int_txt, bg = "white", cex = pvalue.cex, col = pvalue.col, pad = 0, pad_v = 0)
}
}
}
}
if (length(x_centers_drawn) > 0 && length(n_total_drawn) == length(x_centers_drawn)) {
usr <- par("usr")
y_span <- (usr[4] - usr[3])
# Place sample sizes just below the lowest CI whisker (with a small pad), so we
# don't have to create a large empty band under the plot.
min_whisker <- 0
if (nrow(ci_map) > 0 && length(cell_keys_drawn) == length(x_centers_drawn)) {
mi <- match(cell_keys_drawn, ci_map$cell_key)
ok <- !is.na(mi)
if (any(ok)) {
lwr <- ci_map$lwr[mi[ok]]
lwr <- lwr[is.finite(lwr)]
if (length(lwr)) min_whisker <- min(lwr, na.rm = TRUE)
}
}
pad_n <- 0.04 * y_span
min_anchor <- min(0, min_whisker)
y_n <- min_anchor - pad_n
y_n <- max(y_n, usr[3] + pad_n)
n_vals <- ifelse(is.finite(n_total_drawn), n_total_drawn, NA)
labels <- paste0("n=", n_vals)
n_cex <- 0.9 * values.cex
graphics::text(x_centers_drawn, y_n, labels, col = pvalue.col, cex = n_cex, adj = c(0.5, 0))
}
if (!identical(values.align, "none") && length(x_centers_drawn) > 0) {
usr <- par("usr")
pad_top <- 0.03 * (usr[4] - usr[3])
pad_bot <- 0.06 * (usr[4] - usr[3])
fmt <- formatC(heights, format = "f", digits = values.round)
mean_labels <- if (identical(values.align, "bottom")) paste0("M=", fmt) else fmt
neg <- is.finite(heights) & heights < 0
y_mean <- if (identical(values.align, "top")) {
# Place labels near the mean endpoint of each bar (not near the baseline at 0).
# Positive bars end at `heights` above 0; negative bars end at `heights` below 0.
ifelse(neg, heights + pad_top, heights - pad_top)
} else if (identical(values.align, "middle")) {
heights / 2
} else if (identical(values.align, "bottom")) {
# "bottom" still means the label is pulled toward the bar body, but anchored
# near the mean endpoint rather than near 0 for negative bars.
ifelse(neg, heights + pad_bot, heights - pad_bot)
} else {
rep(0, length(heights))
}
text2(
x = x_centers_drawn,
y = y_mean,
labels = mean_labels,
bg = cols,
cex = values.cex,
col = text_cols,
pad = 0,
pad_v = 0
)
}
usr <- par("usr")
x_left <- usr[1] - 0.03 * (usr[2] - usr[1])
old_xpd <- par("xpd")
par(xpd = NA)
on.exit(par(xpd = old_xpd), add = TRUE)
if (is.null(x2_name) || !nzchar(x2_name)) {
axis(1, at = block_centers, labels = block_labels, las = 1)
} else {
if (is.null(x3_name) || !nzchar(x3_name)) {
axis(1, at = block_centers, labels = block_labels, las = 1)
} else {
axis(1, at = block_centers, labels = FALSE, las = 1)
mtext(x2_name, side = 1, at = x_left, adj = 0, line = 1, font = 2, cex = 1.17)
mtext(block_labels, side = 1, at = block_centers, line = 1, cex = 1.17)
mtext(x3_name, side = 1, at = x_left, adj = 0, line = 2.5, font = 2, cex = 1.17)
mtext(x3_section_labels, side = 1, at = x3_section_centers, line = 2.5, font = 2, cex = 1.17)
}
}
if (params$k > 1) {
w <- strwidth(x1_levels, cex = 1.3)
w <- w[is.finite(w)]
tw <- if (length(w)) max(w) else 0
gap_w <- strwidth(" ", cex = 1.3)
if (!is.finite(gap_w)) gap_w <- 0
legend_args <- list(
"top",
legend = x1_levels,
pch = 15,
col = col,
bty = "n",
inset = 0.01,
cex = 1.15,
pt.cex = 2,
x.intersp = 1.2,
text.width = tw + gap_w,
horiz = legend_horiz
)
if (!legend_horiz) {
legend_args$text.width <- NULL
legend_args$x.intersp <- 1.2
}
if (!is.null(legend.title)) {
legend_args$title <- legend.title
legend_args$title.font <- 2
}
do.call(legend, legend_args)
}
invisible(NULL)
}
#----------------------------------
# plot_means_compute_pvalues: run the default inferential tests and return p-values for annotation ----
# This is only used when `tests = "auto"`. It returns a small table with the exact
# comparisons needed by the plotting code (group labels, means, diffs, and p-values).
plot_means_compute_pvalues <- function(v, params, mean_results, comp = NULL) {
if (identical(v$tests, "none")) return(NULL)
# Custom tests: parse and compute from bar indices (left-to-right draw order)
if (!identical(v$tests, "auto")) {
if (is.null(comp) || !is.list(comp) || is.null(comp$cell_keys_drawn)) {
stop("plot_means(): internal error: custom 'tests' requires computed bar layout", call. = FALSE)
}
specs <- plot_means_parse_tests(v$tests)
cell_keys_drawn <- comp$cell_keys_drawn
n_cols <- length(cell_keys_drawn)
all_idx <- unlist(lapply(specs, function(s) {
if (identical(s$type, "pooled")) {
c(s$left, s$right)
} else {
unlist(s[c("a", "b", "c", "d")], use.names = FALSE)
}
}), use.names = FALSE)
all_idx <- all_idx[is.finite(all_idx)]
if (any(all_idx < 1L) || any(all_idx > n_cols)) {
stop("plot_means(): invalid column index in 'tests'. There are ", n_cols, " plotted columns.", call. = FALSE)
}
mf_tests <- plot_means_model_frame(v$formula_eval, data = v$data, na.action = na.omit, context = "custom tests")
if (nrow(mf_tests) == 0) return(NULL)
names(mf_tests)[1] <- ".__y"
x1_name <- v$x1_name
x2_name <- v$x2_name
x3_name <- v$x3_name
x2_col <- if (is.null(x2_name)) ".x2" else x2_name
x3_col <- if (is.null(x3_name)) ".x3" else x3_name
mf_tests[[x1_name]] <- as.character(mf_tests[[x1_name]])
if (is.null(x2_name)) mf_tests$.x2 <- "All" else mf_tests[[x2_name]] <- as.character(mf_tests[[x2_name]])
if (is.null(x3_name)) mf_tests$.x3 <- "All" else mf_tests[[x3_name]] <- as.character(mf_tests[[x3_name]])
complete_mask <- !is.na(mf_tests$.__y) &
!is.na(mf_tests[[x1_name]]) &
!is.na(mf_tests[[x2_col]]) &
!is.na(mf_tests[[x3_col]])
mf_tests <- mf_tests[complete_mask, , drop = FALSE]
if (nrow(mf_tests) == 0) return(NULL)
mf_tests$cell_key <- paste(mf_tests[[x1_name]], mf_tests[[x2_col]], mf_tests[[x3_col]], sep = "|")
cluster_mf <- plot_means_align_cluster(v, mf_tests)
if (!is.null(cluster_mf)) mf_tests$.__cluster <- cluster_mf
calc_simple <- function(a, b) {
kA <- cell_keys_drawn[a]
kB <- cell_keys_drawn[b]
df_sub <- mf_tests[mf_tests$cell_key %in% c(kA, kB), c(".__y", "cell_key", if (!is.null(cluster_mf)) ".__cluster" else NULL), drop = FALSE]
if (nrow(df_sub) == 0) stop("plot_means(): no data for columns ", a, " and ", b, call. = FALSE)
df_sub$.__g <- factor(ifelse(df_sub$cell_key == kA, "A", "B"), levels = c("A", "B"))
if (length(unique(df_sub$.__g)) < 2) stop("plot_means(): not enough data to compare columns ", a, " and ", b, call. = FALSE)
if (is.null(cluster_mf)) {
tt <- stats::t.test(.__y ~ .__g, data = df_sub, var.equal = FALSE)
meanA <- as.numeric(tt$estimate[[1]])
meanB <- as.numeric(tt$estimate[[2]])
list(
mean1 = meanA,
mean2 = meanB,
diff = meanB - meanA,
# Align sign with diff (B - A). t.test reports t for (A - B).
t.value = -as.numeric(tt$statistic[[1]]),
p.value = as.numeric(tt$p.value),
df = as.numeric(tt$parameter[[1]]),
method = "Welch t-test"
)
} else {
fit <- lm2(.__y ~ .__g, data = df_sub, clusters = df_sub$.__cluster)
tab <- attr(fit, "statuser_table")
if (is.null(tab) || !is.data.frame(tab) || !"term" %in% names(tab)) stop("plot_means(): failed to extract lm2 table for custom test", call. = FALSE)
idx <- which(grepl("^\\.__g", as.character(tab$term)))
if (!length(idx)) stop("plot_means(): failed to find group term for custom test", call. = FALSE)
tr <- tab[idx[1], , drop = FALSE]
meanA <- mean(df_sub$.__y[df_sub$.__g == "A"], na.rm = TRUE)
meanB <- mean(df_sub$.__y[df_sub$.__g == "B"], na.rm = TRUE)
list(
mean1 = as.numeric(meanA),
mean2 = as.numeric(meanB),
diff = as.numeric(tr$estimate),
t.value = as.numeric(tr$t),
p.value = as.numeric(tr$p.value),
df = if ("df" %in% names(tr)) as.numeric(tr$df) else NA_real_,
method = "regression (clustered SE)"
)
}
}
calc_interaction <- function(a, b, c, d) {
keys <- unique(cell_keys_drawn[c(a, b, c, d)])
df_sub <- mf_tests[mf_tests$cell_key %in% keys, c(".__y", "cell_key", if (!is.null(cluster_mf)) ".__cluster" else NULL), drop = FALSE]
if (nrow(df_sub) == 0) stop("plot_means(): no data for interaction test", call. = FALSE)
df_sub$.__g <- factor(df_sub$cell_key, levels = keys)
fit <- if (is.null(cluster_mf)) {
lm2(.__y ~ 0 + .__g, data = df_sub)
} else {
lm2(.__y ~ 0 + .__g, data = df_sub, clusters = df_sub$.__cluster)
}
beta <- stats::coef(fit)
V <- stats::vcov(fit)
if (is.null(beta) || is.null(V)) stop("plot_means(): failed to extract coef/vcov for interaction test", call. = FALSE)
coef_names <- names(beta)
name_for <- function(key) {
target <- paste0(".__g", key)
idx <- match(target, coef_names)
if (is.na(idx)) stop("plot_means(): missing coefficient for bar in interaction test", call. = FALSE)
coef_names[idx]
}
nA <- name_for(cell_keys_drawn[a])
nB <- name_for(cell_keys_drawn[b])
nC <- name_for(cell_keys_drawn[c])
nD <- name_for(cell_keys_drawn[d])
L <- rep(0, length(beta))
names(L) <- coef_names
L[nA] <- 1
L[nB] <- -1
L[nC] <- -1
L[nD] <- 1
est <- sum(L * beta)
se <- sqrt(as.numeric(t(L) %*% V %*% L))
if (!is.finite(se) || se <= 0) stop("plot_means(): could not compute SE for interaction test", call. = FALSE)
t_val <- est / se
df_fit <- fit$df
if (length(df_fit) != 1) df_fit <- suppressWarnings(min(df_fit, na.rm = TRUE))
if (!is.finite(df_fit)) df_fit <- suppressWarnings(min(fit$df, na.rm = TRUE))
if (!is.finite(df_fit)) df_fit <- 1
p_val <- 2 * stats::pt(-abs(t_val), df = df_fit)
list(
diff = as.numeric(est),
t.value = as.numeric(t_val),
p.value = as.numeric(p_val),
df = as.numeric(df_fit),
method = if (is.null(cluster_mf)) "regression interaction" else "regression interaction (clustered SE)"
)
}
calc_pooled <- function(left, right) {
kL <- unique(cell_keys_drawn[left])
kR <- unique(cell_keys_drawn[right])
keys <- unique(c(kL, kR))
df_sub <- mf_tests[mf_tests$cell_key %in% keys, c(".__y", "cell_key", if (!is.null(cluster_mf)) ".__cluster" else NULL), drop = FALSE]
if (nrow(df_sub) == 0) stop("plot_means(): no data for pooled test", call. = FALSE)
df_sub$.__g <- factor(ifelse(df_sub$cell_key %in% kL, "A", "B"), levels = c("A", "B"))
if (length(unique(df_sub$.__g)) < 2) stop("plot_means(): not enough data for pooled test", call. = FALSE)
if (is.null(cluster_mf)) {
tt <- stats::t.test(.__y ~ .__g, data = df_sub, var.equal = FALSE)
meanA <- as.numeric(tt$estimate[[1]])
meanB <- as.numeric(tt$estimate[[2]])
list(
mean1 = meanA,
mean2 = meanB,
diff = meanB - meanA,
# Align sign with diff (B - A). t.test reports t for (A - B).
t.value = -as.numeric(tt$statistic[[1]]),
p.value = as.numeric(tt$p.value),
df = as.numeric(tt$parameter[[1]]),
method = "Welch t-test"
)
} else {
fit <- lm2(.__y ~ .__g, data = df_sub, clusters = df_sub$.__cluster)
tab <- attr(fit, "statuser_table")
if (is.null(tab) || !is.data.frame(tab) || !"term" %in% names(tab)) stop("plot_means(): failed to extract lm2 table for pooled test", call. = FALSE)
idx <- which(grepl("^\\.__g", as.character(tab$term)))
if (!length(idx)) stop("plot_means(): failed to find group term for pooled test", call. = FALSE)
tr <- tab[idx[1], , drop = FALSE]
meanA <- mean(df_sub$.__y[df_sub$.__g == "A"], na.rm = TRUE)
meanB <- mean(df_sub$.__y[df_sub$.__g == "B"], na.rm = TRUE)
list(
mean1 = as.numeric(meanA),
mean2 = as.numeric(meanB),
diff = as.numeric(tr$estimate),
t.value = as.numeric(tr$t),
p.value = as.numeric(tr$p.value),
df = if ("df" %in% names(tr)) as.numeric(tr$df) else NA_real_,
method = "regression (clustered SE)"
)
}
}
rows <- data.frame(
group1 = character(0),
group2 = character(0),
mean1 = numeric(0),
mean2 = numeric(0),
diff = numeric(0),
t.value = numeric(0),
p.value = numeric(0),
df = numeric(0),
method = character(0),
test_type = character(0),
cols_left = character(0),
cols_right = character(0),
col1 = integer(0),
col2 = integer(0),
col3 = integer(0),
col4 = integer(0),
stringsAsFactors = FALSE
)
for (s in specs) {
if (identical(s$type, "simple")) {
res <- calc_simple(s$a, s$b)
rows <- rbind(rows, data.frame(
group1 = paste0("col", s$a),
group2 = paste0("col", s$b),
mean1 = res$mean1,
mean2 = res$mean2,
diff = res$diff,
t.value = res$t.value,
p.value = res$p.value,
df = res$df,
method = res$method,
test_type = "simple",
cols_left = "",
cols_right = "",
col1 = s$a,
col2 = s$b,
col3 = NA_integer_,
col4 = NA_integer_,
stringsAsFactors = FALSE
))
} else if (identical(s$type, "pooled")) {
res <- calc_pooled(s$left, s$right)
involved <- c(s$left, s$right)
rows <- rbind(rows, data.frame(
group1 = paste0("(", paste(s$left, collapse = "+"), ")-(", paste(s$right, collapse = "+"), ")"),
group2 = "",
mean1 = res$mean1,
mean2 = res$mean2,
diff = res$diff,
t.value = res$t.value,
p.value = res$p.value,
df = res$df,
method = res$method,
test_type = "pooled",
cols_left = paste(s$left, collapse = ","),
cols_right = paste(s$right, collapse = ","),
col1 = min(involved),
col2 = max(involved),
col3 = NA_integer_,
col4 = NA_integer_,
stringsAsFactors = FALSE
))
} else if (identical(s$type, "interaction")) {
res <- calc_interaction(s$a, s$b, s$c, s$d)
m_a <- mean(mf_tests$.__y[mf_tests$cell_key == cell_keys_drawn[s$a]], na.rm = TRUE)
m_b <- mean(mf_tests$.__y[mf_tests$cell_key == cell_keys_drawn[s$b]], na.rm = TRUE)
m_c <- mean(mf_tests$.__y[mf_tests$cell_key == cell_keys_drawn[s$c]], na.rm = TRUE)
m_d <- mean(mf_tests$.__y[mf_tests$cell_key == cell_keys_drawn[s$d]], na.rm = TRUE)
d_first <- as.numeric(m_a - m_b)
d_second <- as.numeric(m_c - m_d)
rows <- rbind(rows, data.frame(
group1 = paste0("interaction(", s$a, "-", s$b, ")-(", s$c, "-", s$d, ")"),
group2 = "",
mean1 = d_first,
mean2 = d_second,
diff = res$diff,
t.value = res$t.value,
p.value = res$p.value,
df = res$df,
method = res$method,
test_type = "interaction",
cols_left = "",
cols_right = "",
col1 = s$a,
col2 = s$b,
col3 = s$c,
col4 = s$d,
stringsAsFactors = FALSE
))
}
}
return(rows)
}
# Pull the relevant term row from the lm2() summary table.
# Non-obvious: lm2() stores the coefficient table in an attribute ("statuser_table"),
# and interaction term naming can vary, so we match by pattern.
get_term_row <- function(fit, var_a, var_b = NULL, want_interaction = FALSE) {
tab <- attr(fit, "statuser_table")
if (is.null(tab) || !is.data.frame(tab) || !"term" %in% names(tab)) return(NULL)
terms <- as.character(tab$term)
if (want_interaction) {
idx <- which(grepl(":", terms) & grepl(var_a, terms) & grepl(var_b, terms))
} else {
idx <- which(grepl(paste0("^", var_a), terms))
}
if (!length(idx)) return(NULL)
tab[idx[1], , drop = FALSE]
}
# Build the model frame once, applying the same NA filtering policy as the tests.
# This keeps p-values consistent with the data actually available for inference.
mf_tests <- plot_means_model_frame(v$formula_eval, data = v$data, na.action = na.omit, context = "plot_means_compute_pvalues()")
if (nrow(mf_tests) == 0) return(NULL)
names(mf_tests)[1] <- ".__y"
mf_tests$.__x1 <- as.factor(mf_tests[[v$x1_name]])
if (!is.null(v$x2_name)) mf_tests$.__x2 <- as.factor(mf_tests[[v$x2_name]])
cluster_mf <- plot_means_align_cluster(v, mf_tests)
if (!is.null(cluster_mf)) mf_tests$.__cluster <- cluster_mf
# Fit helper that keeps the clustered and non-clustered paths in one place.
# Returns the fitted model (or NA) so callers can extract the term row they need.
p_lm2_x1 <- function(df_sub) {
if (nrow(df_sub) == 0) return(NA_real_)
fit <- if (is.null(v$cluster_vec)) {
lm2(.__y ~ .__x1, data = df_sub)
} else {
lm2(.__y ~ .__x1, data = df_sub, clusters = df_sub$.__cluster)
}
fit
}
ttest_x1 <- function(df_sub) {
out <- list(t.value = NA_real_, p.value = NA_real_, df = NA_real_, method = "Welch t-test")
if (nrow(df_sub) == 0) return(out)
if (!(".__y" %in% names(df_sub)) || !(".__x1" %in% names(df_sub))) return(out)
if (length(unique(df_sub$.__x1)) < 2) return(out)
tt <- tryCatch(
stats::t.test(.__y ~ .__x1, data = df_sub, var.equal = FALSE),
error = function(e) NULL
)
if (is.null(tt)) return(out)
out$t.value <- as.numeric(tt$statistic[[1]])
out$p.value <- as.numeric(tt$p.value)
out$df <- as.numeric(tt$parameter[[1]])
out
}
x1_levels <- params$x1_levels
x2_levels <- params$x2_levels
x1_is_binary <- length(x1_levels) == 2
x2_is_binary <- !is.null(v$x2_name) && length(x2_levels) == 2
x3_is_null <- is.null(v$x3_name)
group_label <- function(x1, x2 = NULL, x3 = NULL) {
parts <- character(0)
if (!is.null(x3) && nzchar(x3)) parts <- c(parts, x3)
if (!is.null(x2) && nzchar(x2)) parts <- c(parts, x2)
parts <- c(parts, x1)
paste(parts, collapse = "_")
}
mean_lookup <- function(x1, x2 = NULL, x3 = NULL) {
df <- mean_results
keep <- rep(TRUE, nrow(df))
if (!is.null(v$x1_name) && v$x1_name %in% names(df)) keep <- keep & as.character(df[[v$x1_name]]) == as.character(x1)
if (!is.null(v$x2_name) && v$x2_name %in% names(df) && !is.null(x2)) keep <- keep & as.character(df[[v$x2_name]]) == as.character(x2)
if (!is.null(v$x3_name) && v$x3_name %in% names(df) && !is.null(x3)) keep <- keep & as.character(df[[v$x3_name]]) == as.character(x3)
val <- df$mean[keep]
if (!length(val)) return(NA_real_)
as.numeric(val[1])
}
# Output table: one row per displayed comparison
# group1/group2 are plot labels (used to map to specific bars), and mean/diff are included
# so the draw step can position p-value brackets relative to the bars.
rows <- data.frame(
group1 = character(0),
group2 = character(0),
mean1 = numeric(0),
mean2 = numeric(0),
diff = numeric(0),
t.value = numeric(0),
p.value = numeric(0),
df = numeric(0),
method = character(0),
stringsAsFactors = FALSE
)
# Scenario 1: x1 only, binary
if (length(v$x_names) == 1 && x1_is_binary) {
df_sub <- mf_tests[, c(".__y", ".__x1"), drop = FALSE]
if (is.null(v$cluster_vec)) {
tt <- ttest_x1(df_sub)
t_val <- tt$t.value
p_val <- tt$p.value
df_val <- tt$df
method <- tt$method
} else {
fit <- p_lm2_x1(df_sub)
tr <- get_term_row(fit, ".__x1")
t_val <- if (is.null(tr)) NA_real_ else as.numeric(tr$t)
p_val <- if (is.null(tr)) NA_real_ else as.numeric(tr$p.value)
df_val <- if (is.null(tr) || !"df" %in% names(tr)) NA_real_ else as.numeric(tr$df)
method <- "regression (clustered SE)"
}
g1 <- group_label(x1_levels[1])
g2 <- group_label(x1_levels[2])
m1 <- mean_lookup(x1_levels[1])
m2 <- mean_lookup(x1_levels[2])
rows <- rbind(rows, data.frame(
group1 = g1,
group2 = g2,
mean1 = m1,
mean2 = m2,
diff = m2 - m1,
t.value = t_val,
p.value = p_val,
df = df_val,
method = method,
stringsAsFactors = FALSE
))
return(rows)
}
# Scenario 2: x1 and x2, both binary, no x3
if (length(v$x_names) == 2 && x1_is_binary && x2_is_binary && x3_is_null) {
for (x2v in x2_levels) {
df_sub <- mf_tests[mf_tests$.__x2 == x2v, c(".__y", ".__x1"), drop = FALSE]
if (is.null(v$cluster_vec)) {
tt <- ttest_x1(df_sub)
t_val <- tt$t.value
p_val <- tt$p.value
df_val <- tt$df
method <- tt$method
} else {
fit <- p_lm2_x1(df_sub)
tr <- get_term_row(fit, ".__x1")
t_val <- if (is.null(tr)) NA_real_ else as.numeric(tr$t)
p_val <- if (is.null(tr)) NA_real_ else as.numeric(tr$p.value)
df_val <- if (is.null(tr) || !"df" %in% names(tr)) NA_real_ else as.numeric(tr$df)
method <- "regression (clustered SE)"
}
g1 <- group_label(x1_levels[1], x2 = x2v)
g2 <- group_label(x1_levels[2], x2 = x2v)
m1 <- mean_lookup(x1_levels[1], x2 = x2v)
m2 <- mean_lookup(x1_levels[2], x2 = x2v)
rows <- rbind(rows, data.frame(
group1 = g1,
group2 = g2,
mean1 = m1,
mean2 = m2,
diff = m2 - m1,
t.value = t_val,
p.value = p_val,
df = df_val,
method = method,
stringsAsFactors = FALSE
))
}
# Interaction: always use lm2(), even when cluster is NULL
fit_int <- if (is.null(v$cluster_vec)) {
lm2(.__y ~ .__x1 * .__x2, data = mf_tests[, c(".__y", ".__x1", ".__x2"), drop = FALSE])
} else {
lm2(.__y ~ .__x1 * .__x2, data = mf_tests[, c(".__y", ".__x1", ".__x2", ".__cluster"), drop = FALSE], clusters = mf_tests$.__cluster)
}
tr_int <- get_term_row(fit_int, ".__x1", ".__x2", want_interaction = TRUE)
if (!is.null(tr_int)) {
d_first <- mean_lookup(x1_levels[2], x2 = x2_levels[1]) - mean_lookup(x1_levels[1], x2 = x2_levels[1])
d_second <- mean_lookup(x1_levels[2], x2 = x2_levels[2]) - mean_lookup(x1_levels[1], x2 = x2_levels[2])
rows <- rbind(rows, data.frame(
group1 = paste0("interaction(", v$x1_name, ":", v$x2_name, ")"),
group2 = "",
mean1 = as.numeric(d_first),
mean2 = as.numeric(d_second),
diff = as.numeric(tr_int$estimate),
t.value = as.numeric(tr_int$t),
p.value = as.numeric(tr_int$p.value),
df = if ("df" %in% names(tr_int)) as.numeric(tr_int$df) else NA_real_,
method = if (is.null(v$cluster_vec)) "regression interaction" else "regression interaction (clustered SE)",
stringsAsFactors = FALSE
))
}
return(rows)
}
# Scenario 3: x1 binary, x2 has >2 levels, no x3
if (length(v$x_names) == 2 && x1_is_binary && !is.null(v$x2_name) && length(x2_levels) > 2 && x3_is_null) {
for (x2v in x2_levels) {
df_sub <- mf_tests[mf_tests$.__x2 == x2v, c(".__y", ".__x1"), drop = FALSE]
if (is.null(v$cluster_vec)) {
tt <- ttest_x1(df_sub)
t_val <- tt$t.value
p_val <- tt$p.value
df_val <- tt$df
method <- tt$method
} else {
fit <- p_lm2_x1(df_sub)
tr <- get_term_row(fit, ".__x1")
t_val <- if (is.null(tr)) NA_real_ else as.numeric(tr$t)
p_val <- if (is.null(tr)) NA_real_ else as.numeric(tr$p.value)
df_val <- if (is.null(tr) || !"df" %in% names(tr)) NA_real_ else as.numeric(tr$df)
method <- "regression (clustered SE)"
}
g1 <- group_label(x1_levels[1], x2 = x2v)
g2 <- group_label(x1_levels[2], x2 = x2v)
m1 <- mean_lookup(x1_levels[1], x2 = x2v)
m2 <- mean_lookup(x1_levels[2], x2 = x2v)
rows <- rbind(rows, data.frame(
group1 = g1,
group2 = g2,
mean1 = m1,
mean2 = m2,
diff = m2 - m1,
t.value = t_val,
p.value = p_val,
df = df_val,
method = method,
stringsAsFactors = FALSE
))
}
return(rows)
}
NULL
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.