Nothing
#' Plot Empirical Cumulative Distribution Functions by Group
#'
#' Plots empirical cumulative distribution functions (ECDFs) separately for
#' each unique value of a grouping variable, with support for vectorized
#' plotting parameters. If no grouping variable is provided, plots a single ECDF.
#'
#' @param formula Two possible uses (similar to \code{t.test()}):
#' \itemize{
#' \item {Single Variable (possibly by subgroup)}: \code{plot_cdf(y)} or \code{plot_cdf(y~x)}
#' \item {Contrast Two Variables}: \code{plot_cdf(y1, y2)}
#' }
#' @param y2 optional second variable when contrasting two variables \code{plot_cdf(y1,y2)}
#' @param data An optional data frame containing the variables in the formula.
#' If \code{data} is not provided, variables are evaluated from the calling environment.
#' @param order Controls the order in which groups appear in the plot and legend.
#' Use \code{-1} to reverse the default order. Alternatively, provide a vector specifying
#' the exact order (e.g., \code{c("B", "A", "C")}). If \code{NULL} (default), groups are
#' ordered by their factor levels (if the grouping variable is a factor) or sorted
#' alphabetically/numerically. Only applies when using grouped plots.
#' @param show.ks Logical. If TRUE (default), shows Kolmogorov-Smirnov test results
#' when there are exactly 2 groups. If FALSE, KS test results are not displayed.
#' @param show.quantiles Logical. If TRUE (default), shows horizontal lines and results
#' at 25th, 50th, and 75th percentiles when there are exactly 2 groups. If FALSE,
#' quantile lines and results are not displayed.
#' @param ... Additional arguments passed to plotting functions. Can be single values
#' (applied to all groups) or vectors (applied element-wise to each group).
#' Common parameters include \code{col}, \code{lwd}, \code{lty}, \code{pch},
#' \code{type}, etc.
#'
#' @return Invisibly returns a list containing:
#' \itemize{
#' \item \code{ecdfs}: A list of ECDF function objects, one per group. Each can be
#' called as a function to compute cumulative probabilities (e.g., \code{result$ecdfs[[1]](5)}
#' returns P(X <= 5) for group 1).
#' \item \code{ks_test}: (Only when exactly 2 groups) The Kolmogorov-Smirnov test result
#' comparing the two distributions. Access p-value with \code{result$ks_test$p.value}.
#' \item \code{quantile_regression_25}: (Only when exactly 2 groups) Quantile regression
#' model for the 25th percentile.
#' \item \code{quantile_regression_50}: (Only when exactly 2 groups) Quantile regression
#' model for the 50th percentile (median).
#' \item \code{quantile_regression_75}: (Only when exactly 2 groups) Quantile regression
#' model for the 75th percentile.
#' \item \code{warnings}: Any warnings captured during execution (if any).
#' }
#'
#' @examples
#' # Basic usage with single variable (no grouping)
#' y <- rnorm(100)
#' plot_cdf(y)
#'
#' # Basic usage with formula syntax and grouping
#' group <- rep(c("A", "B", "C"), c(30, 40, 30))
#' plot_cdf(y ~ group)
#'
#' # With custom colors (scalar - same for all)
#' plot_cdf(y ~ group, col = "blue")
#'
#' # With custom colors (vector - different for each group)
#' plot_cdf(y ~ group, col = c("red", "green", "blue"))
#'
#' # Multiple parameters
#' plot_cdf(y ~ group, col = c("red", "green", "blue"), lwd = c(1, 2, 3))
#'
#' # With line type and point character
#' plot_cdf(y ~ group, col = c("red", "green", "blue"), lty = c(1, 2, 3), lwd = 2)
#'
#' # Using data frame
#' df <- data.frame(value = rnorm(100), group = rep(c("A", "B"), 50))
#' plot_cdf(value ~ group, data = df)
#' plot_cdf(value ~ group, data = df, col = c("red", "blue"))
#'
#' # Compare two vectors
#' y1 <- rnorm(50)
#' y2 <- rnorm(50, mean = 1)
#' plot_cdf(y1, y2)
#'
#' # Formula syntax without data (variables evaluated from environment)
#' widgetness <- rnorm(100)
#' gender <- rep(c("M", "F"), 50)
#' plot_cdf(widgetness ~ gender)
#'
#' # Using the returned object
#' df <- data.frame(value = c(rnorm(50, 0), rnorm(50, 1)), group = rep(c("A", "B"), each = 50))
#' result <- plot_cdf(value ~ group, data = df)
#'
#' # Use ECDF to find P(X <= 0.5) for group A
#' result$ecdfs[[1]](0.5)
#'
#' # Access KS test p-value
#' result$ks_test$p.value
#'
#' # Summarize median quantile regression
#' summary(result$quantile_regression_50)
#' @export
plot_cdf <- function(formula, y2 = NULL, data = NULL, order = NULL, show.ks = TRUE, show.quantiles = TRUE, ...) {
#0. CAPTURE UNEVALUATED ARGUMENTS FIRST (before ANY evaluation!)
mc <- match.call()
# Resolve first argument (formula or first vector)
formula_resolved <- evaluate_variable_arguments(
arg_expr = mc$formula,
arg_name = "formula",
data = data,
calling_env = parent.frame(),
func_name = "plot_cdf",
allow_null = FALSE
)
# Resolve second argument if present (for two-vector mode)
y2_resolved <- if (!is.null(mc$y2)) {
evaluate_variable_arguments(
arg_expr = mc$y2,
arg_name = "y2",
data = data,
calling_env = parent.frame(),
func_name = "plot_cdf",
allow_null = TRUE
)
} else {
list(value = NULL, name = NULL, name_raw = NULL, was_symbol = FALSE)
}
# Now overwrite the arguments with resolved values
formula <- formula_resolved$value
y2 <- y2_resolved$value
# Extract plotting parameters from ...
dots <- list(...)
# Remove show.ks and show.quantiles from dots if passed (they're formal parameters)
dots$show.ks <- NULL
dots$show.quantiles <- NULL
# Check if we're in two-vector comparison mode (formula is vector, y is vector)
if (!is.null(y2) && !inherits(formula, "formula")) {
# Two-vector comparison mode: plot_cdf(y1, y2)
# Use resolved names
y1_name <- formula_resolved$name
y2_name <- y2_resolved$name
# Validate inputs
if (!is.numeric(formula) || !is.vector(formula)) {
stop(sprintf("plot_cdf(): First argument '%s' must be a numeric vector", y1_name), call. = FALSE)
}
if (!is.numeric(y2) || !is.vector(y2)) {
stop(sprintf("plot_cdf(): Second argument '%s' must be a numeric vector", y2_name), call. = FALSE)
}
# Create a data frame and recursively call with grouped syntax
df <- data.frame(
value = c(formula, y2),
group = c(rep(y1_name, length(formula)), rep(y2_name, length(y2))),
stringsAsFactors = FALSE
)
# Forward all arguments to the grouped version
return(plot_cdf(value ~ group, data = df, order = order,
show.ks = show.ks, show.quantiles = show.quantiles, ...))
}
# Validate formula early if it is one
validate_formula(formula, data, func_name = "plot_cdf", calling_env = parent.frame())
# Check if input is a formula or a variable
is_formula_input <- tryCatch(inherits(formula, "formula"), error = function(e) FALSE)
# Capture data name for error messages
mc <- match.call()
data_expr <- mc$data
data_name <- if (!is.null(data_expr)) {
data_name_val <- deparse(data_expr)
gsub('^"|"$', '', data_name_val)
} else {
NULL
}
# Capture original group before validation (to preserve factor levels)
group_original_before_validation <- NULL
if (is_formula_input && inherits(formula, "formula")) {
f <- formula
if (length(f) >= 3L && identical(f[[1L]], quote(`~`))) {
rhs <- f[[3L]]
rhs_is_intercept_only <- is.numeric(rhs) && length(rhs) == 1L &&
!is.na(rhs[1]) && rhs[1] == 1
if (!rhs_is_intercept_only) {
group_original_before_validation <- tryCatch({
if (!is.null(data)) {
eval(rhs, envir = data, enclos = parent.frame())
} else {
fe <- environment(f)
if (is.null(fe)) fe <- parent.frame()
eval(rhs, envir = fe)
}
}, error = function(e) NULL)
}
}
}
# Validate inputs using validation function shared with plot_density, plot_cdf, plot_freq
# If formula input, use validate_plot
if (is_formula_input) {
validated <- validate_plot(formula, NULL, data, func_name = "plot_cdf", require_group = FALSE, data_name = data_name)
} else {
# Not a formula - use resolved names from evaluate_variable_arguments
validated <- list(
y = formula,
group = NULL,
y_name = formula_resolved$name,
group_name = NULL,
y_name_raw = formula_resolved$name_raw,
group_name_raw = NULL,
data_name = data_name
)
}
y <- validated$y
group <- validated$group
y_name <- validated$y_name
group_name <- validated$group_name
y_name_raw <- validated$y_name_raw
group_name_raw <- validated$group_name_raw
data_name <- validated$data_name
# Store original group for factor level checking (before NA removal)
# Use the version captured before validation if available (to preserve factor levels)
group_original <- if (!is.null(group_original_before_validation)) {
group_original_before_validation
} else {
group
}
# Check if group is provided
has_group <- !is.null(group)
# Drop missing data
if (has_group) {
isnagroup=is.na(group)
isnay=is.na(y)
group=group[!isnagroup & !isnay]
y=y[!isnagroup & !isnay]
n.nagroup = sum(isnagroup)
n.nay = sum(isnay)
if (n.nagroup>0) message2("plot_cdf() says: dropped ",n.nagroup," observations with missing '",group_name_raw,"' values",col='red2')
if (n.nay>0) message2("plot_cdf() says: dropped ",n.nay," observations with missing '",y_name_raw,"' values",col='red2')
# Get unique groups and apply ordering
unique_groups <- unique(group)
n_groups <- length(unique_groups)
# Check if order = -1 (reverse default order)
reverse_order <- FALSE
if (!is.null(order) && length(order) == 1 && is.numeric(order) && order == -1) {
reverse_order <- TRUE
order <- NULL # Process as default, then reverse
}
if (!is.null(order)) {
# User specified custom order
# Validate that order contains all groups
missing_groups <- setdiff(unique_groups, order)
extra_groups <- setdiff(order, unique_groups)
if (length(missing_groups) > 0) {
stop(sprintf("plot_cdf(): 'order' is missing group(s): %s",
paste(missing_groups, collapse = ", ")), call. = FALSE)
}
if (length(extra_groups) > 0) {
warning(sprintf("plot_cdf(): 'order' contains group(s) not in data: %s",
paste(extra_groups, collapse = ", ")))
}
# Use the specified order (only groups that exist in data)
unique_x <- order[order %in% unique_groups]
} else if (is.factor(group_original)) {
# Respect factor levels
factor_levels <- levels(group_original)
# Only include levels that actually appear in the data
unique_x <- factor_levels[factor_levels %in% unique_groups]
} else {
# Default: sort alphabetically/numerically
unique_x <- sort(unique_groups)
}
# Reverse order if order = -1 was specified
if (reverse_order) {
unique_x <- rev(unique_x)
}
} else {
# No group variable - just drop missing y values
isnay=is.na(y)
y=y[!isnay]
n.nay = sum(isnay)
if (n.nay>0) message2("plot_cdf() says: dropped ",n.nay," observations with missing '",y_name_raw,"' values",col='red2')
unique_x <- NULL
n_groups <- 1
}
# Initialize return values for tests (when 2 groups)
ks_test_result <- NULL
quantile_regression_25 <- NULL
quantile_regression_50 <- NULL
quantile_regression_75 <- NULL
warnings_list <- list()
# Get default colors based on number of groups
default_colors <- get.colors(n_groups)
# Helper function to extract parameter value for a group
get_param <- function(param_name, group_idx) {
if (param_name %in% names(dots)) {
param_val <- dots[[param_name]]
if (length(param_val) == 1) {
return(param_val)
} else if (length(param_val) >= group_idx) {
return(param_val[group_idx])
} else {
return(param_val[1]) # Recycle if shorter
}
}
return(NULL)
}
# Compute ECDFs for each group (or single ECDF if no group)
ecdf_list <- list()
y_ranges <- list()
if (has_group) {
for (i in seq_along(unique_x)) {
group_val <- unique_x[i]
y_group <- y[group == group_val]
if (length(y_group) > 0) {
ecdf_list[[i]] <- ecdf(y_group)
y_ranges[[i]] <- range(y_group)
}
}
} else {
# No group - single ECDF for all data
if (length(y) > 0) {
ecdf_list[[1]] <- ecdf(y)
y_ranges[[1]] <- range(y)
}
}
# Determine overall range for plotting
all_y <- unlist(y_ranges)
y_min <- min(all_y, na.rm = TRUE)
y_max <- max(all_y, na.rm = TRUE)
y_range <- y_max - y_min
y_lim <- c(y_min - 0.05 * y_range, y_max + 0.05 * y_range)
# Create sequence for plotting ECDF
y_seq <- seq(y_min, y_max, length.out = 1000)
# Helper function for NULL coalescing
`%||%` <- function(x, y) if (is.null(x)) y else x
# Plot first ECDF to set up the plot
if (length(ecdf_list) > 0) {
first_ecdf <- ecdf_list[[1]]
first_y_vals <- first_ecdf(y_seq)
# Get parameters for first group
col1 <- get_param("col", 1) %||% default_colors[1]
lwd1 <- get_param("lwd", 1) %||% 4 # Default lwd=4
lty1 <- get_param("lty", 1) %||% 1
type1 <- get_param("type", 1) %||% "s"
pch1 <- get_param("pch", 1)
# Build plot arguments
# Set main title if not provided
if (!"main" %in% names(dots)) {
if (has_group) {
main_title <- paste0("Comparing Distribution of '", y_name, "' by '", group_name, "'")
} else {
main_title <- paste0("Distribution of '", y_name, "'")
}
} else {
main_title <- dots$main
}
# Set font and size for main title if not provided
font_main <- if ("font.main" %in% names(dots)) dots$font.main else 2
cex_main <- if ("cex.main" %in% names(dots)) dots$cex.main else 1.38
# Set xlab if not provided
xlab_title <- if ("xlab" %in% names(dots)) dots$xlab else y_name
# Set ylab if not provided
ylab_title <- if ("ylab" %in% names(dots)) dots$ylab else "% of observations"
# Set default ylim if not provided (extend to accommodate legend above plot if groups exist)
if (!"ylim" %in% names(dots)) {
if (has_group && n_groups > 1) {
default_ylim <- c(0, 1.25) # Reserve top 20% for legend (1.25 = 1 / 0.8)
} else {
default_ylim <- c(0, 1)
}
} else {
default_ylim <- dots$ylim
}
# Set default xlim if not provided (add padding to prevent clipping at edges)
if (!"xlim" %in% names(dots)) {
x_range <- y_max - y_min
default_xlim <- c(y_min - 0.05 * x_range, y_max + 0.05 * x_range)
} else {
default_xlim <- dots$xlim
}
# Ensure adequate top margin for main title and legend
# Only save/restore what we modify; restoring full `par()` can reset `mfg`
# and break multi-panel plotting via `par(mfrow=...)`.
old_mar <- par("mar")
old_mgp <- par("mgp")
on.exit(par(mar = old_mar, mgp = old_mgp), add = TRUE)
if (!"mar" %in% names(dots)) {
# Increase top margin if it's too small (less than 3 lines to accommodate title and legend)
# Add 1 line to the left margin
new_mar <- c(old_mar[1], old_mar[2] + 1, old_mar[3], old_mar[4])
if (new_mar[3] < 3) {
new_mar[3] <- 3
}
par(mar = new_mar)
}
# Move ylab 0.75 lines to the left
if (!"mgp" %in% names(dots)) {
new_mgp <- c(old_mgp[1], old_mgp[2] - 0.75, old_mgp[3])
par(mgp = new_mgp)
}
# Remove vectorized parameters and data from dots for plot()
# Also remove xlab, ylab, main since we handle them separately
plot_dots <- dots
vectorized_params <- c("col", "lwd", "lty", "type", "pch", "data")
plot_params_to_remove <- c(vectorized_params, "xlab", "ylab", "main")
plot_dots[plot_params_to_remove] <- NULL
plot_args <- list(x = y_seq, y = first_y_vals,
type = type1, col = col1, lwd = lwd1, lty = lty1,
xlab = xlab_title,
ylab = ylab_title,
main = main_title,
font.main = font_main,
cex.main = cex_main,
xlim = default_xlim,
ylim = default_ylim,
font.lab = 2, cex.lab = 1.2, las = 1,
yaxt = "n", # Suppress default y-axis to draw custom percentage axis
xaxt = "n") # Suppress default x-axis to redraw with adjusted label position
if (!is.null(pch1)) plot_args$pch <- pch1
# Set up plot
do.call(plot, c(plot_args, plot_dots))
# Draw custom y-axis with percentage labels
y_ticks <- seq(0, 1, by = 0.25)
y_labels <- paste0(y_ticks * 100, "%")
axis(2, at = y_ticks, labels = y_labels, las = 1)
# Draw x-axis with labels moved lower (padj moves labels down)
axis(1, padj = 0.5)
# Add remaining ECDFs
if (length(ecdf_list) > 1) {
for (i in 2:length(ecdf_list)) {
ecdf_fun <- ecdf_list[[i]]
y_vals <- ecdf_fun(y_seq)
# Get parameters for this group
coli <- get_param("col", i) %||% default_colors[i]
lwdi <- get_param("lwd", i) %||% 4 # Default lwd=4
ltyi <- get_param("lty", i) %||% 1
typei <- get_param("type", i) %||% "s"
pchi <- get_param("pch", i)
# Build lines arguments
lines_args <- list(x = y_seq, y = y_vals, type = typei,
col = coli, lwd = lwdi, lty = ltyi)
if (!is.null(pchi)) lines_args$pch <- pchi
do.call(lines, lines_args)
}
}
# Add legend on top (only if there are groups)
if (has_group && n_groups > 1) {
legend_cols <- sapply(1:length(ecdf_list), function(i) get_param("col", i) %||% default_colors[i])
legend_lwds <- sapply(1:length(ecdf_list), function(i) get_param("lwd", i) %||% 4)
legend_ltys <- sapply(1:length(ecdf_list), function(i) get_param("lty", i) %||% 1)
# Calculate sample sizes for each group and add to legend labels
group_ns <- sapply(1:length(ecdf_list), function(i) {
length(y[group == unique_x[i]])
})
# Create legend labels with sample sizes (newline before sample size, lowercase n)
legend_labels <- paste0(as.character(unique_x), "\n(n=", group_ns, ")")
legend("top", legend = legend_labels,
col = legend_cols, lwd = legend_lwds, lty = legend_ltys,
horiz = TRUE, bty = "n")
}
# If exactly 2 groups, perform KS test and quantile regression tests
if (n_groups == 2) {
# Get data for both groups
y1 <- y[group == unique_x[1]]
y2 <- y[group == unique_x[2]]
# Kolmogorov-Smirnov test (capture warnings about ties)
ks_test <- withCallingHandlers(
ks.test(y1, y2),
warning = function(w) {
if (grepl("p-value will be approximate in the presence of ties", w$message, ignore.case = TRUE)) {
warnings_list$ks_ties <<- w$message
invokeRestart("muffleWarning")
}
}
)
ks_test_result <- ks_test
# Quantile probabilities (used for both computation and display)
quantile_probs <- c(0.25, 0.50, 0.75)
# Add horizontal lines at 25%, 50%, and 75% of cumulative probability (only if show.quantiles is TRUE)
if (show.quantiles) {
abline(h = quantile_probs, lty = 2, col = "gray80")
}
# Quantile regression tests at 25th, 50th, and 75th percentiles
# Always compute quantile regressions (for return value), but only display if show.quantiles is TRUE
if (requireNamespace("quantreg", quietly = TRUE)) {
# Show message about independence assumption (only once per session)
if (!exists("plot_cdf_message_shown", envir = .statuser_state, inherits = FALSE)) {
message2("The p-values are based on quantile regressions that assume all observations are independent", col = "red")
assign("plot_cdf_message_shown", TRUE, envir = .statuser_state)
}
# Create data frame for quantile regression
df_qr <- data.frame(y = y, x_group = as.numeric(group == unique_x[2]))
# Calculate quantiles for each group (only needed if show.quantiles is TRUE for display)
if (show.quantiles) {
q1_25 <- quantile(y1, probs = 0.25, na.rm = TRUE)
q1_50 <- quantile(y1, probs = 0.50, na.rm = TRUE)
q1_75 <- quantile(y1, probs = 0.75, na.rm = TRUE)
q2_25 <- quantile(y2, probs = 0.25, na.rm = TRUE)
q2_50 <- quantile(y2, probs = 0.50, na.rm = TRUE)
q2_75 <- quantile(y2, probs = 0.75, na.rm = TRUE)
}
# Get ECDF functions for finding intersections
ecdf1 <- ecdf_list[[1]]
ecdf2 <- ecdf_list[[2]]
# Test quantiles and get p-values, store models as separate objects
quantile_pvals <- character(length(quantile_probs))
quantile_regression_25 <- NULL
quantile_regression_50 <- NULL
quantile_regression_75 <- NULL
for (i in seq_along(quantile_probs)) {
tau <- quantile_probs[i]
tryCatch({
# Capture warnings about non-uniqueness in quantile regression
qr_model <- withCallingHandlers(
quantreg::rq(y ~ x_group, data = df_qr, tau = tau),
warning = function(w) {
if (grepl("non.*unique|nonunique", w$message, ignore.case = TRUE)) {
warning_key <- paste0("quantile_regression_", tau, "_nonunique")
warnings_list[[warning_key]] <<- w$message
invokeRestart("muffleWarning")
}
}
)
# Store model first (before summary, so it's stored even if summary fails)
qr_summary <- withCallingHandlers(
summary(qr_model, se = "iid"),
warning = function(w) {
if (grepl("non.*unique|nonunique", w$message, ignore.case = TRUE)) {
warning_key <- paste0("quantile_regression_", tau, "_summary_nonunique")
warnings_list[[warning_key]] <<- w$message
invokeRestart("muffleWarning")
}
}
)
qr_p <- qr_summary$coefficients[2, 4] # p-value for x_group coefficient
quantile_pvals[i] <- format_pvalue(qr_p, include_p = TRUE)
# Store model with appropriate name
if (tau == 0.25) {
quantile_regression_25 <- qr_model
} else if (tau == 0.50) {
quantile_regression_50 <- qr_model
} else if (tau == 0.75) {
quantile_regression_75 <- qr_model
}
}, error = function(e) {
quantile_pvals[i] <- "NA"
})
}
# Get plot boundaries
usr <- par("usr")
x_range <- usr[2] - usr[1]
y_range <- usr[4] - usr[3]
# Add p-values just above the horizontal lines (only if show.quantiles is TRUE)
if (show.quantiles) {
for (i in seq_along(quantile_probs)) {
text(x = usr[1] + 0.02 * x_range, y = quantile_probs[i] + 0.02,
labels = quantile_pvals[i],
adj = c(0, 0.5), cex = 0.8, font = 2)
}
# Add value labels next to CDF lines
# Position labels based on which CDF is to the left/right at each quantile
# Use adj to control alignment and add offset to separate labels
usr <- par("usr")
x_range <- usr[2] - usr[1]
label_offset <- 0.05 / 3 * x_range # ~1.67% of plot width for separation
# 25th percentile
if (q1_25 < q2_25) {
# Group 1 is to the left, right-align its label and offset left
text(x = q1_25 - label_offset, y = 0.25 + 0.02, labels = round(q1_25, 2),
adj = c(1, 0.5), cex = 0.8, font = 2, col = legend_cols[1])
# Group 2 is to the right, left-align its label and offset right
text(x = q2_25 + label_offset, y = 0.25 + 0.02, labels = round(q2_25, 2),
adj = c(0, 0.5), cex = 0.8, font = 2, col = legend_cols[2])
} else {
# Group 2 is to the left, right-align its label and offset left
text(x = q2_25 - label_offset, y = 0.25 + 0.02, labels = round(q2_25, 2),
adj = c(1, 0.5), cex = 0.8, font = 2, col = legend_cols[2])
# Group 1 is to the right, left-align its label and offset right
text(x = q1_25 + label_offset, y = 0.25 + 0.02, labels = round(q1_25, 2),
adj = c(0, 0.5), cex = 0.8, font = 2, col = legend_cols[1])
}
# 50th percentile
if (q1_50 < q2_50) {
# Group 1 is to the left, right-align its label and offset left
text(x = q1_50 - label_offset, y = 0.50 + 0.02, labels = round(q1_50, 2),
adj = c(1, 0.5), cex = 0.8, font = 2, col = legend_cols[1])
# Group 2 is to the right, left-align its label and offset right
text(x = q2_50 + label_offset, y = 0.50 + 0.02, labels = round(q2_50, 2),
adj = c(0, 0.5), cex = 0.8, font = 2, col = legend_cols[2])
} else {
# Group 2 is to the left, right-align its label and offset left
text(x = q2_50 - label_offset, y = 0.50 + 0.02, labels = round(q2_50, 2),
adj = c(1, 0.5), cex = 0.8, font = 2, col = legend_cols[2])
# Group 1 is to the right, left-align its label and offset right
text(x = q1_50 + label_offset, y = 0.50 + 0.02, labels = round(q1_50, 2),
adj = c(0, 0.5), cex = 0.8, font = 2, col = legend_cols[1])
}
# 75th percentile
if (q1_75 < q2_75) {
# Group 1 is to the left, right-align its label and offset left
text(x = q1_75 - label_offset, y = 0.75 + 0.02, labels = round(q1_75, 2),
adj = c(1, 0.5), cex = 0.8, font = 2, col = legend_cols[1])
# Group 2 is to the right, left-align its label and offset right
text(x = q2_75 + label_offset, y = 0.75 + 0.02, labels = round(q2_75, 2),
adj = c(0, 0.5), cex = 0.8, font = 2, col = legend_cols[2])
} else {
# Group 2 is to the left, right-align its label and offset left
text(x = q2_75 - label_offset, y = 0.75 + 0.02, labels = round(q2_75, 2),
adj = c(1, 0.5), cex = 0.8, font = 2, col = legend_cols[2])
# Group 1 is to the right, left-align its label and offset right
text(x = q1_75 + label_offset, y = 0.75 + 0.02, labels = round(q1_75, 2),
adj = c(0, 0.5), cex = 0.8, font = 2, col = legend_cols[1])
}
}
}
# Add KS test results in bottom right corner (only if show.ks is TRUE)
if (show.ks) {
ks_d <- round(ks_test$statistic, 3)
ks_p <- format_pvalue(ks_test$p.value, include_p = TRUE)
usr <- par("usr")
x_range <- usr[2] - usr[1]
y_range <- usr[4] - usr[3]
# Format KS p-value (format_pvalue with include_p=TRUE gives "p = .05")
ks_p_formatted <- format_pvalue(ks_test$p.value, include_p = TRUE)
# Format KS results: "Kolmogorov-Smirnov\nD = xx\np = p"
# format_pvalue already includes "p = ", so we use it directly
ks_values <- paste0("Kolmogorov-Smirnov\nD = ", ks_d, "\n", ks_p_formatted)
# Position in bottom right corner
text(x = usr[2] - 0.02 * x_range, y = usr[3] + 0.02 * y_range,
labels = ks_values,
adj = c(1, 0), cex = 0.8, font = 1)
}
}
}
# Return ECDFs and test results
if (has_group) {
names(ecdf_list) <- as.character(unique_x)
} else {
names(ecdf_list) <- "all"
}
# Build return list
result <- list(ecdfs = ecdf_list)
# Add test results if 2 groups
if (has_group && n_groups == 2) {
result$ks_test <- ks_test_result
# Add quantile regression models as separate named objects
if (!is.null(quantile_regression_25)) {
result$quantile_regression_25 <- quantile_regression_25
}
if (!is.null(quantile_regression_50)) {
result$quantile_regression_50 <- quantile_regression_50
}
if (!is.null(quantile_regression_75)) {
result$quantile_regression_75 <- quantile_regression_75
}
}
# Add warnings if any were captured
if (length(warnings_list) > 0) {
result$warnings <- warnings_list
}
invisible(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.