Nothing
## INTERNAL FUNCTIONS ----
## correltable ----
#' @keywords internal
#' @noRd
compute_correlations <- function(data, method = "pearson", use = "pairwise") {
r <- stats::cor(data, use = use, method = method)
n <- crossprod(!is.na(data))
t <- (r * sqrt(n - 2)) / sqrt(1 - r^2)
p <- 2 * pt(abs(t), n - 2, lower.tail = FALSE)
p[p > 1] <- 1
list(r = r, p = p, n = n)
}
#' Compute t-tests between numeric and binary factor
#' @keywords internal
#' @noRd
compute_ttests <- function(data, num_vars, factor_vars) {
results <- expand.grid(num = num_vars, fac = factor_vars,
stringsAsFactors = FALSE)
results$statistic <- mapply(function(n, f) {
tryCatch(
t.test(data[[n]] ~ data[[f]])$statistic,
error = function(e) NA_real_
)
}, results$num, results$fac)
results$p_value <- mapply(function(n, f) {
tryCatch(
t.test(data[[n]] ~ data[[f]])$p.value,
error = function(e) NA_real_
)
}, results$num, results$fac)
results
}
#' Compute ANOVA F-tests between numeric and multi-level factor
#' @keywords internal
#' @noRd
compute_anova <- function(data, num_vars, factor_vars) {
results <- expand.grid(num = num_vars, fac = factor_vars,
stringsAsFactors = FALSE)
results$statistic <- mapply(function(n, f) {
tryCatch({
aov_result <- aov(data[[n]] ~ data[[f]])
summary(aov_result)[[1]]$"F value"[1]
}, error = function(e) NA_real_)
}, results$num, results$fac)
results$p_value <- mapply(function(n, f) {
tryCatch({
aov_result <- aov(data[[n]] ~ data[[f]])
summary(aov_result)[[1]]$"Pr(>F)"[1]
}, error = function(e) NA_real_)
}, results$num, results$fac)
results
}
#' Compute chi-squared tests between factors
#' @keywords internal
#' @noRd
compute_chisq <- function(data, factor_vars) {
# Only compute upper triangle (no self-comparisons)
pairs <- utils::combn(factor_vars, 2, simplify = FALSE)
results <- data.frame(
var1 = sapply(pairs, `[`, 1),
var2 = sapply(pairs, `[`, 2),
stringsAsFactors = FALSE
)
results$statistic <- mapply(function(v1, v2) {
tryCatch(
chisq.test(data[[v1]], data[[v2]])$statistic,
error = function(e) NA_real_
)
}, results$var1, results$var2)
results$p_value <- mapply(function(v1, v2) {
tryCatch(
chisq.test(data[[v1]], data[[v2]])$p.value,
error = function(e) NA_real_
)
}, results$var1, results$var2)
results
}
#' Add significance stars to p-values
#' @keywords internal
#' @noRd
add_stars <- function(p, alpha = c(0.001, 0.01, 0.05)) {
ifelse(p < alpha[1], "***",
ifelse(p < alpha[2], "**",
ifelse(p < alpha[3], "*", "")))
}
#' Format correlation coefficient
#' @keywords internal
#' @noRd
format_r <- function(r, p, round_n = 2) {
r_fmt <- sub("0\\.", ".", format(round(r, round_n), nsmall = round_n))
paste0(r_fmt, add_stars(p))
}
#' Format t-statistic
#' @keywords internal
#' @noRd
format_t <- function(t_stat, p, round_n = 2) {
t_fmt <- format(round(t_stat, round_n), nsmall = round_n)
paste0("t=", t_fmt, add_stars(p))
}
#' Format F-statistic
#' @keywords internal
#' @noRd
format_f <- function(f_stat, p, round_n = 2) {
f_fmt <- format(round(f_stat, round_n), nsmall = round_n)
paste0("F=", f_fmt, add_stars(p))
}
#' Format chi-squared statistic
#' @keywords internal
#' @noRd
format_chisq <- function(chi_stat, p, round_n = 2) {
chi_fmt <- format(round(chi_stat, round_n), nsmall = round_n)
paste0("X2", chi_fmt, add_stars(p))
}
#' Assemble complete statistics matrix
#' @keywords internal
#' @noRd
assemble_matrix <- function(data, vars, method = "pearson",
use = "pairwise", round_n = 2) {
# Classify variables
var_types <- classify_variables(data, vars)
# Initialize matrix
n_vars <- length(vars)
stat_matrix <- matrix("", nrow = n_vars, ncol = n_vars)
rownames(stat_matrix) <- vars
colnames(stat_matrix) <- vars
diag(stat_matrix) <- "-"
# Correlations for numeric variables
if (length(var_types$numeric) > 0) {
cor_data <- data[, var_types$numeric, drop = FALSE]
cor_results <- compute_correlations(cor_data, method, use)
for (i in seq_along(var_types$numeric)) {
for (j in seq_along(var_types$numeric)) {
if (i != j) {
stat_matrix[var_types$numeric[i], var_types$numeric[j]] <-
format_r(cor_results$r[i, j], cor_results$p[i, j], round_n)
}
}
}
}
# T-tests for numeric vs binary factors
if (length(var_types$numeric) > 0 && length(var_types$binary_factor) > 0) {
t_results <- compute_ttests(data, var_types$numeric, var_types$binary_factor)
for (i in seq_len(nrow(t_results))) {
stat_matrix[t_results$num[i], t_results$fac[i]] <-
format_t(t_results$statistic[i], t_results$p_value[i], round_n)
stat_matrix[t_results$fac[i], t_results$num[i]] <-
stat_matrix[t_results$num[i], t_results$fac[i]]
}
}
# ANOVA for numeric vs multi-level factors
if (length(var_types$numeric) > 0 && length(var_types$multi_factor) > 0) {
f_results <- compute_anova(data, var_types$numeric, var_types$multi_factor)
for (i in seq_len(nrow(f_results))) {
stat_matrix[f_results$num[i], f_results$fac[i]] <-
format_f(f_results$statistic[i], f_results$p_value[i], round_n)
stat_matrix[f_results$fac[i], f_results$num[i]] <-
stat_matrix[f_results$num[i], f_results$fac[i]]
}
}
# Chi-squared for factor vs factor
all_factors <- c(var_types$binary_factor, var_types$multi_factor)
if (length(all_factors) > 1) {
chi_results <- compute_chisq(data, all_factors)
for (i in seq_len(nrow(chi_results))) {
val <- format_chisq(chi_results$statistic[i], chi_results$p_value[i], round_n)
stat_matrix[chi_results$var1[i], chi_results$var2[i]] <- val
stat_matrix[chi_results$var2[i], chi_results$var1[i]] <- val
}
}
stat_matrix
}
#' Classify variables by type
#' @keywords internal
#' @noRd
classify_variables <- function(data, vars) {
numeric_vars <- character(0)
binary_factor_vars <- character(0)
multi_factor_vars <- character(0)
for (v in vars) {
if (is.numeric(data[[v]])) {
numeric_vars <- c(numeric_vars, v)
} else {
# Convert to factor if needed
if (!is.factor(data[[v]])) {
data[[v]] <- factor(data[[v]])
}
n_levels <- nlevels(data[[v]])
if (n_levels == 2) {
binary_factor_vars <- c(binary_factor_vars, v)
} else if (n_levels > 2) {
multi_factor_vars <- c(multi_factor_vars, v)
}
}
}
list(
numeric = numeric_vars,
binary_factor = binary_factor_vars,
multi_factor = multi_factor_vars
)
}
#' Apply triangle selection to matrix
#' @keywords internal
#' @noRd
apply_triangle <- function(mat, tri = "upper", cutempty = FALSE) {
if (tri == "upper") {
mat[lower.tri(mat, diag = TRUE)] <- ""
if (cutempty) {
mat <- mat[-nrow(mat), -1, drop = FALSE]
}
} else if (tri == "lower") {
mat[upper.tri(mat, diag = TRUE)] <- ""
if (cutempty) {
mat <- mat[-1, -ncol(mat), drop = FALSE]
}
}
# tri == "all" returns unchanged
mat
}
#' Validate inputs
#' @keywords internal
#' @noRd
validate_inputs <- function(data, vars, vars2, strata) {
# Check data is data.frame
if (!is.data.frame(data)) {
stop("data must be a data.frame", call. = FALSE)
}
# Check vars exist
if (!is.null(vars) && !all(vars %in% names(data))) {
missing <- setdiff(vars, names(data))
stop("Variables not found in data: ", paste(missing, collapse = ", "),
call. = FALSE)
}
if (!is.null(vars2) && !all(vars2 %in% names(data))) {
missing <- setdiff(vars2, names(data))
stop("vars2 not found in data: ", paste(missing, collapse = ", "),
call. = FALSE)
}
# Check strata
if (!is.null(strata)) {
if (length(strata) != 1 || !strata %in% names(data)) {
stop("strata must be a single variable name in data", call. = FALSE)
}
strata_levels <- unique(na.omit(data[[strata]]))
if (length(strata_levels) != 2) {
stop("strata variable must have exactly 2 levels", call. = FALSE)
}
if (min(table(data[[strata]])) < 3) {
stop("each strata level must have at least 3 observations", call. = FALSE)
}
if (!is.null(vars2)) {
stop("cannot use strata with vars2", call. = FALSE)
}
}
invisible(TRUE)
}
#' Build table caption
#' @keywords internal
#' @noRd
build_caption <- function(method, use, n_obs, n_missing, strata,
data, all_vars) {
# Missing data info
if (use == "complete") {
missing_text <- sprintf("list-wise deletion (N=%d%s)",
n_obs,
if (n_missing > 0) sprintf(", %d cases deleted", n_missing) else "")
} else {
missing_text <- "pairwise deletion"
}
# Strata info
strata_text <- if (!is.null(strata)) {
strata_levels <- sort(unique(na.omit(data[[strata]])))
sprintf(" Data stratified by %s (upper = %s; lower = %s).",
strata, strata_levels[1], strata_levels[2])
} else {
""
}
sprintf(
"Note. %s correlations with %s.%s * p<.05, ** p<.01, *** p<.001",
tools::toTitleCase(method),
missing_text,
strata_text
)
}
### partial_correltable ----
#' @keywords internal
#' @noRd
residualize_variables <- function(data, vars, partialvars) {
# Create output data frame
resid_data <- data.frame(matrix(NA, nrow = nrow(data), ncol = length(vars)))
names(resid_data) <- vars
# Residualize each variable
for (v in vars) {
formula_str <- paste0("scale(", v, ") ~ ", paste(partialvars, collapse = " + "))
model_formula <- as.formula(formula_str)
tryCatch({
fit <- lm(model_formula, data = data, na.action = stats::na.exclude)
resid_data[[v]] <- resid(fit)
}, error = function(e) {
warning("Failed to residualize ", v, ": ", e$message, call. = FALSE)
resid_data[[v]] <- NA
})
}
resid_data
}
#' Validate partial correlation inputs
#' @keywords internal
#' @noRd
validate_partial_inputs <- function(data, vars, var_names,
partialvars, partialvar_names) {
# Check data
if (!is.data.frame(data)) {
stop("data must be a data.frame", call. = FALSE)
}
# Check minimum requirements
if (is.null(vars) || length(vars) < 2) {
stop("must specify at least 2 variables in vars", call. = FALSE)
}
if (is.null(partialvars) || length(partialvars) < 1) {
stop("must specify at least 1 variable in partialvars", call. = FALSE)
}
# Check variables exist
if (!all(vars %in% names(data))) {
missing <- setdiff(vars, names(data))
stop("Variables not found in data: ", paste(missing, collapse = ", "),
call. = FALSE)
}
if (!all(partialvars %in% names(data))) {
missing <- setdiff(partialvars, names(data))
stop("Partial variables not found in data: ", paste(missing, collapse = ", "),
call. = FALSE)
}
# Check name lengths
if (length(var_names) != length(vars)) {
stop("length of var_names must match length of vars", call. = FALSE)
}
if (length(partialvar_names) != length(partialvars)) {
stop("length of partialvar_names must match length of partialvars",
call. = FALSE)
}
invisible(TRUE)
}
#' Clean and prepare variables
#' @keywords internal
#' @noRd
prepare_variables <- function(data, vars, var_names,
partialvars, partialvar_names) {
# Remove duplicates from vars
if (anyDuplicated(vars)) {
keep <- !duplicated(vars)
vars <- vars[keep]
var_names <- var_names[keep]
warning("Removed duplicate entries from vars", call. = FALSE)
}
# Remove duplicates from partialvars
if (anyDuplicated(partialvars)) {
keep <- !duplicated(partialvars)
partialvars <- partialvars[keep]
partialvar_names <- partialvar_names[keep]
warning("Removed duplicate entries from partialvars", call. = FALSE)
}
# Check for numeric vars only
non_numeric_vars <- vars[!sapply(data[vars], is.numeric)]
if (length(non_numeric_vars) > 0) {
warning("Dropping non-numeric vars: ",
paste(non_numeric_vars, collapse = ", "),
call. = FALSE)
# Remove non-numeric vars
keep <- vars %in% setdiff(vars, non_numeric_vars)
vars <- vars[keep]
var_names <- var_names[keep]
}
# Check if vars still has at least 2 variables
if (length(vars) < 2) {
stop("At least 2 numeric variables required in vars after filtering",
call. = FALSE)
}
# Check for overlap between vars and partialvars
overlap <- intersect(vars, partialvars)
if (length(overlap) > 0) {
warning("Variables in both vars and partialvars removed from partialvars: ",
paste(overlap, collapse = ", "),
call. = FALSE)
# Remove overlap from partialvars
keep <- !(partialvars %in% overlap)
partialvars <- partialvars[keep]
partialvar_names <- partialvar_names[keep]
}
# Check if partialvars still has at least 1 variable
if (length(partialvars) < 1) {
stop("At least 1 variable required in partialvars after filtering",
call. = FALSE)
}
list(
vars = vars,
var_names = var_names,
partialvars = partialvars,
partialvar_names = partialvar_names
)
}
#' Handle missing data for partial correlations
#' @keywords internal
#' @noRd
handle_missing_data <- function(data, vars, partialvars, use = "pairwise") {
all_vars <- c(vars, partialvars)
data_subset <- data[, all_vars, drop = FALSE]
if (use == "complete") {
# Complete case analysis: remove any row with ANY missing value
complete_rows <- complete.cases(data_subset)
n_excluded <- sum(!complete_rows)
data_clean <- data_subset[complete_rows, , drop = FALSE]
info <- list(
data = data_clean,
n_excluded = n_excluded,
exclusion_reason = if (n_excluded > 0) {
sprintf("N=%d cases excluded due to missing data", n_excluded)
} else {
NULL
}
)
} else {
# Pairwise: require complete partial variables
# (correlations can still be pairwise on vars)
complete_partial <- complete.cases(data_subset[, partialvars, drop = FALSE])
n_excluded <- sum(!complete_partial)
data_clean <- data_subset[complete_partial, , drop = FALSE]
info <- list(
data = data_clean,
n_excluded = n_excluded,
exclusion_reason = if (n_excluded > 0) {
sprintf("N=%d cases excluded for missing covariates", n_excluded)
} else {
NULL
}
)
}
info
}
#' Build caption for partial correlation table
#' @keywords internal
#' @noRd
build_partial_caption <- function(base_caption, partialvar_names,
exclusion_reason = NULL) {
# Replace "correlation coefficients" with "partial correlation coefficients"
caption <- sub(
"correlation coefficients",
paste0("partial correlation coefficients controlling for ",
paste(partialvar_names, collapse = ", ")),
base_caption
)
# Add exclusion information if present
if (!is.null(exclusion_reason)) {
caption <- sub(
"\\* p<\\.05",
paste0(exclusion_reason, ". * p<.05"),
caption
)
}
caption
}
### FullTable1 ----
#' @keywords internal
#' @noRd
validate_table1_inputs <- function(data, vars, var_names, strata) {
if (!is.data.frame(data)) {
stop("data must be a data.frame", call. = FALSE)
}
if (!is.null(vars) && !all(vars %in% names(data))) {
missing <- setdiff(vars, names(data))
stop("Variables not found in data: ", paste(missing, collapse = ", "),
call. = FALSE)
}
if (!is.null(strata) && !strata %in% names(data)) {
stop("Strata variable '", strata, "' not found in data", call. = FALSE)
}
# Check strata has reasonable number of levels
if (!is.null(strata)) {
strata_unique <- length(unique(na.omit(data[[strata]])))
# Check if truly continuous (many unique values)
if (is.numeric(data[[strata]]) && strata_unique > 10) {
stop("Strata variable '", strata, "' appears to be continuous (",
strata_unique, " unique values). ",
"Table 1 requires a categorical grouping variable. ",
"Consider: (1) creating groups with cut() or (2) using a different grouping variable.",
call. = FALSE)
}
# Warn if many levels (likely still problematic)
if (strata_unique > 10) {
warning("Strata variable '", strata, "' has ", strata_unique,
" levels. Tables with >10 groups may be difficult to interpret.",
call. = FALSE)
}
# Check minimum observations per group
strata_counts <- table(data[[strata]], useNA = "no")
if (any(strata_counts < 2)) {
small_groups <- names(strata_counts[strata_counts < 2])
warning("Strata groups with <2 observations: ",
paste(small_groups, collapse = ", "),
". Statistical tests may fail.",
call. = FALSE)
}
}
if (length(var_names) != length(vars)) {
stop("length of var_names must match length of vars", call. = FALSE)
}
invisible(TRUE)
}
#' Prepare data for Table 1
#' @keywords internal
#' @noRd
prepare_table1_data <- function(data, vars, strata, factor_vars) {
# Convert to data frame (not tibble for consistency)
data <- as.data.frame(data)
# Handle NULL strata - create dummy column
if (is.null(strata)) {
strata <- ".dummy_strata"
data[[strata]] <- factor("Sample")
}
# Handle NULL vars - use all except strata
if (is.null(vars)) {
vars <- setdiff(names(data), strata)
}
# Subset to relevant columns
all_cols <- unique(c(vars, strata))
data_subset <- data[, all_cols, drop = FALSE]
# Convert specified factors
if (!is.null(factor_vars)) {
for (fv in factor_vars) {
if (fv %in% names(data_subset)) {
data_subset[[fv]] <- factor(data_subset[[fv]])
}
}
}
# Convert strata to factor
data_subset[[strata]] <- factor(data_subset[[strata]])
# Auto-convert other types to factor with warnings
conversions <- list()
for (v in vars) {
original_class <- class(data_subset[[v]])[1]
if (is.character(data_subset[[v]])) {
data_subset[[v]] <- factor(data_subset[[v]])
conversions$character <- c(conversions$character, v)
} else if (is.logical(data_subset[[v]])) {
data_subset[[v]] <- factor(data_subset[[v]])
conversions$logical <- c(conversions$logical, v)
} else if (is.ordered(data_subset[[v]])) {
data_subset[[v]] <- factor(data_subset[[v]], ordered = FALSE)
conversions$ordered <- c(conversions$ordered, v)
} else if (!is.factor(data_subset[[v]]) &&
!is.numeric(data_subset[[v]]) &&
length(unique(na.omit(data_subset[[v]]))) == 2) {
data_subset[[v]] <- factor(data_subset[[v]])
conversions$binary <- c(conversions$binary, v)
}
}
# Issue warnings for conversions
if (length(conversions$character) > 0) {
warning("Character variables converted to factor: ",
paste(conversions$character, collapse = ", "), call. = FALSE)
}
if (length(conversions$logical) > 0) {
warning("Logical variables converted to factor: ",
paste(conversions$logical, collapse = ", "), call. = FALSE)
}
if (length(conversions$ordered) > 0) {
warning("Ordered variables converted to unordered factor: ",
paste(conversions$ordered, collapse = ", "), call. = FALSE)
}
if (length(conversions$binary) > 0) {
warning("Binary variables converted to factor: ",
paste(conversions$binary, collapse = ", "), call. = FALSE)
}
# Handle missing strata
n_missing_strata <- sum(is.na(data_subset[[strata]]))
if (n_missing_strata > 0 && strata != ".dummy_strata") {
warning("N=", n_missing_strata, " excluded for missing group variable: ",
strata, call. = FALSE)
data_subset <- data_subset[!is.na(data_subset[[strata]]), , drop = FALSE]
}
# Classify variables
factor_vars_final <- names(data_subset)[sapply(data_subset, is.factor)]
factor_vars_final <- setdiff(factor_vars_final, strata)
list(
data = data_subset,
strata = strata,
vars = vars,
factor_vars = factor_vars_final,
n_missing_strata = n_missing_strata
)
}
#' Test numeric variable across groups
#' @keywords internal
#' @noRd
test_numeric_variable <- function(y, x, round_n = 2) {
# Remove NA values
complete <- !is.na(y) & !is.na(x)
y <- y[complete]
x <- droplevels(x[complete])
n_groups <- length(unique(x))
if (n_groups < 2) {
return(list(statistic = NA, p_value = NA, effect_size = NA))
}
if (n_groups == 2) {
# T-test
test_result <- tryCatch(
t.test(y ~ x),
error = function(e) NULL
)
if (is.null(test_result)) {
return(list(statistic = NA, p_value = NA, effect_size = NA))
}
# Cohen's d with pooled standard deviation
means <- tapply(y, x, mean, na.rm = TRUE)
sds <- tapply(y, x, sd, na.rm = TRUE)
# Pooled SD = sqrt((sd1^2 + sd2^2) / 2)
pooled_sd <- sqrt((sds[1]^2 + sds[2]^2) / 2)
cohens_d <- (means[2] - means[1]) / pooled_sd
list(
statistic = -1 * test_result$statistic, # Flip for g2 > g1
p_value = test_result$p.value,
effect_size = cohens_d,
test_type = "t",
es_type = "d"
)
} else {
# ANOVA
test_result <- tryCatch(
aov(y ~ x),
error = function(e) NULL
)
if (is.null(test_result)) {
return(list(statistic = NA, p_value = NA, effect_size = NA))
}
aov_summary <- summary(test_result)[[1]]
f_stat <- aov_summary[["F value"]][1]
p_val <- aov_summary[["Pr(>F)"]][1]
# Partial eta-squared
ss_effect <- aov_summary[["Sum Sq"]][1]
ss_total <- sum(aov_summary[["Sum Sq"]])
eta_sq <- ss_effect / ss_total
list(
statistic = f_stat,
p_value = p_val,
effect_size = eta_sq,
test_type = "F",
es_type = "n2"
)
}
}
#' Summarize numeric variable by group
#' @keywords internal
#' @noRd
summarize_numeric <- function(y, x, round_n = 2) {
result <- tapply(y, x, function(vals) {
m <- mean(vals, na.rm = TRUE)
s <- sd(vals, na.rm = TRUE)
sprintf("%s (%s)",
format(round(m, round_n), nsmall = round_n),
format(round(s, round_n), nsmall = round_n))
})
as.character(result)
}
#' Test categorical variable across groups
#' @keywords internal
#' @noRd
test_categorical_variable <- function(y, x, round_n = 2) {
# Remove NA values
complete <- !is.na(y) & !is.na(x)
y <- droplevels(y[complete])
x <- droplevels(x[complete])
n_groups <- length(unique(x))
if (n_groups < 2) {
return(list(statistic = NA, p_value = NA, effect_size = NA))
}
# Chi-squared test
test_result <- tryCatch(
chisq.test(y, x),
error = function(e) NULL
)
if (is.null(test_result)) {
return(list(statistic = NA, p_value = NA, effect_size = NA))
}
# Effect size depends on variable structure
if (nlevels(y) == 2 && n_groups == 2) {
# Odds ratio for 2x2 table
fisher_result <- tryCatch(
fisher.test(x, y),
error = function(e) NULL
)
effect_size <- if (!is.null(fisher_result)) fisher_result$estimate else NA
es_type <- "OR"
} else {
# Cramer's V for larger tables
n <- sum(complete.cases(cbind(y, x)))
cramers_v <- sqrt(test_result$statistic / (n * test_result$parameter))
effect_size <- cramers_v
es_type <- "V"
}
list(
statistic = test_result$statistic,
p_value = test_result$p.value,
effect_size = effect_size,
test_type = "X2",
es_type = es_type
)
}
#' Summarize categorical variable by group
#' @keywords internal
#' @noRd
summarize_categorical <- function(y, x, round_n = 2) {
# Create frequency table
tab <- table(x, y, useNA = "no")
# Calculate percentages within each group
result <- apply(tab, 1, function(row) {
total <- sum(row)
if (total == 0) return(rep("-", length(row)))
pcts <- 100 * row / total
sprintf("%d (%s%%)", row, format(round(pcts, round_n), nsmall = round_n))
})
# Transpose to get groups as columns
if (is.matrix(result)) {
result <- t(result)
}
result
}
#' Format p-value
#' @keywords internal
#' @noRd
format_p_value <- function(p) {
if (is.na(p)) return("-")
if (p < 0.001) {
"<.001"
} else if (p < 0.01) {
sub("0\\.", ".", format(round(p, 3), nsmall = 3))
} else {
sub("0\\.", ".", format(round(p, 2), nsmall = 2))
}
}
#' Add significance stars
#' @keywords internal
#' @noRd
get_stars <- function(p) {
if (is.na(p)) return("")
if (p < 0.001) "***"
else if (p < 0.01) "**"
else if (p < 0.05) "*"
else ""
}
#' Format test statistic
#' @keywords internal
#' @noRd
format_statistic <- function(stat, test_type, round_n = 2, include_label = FALSE) {
if (is.na(stat)) return("-")
label <- if (include_label) paste0(test_type, "=") else ""
paste0(label, format(round(stat, round_n), nsmall = round_n))
}
#' Format effect size
#' @keywords internal
#' @noRd
format_effect_size <- function(es, es_type, round_n = 2, include_label = FALSE) {
if (is.na(es)) return("-")
label <- if (include_label) paste0(es_type, "=") else ""
paste0(label, format(round(es, round_n), nsmall = round_n))
}
#' Create row for numeric variable
#' @keywords internal
#' @noRd
create_numeric_row <- function(var_name, y, x, round_n, include_tests) {
# Summary statistics
summaries <- summarize_numeric(y, x, round_n)
# Statistical test (if multiple groups)
if (include_tests) {
test_results <- test_numeric_variable(y, x, round_n)
} else {
test_results <- list(statistic = NA, p_value = NA, effect_size = NA,
test_type = "", es_type = "")
}
# Build row
row <- c(
Variable = var_name,
summaries,
Stat = as.character(test_results$statistic),
p = as.character(test_results$p_value),
sig = get_stars(test_results$p_value),
es = as.character(test_results$effect_size)
)
# Store metadata as list element instead of attributes
list(
row = row,
test_type = test_results$test_type,
es_type = test_results$es_type,
is_numeric = TRUE
)
}
#' Create rows for categorical variable
#' @keywords internal
#' @noRd
create_categorical_rows <- function(var_name, y, x, round_n, include_tests) {
y_levels <- levels(y)
# Summary statistics
summaries <- summarize_categorical(y, x, round_n)
# Statistical test (if multiple groups)
if (include_tests) {
test_results <- test_categorical_variable(y, x, round_n)
} else {
test_results <- list(statistic = NA, p_value = NA, effect_size = NA,
test_type = "", es_type = "")
}
# Handle binary vs multi-level factors differently
if (length(y_levels) == 2) {
# Binary: single row showing second level
row <- c(
Variable = paste0(var_name, " (", y_levels[2], ")"),
summaries[, 2], # Second level counts
Stat = as.character(test_results$statistic),
p = as.character(test_results$p_value),
sig = get_stars(test_results$p_value),
es = as.character(test_results$effect_size)
)
list(list(
row = row,
test_type = test_results$test_type,
es_type = test_results$es_type,
is_numeric = FALSE
))
} else {
# Multi-level: header row + level rows
rows <- vector("list", length(y_levels) + 1)
# Header row
rows[[1]] <- list(
row = c(
Variable = var_name,
rep("-", ncol(summaries)),
Stat = as.character(test_results$statistic),
p = as.character(test_results$p_value),
sig = get_stars(test_results$p_value),
es = as.character(test_results$effect_size)
),
test_type = test_results$test_type,
es_type = test_results$es_type,
is_numeric = FALSE
)
# Level rows
for (i in seq_along(y_levels)) {
rows[[i + 1]] <- list(
row = c(
Variable = paste0(" ", y_levels[i]),
summaries[, i],
Stat = "-",
p = "-",
sig = "",
es = "-"
),
test_type = "",
es_type = "",
is_numeric = FALSE
)
}
rows
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.