Nothing
#' @importFrom utils globalVariables
utils::globalVariables(c("group", "max_val", "angle", "cld_var", "grouping", "mean_val"))
#' @title Example Data for Non-parametric test
#' @description An example dataset of pollen collection by honeybee at different times and different months.
#' @usage df_nonparam
#' @export
df_nonparam <- read.table(text = "Month Time Pollen
1 08:00 29
1 10:00 41.33
1 12:00 17.33
1 14:00 1
1 16:00 7
1 18:00 4
1 08:00 1.66
1 10:00 17.66
1 12:00 22
1 14:00 24.66
1 16:00 17
1 18:00 8
1 08:00 3
1 10:00 39
1 12:00 28.33
1 14:00 18.33
1 16:00 32
1 18:00 26.33
1 08:00 6
1 10:00 57.33
1 12:00 57
1 14:00 30.33
1 16:00 1.66
1 18:00 15.33
2 08:00 10.33
2 10:00 73
2 12:00 16.33
2 14:00 3.33
2 16:00 2
2 18:00 1.66
2 08:00 0.66
2 10:00 55.33
2 12:00 46.33
2 14:00 5.33
2 16:00 1.66
2 18:00 5.66
2 08:00 14.66
2 10:00 55.66
2 12:00 18.66
2 14:00 13.33
2 16:00 21
2 18:00 3.33
2 08:00 36.66
2 10:00 58.66
2 12:00 25.66
2 14:00 12.33
2 16:00 1
2 18:00 2.33
3 08:00 28.33
3 10:00 85
3 12:00 43.33
3 14:00 36.66
3 16:00 20.66
3 18:00 2.66
3 08:00 31.66
3 10:00 50.33
3 12:00 10.66
3 14:00 7.33
3 16:00 13
3 18:00 5.33
3 08:00 82.66
3 10:00 65
3 12:00 26
3 14:00 12
3 16:00 12.33
3 18:00 1.33
3 08:00 43.33
3 10:00 107.33
3 12:00 12.66
3 14:00 15.66
3 16:00 3
3 18:00 4.33
3 08:00 49
3 10:00 73.33
3 12:00 8.66
3 14:00 12
3 16:00 9
3 18:00 1
4 08:00 76.66
4 10:00 67.33
4 12:00 20.33
4 14:00 17
4 16:00 10.33
4 18:00 3.66
4 08:00 53.33
4 10:00 49
4 12:00 21.33
4 14:00 20.33
4 16:00 8.66
4 18:00 2.33
5 08:00 47.33
5 10:00 34.33
5 12:00 9.66
5 14:00 3
5 16:00 4.33
5 18:00 12.66
5 08:00 35.33
5 10:00 27.66
5 12:00 13
5 14:00 17
5 16:00 1.33
5 18:00 3.66
5 08:00 23.66
5 10:00 58.66
5 12:00 25.66
5 14:00 13
5 16:00 2.66
5 18:00 0.66
6 08:00 27.66
6 10:00 60.33
6 12:00 29.66
6 14:00 21.66
6 16:00 18.33
6 18:00 17
8 08:00 36
8 10:00 32.33
8 12:00 20.33
8 14:00 11.66
8 16:00 12.33
8 18:00 1
10 08:00 11.33
10 10:00 47.33
10 12:00 10.66
10 14:00 30
10 16:00 9.66
10 18:00 17.66
10 08:00 44
10 10:00 75.33
10 12:00 15.66
10 14:00 12
10 16:00 7
10 18:00 9.33", header = TRUE, row.names = NULL)
#' Run Statistical Decision Workflow
#'
#' Automatically checks normality, selects appropriate test (ANOVA or Kruskal-Wallis), performs post-hoc, and visualizes results with compact letter display (CLD). Returns all results as an object with optional console output.
#'
#' @param data A data frame.
#' @param dep_var Character. Name of the dependent variable.
#' @param group_vars Character vector. One or two grouping variables.
#' @param cld_offset Numeric. Vertical offset to place CLD labels above the boxplot (default: 5).
#' @param verbose Logical. Whether to print progress messages and results (default: TRUE).
#' @import agricolae ggplot2 dplyr effectsize stringr
#' @importFrom stats kruskal.test setNames aov as.formula shapiro.test
#'
#' @return A list containing:
#' \describe{
#' \item{normality_test}{Results of Shapiro-Wilk test}
#' \item{main_effects}{Results for each main effect}
#' \item{interaction}{Interaction results (if 2 group_vars)}
#' \item{plots}{List of ggplot objects}
#' \item{facet_plot}{Faceted ggplot (if 2 group_vars)}
#' \item{heatmap}{Heatmap ggplot (if 2 group_vars)}
#' }
#' @export
#' @examples
#' # Silent operation
#' results <- run_statdecide(data = df_nonparam, dep_var = "Pollen",
#' group_vars = c("Month","Time"), verbose = FALSE)
#'
#' # With console output
#' run_statdecide(data = df_nonparam, dep_var = "Pollen", group_vars = "Month")
run_statdecide <- function(data, dep_var, group_vars, cld_offset = 5, verbose = TRUE) {
stopifnot(length(group_vars) %in% c(1, 2))
# Initialize results object
results <- list(
normality_test = NULL,
main_effects = list(),
interaction = NULL,
plots = list(),
facet_plot = NULL,
heatmap = NULL
)
if (verbose) message("\n===== Step 1: Normality Test =====")
shapiro <- shapiro.test(data[[dep_var]])
normal <- shapiro$p.value > 0.05
# Store results
results$normality_test <- list(
test = "Shapiro-Wilk",
p.value = shapiro$p.value,
is.normal = normal
)
if (verbose) {
message("Shapiro-Wilk p-value: ", signif(shapiro$p.value, 4))
message("Normality? ", ifelse(normal, "Yes (parametric)", "No (non-parametric)"))
}
plot_with_cld <- function(data, dep_var, group_var, normal = TRUE, cld_offset = 5, verbose = TRUE) {
analysis_results <- list(
test = ifelse(normal, "ANOVA", "Kruskal-Wallis"),
p.value = NA,
posthoc = NULL,
effect_size = NULL,
plot = NULL
)
data$grouping <- factor(data[[group_var]])
if (verbose) message("\n\n=============================\n",
"Analysis for: ", group_var, "\n",
"=============================\n")
if (normal) {
model <- aov(as.formula(paste(dep_var, "~ grouping")), data = data)
anova_summary <- summary(model)
p_val <- anova_summary[[1]][["Pr(>F)"]][1]
analysis_results$p.value <- p_val
if (verbose) {
message("\nANOVA Results:")
print(anova_summary)
message("Significance: ", .get_sig_label(p_val))
}
if (!is.na(p_val) && p_val < 0.05) {
tukey <- agricolae::HSD.test(model, "grouping", group = TRUE)
cld <- tukey$groups
cld$group <- gsub(" ", "", cld$groups)
cld[[group_var]] <- rownames(cld)
analysis_results$posthoc <- list(
method = "Tukey HSD",
results = cld[, c(group_var, "means", "group")]
)
analysis_results$effect_size <- effectsize::eta_squared(model)
if (verbose) {
message("\nTukey HSD Post-hoc Results with CLD:")
print(analysis_results$posthoc$results)
message("\nEffect Size (eta^2):")
print(analysis_results$effect_size)
}
}
} else {
kruskal_test <- kruskal.test(as.formula(paste(dep_var, "~ grouping")), data = data)
p_val <- kruskal_test$p.value
analysis_results$p.value <- p_val
if (verbose) {
message("\nKruskal-Wallis Results:")
print(kruskal_test)
message("Significance: ", .get_sig_label(p_val))
}
if (!is.na(p_val) && p_val < 0.05) {
kruskal <- agricolae::kruskal(data[[dep_var]], data[[group_var]], group = TRUE)
cld <- kruskal$groups
cld$group <- gsub(" ", "", cld$groups)
cld[[group_var]] <- rownames(cld)
if ("means" %in% names(cld)) names(cld)[names(cld) == "means"] <- "mean_val"
analysis_results$posthoc <- list(
method = "Kruskal post-hoc",
results = cld[, c(group_var, intersect(c("mean_val", "group"), names(cld)))]
)
if (verbose) {
message("\nKruskal Post-hoc Results with CLD:")
print(analysis_results$posthoc$results)
}
}
}
if (!is.na(p_val) && p_val < 0.05) {
means <- data |>
dplyr::group_by(grouping) |>
dplyr::summarise(
mean_val = mean(.data[[dep_var]], na.rm = TRUE),
max_val = max(.data[[dep_var]], na.rm = TRUE),
.groups = "drop"
) |>
dplyr::left_join(cld, by = setNames(group_var, "grouping")) |>
dplyr::mutate(
angle = ifelse(nchar(group) > 3, 90, 0),
cld_var = max_val + cld_offset
)
p <- ggplot2::ggplot(data, aes(x = grouping, y = .data[[dep_var]], fill = grouping)) +
ggplot2::geom_boxplot() +
ggplot2::geom_text(
data = means,
aes(x = grouping, y = cld_var, label = group, angle = angle),
size = 4,
vjust = 0
) +
ggplot2::theme_bw(base_size = 14) +
ggplot2::theme(
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1),
legend.position = "none"
) +
ggplot2::scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 10)) +
ggplot2::labs(x = group_var, y = dep_var)
if (verbose) print(p)
analysis_results$plot <- p
} else {
if (verbose) message("No significant effect for '", group_var, "' (p = ", round(p_val, 4), ").")
}
return(analysis_results)
}
# Helper function for significance labels
.get_sig_label <- function(p) {
if (p < 0.001) return("***")
else if (p < 0.01) return("**")
else if (p < 0.05) return("*")
else return("ns")
}
# Run main effect analyses
for (grp in group_vars) {
if (verbose) message("\n===== Analyzing: ", grp, " =====")
analysis <- plot_with_cld(data, dep_var, grp, normal = normal,
cld_offset = cld_offset, verbose = verbose)
results$main_effects[[grp]] <- analysis
if (!is.null(analysis$plot)) results$plots[[grp]] <- analysis$plot
}
# Handle interaction if two grouping variables
if (length(group_vars) == 2) {
interaction_var <- "interaction"
data[[interaction_var]] <- interaction(data[[group_vars[1]]], data[[group_vars[2]]], sep = ":")
if (verbose) message("\n===== Analyzing Interaction: ", paste(group_vars, collapse = " X "), " =====")
interaction_results <- plot_with_cld(data, dep_var, interaction_var, normal = normal,
cld_offset = cld_offset, verbose = verbose)
results$interaction <- interaction_results
if (!is.null(interaction_results$plot)) {
results$plots[[interaction_var]] <- interaction_results$plot
}
# Faceted boxplot
results$facet_plot <- ggplot2::ggplot(
data,
ggplot2::aes(x = .data[[group_vars[2]]], y = .data[[dep_var]], fill = .data[[group_vars[2]]])
) +
ggplot2::geom_boxplot() +
ggplot2::facet_wrap(stats::as.formula(paste("~", group_vars[1])), ncol = 4) +
ggplot2::theme_bw(base_size = 14) +
ggplot2::theme(panel.grid.minor = element_blank()) +
ggplot2::labs(
title = paste("Faceted Boxplot of", dep_var),
x = group_vars[2], y = dep_var
)
if (verbose) print(results$facet_plot)
# Heatmap
heatmap_data <- data |>
dplyr::group_by_at(group_vars) |>
dplyr::summarise(mean_val = mean(.data[[dep_var]], na.rm = TRUE), .groups = "drop")
results$heatmap <- ggplot2::ggplot(
heatmap_data,
ggplot2::aes(x = .data[[group_vars[2]]], y = .data[[group_vars[1]]], fill = mean_val)
) +
ggplot2::geom_tile(color = "white") +
ggplot2::scale_fill_viridis_c(option = "C", name = paste("Mean", dep_var)) +
ggplot2::theme_bw(base_size = 14) +
ggplot2::theme(panel.grid.minor = element_blank()) +
ggplot2::labs(
title = paste("Heatmap of", dep_var),
x = group_vars[2], y = group_vars[1]
)
if (verbose) print(results$heatmap)
}
invisible(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.