R/vis_lm_assumptions.R
In visStatistics: Automated Selection and Visualisation of Statistical Hypothesis Tests

Documented in vis_anova_assumptions vis_lm_assumptions

#' Visualisation of the normality distribution of the standardised residuals
#'
#' Checks for normality of the standardised residuals in the general linear model
#' Student's t-test 
#' (t.test,var=EQUAL) 
#' Fisher oneway ANOVA (aov) or simple linear regression.
#' Performs the Shapiro-Wilk test and Anderson-Darling test for normality and,
#' if not a regression, also the Levene-Brown-Forsythe and the Bartlett's test for homogeneity of variances.
#' It produces a histogram with normal overlay, a residuals vs fitted plot,
#' and a normal Q-Q plot.
#'
#' @param samples Numeric vector; the dependent variable.
#' @param fact Factor; the independent variable.
#' @param cex Numeric; scaling factor for plot text and symbols (default: 1).
#' @param regression Logical; if TRUE, skips Bartlett's test (for regression diagnostics). Default is FALSE.
#'
#' @return A list with elements:
#' \describe{
#'   \item{summary_anova}{Summary of the ANOVA model.}
#'   \item{shapiro_test}{Result from \code{shapiro.test()}.}
#'   \item{ad_test}{Result from \code{nortest::ad.test()} or a character message if n < 7.}
#'   \item{levene_test}{Result from \code{levene.test()} (only if \code{regression = FALSE}).}
#'   \item{bartlett_test}{Result from \code{bartlett.test()} (only if \code{regression = FALSE}).}
#'   \item{bp_test}{Result from \code{bp.test()} (only if \code{regression = TRUE}).}
#' }
#'
#' @examples
#' ToothGrowth$dose <- as.factor(ToothGrowth$dose)
#' vis_lm_assumptions(ToothGrowth$len, ToothGrowth$dose)
#'
#' @export

vis_lm_assumptions <- function(samples, fact, cex = 1, regression = FALSE) {
  
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  
  # Clean the input
  samples3 <- na.omit(samples)
  fact <- subset(fact, !is.na(samples))
  samples <- samples3
  
  # Fit model
  anova_model <- aov(samples ~ fact) # needed for correct output structure, do not use lm(samples~fact)
  std_residuals <- rstandard(anova_model)
  
  # Run assumption tests
  # Shapiro-Wilk test (for n >= 3 and n <= 5000)
  if (length(std_residuals) >= 3 && length(std_residuals) <= 5000) {
    shapiro_test <- shapiro.test(std_residuals)
  } else {
    shapiro_test <- list(
      method = "Shapiro-Wilk normality test",
      statistic = NA,
      p.value = NA,
      data.name = "standardized residuals"
    )
  }
  
  # Anderson-Darling test (for n >= 7)
  if (length(std_residuals) >= 7) {
    ad_test <- nortest::ad.test(std_residuals)
  } else {
    ad_test <- "Sample size too small (n < 7) for Anderson-Darling test"
  }
  
  # Variance tests (only for non-regression)
  if (!regression) {
    levene_test <- levene.test(samples, fact)
    bartlett_test <- bartlett.test(samples ~ fact)
     bp_test <- NULL
  } else {
    # For regression: use Breusch-Pagan test for heteroscedasticity
    # (tests if error variance depends on independent variables)
    levene_test <- NULL
    bartlett_test <- NULL
     bp_test <-   bp.test(anova_model)
  }
  
  # Set up plotting area with outer margin for title
  par(mfrow = c(2, 2), oma = c(0, 0, 3, 0), cex = 0.7 * cex)
  
  # Plot 1: Histogram with normal overlay
  x_min <- min(min(std_residuals), -3.2)
  x_max <- max(max(std_residuals), 3.2)
  temp_hist <- hist(std_residuals, plot = FALSE)
  y_max <- max(max(temp_hist$density), 0.45)
  
  hist(std_residuals, freq = FALSE, main = "Histogram and Normal Density", 
       xlab = "Std. Residuals", col = "lightblue", border = "black",
       xlim = c(x_min, x_max), ylim = c(0, y_max))
  x_seq <- seq(x_min, x_max, length.out = 200)
  lines(x_seq, dnorm(x_seq), col = "red", lwd = 1)
  
  # Plot 2: Standardized Residuals vs Fitted (manual - plot.lm doesn't use standardized)
  ylim_res <- extendrange(std_residuals, f = 0.08)
  plot(fitted(anova_model), std_residuals, 
       main = "Std. Res. vs. Fitted",
       xlab = "Fitted values", 
       ylab = "Std. Residuals",
       ylim = ylim_res)
  abline(h = 0, col = "red", lwd = 1)
  
  
  
  # Plot 3: Normal Q-Q (using internal plot.lm method)
  plot(anova_model, which = 2, cex = cex, caption = "", sub.caption = "", main = "Normal Q-Q Plot")
  
  # Plot 4: Depends on regression vs ANOVA
  if (regression) {
    # Residuals vs Leverage with Cook's distance (using internal plot.lm method)
    plot(anova_model, which = 5, cex = cex, caption = "", sub.caption = "", main = "Std. Res. vs. Leverage")
  } else {
    # Scale-Location plot (using internal plot.lm method)
    plot(anova_model, which = 3, cex = cex, caption = "", sub.caption = "", main = "Scale-Location")
  }
  
  # Add overall title with test results
  p_shapiro <- if (!is.na(shapiro_test$p.value)) signif(shapiro_test$p.value, 2) else "N/A"
  p_AD <- if (is.list(ad_test) && !is.null(ad_test$p.value)) signif(ad_test$p.value, 2) else "N/A"
  
  if (regression) {
    # Regression title with Breusch-Pagan test - split into two rows
    p_bp <- signif( bp_test$p.value, 2)
    title_line1 <- paste("Linear model assumptions: Shapiro-Wilk p =", p_shapiro, 
                         "| Anderson-Darling p =", p_AD)
    title_line2 <- paste("Breusch-Pagan p =", p_bp)
    
    mtext(title_line1, side = 3, outer = TRUE, line = 1, cex = 0.7)
    mtext(title_line2, side = 3, outer = TRUE, line = 0, cex = 0.7)
  } else {
    # ANOVA title with Levene and Bartlett - split into two rows
    title_line1 <- paste("Linear model assumptions: Shapiro-Wilk p =", p_shapiro,
                         "| Anderson-Darling p =", if(is.numeric(p_AD)) signif(p_AD, 2) else p_AD)
    title_line2 <- paste("Levene-Brown-Forsythe p =", signif(levene_test$p.value, 2),
                         "| Bartlett p =", signif(bartlett_test$p.value, 2))
    
    mtext(title_line1, side = 3, outer = TRUE, line = 1, cex = 0.7)
    mtext(title_line2, side = 3, outer = TRUE, line = 0, cex = 0.7)
  }
  
  # Return results
  result <- list(
    summary_anova = summary(anova_model),
    shapiro_test = shapiro_test,
    ad_test = ad_test,
    levene_test = if (!regression) levene_test else NULL,
    bartlett_test = if (!regression) bartlett_test else NULL,
    bp_test = if (regression)  bp_test else NULL
  )
  
  class(result) <- "vis_lm_assumptions"
  return(invisible(result))
}

#' @rdname vis_lm_assumptions
#' @param ... Arguments passed to \code{vis_lm_assumptions()}.
#' @export
vis_anova_assumptions <- function(...) {
  .Deprecated("vis_lm_assumptions")
  vis_lm_assumptions(...)
}

Any scripts or data that you put into this service are public.

visStatistics documentation built on May 13, 2026, 1:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

visStatistics
Automated Selection and Visualisation of Statistical Hypothesis Tests

R/vis_lm_assumptions.R
In visStatistics: Automated Selection and Visualisation of Statistical Hypothesis Tests

Defines functions vis_anova_assumptions vis_lm_assumptions

Documented in vis_anova_assumptions vis_lm_assumptions

Try the visStatistics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

visStatistics Automated Selection and Visualisation of Statistical Hypothesis Tests

R/vis_lm_assumptions.R In visStatistics: Automated Selection and Visualisation of Statistical Hypothesis Tests

Defines functions vis_anova_assumptions vis_lm_assumptions

Documented in vis_anova_assumptions vis_lm_assumptions

Try the visStatistics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

visStatistics
Automated Selection and Visualisation of Statistical Hypothesis Tests

R/vis_lm_assumptions.R
In visStatistics: Automated Selection and Visualisation of Statistical Hypothesis Tests