R/bcat_plt_diag.R

Defines functions .print_diag_tests bcat_plt_diag

Documented in bcat_plt_diag

#' Regression diagnostic dashboard
#'
#' Produce a multi-panel diagnostic plot for a fitted model with UC styling.
#' Optionally prints assumption test results (Breusch-Pagan, Shapiro-Wilk,
#' Durbin-Watson) to the console.
#'
#' @param model A fitted model object (e.g., \code{lm}, \code{glm}).
#' @param which Integer vector specifying which panels to include:
#'   1 = Residuals vs Fitted, 2 = Q-Q Plot, 3 = Scale-Location,
#'   4 = Residuals vs Leverage. Default: \code{c(1, 2, 3, 4)}.
#' @param labels Logical. Label influential observations? Default is TRUE.
#' @param n_labels Integer. Number of extreme observations to label. Default is 3.
#' @param tests Logical. Print assumption test results to console? Default is TRUE.
#' @param nrow Number of panel rows.
#' @param ncol Number of panel columns.
#' @return A patchwork object combining the diagnostic panels.
#' @author Saannidhya Rawat
#' @family plots
#' @export
#'
#' @examples
#' m <- lm(mpg ~ wt + hp + cyl, data = mtcars)
#' bcat_plt_diag(m)
#' bcat_plt_diag(m, which = c(1, 2))
bcat_plt_diag <- function(model,
                          which = c(1, 2, 3, 4),
                          labels = TRUE,
                          n_labels = 3,
                          tests = TRUE,
                          nrow = NULL,
                          ncol = NULL) {

  # Extract diagnostic data
  diag_df <- data.frame(
    fitted = stats::fitted(model),
    resid = stats::residuals(model),
    std_resid = stats::rstandard(model),
    sqrt_std_resid = sqrt(abs(stats::rstandard(model))),
    leverage = stats::hatvalues(model),
    cooks = stats::cooks.distance(model),
    obs = seq_len(length(stats::residuals(model)))
  )

  # Label extreme observations
  if (labels) {
    top_idx <- order(abs(diag_df$std_resid), decreasing = TRUE)[
      seq_len(min(n_labels, nrow(diag_df)))
    ]
    diag_df$label <- ""
    diag_df$label[top_idx] <- as.character(diag_df$obs[top_idx])
  }

  panels <- list()

  # Panel 1: Residuals vs Fitted
  if (1 %in% which) {
    p1 <- ggplot2::ggplot(diag_df, ggplot2::aes(x = fitted, y = resid)) +
      ggplot2::geom_point(alpha = 0.6, color = .uc_color("Bearcats Black")) +
      ggplot2::geom_hline(yintercept = 0, linetype = "dashed",
                          color = .uc_reference_color()) +
      ggplot2::geom_smooth(method = "loess", se = FALSE,
                           color = .uc_color("UC Red"), linewidth = 0.7,
                           formula = y ~ x) +
      ggplot2::labs(x = "Fitted Values", y = "Residuals",
                    title = "Residuals vs Fitted") +
      theme_UC_nogrid()
    if (labels) {
      p1 <- p1 + ggplot2::geom_text(ggplot2::aes(label = label),
                                     hjust = -0.2, size = 3, color = .uc_reference_color())
    }
    panels <- c(panels, list(p1))
  }

  # Panel 2: Q-Q Plot
  if (2 %in% which) {
    p2 <- ggplot2::ggplot(diag_df, ggplot2::aes(sample = std_resid)) +
      ggplot2::stat_qq(alpha = 0.6, color = .uc_color("Bearcats Black")) +
      ggplot2::stat_qq_line(color = .uc_color("UC Red"), linewidth = 0.7) +
      ggplot2::labs(x = "Theoretical Quantiles", y = "Std. Residuals",
                    title = "Normal Q-Q") +
      theme_UC_nogrid()
    panels <- c(panels, list(p2))
  }

  # Panel 3: Scale-Location
  if (3 %in% which) {
    p3 <- ggplot2::ggplot(diag_df, ggplot2::aes(x = fitted, y = sqrt_std_resid)) +
      ggplot2::geom_point(alpha = 0.6, color = .uc_color("Bearcats Black")) +
      ggplot2::geom_smooth(method = "loess", se = FALSE,
                           color = .uc_color("UC Red"), linewidth = 0.7,
                           formula = y ~ x) +
      ggplot2::labs(x = "Fitted Values", y = expression(sqrt("|Std. Residuals|")),
                    title = "Scale-Location") +
      theme_UC_nogrid()
    panels <- c(panels, list(p3))
  }

  # Panel 4: Residuals vs Leverage
  if (4 %in% which) {
    p4 <- ggplot2::ggplot(diag_df, ggplot2::aes(x = leverage, y = std_resid)) +
      ggplot2::geom_point(alpha = 0.6, color = .uc_color("Bearcats Black")) +
      ggplot2::geom_hline(yintercept = 0, linetype = "dashed",
                          color = .uc_reference_color()) +
      ggplot2::geom_smooth(method = "loess", se = FALSE,
                           color = .uc_color("UC Red"), linewidth = 0.7,
                           formula = y ~ x) +
      ggplot2::labs(x = "Leverage", y = "Std. Residuals",
                    title = "Residuals vs Leverage") +
      theme_UC_nogrid()
    if (labels) {
      p4 <- p4 + ggplot2::geom_text(ggplot2::aes(label = label),
                                     hjust = -0.2, size = 3, color = .uc_reference_color())
    }
    panels <- c(panels, list(p4))
  }

  # Combine panels
  combined <- patchwork::wrap_plots(panels, nrow = nrow, ncol = ncol)

  # Print diagnostic tests
  if (tests) .print_diag_tests(model)

  combined
}

#' Print diagnostic test results to console
#' @noRd
.print_diag_tests <- function(model) {
  cat("\n--- Diagnostic Tests ---\n\n")

  tryCatch({
    bp <- lmtest::bptest(model)
    pass <- bp$p.value > 0.05
    cat(sprintf("Breusch-Pagan (heteroskedasticity): stat=%.3f, p=%.4f  [%s]\n",
                bp$statistic, bp$p.value,
                if (pass) "PASS - no evidence of heteroskedasticity"
                else "WARN - possible heteroskedasticity"))
  }, error = function(e) cat("Breusch-Pagan: not applicable\n"))

  resid <- stats::residuals(model)
  n <- length(resid)
  if (n >= 3L && n <= 5000L) {
    sw <- stats::shapiro.test(resid)
    pass <- sw$p.value > 0.05
    cat(sprintf("Shapiro-Wilk (normality):           stat=%.3f, p=%.4f  [%s]\n",
                sw$statistic, sw$p.value,
                if (pass) "PASS - residuals appear normal"
                else "WARN - residuals may not be normal"))
  }

  tryCatch({
    dw <- lmtest::dwtest(model)
    pass <- dw$p.value > 0.05
    cat(sprintf("Durbin-Watson (autocorrelation):     stat=%.3f, p=%.4f  [%s]\n",
                dw$statistic, dw$p.value,
                if (pass) "PASS - no evidence of autocorrelation"
                else "WARN - possible autocorrelation"))
  }, error = function(e) cat("Durbin-Watson: not applicable\n"))

  cat("\n")
}

Try the Rbearcat package in your browser

Any scripts or data that you put into this service are public.

Rbearcat documentation built on March 21, 2026, 5:07 p.m.