Nothing
#' @title Check overdispersion (and underdispersion) of GL(M)M's
#' @name check_overdispersion
#'
#' @description `check_overdispersion()` checks generalized linear (mixed)
#' models for overdispersion (and underdispersion).
#'
#' @param x Fitted model of class `merMod`, `glmmTMB`, `glm`, or `glm.nb`
#' (package **MASS**), or an object returned by `simulate_residuals()`.
#'
#' @inheritParams check_zeroinflation
#'
#' @return A list with results from the overdispersion test, like chi-squared
#' statistics, p-value or dispersion ratio.
#'
#' @details Overdispersion occurs when the observed variance is higher than the
#' variance of a theoretical model. For Poisson models, variance increases
#' with the mean and, therefore, variance usually (roughly) equals the mean
#' value. If the variance is much higher, the data are "overdispersed". A less
#' common case is underdispersion, where the variance is much lower than the
#' mean.
#'
#' @section Interpretation of the Dispersion Ratio:
#' If the dispersion ratio is close to one, a Poisson model fits well to the
#' data. Dispersion ratios larger than one indicate overdispersion, thus a
#' negative binomial model or similar might fit better to the data. Dispersion
#' ratios much smaller than one indicate underdispersion. A p-value < .05
#' indicates either overdispersion or underdispersion (the first being more common).
#'
#' @section Overdispersion in Poisson Models:
#' For Poisson models, the overdispersion test is based on the code from
#' _Gelman and Hill (2007), page 115_.
#'
#' @section Overdispersion in Negative Binomial or Zero-Inflated Models:
#' For negative binomial (mixed) models or models with zero-inflation component,
#' the overdispersion test is based simulated residuals (see [`simulate_residuals()`]).
#'
#' @section Overdispersion in Mixed Models:
#' For `merMod`- and `glmmTMB`-objects, `check_overdispersion()`
#' is based on the code in the
#' [GLMM FAQ](http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html),
#' section *How can I deal with overdispersion in GLMMs?*. Note that this
#' function only returns an *approximate* estimate of an overdispersion
#' parameter. Using this approach would be inaccurate for zero-inflated or
#' negative binomial mixed models (fitted with `glmmTMB`), thus, in such cases,
#' the overdispersion test is based on [`simulate_residuals()`] (which is identical
#' to `check_overdispersion(simulate_residuals(model))`).
#'
#' @inheritSection check_zeroinflation Tests based on simulated residuals
#'
#' @section How to fix Overdispersion:
#' Overdispersion can be fixed by either modeling the dispersion parameter, or
#' by choosing a different distributional family (like Quasi-Poisson, or
#' negative binomial, see _Gelman and Hill (2007), pages 115-116_).
#'
#' @family functions to check model assumptions and and assess model quality
#'
#' @references
#' - Bolker B et al. (2017):
#' [GLMM FAQ.](http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html)
#'
#' - Gelman, A., and Hill, J. (2007). Data analysis using regression and
#' multilevel/hierarchical models. Cambridge; New York: Cambridge University
#' Press.
#'
#' @examplesIf getRversion() >= "4.0.0" && require("glmmTMB")
#' data(Salamanders, package = "glmmTMB")
#' m <- glm(count ~ spp + mined, family = poisson, data = Salamanders)
#' check_overdispersion(m)
#' @export
check_overdispersion <- function(x, ...) {
UseMethod("check_overdispersion")
}
# default -----------------------
#' @export
check_overdispersion.default <- function(x, ...) {
.is_model_valid(x)
insight::format_error(
paste0("`check_overdisperion()` not yet implemented for models of class `", class(x)[1], "`.")
)
}
# Methods -----------------------------
#' @export
plot.check_overdisp <- function(x, ...) {
insight::check_if_installed(c("see", "graphics"), "for overdispersion plots")
obj_name <- attr(x, "object_name", exact = TRUE)
model <- NULL
if (!is.null(obj_name)) {
model <- .safe(get(obj_name, envir = parent.frame()))
if (is.null(model)) {
# second try, global env
model <- .safe(get(obj_name, envir = globalenv()))
}
}
if (!is.null(model)) {
x <- .diag_overdispersion(model)
class(x) <- c("see_check_overdisp", "data.frame")
attr(x, "colors") <- list(...)$colors
attr(x, "line_size") <- list(...)$size_line
attr(x, "overdisp_type") <- list(...)$plot_type
graphics::plot(x, ...)
}
}
#' @export
print.check_overdisp <- function(x, digits = 3, ...) {
orig_x <- x
x$dispersion_ratio <- sprintf("%.*f", digits, x$dispersion_ratio)
if (!is.null(x$chisq_statistic)) {
x$chisq_statistic <- sprintf("%.*f", digits, x$chisq_statistic)
}
x$p_value <- pval <- round(x$p_value, digits = digits)
if (x$p_value < 0.001) x$p_value <- "< 0.001"
maxlen <- max(
nchar(x$dispersion_ratio),
nchar(x$chisq_statistic),
nchar(x$p_value)
)
insight::print_color("# Overdispersion test\n\n", "blue")
if (is.null(x$chisq_statistic)) {
cat(sprintf(" dispersion ratio = %s\n", format(x$dispersion_ratio, justify = "right", width = maxlen)))
cat(sprintf(" p-value = %s\n\n", format(x$p_value, justify = "right", width = maxlen)))
} else {
cat(sprintf(" dispersion ratio = %s\n", format(x$dispersion_ratio, justify = "right", width = maxlen)))
cat(sprintf(" Pearson's Chi-Squared = %s\n", format(x$chisq_statistic, justify = "right", width = maxlen)))
cat(sprintf(" p-value = %s\n\n", format(x$p_value, justify = "right", width = maxlen)))
}
if (pval > 0.05) {
message("No overdispersion detected.")
} else if (x$dispersion_ratio > 1) {
message("Overdispersion detected.")
} else {
message("Underdispersion detected.")
}
invisible(orig_x)
}
# Overdispersion for classical models -----------------------------
#' @export
check_overdispersion.glm <- function(x, verbose = TRUE, ...) {
# model info
info <- insight::model_info(x)
obj_name <- insight::safe_deparse_symbol(substitute(x))
# for certain distributions, simulated residuals are more accurate
use_simulated <- info$is_bernoulli || info$is_binomial || (!info$is_count && !info$is_binomial) || info$is_negbin
# model classes not supported in DHARMa
not_supported <- c("fixest", "glmx")
if (use_simulated && !inherits(x, not_supported)) {
return(check_overdispersion(simulate_residuals(x, ...), object_name = obj_name, ...))
}
# check if we have poisson - need this for models not supported by DHARMa
if (!info$is_count && !info$is_binomial) {
insight::format_error(
"Overdispersion checks can only be used for models from Poisson families or binomial families with trials > 1."
)
}
# check for Bernoulli
if (info$is_bernoulli) {
insight::format_error("Overdispersion checks cannot be used for Bernoulli models.")
}
yhat <- stats::fitted(x)
n <- stats::nobs(x)
k <- length(insight::find_parameters(x, effects = "fixed", flatten = TRUE))
zi <- (insight::get_response(x, verbose = FALSE) - yhat) / sqrt(yhat)
chisq <- sum(zi^2)
ratio <- chisq / (n - k)
p.value <- stats::pchisq(chisq, df = n - k, lower.tail = FALSE)
out <- list(
chisq_statistic = chisq,
dispersion_ratio = ratio,
residual_df = n - k,
p_value = p.value
)
class(out) <- c("check_overdisp", "see_check_overdisp")
attr(out, "object_name") <- obj_name
out
}
#' @export
check_overdispersion.fixest <- check_overdispersion.glm
#' @export
check_overdispersion.fixest_multi <- function(x, verbose = TRUE, ...) {
lapply(x, check_overdispersion.fixest)
}
#' @export
check_overdispersion.glmx <- check_overdispersion.glm
# mfx models ------------------------------
#' @export
check_overdispersion.poissonmfx <- function(x, ...) {
check_overdispersion(x$fit, ...)
}
#' @export
check_overdispersion.poissonirr <- check_overdispersion.poissonmfx
#' @export
check_overdispersion.negbinirr <- check_overdispersion.poissonmfx
#' @export
check_overdispersion.negbinmfx <- check_overdispersion.poissonmfx
#' @export
check_overdispersion.model_fit <- check_overdispersion.poissonmfx
# Overdispersion for mixed models ---------------------------
#' @export
check_overdispersion.merMod <- function(x, ...) {
# for certain distributions, simulated residuals are more accurate
info <- insight::model_info(x)
obj_name <- insight::safe_deparse_symbol(substitute(x))
# for certain distributions, simulated residuals are more accurate
use_simulated <- info$family == "genpois" || info$is_zero_inflated || info$is_bernoulli || info$is_binomial || (!info$is_count && !info$is_binomial) || info$is_negbin # nolint
if (use_simulated) {
return(check_overdispersion(simulate_residuals(x, ...), object_name = obj_name, ...))
}
rdf <- stats::df.residual(x)
rp <- insight::get_residuals(x, type = "pearson")
# check if pearson residuals are available
if (insight::is_empty_object(rp)) {
return(check_overdispersion(simulate_residuals(x, ...), object_name = obj_name, ...))
}
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq / rdf
pval <- stats::pchisq(Pearson.chisq, df = rdf, lower.tail = FALSE)
out <- list(
chisq_statistic = Pearson.chisq,
dispersion_ratio = prat,
residual_df = rdf,
p_value = pval
)
class(out) <- c("check_overdisp", "see_check_overdisp")
attr(out, "object_name") <- obj_name
out
}
#' @export
check_overdispersion.negbin <- check_overdispersion.merMod
#' @export
check_overdispersion.glmmTMB <- check_overdispersion.merMod
# simulated residuals -----------------------------
#' @rdname check_overdispersion
#' @export
check_overdispersion.performance_simres <- function(x, alternative = c("two.sided", "less", "greater"), ...) {
alternative <- match.arg(alternative)
# check for special arguments - we may pass "object_name" from other methods
dots <- list(...)
if (is.null(dots$object_name)) {
obj_name <- insight::safe_deparse_symbol(substitute(x))
} else {
obj_name <- dots$object_name
}
# statistics function
variance <- stats::sd(x$simulatedResponse)^2
dispersion <- function(i) stats::var(i - x$fittedPredictedResponse) / variance
# compute test results
result <- .simres_statistics(x, statistic_fun = dispersion, alternative = alternative)
out <- list(
dispersion_ratio = result$observed / mean(result$simulated),
p_value = result$p
)
class(out) <- c("check_overdisp", "see_check_overdisp")
attr(out, "object_name") <- obj_name
out
}
#' @export
check_overdispersion.DHARMa <- check_overdispersion.performance_simres
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.