Nothing
#' Collinearity Diagnostics
#'
#' This function computes tolerance, standard error inflation factor, variance
#' inflation factor, eigenvalues, condition index, and variance proportions for
#' linear, generalized linear, and mixed-effects models.
#'
#' Collinearity diagnostics can be conducted for objects returned from the \code{lm()}
#' and \code{glm()} function, but also from objects returned from the \code{lmer()}
#' and \code{glmer()} function from the \pkg{lme4} package, \code{lme()} function
#' from the \pkg{nlme} package, and the \code{glmmTMB()} function from the \pkg{glmmTMB}
#' package.
#'
#' The generalized variance inflation factor (Fox & Monette, 1992) is computed
#' for terms with more than 1 df resulting from factors with more than two levels.
#' The generalized VIF (GVIF) is interpretable as the inflation in size of the
#' confidence ellipse or ellipsoid for the coefficients of the term in comparison
#' with what would be obtained for orthogonal data. GVIF is invariant to the
#' coding of the terms in the model. In order to adjust for the dimension of the
#' confidence ellipsoid, GVIF\eqn{^\frac{1}{2df}} is computed. Note that the
#' adjusted GVIF (aGVIF) is actually a generalized standard error inflation factor
#' (GSIF). Thus, the aGIF needs to be squared before applying a common cutoff
#' threshold for the VIF (e.g., VIF > 10). Note that the output of \code{check.collin()}
#' function reports either the variance inflation factor or the squared generalized
#' variance inflation factor in the column \code{VIF}, while the standard error
#' inflation factor or the adjusted generalized variance inflation factor is
#' reported in the column \code{SIF}.
#'
#' @param model a fitted model of class \code{"lm"}, \code{"glm"},
#' \code{"lmerMod"}, \code{"lmerModLmerTest"}, \code{"glmerMod"},
#' \code{"lme"}, or \code{"glmmTMB"}.
#' @param print a character vector indicating which results to show, i.e.
#' \code{"all"}, for all results, \code{"vif"} for tolerance,
#' std. error inflation factor, and variance inflation factor,
#' or \code{eigen} for eigenvalue, condition index, and variance
#' proportions.
#' @param digits an integer value indicating the number of decimal places to be
#' used for displaying results.
#' @param p.digits an integer value indicating the number of decimal places to be
#' used for displaying the \emph{p}-value.
#' @param write a character string naming a text file with file extension
#' \code{".txt"} (e.g., \code{"Output.txt"}) for writing the
#' output into a text file.
#' @param append logical: if \code{TRUE} (default), output will be appended
#' to an existing text file with extension \code{.txt} specified
#' in \code{write}, if \code{FALSE} existing text file will be
#' overwritten.
#' @param check logical: if \code{TRUE} (default), argument specification is
#' checked.
#' @param output logical: if \code{TRUE} (default), output is shown on the
#' console.
#'
#' @author
#' Takuya Yanagida \email{takuya.yanagida@@univie.ac.at}
#'
#' @seealso
#' \code{\link{check.outlier}}, \code{\link{lm}}
#'
#' @references
#' Fox, J., & Monette, G. (1992). Generalized collinearity diagnostics.
#' \emph{Journal of the American Statistical Association, 87}, 178-183.
#'
#' Fox, J., Weisberg, S., & Price, B. (2020). \emph{car: Companion to Applied
#' Regression}. R package version 3.0-8. https://cran.r-project.org/web/packages/car/
#'
#' Hebbali, A. (2020). \emph{olsrr: Tools for building OLS regression models}.
#' R package version 0.5.3. https://cran.r-project.org/web/packages/olsrr/
#'
#' @return
#' Returns an object of class \code{misty.object}, which is a list with following
#' entries:
#'
#' \item{\code{call}}{function call}
#' \item{\code{type}}{type of analysis}
#' \item{\code{model}}{model specified in the \code{model} argument}
#' \item{\code{args}}{specification of function arguments}
#' \item{\code{result}}{list with result tables, i.e., \code{coef} for the regression
#' table including tolerance, std. error inflation factor and
#' variance inflation factors, \code{vif} for the tolerance,
#' std. error inflation factor, and variance inflation factor,
#' and \code{eigen} for eigenvalue condition index, and variance
#' proportion}
#'
#' @note
#' The computation of the VIF and the GVIF is based on the \code{vif()} function
#' in the \pkg{car} package by John Fox, Sanford Weisberg and Brad Price (2020),
#' and the computation of eigenvalues, condition index, and variance proportions
#' is based on the \code{ols_eigen_cindex()} function in the \pkg{olsrr} package
#' by Aravind Hebbali (2020).
#'
#' @export
#'
#' @examples
#' dat <- data.frame(group = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4),
#' x1 = c(3, 2, 4, 9, 5, 3, 6, 4, 5, 6, 3, 5),
#' x2 = c(1, 4, 3, 1, 2, 4, 3, 5, 1, 7, 8, 7),
#' x3 = c(7, 3, 4, 2, 5, 6, 4, 2, 3, 5, 2, 8),
#' x4 = c("a", "b", "a", "c", "c", "c", "a", "b", "b", "c", "a", "c"),
#' y1 = c(2, 7, 4, 4, 7, 8, 4, 2, 5, 1, 3, 8),
#' y2 = c(0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1), stringsAsFactors = TRUE)
#'
#' #----------------------------------------------------------------------------
#' # Linear model
#'
#' # Estimate linear model with continuous predictors
#' mod.lm1 <- lm(y1 ~ x1 + x2 + x3, data = dat)
#'
#' # Example 1: Tolerance, std. error, and variance inflation factor
#' check.collin(mod.lm1)
#'
#' # Example 2: Tolerance, std. error, and variance inflation factor
#' # Eigenvalue, Condition index, and variance proportions
#' check.collin(mod.lm1, print = "all")
#'
#' # Estimate model with continuous and categorical predictors
#' mod.lm2 <- lm(y1 ~ x1 + x2 + x3 + x4, data = dat)
#'
#' # Example 3: Tolerance, generalized std. error, and variance inflation factor
#' check.collin(mod.lm2)
#'
#' #----------------------------------------------------------------------------
#' # Generalized linear model
#'
#' # Estimate logistic regression model with continuous predictors
#' mod.glm <- glm(y2 ~ x1 + x2 + x3, data = dat, family = "binomial")
#'
#' # Example 4: Tolerance, std. error, and variance inflation factor
#' check.collin(mod.glm)
#'
#' \dontrun{
#' # Load lme4, nlme, and glmmTMB package
#' libraries(lme4, nlme, glmmTMB)
#'
#' #----------------------------------------------------------------------------
#' # Linear mixed-effects model
#'
#' # Estimate linear mixed-effects model using lme4 package
#' mod.lmer <- lmer(y1 ~ x1 + x2 + x3 + (1|group), data = dat)
#'
#' # Example 5: Tolerance, std. error, and variance inflation factor
#' check.collin(mod.lmer)
#'
#' # Estimate linear mixed-effects model using nlme package
#' mod.lme <- lme(y1 ~ x1 + x2 + x3, random = ~ 1 | group, data = dat)
#'
#' # Example 6: Tolerance, std. error, and variance inflation factor
#' check.collin(mod.lme)
#'
#' # Estimate linear mixed-effects model using glmmTMB package
#' mod.glmmTMB1 <- glmmTMB(y1 ~ x1 + x2 + x3 + (1|group), data = dat)
#'
#' # Example 7: Tolerance, std. error, and variance inflation factor
#' check.collin(mod.glmmTMB1)
#'
#' #----------------------------------------------------------------------------
#' # Generalized linear mixed-effects model
#'
#' # Estimate mixed-effects logistic regression model using lme4 package
#' mod.glmer <- glmer(y2 ~ x1 + x2 + x3 + (1|group), data = dat, family = "binomial")
#'
#' # Example 8: Tolerance, std. error, and variance inflation factor
#' check.collin(mod.glmer)
#'
#' # Estimate mixed-effects logistic regression model using glmmTMB package
#' mod.glmmTMB2 <- glmmTMB(y2 ~ x1 + x2 + x3 + (1|group), data = dat, family = "binomial")
#'
#' # Example 9: Tolerance, std. error, and variance inflation factor
#' check.collin(mod.glmmTMB2)
#'
#' #----------------------------------------------------------------------------
#' # Write Results
#'
#' # Example 10: Write Results into a text file
#' check.collin(mod.lm1, write = "Diagnostics.txt")
#' }
check.collin <- function(model, print = c("all", "vif", "eigen"),
digits = 3, p.digits = 3, write = NULL, append = TRUE,
check = TRUE, output = TRUE) {
#_____________________________________________________________________________
#
# Initial Check --------------------------------------------------------------
# Check if input 'model' is missing
if (isTRUE(missing(model))) { stop("Input for the argument 'model' is missing.", call. = FALSE) }
# Check if input 'model' is NULL
if (isTRUE(is.null(model))) { stop("Input specified for the argument 'model' is NULL.", call. = FALSE) }
# Check if input 'model' is "lm", "glm", "lmerMod", "lmerModLmerTest", "glmerMod", "lme", or "glmmTMB"
if (isTRUE(!all(class(model) %in% c("lm", "glm", "lmerMod", "lmerModLmerTest", "glmerMod", "lme", "glmmTMB")))) { stop("Please specify an \"lm\", \"glm\", \"lmerMod\", \"lmerModLmerTest\", \"glmerMod\", \"lme\", or \"glmmTMB\" object for the argument 'model'.", call. = FALSE) }
# Check if model has more than one predictor variable
if (isTRUE(length(labels(terms(model))) < 2L)) { stop("Please specify a model with more than one predictor variable.", call. = FALSE) }
#_____________________________________________________________________________
#
# Input Check ----------------------------------------------------------------
# Check inputs
.check.input(logical = c("append", "output"), m.character = list(print = c("all", "vif", "eigen")), args = c("digits", "p.digits", "write1"), envir = environment(), input.check = check)
#_____________________________________________________________________________
#
# Arguments ------------------------------------------------------------------
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Print variance inflation factor and/or eigenvalues ####
if (isTRUE(all(c("all", "vif", "eigen") %in% print))) { print <- "vif" }
if (isTRUE(length(print) == 1L && "all" %in% print)) { print <- c("vif", "eigen") }
#_____________________________________________________________________________
#
# Main Function --------------------------------------------------------------
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Variance inflation factor ####
#...................
### Class: lm or glm ####
if (isTRUE(all(class(model) %in% c("lm", "glm")))) {
# Regression model with intercept
if (isTRUE("(Intercept)" %in% names(coefficients(model)))) {
intercept <- TRUE
R <- cov2cor(vcov(model)[-1, -1])
assign <- attr(model.matrix(model, data = model$model), "assign")[-1L]
# Regression model without intercept
} else {
intercept <- FALSE
R <- cov2cor(vcov(model))
assign <- attr(model.matrix(model), "assign")
}
#...................
### Class: lmerMod, lmerModLmerTest or glmerMod ####
} else if (isTRUE(all(class(model) %in% c("lmerMod", "lmerModLmerTest", "glmerMod", "lme")))) {
# Regression model with intercept
if (isTRUE("(Intercept)" %in% names(lme4::fixef(model)))) {
intercept <- TRUE
R <- cov2cor(as.matrix(vcov(model)[-1L, -1L]))
assign <- attr(model.matrix(model, data = model$data), "assign")[-1L]
# Regression model without intercept
} else {
intercept <- FALSE
R <- cov2cor(vcov(model))
assign <- attr(model.matrix(model, data = model$data), "assign")
}
#...................
### Class: glmmTMB ####
} else if (isTRUE(all(class(model) %in% "glmmTMB"))) {
# Regression model with intercept
if (isTRUE("(Intercept)" %in% names(lme4::fixef(model)$cond))) {
intercept <- TRUE
R <- cov2cor(vcov(model)$cond[-1L, -1L])
assign <- attr(model.matrix(model), "assign")[-1L]
# Regression model without intercept
} else {
intercept <- FALSE
R <- cov2cor(vcov(model)$cond)
assign <- attr(model.matrix(model), "assign")
}
}
#...................
### Warning message: Model without intercept ####
if (isTRUE(isFALSE(intercept) && "vif" %in% print)) { warning("Variance inflation factor might not be sensible in models without an intercept.", call. = FALSE) }
terms <- labels(terms(model))
# Determinant of the Matrix
det.R <- det(R)
# Result object
vif <- data.frame(matrix(0, nrow = length(terms), ncol = 5L, dimnames = list(terms, c("Tol", "df", "GVIF", "aGSIF", "aGVIF"))))
for (i in seq_along(terms)) {
subs <- which(assign == i)
# Degrees of freedom
vif[i, "df"] <- length(subs)
# Generalized standard error inflation factor
vif[i, "GVIF"] <- det(as.matrix(R[subs, subs])) * det(as.matrix(R[-subs, -subs])) / det.R
# Generalized standard error inflation factor made compareable
vif[i, "aGSIF"] <- vif[i, "GVIF"]^(1L / (2L * length(subs)))
# Generalized variance inflation factor made compareable
vif[i, "aGVIF"] <- vif[i, "aGSIF"]^2L
# Tolerance statistic
vif[i, "Tol"] <- 1L / vif[i, "aGVIF"]
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Eigenvalue and Condition Index ####
if (isTRUE(all(class(model) %in% c("lmerMod", "lmerModLmerTest", "glmerMod", "lme", "glmmTMB")))) {
z <- scale(as.data.frame(model.matrix(model, data = model$data), stringsAsFactors = FALSE), center = FALSE, scale = TRUE)
} else if (isTRUE(all(class(model) %in% c("lm", "glm")))) {
z <- scale(as.data.frame(model.matrix(model, data = model$model), stringsAsFactors = FALSE), center = FALSE, scale = TRUE)
}
# Eigenvalue
e <- eigen(t(z) %*% z / diag(t(z) %*% z))$values
# Condition index
ci <- sqrt(e[1L] / e)
# Singular value decomposition
svdx <- svd(z)
ph <- t((svdx$v %*% diag(1L / svdx$d))^2L)
# Regression coefficient variance-decomposition matrix
vd <- prop.table(ph %*% diag(rowSums(ph, dims = 1L)), margin = 2L)
eigenvalue <- data.frame(1:length(e), e, ci, vd, stringsAsFactors = FALSE)
colnames(eigenvalue) <- c("dim", "eigen", "ci", attributes(z)$dimnames[[2L]])
#_____________________________________________________________________________
#
# Return Object --------------------------------------------------------------
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Regression coefficients with VIF ####
# Regression coefficients
if (isTRUE(all(class(model) %in% c("lm", "glm", "lmerMod", "lmerModLmerTest", "glmerMod")))) {
coeff <- summary(model)$coefficients
} else if (isTRUE(any(class(model) == "lme"))) {
coeff <- summary(model)$tTable
} else if (isTRUE(any(class(model) == "glmmTMB"))) {
coeff <- summary(model)$coefficients$cond
}
# Match VIF with coefficients
vif.list <- sapply(row.names(vif), function(y) which(substr(row.names(coeff), 1L, nchar(y)) == y))
coeff.vif <- matrix(NA, ncol = 5L, nrow = nrow(coeff), dimnames = list(row.names(coeff), colnames(vif)))
for (i in seq_along(vif.list)) {
coeff.vif[vif.list[[i]], ] <- as.matrix(vif[row.names(vif) == names(vif.list)[i], ][rep(1L, times = length(vif.list[[i]])), ])
}
coeff <- data.frame(cbind(coeff, coeff.vif), check.names = FALSE, stringsAsFactors = FALSE)
# Return object
object <- list(call = match.call(),
type = "check.collin",
model = model,
args = list(print = print, digits = digits, p.digits = p.digits, write = write, append = append, check = check, output = output),
result = list(coef = coeff, vif = vif, eigen = eigenvalue))
class(object) <- "misty.object"
#_____________________________________________________________________________
#
# Write results --------------------------------------------------------------
if (isTRUE(!is.null(write))) {
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Text file ####
# Send R output to textfile
sink(file = write, append = ifelse(isTRUE(file.exists(write)), append, FALSE), type = "output", split = FALSE)
if (isTRUE(append && file.exists(write))) { write("", file = write, append = TRUE) }
# Print object
print(object, check = FALSE)
# Close file connection
sink()
}
#_____________________________________________________________________________
#
# Output ---------------------------------------------------------------------
if (isTRUE(output)) { print(object, check = FALSE) }
return(invisible(object))
}
#_______________________________________________________________________________
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.