Nothing
#' Reports the output of testing all assumptions for a multilevel model
#'
#' Reports the results from testing all assumptions of a multilevel model and provides suggestions if an assumption is not passed
#'
#' @param model A linear mixed-effects model of class lmerMod, lmerModLmerTest, or glmerMod of type binomial.
#'
#' @return Tests the relevant assumptions of the specified multilevel model.
#'
#' @references Glaser, R. E. (2006). Levene’s Robust Test of Homogeneity of Variances. Encyclopedia of Statistical Sciences. 6.
#'
#' @examples
#' # Read in data
#' data(instruction)
#' # Create model
#' mod <- lme4::lmer(mathgain ~ mathkind + (1 | classid), data = instruction)
#' # Evaluate assumptions
#' mlm_assumptions(mod)
#'
#' @export mlm_assumptions
mlm_assumptions <- function(model) {
# Model class must be 'lmerMod' or 'lmerModLmerTest'
if (!(inherits(model,"lmerMod")|!inherits(model,"lmerModLmerTest")|!inherits(model,"glmerMod"))) {
stop("Model class is not 'lmerMod', 'lmerModLmerTest', or 'glmerMod'.", call. = FALSE)
return(NULL)
}
if(inherits(model,"glmerMod") & !stats::family(model)[[1]]=="binomial"){
stop("Function currently only supports binomial family glmerMod models.", call. = FALSE)
return(NULL)
}
# Data
data <- lme4::getData(model)
# Original y variable
form <- deparse(stats::formula(model))
y <- trimws(strsplit(form, "[~+]")[[1]][1]) # Extracts dependent variable from the model
# Predictors
x <- attributes(stats::terms(model))$term.labels # Extracts independent variables from the model
# Model has to have predictors
if(length(x) == 0){
stop("mlm_assumptions requires 1 or more predictors to be present in the model.", call. = FALSE)
return(NULL)
}
if(inherits(model,"glmerMod")){
data$probabilities <- stats::predict(model, type = "response")
data$logit <- log(data$probabilities/(1-data$probabilities))
}
# Multiply two continuous interactors
# Plot continuous at every level of categorical interactor
# Ignore two categorical variable interactions
# Linearity
linearityplot_fun <- function(xvar){
if(inherits(model,"glmerMod")){
yvar <- "logit"
} else {
yvar <- y
}
# If interaction
if(grepl(":",xvar)){
int <- xvar[grepl(":",xvar)] # Interaction name
int.var <- strsplit(int,"[~:]")[[1]] # Split interaction(s) into variables
n.int <- length(int.var)
# If three way or more interaction
if(n.int > 2){
return(paste0("Linearity plot cannot be estimated for ", xvar, ". Function currently does not support interactions beyond 2 variables."))
}
# If two way interaction
else {
# For each column, check the class of the variable in data
int.class <- sapply(int.var, function(x) class(data[,x]))
# Check the combination of the variables
if(all(int.class %in% c("numeric","integer"))){
data$interaction <- data[,int.var[1]]*data[,int.var[2]]
int <- gsub("\\:", ".", int)
colnames(data)[which(colnames(data)=="interaction")] <- int
ggplot2::ggplot(data, ggplot2::aes_string(x=int, y=yvar)) +
ggplot2::geom_point()+
ggplot2::geom_smooth(formula = y ~ x, method=stats::loess) +
ggplot2::theme_classic()
}
### Both are character factor or logical with numeric or integer
else {
if(all(int.class %in% c("character", "factor","logical"))){
return(paste0("Linearity plot cannot be estimated for ", xvar, ". Function currently does not support interactions between only character, factor, or logical variables."))
}
### One character, factor, or logical with numeric or integer
else {
data$facet <- data[,names(which(int.class != c("numeric","integer")))]
ggplot2::ggplot(data, ggplot2::aes_string(x=names(which(int.class == c("numeric","integer"))), y=yvar)) +
ggplot2::geom_point()+
ggplot2::geom_smooth(formula = y ~ x, method=stats::loess) +
ggplot2::theme_classic() +
ggplot2::facet_wrap(~facet) +
ggplot2::ggtitle(xvar)
}
}
}
}
# If not an interaction
else {
ggplot2::ggplot(data, ggplot2::aes_string(x=xvar, y=yvar)) +
ggplot2::geom_point()+
ggplot2::geom_smooth(formula = y ~ x, method=stats::loess) +
ggplot2::theme_classic() +
ggplot2::ggtitle(xvar)
}
}
linearity.plots <- lapply(x, linearityplot_fun)
if(!inherits(model,"glmerMod")){
# Homogeneity of Variance
data$model.Res2<- abs(stats::residuals(model))^2 # squares the absolute values of the residuals to provide the more robust estimate
data$group <- data[,names(lme4::getME(model, name = "flist"))]
Levene.model <- stats::lm(model.Res2 ~ group, data=data) #ANOVA of the squared residuals
homo.test <- stats::anova(Levene.model) #displays the results
predicted <- stats::predict(model)
data$predicted <- predicted
#create a fitted vs residual plot
fitted.residual.plot <- ggplot2::ggplot(data=data,mapping=ggplot2::aes(x=predicted,y=stats::residuals(model))) +
ggplot2::geom_point() +
ggplot2::geom_hline(yintercept=0,linetype="dashed") +
ggplot2::theme_classic() +
ggplot2::xlab("Predicted Values") +
ggplot2::ylab("Model Residuals") +
ggplot2::ggtitle("Fitted vs. Residuals")
# Normally distributed residuals
V1 <- stats::resid(model)
V2 <- data[,y]
resid.data <- as.data.frame(cbind(V1,V2))
resid.linearity.plot <- ggplot2::ggplot(resid.data,
ggplot2::aes(x = V1, y = V2)) +
ggplot2::geom_point() +
ggplot2::geom_smooth(formula = y ~ x, method=stats::loess) +
ggplot2::theme_classic() +
ggplot2::xlab("Residuals") +
ggplot2::ylab("Original y")
}
if(inherits(model,"glmerMod")){
data$predicted <- stats::predict(model)
}
data$Leverage = data$predicted/(1 - data$predicted)
data$mse = (stats::residuals(model))^2/stats::var(stats::residuals(model))
data$CooksD <- (1/6)*(data$mse)*data$Leverage
outliers <- rownames(data[data$CooksD > (4/nrow(data)),])
if(length(outliers) == 0){
outliers <- "No outliers detected."
}
model.Res <- stats::residuals(model)
data$model.Res <- model.Res
if(!inherits(model,"glmerMod")){
resid.normality.plot <- ggplot2::qplot(sample = model.Res, data = data) +
ggplot2::stat_qq_line() +
ggplot2::xlab("Theoretical Quantiles") +
ggplot2::ylab("Standardized Residuals") +
ggplot2::ggtitle("Normal Q-Q") +
ggplot2::theme_classic()
}
# Component + Residual plots
### continuous predictors only - Will not produce a plot for logical, unordered factor, character, < 3 unique values in predictors, interactions between categorical variables, or interactions of > 3 variables
if(!inherits(model,"glmerMod")){
x.ResidComponent <- c(x[!grepl(":",x)][sapply(x[!grepl(":",x)], function(x) ifelse(!is.factor(data[,x]), TRUE, is.ordered(data[,x])) & !is.character(data[,x]) & length(unique((data[,x])))>2)],x[grepl(":",x)])
#data$model.Res <- residuals(model)
ResidComponent_fun <- function(xvar){
# If interaction
if(grepl(":",xvar)){
int <- xvar[grepl(":",xvar)] # Interaction name
int.var <- strsplit(int,"[~:]")[[1]] # Split interaction(s) into variables
n.int <- length(int.var)
# If three way or more interaction
if(n.int > 2){
return(paste0("Residual Component plot cannot be estimated for ", xvar, ". Function currently does not support interactions beyond 2 variables."))
}
# If two way interaction
else {
# For each column, check the class of the variable in data
int.class <- sapply(int.var, function(x) class(data[,x]))
# Check the combination of the variables
if(all(int.class %in% c("numeric","integer"))){
data$interaction <- data[,int.var[1]]*data[,int.var[2]]
int <- gsub("\\:", ".", int)
colnames(data)[which(colnames(data)=="interaction")] <- int
ggplot2::ggplot(data, ggplot2::aes_string(x=int, y=data[,"model.Res"])) +
ggplot2::geom_point() +
ggplot2::geom_smooth() +
ggplot2::geom_hline(yintercept = 0) +
ggplot2::xlab(xvar) +
ggplot2::ylab("Residuals") +
ggplot2::theme_classic() +
ggplot2::ggtitle(xvar)
}
### Both are character factor or logical with numeric or integer
else {
if(all(int.class %in% c("character", "factor","logical"))){
return(paste0("Residual Component plot cannot be estimated for ", xvar, ". Function currently does not support interactions between only character, factor, or logical variables."))
}
### One character, factor, or logical with numeric or integer
else {
data$facet <- data[,names(which(int.class != c("numeric","integer")))]
ggplot2::ggplot(data, ggplot2::aes_string(x=names(which(int.class == c("numeric","integer"))), y=data[,"model.Res"])) +
ggplot2::geom_point() +
ggplot2::geom_smooth() +
ggplot2::facet_wrap(~facet) +
ggplot2::geom_hline(yintercept = 0) +
ggplot2::xlab(xvar) +
ggplot2::ylab("Residuals") +
ggplot2::theme_classic() +
ggplot2::ggtitle(xvar)
}
}
}
}
# If not an interaction
else {
ggplot2::ggplot(data, ggplot2::aes_string(x=data[,xvar], y=data[,"model.Res"])) +
ggplot2::geom_point() +
ggplot2::geom_smooth() +
ggplot2::geom_hline(yintercept = 0) +
ggplot2::xlab(xvar) +
ggplot2::ylab("Residuals") +
ggplot2::theme_classic()
}
}
resid.component.plots <- lapply(x.ResidComponent, ResidComponent_fun)
}
# Multicollinearity
x_character <- which(sapply(data[,x],class) == "character")
if(length(x_character > 0)) {
x_vif <- x[-which(sapply(data[,x],class) == "character")]
} else {
x_vif <- x
}
if (length(x_vif) < 2) {
multicollinearity <- ("Model contains fewer than 2 terms, multicollinearity cannot be assessed.\n")
} else {
v <- as.matrix(stats::vcov(model))
assign <- attr(stats::model.matrix(model),"assign")[-which(attr(stats::model.matrix(model), "assign")==which(sapply(data[,x],class) == "character"))]
if (names(lme4::fixef(model)[1]) == "(Intercept)") {
v <- v[-1, -1]
assign <- assign[-1]
}
R <- stats::cov2cor(v)
detR <- det(R)
multicollinearity <- matrix(0, length(x_vif), 3)
rownames(multicollinearity) <- x_vif
colnames(multicollinearity) <- c("GVIF", "Df", "GVIF^(1/(2*Df))")
for (term in 1:length(x_vif)) {
subs <- which(assign == x_vif)
multicollinearity[term, 1] <- det(as.matrix(R[subs, subs])) *
det(as.matrix(R[-subs, -subs])) / detR
multicollinearity[x_vif, 2] <- length(subs)
}
if (all(multicollinearity[, 2] == 1)){
multicollinearity <- multicollinearity[, 1]
} else {
multicollinearity[, 3] <- multicollinearity[, 1]^(1/(2 * multicollinearity[, 2]))
}
multicollinearity <- multicollinearity[,1]
}
# Combining Results
result <- if(inherits(model,"glmerMod")){
list(linearity.plots,outliers,multicollinearity)
} else {
list(linearity.plots,homo.test,fitted.residual.plot,outliers,resid.normality.plot,resid.component.plots,multicollinearity)
}
names(result) <- if(inherits(model,"glmerMod")){
c("linearity.plots","outliers","multicollinearity")
} else {
c("linearity.plots","homo.test","fitted.residual.plot","outliers","resid.normality.plot","resid.component.plots","multicollinearity")
}
# Adding messages for summary/interpretation
if(!inherits(model,"glmerMod")){
message(if(result$homo.test$`Pr(>F)`[1] >= .05){
c("Homogeneity of variance assumption met.\n")
} else {
c("Homogeneity of variance assumption NOT met. See: TO DO ADD RESOURCES\n")
})
message(if(is.character(result$multicollinearity)){
c(result$multicollinearity)
} else {
if(any(result$multicollinearity > 5)){
c("Multicollinearity detected - VIF value above 5. This might be problematic for the model - consider removing the variable from the model.\n Check the multicollinearity object for more details.\n")
} else {
c("No multicollinearity detected in the model.\n")
}
})
message(if(outliers != "No outliers detected."){
c("Outliers detected. See outliers object for more information.\n")
} else {
c("No outliers detected.\n")
})
message(c("Visually inspect all 4 plot types by calling them from the object created by mlm_assumptions() such as object$fitted.residual.plot and object$resid.normality.plot. linearity.plots and resid.component.plots may contain more than one plot depending on the model specified. Check how many there are, for example using length(object$linearity.plots). Then inspect each plot within each object, for example using object$linearity.plots[[1]] to access the first plot within the linearity.plots list.\n"))
message(c("See ?mlm_assumptions for more details and resources."))
} else {
message(if(is.character(result$multicollinearity)){
c(result$multicollinearity)
} else {
if(any(result$multicollinearity > 5)){
c("Multicollinearity detected - VIF value above 5. This might be problematic for the model - consider removing the variable from the model.\n Check the multicollinearity object for more details.\n")
} else {
c("No multicollinearity detected in the model.\n")
}
})
message(if(result$outliers!="No outliers detected."){
c("Outliers detected. See outliers object for more information.\n")
} else {
c("No outliers detected.\n")
})
message(c("Visually inspect the linearity.plots object by calling it from the object created by mlm_assumptions() such as object$linearity.plots. linearity.plots may contain more than one plot depending on the model specified. Check how many there are, for example using length(object$linearity.plots). Then inspect each plot within the object, for example using object$linearity.plots[[1]] to access the first plot within the linearity.plots list.\n"))
message(c("See ?mlm_assumptions for more details and resources."))
}
# Assigning object class
class(result) <- "mlm_assumptions"
# Returning object
suppressMessages(return(result))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.