#' Advanced residual plots and tests for linear models
#'
#' Function for creating residual plots in ggplot style and all necessary tests,
#' so that the assumptions of linear regression models can be verified.
#' @param mdl A linear model of class lm.
#' @param dat The dataset on which the model was applied
#' @param results "markdown" creates a markdown summary document.
#' "return" returns all analyses in a list.
#' "print" prints the output on the console and shows the plot.
#' @param mkd_path Can be used to set the path, where the created markdown document should be stored.
#' If NULL, the current path (getwd()) will be used to save the markdown document.
#' @return Returns, if desired, a list of length 6 with an summary of tests,
#' all calculated tests and plots.
#' @author Dennis Freuer
#' @importFrom MASS stdres
#' @importFrom stats dnorm shapiro.test
#' @importFrom lmtest bptest dwtest
#' @importFrom gridExtra grid.arrange
#' @importFrom car vif
#' @import ggplot2 ggfortify dplyr
#' @export
#'
verif_lm <- function(mdl, dat=NULL, results=c("markdown","return","print"), mkd_path=NULL){
library(rms); library(Hmisc); library(dplyr)
library(MASS); library(lmtest); library(car); library(ggfortify)
if(any(class(mdl) %in% "orm")){ # convert Harrel's orm model in a lm object
if(is.null(dat)){ stop("Dataset required in case of Harrel's orm model") }
mdl <- lm(mdl, data=dat)
}
sres <- MASS::stdres(mdl)
bp <- bptest(mdl)
dw <- dwtest(mdl, alternative="two.sided")
sw <- shapiro.test(sres)
gof <- broom::glance(mdl)
gof <- bind_cols("n"=nobs(mdl), gof[,c(1,2,5,8,9)])
sw$method <- "Shapiro-Wilk normality test of standardised residuals"
bp$method <- "Breusch-Pagan homoscedasticity test"
dw$method <- "Durbin-Watson autocorrelation test"
sw$data.name <- as.character(mdl$call)
bp$data.name <- as.character(mdl$call)
dw$data.name <- as.character(mdl$call)
v <- vif(mdl)
g <- tryCatch({
ggplot2::autoplot(mdl, which = 1:6, ncol = 3, label.size = 3) # doesn't work for spline models
}, warning = function(w){ message(w)
}, error = function(e){ # Create manually residual-plots:
list(
ggplot(data.frame(), aes(x=mdl$fitted.values, y=mdl$residuals)) +
geom_hline(yintercept=0, linetype="dashed", colour="darkgrey") + geom_point() +
ggtitle("Residuals vs Fitted") + ylab("Residuals") + xlab("Fitted values"),
# QQ-Plot:
ggplot(data.frame(), aes(sample = MASS::stdres(mdl))) + stat_qq() + stat_qq_line(),
# Standardized residuals
ggplot(data.frame(), aes(x=mdl$fitted.values, y=sqrt(abs(MASS::stdres(mdl))))) +
geom_point() + ggtitle("Scale-Location") + ylab("sqrt(abs(stand residuals))")+
xlab("Fitted values"),
# empty content:
ggplot() + geom_point(),
# Cook's D:
ggplot(data.frame(), aes(x=hatvalues(mdl), y=MASS::stdres(mdl))) +
geom_hline(yintercept=0, linetype="dashed", colour="darkgrey") + geom_point() +
ggtitle("Residuals vs Leverage") + ylab("Standardized residuals") +
xlab(paste("Leverage, max Cooks D =", round(max(cooks.distance(mdl)),2))),
# Cook's D:
ggplot(data.frame(), aes(x=hatvalues(mdl), y=cooks.distance(mdl))) +
geom_hline(yintercept=0, linetype="dashed", colour="darkgrey") +
geom_vline(xintercept=0, linetype="dashed", colour="darkgrey") +
geom_point() + xlab("Leverage") +
ggtitle("Cook's dist vs Leverage") + ylab("Cook's distance")
)
})
g2 <- ggplot2::ggplot(data=data.frame(), ggplot2::aes(sres)) +
ggplot2::geom_histogram(ggplot2::aes(y=..density..), fill="grey40", color="black", bins=30) +
ggplot2::ggtitle("Residuals vs Normal") +
ggplot2::stat_function(data=data.frame(), fun=function(x) dnorm(x), color="blue") +
ggplot2::ylab(NULL) + ggplot2::xlab("Standardized Residuals")
if(results[1]=="print"){
cat("Goodness of fit:\n")
print(gof)
print(sw)
print(bp)
print(dw)
cat("\nVariance Inflation Factors (VIF)\ndue to multicollinearity:\n")
print(round(sort(v, decreasing=TRUE), 2))
gridExtra::grid.arrange(g[[1]],g[[2]],g2,g[[3]],g[[4]],g[[6]], nrow=2)
} else{
mx <- matrix(nrow=5, ncol=4)
colnames(mx) <- c("Test", "Tests", "p_value", "Decision")
mx[1:4,1] <- c("Shapiro-Wilk", "Breusch-Pagan","Durbin-Watson",
"Variance Inflation Factor")
mx[1:4,2] <- c("Normality of stand. residuals", "Homoscedasticity",
"Autocorrelation", "Muticollinearity")
mx[1:3,3] <- round(c(sw$p.value, bp$p.value, dw$p.value),3)
mx[1,4] <- ifelse(sw$p.value < 0.05, "Normality cannot be assumed",
"Normality can be assumed")
mx[2,4] <- ifelse(bp$p.value < 0.05, "Homoscedasticity cannot be assumed",
"Homoscedasticity can be assumed")
mx[3,4] <- ifelse(dw$p.value < 0.05, "Observations are autocorrelated",
"Observations are independent")
v <- sort(v[(sapply(mdl$model,class) %in% c("numeric","integer"))],
decreasing=TRUE)
mx[4,4] <- ifelse(v[1] >= 5, "Num covariates are multicorrelated",
"Num covariates are not multicorrelated")
mx <- rbind(mx, c("VIF (decreasing):", rep("",3)))
v <- round(v,2)
for(i in 1:length(v)){
mx <- rbind(mx, c(paste0("VIF: ",v[i],
" (",names(v[i]),")"), rep("",3)))
mx[is.na(mx)] <- ""
}
if(results[1]=="return"){
return(list("test_summary"=mx, "sw_test"=sw, "bp_test"=bp, "dw_test"=dw,
"vif"=v, "plot_resid"=g, "plot_hist"=g2, "gof"=gof, "mdl"=mdl))
} else if(results[1] == "markdown"){
ll <- list()
ll[[as.character(formula(mdl)[2])]] <-
list("test_summary"=mx, "sw_test"=sw, "bp_test"=bp, "dw_test"=dw,
"vif"=v, "plot_resid"=g, "plot_hist"=g2, "gof"=gof, "mdl"=mdl)
mkd_lm_verif(ll, mkd_path)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.