#' Graphically diagnose model residuals ("classic" version with ggplot2 graphics).
#'
#' @usage cdiagnose(model, fit_type = 'response', residual_type = 'response', se = FALSE, alpha = 1)
#'
#' @param model An lm or glm object.
#' @param fit_type String. Default is "response". Type of fitted values to use based on options in predict().
#' @param residual_type String. Default is "response". Type of residuals values to use based on options in resid().
#' @param se Boolean. To overlay standard errors.
#' @param alpha Integer, [0, 1]. Points are more transparent the closer they are to 0.
#' @return 2x2 charts that closely resemble the output to plot(model.lm) with ggplot2 graphics.
#' @examples
#' model.lm <- lm(data = mtcars, formula = mpg ~ wt + gear)
#' cdiagnose(model.lm)
#' @seealso \url{https://github.com/robertschnitman/diagnoser} and
#' Raju Rimal (inspiration for Residuals vs. Leverage): \url{https://rpubs.com/therimalaya/43190}
######################################################################################
### Robert Schnitman
### 2017-11-14
###
### PURPOSE:
### 1. Generate 2x2 graphs that diagnose the residuals of a model.
### 2. Alternative for plot(lm()).
###
### IMPORTS: ggplot2 (>= 2.2.1), gridExtra (>= 2.3), scales (>= 0.5.0)
###
### RECOMMENDED CITATION:
### Schnitman, Robert (2017). cdiagnose.r. https://github.com/robertschnitman/diagnoser
###
### Idea for Residuals vs. Leverage inspired by
### https://rpubs.com/therimalaya/43190.
######################################################################################
##### === BEGIN === #####
cdiagnose <- function(model, fit_type = 'response', residual_type = 'response', se = FALSE, alpha = 1) {
### Type-checking ###
stopifnot(class(model)[1] == 'lm' | class(model)[1] == 'glm')
options(warn = -1)
if (require(ggplot2) == TRUE & require(gridExtra) == TRUE & require(scales) == TRUE) {
require(ggplot2)
require(gridExtra)
require(scales)
} else {
stop('Please install ALL of ggplot2, gridExtra, and scales.')
}
options(warn = 0)
### Set alpha value so that ggplot2 functions can process it ###
a <- alpha
### Graph is modified based on fit_type and residual_type specifications. ###
fit_type <- fit_type
family <- ifelse(attr(model, 'class') == 'lm', 'lm', model$family[1]) # To determine whether graph shows fitted values OR predicted probabilities.
pp <- 'Predicted Probabilities'
fv <- 'Fitted Values'
vspp <- 'vs. Predicted Probabilities'
vsfv <- 'vs. Fitted Values'
### Set up data frame of fit and residuals ###
fit <- predict(model, type = fit_type)
res <- resid(model, type = residual_type)
sr <- (res - mean(res))/sd(res) # Standardized Residuals.
qsr <- qqnorm(sr, plot.it = FALSE)[[1]] # Theoretical Quantiles; suppress plot.
lev <- hat(model.matrix(model)) # Leverage.
cd <- cooks.distance(model) # Cook's Distance.
df <- as.data.frame(cbind(fit, res, sr))
### Figure 1 - Residuals vs. Fitted ###
f1 <- ggplot(df, aes(y = res, x = fit)) +
geom_point(color = 'salmon', alpha = a) +
geom_hline(yintercept = 0,
col = 'red',
linetype = 'dashed') +
geom_smooth(method = 'loess',
se = se,
color = 'steelblue') +
labs(x = ifelse(fit_type == 'response' & family == 'binomial', pp, fv),
y = 'Residuals',
title = paste('Residuals',
ifelse(fit_type == 'response' & family == 'binomial', vspp, vsfv),
sep = ' ')) +
theme_bw()
### Figure 2 - Normal Q-Q Plot ###
f2 <- ggplot(df, aes(y = sr, x = qsr)) +
geom_point(color = 'salmon', alpha = a) +
geom_smooth(method = 'lm',
se = se,
linetype = 'dashed',
color = 'steelblue') +
labs(x = 'Theoretical Quantiles',
y = 'Standardized Residuals',
title = 'Normal Q-Q') +
theme_bw()
### Figure 3 - Scale-Location ###
f3 <- ggplot(df, aes(y = sqrt(abs(sr)), x = fit)) +
geom_point(color = 'salmon', alpha = a) +
geom_smooth(method = 'loess',
se = se,
color = 'steelblue') +
labs(x = ifelse(fit_type == 'response' & family == 'binomial', pp, fv),
y = expression(sqrt('|Standardized residuals|')),
title = 'Scale-Location') +
theme_bw()
### Figure 4 - Residuals vs. Leverage ###
f4 <- ggplot(df, aes(y = sr, x = lev, size = cd)) +
geom_point(color = 'salmon', alpha = a) +
geom_smooth(method = 'loess',
se = se,
color = 'steelblue',
show.legend = FALSE
) +
labs(x = 'Leverage',
y = 'Standardized Residuals',
size = 'Cook\'s Distance',
title = 'Residuals vs. Leverage') +
theme_bw() +
theme(legend.position = 'bottom')
grid.arrange(f1, f2, f3, f4, ncol = 2)
}
##### === END === #####
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.