R/ggdiagnose.R

Defines functions ggdiagnose

Documented in ggdiagnose

#' Graphically diagnose model residuals (ggplot2 version).
#'
#' @usage ggdiagnose(model, fit_type = 'response', residual_type = 'response', bins = 30, se = TRUE, freqpct = FALSE, alpha = 1)
#'
#' @param model An lm or glm object.
#' @param fit_type String. Default is "response". Type of fitted values to use based on options in predict().
#' @param residual_type String. Default is "response". Type of residuals values to use based on options in resid().
#' @param bins Number of bins to specify for histograms.
#' @param se Boolean. For overlaying shaded standard errors.
#' @param freqpct Boolean.
#' @param alpha Integer, [0, 1]. Points are more transparent the closer they are to 0. Only applies to scatter plots.
#' @return 2 scatter plots and 2 histograms of residuals and "residuals margin,"
#' which is the residuals as a percentage of the actual dependent variable values.
#' @examples
#'
#' # OLS case
#' model <- lm(data = mtcars, formula = mpg ~ wt + gear)
#' ggdiagnose(model, bins = NROW(mtcars), se = FALSE, freqpct = TRUE)
#'
#' # NLS case
#' model.nls <- nls(Ozone ~ theta0 + Temp^theta1, airquality)
#' ggdiagnose(model.nls)
#'
#' @seealso \url{https://github.com/robertschnitman/diagnoser}

########################################################################################
### Robert Schnitman
### 2017-11-15
###
### PURPOSE:
###    1. Generate 2x2 graphs that diagnose the residuals of a model.
###    2. Alternative for plot(model.object).
###
### IMPORTS: ggplot2 (>= 2.2.1), gridExtra (>= 2.3), scales (>= 0.5.0)
###
### RECOMMENDED CITATION:
###  Schnitman, Robert (2017). ggdiagnose.r. https://github.com/robertschnitman/diagnoser
########################################################################################


##### === BEGIN === #####

ggdiagnose <- function(model, fit_type = 'response', residual_type = 'response',
                       bins = 30, se = TRUE, freqpct = FALSE,
                       alpha = 1) {

  ### Type-checking ###
  lgm_condition <- class(model)[1] %in% c('lm', 'glm')
  nls_condition <- class(model)[1] == 'nls'

  stopifnot(lgm_condition | nls_condition)

  options(warn = -1)

  if (require(ggplot2) == TRUE & require(gridExtra) == TRUE & require(scales) == TRUE) {

    require(ggplot2)
    require(gridExtra)
    require(scales)

  } else {

    stop('Please install ALL of ggplot2, gridExtra, and scales.')

  }

  options(warn = 0)

  ### Set alpha value so that ggplot2 functions can process it ###
  a <- alpha

  ### Graph is modified based on fit_type and residual_type specifications. ###
  fit_type <- fit_type
  family   <-  if (class(model)[1] == 'lm') {
    'lm'
  } else if (class(model)[1] == 'glm') {
    model$family[1]
  } else if (class(model)[1] == 'nls') {
    'nls'
  }
  pp       <- 'Predicted Probabilities'
  fv       <- 'Fitted Values'
  vspp     <- 'vs. Predicted Probabilities'
  vsfv     <- 'vs. Fitted Values'

  ### Set up data frame of fit and residuals ###
  fit <- predict(model, type = fit_type)
  res <- if (lgm_condition) {
    resid(model, type = residual_type)

  } else if (nls_condition) {
    r                <- resid(model, residual_type)
    attr(r, 'label') <- NULL
    r

  }
  act <- res + fit

  pct <- (res/act)
  df  <- as.data.frame(cbind(fit, res, pct))

  ### ggplot2 graphs use the same functions/colors; need to minimize repeating code ###

  ## Residuals vs. fitted values ##
  rvf <- function(y, x, ylabel = 'yvar') {
    ggplot(df, aes(y = y, x = x)) +
      geom_point(color = 'salmon', alpha = a) +
      geom_hline(yintercept = 0,
                 col        = 'red',
                 linetype   = 'dashed') +
      geom_smooth(method = 'loess',
                  se     = se,
                  color  = 'steelblue') +
      labs(x     = ifelse(fit_type == 'response' & family == 'binomial', pp, fv),
           y     = ylabel,
           title = paste(ylabel,
                         ifelse(fit_type == 'response' & family == 'binomial', vspp, vsfv), sep = ' ')) +
      theme_bw()
  }

  ## Histogram of residuals ##
  histres <- function(x, xlabel, fpct = freqpct) {
    if (fpct == FALSE) {
      ggplot(df, aes(x = x)) +
        geom_histogram(bins   = bins,
                       fill   = 'salmon',
                       colour = 'black') +
        labs(x     = xlabel,
             y     = 'Frequency',
             title = paste('Distribution of', xlabel, sep = ' ')) +
        theme_bw()
    } else {
      ggplot(df, aes(x = x)) +
        geom_histogram(aes(y = ..count../sum(..count..)),
                       bins   = bins,
                       fill   = 'salmon',
                       colour = 'black') +
        scale_y_continuous(labels = scales::percent) +
        labs(x     = xlabel,
             y     = 'Frequency (%)',
             title = paste('Distribution of', xlabel, sep = ' ')) +
        theme_bw()
    }

  }

  ### grid.arrange() requires each of the graphs to be created beforehand ###
  f1 <- rvf(y = res, x = fit, ylabel = 'Residuals')          # Figure 1 - Residuals vs. Fitted.
  f2 <- rvf(y = pct, x = fit, ylabel = 'Residuals (%)') +    # Figure 2 - Residuals Margin (%) vs. Fitted.
    scale_y_continuous(labels = scales::percent)
  f3 <- histres(x = res, xlabel = 'Residuals')               # Figure 3 - Distribution of Residuals.
  f4 <- histres(x = pct, xlabel = 'Residuals (%)') +         # Figure 4 - Distribution of Residuals Margin (%).
    scale_x_continuous(labels = scales::percent)

  ### Arrange in 2x2 grid ###
  grid.arrange(f1, f2, f3, f4, ncol = 2)

}

##### === END === #####
robertschnitman/schnitr documentation built on Aug. 18, 2022, 8:39 p.m.