R/visualise_residuals.R

#' @title Visualise the Residuals of given a given Linear Model.
#' 
#' @description Plots appropriate visualisations for the residuals of a given dataset.
#' This function utilises ggplot2 to design and plot the Visualisations.
#' The plots a histogram of the residuals.
#' Plots a scatterplot for residuals and fitted values or predictors. 
#' Plots a qqplot to access the normality assumptions..
#' The plots are outputed as a list.
#' The plots can also be saved to a specified directory.
#' FUTURE NOTE: CONVERT THIS INTO A RESIDUAL ANALYSIS FUNCTION
#' 
#' @param model A linear model of type lm()
#' 
#' @param plots A character object specifying the type of residual plot to return.
#'     Either "Residual Hist", "Res vs. Fits", "Res vs. Pred", "QQplot".
#'     Default is "Residual Hist"
#' 
#' @param directory A character object specifying the directory where the visualisations are saved as a .pdf file.
#' 
#' @return Outputs a variety of bar charts or histograms
#' 
#' @import ggplot2
#' 
#' @export
#'
#' @seealso \code{\link{visualise_qqplot}}, \code{\link{visualise_variables_x}}, \code{\link{visualise_variables_xx}}
#' 
#' @examples 
#' # Example Data
#' mod <- lm(data = iris, Sepal.Length~.)
#' # save the curretnt working directory
#' dir <- getwd()
#' # Visualise the residuals of the model
#' visualise_residuals(model = mod, plots = "Res Hist")
#' visualise_residuals(model = mod, plots = "Res vs. Fits", directory = dir)
#' visualise_residuals(model = mod, plots = "Res vs. Pred")
#' visualise_residuals(model = mod, plots = "Res QQplot", directory = dir)
#' 
visualise_residuals <- function(model, 
                                plots = c("Res Hist", "Res vs. Fits", "Res vs. Pred", "Res QQplot"), 
                                directory = NULL)
  {
  # combine the residuals and predictors into a dataframe
  dataset <- as.data.frame(cbind(model$residuals, model$fitted.values, model$model))
  
  # Note the residuals are stored in the first column
  # Note the fitted values are stored in the second column
  # Note the predictor values are stored in the remaining columns
  colnames(dataset)[1:2] <- c("Residuals", "Fitted.Values")
  
  # Confirm correct choice for plots
  plots <- match.arg(plots)
  
  if(plots == "Res Hist"){
    
    residual_histogram <- ggplot(data = dataset, 
                                 mapping = aes(x = dataset[,1])) + 
                          geom_histogram(colour = "black",
                                         fill = "lightblue", 
                                         bins = 30) + 
                          # Give the labels to the plots
                          labs(title = "Histogram of Residuals", 
                               x = "Residuals", 
                               y = "Frequency") + 
                          # Set the limits for the axises of the plots
                          coord_cartesian(xlim = c(min(dataset[,1], na.rm = T), max(dataset[,1], na.rm = T)),
                                          expand = TRUE) +
                          # Add a vertical line at x = 0
                          geom_vline(xintercept = 0,
                                     colour = "red",
                                     size = 2) +
                          # Format the text of the plots 
                          theme_minimal()
    
    # print the residual histogram
    print(residual_histogram)
    
    # write the residual histogram to a specified directory
    if(!is.null(directory)) {
      
      ggsave(filename = "Residual_Histogram.pdf", 
             path = directory,
             device = "pdf",
             width = 8, 
             height = 6, 
             units = c("in"))
      
    }
    
  } else if (plots == "Res vs. Fits"){
    
    residual_vs_fits <- ggplot(data = dataset, 
                                mapping = aes(x = dataset[,2], 
                                              y = dataset[,1])) + 
                        geom_point() + 
                        # Add a horizontal line in red
                        geom_hline(yintercept = 0,
                                   colour = "red",
                                   size = 2) +
                        # Add a loess line in red
                        geom_smooth(method = "loess", 
                        colour = "blue", 
                        se = FALSE) +
                        # Add labels to the plot
                        labs(title = "Residuals vs Fits Scatterplot", 
                             x = "Fitted Values", 
                             y = "Residuals") + 
                        # Format the text of the plots 
                        theme_minimal()
    
    # print the residual vs fits plot
    print(residual_vs_fits)
    
    # write the residual vs fits plot to a specified directory
    if(!is.null(directory)) {
      
      ggsave(filename = "Residual_vs_Fits.pdf", 
             path = directory,
             device = "pdf",
             width = 8, 
             height = 6, 
             units = c("in"))
      
    }
    
  } else if(plots == "Res vs. Pred"){
    
    # Create the plot index p
    p = 1
    
    # use a for loop to visual each predictor
    for(i in 3:ncol(dataset)){
     
       # extract the name of the predictor variable
      xiname <- colnames(dataset)[i]
      
      # create the residual vs predictors plots
      res_vs_pred <- ggplot(data = dataset, 
                            mapping = aes(x = dataset[,i], 
                                          y = dataset[,1])) + 
                      geom_point() + 
                      # Add a horizontal line in red
                      geom_hline(yintercept = 0,
                                 colour = "red",
                                 size = 2) +
                      # Add a loess line in blue
                      geom_smooth(method = "loess", 
                                  colour = "blue", 
                                  se = FALSE) +
                      # Add labels to the plot
                      labs(title = paste("Residuals vs ",xiname, 
                                         "Scatterplot", sep = " "), 
                           x = xiname, 
                           y = "Residuals") + 
                      # Format the text of the plots 
                      theme_minimal()
      
      # print the plot
      print(res_vs_pred)
      
      # write the residual QQplot to a specified directory
      if(!is.null(directory)) {
        
        ggsave(filename = paste("Residual_vs_", xiname, 
                                "Scatterplot", ".pdf", sep = ""), 
               path = directory,
               device = "pdf",
               width = 8, 
               height = 6, 
               units = c("in"))
        
      }
      
      # print an update on the plots printed
      print(paste("Residual vs Predictor Plot ", p, " completed", spe = ""))
      
      # update plot index 
      p = p + 1
      
    }
    
  } else if(plots == "Res QQplot"){
    
    # Extract the residuals from the specified model
    Residuals <- model$residuals
    
    # construct the residual QQplot
    residual_qqplot <- visualise_qqplot(vector = Residuals, directory = NULL)
    
    # write the residual QQplot to a specified directory
    if(!is.null(directory)) {
      
      ggsave(filename = "Residual_QQplot.pdf", 
             path = directory,
             device = "pdf",
             width = 8, 
             height = 6, 
             units = c("in"))
      
    }
    
  }
  
} 
oislen/BuenaVista documentation built on May 16, 2019, 8:12 p.m.