R/residdiagnostics.R

Defines functions res.diagnostics

Documented in res.diagnostics

#' Illustrate the results from residual analysis
#'
#' At least seven plots can be produced in one panel.
#' This function prints out a table with plots similar to the one generated by SAS 
#' as well as one by one diagram describing and analyzing the residuals from a linear regression.
#' @param lmobject a fitted regression model from `lm`. With only this argument the function will print out all the plots one by one.
#' @param block `TRUE` all the plots about the residuals will be in one panel.
#' @param outliers `TRUE` only plots regarding the residuals will be produced in two scatter plots and a boxplot.
#'        The first scatter scatter plot is between the actual residuals from `lm` function and the fitted values. 
#'        The second scatter plot it the studentized residuals vs the fitted values. The boxplot is from the studentized residuals.
#' @param normality `TRUE` it will give an histogram of the residuals, a qq-plot of the residuals and 
#'        a scatter plot between the fitted values and the the actual values of the dependent variable is also included.
#' @param outnorm `TRUE` will give a panel with 6 diagrams for the residuals in `outliers` and in `normality` together.
#' @param regressors `TRUE` one or more than one scatter plot regarding each independent variable and the residuals will be produced.
#' For more than 9 regressors than it will produce one by one plot. If the independent variable is kvalitative, transform first as factor and than it will automatically produce a boxplot.
#' @export  
#' @examples
#' library(regkurs)
#' lmfit = lm(nRides ~ temp + hum + windspeed, data = bike)
#' res.diagnostics(lmfit) #6 plots together
#' res.diagnostics(lmfit,block=T)#all the plots in one panel
#' res.diagnostics(lmfit,outliers=T)#only the first three plots regarding residuals, studentized residuals
#' res.diagnostics(lmfit,normality=T)#only three plots regarding normality and fitting
#' res.diagnostics(lmfit,outnorm = F)#all plots one by one
#' res.diagnostics(lmfit,regressors=T)#show only the relationship between the residuals and each regressor included in lm()



res.diagnostics<-function(lm_obj,block=F,outliers=F,normality=F,outnorm=T,regressors=F){

  
  fitted<-fitted(lm_obj)
  residuals<-resid(lm_obj)
  
  
  k<-summary(lm_obj)$df[1]
  n<-length(lm_obj$residuals)
  residueStud<-rstudent(lm_obj)
  lab_x<-names(summary(lm_obj)$coef[,1])[-1]
  tkrit<-qt(1-0.025,n-k-1)
  lab_y<-names(lm_obj$model)[1]
  y<-lm_obj$model[,1]
  x<-lm_obj$model[,-1]
  nx<- dim(lm_obj$model)[2]#number of covariates
  
  
  
  histog<-function(x){
    x2 <- seq(min(x), max(x), length = length(x))
    
    # Normal curve
    fun <- dnorm(x2, mean = mean(x), sd = sd(x))
    
    # Histogram
    
    p_hist = hist(x, plot = F)
    hist(x, prob = TRUE, col = "light blue", xlab="Residuals",
         ylim = c(0, max(p_hist$density, fun)),
         main = "Histogram with normal curve")
    
    lines(x2, fun, col = "blue", lwd = 2)
    
  }  
  
  if(outnorm==F){
    
    
    # Fitted values vs residuals
    p1<-plot(fitted, residuals, ylab='Residuals',xlab='Fitted values',pch=20)
    abline(0, 0,lty=2,lwd=2,col="red")
    
    
    
   p2<-plot(fitted, residueStud,col="blue",ylab='Stud.Residuals',xlab='Fitted values',pch=20,
                   ylim=c(min(residueStud,-2),max(residueStud,2)))
    abline(0, 0,lty=2,lwd=2,col="red")
    abline(tkrit, 0,lty=2,lwd=2,col="brown")
    abline(0,0,lty=2,lwd=2,col="green")
    abline(-tkrit,0,lty=2,lwd=2,col="brown")
    
    
    p3<-boxplot(residueStud,xlab='Studentized residuals',col="dark green")
    
    
    # histogram for residuals
    
    
    
    
    p4<-histog(residuals)
    
    p5<- qqnorm(residuals,main="",pch=20)
    qqline(residuals,lty=2,lwd=2,col="red")
    
    p6<-plot(fitted,y,pch=20,xlab="Fitted values",ylab=lab_y)
    abline(0,1,lty=2,lwd=2,col="red")
    
    
    
    plot_final <-list(p1, p2,p3,p4,p5,p6) 
    
    for(i in 1:(nx-1)){
      
      if(nx==2){
        xx<-x
      }else{
        xx<-x[,i]
      }
      plot(xx, residuals, ylab='Residuals', xlab=lab_x[i]) 
      abline(0, 0,lty=2,lwd=2,col="red")#add horizontal line at 0 
    }
    
  }
  
  if(outnorm==T){
    
    par(mfrow=c(2,3), mar=c(5,5,2,2))
    
    # Fitted values vs residuals
    p1<-plot(fitted, residuals,
             ylab='Residuals',xlab='Fitted values',pch=20)
    abline(0, 0,lty=2,lwd=2,col="red")
    
    
    
p2<-plot(fitted, residueStud,col="blue",ylab='Stud.Residuals',xlab='Fitted values',pch=20,
                   ylim=c(min(residueStud,-2),max(residueStud,2)))
    abline(0, 0,lty=2,lwd=2,col="red")
    abline(tkrit, 0,lty=2,lwd=2,col="brown")
    abline(0,0,lty=2,lwd=2,col="green")
    abline(-tkrit,0,lty=2,lwd=2,col="brown")
    
    
    p3<-boxplot(residueStud,xlab='Studentized residuals',col="dark green")
    
    
    
    p4<-histog(residuals)
    
    p5<- qqnorm(residuals,main="",pch=20)
    qqline(residuals,lty=2,lwd=2,col="red")
    
    p6<-plot(fitted,y,pch=20,xlab="Fitted values",ylab=lab_y)
    abline(0,1,lty=2,lwd=2,col="red")
    
    
    
    plot_final <-list(p1, p2,p3,p4,p5,p6)
    
  }
  
  
  
  if(block==T){
    n<-nx-1
    if(n<=3){
      par(mfrow=c(3,3), mar=c(5,5,2,2))
    }
    if(n>=4 &n<=6){
      par(mfrow=c(4,3), mar=c(5,5,2,2))
    }
    if(n>=7){
      par(mfrow=c(1,1))
    }
    
    
    
    # Fitted values vs residuals
    p1<-plot(fitted, residuals,
             ylab='Residuals',xlab='Fitted values',pch=20)
    abline(0, 0,lty=2,lwd=2,col="red")
    
    
    
     p2<-plot(fitted, residueStud,col="blue",ylab='Stud.Residuals',xlab='Fitted values',pch=20,
                   ylim=c(min(residueStud,-2),max(residueStud,2)))
    abline(0, 0,lty=2,lwd=2,col="red")
    abline(tkrit, 0,lty=2,lwd=2,col="brown")
    abline(0,0,lty=2,lwd=2,col="green")
    abline(-tkrit,0,lty=2,lwd=2,col="brown")
    
    
    p3<-boxplot(residueStud,xlab='Studentized residuals',col="dark green")
    
    
    
    p4<-histog(residuals)
    
    p5<- qqnorm(residuals,main="",pch=20)
    qqline(residuals,lty=2,lwd=2,col="red")
    
    p6<-plot(fitted,y,pch=20,xlab="Fitted values",ylab=lab_y)
    abline(0,1,lty=2,lwd=2,col="red")
    
    
    
    plot_final <-list(p1, p2,p3,p4,p5,p6) 
    
    
    
    for(i in 1:(nx-1)){
      
      if(nx==2){
        xx<-x
      }else{
        xx<-x[,i]
      }
      plot(xx, residuals, ylab='Residuals', xlab=lab_x[i]) 
      abline(0, 0,lty=2,lwd=2,col="red")#add horizontal line at 0 
    }
    
    
    
  }
  
  
  
  
  if(outliers==T){
    
    
    par(mfrow=c(2,2), mar=c(5,5,2,2))
    
    # Fitted values vs residuals
    p1<-plot(fitted, residuals,
             ylab='Residuals',xlab='Fitted values',pch=20)
    abline(0, 0,lty=2,lwd=2,col="red")
    
    
    
  p2<-plot(fitted, residueStud,col="blue",ylab='Stud.Residuals',xlab='Fitted values',pch=20,
                   ylim=c(min(residueStud,-2),max(residueStud,2)))
    abline(0, 0,lty=2,lwd=2,col="red")
    abline(tkrit, 0,lty=2,lwd=2,col="brown")
    abline(0,0,lty=2,lwd=2,col="green")
    abline(-tkrit,0,lty=2,lwd=2,col="brown")
    
    
    p3<-boxplot(residueStud,xlab='Studentized residuals',col="dark green")
    
    final<-list(p1,p2,p3)
  }
  
  
  if(regressors==T){
    n<-nx-1
    if(n<=4){
      par(mfrow=c(2,2))
    }
    if(n>=5 &n<=6){
      par(mfrow=c(3,2))
    }
    if(n>=7 &n<=9){
      par(mfrow=c(3,3))
    }
    if(n>=10){
      par(mfrow=c(1,1))
    }
    
    
    for(i in 1:(nx-1)){
      
      if(nx==2){
        xx<-x
      }else{
        xx<-x[,i]
      }
      plot(xx, residuals, ylab='Residuals', xlab=lab_x[i]) 
      abline(0, 0,lty=2,lwd=2,col="red")#add horizontal line at 0 
    }
    
    
  }
  
  if(normality==T){
    
    
    par(mfrow=c(2,2), mar=c(5,5,2,2))
    
    
    p4<-histog(residuals)
    
    p5<- qqnorm(residuals,main="",pch=20)
    qqline(residuals,lty=2,lwd=2,col="red")
    
    p6<-plot(fitted,y,pch=20,xlab="Fitted values",ylab=lab_y)
    abline(0,1,lty=2,lwd=2,col="red")
    
    final2<-list(p4,p5,p6)
  }
}   
StatisticsSU/regkurs documentation built on Jan. 29, 2023, 4:54 p.m.