consLettersUtils: Conservation Letters Utilities

Documented in poolResiduals

# =============================================================================
# Residual analysis for multiply imputed repeated analyses
# earlycapistran@comunidad.unam.mx - July 2020
# =============================================================================

#' @title Analyse mean residuals from multiply imputed repeated analyses
#' 
#' @description 
#' Provides residual analysis for multiply imputed repeated analyses, generated
#' by package 'mice', by extracting residual and fitted values from each 
#' complete-data model generated by the 'with.mids' function in 'mice', and 
#' then averaging residual and fitted values across all models.
#' 
#' This functions averages residuals and fitted values averaged across all 'm' 
#' complete-data models, and provides a normality plot and a plot of residual 
#' vs. fitted values. It also provides a Shapiro-Wilk normality test, 
#' Levene's test for homogeneity of variance, and residual mean. 
#' 
#' @param mira An object of class 'mira' generated by the 'with.mids' function 
#' in ' mice'
#' @return Residual plots (normality, residuals vs. fitted values, lag plot)
#' Shapiro-Wilk normality test, Levene Test for homogeneity of variance, 
#' Runs test for randomness.
#' @export
#' @usage
#' poolResiduals(mira)
#' 
#' @importFrom mice is.mira
#' @importFrom car leveneTest
#' @importFrom stats shapiro.test
#' @importFrom stats residuals
#' @importFrom stats predict
#' @importFrom stats qqnorm
#' @importFrom stats qqline

# To run this function, you must have 'car' installed
poolResiduals  <- function(mira) { 
  if (!is.mira(mira)) 
    stop("The object must have class 'mira'")
  if ((m <- length(mira$analyses)) < 2) 
    stop("At least two imputations are needed for pooling.\n")
  
  # Prepare residual and predicted values from each of the 'm' 
  # complete-data models ------------------------------------------------------
  # Extract residuals from each analysis
  resi <- sapply(mira$analyses,residuals)  
  resi <- apply(resi,1,mean)  # Average residual values
  resDf <- as.data.frame(resi)
  # Subset positive and negative values
  resDf$sign <- as.factor(ifelse(resDf < 0, "negative", "positive")) 
  # Extract predicted values from each analysis
  pred <- sapply(mira$analyses,predict)  
  pred <- apply(pred,1,mean)  # Average predicted values
  
  # Plot normality
  par(mfrow = c(1, 2))
  qqnorm(resDf$resi)
  qqline(resDf$resi)
  # Plot residual vs. fitted values
  res.plot <- plot(x = pred, y = resi,
                   xlab = "Fitted values", ylab = "Residuals",
                   main = "Residuals versus fitted values",
                   abline(h=0))
  # Get residual mean
  mean <- mean(resDf$resi)
  # Run tests
  norm <- stats::shapiro.test(resDf$resi)
  levene <- car::leveneTest(resDf$resi ~ sign,
                       data = resDf)
  result <- list(normality = norm, mean = mean, levene = levene)
  names(result) <- c(
    "Residual Normality Test", 
    "Residual Mean", 
    "Levene's Test"
    )
  print(result)
}