spatialRF: Easy Spatial Modeling with Random Forest

Documented in plot_residuals_diagnostics

#' @title Plot residuals diagnostics
#' @description Plots normality and autocorrelation tests of model residuals.
#' @param model A model produced by [rf()], [rf_repeat()], or [rf_spatial()].
#' @param point.color Colors of the plotted points. Can be a single color name (e.g. "red4"), a character vector with hexadecimal codes (e.g. "#440154FF" "#21908CFF" "#FDE725FF"), or function generating a palette (e.g. `viridis::viridis(100)`). Default: `viridis::viridis(100, option = "F")`
#' @param line.color Character string, color of the line produced by `ggplot2::geom_smooth()`. Default: `"gray30"`
#' @param fill.color Character string, fill color of the bars produced by `ggplot2::geom_histogram()`. Default: `viridis::viridis(4, option = "F", alpha = 0.95 )[2]`
#' @param option (argument of [plot_moran()]) Integer, type of plot. If `1` (default) a line plot with Moran's I and p-values across distance thresholds is returned. If `2`, scatterplots of residuals versus lagged residuals per distance threshold and their corresponding slopes are returned. In models fitted with [rf_repeat()], the residuals and lags of the residuals are computed from the median residuals across repetitions. Option `2` is disabled if `x` is a data frame generated by [moran()].
#' @param ncol (argument of [plot_moran()]) Number of columns of the Moran's I plot if `option = 2`.
#' @param verbose Logical, if `TRUE`, the resulting plot is printed, Default: `TRUE`
#' @return A patchwork object.
#' @examples
#' if(interactive()){
#'
#'  #load example data
#'  data(plant_richness_df)
#'  data(distance_matrix)
#'
#'  #fit a random forest model
#'  x <- rf(
#'    data = plant_richness_df,
#'    dependent.variable.name = "richness_species_vascular",
#'    predictor.variable.names = colnames(plant_richness_df)[5:21],
#'    distance.matrix = distance_matrix,
#'    n.cores = 1
#'  )
#'
#'  #residuals diagnostics
#'  plot_residuals_diagnostics(x)
#'
#' }
#' @rdname plot_residuals_diagnostics
#' @importFrom ggplot2 stat_qq stat_qq_line geom_histogram element_text
#' @importFrom patchwork plot_annotation
#' @importFrom stats qqnorm
#' @export
plot_residuals_diagnostics <- function(
  model,
  point.color = viridis::viridis(
    100,
    option = "F"
  ),
  line.color = "gray10",
  fill.color = viridis::viridis(
    4,
    option = "F",
    alpha = 0.95
  )[2],
  option = 1,
  ncol = 1,
  verbose = TRUE
  ){

  #declaring variables to avoid check complaints
  x <- NULL
  y <- NULL
  Predicted <- NULL
  Residuals <- NULL

  #checking option
  if(!(option %in% c(1, 2)) |
     inherits(model, "data.frame")){
    option <- 1
  }

  #getting residuals
  residuals <- model$residuals$values
  residuals.df <- as.data.frame(residuals)
  predictions <- model$predictions$values

  #getting normality test
  normality <- model$residuals$normality

  #normality scores of the residuals
  residuals.qq <- qqnorm(residuals, plot.it=FALSE) %>%
    as.data.frame()

  #plot title
  plot.title <- paste0(
    "Shapiro W = ",
    round(normality$shapiro.w, 3),
    "; p-value = ",
    round(normality$p.value, 4),
    "; ",
    normality$interpretation
  )

  #qqplot
  p1 <- ggplot2::ggplot(data = residuals.qq) +
    ggplot2::geom_point(
      data = residuals.qq,
      ggplot2::aes(
        x = x,
        y = y,
        color = y
      )
    ) +
    ggplot2::scale_color_gradientn(colors = point.color) +
    ggplot2::stat_qq_line(
      ggplot2::aes(sample = residuals),
      col = line.color,
      linetype = "dashed"
    ) +
    ggplot2::theme_bw() +
    ggplot2::theme(legend.position = "none") +
    ggplot2::ylab("Residuals") +
    ggplot2::xlab("Theoretical")

  #computing optimal binwidth for histogram
  #using the max of the Freedman-Diaconist rule
  #or 1/100th of the data range
  bw <- max(
    2 * stats::IQR(residuals) / length(residuals)^(1/3),
    (range(residuals)[2] - range(residuals)[1]) / 100
  )

  #histogram
  p2 <- ggplot2::ggplot(data = residuals.df) +
    ggplot2::aes(
      x = residuals
      ) +
    ggplot2::geom_histogram(
      binwidth = bw,
      color = NA,
      fill = fill.color
    ) +
    ggplot2::theme_bw() +
    ggplot2::theme(legend.position = "none") +
    ggplot2::geom_vline(
      xintercept = median(residuals),
      col = line.color,
      linetype = "dashed"
    ) +
    ggplot2::ylab("Count") +
    ggplot2::xlab("Residuals")

  #residuals vs predictions
  p3 <- ggplot2::ggplot(data = data.frame(
    Residuals = residuals,
    Predicted = predictions
  )
  ) +
    ggplot2::aes(
      x = Predicted,
      y = Residuals,
      color = Residuals
    ) +
    ggplot2::geom_hline(
      yintercept = 0,
      linetype = "dashed",
      color = line.color
    ) +
    ggplot2::geom_point() +
    ggplot2::scale_color_gradientn(colors = point.color) +
    ggplot2::theme_bw() +
    ggplot2::ggtitle("Residuals vs. predictions") +
    ggplot2::theme(
      plot.title = ggplot2::element_text(hjust = 0.5),
      legend.position = "none"
      )

  #final plot
  normality.plot <- (p1 + p2) / p3 + patchwork::plot_annotation(
    title = plot.title,
    theme = ggplot2::theme(plot.title = element_text(hjust = 0.5))
  )


  #getting autocorrelation if available
  if("autocorrelation" %in% names(model$residuals)){

    #getting autocorrelation plot
    autocorrelation.plot <- plot_moran(
      model = model,
      option = option,
      ncol = ncol,
      point.color = rev(point.color),
      line.color = line.color,
      verbose = FALSE
    )

    #combined plot
    p1 <- normality.plot / autocorrelation.plot

  } else {

    p1 <- normality.plot

  }

  if(verbose == TRUE){
    suppressWarnings(print(p1))
  }

  p1


}