#' @title Plot residuals diagnostics
#' @description Plots normality and autocorrelation tests of model residuals.
#' @param model A model produced by [rf()], [rf_repeat()], or [rf_spatial()].
#' @param point.color Colors of the plotted points. Can be a single color name (e.g. "red4"), a character vector with hexadecimal codes (e.g. "#440154FF" "#21908CFF" "#FDE725FF"), or function generating a palette (e.g. `viridis::viridis(100)`). Default: `viridis::viridis(100, option = "F")`
#' @param line.color Character string, color of the line produced by `ggplot2::geom_smooth()`. Default: `"gray30"`
#' @param fill.color Character string, fill color of the bars produced by `ggplot2::geom_histogram()`. Default: `viridis::viridis(4, option = "F", alpha = 0.95 )[2]`
#' @param option (argument of [plot_moran()]) Integer, type of plot. If `1` (default) a line plot with Moran's I and p-values across distance thresholds is returned. If `2`, scatterplots of residuals versus lagged residuals per distance threshold and their corresponding slopes are returned. In models fitted with [rf_repeat()], the residuals and lags of the residuals are computed from the median residuals across repetitions. Option `2` is disabled if `x` is a data frame generated by [moran()].
#' @param ncol (argument of [plot_moran()]) Number of columns of the Moran's I plot if `option = 2`.
#' @param verbose Logical, if `TRUE`, the resulting plot is printed, Default: `TRUE`
#' @return A patchwork object.
#' @examples
#' if(interactive()){
#'
#' #load example data
#' data(plant_richness_df)
#' data(distance_matrix)
#'
#' #fit a random forest model
#' x <- rf(
#' data = plant_richness_df,
#' dependent.variable.name = "richness_species_vascular",
#' predictor.variable.names = colnames(plant_richness_df)[5:21],
#' distance.matrix = distance_matrix,
#' n.cores = 1
#' )
#'
#' #residuals diagnostics
#' plot_residuals_diagnostics(x)
#'
#' }
#' @rdname plot_residuals_diagnostics
#' @importFrom ggplot2 stat_qq stat_qq_line geom_histogram element_text
#' @importFrom patchwork plot_annotation
#' @importFrom stats qqnorm
#' @export
plot_residuals_diagnostics <- function(
model,
point.color = viridis::viridis(
100,
option = "F"
),
line.color = "gray10",
fill.color = viridis::viridis(
4,
option = "F",
alpha = 0.95
)[2],
option = 1,
ncol = 1,
verbose = TRUE
){
#declaring variables to avoid check complaints
x <- NULL
y <- NULL
Predicted <- NULL
Residuals <- NULL
#checking option
if(!(option %in% c(1, 2)) |
inherits(model, "data.frame")){
option <- 1
}
#getting residuals
residuals <- model$residuals$values
residuals.df <- as.data.frame(residuals)
predictions <- model$predictions$values
#getting normality test
normality <- model$residuals$normality
#normality scores of the residuals
residuals.qq <- qqnorm(residuals, plot.it=FALSE) %>%
as.data.frame()
#plot title
plot.title <- paste0(
"Shapiro W = ",
round(normality$shapiro.w, 3),
"; p-value = ",
round(normality$p.value, 4),
"; ",
normality$interpretation
)
#qqplot
p1 <- ggplot2::ggplot(data = residuals.qq) +
ggplot2::geom_point(
data = residuals.qq,
ggplot2::aes(
x = x,
y = y,
color = y
)
) +
ggplot2::scale_color_gradientn(colors = point.color) +
ggplot2::stat_qq_line(
ggplot2::aes(sample = residuals),
col = line.color,
linetype = "dashed"
) +
ggplot2::theme_bw() +
ggplot2::theme(legend.position = "none") +
ggplot2::ylab("Residuals") +
ggplot2::xlab("Theoretical")
#computing optimal binwidth for histogram
#using the max of the Freedman-Diaconist rule
#or 1/100th of the data range
bw <- max(
2 * stats::IQR(residuals) / length(residuals)^(1/3),
(range(residuals)[2] - range(residuals)[1]) / 100
)
#histogram
p2 <- ggplot2::ggplot(data = residuals.df) +
ggplot2::aes(
x = residuals
) +
ggplot2::geom_histogram(
binwidth = bw,
color = NA,
fill = fill.color
) +
ggplot2::theme_bw() +
ggplot2::theme(legend.position = "none") +
ggplot2::geom_vline(
xintercept = median(residuals),
col = line.color,
linetype = "dashed"
) +
ggplot2::ylab("Count") +
ggplot2::xlab("Residuals")
#residuals vs predictions
p3 <- ggplot2::ggplot(data = data.frame(
Residuals = residuals,
Predicted = predictions
)
) +
ggplot2::aes(
x = Predicted,
y = Residuals,
color = Residuals
) +
ggplot2::geom_hline(
yintercept = 0,
linetype = "dashed",
color = line.color
) +
ggplot2::geom_point() +
ggplot2::scale_color_gradientn(colors = point.color) +
ggplot2::theme_bw() +
ggplot2::ggtitle("Residuals vs. predictions") +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5),
legend.position = "none"
)
#final plot
normality.plot <- (p1 + p2) / p3 + patchwork::plot_annotation(
title = plot.title,
theme = ggplot2::theme(plot.title = element_text(hjust = 0.5))
)
#getting autocorrelation if available
if("autocorrelation" %in% names(model$residuals)){
#getting autocorrelation plot
autocorrelation.plot <- plot_moran(
model = model,
option = option,
ncol = ncol,
point.color = rev(point.color),
line.color = line.color,
verbose = FALSE
)
#combined plot
p1 <- normality.plot / autocorrelation.plot
} else {
p1 <- normality.plot
}
if(verbose == TRUE){
suppressWarnings(print(p1))
}
p1
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.