Nothing
#' Diagnostic plots for a fitted GAM, GAMM or threshold-GAM(M)
#'
#' \code{plot_diagnostics} takes a list of models of class `gam`, `gamm`
#' or `thresh_gam` or a mix of those and produces some diagnostic
#' information of the fitting procedure and results. The function returns a
#' tibble with 6 list-columns containing individual plots (ggplot2 objects)
#' and one list-column containing a plot that shows all diagnostic plots
#' together.
#'
#' @param model_list A list with models of class gam(m) and/or thresh_gam,
#' e.g. the list-column \code{model} from the \code{\link{model_gam}} output
#' tibble.
#'
#' @details
#' The function can deal with any model of the classes `gam`, `gamm` or
#' `thresh_gam` as long as the input is a flat list. That means:
#' \itemize{
#' \item If only one model is provided as input coerce the model explicitly
#' to class `list`. An input such as model_gam_ex[1, "model"] will not work
#' as the class is a tibble. Use instead model_gam_ex$model[1].
#' \item If the input are one or more threshold-GAMs selected from
#' the \code{\link{test_interaction}} output (variable \code{thresh_models}
#' the model list features a nested structure:
#' each IND~pressure pair (row) might have more than one threshold-GAM.
#' To remove the nested structure use e.g. the \code{\link[purrr]{flatten}}
#' function (see examples).
#' }
#'
#' @return
#' The function returns a \code{\link[tibble]{tibble}}, which is a trimmed down version
#' of the data.frame(), including the following elements:
#' \describe{
#' \item{\code{ind}}{Indicator names.}
#' \item{\code{press}}{Pressure names.}
#' \item{\code{cooks_dist}}{A list-column of ggplot2 objects that show the
#' Cook`s distance of all observations, which is a leave-one-out
#' deletion diagnostics to measure the influence of each
#' observation. Data points with a large Cook`s distance (> 1)
#' are considered to merit closer examination in the analysis.}
#' \item{\code{acf_plot}}{A list-column of ggplot2 objects that show the
#' autocorrelation function for the residuals. NAs in the time
#' series due to real missing values, test data extraction or
#' exclusion of outliers are explicitly considered.}
#' \item{\code{pacf_plot}}{A list-column of ggplot2 objects that show the
#' partial autocorrelation function for the residuals. NAs are
#' explicitly considered.}
#' \item{\code{resid_plot}}{A list-column of ggplot2 objects that show residuals
#' vs. fitted values.}
#' \item{\code{qq_plot}}{A list-column of ggplot2 objects that show the
#' quantile-quantile plot for normality.}
#' \item{\code{gcvv_plot}}{A list-column of ggplot2 objects that show for a
#' threshold-GAM the development of the generalized cross-validation
#' value at different thresholds level of the modifying pressure
#' variable. The GCV value of the final chosen threshold should be
#' distinctly lower than for all other potential thresholds, i.e.,
#' the line should show a pointy negative peak at this threshold.
#' If this is not the case, e.g. the trough is very wide with similar
#' GCV values for nearby thresholds, the threshold-GAM is not
#' optimal and should not be favored over a GAM despite the better
#' LOOCV (leave-one-out cross-validation value).}
#' \item{\code{all_plots}}{A list-column of ggplot2 objects that show all
#' five (six if threshold-GAM) plots together. For this plot,
#' drawing canvas from the \code{cowplot} package were added
#' on top of ggplot2.}
#' }
#'
#' @seealso \code{\link[stats]{cooks.distance}}, \code{\link[stats]{acf}},
#' \code{\link[stats]{pacf}}, \code{\link[stats]{qqnorm}}, and
#' \code{\link[purrr]{flatten}} for removing a level hierarchy from a list
#' @family IND~pressure modeling functions
#'
#' @export
#'
#' @examples
#' # Using some models of the Baltic Sea demo data:
#' # Apply function to a list of various model types
#' model_list <- c(all_results_ex$thresh_models[[5]],
#' model_gam_ex$model[39], all_results_ex$model[76])
#' plots <- plot_diagnostics(model_list)
#' plots$cooks_dist[[1]]
#' plots$acf_plot[[2]]
#' plots$pacf_plot[[3]]
#' plots$resid_plot[[1]]
#' plots$qq_plot[[1]]
#' plots$gcvv_plot[[1]] # for threshold models
#' plots$all_plots[[1]] # shows all 5-6 plots
#'
#' # Make sure that thresh_models have not a nested list structure:
#' model_list <- all_results_ex$thresh_models[5:6] %>% purrr::flatten(.)
#' plots <- plot_diagnostics(model_list)
plot_diagnostics <- function(model_list) {
# Data input validation --------------------
if (missing(model_list)) {
stop("Argument model_list is missing.")
}
# Check input and return warning if not a model
if ("data.frame" %in% class(model_list)) {
stop("You must provide a model or a list of models")
}
# If only 1 model used as input, which is not a
# list -> conversion
if (any(c("gam", "gamm") %in% class(model_list))) {
model_list <- list(model_list)
}
# In case model_list is a list of many models, but
# some are empty (NULL) or some are nested
# thresh_models --> flatten and discard
model_list <- model_list %>% purrr::discard(.,
is.na(.))
# ---------------------------------------------
# Get model type
check <- purrr::map_chr(model_list, ~class(.x)[1])
# Create many empty models
model_pred <- gcvv <- t_val <- best_t_val <- vector(mode = "list",
length = length(model_list))
cooks_dist <- model_resid <- model_resid_na <- vector(mode = "list",
length = length(model_list))
# Get model type
for (i in seq_along(model_list)) {
if (grepl("thresh", check[i])) {
pass_model <- model_list[[i]]
if (class(pass_model)[1] == "thresh_gam") {
cooks_dist[[i]] <- stats::cooks.distance(pass_model)
gcvv[[i]] <- pass_model$gcvv
t_val[[i]] <- pass_model$t_val
best_t_val[[i]] <- pass_model$mr
model_resid[[i]] <- mgcv::residuals.gam(pass_model,
type = "deviance")
# for testing autocorrelation incl. NAs
model_resid_na[[i]] <- rep(NA, length(pass_model$train_na))
model_resid_na[[i]][!pass_model$train_na] <- model_resid[[i]]
}
# Not yet implemented
if (class(pass_model)[1] == "thresh_gamm") {
pass_model <- pass_model$gam
cooks_dist[[i]] <- stats::cooks.distance(pass_model)
gcvv[[i]] <- pass_model$gcvv
t_val[[i]] <- pass_model$t_val
best_t_val[[i]] <- pass_model$mr
model_resid[[i]] <- stats::residuals(model_list[[i]]$lme,
type = "normalized")
# for testing autocorrelation incl. NAs
model_resid_na[[i]] <- rep(NA, length(model_list[[i]]$train_na))
# (or should it be length(pass_model$train_na))???)
model_resid_na[[i]][ !model_list[[i]]$train_na ] <- model_resid[[i]] }
} else {
if (check[i] == "gam") {
pass_model <- model_list[[i]]
cooks_dist[[i]] <- stats::cooks.distance(pass_model)
model_resid[[i]] <- mgcv::residuals.gam(pass_model,
type = "deviance")
# for testing autocorrelation incl. NAs
model_resid_na[[i]] <- rep(NA, length(model_list[[i]]$train_na))
model_resid_na[[i]][!model_list[[i]]$train_na] <- model_resid[[i]]
}
if (check[i] == "gamm") {
pass_model <- model_list[[i]]$gam
cooks_dist[[i]] <- cooks_dist_gamm(pass_model)
model_resid[[i]] <- stats::residuals(model_list[[i]]$lme,
type = "normalized")
# for testing autocorrelation incl. NAs
model_resid_na[[i]] <- rep(NA, length(model_list[[i]]$gam$train_na))
model_resid_na[[i]][!model_list[[i]]$gam$train_na] <- model_resid[[i]]
}
}
# Independent of the model class
model_pred[[i]] <- mgcv::predict.gam(pass_model,
type = "response")
}
# Get quantiles for the q-q-plot
quan_normal <- purrr::map(model_resid, ~quantile(x = rnorm(length(.))))
# (throws an error if 0% and 100% quantile are
# equal (in case of a missing model) - it`s not
# elegant but working..)
theo_quan <- purrr::map2(.x = quan_normal, .y = model_resid,
~seq(from = .x[1], to = .x[5], by = (.x[5] -
.x[1])/(length(.y) - 1)))
# Get acf/ pacf values
tac <- test_tac(model_resid_na)
# Get the lags from the acf/pacf functions (also
# used in external function test_tac())
acf_lag <- purrr::map(model_resid_na, ~as.vector(stats::acf(.,
na.action = stats::na.pass, plot = FALSE)$lag))
pacf_lag <- purrr::map(model_resid_na, ~as.vector(stats::pacf(.,
na.action = stats::na.pass, plot = FALSE)$lag))
# Helper function to create title including the corrstruct
# and threshold variable depending on the model
create_title <- function(x) {
if(grepl("thresh", class(x)[1])) { # add interacting variable for thresh_models
if(grepl("gamm", class(x)[1])) { # i.e. the thresh_gamm
t <- paste0(all.vars(x$gam$formula)[1], # ind
" ~ ", all.vars(x$gam$formula)[2], # press
" | ", all.vars(x$gam$formula)[3], # threshold variable
" \n (", toupper(class(x)[1]), # model class
")" )
} else { # thresh_gam
t <- paste0(all.vars(x$formula)[1], # ind
" ~ ", all.vars(x$formula)[2], # press
" | ", all.vars(x$formula)[3], # threshold variable
" \n (", toupper(class(x)[1]), # model class
")" )
}
} else {
if (grepl("gamm", class(x)[1])) {
temp <- strsplit(
utils::capture.output(x$lme$modelStruct$corStruct)[1],
"cor")[[1]][2]
corrstruct <- strsplit(temp, " ")[[1]][1]
if(grepl("ARMA", corrstruct)) {
corrstruct <- paste0(corrstruct,
attr(x$lme$modelStruct$corStruct, "p"),
",", attr(x$lme$modelStruct$corStruct, "q"))
}
t <- paste0(all.vars(x$gam$formula)[1], # ind
" ~ ", all.vars(x$gam$formula)[2], # press
" \n (", toupper(class(x)[1]), # model class
" [", toupper(corrstruct), "])")
} else {
t <- paste0(all.vars(x$formula)[1], # ind
" ~ ", all.vars(x$formula)[2], # press
" \n (", toupper(class(x)[1]), # model class
")" )
}
}
return(t)
}
title <- purrr::map(model_list, ~create_title(.x))
# Apply helper plot functions to each model
# (if input has values)
cook_plots <- purrr::map(1:length(cooks_dist),
~ if ( sum(is_value(cooks_dist[[.]])) > 1 ) {
plot_cook(values = cooks_dist[[.]], title = title[[.]])
} else {NA})
acf_plots <- purrr::map(1:length(tac$acf),
~ if ( sum(is_value(tac$acf[[.]])) > 1 ) {
plot_acf(x_var = acf_lag[[.]],
y_var = tac$acf[[.]], title = title[[.]])
} else {NA})
pacf_plots <- purrr::map(1:length(tac$pacf),
~ if ( sum(is_value(tac$pacf[[.]])) > 1 ) {
plot_pacf(x_var = pacf_lag[[.]],
y_var = tac$pacf[[.]], title = title[[.]])
} else {NA})
resid_plots <- purrr::map(1:length(model_pred),
~ if ( (sum(is_value(model_pred[[.]])) > 1) | (sum(is_value(model_resid[[.]])) > 1) ) {
plot_resid(model_resid = model_resid[[.]][!is.na(model_resid[[.]])],
model_fitted = model_pred[[.]], title = title[[.]])
} else {NA} )
qq_plots <- purrr::map(1:length(model_resid),
~ if ( sum(is_value(model_resid[[.]])) > 1 ) {
plot_qq(model_resid = model_resid[[.]][!is.na(model_resid[[.]])],
theo_quan = theo_quan[[.]][!is.na(theo_quan[[.]])], title = title[[.]])
} else {NA})
gcvv_plots <- purrr::pmap(list(t_val, gcvv, model_list,
best_t_val, title = title), plot_gcvv)
# Plot everything on 1 page (the absolutely longest
# step)
# --> since some plots might be NULL return NA instead
plot_all_diag_safe <- purrr::possibly(plot_all_diag, otherwise = NA)
all_plots <- suppressWarnings(purrr::pmap(list(p1 = cook_plots,
p2 = acf_plots, p3 = pacf_plots, p4 = resid_plots,
p5 = qq_plots, p6 = gcvv_plots, title = title),
plot_all_diag_safe))
# Get ind and press for output_tbl
get_names <- function(x) {
if (grepl("gamm", class(x)[1])) {
ind <- all.vars(x$gam$formula)[1]
press <- all.vars(x$gam$formula)[2]
} else {
ind <- all.vars(x$formula)[1]
press <- all.vars(x$formula)[2]
}
return(data.frame(ind = ind, press = press))
}
dat <- suppressWarnings(purrr::map_df(model_list,
get_names))
# Combine all together
out <- tibble::tibble(ind = dat$ind, press = dat$press,
cooks_dist = cook_plots, acf_plot = acf_plots,
pacf_plot = pacf_plots, resid_plot = resid_plots,
qq_plot = qq_plots, gcvv_plot = gcvv_plots,
all_plots = all_plots)
# Insert NA in $gcvv_plot if model not a
# thresh_gam(m)
out$gcvv_plot[check %in% c("gam", "gamm")] <- NA
# (NULL does not work)
### END OF FUNCTION
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.