#' Function to plot residuals for each model of the ensemble run
#'
#' Plot residual diagnostic plots. Residuals are calculated by (sim - obs)
#' for each corresponding depth and time step there is an observed value.
#'
#' @param ncdf filepath; to the netcdf file created by `run_ensemble()`
#' @param var string; of variable which to plot. Defaults to "temp"
#' @param dim character; NetCDF dimensions to extract. Must be either "member" or "model". Defaults to "model". Only used if plotting from netCDF file. Currently only works with "model".
#' @param dim_index numeric; Index of dimension chosen to extract from. Defaults to 1. Only used if plotting from netCDF file.
#' @param var_list list; of variables in the format when loaded using `load_var()`. Defaults to NULL
#' @param model string vector; of models which should be included in the plot. If NULL all models in the netCDF/list are plotted. Defaults to NULL.
#' @return list with four ggplot objects: "obs_res" = Observations versus residuals,
#' "res_depth" = Residuals versus depth,
#' "yday_res" = residuals for day of year,
#' "res_dist" = distribution of residuals
#' @author Tadhg Moore, Johannes Feldbauer
#' @importFrom reshape2 melt
#' @import ggplot2
#' @import dplyr
#' @import RColorBrewer
#' @examples
#' \dontrun{
#' plist <- plot_resid(ncdf = "output/ensemble_output.nc",var = "temp",
#' model = c('FLake', 'GLM', 'GOTM', 'Simstrat', 'MyLake'))
#' plist[['obs_res']]+
#' theme_classic()
#' }
#'
#'
#' @export
plot_resid <- function(ncdf = NULL, var = "temp", dim = "model", dim_index = 1,
var_list = NULL, model = NULL) {
# check if model input is correct
model <- check_models(model)
if(!is.null(ncdf)){
# Check if netCDF exists
if(!file.exists(ncdf)){
stop("File '", ncdf, "' does not exist. Check you have the correct filepath.")
}
# Check if var is in ncdf
vars <- gotmtools::list_vars(ncdf)
if(!(var %in% vars)){
stop("Variable '", var, "' is not present in the netCDF file '", ncdf, "'")
}
# get variable
var_list <- load_var(ncdf, var = var, dim = dim,
dim_index = dim_index, return = "list")
}else{
var_list <- var_list
}
# only the selected models
if(is.null(model) & dim == "model"){
model <- names(var_list)
model <- model[-c(length(model))] # Remove last name "Obs"
}
if(dim == "model") {
var_list <- var_list[c(model, "Obs")]
} else if(dim == "member") {
# Load obs data
obs_list <- load_var(ncdf, var = var, return = "list", dim = "model",
dim_index = 1, print = FALSE)
obs_list <- obs_list[["Obs"]]
var_list[["Obs"]] <- obs_list
}
data <- var_list %>%
reshape2::melt(id.vars = "datetime") %>%
dplyr::group_by(datetime)
colnames(data) <- c("datetime", "Depth", "value", "C4")
obs <- data %>%
dplyr::filter(C4 == "Obs")
colnames(obs) <- c("datetime", "Depth", "obs", "Observed")
dat <- data %>%
dplyr::filter(C4 != "Obs")
colnames(dat)[3] <- "mod"
# Check for observed values
if(sum(is.na(obs$obs)) == nrow(obs)){
stop("There are no observations in netCDF/list provided.
Please inspect the model output and re-run 'run_ensemble() if necessary.'")
}
# remove NAs
dat <- dat[!is.na(dat$mod), ] # Remove NAs
if(nrow(dat) == 0) {
stop("Modelled data is all NAs.
Please inspect the model output and re-run 'run_ensemble()' if necessary.")
}
obs <- obs[!is.na(obs$obs), ] # Remove NAs
if(nrow(obs) == 0) {
stop("Observed data is all NAs.
Please inspect the model output and re-run 'run_ensemble()' if necessary.")
}
# Colours
spec <- RColorBrewer::brewer.pal(11, "Spectral")
cols <- RColorBrewer::brewer.pal(8, "Set2")
dep_cols <- RColorBrewer::brewer.pal(3, "Paired")
df <- merge(dat, obs, by = 1:2)
df$depth <- -as.numeric(gsub("wtr_", "", df$Depth))
df$fdepth <- factor(df$depth)
df$res <- df$mod - df$obs
df$yday <- lubridate::yday(df$datetime)
pred <- seq(min(df$res, na.rm = TRUE), max(df$res, na.rm = TRUE), length.out = 100)
# Calculate densities for p5
if(dim == "member") {
n <- names(var_list)
n <- n[!n %in% "Obs"]
} else {
n <- model
}
dens <- lapply(n, FUN = function(x){
idx <- which(df$C4 == x)
mean <- mean(df$res[idx], na.rm = TRUE)
sd <- sd(df$res[idx], na.rm = TRUE)
data.frame(pred,
density = dnorm(pred, mean, sd),
C4 = x)
})
dens <- do.call("rbind", dens)
# # Obs v Mod
# p1 <- ggplot(df, aes(mod, obs, colour = depth))+
# # geom_abline(slope = 1, intercept = 0, linetype = "dashed")+
# # geom_hex()+
# geom_point(alpha = 0.1)+
# # scale_colour_viridis_c()+
# facet_wrap(~C4)+
# coord_fixed(xlim = rnge, ylim = rnge)
# Obs v Res
p2 <- ggplot(df, aes(obs, res, colour = depth)) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_colour_gradientn(colours = c("black", dep_cols[2], dep_cols[1])) +
geom_point(alpha = 0.1) +
coord_equal() +
ggtitle("Observed vs. Residuals")+
facet_wrap(~C4)
p2
# Res v Depth
p3 <- ggplot(df, aes(res, depth, colour = obs)) +
geom_vline(xintercept = 0, linetype = "dashed", colour = "grey") +
geom_point(alpha = 0.1) +
scale_colour_gradientn(colours = rev(spec)) +
labs(colour = "temp") +
ggtitle('Residuals vs. Depth') +
facet_wrap(~C4)
# Time v Res
p4 <- ggplot(df, aes(yday, res, colour = depth)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_point(alpha = 0.1) +
scale_colour_gradientn(colours = c("black", dep_cols[2], dep_cols[1])) +
ggtitle("Year day vs. Residuals") +
facet_wrap(~C4)
p5 <- ggplot(df) +
geom_histogram(aes(x = res, y = ..density..),
fill = cols[8], binwidth = 0.5) +
geom_line(data = dens, aes(x = pred, y = density), colour = cols[2], size = 1) +
geom_vline(xintercept = 0, linetype = "dashed") +
scale_x_continuous("Residuals (\u00B0C)") +
ggtitle("Density distribution of Residuals") +
facet_wrap(~C4)
plist <- list("obs_res" = p2, "res_depth" = p3,
"yday_res" = p4, "res_dist" = p5)
return(plist)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.