R/calc_fit.R

Defines functions calc_fit

Documented in calc_fit

#' Calculate model performance metrics
#'
#' Calculates model performace metrics from the netcdf file generated by running `run_ensemble`
#'
#' @param ncdf Path to the netcdf file generated by running `run_ensemble`
#' @param list Alternatively to `ncdf` a list of siimulated variables, as returned by
#'    `run_ensemble()` when argument `return_list = TRUE`
#' @param dim character; NetCDF dimensions to extract. Must be either "member" or "model". Defaults to "model". Only used if using the netCDF file. Currently only works with "model".
#' @param dim_index numeric; Index of dimension chosen to extract from. Defaults to 1. Only used if using the netCDF file.
#' @param model Vector of models for which to calculate the performance measures
#' @param var Variable for which to calculate the performance measures.
#' Defaults to "temp".
#' @param qualfun Function to calculate the performance measures. Per default calculates root
#'    mean suqared error (rmse), Nash-Shutcliff efficiency (nse), Pearson correlation (r),
#'    bias (bias), mean absolute error (mae), normalized mean absolute error (nmae), and bias.
#'    Can be any function that takes observed data as first, and simulated data at the same time
#'    and depth as the second argument.
#' @param avfun Name of the function to calculate the ensemble average, defaults to "mean"
#' @author Johannes Feldbauer
#' @export
#' @examples
#' \dontrun{
#' # using standard quality measures
#' calc_fit(ncdf = "output/ensemble_output.nc",
#'                    model = c("FLake", "GLM",  "GOTM", "Simstrat", "MyLake"),
#'                    var = "temp")
#' # using own performance measure
#'  calc_fit(ncdf = "output/ensemble_output.nc",
#'                   model = c("FLake", "GLM",  "GOTM", "Simstrat", "MyLake"),
#'                   var = "temp", qualfun = function(O, S) mean(O - S, na.rm = TRUE))
#' }
calc_fit <- function(ncdf, list = NULL, model, var = "temp", dim = "model", dim_index = 1,
                     qualfun = qual_fun, avfun = "mean") {

  # check if model input is correct
  model <- check_models(model)

  if(is.null(list)) {
    # get variable
    var_list <- load_var(ncdf, var = var, return = "list", dim = dim,
                         dim_index = dim_index, print = FALSE)
    if(dim_index != 1 & dim == "model") {
      obs_l <- load_var(ncdf, var = var, return = "list", dim = dim,
                        dim_index = 1, print = FALSE)
      var_list$Obs <- obs_l$Obs
    }
  } else {
    var_list <- list
    if(any(names(var_list) %in% paste0(c(model, "Obs"), "_", var))) {
      names(var_list) <- c(model, "Obs")
    }
  }

  # only the selected models
  if(dim == "model") {
    var_list <- var_list[c(model, "Obs")]
    n <- names(var_list)
    n_no_obs <- model
    # only select depth where observations are available
    obs_col <- which(apply(var_list$Obs, 2, function(x)sum(!is.na(x))) != 0)
    var_list <- lapply(c(model, "Obs"), function(m) dplyr::select(var_list[[m]], all_of(obs_col)))
    names(var_list) <- c(model, "Obs")
    # create list with long format data.frames
    var_long <-  lapply(model, function(m)
      cbind(data.frame(reshape2::melt(var_list[[m]],id.vars = "datetime")),
            data.frame(obs = reshape2::melt(var_list$Obs,id.vars = "datetime")$value)))

    names(var_long) <- model
  } else {
    # load observations
    obs_list <- load_var(ncdf, var = var, return = "list", dim = "model",
                         dim_index = 1, print = FALSE)
    var_list <- c(var_list, Obs = list(obs_list$Obs))
    # only select depth where observations are available
    obs_col <- which(apply(obs_list$Obs, 2, function(x)sum(!is.na(x))) != 0)
    n <- names(var_list)
    var_list <- lapply(n, function(m) dplyr::select(var_list[[m]], all_of(obs_col)))
    names(var_list) <- n
    n_no_obs <- n[! n %in% "Obs"]
    # create list with long format data.frames
    var_long <-  lapply(n_no_obs, function(m)
      cbind(data.frame(reshape2::melt(var_list[[m]],id.vars = "datetime")),
            data.frame(obs = reshape2::melt(var_list$Obs,id.vars = "datetime")$value)))

    names(var_long) <- n_no_obs
  }



 # change water depth to nummeric
 var_long <- purrr::map(var_long,
                        function(m) dplyr::mutate(m, variable = -as.numeric(gsub("wtr_", "",
                                                                                 variable))))

 # calculate ensemble average
 ens_data <- var_long[[n_no_obs[1]]]
 ens_data$value <- apply(sapply(n_no_obs, function(m) var_long[[m]]$value), 1, get(avfun),
                         na.rm = TRUE)
 var_long[[paste0("ensemble_",avfun)]] <- ens_data

 # calculate quality measures
 qual <- lapply(var_long, function(m){qualfun(m$obs, m$value)})


 return(qual)


}
aemon-j/LakeEnsemblR documentation built on April 11, 2025, 10:09 p.m.