R/generic_stats.R

Defines functions R2 r_means CV_sim CV_obs sd_sim sd_obs mean_sim mean_obs n_obs statistics statistics_situations

Documented in CV_obs CV_sim mean_obs mean_sim n_obs R2 r_means sd_obs sd_sim statistics statistics_situations

#' Generic statistics for all situations
#'
#' @description Compute statistics for simulations of one or several situations
#' with its observations, eventually grouped by a model version
#' (or any group actually).
#'
#' @param ...  Simulation outputs (each element= model version), each being a
#' named list of `data.frame` for each situation.
#' See examples.
#' @param obs  A list (each element= situation) of observations `data.frame`s
#' (named by situation)
#' @param stat A character vector of required statistics, "all" for all, or any
#' of [predictor_assessment()] (e.g. `"n"` or `"RMSE"`, or both `c("n", "RMSE")`).
#' @param all_situations Boolean (default = TRUE). If `TRUE`, computes
#' statistics for all situations.
#' @param all_plants Boolean (default = TRUE). If `TRUE`, computes statistics
#' for all plants (when applicable).
#' @param verbose Boolean. Print information during execution.
#' @param formater The function used to format the models outputs and
#' observations in a standard way. You can design your own function
#' that format one situation and provide it here (see [statistics()] and
#' [format_cropr()] for more information).
#'
#' @seealso All the functions used to compute the statistics:
#' [predictor_assessment()].
#'
#' @return A [tibble::as_tibble()] with statistics grouped by group
#' (i.e. model version) and situation
#'
#' @keywords internal
statistics_situations <- function(..., obs = NULL, stat = "all",
                                  all_situations = TRUE,
                                  all_plants = TRUE, verbose = TRUE, formater) {
  . <- NULL
  dot_args <- list(...)

  # Name the groups if not named:
  if (is.null(names(dot_args))) {
    names(dot_args) <- paste0("Version_", seq_along(dot_args))
  }

  # Restructure data into a list of one single element if all_situations
  if (all_situations) {
    list_data <- cat_situations(dot_args, obs)
    dot_args <- list_data[[1]]
    obs <- list_data[[2]]

  } else {
    list_data <- add_situation_col(dot_args, obs)
    dot_args <- list_data[[1]]
    obs <- list_data[[2]]
    # obs_sd <- list_data[[3]]
  }


  # Compute stats (assign directly into dot_args):
  for (versions in seq_along(dot_args)) {
    class(dot_args[[versions]]) <- NULL
    # Remove the class to avoid messing up with it afterward
    for (situation in rev(names(dot_args[[versions]]))) {
      # NB: rev() is important here because if the result is NULL,
      # the situation is popped out of the list, so we want to decrement the
      # list in case it is popped (and not increment with the wrong index)
      dot_args[[versions]][[situation]] <-
        statistics(
          sim = dot_args[[versions]][[situation]],
          obs = obs[[situation]], all_situations = all_situations,
          all_plants = all_plants, verbose = verbose,
          formater = formater,
          stat = stat
        )
    }
  }

  stats <-
    lapply(dot_args, dplyr::bind_rows, .id = "situation") %>%
    dplyr::bind_rows(.id = "group") %>%
    {
      if (length(stat) == 1 && stat == "all") {
        .
      } else {
        stat <- c("group", "situation", "variable", stat)
        dplyr::select(., !!stat)
      }
    }
  class(stats) <- c("statistics", class(stats))


  return(stats)
}

#' Generic simulated/observed statistics for one situation
#'
#' @description Compute statistics for evaluation of any model outputs against
#' observations, providing a formater function (see [format_cropr()]).
#'
#' @param sim A simulation data.frame
#' @param obs An observation data.frame (variable names must match)
#' @param all_situations Boolean (default = FALSE). If `TRUE`, computes
#' statistics for all situations. If `TRUE`, \code{sim} and \code{obs} are
#' respectively an element of the first element and the second element of the
#' output of cat_situations.
#' @param all_plants Boolean (default = TRUE). If `TRUE`, computes statistics
#' for all plants (when applicable).
#' @param verbose Boolean. Print informations during execution.
#' @param formater The function used to format the models outputs and
#' observations in a standard way. You can design your own function
#' that format one situation and provide it here.
#' @param stat A character vector of required statistics, "all" for all, or any
#' of [predictor_assessment()] (e.g. `"n"` or `"RMSE"`, or both `c("n", "RMSE")`).
#'
#' @note Because this function has the purpose to assess model quality, all
#'       statistics are computed on dates were observations are present only.
#'       So the simulation mean is only the mean on dates with observations,
#'       not the overall simulation mean.
#'
#' @return A data.frame with statistics for each variable and possibly each
#' grouping variable returned by the formater.
#'
#' @importFrom reshape2 melt
#' @importFrom parallel parLapply stopCluster
#' @importFrom dplyr ungroup group_by summarise "%>%" filter
#' @importFrom plyr join_all
#' @examples
#' \dontrun{
#' workspace <- system.file(file.path("extdata", "stics_example_1"),
#'   package = "CroPlotR"
#' )
#' situations <- SticsRFiles::get_usms_list(
#'   usm_path =
#'     file.path(workspace, "usms.xml")
#' )
#' sim <- SticsRFiles::get_sim(workspace = workspace, usm = situations)
#' obs <- SticsRFiles::get_obs(workspace = workspace, usm = situations)
#' statistics(
#'   sim = sim$`IC_Wheat_Pea_2005-2006_N0`,
#'   obs = obs$`IC_Wheat_Pea_2005-2006_N0`,
#'   formater = format_cropr
#' )
#' }
#'
#' @keywords internal
#'
statistics <- function(sim, obs = NULL, all_situations = FALSE,
                       all_plants = TRUE, verbose = TRUE, formater, stat="all") {
  . <- NULL # To avoid CRAN check note

  is_obs <- !is.null(obs) && nrow(obs) > 0

  if (!is_obs) {
    if (verbose) cli::cli_alert_warning("No observations found")
    return(NULL)
  }

  # Testing if the obs and sim have the same plants names:
  if (is_obs && !is.null(obs$Plant) && !is.null(sim$Plant)) {
    common_crops <- unique(sim$Plant) %in% unique(obs$Plant)

    if (any(!common_crops)) {
      cli::cli_alert_warning(
        paste0(
          "Observed and simulated crops are different. Obs Plant: ",
          "{.value {unique(obs$Plant)}},
              Sim Plant: {.value {unique(sim$Plant)}}"
        )
      )
    }
  }

  # Format the data:
  formated_df <- formater(sim, obs,
    type = "scatter",
    all_situations = all_situations
  )

  # In case obs is given but no common variables between obs and sim:
  if (is.null(formated_df) || is.null(formated_df$Observed)) {
    if (verbose) {
      cli::cli_alert_warning("No observations found for required variables")
    }
    return(NULL)
  }

  # Define list of stats to compute
  all_stats <-
    data.frame(
      n_obs = "Number of observations",
      mean_obs = "Mean of the observations",
      mean_sim = "Mean of the simulations",
      r_means = "Ratio between mean simulated values and mean observed values (%)",
      sd_obs = "Standard deviation of the observations",
      sd_sim = "Standard deviation of the simulation",
      CV_obs = "Coefficient of variation of the observations",
      CV_sim = "Coefficient of variation of the simulation",
      R2 = "coefficient of determination for obs~sim",
      SS_res = "Residual sum of squares",
      Inter = "Intercept of regression line",
      Slope = "Slope of regression line",
      RMSE = "Root Mean Squared Error",
      RMSEs = "Systematic Root Mean Squared Error",
      RMSEu = "Unsystematic Root Mean Squared Error",
      nRMSE = "Normalized Root Mean Squared Error, CV(RMSE)",
      rRMSE = "Relative Root Mean Squared Error",
      rRMSEs = "Relative Systematic Root Mean Squared Error",
      rRMSEu = "Relative Unsystematic Root Mean Squared Error",
      pMSEs = "Proportion of Systematic Mean Squared Error in Mean Squared Error",
      pMSEu = "Proportion of Unsystematic Mean Squared Error in Mean Squared Error",
      Bias2 = "Bias squared (1st term of Kobayashi and Salam (2000) MSE decomposition)",
      SDSD = "Difference between sd_obs and sd_sim squared (2nd term of Kobayashi and Salam (2000) MSE decomposition)",
      LCS = "Correlation between observed and simulated values (3rd term of Kobayashi and Salam (2000) MSE decomposition)",
      rbias2 = "Relative bias squared",
      rSDSD = "Relative difference between sd_obs and sd_sim squared",
      rLCS = "Relative correlation between observed and simulated values",
      MAE = "Mean Absolute Error",
      FVU = "Fraction of variance unexplained",
      MSE = "Mean squared Error",
      EF = "Model efficiency",
      Bias = "Bias",
      ABS = "Mean Absolute Bias",
      MAPE = "Mean Absolute Percentage Error",
      RME = "Relative mean error (%)",
      tSTUD = "T student test of the mean difference",
      tLimit = "T student threshold",
      Decision = "Decision of the t student test of the mean difference"
    )

  stat_names <- names(all_stats)
  if (length(stat) == 1 && stat == "all") stat <- stat_names
  if (!all(stat %in% stat_names)) {
    warning(paste("Argument stats includes statistics not defined in CroPlot:",
                  paste(setdiff(stat,stat_names),collapse = ","),
                  "\nPlease chose between:",paste(stat_names,collapse = ", ")))
    stat <- intersect(stat, stat_names)
  }

  # Filter and group data
  formated_df  <- formated_df %>%
    dplyr::filter(!is.na(.data$Observed) & !is.na(.data$Simulated)) %>%
    {
      if (all_plants) {
        dplyr::group_by(., .data$variable)
      } else {
        dplyr::group_by(., .data$variable, .data$Plant)
      }
    }

  # Compute the selected list of stats
  potential_arglist <- list(obs="Observed",
                            sim="Simulated")
  x <- lapply(stat, function(cur_stat) {
    arglist <- potential_arglist[intersect(names(potential_arglist),
                                           names(formals(cur_stat)))]
    arglist_quoted <-do.call(call, c("list", lapply(arglist, as.name)), quote=TRUE)
    formated_df %>%
      dplyr::summarise(!!cur_stat := do.call(cur_stat, !!arglist_quoted))
  })
  x <- plyr::join_all(x, by="variable")
  attr(x, "description") <- dplyr::select(all_stats, stat)

  return(x)
}

#' Model quality assessment
#'
#' @description
#' Provide several metrics to assess the quality of the predictions of a model
#' (see note) against observations.
#'
#' @param obs       Observed values
#' @param sim       Simulated values
#' @param risk      Risk of the statistical test
#' @param na.rm     Boolean. Remove `NA` values if `TRUE` (default)
#' @param na.action A function which indicates what should happen when the data
#' contain NAs.
#'
#' @details The statistics for model quality can differ between sources. Here is
#'          a short description of each statistic and its equation (see html
#'          version for `LATEX`):
#' \itemize{
#'   \item `n_obs()`: Number of observations.
#'   \item `mean_obs()`: Mean of observed values
#'   \item `mean_sim()`: Mean of simulated values
#'   \item `sd_obs()`: Standard deviation of observed values
#'   \item `sd_sim()`: standard deviation of simulated values
#'   \item `CV_obs()`: Coefficient of variation of observed values
#'   \item `CV_sim()`: Coefficient of variation of simulated values
#'   \item `r_means()`: Ratio between mean simulated values and mean observed
#'              values (%),
#'      computed as : \deqn{r\_means = \frac{100*\frac{\sum_1^n(\hat{y_i})}{n}}
#'             {\frac{\sum_1^n(y_i)}{n}}}{r_means = 100*mean(sim)/mean(obs)}
#'   \item `R2()`: coefficient of determination, computed using [stats::lm()]
#'   on obs~sim.
#'   \item `SS_res()`: residual sum of squares (see notes).
#'   \item `Inter()`: Intercept of regression line, computed using [stats::lm()]
#'    on sim~obs.
#'   \item `Slope()`: Slope of regression line, computed using [stats::lm()]
#'    on sim~obs.
#'   \item `RMSE()`: Root Mean Squared Error, computed as
#'             \deqn{RMSE = \sqrt{\frac{\sum_1^n(\hat{y_i}-y_i)^2}{n}}}
#'             {RMSE = sqrt(mean((sim-obs)^2)}
#'   \item `RMSEs()`: Systematic Root Mean Squared Error, computed as
#'             \deqn{RMSEs = \sqrt{\frac{\sum_1^n(\sim{y_i}-y_i)^2}{n}}}
#'             {RMSEs = sqrt(mean((fitted.values(lm(formula=sim~obs))-obs)^2)}
#'   \item `RMSEu()`: Unsystematic Root Mean Squared Error, computed as
#'             \deqn{RMSEu = \sqrt{\frac{\sum_1^n(\sim{y_i}-\hat{y_i})^2}{n}}}
#'             {RMSEu = sqrt(mean((fitted.values(lm(formula=sim~obs))-sim)^2)}
#'   \item `NSE()`: Nash-Sutcliffe Efficiency, alias of EF, provided for user
#'   convenience.
#'   \item `nRMSE()`: Normalized Root Mean Squared Error, also denoted as
#'   CV(RMSE), and computed as:
#'              \deqn{nRMSE = \frac{RMSE}{\bar{y}}\cdot100}
#'              {nRMSE = (RMSE/mean(obs))*100}
#'   \item `rRMSE()`: Relative Root Mean Squared Error, computed as:
#'              \deqn{rRMSE = \frac{RMSE}{\bar{y}}}{rRMSE = (RMSE/mean(obs))}
#'   \item `rRMSEs()`: Relative Systematic Root Mean Squared Error, computed as
#'             \deqn{rRMSEs = \frac{RMSEs}{\bar{y}}}{rRMSEs = (RMSEs/mean(obs))}
#'   \item `rRMSEu()`: Relative Unsystematic Root Mean Squared Error,
#'   computed as
#'             \deqn{rRMSEu = \frac{RMSEu}{\bar{y}}}{rRMSEu = (RMSEu/mean(obs))}
#'   \item `pMSEs()`: Proportion of Systematic Mean Squared Error in Mean
#'   Square Error, computed as:
#'              \deqn{pMSEs = \frac{MSEs}{MSE}}{pMSEs = MSEs/MSE}
#'   \item `pMSEu()`: Proportion of Unsystematic Mean Squared Error in MEan
#'   Square Error, computed as:
#'              \deqn{pMSEu = \frac{MSEu}{MSE}}{pMSEu = MSEu^2/MSE^2}
#'   \item `Bias2()`: Bias squared (1st term of Kobayashi and Salam
#'   (2000) MSE decomposition):
#'             \deqn{Bias2 = Bias^2}
#'   \item `SDSD()`: Difference between sd_obs and sd_sim squared
#'   (2nd term of Kobayashi and Salam (2000) MSE decomposition), computed as:
#'              \deqn{SDSD = (sd\_obs-sd\_sim)^2}{SDSD = (sd\_obs-sd\_sim)^2}
#'   \item `LCS()`: Correlation between observed and simulated values
#'   (3rd term of Kobayashi and Salam (2000) MSE decomposition), computed as:
#'              \deqn{LCS = 2*sd\_obs*sd\_sim*(1-r)}
#'   \item `rbias2()`: Relative bias squared, computed as:
#'              \deqn{rbias2 = \frac{Bias^2}{\bar{y}^2}}
#'              {rbias2 = Bias^2/mean(obs)^2}
#'   \item `rSDSD()`: Relative difference between sd_obs and sd_sim squared,
#'    computed as:
#'             \deqn{rSDSD = \frac{SDSD}{\bar{y}^2}}{rSDSD = (SDSD/mean(obs)^2)}
#'   \item `rLCS()`: Relative correlation between observed and simulated values,
#'    computed as:
#'              \deqn{rLCS = \frac{LCS}{\bar{y}^2}}{rLCS = (LCS/mean(obs)^2)}
#'   \item `MAE()`: Mean Absolute Error, computed as:
#'            \deqn{MAE = \frac{\sum_1^n(\left|\hat{y_i}-y_i\right|)}{n}}
#'            {MAE = mean(abs(sim-obs))}
#'   \item `ABS()`: Mean Absolute Bias, which is an alias of `MAE()`
#'   \item `FVU()`: Fraction of variance unexplained, computed as:
#'            \deqn{FVU = \frac{SS_{res}}{SS_{tot}}}{FVU = SS_res/SS_tot}
#'   \item `MSE()`: Mean squared Error, computed as:
#'            \deqn{MSE = \frac{1}{n}\sum_{i=1}^n(Y_i-\hat{Y_i})^2}
#'            {MSE = mean((sim-obs)^2)}
#'   \item `EF()`: Model efficiency, also called Nash-Sutcliffe efficiency
#'   (NSE). This statistic is related to the FVU as
#'   \eqn{EF= 1-FVU}. It is also related to the \eqn{R^2}{R2}
#'           because they share the same equation, except SStot is applied
#'           relative to the identity function (*i.e.* 1:1 line) instead of the
#'           regression line. It is computed
#'           as: \deqn{EF = 1-\frac{SS_{res}}{SS_{tot}}}{EF = 1-SS_res/SS_tot}
#'   \item `Bias()`: Modelling bias, simply computed as:
#'             \deqn{Bias = \frac{\sum_1^n(\hat{y_i}-y_i)}{n}}
#'             {Bias = mean(sim-obs)}
#'   \item `MAPE()`: Mean Absolute Percent Error, computed as:
#'            \deqn{MAPE = \frac{\sum_1^n(\frac{\left|\hat{y_i}-y_i\right|}
#'            {y_i})}{n}}{MAPE = mean(abs(obs-sim)/obs)}
#'   \item `RME()`: Relative mean error, computed as:
#'            \deqn{RME = \frac{\sum_1^n(\frac{\hat{y_i}-y_i}{y_i})}{n}}
#'            {RME = mean((sim-obs)/obs)}
#'   \item `tSTUD()`: T student test of the mean difference, computed as:
#'            \deqn{tSTUD = \frac{Bias}{\sqrt(\frac{var(M)}{n_obs})}}
#'            {tSTUD = Bias/sqrt(var(M)/n_obs)}
#'   \item `tLimit()`: T student threshold, computed using [qt()]:
#'            \deqn{tLimit = qt(1-\frac{\alpha}{2},df=length(obs)-1)}
#'            {tLimit = qt(1-risk/2,df =length(obs)-1)}
#'   \item `Decision()`: Decision of the t student test of the mean difference
#'   (can bias be considered statistically not different from 0 at alpha level
#'   0.05, i.e. 5% probability of erroneously rejecting this hypothesis?),
#'   computed as:
#'            \deqn{Decision = abs(tSTUD ) < tLimit}
#' }
#'
#' @note \eqn{SS_{res}}{SS_res} is the residual sum of squares and
#'      \eqn{SS_{tot}}{SS_tot} the total sum of squares. They are computed as:
#'       \deqn{SS_{res} = \sum_{i=1}^n (y_i - \hat{y_i})^2}
#'       {SS_res= sum((obs-sim)^2)}
#'       \deqn{SS_{tot} = \sum_{i=1}^{n}\left(y_{i}-\bar{y}\right)^2}
#'       {SS_tot= sum((obs-mean(obs))^2}
#'       Also, it should be noted that \eqn{y_i} refers to the observed values
#'       and \eqn{\hat{y_i}} to the predicted values, \eqn{\bar{y}} to the mean
#'       value of observations and \eqn{\sim{y_i}} to
#'       values predicted by linear regression.
#'
#' @return A statistic depending on the function used.
#'
#' @name predictor_assessment
#'
#' @importFrom dplyr "%>%"
#' @importFrom stats lm sd var na.omit qt cor
#'
#' @examples
#' \dontrun{
#' sim <- rnorm(n = 5, mean = 1, sd = 1)
#' obs <- rnorm(n = 5, mean = 1, sd = 1)
#' RMSE(sim, obs)
#' }
#'
NULL

#' @export
#' @rdname predictor_assessment
n_obs <- function(obs) {
  sum(!is.na(obs))
}

#' @export
#' @rdname predictor_assessment
mean_obs <- function(obs, na.rm = TRUE) {
  mean(obs, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
mean_sim <- function(sim, na.rm = TRUE) {
  mean(sim, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
sd_obs <- function(obs, na.rm = TRUE) {
  sd(obs, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
sd_sim <- function(sim, na.rm = TRUE) {
  sd(sim, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
CV_obs <- function(obs, na.rm = TRUE) {
  ( sd(obs, na.rm = na.rm) / mean(obs, na.rm = na.rm) ) * 100
}

#' @export
#' @rdname predictor_assessment
CV_sim <- function(sim, na.rm = TRUE) {
  ( sd(sim, na.rm = na.rm) / mean(sim, na.rm = na.rm) ) * 100
}

#' @export
#' @rdname predictor_assessment
r_means <- function(sim, obs, na.rm = TRUE) {
  100 * mean(sim, na.rm = na.rm) / mean(obs, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
R2 <- function(sim, obs, na.action = stats::na.omit) {
  . <- NULL
  if (!all(is.na(sim))) {
    stats::lm(formula = obs ~ sim, na.action = na.action) %>%
      summary(.) %>%
      .$r.squared
  } else {
    NA
  }
}

#' @export
#' @rdname predictor_assessment
SS_res <- function(sim, obs, na.rm = TRUE) {
  sum((obs - sim)^2, na.rm = na.rm) # residual sum of squares
}

#' @export
#' @rdname predictor_assessment
Inter <- function(sim, obs, na.action = stats::na.omit) {
  if (!all(is.na(sim))) {
    stats::lm(formula = sim ~ obs, na.action = na.action)$coef[1]
  } else {
    NA
  }
}

#' @export
#' @rdname predictor_assessment
Slope <- function(sim, obs, na.action = stats::na.omit) {
  if (!all(is.na(sim))) {
    stats::lm(formula = sim ~ obs, na.action = na.action)$coef[2]
  } else {
    NA
  }
}

#' @export
#' @rdname predictor_assessment
RMSE <- function(sim, obs, na.rm = TRUE) {
  sqrt(mean((sim - obs)^2, na.rm = na.rm))
}

#' @export
#' @rdname predictor_assessment
RMSEs <- function(sim, obs, na.rm = TRUE) {
  if (!all(is.na(sim))) {
    reg <- stats::fitted.values(lm(formula = sim ~ obs))
    sqrt(mean((reg[seq_along(sim)] - obs)^2, na.rm = na.rm))
  } else {
    NA
  }
}

#' @export
#' @rdname predictor_assessment
RMSEu <- function(sim, obs, na.rm = TRUE) {
  if (!all(is.na(sim))) {
    reg <- stats::fitted.values(lm(formula = sim ~ obs))
    sqrt(mean((reg[seq_along(sim)] - sim)^2, na.rm = na.rm))
  } else {
    NA
  }
}

#' @export
#' @rdname predictor_assessment
nRMSE <- function(sim, obs, na.rm = TRUE) {
  (RMSE(sim = sim, obs = obs, na.rm = na.rm) /
    mean(obs, na.rm = na.rm)) * 100
}

#' @export
#' @rdname predictor_assessment
rRMSE <- function(sim, obs, na.rm = TRUE) {
  (RMSE(sim = sim, obs = obs, na.rm = na.rm) /
    mean(obs, na.rm = na.rm))
}

#' @export
#' @rdname predictor_assessment
rRMSEs <- function(sim, obs, na.rm = TRUE) {
  (RMSEs(sim = sim, obs = obs, na.rm = na.rm) /
    mean(obs, na.rm = na.rm))
}

#' @export
#' @rdname predictor_assessment
rRMSEu <- function(sim, obs, na.rm = TRUE) {
  (RMSEu(sim = sim, obs = obs, na.rm = na.rm) /
    mean(obs, na.rm = na.rm))
}

#' @export
#' @rdname predictor_assessment
pMSEs <- function(sim, obs, na.rm = TRUE) {
  RMSEs(sim, obs, na.rm)^2 / RMSE(sim, obs, na.rm)^2
}

#' @export
#' @rdname predictor_assessment
pMSEu <- function(sim, obs, na.rm = TRUE) {
  RMSEu(sim, obs, na.rm)^2 / RMSE(sim, obs, na.rm)^2
}

#' @export
#' @rdname predictor_assessment
Bias2 <- function(sim, obs, na.rm = TRUE) {
  Bias(sim, obs, na.rm = na.rm)^2
}

#' @export
#' @rdname predictor_assessment
SDSD <- function(sim, obs, na.rm = TRUE) {
  (sd(obs, na.rm = na.rm) - sd(sim, na.rm = na.rm))^2
}

#' @export
#' @rdname predictor_assessment
LCS <- function(sim, obs, na.rm = TRUE) {
  sdobs <- sd(obs, na.rm = na.rm)
  sdsim <- sd(sim, na.rm = na.rm)
  r <- 1
  if (is.na(sdobs) || is.na(sdsim)) {
    return(NA)
  }
  if (sdobs > 0 && sdsim > 0) {
    r <- cor(x = obs, y = sim, use = "pairwise.complete.obs")
  }
  2 * sdobs * sdsim * (1 - r)
}

#' @export
#' @rdname predictor_assessment
rbias2 <- function(sim, obs, na.rm = TRUE) {
  Bias2(sim, obs, na.rm = na.rm) / ((mean(obs, na.rm = na.rm))^2)
}

#' @export
#' @rdname predictor_assessment
rSDSD <- function(sim, obs, na.rm = TRUE) {
  SDSD(sim, obs, na.rm = na.rm) / ((mean(obs, na.rm = na.rm))^2)
}

#' @export
#' @rdname predictor_assessment
rLCS <- function(sim, obs, na.rm = TRUE) {
  LCS(sim, obs, na.rm = na.rm) / ((mean(obs, na.rm = na.rm))^2)
}

#' @export
#' @rdname predictor_assessment
MAE <- function(sim, obs, na.rm = TRUE) {
  mean(abs(sim - obs), na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
ABS <- function(sim, obs, na.rm = TRUE) {
  MAE(sim, obs, na.rm)
}

#' @export
#' @rdname predictor_assessment
MSE <- function(sim, obs, na.rm = TRUE) {
  mean((sim - obs)^2, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
EF <- function(sim, obs, na.rm = TRUE) {
  # Modeling efficiency
  SStot <- sum((obs - mean(obs, na.rm = na.rm))^2, na.rm = na.rm)
  # total sum of squares
  # SSreg= sum((sim-mean(obs))^2) # explained sum of squares
  1 - SS_res(sim = sim, obs = obs, na.rm = na.rm) / SStot
}

#' @export
#' @rdname predictor_assessment
NSE <- function(sim, obs, na.rm = TRUE) {
  EF(sim, obs, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
Bias <- function(sim, obs, na.rm = TRUE) {
  mean(sim - obs, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
MAPE <- function(sim, obs, na.rm = TRUE) {

  non_zero_obs_indices <- obs != 0
  sim_filtered <- sim[non_zero_obs_indices]
  obs_filtered <- obs[non_zero_obs_indices]

  if (length(sim_filtered) > 0) {
    mape <- mean(abs(sim_filtered - obs_filtered) / obs_filtered, na.rm = na.rm)
    if (any(obs == 0)) {
      message("Attention: some observed values are zero. They are filtered for the computation of MAPE")
    }
  } else {
    cat("All observed values are zero. MAPE cannot be computed.\n")
    mape <- Inf
  }
  return(mape)
}

#' @export
#' @rdname predictor_assessment
FVU <- function(sim, obs, na.rm = TRUE) {
  var(obs - sim, na.rm = na.rm) / var(obs, na.rm = na.rm)
}

#' @export
#' @rdname predictor_assessment
RME <- function(sim, obs, na.rm = TRUE) {

  # indices were obs = 0
  non_zero_obs_indices <- obs != 0
  sim_filtered <- sim[non_zero_obs_indices]
  obs_filtered <- obs[non_zero_obs_indices]

  if (length(sim_filtered) > 0) {
    rme_value <- mean((sim_filtered - obs_filtered) / obs_filtered, na.rm = na.rm)

    if (any(obs == 0)) {
      message("Attention: some observed values are zero. They are filtered for the computation of RME")
      return(rme_value)
    }

  } else {
    cat("All observed values are zero. RME cannot be computed.\n")
    rme_value <- Inf
  }

  return(rme_value)
}

#' @export
#' @rdname predictor_assessment
tSTUD <- function(sim, obs, na.rm = TRUE) {
  M <- Bias(sim, obs, na.rm = na.rm)
  M / sqrt(var(sim - obs, na.rm = na.rm) / length(obs))
}

#' @export
#' @rdname predictor_assessment
tLimit <- function(sim, obs, risk = 0.05, na.rm = TRUE) {
  # Setting value for n_obs > 140
  if (length(obs) > 140) {
    return(1.96)
  }

  if (length(obs) > 0) {
    suppressWarnings(qt(1 - risk / 2, df = length(obs) - 1))
  } else {
    return(NA)
  }
}

#' @export
#' @rdname predictor_assessment
Decision <- function(sim, obs, risk = 0.05, na.rm = TRUE) {
  Stud <- tSTUD(sim, obs, na.rm = na.rm)
  Threshold <- tLimit(sim, obs, risk = risk, na.rm = na.rm)
  if (is.na(Stud)) {
    return("Insufficient size")
  }
  if (Stud >= 0) {
    Stud <- -Stud
  }
  if (Threshold + Stud > 0) {
    return("OK")
  } else {
    return("Rejected")
  }
}
SticsRPacks/CroPloteR documentation built on April 1, 2024, 9:25 a.m.