R/model_evaluation.R

Defines functions estimate_effect_size calc_summary_statistics calc_performance_metrics

Documented in calc_performance_metrics calc_summary_statistics estimate_effect_size

#' Calculates performance metrics of a business-as-usual model
#'
#' Model agnostic function to calculate a number of common performance
#' metrics on the reference time window.
#' Uses the true data `value` and the predictions `prediction` for this calculation.
#' The coverage is calculated from the columns `value`, `prediction_lower` and
#' `prediction_upper`.
#' Removes dates in the effect and buffer range as the model is not expected to
#' be performing correctly for these times. The incorrectness is precisely
#' what we are using for estimating the effect.
#' @param predictions data.table or data.frame with the following columns
#'  \describe{
#'    \item{date}{Date of the observation. Needs to be comparable to
#'    date_effect_start element.}
#'    \item{value}{True observed value of the station}
#'    \item{prediction}{Predicted model output for the same time and station
#'    as value}
#'    \item{prediction_lower}{Lower end of the prediction interval}
#'    \item{prediction_upper}{Upper end of the prediction interval}
#'  }
#'
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onwards is disregarded
#' for calculating model performance
#' @param buffer Integer. An additional buffer window before date_effect_start to account
#' for uncertainty in the effect start point. Disregards additional buffer data
#' points for model evaluation
#' @return Named vector with performance metrics of the model
#' @export
calc_performance_metrics <- function(predictions, date_effect_start = NULL, buffer = 0) {
  df <- data.table::copy(predictions)
  stopifnot("Buffer needs to be larger or equal to 0" = buffer >= 0)
  if (!is.null(date_effect_start)) {
    stopifnot(
      "date_effect_start needs to be NULL or a date object" =
        inherits(date_effect_start, "Date") | lubridate::is.POSIXt(date_effect_start)
    )
    df <- df[date < (date_effect_start - as.difftime(buffer, units = "hours"))]
  }
  metrics <- c(
    "RMSE" = sqrt(mean((df$value - df$prediction)**2)),
    "MSE" = mean((df$value - df$prediction)**2),
    "MAE" = mean(abs(df$value - df$prediction)),
    "MAPE" = mean(abs(df$value - df$prediction) / (df$value + 1)),
    "Bias" = mean(df$prediction - df$value),
    "R2" = 1 - sum((df$value - df$prediction)**2) / sum((df$value - mean(df$value))**2),
    "Coverage lower" = mean(df$value >= df$prediction_lower),
    "Coverage upper" = mean(df$value <= df$prediction_upper),
    "Coverage" = mean(df$value >= df$prediction_lower) +
      mean(df$value <= df$prediction_upper) - 1,
    "Correlation" = stats::cor(df$value, df$prediction),
    "MFB" = 2 * mean((df$prediction - df$value) / (df$prediction + df$value)),
    "FGE" = 2 * mean(abs(df$prediction - df$value) / (df$prediction + df$value))
  )
  metrics
}


#' Calculates summary statistics for predictions and true values
#'
#' Helps with analyzing predictions by comparing them with the true values on
#' a number of relevant summary statistics.
#' @param predictions Data.table or data.frame with the following columns
#'  \describe{
#'    \item{date}{Date of the observation. Needs to be comparable to
#'    date_effect_start element.}
#'    \item{value}{True observed value of the station}
#'    \item{prediction}{Predicted model output for the same time and station
#'    as value}
#'  }
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onwards is disregarded
#' for calculating model performance
#' @param buffer Integer. An additional buffer window before date_effect_start to account
#' for uncertainty in the effect start point. Disregards additional buffer data
#' points for model evaluation
#' @return data.frame of summary statistics with columns true and prediction
#' @export
calc_summary_statistics <- function(predictions, date_effect_start = NULL, buffer = 0) {
  df <- data.table::copy(predictions)
  stopifnot("Buffer needs to be larger or equal to 0" = buffer >= 0)
  if (!is.null(date_effect_start)) {
    stopifnot(
      "date_effect_start needs to be NULL or a date object" =
        inherits(date_effect_start, "Date") | lubridate::is.POSIXt(date_effect_start)
    )
    df <- df[date < (date_effect_start - as.difftime(buffer, units = "hours"))]
  }
  data.frame(
    true = c(
      min(df$value),
      max(df$value),
      stats::var(df$value),
      mean(df$value),
      stats::quantile(df$value, probs = 0.05),
      stats::quantile(df$value, probs = 0.25),
      stats::median(df$value),
      stats::quantile(df$value, probs = 0.75),
      stats::quantile(df$value, probs = 0.95)
    ),
    prediction = c(
      min(df$prediction),
      max(df$prediction),
      stats::var(df$prediction),
      mean(df$prediction),
      stats::quantile(df$prediction, probs = 0.05),
      stats::quantile(df$prediction, probs = 0.25),
      stats::median(df$prediction),
      stats::quantile(df$prediction, probs = 0.75),
      stats::quantile(df$prediction, probs = 0.95)
    ),
    row.names = c(
      "min", "max", "var", "mean", "5-percentile",
      "25-percentile", "median/50-percentile",
      "75-percentile", "95-percentile"
    )
  )
}

#' Estimates size of the external effect
#'
#' Calculates an estimate for the absolute and relative effect size of the
#' external effect. The absolute effect is the difference between the model
#' bias in the reference time and the effect time windows. The relative effect
#' is the absolute effect divided by the mean true value in the reference
#' window.
#'
#' Note: Since the bias is of the model is an average over predictions and true
#' values, it is important, that the effect window is specified correctly.
#' Imagine a scenario like a fire which strongly affects the outcome for one
#' hour and is gone the next hour. If we use a two week effect window, the
#' estimated effect will be 14*24=336 times smaller compared to using a 1-hour
#' effect window. Generally, we advise against studying very short effects (single
#' hour or single day). The variability of results will be too large to learn
#' anything meaningful.
#'
#' @param df Data.table or data.frame with the following columns
#'  \describe{
#'    \item{date}{Date of the observation. Needs to be comparable to
#'    date_effect_start element.}
#'    \item{value}{True observed value of the station}
#'    \item{prediction}{Predicted model output for the same time and station
#'    as value}
#'  }
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onward is disregarded
#' for calculating model performance.
#' @param buffer Integer. An additional buffer window before date_effect_start to account
#' for uncertainty in the effect start point. Disregards additional buffer data
#' points for model evaluation
#' @param verbose Prints an explanation of the results if TRUE
#' @return A list with two numbers: Absolute and relative estimated effect size.
#' @export
estimate_effect_size <- function(df, date_effect_start, buffer = 0, verbose = FALSE) {
  stopifnot(
    "date_effect_start needs to be NULL or a date object" =
      inherits(date_effect_start, "Date") | lubridate::is.POSIXt(date_effect_start)
  )
  stopifnot("Buffer needs to be larger or equal to 0" = buffer >= 0)
  reference <- df[date < (date_effect_start - as.difftime(buffer, units = "hours"))]
  effect <- df[date >= date_effect_start]
  bias_ref <- mean(reference$prediction - reference$value)
  bias_effect <- mean(effect$prediction - effect$value)
  effectsize <- bias_ref - bias_effect
  rel_effectsize <- effectsize / mean(effect$value)
  rel_effectsize <- round(rel_effectsize, 4)
  if (verbose) {
    cat(sprintf("The external effect changed the target value on average by %.3f compared to the reference time window. This is a %1.2f%% relative change.", effectsize, 100 * rel_effectsize))
  }
  list(absolute_effect = effectsize, relative_effect = rel_effectsize)
}

Try the ubair package in your browser

Any scripts or data that you put into this service are public.

ubair documentation built on April 12, 2025, 2:12 a.m.