Nothing
#' Calculates performance metrics of a business-as-usual model
#'
#' Model agnostic function to calculate a number of common performance
#' metrics on the reference time window.
#' Uses the true data `value` and the predictions `prediction` for this calculation.
#' The coverage is calculated from the columns `value`, `prediction_lower` and
#' `prediction_upper`.
#' Removes dates in the effect and buffer range as the model is not expected to
#' be performing correctly for these times. The incorrectness is precisely
#' what we are using for estimating the effect.
#' @param predictions data.table or data.frame with the following columns
#' \describe{
#' \item{date}{Date of the observation. Needs to be comparable to
#' date_effect_start element.}
#' \item{value}{True observed value of the station}
#' \item{prediction}{Predicted model output for the same time and station
#' as value}
#' \item{prediction_lower}{Lower end of the prediction interval}
#' \item{prediction_upper}{Upper end of the prediction interval}
#' }
#'
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onwards is disregarded
#' for calculating model performance
#' @param buffer Integer. An additional buffer window before date_effect_start to account
#' for uncertainty in the effect start point. Disregards additional buffer data
#' points for model evaluation
#' @return Named vector with performance metrics of the model
#' @export
calc_performance_metrics <- function(predictions, date_effect_start = NULL, buffer = 0) {
df <- data.table::copy(predictions)
stopifnot("Buffer needs to be larger or equal to 0" = buffer >= 0)
if (!is.null(date_effect_start)) {
stopifnot(
"date_effect_start needs to be NULL or a date object" =
inherits(date_effect_start, "Date") | lubridate::is.POSIXt(date_effect_start)
)
df <- df[date < (date_effect_start - as.difftime(buffer, units = "hours"))]
}
metrics <- c(
"RMSE" = sqrt(mean((df$value - df$prediction)**2)),
"MSE" = mean((df$value - df$prediction)**2),
"MAE" = mean(abs(df$value - df$prediction)),
"MAPE" = mean(abs(df$value - df$prediction) / (df$value + 1)),
"Bias" = mean(df$prediction - df$value),
"R2" = 1 - sum((df$value - df$prediction)**2) / sum((df$value - mean(df$value))**2),
"Coverage lower" = mean(df$value >= df$prediction_lower),
"Coverage upper" = mean(df$value <= df$prediction_upper),
"Coverage" = mean(df$value >= df$prediction_lower) +
mean(df$value <= df$prediction_upper) - 1,
"Correlation" = stats::cor(df$value, df$prediction),
"MFB" = 2 * mean((df$prediction - df$value) / (df$prediction + df$value)),
"FGE" = 2 * mean(abs(df$prediction - df$value) / (df$prediction + df$value))
)
metrics
}
#' Calculates summary statistics for predictions and true values
#'
#' Helps with analyzing predictions by comparing them with the true values on
#' a number of relevant summary statistics.
#' @param predictions Data.table or data.frame with the following columns
#' \describe{
#' \item{date}{Date of the observation. Needs to be comparable to
#' date_effect_start element.}
#' \item{value}{True observed value of the station}
#' \item{prediction}{Predicted model output for the same time and station
#' as value}
#' }
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onwards is disregarded
#' for calculating model performance
#' @param buffer Integer. An additional buffer window before date_effect_start to account
#' for uncertainty in the effect start point. Disregards additional buffer data
#' points for model evaluation
#' @return data.frame of summary statistics with columns true and prediction
#' @export
calc_summary_statistics <- function(predictions, date_effect_start = NULL, buffer = 0) {
df <- data.table::copy(predictions)
stopifnot("Buffer needs to be larger or equal to 0" = buffer >= 0)
if (!is.null(date_effect_start)) {
stopifnot(
"date_effect_start needs to be NULL or a date object" =
inherits(date_effect_start, "Date") | lubridate::is.POSIXt(date_effect_start)
)
df <- df[date < (date_effect_start - as.difftime(buffer, units = "hours"))]
}
data.frame(
true = c(
min(df$value),
max(df$value),
stats::var(df$value),
mean(df$value),
stats::quantile(df$value, probs = 0.05),
stats::quantile(df$value, probs = 0.25),
stats::median(df$value),
stats::quantile(df$value, probs = 0.75),
stats::quantile(df$value, probs = 0.95)
),
prediction = c(
min(df$prediction),
max(df$prediction),
stats::var(df$prediction),
mean(df$prediction),
stats::quantile(df$prediction, probs = 0.05),
stats::quantile(df$prediction, probs = 0.25),
stats::median(df$prediction),
stats::quantile(df$prediction, probs = 0.75),
stats::quantile(df$prediction, probs = 0.95)
),
row.names = c(
"min", "max", "var", "mean", "5-percentile",
"25-percentile", "median/50-percentile",
"75-percentile", "95-percentile"
)
)
}
#' Estimates size of the external effect
#'
#' Calculates an estimate for the absolute and relative effect size of the
#' external effect. The absolute effect is the difference between the model
#' bias in the reference time and the effect time windows. The relative effect
#' is the absolute effect divided by the mean true value in the reference
#' window.
#'
#' Note: Since the bias is of the model is an average over predictions and true
#' values, it is important, that the effect window is specified correctly.
#' Imagine a scenario like a fire which strongly affects the outcome for one
#' hour and is gone the next hour. If we use a two week effect window, the
#' estimated effect will be 14*24=336 times smaller compared to using a 1-hour
#' effect window. Generally, we advise against studying very short effects (single
#' hour or single day). The variability of results will be too large to learn
#' anything meaningful.
#'
#' @param df Data.table or data.frame with the following columns
#' \describe{
#' \item{date}{Date of the observation. Needs to be comparable to
#' date_effect_start element.}
#' \item{value}{True observed value of the station}
#' \item{prediction}{Predicted model output for the same time and station
#' as value}
#' }
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onward is disregarded
#' for calculating model performance.
#' @param buffer Integer. An additional buffer window before date_effect_start to account
#' for uncertainty in the effect start point. Disregards additional buffer data
#' points for model evaluation
#' @param verbose Prints an explanation of the results if TRUE
#' @return A list with two numbers: Absolute and relative estimated effect size.
#' @export
estimate_effect_size <- function(df, date_effect_start, buffer = 0, verbose = FALSE) {
stopifnot(
"date_effect_start needs to be NULL or a date object" =
inherits(date_effect_start, "Date") | lubridate::is.POSIXt(date_effect_start)
)
stopifnot("Buffer needs to be larger or equal to 0" = buffer >= 0)
reference <- df[date < (date_effect_start - as.difftime(buffer, units = "hours"))]
effect <- df[date >= date_effect_start]
bias_ref <- mean(reference$prediction - reference$value)
bias_effect <- mean(effect$prediction - effect$value)
effectsize <- bias_ref - bias_effect
rel_effectsize <- effectsize / mean(effect$value)
rel_effectsize <- round(rel_effectsize, 4)
if (verbose) {
cat(sprintf("The external effect changed the target value on average by %.3f compared to the reference time window. This is a %1.2f%% relative change.", effectsize, 100 * rel_effectsize))
}
list(absolute_effect = effectsize, relative_effect = rel_effectsize)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.