In epiforecasts/europe-covid-forecast: Forecasting Covid-19 related metrics for the ECDC Forecast Hub

library(scoringutils)
library(ggplot2)
library(dplyr)
library(DT)
library(here)
library(knitr)
library(covidHubUtils)
library(lubridate)
library(purrr)
knitr::opts_chunk$set(echo = FALSE, 
                      message = FALSE, 
                      warning = FALSE)

include_ranking <- TRUE
include_forecast_plot <- TRUE
include_avail_forecasts <- FALSE
include_ranks_over_time <- FALSE

include_countries <- include_forecast_plot || 
  include_forecast_plot || 
  include_avail_forecasts || 
  include_ranks_over_time

data <- params$data[location_name == "United Kingdom"]

horizons <- 1:4

summarise_by = c("model", "target_variable")

target_variables <- c(Cases = "inc case", Deaths = "inc death")

Welcome to the UK Covid-19 Forecasting Challenge evaluation board!

Below you find a visualisation of all forecasts made for the UK Covid-19 Crowd Forecasting Challenge as well as an evaluation of the forecasts.

If you want to learn more about the crowd forecasting challenge and how to participate, visit crowdforecastr.org

{.unlisted .unnumbered}

out <- paste0("\n\n## Visualisation of the combined crowd forecast {.tabset .tabset_fade}\n\n")

out <- c(out, "The plot shows the combined forecast of all participants. Forecasts are combined by taking the average of all predictions. The date of the tab marks the date on",
         "which a forecast was made.\n\n")

r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')

# copy data temporarily to allow for filtering
tmp_data <- copy(data)
data <- data[model %in% c(NA, "EpiExpert-ensemble")]

out <- NULL
forecast_dates <-
  data$forecast_date[!is.na(data$forecast_date)] %>%
  unique() %>%
  sort() %>%
  as.character() %>%
  rev()
loc <- params$location_name
out <- c(out, knit_child(here::here("reports", "evaluation",
                                    "template-plot-forecasts.Rmd")))

# move tmp_data back
data <- tmp_data

r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')

{.unlisted .unnumbered}

out <- paste0("\n\n## Forecast visualisation {.tabset .tabset_fade}\n\n")

out <- c(out, "Visualisation of individual forecasts. The date of the tab marks the date on",
         "which a forecast was made.\n\n")

r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')

out <- NULL
forecast_dates <-
  data$forecast_date[!is.na(data$forecast_date)] %>%
  unique() %>%
  sort() %>%
  as.character() %>%
  rev()
loc <- params$location_name
out <- c(out, knit_child(here::here("reports", "evaluation",
                                    "template-plot-forecasts.Rmd")))

r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')

{.unlisted .unnumbered}

Leaderboard

rankingdata <- copy(data)

# log data 
rankingdata[, true_value := pmax(true_value, 0)]
rankingdata[, true_value := log(true_value + 1)]
rankingdata[, prediction := pmax(prediction, 0)]
rankingdata[, prediction := log(prediction + 1)]

table <- eval_forecasts(rankingdata, summarise_by = c("model", "forecast_date"))


# assign median score if no forecast was made ----------------------------------
# for ever forecast_date, calculate the median
medians <- table[model != "EpiExpert-ensemble", 
                 .(median = median(interval_score)), by = "forecast_date"]

# get complete set of forecasters and dates
fcdates <- medians$forecast_date
participants <- unique(rankingdata$model)
complete_set <- expand.grid(forecast_date = fcdates, 
                            model = participants[!is.na(participants)])

# merge medians with complete set and that with scores
medians <- merge(complete_set, medians, all.y = TRUE)

table <- merge(table, medians, all.y = TRUE)
table[is.na(interval_score), interval_score := median]

table <- table[, .(interval_score = sum(interval_score)), by = "model"]

out <- htmltools::tagList(
  table %>%
    arrange(interval_score) %>%
    mutate_if(is.numeric, round, 2) %>%
    dplyr::mutate(ranking = 1:n()) %>%
    dplyr::rename(`interval score (lower = better)` = interval_score, 
                  forecaster = model) %>%
     DT::datatable(extensions = c('FixedColumns', 'Buttons'),
                    width = "100%",
                    options = list(
                      paging = FALSE,
                      info = FALSE,
                      buttons = c('csv', 'excel'),
                      dom = 'Bfrtip',
                      scrollX = TRUE,
                      fixedColumns = TRUE
                    ), 
                    class = 'white-space: nowrap')
)

r paste(knit(text = out), collapse = '\n\n\n')

{.unlisted .unnumbered}

out <- paste0("\n\n\n## Detailed forecast scores {.tabset .tabset-fade}\n\n")

out <- c(out, "Scores separated by target and forecast horizon.\n\n")

r paste(if (include_ranking) knit(text = out, quiet = TRUE), collapse = '\n\n')

out <- NULL
for (variable in names(target_variables)) {
  out <- c(out, paste("\n\n###", variable, " {.tabset .tabset-fade}\n\n"))
  for (this_horizon in horizons) {
    filter_list = list(paste0("target_variable == '",
                              target_variables[[variable]], "'"),
                     "!(is.na(quantile))",
                     paste0("horizon == ", this_horizon),
                     paste0("location_name == '", loc, "'"))
    horizon_string <-
      paste0(this_horizon, " week",
             if_else(this_horizon > 1, "s", ""), " ahead horizon")
    out <- c(out, paste("\n\n####", horizon_string, "\n\n"))
    out <- c(out, knit_child(here::here("reports", "evaluation",
                                        "template-ranking-table.Rmd"),
                            quiet = TRUE))
  }
}

r paste(if (include_ranking) knit(text = out, quiet = TRUE), collapse = '\n\n')

{.unlisted .unnumbered}

Evaluation metrics

Relative skill is a metric based on the weighted interval score (WIS) that is using a 'pairwise comparison tournament'. All pairs of forecasters are compared against each other in terms of the weighted interval score. The mean score of both models based on the set of common targets for which both models have made a prediction are calculated to obtain mean score ratios. The relative skill is the geometric mean of these mean score ratios. Smaller values are better and a value smaller than one means that the model beats the average forecasting model.
The weighted interval score is a proper scoring rule (meaning you can't cheat it) suited to scoring forecasts in an interval format. It has three components: sharpness, underprediction and overprediction. Sharpness is the width of the prediction interval. Over- and underprediction only come into play if the prediction interval does not cover the true value. They are the absolute value of the difference between the upper or lower bound of the prediction interval (depending on whether the forecast is too high or too low).
coverage deviation is the average difference between nominal and empirical interval coverage. Say the 50 percent prediction interval covers only 20 percent of all true values, then the coverage deviation is 0.5 - 0.2 = -0.3. The coverage deviation value in the table is calculated by averaging over the coverage deviation calculated for all possible prediction intervals. If the value is negative you have covered less then you should. If it is positve, then the forecasts could be a little more confident.
bias is a measure between -1 and 1 that expresses the tendency to underpredict (-1) or overpredict (1). In contrast to the over- and underprediction components of the WIS it is bound between -1 and 1 and cannot go to infinity. It is therefore less susceptible to outliers.
aem is the absolute error of the median forecasts. A high aem means the median forecasts tend to be far away from the true values.

{.unlisted .unnumbered}

out <- paste0("\n\n## Evolution of scores over time {.tabset .tabset-fade}\n\n")
out <- c(out, "Visualisation of the weighted interval score over time. In",
         "addition, the components of the interval score, sharpness (how",
         "narrow are forecasts - smaller is better), and penalties for",
         "underprediction and overprediction are shown. Scores are again",
         "separated by forecast horizon\n\n")

r paste(knit(text = out), collapse = '\n\n')

Number of participants

dt <- data[!(model %in% c("Crowd-Rt-Forecast",
                          "EpiNow2_secondary", 
                          "EpiExpert-ensemble", 
                          "EpiNow2")), 
           .(`number of forecasters` = length(unique(model))), , 
           by = c("forecast_date", "location_name", "target_variable")
][order(forecast_date)][
  !is.na(forecast_date)][
    location_name == "United Kingdom"
  ]

dt[, .(sd = sd(`number of forecasters`), 
                             mean = mean(`number of forecasters`), 
                             min  = min(`number of forecasters`), 
                             max = max(`number of forecasters`), 
                             median = median(`number of forecasters`)), 
   by = c("location_name", "target_variable")] 

dt %>%
  ggplot(aes(y = `number of forecasters`, 
             x = forecast_date, 
             group = target_variable)) + 
  geom_line() + 
  geom_point() + 
  facet_grid(target_variable ~ .) + 
  theme_minimal() + 
  labs(x = "Forecast date")

epiforecasts/europe-covid-forecast documentation built on Jan. 15, 2025, 8:57 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

epiforecasts/europe-covid-forecast
Forecasting Covid-19 related metrics for the ECDC Forecast Hub

In epiforecasts/europe-covid-forecast: Forecasting Covid-19 related metrics for the ECDC Forecast Hub

Welcome to the UK Covid-19 Forecasting Challenge evaluation board!

{.unlisted .unnumbered}

{.unlisted .unnumbered}

{.unlisted .unnumbered}

Leaderboard

{.unlisted .unnumbered}

{.unlisted .unnumbered}

Evaluation metrics

{.unlisted .unnumbered}

Number of participants

R Package Documentation

Browse R Packages

We want your feedback!

epiforecasts/europe-covid-forecast Forecasting Covid-19 related metrics for the ECDC Forecast Hub

In epiforecasts/europe-covid-forecast: Forecasting Covid-19 related metrics for the ECDC Forecast Hub

Welcome to the UK Covid-19 Forecasting Challenge evaluation board!

{.unlisted .unnumbered}

{.unlisted .unnumbered}

{.unlisted .unnumbered}

Leaderboard

{.unlisted .unnumbered}

{.unlisted .unnumbered}

Evaluation metrics

{.unlisted .unnumbered}

Number of participants

R Package Documentation

Browse R Packages

We want your feedback!

epiforecasts/europe-covid-forecast
Forecasting Covid-19 related metrics for the ECDC Forecast Hub