knitr::opts_chunk$set(echo = FALSE, 
                      message = FALSE, 
                      warning = FALSE)


library(here)
library(scoringutils)
library(dplyr)
library(data.table)
library(covid.ecdc.forecasts)
library(kableExtra)
# load all data ----------------------------------------------------------------
root_dir <- here::here("crowd-direct-forecast", "processed-forecast-data")
file_paths_forecast <- here::here(root_dir, list.files(root_dir))

prediction_data <- purrr::map_dfr(file_paths_forecast, 
                                  .f = function(x) {
                                    data <- data.table::fread(x) %>%
                                      dplyr::mutate(target_end_date = as.Date(target_end_date), 
                                                    submission_date = as.Date(submission_date), 
                                                    forecast_date = as.Date(forecast_date))
                                  }) %>%
  dplyr::mutate(target_type = ifelse(grepl("death", target), "death", "case")) %>%
  dplyr::rename(prediction = value) %>%
  dplyr::mutate(forecast_date = as.Date(submission_date)) %>%
  dplyr::rename(model = board_name) %>%
  dplyr::filter(type == "quantile") %>%
  dplyr::select(location, location_name, forecast_date, quantile, prediction, model, target_end_date, horizon, target, target_type)

# filter forecasters such that only those are kept that have submitted forecasts
# in the last four weeks
keep_fc <- prediction_data %>%
  dplyr::filter(forecast_date > (Sys.Date() - 4 * 7)) %>%
  dplyr::pull(model) %>%
  unique()

prediction_data <- dplyr::filter(prediction_data, 
                                 model %in% keep_fc)

files <- list.files(here::here("data-raw"))
file_paths <- here::here("data-raw", files[grepl("weekly-incident", files)])
names(file_paths) <- c("case", "death")


truth_data <- purrr::map_dfr(file_paths, readr::read_csv, .id = "target_type") %>%
  dplyr::select(-location_name) %>%
  dplyr::rename(true_value = value) %>%
  dplyr::mutate(target_end_date = as.Date(target_end_date)) %>%
  dplyr::arrange(location, target_type, target_end_date) %>%
  left_join(locations)


data <- scoringutils::merge_pred_and_obs(prediction_data, truth_data, 
                                         join = "full")

Dear r name,

Here are your weekly updates from the crowdforecastr project as well as a reminder to submit a forecast this week if you like. Forecasts can be made between Sunday 12am and Monday 8pm UK time (we slightly changed these times so that we can check and process all forecasts).

You can always find more information about the project on www.crowdforecastr.org.

Forecast and Performance overview

Now, let's have a look at last weeks forecasts and performance! Here is an overview of last week's forecasts for the locations you forecasted (or Germany, Poland and UK if you didn’t make a forecast) and the ensemble forecast. Depending on the number of locations you forecasted, this might look a bit crammed. You can find more detailed information on our performance board here. You can also find the evaluation for the European Forecast Hub here.

if (weekdays(Sys.Date()) == "Monday") {
  last_forecast_date <- latest_weekday(Sys.Date() - 7)
} else {
  last_forecast_date <- latest_weekday(Sys.Date())
}

forecasted_locations <- unique(data[model %in% board_name & 
                                      forecast_date %in% last_forecast_date
                                    ]$location_name)

if (length(forecasted_locations) == 0) {
  forecasted_locations <- c("Germany", "Poland", "United Kingdom")
}


filter_truth <- list('target_end_date >= Sys.Date() - 14 * 7')
filter_both <- list("location_name %in% forecasted_locations")
filter_forecasts <- list("model %in% 'EpiExpert-ensemble'", 
                         paste0("forecast_date == '", last_forecast_date, "'"))

plot_predictions(data = data, 
                 x = "target_end_date",
                 filter_truth = filter_truth, 
                 filter_both = filter_both,
                 filter_forecasts = filter_forecasts,
                 facet_formula = target_type ~ location_name, 
                 facet_wrap_or_grid = "grid", 
                 allow_truth_without_pred = FALSE) + 
  ggplot2::labs(x = "Date", title = "Ensemble Forecasts") + 
  ggplot2::theme(legend.position = "bottom")

filter_truth <- list('target_end_date >= Sys.Date() - 14 * 7')
filter_forecasts <- list(paste0("model %in% '", board_name, "'"), 
                         paste0("forecast_date == '", last_forecast_date, "'"))

p <- plot_predictions(data = data, 
                 x = "target_end_date",
                 filter_truth = filter_truth, 
                 filter_both = filter_both,
                 filter_forecasts = filter_forecasts,
                 facet_formula = target_type ~ location_name, 
                 facet_wrap_or_grid = "grid") 
if (!is.null(p)) {
  p <- p + 
  ggplot2::labs(x = "Date", title = "Your Forecasts") + 
  ggplot2::theme(legend.position = "bottom")
  print(p)
}


*Here is the list with the current top 5 forecasters in the locations you forecasted (or Germany, Poland and UK if you didn't make a forecast), based on the last 5 weeks: *

cut_off <- latest_weekday(Sys.Date()) - 5 * 7
df <- data %>%
  filter(forecast_date >= cut_off)

table <- eval_forecasts(data[location_name %in% forecasted_locations], 
                        summarise_by = c("model"), 
                        compute_relative_skill = TRUE)

table %>%
    arrange(relative_skill) %>%
    dplyr::select(-scaled_rel_skill) %>%
    mutate_if(is.numeric, round, 2) %>%
    dplyr::rename(wis = interval_score,
                  underpred = underprediction,
                  overpred = overprediction,
                  cvrage_dev = coverage_deviation,
                  rel_skill = relative_skill) %>%
  select(model, rel_skill) %>%
  head(5) %>%
  knitr::kable(format = "html") %>% 
  kable_styling(bootstrap_options = "striped", full_width = F)


And here is a plot with the rank by date and target of all active forecasters over time:

scores <- scoringutils::eval_forecasts(data[location_name %in% forecasted_locations],
                                         summarise_by = c("model", 
                                                          "location_name", 
                                                          "target_type",
                                                          "forecast_date"), 
                                       compute_relative_skill = FALSE)

ranked_scores <- scores %>%
  dplyr::mutate(forecast_date = as.Date(forecast_date)) %>%
  dplyr::group_by(forecast_date, location_name, target_type) %>%
  dplyr::mutate(num_forecasts = dplyr::n(), 
                rank = rank(interval_score, ties.method = "average",
                                           na.last = NA), 
                standard_rank = round((1 - (rank - 1) / (num_forecasts - 1)) * 100)) %>%
  dplyr::ungroup()

plot <- score_heatmap(ranked_scores, metric = "rank", 
                      x = "forecast_date",
                      facet_wrap_or_grid = "grid",
                      facet_formula = location_name ~ target_type) + 
  ggplot2::theme(legend.position = "bottom") +
    ggplot2::scale_fill_gradient(low = "white", high = "coral")

print(plot)


If you have any further questions or feedback, please don't hesitate to reach out. You can drop us an email or submit an issue to our new github repo

Thank you very much for your support and participation. We very much appreciate it!

Best wishes,

Nikos for the epiforecasts team


PS: You're receiving this message because you gave us your email address when you registered for the Epiforecasts Crowd Forecast App. If you don't want to receive future emails simply write me a quick message or go to the app and click on 'Account Details' in the menu to the left of the screen. You can simply delete your email address there.



epiforecasts/europe-covid-forecast documentation built on Jan. 15, 2025, 8:57 p.m.