library(scoringutils) library(ggplot2) library(dplyr) library(DT) library(here) library(knitr) library(covidHubUtils) library(lubridate) library(purrr) knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE) include_ranking <- TRUE include_forecast_plot <- TRUE include_avail_forecasts <- FALSE include_ranks_over_time <- FALSE include_countries <- include_forecast_plot || include_forecast_plot || include_avail_forecasts || include_ranks_over_time
data <- params$data[location_name == "United Kingdom"] horizons <- 1:4 summarise_by = c("model", "target_variable") target_variables <- c(Cases = "inc case", Deaths = "inc death")
Below you find a visualisation of all forecasts made for the UK Covid-19 Crowd Forecasting Challenge as well as an evaluation of the forecasts.
If you want to learn more about the crowd forecasting challenge and how to participate, visit crowdforecastr.org
out <- paste0("\n\n## Visualisation of the combined crowd forecast {.tabset .tabset_fade}\n\n") out <- c(out, "The plot shows the combined forecast of all participants. Forecasts are combined by taking the average of all predictions. The date of the tab marks the date on", "which a forecast was made.\n\n")
r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')
# copy data temporarily to allow for filtering tmp_data <- copy(data) data <- data[model %in% c(NA, "EpiExpert-ensemble")] out <- NULL forecast_dates <- data$forecast_date[!is.na(data$forecast_date)] %>% unique() %>% sort() %>% as.character() %>% rev() loc <- params$location_name out <- c(out, knit_child(here::here("reports", "evaluation", "template-plot-forecasts.Rmd"))) # move tmp_data back data <- tmp_data
r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')
out <- paste0("\n\n## Forecast visualisation {.tabset .tabset_fade}\n\n") out <- c(out, "Visualisation of individual forecasts. The date of the tab marks the date on", "which a forecast was made.\n\n")
r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')
out <- NULL forecast_dates <- data$forecast_date[!is.na(data$forecast_date)] %>% unique() %>% sort() %>% as.character() %>% rev() loc <- params$location_name out <- c(out, knit_child(here::here("reports", "evaluation", "template-plot-forecasts.Rmd")))
r paste(if (include_forecast_plot) knit(text = out), collapse = '\n\n')
rankingdata <- copy(data) # log data rankingdata[, true_value := pmax(true_value, 0)] rankingdata[, true_value := log(true_value + 1)] rankingdata[, prediction := pmax(prediction, 0)] rankingdata[, prediction := log(prediction + 1)] table <- eval_forecasts(rankingdata, summarise_by = c("model", "forecast_date")) # assign median score if no forecast was made ---------------------------------- # for ever forecast_date, calculate the median medians <- table[model != "EpiExpert-ensemble", .(median = median(interval_score)), by = "forecast_date"] # get complete set of forecasters and dates fcdates <- medians$forecast_date participants <- unique(rankingdata$model) complete_set <- expand.grid(forecast_date = fcdates, model = participants[!is.na(participants)]) # merge medians with complete set and that with scores medians <- merge(complete_set, medians, all.y = TRUE) table <- merge(table, medians, all.y = TRUE) table[is.na(interval_score), interval_score := median] table <- table[, .(interval_score = sum(interval_score)), by = "model"] out <- htmltools::tagList( table %>% arrange(interval_score) %>% mutate_if(is.numeric, round, 2) %>% dplyr::mutate(ranking = 1:n()) %>% dplyr::rename(`interval score (lower = better)` = interval_score, forecaster = model) %>% DT::datatable(extensions = c('FixedColumns', 'Buttons'), width = "100%", options = list( paging = FALSE, info = FALSE, buttons = c('csv', 'excel'), dom = 'Bfrtip', scrollX = TRUE, fixedColumns = TRUE ), class = 'white-space: nowrap') )
r paste(knit(text = out), collapse = '\n\n\n')
out <- paste0("\n\n\n## Detailed forecast scores {.tabset .tabset-fade}\n\n") out <- c(out, "Scores separated by target and forecast horizon.\n\n")
r paste(if (include_ranking) knit(text = out, quiet = TRUE), collapse = '\n\n')
out <- NULL for (variable in names(target_variables)) { out <- c(out, paste("\n\n###", variable, " {.tabset .tabset-fade}\n\n")) for (this_horizon in horizons) { filter_list = list(paste0("target_variable == '", target_variables[[variable]], "'"), "!(is.na(quantile))", paste0("horizon == ", this_horizon), paste0("location_name == '", loc, "'")) horizon_string <- paste0(this_horizon, " week", if_else(this_horizon > 1, "s", ""), " ahead horizon") out <- c(out, paste("\n\n####", horizon_string, "\n\n")) out <- c(out, knit_child(here::here("reports", "evaluation", "template-ranking-table.Rmd"), quiet = TRUE)) } }
r paste(if (include_ranking) knit(text = out, quiet = TRUE), collapse = '\n\n')
out <- paste0("\n\n## Evolution of scores over time {.tabset .tabset-fade}\n\n") out <- c(out, "Visualisation of the weighted interval score over time. In", "addition, the components of the interval score, sharpness (how", "narrow are forecasts - smaller is better), and penalties for", "underprediction and overprediction are shown. Scores are again", "separated by forecast horizon\n\n")
r paste(knit(text = out), collapse = '\n\n')
dt <- data[!(model %in% c("Crowd-Rt-Forecast", "EpiNow2_secondary", "EpiExpert-ensemble", "EpiNow2")), .(`number of forecasters` = length(unique(model))), , by = c("forecast_date", "location_name", "target_variable") ][order(forecast_date)][ !is.na(forecast_date)][ location_name == "United Kingdom" ] dt[, .(sd = sd(`number of forecasters`), mean = mean(`number of forecasters`), min = min(`number of forecasters`), max = max(`number of forecasters`), median = median(`number of forecasters`)), by = c("location_name", "target_variable")] dt %>% ggplot(aes(y = `number of forecasters`, x = forecast_date, group = target_variable)) + geom_line() + geom_point() + facet_grid(target_variable ~ .) + theme_minimal() + labs(x = "Forecast date")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.