knitr::opts_chunk$set(echo = FALSE, message = FALSE, dpi = 300, warning = FALSE)
library(knitr)
library(here)
library(data.table)
library(DT)
library(janitor)
library(purrr)
library(ggplot2)
source(here("R", "utils.R"))
source(here("R", "plot.R"))
In this real-time evaluation report we provide preliminary visualisations, evaluation, and exploration of our nowcasting methodology [@epinowcast] for COVID-19 hospitalisations in Germany by date of positive test. For more details of our methodology and the specifics of the models shown in this report, please see our project summary and our accompanying paper. This report is updated each day (at roughly 9:00 GMT) as new data and nowcasts become available and may evolve over time. See our news section for a list of dated changes.
We first visualise current nowcasts across models and age groups at the national level, as well as overall at the subnational level. We also plot nowcasts at the date of estimation across sequential nowcasts, allowing us to summarise the performance of multiple nowcasts in a single plot, again by age group at the national level and overall at the subnational level.
To quantify comparative performance we make use of proper scoring rules [@scoringutils], applied on both the natural and log scales (corresponding to absolute and relative performance), to observed data reported at least 28 days ago, aggregating scores first across all targets and then stratifying in turn by age group, nowcast horizon, date of positive test, and date of report. See Bosse et al. [@scoringutils] for information on how to interpret these scores.
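As an illustration of this two-scale scoring, the following is a minimal sketch using the scoringutils 1.x API with its bundled example_quantile data rather than this report's own forecast files; before scoring, our nowcasts would be arranged in the same long quantile format (true_value, prediction, quantile, model, plus identifying columns).

library(scoringutils)

# bundled example data in scoringutils' long quantile format
forecasts <- example_quantile

# duplicate each forecast on the log scale (the offset avoids log(0)),
# which adds a `scale` column with values "natural" and "log"
forecasts <- transform_forecasts(forecasts, fun = log_shift, offset = 1)

# score every target, then aggregate across targets by model and scale
scores <- score(forecasts)
summarise_scores(scores, by = c("model", "scale"))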
To explore other aspects of our models' performance, we highlight models that have problematic fitting diagnostics and summarise the estimation time for each model. To learn more about the model fitting diagnostics we use here, see [@stan] and [@cmdstanr].
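For readers unfamiliar with these diagnostics, the following self-contained sketch shows how cmdstanr surfaces them; the toy model is purely illustrative and is not one of the nowcasting models evaluated here.

library(cmdstanr)

# a trivial model, used only to produce a fit object to inspect
stan_file <- write_stan_file("
  parameters { real x; }
  model { x ~ std_normal(); }
")
fit <- cmdstan_model(stan_file)$sample(refresh = 0)

# per-chain sampler diagnostics: divergent transitions, max treedepth
# hits, and energy (E-BFMI) issues
fit$diagnostic_summary()

# convergence diagnostics: rhat and effective sample sizes per parameter
fit$summary()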
The code for this report can be found here and the data that it uses can be found here.
In the following sections, nowcasts are visualised for the latest estimation date and then by estimation date for all models considered. Visualising a nowcast from a single estimation date corresponds to plotting a real-time nowcast, whilst plotting the estimate made at each estimation date across sequential nowcasts is useful for understanding the performance of nowcasting models across many nowcasts in a concise way.
Nowcasts based on the latest available data by age group on the national level and overall on the subnational level.
daily_nowcasts <- load_nowcasts(here("data", "nowcasts", "daily"))
daily_nowcasts[, mean := NA][, median := NA]
seven_day_nowcasts <- load_nowcasts(here("data", "nowcasts", "seven_day"))
latest_hosp <- load_obs(here("data", "observations", "daily.csv"))
latest_seven_day_hosp <- load_obs(here("data", "observations", "seven_day.csv"))
plot_nowcast(
  daily_nowcasts[location == "DE"][
    nowcast_date == max(nowcast_date)][, confirm := NA],
  latest_hosp[location == "DE"],
  max_delay = 28
) +
  facet_grid(vars(age_group), vars(model), scales = "free_y")
plot_nowcast(
  daily_nowcasts[!(location == "DE")][
    nowcast_date == max(nowcast_date)][, confirm := NA],
  latest_hosp[!(location == "DE")][age_group == "00+"],
  max_delay = 28
) +
  facet_wrap(vars(location), scales = "free_y")
Nowcast estimates at the date of estimation for sequential nowcasts by age group at the national level and overall at the subnational level.
plot_nowcast(
  daily_nowcasts[location == "DE"][horizon == 0][, confirm := NA],
  latest_hosp[location == "DE"]
) +
  facet_grid(vars(age_group), vars(model), scales = "free_y")
plot_nowcast(
  daily_nowcasts[!(location == "DE")][horizon == 0][, confirm := NA],
  latest_hosp[!(location == "DE")][age_group == "00+"]
) +
  facet_wrap(vars(location), scales = "free_y")
In this section we evaluate and compare the performance of each nowcasting model using proper scoring rules [@scoringutils] on both the natural and the log scale, applied to observed data reported at least 28 days ago. This corresponds to evaluating absolute and relative error, respectively. Only nowcasts at the national level are considered, as subnational nowcasts are not available for all models. For evaluation we consider only forecast targets for which the data are minimally informative, which we define as the 7 days prior to the date of the nowcast. We explore overall scores as well as scores stratified by age group, nowcast horizon, date of positive test, and date of report; this filtering and stratification is sketched below.
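A minimal sketch of the filtering and stratified aggregation follows. Here `scores` is assumed to be raw output of scoringutils::score() on our nowcasts, and the stratification column names (horizon, age_group) and horizon sign convention (0 on the nowcast date, counting back) are assumptions about the data layout rather than verified names.

library(scoringutils)
library(data.table)

# keep only minimally informative targets: the 7 days up to and including
# the nowcast date (horizon convention assumed)
recent_scores <- scores[horizon > -7 & horizon <= 0]

# overall scores, then stratified summaries (column names assumed)
summarise_scores(recent_scores, by = c("model", "scale"))
summarise_scores(recent_scores, by = c("model", "scale", "age_group"))
summarise_scores(recent_scores, by = c("model", "scale", "horizon"))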
scores <- load_diagnostics(here("data/scores/overall.csv"))
plot_scores(
  scores,
  y = interval_score, x = model, col = model, fill = model, group = model
) +
  facet_wrap(vars(scale), scales = "free_x") +
  labs(x = "Model") +
  theme(legend.position = "none") +
  coord_flip()
fancy_datatable(scores[scale == "natural"][, scale := NULL])
fancy_datatable(scores[scale == "log"][, scale := NULL])
horizon_scores <- load_diagnostics(here("data/scores/horizon.csv"))
plot_scores(
  horizon_scores,
  y = interval_score, x = horizon, col = model, fill = model, group = model
) +
  facet_wrap(vars(scale), scales = "free_y") +
  labs(x = "Nowcast horizon (with 0 being the date of nowcast)")
fancy_datatable(horizon_scores[scale == "natural"][, scale := NULL])
fancy_datatable(horizon_scores[scale == "log"][, scale := NULL])
age_group_scores <- load_diagnostics(here("data/scores/age_group.csv"))
plot_scores(
  age_group_scores,
  y = interval_score, x = age_group, col = model, fill = model, group = model
) +
  facet_wrap(vars(scale), scales = "free_y") +
  labs(x = "Age group")
fancy_datatable(age_group_scores[scale == "natural"][, scale := NULL])
fancy_datatable(age_group_scores[scale == "log"][, scale := NULL])
reference_date_scores <- load_diagnostics(here("data/scores/reference_date.csv"))
plot_scores(
  reference_date_scores,
  y = interval_score, x = reference_date, col = model, fill = model,
  group = model
) +
  facet_wrap(vars(scale), scales = "free_y") +
  labs(x = "Reference date")
fancy_datatable(reference_date_scores[scale == "natural"][, scale := NULL])
fancy_datatable(reference_date_scores[scale == "log"][, scale := NULL])
report_date_scores <- load_diagnostics(here("data/scores/nowcast_date.csv"))
plot_scores(
  report_date_scores,
  y = interval_score, x = nowcast_date, col = model, fill = model,
  group = model
) +
  facet_wrap(vars(scale), scales = "free_y") +
  labs(x = "Report date")
fancy_datatable(report_date_scores[scale == "natural"][, scale := NULL])
fancy_datatable(report_date_scores[scale == "log"][, scale := NULL])
This section summarises model fitting diagnostics. Whilst most models fit successfully, a small subset have issues for as yet unidentified reasons. These issues can be split into issues with convergence and issues with exploring the posterior distribution (often leading to unreliable posterior samples). Resolving these issues is an area of active research. In a smaller subset of cases, model fitting may fail entirely. In this instance, a nowcast using the default model from epinowcast (a fixed lognormal delay) is used instead and a flag is raised indicating this has happened; the control flow is sketched below. These instances are also summarised below. Finally, model run time is also summarised for national-level nowcasts.
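As an illustration of the fallback just described, here is a minimal sketch of the control flow; fit_candidate_model() and fit_default_model() are hypothetical stand-ins, not this project's actual functions.

# sketch of the fallback control flow, not the project's implementation;
# fit_candidate_model() and fit_default_model() are hypothetical helpers
fit_with_fallback <- function(obs) {
  tryCatch(
    list(nowcast = fit_candidate_model(obs), fallback_used = FALSE),
    error = function(e) {
      # raise a flag and fall back to the default epinowcast model
      # (a fixed lognormal delay)
      warning("fit failed, using default model: ", conditionMessage(e))
      list(nowcast = fit_default_model(obs), fallback_used = TRUE)
    }
  )
}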
rhat <- load_diagnostics(here("data/diagnostics/high-rhat.csv"))
fancy_datatable(rhat)
dts <- load_diagnostics(here("data/diagnostics/high-divergent-transitions.csv"))
fancy_datatable(dts)
failures <- load_diagnostics(here("data/diagnostics/fitting-failed.csv"))
fancy_datatable(failures)
In this section, we summarise the run time of each nowcasting model using 2 cores per model on a standard Azure virtual machine. These run times are indicative only, but the relative differences between models should hold across machines. To make the independent model's run times comparable with those of the other models, we have combined its run time across all age groups. However, this model is embarrassingly parallel and so could in theory be run on a separate compute node per age group; as we have 7 age groups, this would give up to a 7-fold speed-up (sketched below, after the run-time data is loaded). As all models make use of the support in stan for within-chain parallelisation, all models could also be estimated in significantly less time given additional compute resources. Run times are shown in minutes. Note that there is sometimes competition for resources on the server where these estimates are run, and this can lead to extended run times for all models.
run_times <- load_diagnostics(here("data/diagnostics/run-times.csv"))
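The combination of the independent model's per-age-group run times mentioned above can be sketched as follows; the "independent" model label, the nowcast_date grouping column, and a per-age-group row layout are assumptions about the run-time table, with run_time_mins taken from the summary chunk below.

# combined (serial) run time across age groups for the independent model;
# the "independent" label and nowcast_date column are assumed names
independent <- run_times[model == "independent"]
independent[, .(serial_mins = sum(run_time_mins)), by = nowcast_date]

# if the 7 age groups ran on separate nodes instead, wall-clock time
# would be roughly the slowest single group rather than the sum
independent[, .(parallel_mins = max(run_time_mins)), by = nowcast_date]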
summary_run_times <- run_times[,
  .(
    mean = mean(run_time_mins),
    median = median(run_time_mins),
    sd = sd(run_time_mins),
    max = max(run_time_mins),
    min = min(run_time_mins)
  ),
  by = c("model")
]
cols <- c("mean", "median", "sd", "max", "min")
summary_run_times <- summary_run_times[,
  (cols) := lapply(.SD, round, digits = 1),
  .SDcols = cols
]
fancy_datatable(summary_run_times)
ggplot(run_times) +
  aes(x = model, y = run_time_mins, col = model, fill = model) +
  geom_violin(alpha = 0.4) +
  geom_jitter(alpha = 0.4) +
  scale_y_log10() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(y = "Estimation time (minutes)", x = "") +
  coord_flip()