knitr::opts_chunk$set(echo = TRUE)
library(reactidd)
First we will load the weighted REACT data available using reactidd::load_example_data_weighted() Datasets have the weighted number of positives by english region and for England as a whole (pos below) and the weighted number of total tests by english region and for England as a whole (tot)
pos <- load_example_data_weighted()[[1]] tot <- load_example_data_weighted()[[2]]
In order to subset the data we define some dates to subset into REACT-1 rounds
min_date_r1 <- as.Date("2020-05-01") min_date_r14 <- as.Date("2021-09-09") max_date_r7 <- as.Date("2020-12-03") max_date_r13 <- as.Date("2021-07-12") max_date_r19 <- as.Date("2022-04-01")
We can fit the Bayesian P-spline model to all of the REACT data for all 19 rounds of the study Note that a much larger number of iterations is required to get a reliable fit.
p_spline_mod_react <- stan_p_spline_weighted(pos[pos$X>=min_date_r1 & pos$X<= max_date_r19,]$X, pos[pos$X>=min_date_r1 & pos$X<= max_date_r19,]$England, tot[tot$X>=min_date_r1 & tot$X<= max_date_r19,]$England, target_dist_between_knots = 5, spline_degree = 3, iter = 20000, warmup = 2000, cores = 1, chains=4)
The p-spline model can then be plotted with estimated 95% and 50% credible intervals
p_spline_plot <- plot_p_spline_prev(pos[pos$X>=min_date_r1 & pos$X<= max_date_r19,]$X, pos[pos$X>=min_date_r1 & pos$X<= max_date_r19,]$England, tot[tot$X>=min_date_r1 & tot$X<= max_date_r19,]$England, p_spline_fit = p_spline_mod_react, target_dist_between_knots = 5, spline_degree = 3, ylim = 12.0) print(p_spline_plot[[1]]) p_spline_plot[[1]] <- p_spline_plot[[1]]+ ggplot2::coord_cartesian(xlim=c(min_date_r1, max_date_r19), ylim = c(0.01,10))+ ggplot2::scale_y_log10() print(p_spline_plot[[1]])
We can compare REACT-1 data to the public data available on deaths to estimate the IFR. We demonstrate the code on publicly available death data (obtained from the COVID-19 UK dashboard) for deaths. All analysis could be perfomed using the other data sets available (new cases and hospitalizations).
First we load the public data for deaths that was used in the analysis for the IFR paper
death_dat <- read.csv(system.file("extdata", "public_data_age_deaths_2022-05-20.csv", package = "reactidd")) death <- clean_death_data(death_dat, max_date=as.Date("2022-05-15"))
The public dataset used here has data available for the date ("date"), the total number of daily outcomes ("all"), the total number of outcomes in those aged 64 and under ("age_64_under") and the total number of outcomes in those aged 65 and over ("aged_65_over"). We present here the results for models fit to all data but the code can easily be used for the two timeseries split by age.
We first fit Bayesian P-spline models to the daily death data. (Note again more iterations are required for an accurate fit)
death_spline <- stan_p_spline_phe(X = death$date, Y= death$all, target_dist_between_knots = 5, spline_degree = 3, iter = 5000, warmup = 500, cores = 1, chains=4)
Bayesian P-spline models posteriors are extracted and converted into the actual response variable. A number of samples are drawn randomly from the posterior. We have drawn 100 samples for speed but in the main analysis 1000 samples were drawn.
# Extract posteriors from splines death_post <- rstan::extract(death_spline) # Get response values death_post_Y <- death_post$Y_hat # Select random sample from posterior death_post_Y<-death_post_Y[sample(nrow(death_post_Y), 100),] # Exponentiate to get actual value death_post_Y <- exp(death_post_Y)
With these response posteriors we can then fit the time lag model to the REACT data. Note for analysis using age seperated data the REACT data is available using "read.csv(system.file("extdata", "ages_weighted.csv", package = "reactidd"))" which has the weighted positives and total tests for both those aged 64 and under and those aged 65 and over by date.
# Set initial parameters for time lag and log difference (scaling parameter) init_pars <- c(10, -10) # Set number of iterations N <- 10000 # Set range of REACT data to fit to # Only fitting to rounds 1-7 due to the time lag ensemble = data.frame(day=pos$X, pos=pos$England, obs=tot$England) ensemble1_7 <- ensemble[ensemble$day <=max_date_r7,] ensemble14_19 <- ensemble[ensemble$day <=max_date_r19 &ensemble$day >= min_date_r14,] # Run model to get time lag and log difference for all timeseries death_eng1 <- run_time_lag_model(N=N, init_pars = init_pars, ensemble=ensemble1_7, dates=death$date , ENG = death_post_Y, burnin=2000,name="All", rounds = "Rounds 1-7", data_source = "Deaths") death_eng2 <- run_time_lag_model(N=N, init_pars = init_pars, ensemble=ensemble14_19, dates=death$date , ENG = death_post_Y, burnin=2000,name="All", rounds = "Rounds 14-19", data_source = "Deaths") death_eng1[[1]] death_eng2[[1]] ## Can convert log difference into the IFR using population estimates and a factor of 11.06 # Population estimate of England pop <- 56286961 11.06*100/(pop*exp(death_eng1[[1]]$alpha)) 11.06*100/(pop*exp(death_eng2[[1]]$alpha))
We can then plot the death and hospitalization P-spline fits translated by the best fitting time -lag parameter and scaled by the scaling parameter. We plot it against the previously fit REACT Bayesian P-spline model to compare the time-series.
p_spline_plot <- plot_p_spline_prev(pos[pos$X>=min_date_r1 & pos$X<= max_date_r19,]$X, pos[pos$X>=min_date_r1 & pos$X<= max_date_r19,]$England, tot[tot$X>=min_date_r1 & tot$X<= max_date_r19,]$England, p_spline_fit = p_spline_mod_react, target_dist_between_knots = 5, spline_degree = 3, ylim = 10.0) death_plot1 <- add_public_data_to_plot(p_spline_plot = p_spline_plot[[1]], Y_array = death_post_Y, table_eng = death_eng1[[1]], X=death$date, dat = death) death_plot2 <- add_public_data_to_plot(p_spline_plot = p_spline_plot[[1]], Y_array = death_post_Y, table_eng = death_eng2[[1]], X=death$date, dat = death) print(death_plot1[[1]]) print(death_plot2[[1]])
dd
# Get full posterior of death spline death_post <- rstan::extract(death_spline) death_array <- death_post$Y_hat #Get full posterior of REACT spline X_REACT <- ensemble$day react_post <- rstan::extract(p_spline_mod_react) react_array <- get_response_posterior(react_post, as.numeric(X_REACT)) react_array_log <- log(boot::inv.logit(react_array)) #Get difference between splines first for time lag from model fit to rounds 1-7 death_dif1<-plot_spline_difference(X_REACT, react_array_log, death$date, death_array, time_delay = 26) death_dif1 <- death_dif1[death_dif1$date<=max_date_r13,] #Get difference between splines first for time lag from model fit to rounds 14-19 death_dif2<-plot_spline_difference(X_REACT, react_array_log, death$date, death_array, time_delay = 18) death_dif2 <- death_dif2[death_dif2$date>=min_date_r14,] # Convert difference to IFR death_dif1$mean <- 11.06*100/(pop*exp(death_dif1$mean)) death_dif1$lb <- 11.06*100/(pop*exp(death_dif1$lb)) death_dif1$ub <- 11.06*100/(pop*exp(death_dif1$ub)) death_dif2$mean <- 11.06*100/(pop*exp(death_dif2$mean)) death_dif2$lb <- 11.06*100/(pop*exp(death_dif2$lb)) death_dif2$ub <- 11.06*100/(pop*exp(death_dif2$ub)) #Plot the IFR over time ggplot2::ggplot()+ ggplot2::geom_line(data = death_dif1, ggplot2::aes(x=date, y=mean))+ ggplot2::geom_ribbon(data = death_dif1, ggplot2::aes(x=date,y=mean, ymin=lb, ymax=ub))+ ggplot2::geom_line(data = death_dif2, ggplot2::aes(x=date, y=mean))+ ggplot2::geom_ribbon(data = death_dif2, ggplot2::aes(x=date,y=mean, ymin=lb, ymax=ub))+ ggplot2::theme_bw()+ ggplot2::coord_cartesian(ylim=c(0.001,10))+ ggplot2::scale_y_log10()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.