In mrc-ide/reactidd: Package for the temporal analysis of the REACT study

knitr::opts_chunk$set(echo = TRUE)

library(reactidd)

Loading the Data

First we will load the weighted REACT data available using reactidd::load_example_data_weighted() Datasets have the weighted number of positives by english region and for England as a whole (pos below) and the weighted number of total tests by english region and for England as a whole (tot)

pos <- load_example_data_weighted()[[1]]
tot <- load_example_data_weighted()[[2]]

In order to subset the data we define the dates of each round of the study

min_date_r1 <- as.Date("2020-05-01")
min_date_r12 <- as.Date("2021-05-20")
min_date_r13 <- as.Date("2021-06-24")

max_date_r1 <- as.Date("2020-06-01")
max_date_r12 <- as.Date("2021-06-07")
max_date_r13 <- as.Date("2021-07-12")

Fitting the exponential model

We fit the exponential model to subsets of the data corresponding to individual rounds and pairs of subsequent rounds Similar models can easily be run on the regional data by switching 'England' to the region name 'NorthWest' for example

exp_mod_react_r12 <- stan_exp_model_weighted(pos[pos$X>=min_date_r12 & pos$X<= max_date_r12,]$X,
                                   pos[pos$X>=min_date_r12 & pos$X<= max_date_r12,]$England,
                                   tot[tot$X>=min_date_r12 & tot$X<= max_date_r12,]$England,
                                   iter = 5000,
                                   warmup = 500,
                                   cores = 1)

exp_mod_react_r13 <- stan_exp_model_weighted(pos[pos$X>=min_date_r13 & pos$X<= max_date_r13,]$X,
                                   pos[pos$X>=min_date_r13 & pos$X<= max_date_r13,]$England,
                                   tot[tot$X>=min_date_r13 & tot$X<= max_date_r13,]$England,
                                   iter = 5000,
                                   warmup = 500,
                                   cores = 1)

exp_mod_react_r1213 <- stan_exp_model_weighted(pos[pos$X>=min_date_r12 & pos$X<= max_date_r13,]$X,
                                   pos[pos$X>=min_date_r12 & pos$X<= max_date_r13,]$England,
                                   tot[tot$X>=min_date_r12 & tot$X<= max_date_r13,]$England,
                                   iter = 5000,
                                   warmup = 500,
                                   cores = 1)

Using these model fits we can calculate the growth rate, R and the doubling/halving times for each model fit

R_estimates_react_r12 <- exponential_estimate_R(exp_mod_react_r12, n_mean = 2.29, b_mean =0.36, label ="React-Round12")
R_estimates_react_r13 <- exponential_estimate_R(exp_mod_react_r13, n_mean = 2.29, b_mean =0.36, label ="React-Round13")
R_estimates_react_r1213 <- exponential_estimate_R(exp_mod_react_r1213, n_mean = 2.29, b_mean =0.36, label ="React-Round12-Round13")


R_table <- rbind(R_estimates_react_r12, R_estimates_react_r13, R_estimates_react_r1213)
print(R_table)

Plotting the exponential model fits

The model fits can then be plotted:

linear_plots <- plot_exp_model(X = pos[pos$X>=min_date_r12 & pos$X<= max_date_r13,]$X,
                                   Y= pos[pos$X>=min_date_r12 & pos$X<= max_date_r13,]$England,
                                   N = tot[tot$X>=min_date_r12 & tot$X<= max_date_r13,]$England,
                                   fit_exp = list(exp_mod_react_r13, exp_mod_react_r1213, exp_mod_react_r12),
                                   X_model = list(pos[pos$X>=min_date_r13 & pos$X<= max_date_r13,]$X,
                                                  pos[pos$X>=min_date_r12 & pos$X<= max_date_r13,]$X,
                                                  pos[pos$X>=min_date_r12 & pos$X<= max_date_r12,]$X),
                                   color_list = list("red","blue","dark green"),
                                   ylim = 1.0)
linear_plots[[1]] <- linear_plots[[1]]+
  ggplot2::coord_cartesian(ylim=c(0.01,2))+
  ggplot2::scale_y_log10()

print(linear_plots[[1]])

Fitting the Bayesian P-Spline Model to the data

We can then fit the Bayesian P-spline model to all of the REACT data for the first 13 rounds of the study Note that a larger number of iterations is required to get a reliable fit.

p_spline_mod_react <- stan_p_spline_weighted(pos[pos$X>=min_date_r1 & pos$X<= max_date_r13,]$X,
                                   pos[pos$X>=min_date_r1 & pos$X<= max_date_r13,]$England,
                                   tot[tot$X>=min_date_r1 & tot$X<= max_date_r13,]$England,
                                   target_dist_between_knots = 5,
                                   spline_degree = 3,
                                   iter = 5000,
                                   warmup = 1000,
                                   cores = 1)

The p-spline model can then be plotted with estimated 95% and 50% credible intervals

p_spline_plot <- plot_p_spline_prev(pos[pos$X>=min_date_r1 & pos$X<= max_date_r13,]$X,
                                   pos[pos$X>=min_date_r1 & pos$X<= max_date_r13,]$England,
                                   tot[tot$X>=min_date_r1 & tot$X<= max_date_r13,]$England,
                                   p_spline_fit = p_spline_mod_react, 
                                    target_dist_between_knots = 5,
                                    spline_degree = 3,
                                   ylim = 3.0)

print(p_spline_plot[[1]])

p_spline_plot[[1]] <- p_spline_plot[[1]]+
  ggplot2::coord_cartesian(xlim=c(min_date_r12-10, max_date_r13), ylim = c(0.01,3))+
  ggplot2::scale_y_log10()

print(p_spline_plot[[1]])

Comparing REACT data to deaths and hospitalizations

We can also do comparisons of REACT data to the public data available on deaths and hospitalizations

First we load the public data that was used in the analysis for the round 13 final paper and clean it

death_dat <- read.csv(system.file("extdata", "public_data_age_deaths_2021-07-19.csv", package = "reactidd"))
hosp_dat <- read.csv(system.file("extdata", "public_data_age_hosps_2021-07-19.csv", package = "reactidd"))

death <- clean_death_data(death_dat)
hosp <- clean_hosp_data(hosp_dat)

The public datasets here have columns available for the date ("date"), the total number of daily outcomes ("all"), the total number of outcomes in those aged 64 and under ("age_64_under") and the total number of outcomes in those aged 65 and over ("aged_65_over"). We present here the results for models fit to all data but the code can easily be used for the two timeseries split by age.

We first fit Bayesian P-spline models to the daily death and hospitalization data. (Note again more iterations are required for an accurate fit)

death_spline <- stan_p_spline_phe(X = death$date,
                                   Y= death$all,
                                   target_dist_between_knots = 5,
                                   spline_degree = 3,
                                   iter = 2000,
                                   warmup = 1000,
                                   cores = 1,
                                  chains=1)

hosp_spline <- stan_p_spline_phe(X = hosp$date,
                                   Y=hosp$all,
                                   target_dist_between_knots = 5,
                                   spline_degree = 3,
                                   iter = 2000,
                                   warmup = 1000,
                                   cores = 1,
                                 chains =1)

Bayesian P-spline models posteriors are extracted and converted into the actual response variable. A number of samples are drawn randomly from the posterior. We have drawn 100 samples for speed but in the main analysis 1000 samples were drawn.

# Extract posteriors from splines
death_post <- rstan::extract(death_spline)
hosp_post<-rstan::extract(hosp_spline)

# Get response values
death_post_Y <- death_post$Y_hat
hosp_post_Y <- hosp_post$Y_hat


# Select random sample from posterior
death_post_Y<-death_post_Y[sample(nrow(death_post_Y), 100),]
hosp_post_Y<-hosp_post_Y[sample(nrow(hosp_post_Y), 100),]


# Exponentiate to get actual value
death_post_Y <- exp(death_post_Y)
hosp_post_Y <- exp(hosp_post_Y)

With these response posteriors we can then fit the time lag model to the REACT data. Note for analysis using age seperated data the REACT data is available using "read.csv(system.file("extdata", "ages_weighted.csv", package = "reactidd"))" which has the weighted positives and total tests for both those aged 64 and under and those aged 65 and over by date.

# Set initial parameters for time lag and log difference (scaling parameter)
init_pars <- c(10, -10)
# Set number of iterations 
N <- 10000

# Set range of REACT data to fit to
# Only fitting to rounds 1-12 due to the time lag
ensemble = data.frame(day=pos$X,
                      pos=pos$England,
                      obs=tot$England)

ensemble <- ensemble[ensemble$day <=max_date_r12,]

# Run model to get time lag and log difference for all timeseries
death_eng <- run_time_lag_model(N=N, init_pars = init_pars, ensemble=ensemble,  dates=death$date , ENG = death_post_Y, burnin=2000,name="All", rounds = "Rounds 1-12", data_source = "Deaths")


hosp_eng <- run_time_lag_model(N=N, init_pars = init_pars, ensemble=ensemble,  dates=hosp$date , ENG = hosp_post_Y, burnin=10000,name="All", rounds = "Rounds 1-12", data_source = "hosps")

death_eng[[1]]
hosp_eng[[1]]

## Can convert log difference into the scaling parameter 
# Population estimate of England
pop <- 56286961
hosp_eng[[1]][c("alpha","alpha_lb","alpha_ub")]

We can then plot the death and hospitalization P-spline fits translated by the best fitting time -lag parameter and scaled by the scaling parameter. We plot it against the previously fit REACT Bayesian P-spline model to compare the time-series.

p_spline_plot <- plot_p_spline_prev(pos[pos$X>=min_date_r1 & pos$X<= max_date_r13,]$X,
                                   pos[pos$X>=min_date_r1 & pos$X<= max_date_r13,]$England,
                                   tot[tot$X>=min_date_r1 & tot$X<= max_date_r13,]$England,
                                   p_spline_fit = p_spline_mod_react, 
                                    target_dist_between_knots = 5,
                                    spline_degree = 3,
                                   ylim = 3.0)



death_plot <- add_public_data_to_plot(p_spline_plot = p_spline_plot[[1]], Y_array = death_post_Y, table_eng = death_eng[[1]], X=death$date, dat = death)
hosp_plot <- add_public_data_to_plot(p_spline_plot = p_spline_plot[[1]], Y_array = hosp_post_Y, table_eng = hosp_eng[[1]], X=hosp$date, dat = hosp)

print(death_plot[[1]])
print(hosp_plot[[1]])