knitr::opts_chunk$set(fig.height = 4, fig.width = 9) devtools::load_all() library(tidyverse) library(cowplot)
We want to be able to calibrate $\beta$ in the age-structured model, but trying to fit a $\beta$ value for each age group may be too difficult (or impossible). Let's see how far we can get with $\beta$ calibrated to the whole population.
We can break $\beta_0$ down into two components: transmissiblity, $\tau$ (proportion of contacts between $S$ and $I$ that lead to transmission) and the average contact rate $\langle c \rangle$ (contacts/day):
$$ \beta_0 = \tau \langle c \rangle. $$
In the age-structured model, if we assume transmissibility is constant across age groups, we have
$$ (\beta_0)_i = \tau c_i $$
for each age group $i$. Instead of calibrating each $(\beta_0)_i$, let's try to:
Here's a function that does this lazy calibration:
#' Calibrate age-specific transmission params using population-level beta0 and assumed contact structure #' #' @param age_params parameters that have been initialized using `expand_params_age()` with a specified age-specific contact rate #' @param base_beta0 beta0 calibrated from base (non-ageified) simulation #' #' @return an object of class `params_pansim` #' @export #' #' @examples #' base_params <- update(read_params("PHAC_testify.csv"), testing_intensity = 0, beta0 = 2) #' age_params <- expand_params_age(base_params, transmissibility = 1, contact_rate_age = rep(4, length(age_cat))) #' calibrate_transmission(age_params) calibrate_transmission <- function(age_params, base_beta0){ ## calculate average age-specific contact rate avg_contact_rate_age <- sum(age_params[["N"]]*age_params[["contact_rate_age"]])/sum(age_params[["N"]]) ## calculate transmissibility implied by average contact rate transmissibility <- base_beta0/avg_contact_rate_age print(paste0("calculated transmissibility: ", transmissibility)) ## update beta0 (and transmissibility in age_params) ## based on assumed contact_rate_age and implied transmissibility age_params[["beta0"]] <- transmissibility*age_params[["contact_rate_age"]] age_params[["transmissibility"]] <- transmissibility return(age_params) }
Here's a function that computes both the base sim, as well as the ageified sim (with lazy calibration):
#' Compare ageified simulation to base, using simple calibration trick #' #' @param age_cat age categories #' @param base_beta0 beta0 to use in base simulation #' @param Nvec population distribution #' @param pmat contact probability matrix #' @param contact_rate_age age-specific contact rates #' @inheritParams run_sim #' #' @return #' @export compare_age_to_base <- function(age_cat, base_beta0, Nvec, pmat = NULL, contact_rate_age, start_date = "2020-03-01", end_date = "2020-08-01", params_timevar = NULL){ ## set up base (non-ageified) params and state base_params <- update(read_params("PHAC_testify.csv") , N = sum(Nvec) , testing_intensity = 0 , beta0 = base_beta0 ) base_state <- make_state(N = base_params[["N"]], E0 = length(age_cat)) ## run base simulation res_base <- run_sim(base_params, base_state, start_date = start_date, end_date = end_date, params_timevar = params_timevar, condense = FALSE) ## set up ageified parameters ## (this will use assume beta0 as in the base simulation) age_params <- expand_params_age( params = base_params, age_cat = age_cat, Nvec = Nvec, pmat = pmat, transmissibility = 1, ## dummy value, will get updated contact_rate_age = contact_rate_age ) ## perform lazy calibration (update transmissibility based on base_beta0 ## and contact_rate_age) age_params <- calibrate_transmission(age_params, base_beta0) ## set up age-structured state age_state <- expand_state_age( base_state, age_cat = age_cat, Nvec = age_params[["N"]] ) ## age-structured sim with beta0 vec and transmissibility ## "inferred" from base sim res_age <- run_sim(age_params, age_state, start_date = start_date, end_date = end_date, params_timevar = params_timevar, condense = FALSE) ## aggregate over ages res_age_cond <- condense_age(res_age %>% select(!starts_with("foi"))) attr(res_age_cond, "row.names") <- as.character(attr(res_age_cond, "row.names")) ## check sim results are equal print("comparing base and ageified simulation results... equal?") print(all.equal(res_age_cond, res_base %>% select(-foi))) ## plot sim results p <- (bind_rows( pivot_longer((res_base %>% select(-foi) %>% mutate(model = "base")), cols = -c(date, model), names_to = "var"), pivot_longer((condense_age(res_age %>% select(!starts_with("foi"))) %>% mutate(model = "age-structured")), cols = -c(date, model), names_to = "var")) %>% mutate(var = as_factor(var)) %>% ggplot(aes(x = date, y = value, color = model)) + geom_line() + facet_wrap(vars(var), scales = "free_y") + scale_x_date(date_breaks = "1 month", date_labels = "%b") ) print(p) }
This should work perfectly...
## set up age categories age_cat <- mk_agecats(0, 80, da = 30) n <- length(age_cat) Ni <- 1e6 Nvec <- rep(Ni, n) base_beta0 <- 2 compare_age_to_base( age_cat = age_cat, base_beta0 = base_beta0, Nvec = Nvec, contact_rate_age = rep(4, length(age_cat)) )
Not shocking; if this wasn't a perfect match, it would be because of a bug.
Nvec <- rep(5e6, 3) true_beta0 <- c(2, 3, 3/2) base_beta0 <- sum(Nvec*true_beta0)/sum(Nvec) true_transmissibility <- 0.5 contact_rate_age <- true_beta0/true_transmissibility ## initialize contact structure pmat <- matrix(c(4/10, 4/30, 8/15, 2/10, 23/30, 3/15, 4/10, 1/10, 4/15), nrow = 3, dimnames = list(age_cat, age_cat)) compare_age_to_base( age_cat = age_cat, base_beta0 = base_beta0, Nvec = Nvec, pmat = pmat, contact_rate_age = contact_rate_age )
Not a perfect match, which makes sense because the age-structured model doesn't just collapse to the homogeneous case here, but it looks like there's pretty good agreement with this calibration trick...
## rescale to a non-uniform population while still maintaining balance total_contacts <- true_beta0*Nvec true_beta0 <- 1:3 Nvec <- total_contacts/true_beta0 ## check balance # isSymmetric((Nvec*true_beta0)*pmat) base_beta0 <- sum(Nvec*true_beta0)/sum(Nvec) true_transmissibility <- 0.5 contact_rate_age <- true_beta0/true_transmissibility compare_age_to_base( age_cat = age_cat, base_beta0 = base_beta0, Nvec = Nvec, pmat = pmat, contact_rate_age = contact_rate_age )
Again, not terrible... not sure how sensitive the match is to the population distribution, relative magnitudes of the true $(\beta_0)_i$, and the assumed contact probability matrix.
Let's just go all-in with age-based heterogeneity in $\beta_0$, contact probabilities between ages, and the population distribution, plus a time-varying $\beta$:
time_pars <- data.frame(Date=c("2020-04-02", "2020-05-01"), Symbol=c("beta0", "beta0"), Relative_value=c(0.1, 1)) compare_age_to_base( age_cat = age_cat, base_beta0 = base_beta0, Nvec = Nvec, pmat = pmat, contact_rate_age = contact_rate_age, params_timevar = time_pars )
Nice! It's working.
## read forecast data forecast <- (read_csv("../../MacOMT_report/forecast/2021-03-29_VOC.csv") %>% filter(VoC_effect != "Vaccination") # recode variable values for clear plot legends %>% mutate(scenario = case_when( VoC_effect == "Lock Down for 2 weeks" ~ "two-week lockdown on 5 April, then reopen", VoC_effect == "Lock Down for 4 weeks" ~ "four-week lockdown on 5 April, then reopen", VoC_effect == "Replacement" ~ "baseline", # VoC_effect == "Vaccination" ~ "Explicit replacement by VoC + some Vac" )) %>% filter(var == "report", scenario == "baseline") %>% select(date, Symbol, Relative_value, final_bt, obs) # %>% mutate(scenario = as_factor(scenario)) ) ## set up simulation start_date <- min(forecast$date) end_date <- max(forecast$date) age_params <- expand_params_age( base_params, age_cat = age_cat ) mistry_params <- expand_params_mistry( age_params, age_cat = age_cat ) ## update pop in base simulation base_params[["N"]] <- mistry_params[["N"]] ## set up timepars ## FIXME: getting way more than just the break dates time_pars <- (forecast %>% group_by(Symbol, Relative_value) %>% filter(date == min(date)) %>% ungroup() %>% select(date, Symbol, Relative_value) %>% rename(Date = date) %>% as.data.frame() ) # data.frame(Date=c("2020-04-02"), # Symbol=c("beta0"), # Relative_value=c(0)) base_state = make_state(params = base_params)
calibrate()
Let's see if we can feed in simulated age-structured observations with constant beta0
across age groups (+ uniform population size + contact rate) and recover beta0
via calibrate()
. We'll start with the calibration from the getting started vignette:
params1 <- read_params("PHAC_testify.csv") params1 <- update(params1, testing_intensity = 0, N = 3e6) state1 <- make_state(params=params1) sdate <- "2020-Feb-10" edate <- "2020-Jun-1" ## add observational noise set.seed(101) params1obs <- update(params1, obs_disp=200) res1obs <- run_sim(params1obs, state1, start_date=sdate, end_date=edate, stoch=c(obs=FALSE, proc=FALSE), condense_args = c(add_report = TRUE)) ## set up simulated report data report_data <- (res1obs %>% mutate(value=round(report), var="report") %>% select(date, value, var) %>% na.omit() ## removing first 15 values, which are NA! ) ## beta0 is the only parameter we're going to optimize: opt_pars <- list(params = c(beta0 = 0.1)) ## fit beta0 based on the report data: fitted.mod <- calibrate( data = report_data , start_date = sdate ## skip breaks that are present by default: , time_args = list(break_dates = NULL) , base_params = params1obs , opt_pars = opt_pars ##, debug_plot = TRUE # instructive plotting during optimization ) ## plot the resulting fit plot(fitted.mod, data=report_data) ## compare original beta0 to fit value print("preset beta0:") print(params1[["beta0"]]) print("fitted beta0:") print(coef(fitted.mod$mle2)[[1]])
Cool, so this base example is working. Let's do the smallest ageify expansion upon this working example by implementing uniform age-structure (both in transmission, contacts rate, and population distribution):
## ageify the parameters age_cat <- mk_agecats(0, 80, da = 30) n <- length(age_cat) params1_age <- expand_params_age(params1, age_cat = age_cat) state1_age <- expand_state_age(state1, age_cat = age_cat) sdate <- "2020-Feb-10" edate <- "2020-Jun-1" ## add observational noise set.seed(101) params1obs_age <- update(params1_age, obs_disp=200) res1obs <- run_sim(params1obs_age, state1_age, start_date=sdate, end_date=edate, stoch=c(obs=FALSE, proc=FALSE), condense = TRUE, ## this is the default, but specifying it here because i just realized i need it to generate the reports col condense_args = list(keep_all = TRUE, add_report = TRUE) ## this will also keep the original variables in the df, which i need for later to stratify by age ) # res1obs <- run_sim(params1obs_age, state1_age, start_date=sdate, end_date=edate, # stoch=c(obs=FALSE, proc=FALSE), # condense = FALSE) ## set up simulated report data report_data <- (res1obs # %>% mutate(across(starts_with("report"), ~ round(.x), .names = "{.col}")) %>% mutate(value=round(report), var="report") %>% select(date, value, var) %>% na.omit() ## removing first 15 values, which are NA! ) ## beta0 is the only parameter we're going to optimize: opt_pars <- list(params = c(beta0 = 0.1)) ## fit beta0 based on the report data: ## EVERYTHING BELOW IS NOT WORKING # debug(condense.pansim) ## and then execute following # fitted.mod <- calibrate( # data = report_data # , start_date = sdate # ## skip breaks that are present by default: # , time_args = list(break_dates = NULL) # , base_params = params1obs_age # , opt_pars = opt_pars # ##, debug_plot = TRUE # instructive plotting during optimization # ) ## plot the resulting fit # plot(fitted.mod, data=report_data) ## compare original beta0 to fit value # print("preset beta0:") # print(params1[["beta0"]]) # print("fitted beta0:") # print(coef(fitted.mod$mle2)[[1]])
TO DO: figure out what's breaking calibrate?
The abbreviated stack looks like:
calibrate mle_fun forecast_sim run_sim_break run_sim make_state(params=params)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.