knitr::opts_chunk$set(fig.height = 4,
                      fig.width = 9)

devtools::load_all()
library(tidyverse)
library(cowplot)

We want to be able to calibrate $\beta$ in the age-structured model, but trying to fit a $\beta$ value for each age group may be too difficult (or impossible). Let's see how far we can get with $\beta$ calibrated to the whole population.

Simple calibration for constant $\beta$

Idea

We can break $\beta_0$ down into two components: transmissiblity, $\tau$ (proportion of contacts between $S$ and $I$ that lead to transmission) and the average contact rate $\langle c \rangle$ (contacts/day):

$$ \beta_0 = \tau \langle c \rangle. $$

In the age-structured model, if we assume transmissibility is constant across age groups, we have

$$ (\beta_0)_i = \tau c_i $$

for each age group $i$. Instead of calibrating each $(\beta_0)_i$, let's try to:

  1. estimate $\beta_0$ for the entire population via calibration;
  2. calculate $\tau$ using the estimated $\beta_0$ and the calculated $\langle c \rangle$ from the assumed $c_i$ in the age-structured model, yielding $(\beta_0)_i$ for each age group.

Here's a function that does this lazy calibration:

#' Calibrate age-specific transmission params using population-level beta0 and assumed contact structure
#'
#' @param age_params parameters that have been initialized using `expand_params_age()` with a specified age-specific contact rate
#' @param base_beta0 beta0 calibrated from base (non-ageified) simulation
#'
#' @return an object of class `params_pansim`
#' @export
#'
#' @examples
#' base_params <- update(read_params("PHAC_testify.csv"), testing_intensity = 0, beta0 = 2)
#' age_params <- expand_params_age(base_params, transmissibility = 1, contact_rate_age = rep(4, length(age_cat)))
#' calibrate_transmission(age_params)
calibrate_transmission <- function(age_params, base_beta0){
  ## calculate average age-specific contact rate
  avg_contact_rate_age <- sum(age_params[["N"]]*age_params[["contact_rate_age"]])/sum(age_params[["N"]])

  ## calculate transmissibility implied by average contact rate
  transmissibility <- base_beta0/avg_contact_rate_age
  print(paste0("calculated transmissibility: ", transmissibility))

  ## update beta0 (and transmissibility in age_params)
  ## based on assumed contact_rate_age and implied transmissibility
  age_params[["beta0"]] <- transmissibility*age_params[["contact_rate_age"]]
  age_params[["transmissibility"]] <- transmissibility

  return(age_params)
}

Here's a function that computes both the base sim, as well as the ageified sim (with lazy calibration):

#' Compare ageified simulation to base, using simple calibration trick
#'
#' @param age_cat age categories
#' @param base_beta0 beta0 to use in base simulation
#' @param Nvec population distribution
#' @param pmat contact probability matrix
#' @param contact_rate_age age-specific contact rates
#' @inheritParams run_sim
#'
#' @return
#' @export
compare_age_to_base <- function(age_cat,
                                base_beta0,
                                Nvec,
                                pmat = NULL,
                                contact_rate_age,
                                start_date = "2020-03-01",
                                end_date = "2020-08-01",
                                params_timevar = NULL){

  ## set up base (non-ageified) params and state
  base_params <- update(read_params("PHAC_testify.csv")
                        , N = sum(Nvec)
                        , testing_intensity = 0
                        , beta0 = base_beta0
                        )
  base_state <- make_state(N = base_params[["N"]],
                           E0 = length(age_cat))

  ## run base simulation
  res_base <- run_sim(base_params, base_state,
                      start_date = start_date,
                      end_date = end_date,
                      params_timevar = params_timevar,
                      condense = FALSE)

  ## set up ageified parameters 
  ## (this will use assume beta0 as in the base simulation)
  age_params <- expand_params_age(
    params = base_params,
    age_cat = age_cat,
    Nvec = Nvec,
    pmat = pmat,
    transmissibility = 1, ## dummy value, will get updated
    contact_rate_age = contact_rate_age
  )

  ## perform lazy calibration (update transmissibility based on base_beta0
  ## and contact_rate_age)
  age_params <- calibrate_transmission(age_params, base_beta0)

  ## set up age-structured state
  age_state <- expand_state_age(
    base_state,
    age_cat = age_cat,
    Nvec = age_params[["N"]]
  )

  ## age-structured sim with beta0 vec and transmissibility
  ## "inferred" from base sim
  res_age <- run_sim(age_params, age_state,
                     start_date = start_date,
                     end_date = end_date,
                     params_timevar = params_timevar,
                     condense = FALSE)

  ## aggregate over ages
  res_age_cond <- condense_age(res_age %>% select(!starts_with("foi")))
  attr(res_age_cond, "row.names") <- as.character(attr(res_age_cond, "row.names"))

  ## check sim results are equal
  print("comparing base and ageified simulation results... equal?")
  print(all.equal(res_age_cond,
                   res_base %>% select(-foi)))

  ## plot sim results
  p <- (bind_rows(
    pivot_longer((res_base 
      %>% select(-foi) 
      %>% mutate(model = "base")),
    cols = -c(date, model),
    names_to = "var"),
    pivot_longer((condense_age(res_age %>% select(!starts_with("foi")))
      %>% mutate(model = "age-structured")),
    cols = -c(date, model),
    names_to = "var"))
    %>% mutate(var = as_factor(var))
    %>% ggplot(aes(x = date, y = value, color = model))
    + geom_line()
    + facet_wrap(vars(var), scales = "free_y")
    + scale_x_date(date_breaks = "1 month",
                   date_labels = "%b")
  ) 

  print(p)

}

Demo with a uniform population, uniform contact strucutre

This should work perfectly...

## set up age categories
age_cat <- mk_agecats(0, 80, da = 30)
n <- length(age_cat)
Ni <- 1e6
Nvec <- rep(Ni, n)
base_beta0 <- 2

compare_age_to_base(
  age_cat = age_cat,
  base_beta0 = base_beta0,
  Nvec = Nvec,
  contact_rate_age = rep(4, length(age_cat))
)

Not shocking; if this wasn't a perfect match, it would be because of a bug.

Uniform population, non-uniform contact structure

Nvec <- rep(5e6, 3)
true_beta0 <- c(2, 3, 3/2)

base_beta0 <- sum(Nvec*true_beta0)/sum(Nvec)
true_transmissibility <- 0.5
contact_rate_age <- true_beta0/true_transmissibility

## initialize contact structure
pmat <- matrix(c(4/10, 4/30, 8/15,
                 2/10, 23/30, 3/15,
                 4/10, 1/10, 4/15),
               nrow = 3,
               dimnames = list(age_cat, age_cat))

compare_age_to_base(
  age_cat = age_cat,
  base_beta0 = base_beta0,
  Nvec = Nvec,
  pmat = pmat,
  contact_rate_age = contact_rate_age
)

Not a perfect match, which makes sense because the age-structured model doesn't just collapse to the homogeneous case here, but it looks like there's pretty good agreement with this calibration trick...

Demo with a non-uniform population, non-uniform contact structure

## rescale to a non-uniform population while still maintaining balance
total_contacts <- true_beta0*Nvec
true_beta0 <- 1:3
Nvec <- total_contacts/true_beta0

## check balance
# isSymmetric((Nvec*true_beta0)*pmat)

base_beta0 <- sum(Nvec*true_beta0)/sum(Nvec)
true_transmissibility <- 0.5
contact_rate_age <- true_beta0/true_transmissibility

compare_age_to_base(
  age_cat = age_cat,
  base_beta0 = base_beta0,
  Nvec = Nvec,
  pmat = pmat,
  contact_rate_age = contact_rate_age
)

Again, not terrible... not sure how sensitive the match is to the population distribution, relative magnitudes of the true $(\beta_0)_i$, and the assumed contact probability matrix.

Simple calibration with time-varying $\beta$

Let's just go all-in with age-based heterogeneity in $\beta_0$, contact probabilities between ages, and the population distribution, plus a time-varying $\beta$:

time_pars <- data.frame(Date=c("2020-04-02", "2020-05-01"),
                      Symbol=c("beta0", "beta0"),
                      Relative_value=c(0.1, 1))

compare_age_to_base(
  age_cat = age_cat,
  base_beta0 = base_beta0,
  Nvec = Nvec,
  pmat = pmat,
  contact_rate_age = contact_rate_age,
  params_timevar = time_pars
)

Nice! It's working.

## read forecast data
forecast <- (read_csv("../../MacOMT_report/forecast/2021-03-29_VOC.csv")
  %>% filter(VoC_effect != "Vaccination")
  # recode variable values for clear plot legends
  %>% mutate(scenario = case_when(
    VoC_effect == "Lock Down for 2 weeks" ~ "two-week lockdown on 5 April, then reopen",
    VoC_effect == "Lock Down for 4 weeks" ~ "four-week lockdown on 5 April, then reopen",
    VoC_effect == "Replacement" ~ "baseline",
    # VoC_effect == "Vaccination" ~ "Explicit replacement by VoC + some Vac"
  ))
  %>% filter(var == "report", scenario == "baseline")
  %>% select(date, Symbol, Relative_value, final_bt, obs)
  # %>% mutate(scenario = as_factor(scenario))
)

## set up simulation
start_date <- min(forecast$date)
end_date <- max(forecast$date)

age_params <- expand_params_age(
  base_params,
  age_cat = age_cat
)

mistry_params <- expand_params_mistry(
  age_params,
  age_cat = age_cat
)

## update pop in base simulation
base_params[["N"]] <- mistry_params[["N"]]

## set up timepars
## FIXME: getting way more than just the break dates
time_pars <- (forecast 
    %>% group_by(Symbol, Relative_value) 
    %>% filter(date == min(date))
    %>% ungroup()
    %>% select(date, Symbol, Relative_value)
    %>% rename(Date = date)
    %>% as.data.frame()
)


# data.frame(Date=c("2020-04-02"),
#            Symbol=c("beta0"),
#            Relative_value=c(0))

base_state = make_state(params = base_params)

Calibration using calibrate()

Let's see if we can feed in simulated age-structured observations with constant beta0 across age groups (+ uniform population size + contact rate) and recover beta0 via calibrate(). We'll start with the calibration from the getting started vignette:

params1 <- read_params("PHAC_testify.csv")
params1 <- update(params1,
                  testing_intensity = 0,
                  N = 3e6)
state1 <- make_state(params=params1)
sdate <- "2020-Feb-10"
edate <- "2020-Jun-1"

## add observational noise
set.seed(101)
params1obs <- update(params1, obs_disp=200)
res1obs <- run_sim(params1obs, state1, start_date=sdate, end_date=edate,
                   stoch=c(obs=FALSE, proc=FALSE),
                   condense_args = c(add_report = TRUE))

## set up simulated report data
report_data <- (res1obs
    %>% mutate(value=round(report), var="report")
    %>% select(date, value, var)
    %>% na.omit() ## removing first 15 values, which are NA!
)

## beta0 is the only parameter we're going to optimize:
opt_pars <- list(params = c(beta0 = 0.1)) ## fit beta0 based on the report data: 
fitted.mod <- calibrate(
    data = report_data
  , start_date = sdate
    ## skip breaks that are present by default:
  , time_args = list(break_dates = NULL)
  , base_params = params1obs
  , opt_pars = opt_pars
##, debug_plot = TRUE # instructive plotting during optimization
)

## plot the resulting fit
plot(fitted.mod, data=report_data)

## compare original beta0 to fit value
print("preset beta0:")
print(params1[["beta0"]])
print("fitted beta0:")
print(coef(fitted.mod$mle2)[[1]])

Cool, so this base example is working. Let's do the smallest ageify expansion upon this working example by implementing uniform age-structure (both in transmission, contacts rate, and population distribution):

## ageify the parameters
age_cat <- mk_agecats(0, 80, da = 30)
n <- length(age_cat)

params1_age <- expand_params_age(params1, age_cat = age_cat)
state1_age <- expand_state_age(state1, age_cat = age_cat)
sdate <- "2020-Feb-10"
edate <- "2020-Jun-1"

## add observational noise
set.seed(101)
params1obs_age <- update(params1_age, obs_disp=200)
res1obs <- run_sim(params1obs_age, state1_age, start_date=sdate, end_date=edate,
                   stoch=c(obs=FALSE, proc=FALSE),
                   condense = TRUE, ## this is the default, but specifying it here because i just realized i need it to generate the reports col
                   condense_args = list(keep_all = TRUE,
                                        add_report = TRUE) ## this will also keep the original variables in the df, which i need for later to stratify by age
                   )
# res1obs <- run_sim(params1obs_age, state1_age, start_date=sdate, end_date=edate,
#                    stoch=c(obs=FALSE, proc=FALSE),
#                    condense = FALSE)

## set up simulated report data
report_data <- (res1obs
    # %>% mutate(across(starts_with("report"), ~ round(.x), .names = "{.col}"))
    %>% mutate(value=round(report), var="report")
    %>% select(date, value, var)
    %>% na.omit() ## removing first 15 values, which are NA!
)

## beta0 is the only parameter we're going to optimize:
opt_pars <- list(params = c(beta0 = 0.1)) ## fit beta0 based on the report data: 

## EVERYTHING BELOW IS NOT WORKING
# debug(condense.pansim)
## and then execute following
# fitted.mod <- calibrate(
#     data = report_data
#   , start_date = sdate
#     ## skip breaks that are present by default:
#   , time_args = list(break_dates = NULL)
#   , base_params = params1obs_age
#   , opt_pars = opt_pars
# ##, debug_plot = TRUE # instructive plotting during optimization
# )

## plot the resulting fit
# plot(fitted.mod, data=report_data)

## compare original beta0 to fit value
# print("preset beta0:")
# print(params1[["beta0"]])
# print("fitted beta0:")
# print(coef(fitted.mod$mle2)[[1]])

TO DO: figure out what's breaking calibrate?

The abbreviated stack looks like:

calibrate
  mle_fun
    forecast_sim
      run_sim_break
        run_sim
          make_state(params=params)


bbolker/McMasterPandemic documentation built on Aug. 25, 2024, 6:35 p.m.