library(BDAepimodel)
library(coda)
library(Rcpp)

This vignette contains code to reproduce the simulation examining the effects of model misspecfication presented in the second simulation of Fintzi et al. (2016). We fit SIR and SEIR models to binomially distributed prevalence counts sampled from an epidemic with time-varying dynamics. Additional details on the use of the BDAepimodel package and how to extract the results from fitted objects are provided in the "BDAepimodel" vignette. Details on fitting the models in pomp are provided in the "model_misspecification_pomp" vignette.

Simualting the epidemic

We simulated an epidemic with SEIR dynamics in a population of 400 individuals, 397 of whom were initially susceptible, 2 of whom were initially exposed, and 1 of whom were initially infectious. The dynamics of the epidemic varied over four different epochs and are presented in the table below:

library(knitr)
tab <- t(data.frame("Effective R0" = c(14.9, 9.2, 0.1, 0),
                    "Incubation period" = c(210,210,90,180),
                    "Infectious period" = c(150,330,300,70)))
colnames(tab) = paste("Epoch", 1:4)
kable(tab, caption = "Time varying dynamics. Effective reproductive numbers are computed as the product of the per-contact infectivity rate, the mean infectious period, and the number of susceptibles at the beginning of the epoch.")

The data are simulated as follows:

set.seed(1834)

# declare the functions for simulating from and evaluating the log-density of the measurement process
r_meas_process <- function(state, meas_vars, params){
          # in our example, rho will be the name of the binomial sampling probability parameter.
          # this function returns a matrix of observed counts
          rbinom(n = nrow(state),
                 size = state[,meas_vars],
                 prob =  params["rho"])
}

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1, 183, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.00025,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/150,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(
                              S = 397,
                              E = 2,
                              I = 1,
                              R = 0
                    ),
                    trim = TRUE
          )

dat1 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path1 <- epimodel$pop_mat

epimodel <- init_epimodel(obstimes = seq(183, 729, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/330,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path1[nrow(true_path1),"S"],
                                   true_path1[nrow(true_path1),"E"],
                                   true_path1[nrow(true_path1),"I"],
                                   true_path1[nrow(true_path1),"R"]
                    ),
                    trim = TRUE
          )

dat2 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path2 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(729, 1163, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.00035,                     # per-contact infectivity parameter high
                                     gamma = 1/90,                      # exposed compartment
                                     mu = 1/300,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path2[nrow(true_path2),"S"],
                                   true_path2[nrow(true_path2),"E"],
                                   true_path2[nrow(true_path2),"I"],
                                   true_path2[nrow(true_path2),"R"]
                    ),
                    trim = TRUE
          )

dat3 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path3 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1163, 1457, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/180,                      # exposed compartment
                                     mu = 1/70,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path3[nrow(true_path3),"S"],
                                   true_path3[nrow(true_path3),"E"],
                                   true_path3[nrow(true_path3),"I"],
                                   true_path3[nrow(true_path3),"R"]
                    ),
                    trim = TRUE
          )

dat4 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path4 <- epimodel$pop_mat

dat <- rbind(dat1, dat2[-c(1),], dat3[-1,], dat4[-1,])
true_path <- rbind(true_path1[-nrow(true_path1),c(1,4:7)], 
                   true_path2[-c(1, nrow(true_path2)), c(1,4:7)],
                   true_path3[-c(1, nrow(true_path3)), c(1,4:7)],
                   true_path4[-1, c(1,4:7)])

plot(true_path[,1], true_path[,c("I")], "l"); points(dat)
abline(v = c(183, 729, 1163), col = "red")

Having simulated a dataset, we can now proceed to fit SIR and SEIR models to the data. In fitting each model, the first step is to define a transition kernel for the model parameters. The parameters are updated from their univariate full conditional distributions via Gibbs sampling (prior distributions indicated in comments in the code below). The following code implements the transition kernel and a helper function for computing the sufficient statistics:

# helper function for computing the sufficient statistics for the SIR model rate parameters
Rcpp::cppFunction("Rcpp::NumericVector getSuffStats_SIR(const Rcpp::NumericMatrix& pop_mat, const int ind_final_config) {

          // initialize sufficient statistics
          int num_inf = 0;       // number of infection events
          int num_rec = 0;       // number of recovery events
          double beta_suff = 0;  // integrated hazard for the infectivity
          double mu_suff = 0;    // integrated hazard for the recovery

          // initialize times
          double cur_time = 0;              // current time
          double next_time = pop_mat(0,0);  // time of the first event
          double dt = 0;                    // time increment

          // compute the sufficient statistics - loop through the pop_mat matrix until
          // reaching the row for the final observation time
          for(int j = 0; j < ind_final_config - 1; ++j) {

                    cur_time = next_time;         
                    next_time = pop_mat(j+1, 0); // grab the time of the next event
                    dt = next_time - cur_time;   // compute the time increment

                    beta_suff += pop_mat(j, 3) * pop_mat(j, 4) * dt; // add S*I*(t_{j+1} - t_j) to beta_suff
                    mu_suff += pop_mat(j, 4) * dt;                   // add I*(t_{j+1} - t_j) to mu_suff

                    // increment the count for the next event
                    if(pop_mat(j + 1, 2) == 1) {  
                              num_inf += 1;
                    } else if(pop_mat(j + 1, 2) == 2) {
                              num_rec += 1;
                    }
          }

          // return the vector of sufficient statistics for the rate parameters
          return Rcpp::NumericVector::create(num_inf, beta_suff, num_rec, mu_suff);
}")

# MCMC transition kernel for the SEIR model rate parameters and the binomial
# sampling probability. The prior distributions for the parameters are contained
# in this function.

gibbs_kernel_SIR <- function(epimodel) {

          # get sufficient statistics using the previously compiled getSuffStats function (above)
          suff_stats          <- getSuffStats_SIR(epimodel$pop_mat, epimodel$ind_final_config)

          ### update parameters from their univariate full conditional distributions
          ## beta ~ Gamma(0.6, 10000)
          ## mu   ~ Gamma(0.7, 100)
          ## rho  ~ Beta(10, 1)
          proposal          <- epimodel$params # params is the vector of ALL model parameters
          proposal["beta"]  <- rgamma(1, 0.6 + suff_stats[1], 10000 + suff_stats[2])
          proposal["mu"]    <- rgamma(1, 0.7 + suff_stats[3], 100 + suff_stats[4])
          proposal["rho"]   <- rbeta(1, 
                                     shape1 = 10 + sum(epimodel$obs_mat[, "I_observed"]),
                                     shape2 = 1 + sum(epimodel$obs_mat[, "I_augmented"] - epimodel$obs_mat[, "I_observed"]))

          # update array of rate matrices
          epimodel          <- build_new_irms(epimodel, proposal)

          # update the eigen decompositions (This function is built in)
          buildEigenArray_SIR(real_eigenvals = epimodel$real_eigen_values,
                              imag_eigenvals = epimodel$imag_eigen_values,
                              eigenvecs      = epimodel$eigen_vectors, 
                              inversevecs    = epimodel$inv_eigen_vectors, 
                              irm_array      = epimodel$irm, 
                              n_real_eigs    = epimodel$n_real_eigs, 
                              initial_calc   = FALSE)

          # get log-likelihood of the observations under the new parameters
          obs_likelihood_new  <- calc_obs_likelihood(epimodel, params = proposal, log = TRUE) #### NOTE - log = TRUE

          # get the new population level CTMC log-likelihood
          pop_likelihood_new  <- epimodel$likelihoods$pop_likelihood_cur +
                    suff_stats[1] * (log(proposal["beta"]) - log(epimodel$params["beta"])) +
                    suff_stats[3] * (log(proposal["mu"]) - log(epimodel$params["mu"])) -
                    suff_stats[2] * (proposal["beta"] - epimodel$params["beta"]) - 
                    suff_stats[4] * (proposal["mu"] - epimodel$params["mu"])

          # update parameters, likelihood objects, and eigen decompositions
          epimodel  <-
                    update_params(
                              epimodel,
                              params = proposal,
                              pop_likelihood = pop_likelihood_new,
                              obs_likelihood = obs_likelihood_new
                    )

          return(epimodel)
}

### Helper function for computing the SEIR model sufficient statistics
Rcpp::cppFunction("Rcpp::NumericVector getSuffStats_SEIR(const Rcpp::NumericMatrix& pop_mat, const int ind_final_config) {

          // initialize sufficient statistics
          int num_exp = 0;       // number of exposure events
          int num_inf = 0;       // number of exposed --> infectious events
          int num_rec = 0;       // number of recovery events
          double beta_suff  = 0; // integrated hazard for the exposure
          double gamma_suff = 0; // integrated hazard for addition of infectives
          double mu_suff    = 0; // integrated hazard for the recovery

          // initialize times
          double cur_time = 0;              // current time
          double next_time = pop_mat(0,0);  // time of the first event
          double dt = 0;                    // time increment

          // compute the sufficient statistics - loop through the pop_mat matrix until
          // reaching the row for the final observation time
          for(int j = 0; j < ind_final_config - 1; ++j) {

                    cur_time = next_time;         
                    next_time = pop_mat(j+1, 0); // grab the time of the next event
                    dt = next_time - cur_time;

                    beta_suff  += pop_mat(j, 3) * pop_mat(j, 5) * dt; // add S*I*(t_{j+1} - t_j) to beta_suff
                    gamma_suff += pop_mat(j, 4) * dt;                 // add E*(t_{j+1} - t_j) to gamma_suff
                    mu_suff    += pop_mat(j, 5) * dt;                 // add I*(t_{j+1} - t_j) to mu_suff

                    if(pop_mat(j + 1, 2) == 1) {  
                              num_exp += 1;            // if the next event is an exposure, increment the number of exposures
                    } else if(pop_mat(j + 1, 2) == 2) {
                              num_inf += 1;            // if the next event adds an infective, increment the number of infections
                    } else if(pop_mat(j + 1, 2) == 3) {
                              num_rec += 1;            // if the next event is a recover, increment the number of recovery
                    }
          }

          // return the vector of sufficient statistics for the rate parameters
          return Rcpp::NumericVector::create(num_exp, beta_suff, num_inf, gamma_suff, num_rec, mu_suff);
}")

# MCMC transition kernel for the SEIR model rate parameters and the binomial
# sampling probability. The prior distributions for the parameters are contained
# in this function.
gibbs_kernel_SEIR <- function(epimodel) {

          # get sufficient statistics using the previously compiled getSuffStats function (above)
          suff_stats          <- getSuffStats_SEIR(epimodel$pop_mat, epimodel$ind_final_config)

          ### update parameters from their univariate full conditional distributions
          ## beta  ~ Gamma(0.6, 10000)
          ## gamma ~ Gamma(0.5,50)
          ## mu    ~ Gamma(0.7, 100)
          ## rho   ~ Beta(10, 1)          
          proposal          <- epimodel$params # params is the vector of ALL model parameters
          proposal["beta"]  <- rgamma(1, 0.6 + suff_stats[1], 10000 + suff_stats[2])
          proposal["gamma"] <- rgamma(1, 0.5 + suff_stats[3], 50 + suff_stats[4])
          proposal["mu"]    <- rgamma(1, 0.7 + suff_stats[5], 100 + suff_stats[6])
          proposal["rho"]   <- rbeta(1, 
                                     shape1 = 10 + sum(epimodel$obs_mat[,"I_observed"]), 
                                     shape2 = 1 + sum(epimodel$obs_mat[,"I_augmented"]- epimodel$obs_mat[,"I_observed"]))

          # update array of rate matrices
          epimodel          <- build_new_irms(epimodel, proposal)

          # update the eigen decompositions
          buildEigenArray_SEIR(real_eigenvals = epimodel$real_eigen_values,
                               imag_eigenvals = epimodel$imag_eigen_values,
                               eigenvecs      = epimodel$eigen_vectors, 
                               inversevecs    = epimodel$inv_eigen_vectors, 
                               irm_array      = epimodel$irm, 
                               n_real_eigs    = epimodel$n_real_eigs, 
                               initial_calc   = FALSE)

          # get log-likelihoods under the new parameters
          pop_likelihood_new  <- calc_pop_likelihood(epimodel, log = TRUE) #### NOTE - log = TRUE
          obs_likelihood_new  <- calc_obs_likelihood(epimodel, params = proposal, log = TRUE) #### NOTE - log = TRUE

          # update parameters, likelihood objects, and eigen decompositions
          epimodel            <- update_params(epimodel, params = proposal, pop_likelihood = pop_likelihood_new, obs_likelihood = obs_likelihood_new)

          return(epimodel)
}

We now re-initialize an epimodel objects with the dataset, set the RNG seed, and run each MCMC chain as follows. Note that the chain was set by a batch script that varied the value of chain (chain = 1,2,3).

chain <- 1 # this was set by a batch script that ran chains 1, 2, and 3 in parallel

# generate the measurement process
d_meas_process <- function(state, meas_vars, params, log = TRUE) {
        # note that the names of the measurement variables are endowed with suffixes "_observed" and "_augmented". This is required.
        # we will declare the names of the measurement variables shortly.
        dbinom(x = state[, "I_observed"],
               size = state[, "I_augmented"],
               prob = params["rho"], log = log)
}

# re-initialize the chain
init_dist_SIR <- normalize(c(0.99, 0.004, 0.0001))
epimodel_SIR  <- init_epimodel(popsize = 400,                                                    # population size
                           states = c("S", "I", "R"),                                         # compartment names
                           params = c(beta = abs(rnorm(1, 0.00005, 1e-5)),                   # infectivity rate
                                      mu = abs(rnorm(1, 0.002, 0.0001)),                      # recovery rate
                                      rho = rbeta(1, 10, 1),                                 # binomial sampling prob
                                      S0 = init_dist_SIR[1], I0 = init_dist_SIR[2], R0 = init_dist_SIR[3]),
                           rates = c("beta * I", "mu"),                # unlumped transition rates
                           flow = matrix(c(-1, 1, 0,
                                           0, -1, 1), ncol = 3, byrow = T),  # flow matrix
                           dat = dat,                                           # dataset
                           time_var = "time",                                   # name of time variable in the dataset
                           meas_vars = "I",                                     # name of measurement var in the dataset
                           initdist_prior = c(90, 0.5, 0.01), ### Parameters for the dirichlet prior distribution
                           d_meas_process = d_meas_process)

epimodel_SIR <- init_settings(epimodel_SIR,
                          niter = 10, # this was set to 100000 in the paper
                          save_params_every = 1,
                          save_configs_every = 2, # this was set to 250 for the chains run in the paper
                          kernel = list(gibbs_kernel_SIR),
                          configs_to_redraw = 2, # set to 150 for the paper
                          analytic_eigen = "SIR",
                          ecctmc_method = "unif",
                          seed = 52787 + chain)

# Fit the epimodel --------------------------------------------------------

epimodel_SIR <- fit_epimodel(epimodel_SIR, monitor = TRUE)
set.seed(1834)

# declare the functions for simulating from and evaluating the log-density of the measurement process
r_meas_process <- function(state, meas_vars, params){
          # in our example, rho will be the name of the binomial sampling probability parameter.
          # this function returns a matrix of observed counts
          rbinom(n = nrow(state),
                 size = state[,meas_vars],
                 prob =  params["rho"])
}

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1, 183, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.00025,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/150,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(
                              S = 397,
                              E = 2,
                              I = 1,
                              R = 0
                    ),
                    trim = TRUE
          )

dat1 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path1 <- epimodel$pop_mat

epimodel <- init_epimodel(obstimes = seq(183, 729, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/330,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path1[nrow(true_path1),"S"],
                                   true_path1[nrow(true_path1),"E"],
                                   true_path1[nrow(true_path1),"I"],
                                   true_path1[nrow(true_path1),"R"]
                    ),
                    trim = TRUE
          )

dat2 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path2 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(729, 1163, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.00035,                     # per-contact infectivity parameter high
                                     gamma = 1/90,                      # exposed compartment
                                     mu = 1/300,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path2[nrow(true_path2),"S"],
                                   true_path2[nrow(true_path2),"E"],
                                   true_path2[nrow(true_path2),"I"],
                                   true_path2[nrow(true_path2),"R"]
                    ),
                    trim = TRUE
          )

dat3 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path3 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1163, 1457, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/180,                      # exposed compartment
                                     mu = 1/70,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),

                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path3[nrow(true_path3),"S"],
                                   true_path3[nrow(true_path3),"E"],
                                   true_path3[nrow(true_path3),"I"],
                                   true_path3[nrow(true_path3),"R"]
                    ),
                    trim = TRUE
          )

dat4 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path4 <- epimodel$pop_mat

dat <- rbind(dat1, dat2[-c(1),], dat3[-1,], dat4[-1,])
true_path <- rbind(true_path1[-nrow(true_path1),c(1,4:7)], 
                   true_path2[-c(1, nrow(true_path2)), c(1,4:7)],
                   true_path3[-c(1, nrow(true_path3)), c(1,4:7)],
                   true_path4[-1, c(1,4:7)])
chain <- 1 # this was set by a batch script that ran chains 1, 2, and 3 in parallel

# generate the measurement process
d_meas_process <- function(state, meas_vars, params, log = TRUE) {
        # note that the names of the measurement variables are endowed with suffixes "_observed" and "_augmented". This is required.
        # we will declare the names of the measurement variables shortly.
        dbinom(x = state[, "I_observed"],
               size = state[, "I_augmented"],
               prob = params["rho"], log = log)
}

# re-initialize the chain
init_dist_SEIR <- normalize(c(90, 0.5, 0.5, 0.01))
epimodel_SEIR  <- init_epimodel(popsize = 400,                                                    # population size
                           states = c("S", "E", "I", "R"),                                         # compartment names
                           params = c(beta = abs(rnorm(1, 0.00008, 1e-6)),                      # infectivity rate
                                      gamma = abs(rnorm(1, 0.008, 1e-3)),
                                      mu = abs(rnorm(1, 0.0012, 0.001)),                          # recovery rate
                                      rho = rbeta(1, 10, 1),                                 # binomial sampling prob
                                      S0 = init_dist_SEIR[1], E0 = init_dist_SEIR[2], I0 = init_dist_SEIR[3], R0 = init_dist_SEIR[4]),
                           rates = c("beta * I", "gamma", "mu"),                # unlumped transition rates
                           flow = matrix(c(-1, 1, 0, 0, # S -> E
                                           0, -1, 1, 0, # E -> I
                                           0, 0, -1, 1),# I -> R
                                         ncol = 4, byrow = T),  # flow matrix
                           dat = dat,                                           # dataset
                           time_var = "time",                                   # name of time variable in the dataset
                           meas_vars = "I",                                     # name of measurement var in the dataset
                           initdist_prior = c(90, 0.5, 0.5, 0.01), ### Parameters for the dirichlet prior distribution
                           d_meas_process = d_meas_process,
                           r_meas_process = r_meas_process)

epimodel_SEIR <- init_settings(epimodel_SEIR,
                          niter = 10, # this was set to 100000 in the paper
                          save_params_every = 1,
                          save_configs_every = 2, # this was set to 250 for the chains run in the paper
                          kernel = list(gibbs_kernel_SEIR),
                          configs_to_redraw = 2, # set to 150 for the paper
                          analytic_eigen = "SEIR",
                          ecctmc_method = "unif",
                          seed = 52787 + chain)

# Fit the epimodel --------------------------------------------------------

epimodel_SEIR <- fit_epimodel(epimodel_SEIR, monitor = TRUE)

After running all three chains for each model, we discarded the burn-in and combined the parameter samples and latent posterior samples from each chain. Posterior median estimates and 95% credible intervals of model parameters were computed, along with the pointwise posterior distribution of the latent process. These are presented in the paper and the supplement.



fintzij/BDAepimodel documentation built on Sept. 20, 2020, 1:44 p.m.