vignettes/model_misspecification.R

## ----load_packages, echo = F, include=F----------------------------------
library(BDAepimodel)
library(coda)
library(Rcpp)

## ----dynamics, echo = FALSE, results = 'asis'----------------------------
library(knitr)
tab <- t(data.frame("Effective R0" = c(14.9, 9.2, 0.1, 0),
                    "Incubation period" = c(210,210,90,180),
                    "Infectious period" = c(150,330,300,70)))
colnames(tab) = paste("Epoch", 1:4)
kable(tab, caption = "Time varying dynamics. Effective reproductive numbers are computed as the product of the per-contact infectivity rate, the mean infectious period, and the number of susceptibles at the beginning of the epoch.")

## ----SEIR_sim, warning=F, cache = F--------------------------------------
set.seed(1834)

# declare the functions for simulating from and evaluating the log-density of the measurement process
r_meas_process <- function(state, meas_vars, params){
          # in our example, rho will be the name of the binomial sampling probability parameter.
          # this function returns a matrix of observed counts
          rbinom(n = nrow(state),
                 size = state[,meas_vars],
                 prob =  params["rho"])
}

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1, 183, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.00025,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/150,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(
                              S = 397,
                              E = 2,
                              I = 1,
                              R = 0
                    ),
                    trim = TRUE
          )

dat1 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path1 <- epimodel$pop_mat

epimodel <- init_epimodel(obstimes = seq(183, 729, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/330,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path1[nrow(true_path1),"S"],
                                   true_path1[nrow(true_path1),"E"],
                                   true_path1[nrow(true_path1),"I"],
                                   true_path1[nrow(true_path1),"R"]
                    ),
                    trim = TRUE
          )

dat2 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path2 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(729, 1163, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.00035,                     # per-contact infectivity parameter high
                                     gamma = 1/90,                      # exposed compartment
                                     mu = 1/300,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path2[nrow(true_path2),"S"],
                                   true_path2[nrow(true_path2),"E"],
                                   true_path2[nrow(true_path2),"I"],
                                   true_path2[nrow(true_path2),"R"]
                    ),
                    trim = TRUE
          )

dat3 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path3 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1163, 1457, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/180,                      # exposed compartment
                                     mu = 1/70,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path3[nrow(true_path3),"S"],
                                   true_path3[nrow(true_path3),"E"],
                                   true_path3[nrow(true_path3),"I"],
                                   true_path3[nrow(true_path3),"R"]
                    ),
                    trim = TRUE
          )

dat4 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path4 <- epimodel$pop_mat

dat <- rbind(dat1, dat2[-c(1),], dat3[-1,], dat4[-1,])
true_path <- rbind(true_path1[-nrow(true_path1),c(1,4:7)], 
                   true_path2[-c(1, nrow(true_path2)), c(1,4:7)],
                   true_path3[-c(1, nrow(true_path3)), c(1,4:7)],
                   true_path4[-1, c(1,4:7)])

plot(true_path[,1], true_path[,c("I")], "l"); points(dat)
abline(v = c(183, 729, 1163), col = "red")


## ----SIR_kernel, warning = F, cache=F------------------------------------
# helper function for computing the sufficient statistics for the SIR model rate parameters
Rcpp::cppFunction("Rcpp::NumericVector getSuffStats_SIR(const Rcpp::NumericMatrix& pop_mat, const int ind_final_config) {
                  
          // initialize sufficient statistics
          int num_inf = 0;       // number of infection events
          int num_rec = 0;       // number of recovery events
          double beta_suff = 0;  // integrated hazard for the infectivity
          double mu_suff = 0;    // integrated hazard for the recovery

          // initialize times
          double cur_time = 0;              // current time
          double next_time = pop_mat(0,0);  // time of the first event
          double dt = 0;                    // time increment
          
          // compute the sufficient statistics - loop through the pop_mat matrix until
          // reaching the row for the final observation time
          for(int j = 0; j < ind_final_config - 1; ++j) {
          
                    cur_time = next_time;         
                    next_time = pop_mat(j+1, 0); // grab the time of the next event
                    dt = next_time - cur_time;   // compute the time increment
                    
                    beta_suff += pop_mat(j, 3) * pop_mat(j, 4) * dt; // add S*I*(t_{j+1} - t_j) to beta_suff
                    mu_suff += pop_mat(j, 4) * dt;                   // add I*(t_{j+1} - t_j) to mu_suff
                    
                    // increment the count for the next event
                    if(pop_mat(j + 1, 2) == 1) {  
                              num_inf += 1;
                    } else if(pop_mat(j + 1, 2) == 2) {
                              num_rec += 1;
                    }
          }
                  
          // return the vector of sufficient statistics for the rate parameters
          return Rcpp::NumericVector::create(num_inf, beta_suff, num_rec, mu_suff);
}")

# MCMC transition kernel for the SEIR model rate parameters and the binomial
# sampling probability. The prior distributions for the parameters are contained
# in this function.

gibbs_kernel_SIR <- function(epimodel) {
          
          # get sufficient statistics using the previously compiled getSuffStats function (above)
          suff_stats          <- getSuffStats_SIR(epimodel$pop_mat, epimodel$ind_final_config)
          
          ### update parameters from their univariate full conditional distributions
          ## beta ~ Gamma(0.6, 10000)
          ## mu   ~ Gamma(0.7, 100)
          ## rho  ~ Beta(10, 1)
          proposal          <- epimodel$params # params is the vector of ALL model parameters
          proposal["beta"]  <- rgamma(1, 0.6 + suff_stats[1], 10000 + suff_stats[2])
          proposal["mu"]    <- rgamma(1, 0.7 + suff_stats[3], 100 + suff_stats[4])
          proposal["rho"]   <- rbeta(1, 
                                     shape1 = 10 + sum(epimodel$obs_mat[, "I_observed"]),
                                     shape2 = 1 + sum(epimodel$obs_mat[, "I_augmented"] - epimodel$obs_mat[, "I_observed"]))
          
          # update array of rate matrices
          epimodel          <- build_new_irms(epimodel, proposal)
          
          # update the eigen decompositions (This function is built in)
          buildEigenArray_SIR(real_eigenvals = epimodel$real_eigen_values,
                              imag_eigenvals = epimodel$imag_eigen_values,
                              eigenvecs      = epimodel$eigen_vectors, 
                              inversevecs    = epimodel$inv_eigen_vectors, 
                              irm_array      = epimodel$irm, 
                              n_real_eigs    = epimodel$n_real_eigs, 
                              initial_calc   = FALSE)
          
          # get log-likelihood of the observations under the new parameters
          obs_likelihood_new  <- calc_obs_likelihood(epimodel, params = proposal, log = TRUE) #### NOTE - log = TRUE
          
          # get the new population level CTMC log-likelihood
          pop_likelihood_new  <- epimodel$likelihoods$pop_likelihood_cur +
                    suff_stats[1] * (log(proposal["beta"]) - log(epimodel$params["beta"])) +
                    suff_stats[3] * (log(proposal["mu"]) - log(epimodel$params["mu"])) -
                    suff_stats[2] * (proposal["beta"] - epimodel$params["beta"]) - 
                    suff_stats[4] * (proposal["mu"] - epimodel$params["mu"])
          
          # update parameters, likelihood objects, and eigen decompositions
          epimodel  <-
                    update_params(
                              epimodel,
                              params = proposal,
                              pop_likelihood = pop_likelihood_new,
                              obs_likelihood = obs_likelihood_new
                    )
          
          return(epimodel)
}

### Helper function for computing the SEIR model sufficient statistics
Rcpp::cppFunction("Rcpp::NumericVector getSuffStats_SEIR(const Rcpp::NumericMatrix& pop_mat, const int ind_final_config) {
                  
          // initialize sufficient statistics
          int num_exp = 0;       // number of exposure events
          int num_inf = 0;       // number of exposed --> infectious events
          int num_rec = 0;       // number of recovery events
          double beta_suff  = 0; // integrated hazard for the exposure
          double gamma_suff = 0; // integrated hazard for addition of infectives
          double mu_suff    = 0; // integrated hazard for the recovery
          
          // initialize times
          double cur_time = 0;              // current time
          double next_time = pop_mat(0,0);  // time of the first event
          double dt = 0;                    // time increment

          // compute the sufficient statistics - loop through the pop_mat matrix until
          // reaching the row for the final observation time
          for(int j = 0; j < ind_final_config - 1; ++j) {

                    cur_time = next_time;         
                    next_time = pop_mat(j+1, 0); // grab the time of the next event
                    dt = next_time - cur_time;

                    beta_suff  += pop_mat(j, 3) * pop_mat(j, 5) * dt; // add S*I*(t_{j+1} - t_j) to beta_suff
                    gamma_suff += pop_mat(j, 4) * dt;                 // add E*(t_{j+1} - t_j) to gamma_suff
                    mu_suff    += pop_mat(j, 5) * dt;                 // add I*(t_{j+1} - t_j) to mu_suff
                    
                    if(pop_mat(j + 1, 2) == 1) {  
                              num_exp += 1;            // if the next event is an exposure, increment the number of exposures
                    } else if(pop_mat(j + 1, 2) == 2) {
                              num_inf += 1;            // if the next event adds an infective, increment the number of infections
                    } else if(pop_mat(j + 1, 2) == 3) {
                              num_rec += 1;            // if the next event is a recover, increment the number of recovery
                    }
          }
          
          // return the vector of sufficient statistics for the rate parameters
          return Rcpp::NumericVector::create(num_exp, beta_suff, num_inf, gamma_suff, num_rec, mu_suff);
}")

# MCMC transition kernel for the SEIR model rate parameters and the binomial
# sampling probability. The prior distributions for the parameters are contained
# in this function.
gibbs_kernel_SEIR <- function(epimodel) {
          
          # get sufficient statistics using the previously compiled getSuffStats function (above)
          suff_stats          <- getSuffStats_SEIR(epimodel$pop_mat, epimodel$ind_final_config)
          
          ### update parameters from their univariate full conditional distributions
          ## beta  ~ Gamma(0.6, 10000)
          ## gamma ~ Gamma(0.5,50)
          ## mu    ~ Gamma(0.7, 100)
          ## rho   ~ Beta(10, 1)          
          proposal          <- epimodel$params # params is the vector of ALL model parameters
          proposal["beta"]  <- rgamma(1, 0.6 + suff_stats[1], 10000 + suff_stats[2])
          proposal["gamma"] <- rgamma(1, 0.5 + suff_stats[3], 50 + suff_stats[4])
          proposal["mu"]    <- rgamma(1, 0.7 + suff_stats[5], 100 + suff_stats[6])
          proposal["rho"]   <- rbeta(1, 
                                     shape1 = 10 + sum(epimodel$obs_mat[,"I_observed"]), 
                                     shape2 = 1 + sum(epimodel$obs_mat[,"I_augmented"]- epimodel$obs_mat[,"I_observed"]))
          
          # update array of rate matrices
          epimodel          <- build_new_irms(epimodel, proposal)
          
          # update the eigen decompositions
          buildEigenArray_SEIR(real_eigenvals = epimodel$real_eigen_values,
                               imag_eigenvals = epimodel$imag_eigen_values,
                               eigenvecs      = epimodel$eigen_vectors, 
                               inversevecs    = epimodel$inv_eigen_vectors, 
                               irm_array      = epimodel$irm, 
                               n_real_eigs    = epimodel$n_real_eigs, 
                               initial_calc   = FALSE)
          
          # get log-likelihoods under the new parameters
          pop_likelihood_new  <- calc_pop_likelihood(epimodel, log = TRUE) #### NOTE - log = TRUE
          obs_likelihood_new  <- calc_obs_likelihood(epimodel, params = proposal, log = TRUE) #### NOTE - log = TRUE
          
          # update parameters, likelihood objects, and eigen decompositions
          epimodel            <- update_params(epimodel, params = proposal, pop_likelihood = pop_likelihood_new, obs_likelihood = obs_likelihood_new)
          
          return(epimodel)
}

## ----SIR_inference, warning=F, cache=F, messages = F---------------------
chain <- 1 # this was set by a batch script that ran chains 1, 2, and 3 in parallel

# generate the measurement process
d_meas_process <- function(state, meas_vars, params, log = TRUE) {
        # note that the names of the measurement variables are endowed with suffixes "_observed" and "_augmented". This is required.
        # we will declare the names of the measurement variables shortly.
        dbinom(x = state[, "I_observed"],
               size = state[, "I_augmented"],
               prob = params["rho"], log = log)
}

# re-initialize the chain
init_dist_SIR <- normalize(c(0.99, 0.004, 0.0001))
epimodel_SIR  <- init_epimodel(popsize = 400,                                                    # population size
                           states = c("S", "I", "R"),                                         # compartment names
                           params = c(beta = abs(rnorm(1, 0.00005, 1e-5)),                   # infectivity rate
                                      mu = abs(rnorm(1, 0.002, 0.0001)),                      # recovery rate
                                      rho = rbeta(1, 10, 1),                                 # binomial sampling prob
                                      S0 = init_dist_SIR[1], I0 = init_dist_SIR[2], R0 = init_dist_SIR[3]),
                           rates = c("beta * I", "mu"),                # unlumped transition rates
                           flow = matrix(c(-1, 1, 0,
                                           0, -1, 1), ncol = 3, byrow = T),  # flow matrix
                           dat = dat,                                           # dataset
                           time_var = "time",                                   # name of time variable in the dataset
                           meas_vars = "I",                                     # name of measurement var in the dataset
                           initdist_prior = c(90, 0.5, 0.01), ### Parameters for the dirichlet prior distribution
                           d_meas_process = d_meas_process)

epimodel_SIR <- init_settings(epimodel_SIR,
                          niter = 10, # this was set to 100000 in the paper
                          save_params_every = 1,
                          save_configs_every = 2, # this was set to 250 for the chains run in the paper
                          kernel = list(gibbs_kernel_SIR),
                          configs_to_redraw = 2, # set to 150 for the paper
                          analytic_eigen = "SIR",
                          ecctmc_method = "unif",
                          seed = 52787 + chain)

# Fit the epimodel --------------------------------------------------------

epimodel_SIR <- fit_epimodel(epimodel_SIR, monitor = TRUE)


## ----SEIR_sim2, include=FALSE, warning=F, cache = F----------------------
set.seed(1834)

# declare the functions for simulating from and evaluating the log-density of the measurement process
r_meas_process <- function(state, meas_vars, params){
          # in our example, rho will be the name of the binomial sampling probability parameter.
          # this function returns a matrix of observed counts
          rbinom(n = nrow(state),
                 size = state[,meas_vars],
                 prob =  params["rho"])
}

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1, 183, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.00025,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/150,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(
                              S = 397,
                              E = 2,
                              I = 1,
                              R = 0
                    ),
                    trim = TRUE
          )

dat1 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path1 <- epimodel$pop_mat

epimodel <- init_epimodel(obstimes = seq(183, 729, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"),                # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/210,                      # exposed compartment
                                     mu = 1/330,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)

# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path1[nrow(true_path1),"S"],
                                   true_path1[nrow(true_path1),"E"],
                                   true_path1[nrow(true_path1),"I"],
                                   true_path1[nrow(true_path1),"R"]
                    ),
                    trim = TRUE
          )

dat2 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path2 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(729, 1163, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.00035,                     # per-contact infectivity parameter high
                                     gamma = 1/90,                      # exposed compartment
                                     mu = 1/300,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path2[nrow(true_path2),"S"],
                                   true_path2[nrow(true_path2),"E"],
                                   true_path2[nrow(true_path2),"I"],
                                   true_path2[nrow(true_path2),"R"]
                    ),
                    trim = TRUE
          )

dat3 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path3 <- epimodel$pop_mat

# initialize the stochastic epidemic model object
epimodel <- init_epimodel(obstimes = seq(1163, 1457, by = 7),                    # vector of observation times
                          popsize = 400,                                      # population size
                          states = c("S", "E", "I", "R"), # compartment names
                          params = c(beta = 0.0001,                     # per-contact infectivity parameter high
                                     gamma = 1/180,                      # exposed compartment
                                     mu = 1/70,                          # recovery rate for fast recoverers
                                     rho = 0.95,                               # binomial sampling probability
                                     S0 = 0.94,
                                     E0 = 0.05,
                                     I0 = 0.01,
                                     R0 = 0),      # initial state probabilities
                          rates = c("beta*I", "gamma", "mu"), # unlumped transition rates
                          flow = matrix(c(-1, 1, 0, 0,  # S -> E
                                          0, -1, 1, 0,  # E -> In
                                          0, 0, -1, 1), # I -> R
                                        ncol = 4, byrow = T),
                          
                          meas_vars = c("I"),
                          r_meas_process = r_meas_process)


# simulate the epidemic and the dataset.
epimodel <-
          simulate_epimodel(
                    epimodel = epimodel,
                    init_state = c(true_path3[nrow(true_path3),"S"],
                                   true_path3[nrow(true_path3),"E"],
                                   true_path3[nrow(true_path3),"I"],
                                   true_path3[nrow(true_path3),"R"]
                    ),
                    trim = TRUE
          )

dat4 <- cbind(time = epimodel$dat[,"time"], I = epimodel$dat[,"I"])
true_path4 <- epimodel$pop_mat

dat <- rbind(dat1, dat2[-c(1),], dat3[-1,], dat4[-1,])
true_path <- rbind(true_path1[-nrow(true_path1),c(1,4:7)], 
                   true_path2[-c(1, nrow(true_path2)), c(1,4:7)],
                   true_path3[-c(1, nrow(true_path3)), c(1,4:7)],
                   true_path4[-1, c(1,4:7)])


## ----SEIR_inference, warning=F, cache=F, messages = F--------------------
chain <- 1 # this was set by a batch script that ran chains 1, 2, and 3 in parallel

# generate the measurement process
d_meas_process <- function(state, meas_vars, params, log = TRUE) {
        # note that the names of the measurement variables are endowed with suffixes "_observed" and "_augmented". This is required.
        # we will declare the names of the measurement variables shortly.
        dbinom(x = state[, "I_observed"],
               size = state[, "I_augmented"],
               prob = params["rho"], log = log)
}

# re-initialize the chain
init_dist_SEIR <- normalize(c(90, 0.5, 0.5, 0.01))
epimodel_SEIR  <- init_epimodel(popsize = 400,                                                    # population size
                           states = c("S", "E", "I", "R"),                                         # compartment names
                           params = c(beta = abs(rnorm(1, 0.00008, 1e-6)),                      # infectivity rate
                                      gamma = abs(rnorm(1, 0.008, 1e-3)),
                                      mu = abs(rnorm(1, 0.0012, 0.001)),                          # recovery rate
                                      rho = rbeta(1, 10, 1),                                 # binomial sampling prob
                                      S0 = init_dist_SEIR[1], E0 = init_dist_SEIR[2], I0 = init_dist_SEIR[3], R0 = init_dist_SEIR[4]),
                           rates = c("beta * I", "gamma", "mu"),                # unlumped transition rates
                           flow = matrix(c(-1, 1, 0, 0, # S -> E
                                           0, -1, 1, 0, # E -> I
                                           0, 0, -1, 1),# I -> R
                                         ncol = 4, byrow = T),  # flow matrix
                           dat = dat,                                           # dataset
                           time_var = "time",                                   # name of time variable in the dataset
                           meas_vars = "I",                                     # name of measurement var in the dataset
                           initdist_prior = c(90, 0.5, 0.5, 0.01), ### Parameters for the dirichlet prior distribution
                           d_meas_process = d_meas_process,
                           r_meas_process = r_meas_process)

epimodel_SEIR <- init_settings(epimodel_SEIR,
                          niter = 10, # this was set to 100000 in the paper
                          save_params_every = 1,
                          save_configs_every = 2, # this was set to 250 for the chains run in the paper
                          kernel = list(gibbs_kernel_SEIR),
                          configs_to_redraw = 2, # set to 150 for the paper
                          analytic_eigen = "SEIR",
                          ecctmc_method = "unif",
                          seed = 52787 + chain)

# Fit the epimodel --------------------------------------------------------

epimodel_SEIR <- fit_epimodel(epimodel_SEIR, monitor = TRUE)
fintzij/BDAepimodel documentation built on Sept. 20, 2020, 1:44 p.m.