.development_files/incidence_lna_simulations/rev_sim_02_3dis_diff/pmmh/seir_sim_covid.R

# Overview
# 
# Code for fitting an SEIR model to weekly and monthly incidence data simulated
# from an SEIR model with COVID-like dynamics. 
#
# Settings: 
# - R0 = 2.5
# - Mean latent period duration = 1 week
# - Mean infectious period duration = 10 days
# - P  = 5000
# - X(t_0) = (S_0 = 1990, E_0 = 5, I_0 = 5, R_0 = 0)
# - Mean case detection rate = 0.1

library(pomp)
library(coda)
library(stemr)
library(foreach)
library(doParallel)
library(doRNG)
library(future)

# load simulation arguments
args <- commandArgs(TRUE)
print(args)
replication  <- as.numeric(args[1])
set.seed(12511 + replication)

# create stem model
popsize = 1e4 # population size

true_pars =
  c(R0         = 2.5, # basic reproduction number
    latent_dur = 5/7, # latent period duration = 5 days
    infec_dur  = 10/7, # infectious period duration = 10 days
    rho        = 0.1, # case detection rate
    phi        = 36)  # negative binomial overdispersion

# initialize model compartments and rates
strata <- NULL # no strata
compartments <- c("S", "E", "I", "R")

# rates initialized as a list of rate lists
rates <-
  list(rate(rate = "beta * I", # individual level rate (unlumped)
            from = "S",        # source compartment
            to   = "E",        # destination compartment
            incidence = FALSE),# don't compute incidence of S2E transitions, not required for simulating incidence data
       rate(rate = "omega",    # individual level rate
            from = "E",        # source compartment
            to   = "I",        # destination compartment
            incidence = TRUE), # compute incidence of IE2I transitions (required for simulating data)
       rate(rate = "mu",       # individual level rate
            from = "I",        # source compartment
            to   = "R",        # destination compartment
            incidence = FALSE)) # don't compute incidence of I2R transitions (not required for simulating data)

# list used for simulation/inference for the initial state, initial counts fixed.
# state initializer a list of stem_initializer lists.
state_initializer <-
  list(stem_initializer(
          init_states = c(S = popsize-10, E = 5, I = 5, R = 0), # must match compartment names
          fixed = T)) # initial state fixed for simulation, we'll change this later

# set the parameter values - must be a named vector
parameters =
  c(true_pars["R0"] / popsize / true_pars["infec_dur"], # R0 = beta * P / mu
    1/true_pars["latent_dur"],
    1/true_pars["infec_dur"],
    true_pars["rho"],
    true_pars["phi"])
names(parameters) <- c("beta", "omega", "mu", "rho", "phi")

# declare the initial time to be constant
constants <- c(t0 = 0)
t0 <- 0; tmax <- 52

# compile the model
dynamics <-
      stem_dynamics(
            rates = rates,
            tmax = tmax,
            parameters = parameters,
            state_initializer = state_initializer,
            compartments = compartments,
            constants = constants,
            compile_ode = F,   # compile ODE functions
            compile_rates = T, # compile MJP functions for Gillespie simulation
            compile_lna = T,   # compile LNA functions
            messages = F       # don't print messages
      )

# list of emission distribution lists (analogous to rate specification)
emissions <-
  list(emission(meas_var = "cases", # transition or compartment being measured (S->I transitions)
                distribution    = "negbinomial",         # emission distribution
                emission_params = c("phi", "E2I * rho"), # distribution pars, here overdispersion and mean
                incidence       = TRUE,                  # is the data incidence
                obstimes        = seq(1, tmax, by = 1)))  # vector of observation times

# compile the measurement process
measurement_process <-
  stem_measure(emissions = emissions,
               dynamics  = dynamics,
               messages  = F)

# put it all together into a stochastic epidemic model object
stem_object <-
  make_stem(
    dynamics = dynamics,
    measurement_process = measurement_process)
 
# Simulating an outbreak and data 
sim_mjp <- simulate_stem(stem_object = stem_object, method = "gillespie")

while(max(sim_mjp$datasets[[1]][,"cases"]) < 15) {
    sim_mjp <- simulate_stem(stem_object = stem_object, method = "gillespie")
}

# grab the true path and the dataset
dat_sim <- sim_mjp$datasets[[1]]
true_path <- sim_mjp$paths[[1]]

# chop off the trailing data after the outbreak has ended (four consecutive counts < 5)
rle_dat <- rle(dat_sim[-c(1:8),2] < 5) # run length encoding representation
runs    <- which(rle_dat$values & rle_dat$lengths >= 4) # find runs of counts<5 of length >= 4

# if a run of zeros was found, truncate data
if(length(runs) != 0) {
  
  inds <- 
    if(runs[1] == 1) {
      9:12
    } else {
      c(seq(9, length.out = sum(rle_dat$length[seq_len(runs[1] - 1)])),
        seq(9 + sum(rle_dat$length[seq_len(runs[1] - 1)]), length.out = 4))
    }
  
  # truncate at one year
  if(any(inds > 52)) inds <- 9:52
  
  dat <- rbind(dat_sim[1:8,],
               dat_sim[inds,])
  
} else {
  dat <- dat_sim
}

# Set up pomp objects -----------------------------------------------------

# Measurement process objects
rmeas <- "
  cases=rnbinom_mu(exp(-2*log_sqrt_phi_inv),exp(logit_rho)*E2I/(1+exp(logit_rho)));    // simulates the data
"

dmeas<-"
  lik=dnbinom_mu(cases,exp(-2*log_sqrt_phi_inv),exp(logit_rho)*E2I/(1+exp(logit_rho)),give_log); // emission density
"

# Define the priors
seir.dprior = 
  "double loglik;
   loglik = 
      dnorm(log_R0, log(2.4), 0.5, 1) + 
      dnorm(log_latent_dur, log(6) - log(7), 0.82, 1) + 
      dnorm(log_infec_dur, log(9) - log(7), 0.82, 1) + 
      logit_rho - 2 * log1p(exp(logit_rho)) + 
      dexp(exp(log_sqrt_phi_inv), exp(-log(3)), 1) + log_sqrt_phi_inv;
  lik = give_log == 1? loglik : exp(loglik);
"

# define the stepper
seir.step<-"
  double rate[3];
  double dN[3];
  rate[0]=exp(log_R0 - log_infec_dur - log(*get_userdata_double(\"popsize\")))*I; // Infection rate
  rate[1]=exp(-log_latent_dur);                // rate of E-I transitions
  rate[2]=exp(-log_infec_dur);                 // recovery rate
  reulermultinom(1,S,&rate[0],dt,&dN[0]);      // generate the number of newly exposed people
  reulermultinom(1,E,&rate[1],dt,&dN[1]);      // generate the number of newly infected people
  reulermultinom(1,I,&rate[2],dt,&dN[2]);      // generate the number of newly recovered people
  S+=-dN[0];                                   // update the number of Susceptibles
  E+=dN[0]-dN[1];                              // update the number of Infections
  I+=dN[1]-dN[2];                              // update the number of Infections
  R+=dN[2];                                    // update the number of Recoveries
  E2I += dN[1];
"

# instatiate the euler stepper function
SEIR_sim <- euler(step.fun = Csnippet(seir.step), delta.t = 1/24/7)

# instatiate the pomp object
parnames = c("log_R0", "log_latent_dur", "log_infec_dur", "logit_rho", "log_sqrt_phi_inv")
pars = c(log(true_pars["R0"]),
         log(true_pars["latent_dur"]),
         log(true_pars["infec_dur"]),
         qlogis(true_pars["rho"]),
         -0.5 * log(true_pars["phi"]))
names(pars) = parnames

seir_mod <- pomp(
  data = as.data.frame(dat),  #"cases" is the dataset, "time" is the observation time
  times = "time",
  t0 = 0,                      # initial time point
  popsize = popsize,
  dmeasure = Csnippet(dmeas),  # evaluates the density of the measurement process
  rmeasure = Csnippet(rmeas),  # simulates from the measurement process
  rprocess = SEIR_sim,          # simulates from the latent process
  statenames = c("S", "E", "I", "R", "E2I"),  #state space variable name
  params = pars,
  paramnames = parnames, #parameters name
  accumvars = c("E2I"),
  dprior = Csnippet(seir.dprior),
  rinit = function(params, t0, ...) {
    return(c(S = popsize-10, E = 5, I = 5, R = 0, E2I = 0))
  }
)

# Metropolis kernel proposal covariance matrix
cov_init <- diag(1e-2, length(true_pars)); 
colnames(cov_init) <- rownames(cov_init) <- 
  c("log_R0", "log_latent_dur", "log_infec_dur", "logit_rho", "log_sqrt_phi_inv")

# run the MCMC
registerDoParallel(cores = future::availableCores())
set.seed(52787 + replication)

fits <- 
  foreach(i = seq_len(5),
          .export = ls(),
          .packages = c("pomp")) %dorng% {
            
            init <- pars + rnorm(length(parameters), 0, 0.1)
            
            adapt_res_1 <- 
              pmcmc(seir_mod,
                    Nmcmc = 1e3,
                    Np = 500,
                    params = init,
                    proposal =
                      mvn.rw.adaptive(
                        rw.sd = diag(cov_init),
                        scale.start = 100,
                        shape.start = 100,
                        scale.cooling = 0.999))
            
            while(adapt_res_1@accepts < 1e2 | adapt_res_1@accepts > 800) {
              
              init <- pars + rnorm(length(parameters), 0, 0.1)
              
              adapt_res_1 <- 
                pmcmc(seir_mod,
                      Nmcmc = 1e3,
                      Np = 500,
                      params = init,
                      proposal =
                        mvn.rw.adaptive(
                          rw.sd = diag(cov_init),
                          scale.start = 100,
                          shape.start = 100,
                          scale.cooling = 0.999))
            }
            
            start.time <- Sys.time()
            adapt_res_2 <- 
              pmcmc(adapt_res_1,
                    Nmcmc = 1e4,
                    Np = 500,
                    proposal =
                      mvn.rw.adaptive(
                        rw.var = covmat(adapt_res_1) + diag(0.01, length(true_pars)),
                        scale.start = 100,
                        shape.start = 100,
                        scale.cooling = 0.999))
            
            while(adapt_res_2@accepts < 500 | adapt_res_2@accepts > 8e3) {
              
              init <- pars + rnorm(length(parameters), 0, 0.1)
              
              adapt_res_2 <- 
                pmcmc(adapt_res_1,
                      Nmcmc = 1e4,
                      Np = 500,
                      params = init,
                      proposal =
                        mvn.rw.adaptive(
                          rw.var = covmat(adapt_res_1) + diag(0.01, length(true_pars)),
                          scale.start = 100,
                          shape.start = 100,
                          scale.cooling = 0.999))
            }
            
            # stop the adaptation and do some more MCMC
            fit <- 
              pmcmc(adapt_res_2, 
                    Nmcmc = 5e4, 
                    Np = 500,
                    proposal = mvn.rw(rw.var = covmat(adapt_res_2)))
            
            end.time <- Sys.time()
            
            return(list(fit = fit, runtime = difftime(end.time, start.time, units = "hours")))
          }

# extract results
posts <- 
  do.call(rbind, 
          lapply(fits, 
                 function(x) traces(x$fit, pars = parnames)))
post_quants <- apply(posts, 2, quantile, c(0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975)) 

posts_mcmc <- 
  as.mcmc.list(lapply(fits, 
                      function(x) 
                        as.mcmc(cbind(
                          logpost = 
                            rowSums(traces(x$fit, pars = c("loglik", "log.prior"))),
                          traces(x$fit, pars = parnames)))))
psrf              <- gelman.diag(posts_mcmc)
effective_samples <- do.call(rbind, lapply(posts_mcmc, effectiveSize))  

results <- 
  list(true_pars = true_pars,
       dat = dat,
       post_quants = post_quants,
       effective_samples = effective_samples,
       psrf = psrf,
       times = sapply(fits, function(x) x$runtime))

saveRDS(results, file = paste0("seir_covid_",replication,".Rds"))
fintzij/stemr documentation built on March 25, 2022, 12:25 p.m.