library(zikaInfer) library(lazymcmc) library(kableExtra) library(knitr) library(zoo) options(knitr.table.format = "html") knitr::opts_chunk$set(fig.width=6, fig.height=4)
This vignette describes the work flow for analysis using [zikaInfer
] in detail. Users should be able to follow each section to understand how the cod relates to the methods described in the accompanying methods vignette.
The first step in using this package is to define the set of model parameters. An example table of parameters is provided in the package and can be viewed using the following:
data(exampleParTab) ## Extract parameter values to solve model. Note that ## these parameters MUST be named pars <- exampleParTab$values names(pars) <- exampleParTab$names
Refer to the documentation of ?exampleParTab
for a description of the parameter table columns and their uses. Similarly, users can use the parTabSetup
function to generate a new parameter table from scratch.
The transmission component of the model is an SEIR model with mosquito vectors, described here. The model can be solved as follows:
## Generate starting population sizes based on human population size ## and mosquito density y0s <- generate_y0s(pars["N_H"], pars["density"],iniI=10) ## Times to solve model over. Note that the unit of time is in days ts <- seq(0,3003,by=1) y <- solveSEIRModel_rlsoda(ts, y0s, pars, TRUE) plot(y[,"I_H"],type='l',col="red", xlab="Time (days)", ylab="ZIKV incidence in humans")
This SEIR model is then used to generate a per-capita risk of infection at any given time within the generate_probM
function. Note that there is a suite of accompanying functions, for example:
## Calculate R0 print(r0.calc(pars)) ## Find the mosquito density required for a desired R0 print(density.calc(pars, R0=3)) print(calculate_AR(r0=3))
There are four implemented risk models that describe the risk of developing a congenital abnormality given infection in a particular gestational week. These are numbered 1-4 (print_version_names()
). The generate_micro_curve
function will automatically detect which version of the risk curve the user requires based on the parameter names:
risk <- generate_micro_curve(pars) plot(risk,type='l',col="blue", xlab="Gestational time at infection (days)",ylab="Prob of congenital abnormality")
The main version used in the accompanying analyses is model version 1, the gamma risk model.
Taken together, the per capita incidence model and gestational risk model are combined to give the expected proportion of microcephaly affected births observed in a given day.
probM <- generate_probM(y[,"I_M"],pars["N_H"],risk,pars["b"],pars["p_MH"],pars["baselineProb"],1) plot(probM,type='l',col="green", xlab="Time (days)",ylab="Proportion of microcephaly affected births")
Incidence and microcephaly incidence data can be simulated using the generate_multiple_data
function. The idea is to generate fake data with known parameters that represent actual observed data:
library(ggplot2) ## Generate simulated data with known parameters simDat <- generate_multiple_data(ts, parTab=exampleParTab, weeks=FALSE, dataRangeMicro=c(600,2000),dataRangeInc=c(600,1500), noise=FALSE,peakTimeRange=60) microDat <- simDat[[1]] incDat <- simDat[[2]] peakTimes <- simDat[[3]] ## Save simulated data write.table(microDat,"sim_microDat.csv",sep=",",row.names=FALSE) write.table(incDat,"sim_incDat.csv",sep=",",row.names=FALSE) ## Check the generated data ggplot(incDat) + geom_line(aes(x=startDay,y=inc/N_H), col="red") + geom_line(data=microDat,aes(x=startDay,y=microCeph*0.001/births),col="blue") + facet_wrap(~local) + ylab("Per capita ZIKV incidence (red)") + xlab("Time (days)") + scale_y_continuous(sec.axis=sec_axis(~.*1000,name="Per birth microcephaly\n incidence (blue)")) + theme_bw()
Parameter estimation is done using the lazymcmc
package as follows:
library(lazymcmc) ## Generate random starting points for MCMC chain ## Note that we have to constrain the starting points for R0 and the epidemic seed time ## such that trajectory is near expected peak time startTab <- generateStartingParTab(exampleParTab, peakTimes, restrictedR0=TRUE,"") ## MCMC control parameters mcmcPars <- c("adaptive_period"=10000,"iterations"=20000,"opt_freq"=1000,"thin"=10,"save_block"=100,"popt"=0.44) ## Run MCMC chain using univariate sampler result <- lazymcmc::run_MCMC(parTab=startTab, data=microDat, mcmcPars=mcmcPars,filename="test_univariate", CREATE_POSTERIOR_FUNC = create_posterior, mvrPars=NULL,PRIOR_FUNC=NULL, OPT_TUNING=0.2, ts=ts,incDat=incDat,peakTimes=NULL) ## Use these results to get covariance matrix to run multivariate sampler chain <- read.csv(result$file) chain <- chain[chain$sampno >= mcmcPars["adaptive_period"],] covMat <- cov(chain[,2:(ncol(chain)-1)]) startTab$values <- get_best_pars(chain) mvrPars <- list(covMat,2.38/sqrt(nrow(startTab[startTab$fixed==0,])),w=0.8) mcmcPars <- c("adaptive_period"=10000,"iterations"=20000,"opt_freq"=1000,"thin"=10,"save_block"=100,"popt"=0.234) ## Run MCMC chain using multivariate sampler ## Run two chains to calculate gelman diagnostics final <- lazymcmc::run_MCMC(parTab=startTab, data=microDat, mcmcPars=mcmcPars,filename="test_2_multivariate", CREATE_POSTERIOR_FUNC = create_posterior, mvrPars=mvrPars,PRIOR_FUNC=NULL, OPT_TUNING=0.2, ts=ts,incDat=incDat,peakTimes=NULL) finalB <- lazymcmc::run_MCMC(parTab=startTab, data=microDat, mcmcPars=mcmcPars,filename="test_1_multivariate", CREATE_POSTERIOR_FUNC = create_posterior, mvrPars=mvrPars,PRIOR_FUNC=NULL, OPT_TUNING=0.2, ts=ts,incDat=incDat,peakTimes=NULL) chain <- read.csv(final$file) chain <- chain[chain$sampno >= mcmcPars["adaptive_period"],]
The MCMC chain should then be checked for correct convergence using the functions available in [lazymcmc
], which uses the [coda
] package.
library(data.table) library(coda) # Function to read in MCMC chains if possible and then calculate convergence diagnostics read_and_check <- function(wd, parTab, burnin){ chain <- lazymcmc::load_mcmc_chains(wd, parTab, TRUE,1,burnin, TRUE,FALSE,FALSE) if(length(chain[[1]]) > 1){ ess <- ess_diagnostics(chain[[1]],200) gelman <- gelman_diagnostics(chain[[1]], 1.1) minESS <- min(ess$ESS) whichMinESS <- names(which.min(ess$ESS)) maxGelman <- gelman$WorstGelman[1] maxGelmanName <- gelman$WorstGelman[2] mpsrf <- gelman$WorstGelman[3] rerun <- gelman$Rerun | minESS < 200 } else { minESS <- NA whichMinESS <- NA maxGelman <- NA maxGelmanName <- NA mpsrf <- NA rerun <- TRUE } return(list(minESS,whichMinESS,maxGelman,maxGelmanName,mpsrf,rerun)) } dir <- getwd() ## The working directory that the MCMC chain was run ## The parameter table used in this run will ## be saved in the working directory res <- read_and_check(dir, startTab, mcmcPars["adaptive_period"]) print(res)
plot_random_microceph_curves(chain,100) indiv_model_fit("sim_microDat.csv","sim_incDat.csv", "bahia","Simulated data",0.001,200,0.03, 500, FALSE, TRUE, startTab, chain, FALSE, FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.