knitr::opts_chunk$set(echo = TRUE)
The reproduction number $R_{0}$ - the number of secondary infections generated by a single typical primary infection in an immunologically naïve population - for mosquito borne pathogens is expressed by the Ross Macdonald model. The model, which describes the effect of the different mosquito traits on disease transmission can be written in a temperature-dependent form which captures the mechanistic effect of temperature on the mosquito traits:
$$ R_{0}(T) = \frac{a^{2}(T)mb(T)c(T)e^{-\mu(T)v(T)}}{\mu(T)\gamma} $$
where (T) means that the trait is a function of temperature; a is the biting rate per mosquito per day, m is the equilibrium number of mosquitoes per human, b is the human infectivity to mosquitoes, c is the infectiousness of mosquitoes to humans, $\gamma$ is the recovery rate of the human host (with $\frac{1}{\gamma}$ the average infectious period in humans), v is the extrinsic incubation period (with $\frac{1}{v}$ = extrinsic incubation rate) and $\mu$ is the adult mosquito death rate per day (with $\frac{1}{\mu}$ = mosquito lifespan). The mosquito to humans ratio is assumed to be temperature independent for now. Following Mordecai et al. 2017, we express a, b, c, $\frac{1}{\mu}$ and $\frac{1}{v}$ as unimodal functions (either Quadratic, $-c(T-T_{0})(T-T_{m})$ or Brière, $cT(T-T_{0})(T_{m}-T)^{1/2}$) of temperature:
$$ a(T) = cT(T-T{0})(T{m}-T)^{1/2} $$ $$ b(T) = cT(T-T{0})(T{m}-T)^{1/2} $$ $$ c(T) = cT(T-T{0})(T{m}-T)^{1/2} $$ $$ \frac{1}{\mu}(T) = -cT(T-T{0})(T-T{m}) $$ $$ \frac{1}{v}(T) = cT(T-T{0})(T{m}-T)^{1/2} $$
where $T_{0}$ and $T_{m}$ are the minimum and maximum temperature for transmission and c is a positive constant. By substituting equations 2, 3, 4, 5 and 6 in equation 1 we obtain an $R_{0}$ model which is a mechanistic function of temperature T through 15 parameters (too long to write out).
We fit the $R_{0}$ model to a dataset of 382 $R_{0}$ estimates using a Bayesian framework and uninformative priors ($T_{0} \sim {\sf Uniform} (0, 24)$, $T_{m} \sim {\sf Uniform}(25, 45)$, $c \sim {\sf Gamma}(1, 10)$ for Briere and $c \sim {\sf Gamma}(1, 1)$ for Quadratic fits) chosen to restrict each parameter to its biologically realistic range (i.e., T0 < Tm and we assumed that temperatures below 0C and above 45C were lethal).
We assume that the observed $R_{0}$ estimates are normally distributed with mean $\theta$ predicted by the $R_{0}$ equation calculated at the observed temperature T in location i and standard deviation $\sigma$:
$$ y_i \sim {\sf Normal}(\theta_{i}, \sigma) $$
Model fitting was implemented in Stan and R.
Load the repository and other packages
devtools::load_all() library(rstan) library(dplyr) library(stringr) library(reshape2)
Define parameters
T_inf <- 6 # from Zika paper(T_inf = 1/human recovery rate) params_to_estimate <- c("c_a", "T0_a", "Tm_a") indicator_vars <- c("aMin_indicator", "aMax_indicator")
Load datasets
foi_covariates <- readRDS(file.path("output", "foi_data_cov_rescaled.rds")) m_values <- readRDS(file.path(file.path("output", "central_estimates_from_Mordecai"), "m_mean_DayTemp_const_term.rds")) # posteriors from Mordecai et al. 2017 in_dir <- file.path("output", "termal_response_fits", "informative") a_samps <- readRDS(file.path(in_dir, "a_samps.rds")) b_samps <- readRDS(file.path(in_dir, "b_samps.rds")) c_samps <- readRDS(file.path(in_dir, "c_samps.rds")) PDR_samps <- readRDS(file.path(in_dir, "PDR_samps.rds")) lf_samps <- readRDS(file.path(in_dir, "lf_DENV_samps.rds"))
Plot histogram of R0 data
ggplot(data = foi_covariates, mapping = aes(x = R0_1)) + geom_histogram(col = "white", bins = 40) + scale_x_continuous("Observed R0 value", breaks = seq(0,13,1)) + scale_y_continuous("Frequency") + coord_cartesian(xlim = c(0, 13)) + ggtitle("Distribution of the data") + theme_classic()
Prepare the data for stan
R0_values <- foi_covariates$R0_1 R0_values_n <- length(R0_values) temp_values <- foi_covariates$DayTemp_const_term stan_dat <- list(J = R0_values_n, y = R0_values, temp = temp_values, m = m_values, T_inf = T_inf)
Create prior distributions for all parameters. These are the same as the ones used by Mordecai et al. 2017 (PlosNTD).
n <- 10000 seq_n <- seq(0, 10, length.out = n) df_c_a <- data.frame(x = seq_n, y = dgamma(seq_n, shape = 1, rate = 10), trait_par = "c_a") x_T0_a <- runif(n, min = 0, max = 24) df_T0_a <- data.frame(x = x_T0_a, y = dunif(x_T0_a, min = 0, max = 24), trait_par = "T0_a") x_Tm_a <- runif(n, min = 25, max = 45) df_Tm_a <- data.frame(x = x_Tm_a, y = dunif(x_Tm_a, min = 25, max = 45), trait_par = "Tm_a") df_c_b <- data.frame(x = seq_n, y = dgamma(seq_n, shape = 1, rate = 10), trait_par = "c_b") x_T0_b <- runif(n, min = 0, max = 24) df_T0_b <- data.frame(x = x_T0_b, y = dunif(x_T0_b, min = 0, max = 24), trait_par = "T0_b") x_Tm_b <- runif(n, min = 25, max = 45) df_Tm_b <- data.frame(x = x_Tm_b, y = dunif(x_Tm_b, min = 25, max = 45), trait_par = "Tm_b") df_c_c <- data.frame(x = seq_n, y = dgamma(seq_n, shape = 1, rate = 10), trait_par = "c_c") x_T0_c <- runif(n, min = 0, max = 24) df_T0_c <- data.frame(x = x_T0_c, y = dunif(x_T0_c, min = 0, max = 24), trait_par = "T0_c") x_Tm_c <- runif(n, min = 25, max = 45) df_Tm_c <- data.frame(x = x_Tm_c, y = dunif(x_Tm_c, min = 25, max = 45), trait_par = "Tm_c") df_c_lf <- data.frame(x = seq_n, y = dgamma(seq_n, shape = 1, rate = 1), trait_par = "c_lf") x_T0_lf <- runif(n, min = 0, max = 24) df_T0_lf <- data.frame(x = x_T0_lf, y = dunif(x_T0_lf, min = 0, max = 24), trait_par = "T0_lf") x_Tm_lf <- runif(n, min = 25, max = 45) df_Tm_lf <- data.frame(x = x_Tm_lf, y = dunif(x_Tm_lf, min = 25, max = 45), trait_par = "Tm_lf") df_c_PDR <- data.frame(x = seq_n, y = dgamma(seq_n, shape = 1, rate = 10), trait_par = "c_PDR") x_T0_PDR <- runif(n, min = 0, max = 24) df_T0_PDR <- data.frame(x = x_T0_PDR, y = dunif(x_T0_PDR, min = 0, max = 24), trait_par = "T0_PDR") x_Tm_PDR <- runif(n, min = 25, max = 45) df_Tm_PDR <- data.frame(x = x_Tm_PDR, y = dunif(x_Tm_PDR, min = 25, max = 45), trait_par = "Tm_PDR") all_df <- rbind(df_c_a, df_T0_a, df_Tm_a, df_c_b, df_T0_b, df_Tm_b, df_c_c, df_T0_c, df_Tm_c, df_c_lf, df_T0_lf, df_Tm_lf, df_c_PDR, df_T0_PDR, df_Tm_PDR)
Plot vague priors
ggplot(data = all_df, mapping = aes(x = x, y = y)) + geom_line() + facet_wrap(~ trait_par, scales = "free") + scale_x_continuous("Trait value", labels = function(x) format(x, scientific = FALSE)) + scale_y_continuous("PDF") + ggtitle("Vague priors") + theme_bw() + theme(axis.text = element_text(size = 6))
Fit the stan model
options(mc.cores = 7) fit_normal <- stan(file = "R0_model.stan", data = stan_dat, iter = 20000, chains = 4, seed = 132532525, control = list(adapt_delta = 0.99))
Post processing
print(fit_normal, pars = c(params_to_estimate, "theta", indicator_vars, "lp__"), probs = c(0.25, 0.5, 0.75)) list_of_draws <- extract(fit_normal) ppd_data_sample <- sample(20000:40000, 12) ppd_data <- t(list_of_draws$SimData[ppd_data_sample,]) %>% as.data.frame() %>% rename_all(list(name = ~str_replace(., "V", "Sample_"))) %>% mutate(id = seq_len(length(R0_values))) %>% melt(id.vars = "id", variable.name = "sample")
Traceplots
traceplot(fit_normal, params_to_estimate)
Post predictive distribution checks
ggplot(data = ppd_data, mapping = aes(x = value)) + geom_histogram(col = "white") + facet_wrap(~ sample, ncol = 3) + geom_vline(xintercept = 0, linetype = "dashed") + scale_x_continuous("predicted R0") + scale_y_continuous("Frequency")
Pairs plot
pairs(fit_normal, pars = c(params_to_estimate, "lp__"), las = 1)
Pre-process informative priors
ids <- seq_len(nrow(a_samps)) a_samps_2 <- a_samps %>% mutate(trait = "a", id = ids) b_samps_2 <- b_samps %>% mutate(trait = "b", id = ids) c_samps_2 <- c_samps %>% mutate(trait = "c", id = ids) PDR_samps_2 <- PDR_samps %>% mutate(trait = "PDR", id = ids) lf_samps_2 <- lf_samps %>% mutate(trait = "lf", id = ids) %>% rename(c = qd) %>% select(-n.qd) all_samps <- rbind(a_samps_2, b_samps_2, c_samps_2, PDR_samps_2, lf_samps_2) all_samps_m <- melt(all_samps, id.vars = c("id", "trait"), measure.vars = c("T0", "Tm", "c"), variable.name = "parameter") all_samps_m$trait_par <- paste(all_samps_m$parameter, all_samps_m$trait, sep = "_") all_samps_m$trait_par <- factor(all_samps_m$trait_par, levels = c("c_a", "T0_a", "Tm_a", "c_b", "T0_b", "Tm_b", "c_c", "T0_c", "Tm_c", "c_lf", "T0_lf", "Tm_lf", "c_PDR", "T0_PDR", "Tm_PDR"))
Plot informative priors (NOTE: I will use these in the next fit. I just want to make sure I am fitting a sensible likelihood first).
ggplot(data = all_samps_m, mapping = aes(x = value)) + geom_histogram(col = "white") + facet_wrap(~ trait_par, scales = "free") + scale_x_continuous("Trait value", labels = function(x) format(x, scientific = FALSE)) + scale_y_continuous("Frequency") + ggtitle("Informative priors (Posterior densities from Mordecai et al. 2017)") + theme_bw() + theme(axis.text = element_text(size = 6))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.