In lorecatta/DENVclimate:

knitr::opts_chunk$set(echo = TRUE)

Background

The reproduction number $R_{0}$ - the number of secondary infections generated by a single typical primary infection in an immunologically naïve population - for mosquito borne pathogens is expressed by the Ross Macdonald model. The model, which describes the effect of the different mosquito traits on disease transmission can be written in a temperature-dependent form which captures the mechanistic effect of temperature on the mosquito traits:

$$ R_{0}(T) = \frac{a^{2}(T)mb(T)c(T)e^{-\mu(T)v(T)}}{\mu(T)\gamma} $$

where (T) means that the trait is a function of temperature; a is the biting rate per mosquito per day, m is the equilibrium number of mosquitoes per human, b is the human infectivity to mosquitoes, c is the infectiousness of mosquitoes to humans, $\gamma$ is the recovery rate of the human host (with $\frac{1}{\gamma}$ the average infectious period in humans), v is the extrinsic incubation period (with $\frac{1}{v}$ = extrinsic incubation rate) and $\mu$ is the adult mosquito death rate per day (with $\frac{1}{\mu}$ = mosquito lifespan). The mosquito to humans ratio is assumed to be temperature independent for now. Following Mordecai et al. 2017, we express a, b, c, $\frac{1}{\mu}$ and $\frac{1}{v}$ as unimodal functions (either Quadratic, $-c(T-T_{0})(T-T_{m})$ or Brière, $cT(T-T_{0})(T_{m}-T)^{1/2}$) of temperature:

$$ a(T) = cT(T-T{0})(T{m}-T)^{1/2} $$ $$ b(T) = cT(T-T{0})(T{m}-T)^{1/2} $$ $$ c(T) = cT(T-T{0})(T{m}-T)^{1/2} $$ $$ \frac{1}{\mu}(T) = -cT(T-T{0})(T-T{m}) $$ $$ \frac{1}{v}(T) = cT(T-T{0})(T{m}-T)^{1/2} $$

where $T_{0}$ and $T_{m}$ are the minimum and maximum temperature for transmission and c is a positive constant. By substituting equations 2, 3, 4, 5 and 6 in equation 1 we obtain an $R_{0}$ model which is a mechanistic function of temperature T through 15 parameters (too long to write out).

We fit the $R_{0}$ model to a dataset of 382 $R_{0}$ estimates using a Bayesian framework and uninformative priors ($T_{0} \sim {\sf Uniform} (0, 24)$, $T_{m} \sim {\sf Uniform}(25, 45)$, $c \sim {\sf Gamma}(1, 10)$ for Briere and $c \sim {\sf Gamma}(1, 1)$ for Quadratic fits) chosen to restrict each parameter to its biologically realistic range (i.e., T0 < Tm and we assumed that temperatures below 0C and above 45C were lethal).

We assume that the observed $R_{0}$ estimates are normally distributed with mean $\theta$ predicted by the $R_{0}$ equation calculated at the observed temperature T in location i and standard deviation $\sigma$:

$$ y_i \sim {\sf Normal}(\theta_{i}, \sigma) $$

Model fitting was implemented in Stan and R.

Experiment 1. Fit a model with only three parameters and vague priors.

Load the repository and other packages

devtools::load_all()

library(rstan)
library(dplyr)
library(stringr)
library(reshape2)

Define parameters

T_inf <- 6 # from Zika paper(T_inf = 1/human recovery rate)

params_to_estimate <- c("c_a", "T0_a", "Tm_a")

indicator_vars <- c("aMin_indicator", "aMax_indicator")

Load datasets

foi_covariates <- readRDS(file.path("output", "foi_data_cov_rescaled.rds"))

m_values <- readRDS(file.path(file.path("output",
                                        "central_estimates_from_Mordecai"),
                              "m_mean_DayTemp_const_term.rds"))

# posteriors from Mordecai et al. 2017
in_dir <- file.path("output", "termal_response_fits", "informative")
a_samps <- readRDS(file.path(in_dir, "a_samps.rds"))
b_samps <- readRDS(file.path(in_dir, "b_samps.rds"))
c_samps <- readRDS(file.path(in_dir, "c_samps.rds"))
PDR_samps <- readRDS(file.path(in_dir, "PDR_samps.rds"))
lf_samps <- readRDS(file.path(in_dir, "lf_DENV_samps.rds"))

Plot histogram of R0 data

ggplot(data = foi_covariates, mapping = aes(x = R0_1)) +
  geom_histogram(col = "white", bins = 40) +
  scale_x_continuous("Observed R0 value", breaks = seq(0,13,1)) +
  scale_y_continuous("Frequency") +
  coord_cartesian(xlim = c(0, 13)) +
  ggtitle("Distribution of the data") +
  theme_classic()

Prepare the data for stan

R0_values <- foi_covariates$R0_1

R0_values_n <- length(R0_values)

temp_values <- foi_covariates$DayTemp_const_term

stan_dat <- list(J = R0_values_n,
                 y = R0_values,
                 temp = temp_values,
                 m = m_values,
                 T_inf = T_inf)

Create prior distributions for all parameters. These are the same as the ones used by Mordecai et al. 2017 (PlosNTD).

n <- 10000

seq_n <- seq(0, 10, length.out = n)

df_c_a <- data.frame(x = seq_n,
                     y = dgamma(seq_n, shape = 1, rate = 10),
                     trait_par = "c_a")

x_T0_a <- runif(n, min = 0, max = 24)
df_T0_a <- data.frame(x = x_T0_a,
                      y = dunif(x_T0_a, min = 0, max = 24),
                      trait_par = "T0_a")

x_Tm_a <- runif(n, min = 25, max = 45)
df_Tm_a <- data.frame(x = x_Tm_a,
                      y = dunif(x_Tm_a, min = 25, max = 45),
                      trait_par = "Tm_a")

df_c_b <- data.frame(x = seq_n,
                     y = dgamma(seq_n, shape = 1, rate = 10),
                     trait_par = "c_b")

x_T0_b <- runif(n, min = 0, max = 24)
df_T0_b <- data.frame(x = x_T0_b,
                      y = dunif(x_T0_b, min = 0, max = 24),
                      trait_par = "T0_b")

x_Tm_b <- runif(n, min = 25, max = 45)
df_Tm_b <- data.frame(x = x_Tm_b,
                      y = dunif(x_Tm_b, min = 25, max = 45),
                      trait_par = "Tm_b")

df_c_c <- data.frame(x = seq_n,
                     y = dgamma(seq_n, shape = 1, rate = 10),
                     trait_par = "c_c")

x_T0_c <- runif(n, min = 0, max = 24)
df_T0_c <- data.frame(x = x_T0_c,
                      y = dunif(x_T0_c, min = 0, max = 24),
                      trait_par = "T0_c")

x_Tm_c <- runif(n, min = 25, max = 45)
df_Tm_c <- data.frame(x = x_Tm_c,
                      y = dunif(x_Tm_c, min = 25, max = 45),
                      trait_par = "Tm_c")

df_c_lf <- data.frame(x = seq_n,
                      y = dgamma(seq_n, shape = 1, rate = 1),
                      trait_par = "c_lf")

x_T0_lf <- runif(n, min = 0, max = 24)
df_T0_lf <- data.frame(x = x_T0_lf,
                       y = dunif(x_T0_lf, min = 0, max = 24),
                       trait_par = "T0_lf")

x_Tm_lf <- runif(n, min = 25, max = 45)
df_Tm_lf <- data.frame(x = x_Tm_lf,
                       y = dunif(x_Tm_lf, min = 25, max = 45),
                       trait_par = "Tm_lf")

df_c_PDR <- data.frame(x = seq_n,
                       y = dgamma(seq_n, shape = 1, rate = 10),
                       trait_par = "c_PDR")

x_T0_PDR <- runif(n, min = 0, max = 24)
df_T0_PDR <- data.frame(x = x_T0_PDR,
                        y = dunif(x_T0_PDR, min = 0, max = 24),
                        trait_par = "T0_PDR")

x_Tm_PDR <- runif(n, min = 25, max = 45)
df_Tm_PDR <- data.frame(x = x_Tm_PDR,
                        y = dunif(x_Tm_PDR, min = 25, max = 45),
                        trait_par = "Tm_PDR")

all_df <- rbind(df_c_a, df_T0_a, df_Tm_a, df_c_b, df_T0_b, df_Tm_b,
                df_c_c, df_T0_c, df_Tm_c, df_c_lf, df_T0_lf, df_Tm_lf,
                df_c_PDR, df_T0_PDR, df_Tm_PDR)

Plot vague priors

ggplot(data = all_df, mapping = aes(x = x, y = y)) +
  geom_line() +
  facet_wrap(~ trait_par, scales = "free") +
  scale_x_continuous("Trait value",
                     labels = function(x) format(x, scientific = FALSE)) +
  scale_y_continuous("PDF") +
  ggtitle("Vague priors") +
  theme_bw() +
  theme(axis.text = element_text(size = 6))

Fit the stan model

options(mc.cores = 7)

fit_normal <- stan(file = "R0_model.stan",
                   data = stan_dat,
                   iter = 20000,
                   chains = 4,
                   seed = 132532525,
                   control = list(adapt_delta = 0.99))

Post processing

print(fit_normal,
      pars = c(params_to_estimate, "theta", indicator_vars, "lp__"),
      probs = c(0.25, 0.5, 0.75))

list_of_draws <- extract(fit_normal)

ppd_data_sample <- sample(20000:40000, 12)

ppd_data <- t(list_of_draws$SimData[ppd_data_sample,]) %>%
  as.data.frame() %>%
  rename_all(list(name = ~str_replace(., "V", "Sample_"))) %>%
  mutate(id = seq_len(length(R0_values))) %>%
  melt(id.vars = "id",
       variable.name = "sample")

Traceplots

traceplot(fit_normal, params_to_estimate)

Post predictive distribution checks

ggplot(data = ppd_data, mapping = aes(x = value)) +
  geom_histogram(col = "white") +
  facet_wrap(~ sample, ncol = 3) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_x_continuous("predicted R0") +
  scale_y_continuous("Frequency")

Pairs plot

pairs(fit_normal, pars = c(params_to_estimate, "lp__"), las = 1)

Experiment 2. Fit a model with three parameters and informative priors.

Pre-process informative priors

ids <- seq_len(nrow(a_samps))

a_samps_2 <- a_samps %>%
  mutate(trait = "a",
         id = ids)

b_samps_2 <- b_samps %>%
  mutate(trait = "b",
         id = ids)

c_samps_2 <- c_samps %>%
  mutate(trait = "c",
         id = ids)

PDR_samps_2 <- PDR_samps %>%
  mutate(trait = "PDR",
         id = ids)

lf_samps_2 <- lf_samps %>%
  mutate(trait = "lf",
         id = ids) %>%
  rename(c = qd) %>%
  select(-n.qd)

all_samps <- rbind(a_samps_2, b_samps_2, c_samps_2, PDR_samps_2, lf_samps_2)

all_samps_m <- melt(all_samps,
                    id.vars = c("id", "trait"),
                    measure.vars = c("T0", "Tm", "c"),
                    variable.name = "parameter")

all_samps_m$trait_par <- paste(all_samps_m$parameter,
                               all_samps_m$trait,
                               sep = "_")

all_samps_m$trait_par <- factor(all_samps_m$trait_par,
                                levels = c("c_a", "T0_a", "Tm_a",
                                           "c_b", "T0_b", "Tm_b",
                                           "c_c", "T0_c", "Tm_c",
                                           "c_lf", "T0_lf", "Tm_lf",
                                           "c_PDR", "T0_PDR", "Tm_PDR"))

Plot informative priors (NOTE: I will use these in the next fit. I just want to make sure I am fitting a sensible likelihood first).

ggplot(data = all_samps_m, mapping = aes(x = value)) +
  geom_histogram(col = "white") +
  facet_wrap(~ trait_par, scales = "free") +
  scale_x_continuous("Trait value",
                     labels = function(x) format(x, scientific = FALSE)) +
  scale_y_continuous("Frequency") +
  ggtitle("Informative priors (Posterior densities from Mordecai et al. 2017)") +
  theme_bw() +
  theme(axis.text = element_text(size = 6))