knitr::opts_chunk$set(echo = TRUE)

require(tidyverse)
require(viridis)
require(knitcitations)
  cleanbib()
  cite_options(cite.style = "numeric",
               citation_format = "pandoc")

comm_haem_sums <- read_rds("../../data/senegal_community_haematobium_summaries.rds")  
devtools::load_all()

#define some additional functions
Reff_from_R0_W <- function(R0, W, kap, zeta){
  Reff <- R0*phi_Wk(W, kap)*rho_Wk(W, zeta, kap)

  return(Reff)
}

Intensity_from_W_kap_zeta_m <- function(W, kap, zeta, m){
  0.5*W*phi_Wk(W, kap)*rho_Wk(W, zeta, kap)*m
}

Prev_from_W_kap <- function(W, kap){
  1-(2*(1+W/(2*kap))^-kap)+(1+W/kap)^-kap
}

Question 1: How does the population dist'n of infection intensity change following interventions and what are the implications for control?

Hypothesis 1: Individual susceptibility is the main determinant of risk and of reinfection patterns, and does not change over time, therefore the dist'n of infection intensity is likely to remain relatively stable over time, but mean infection intensity is likely to decrease (same $\kappa$, decreased mean infection intensity)

Hypothesis 2: Individual exposures become more rare, and worm acquisition becomes more stochastic, leading to even more skewed distributions as transmission is reduced and mean intensity decreases (decreased $\kappa$ and mean infection intensity)

Datasets

Approaches

IBM model linked with (Spatially distributed?) snail infection model

Explicitly model individual variability in susceptibility and cercarial exposure as in r citet("10.4269/ajtmh.14-0691"), but link it with a snail infection model. Key phenomena to consider are extremely rare detections of both cercariae and infected snails at low human worm burdens.

Village-level individual infection intensities moving from ~endemic setting to low/near elimination setting

Does negative binomial dist'n remain a good descriptor of population dist'n of infection? If so, what is the relationship between its parameters and control efforts? What are the implications of dynamic distributions for control and elimination efforts?

Question 2: What explains high rebound rates following MDA and persistent hotspot phenomenon?

Hypotheses

Hypothesis 1: High transmission rates cause individuals to re-acquire infection rapidly following MDA. High transmission could be explained by a number of factors including a large reservoir of infection in the snail population, in connected human populations, or in zoonotic reservoirs; high exposure rates due to common water contact with infected water bodies; or poor sanitation that causes a steady input of infectious material into the environmental reservoir even after treatment (related to infection reservoir)

Hypothesis 2: Imperfect treatment causes individuals, particularly those with heavy infections, to remain infected, albeit with reduced intensity

Hypothesis 3: Negative density dependent effects allow for increased egg output even among reduced parasite populations, meaning infection intensity is reduced, but transmission intensity remains similar (could be related to H2 if heavily infected individuals retain a few mated pairs that produce more eggs)

Datasets

Approaches

Theory of critical transitions

Data

Longitudinal infection data and $R_0$ estimates from Senegal communities

comm_haem_sums %>% 
  mutate(base_prev_class = factor(base_prev_class, levels = c("High", "Med", "Low"))) %>% 
  ggplot(aes(x = year.x, y = prev, col = school, group = school, lty = Intervention)) +
    geom_line(size = 1.1) +
    geom_point(aes(size = samp_size)) +
    theme_classic() +
    facet_wrap(~base_prev_class) +
    theme(legend.position = "bottom",
          strip.background = element_rect(fill = "black"),
          strip.text = element_text(color = "white")) +
    scale_x_discrete(name = "Study Year",
                     breaks = c(2016, 2017, 2018),
                     labels = c("Y0", "Y1", "Y2")) +
    ylim(c(0,1)) +
    labs(y = "Community prevalence",
         title = "S. haematobium prevalence among SAC over time",
         subtitle = "Stratified by high, medium, and low starting prevalence") +
    guides(col=guide_legend(ncol=4))

comm_haem_sums %>% 
  mutate(base_prev_class = factor(base_prev_class, levels = c("High", "Med", "Low"))) %>% 
  ggplot(aes(x = year.x, y = epmL_mean, col = school, group = school, lty = Intervention)) +
    geom_line(size = 1.2) +
    geom_point(aes(size = samp_size)) +
    theme_classic() +
    facet_wrap(~base_prev_class) +
    theme(legend.position = "bottom",
          strip.background = element_rect(fill = "black"),
          strip.text = element_text(color = "white")) +
    scale_x_discrete(name = "Study Year",
                     breaks = c(2016, 2017, 2018),
                     labels = c("Y0", "Y1", "Y2")) +
    labs(y = "mean infection intensity eggs per 10mL urine",
         title = "S. haematobium infection intensity among SAC over time",
         subtitle = "Stratified by high, medium, and low starting prevalence") +
    guides(col=guide_legend(ncol=4))

comm_haem_sums %>% 
  filter(year.x == 2016) %>% 
  ggplot(aes(x = cheev_worms, y = cheev_worms_r0, col = school, pch = base_prev_class, size = samp_size)) +
    geom_point() +
    theme_bw() +
    theme(legend.position = "bottom") +
    labs(x = "Mean egg burden (eggs/10mL)",
         y = expression(R[0]~estimate),
         title = "Baseline egg burdens and resulting R0 estimates",
         pch = "Base class") +
    guides(col=guide_legend(ncol=4))

Simulations

sim_w <- function(w_start, kap, m, zeta, n_ppl,
                  t_tot, t_step, events){
  #Estimate R0 from starting worm burden
    R0 <- w_start_get_r0(w_start, kap, zeta)

  # Initiate vector to fill
    w_vec <- vector("numeric", length = t_tot/t_step)
    w_vec[1] <- w_start

  # Simulate by t_step for total time, implementing events
    for(t in c(1:t_tot/t_step)){
      if(t %in% events[["times"]]){
        w_vec[t+1] = w_vec[t]*R0*phi_Wk(w_vec[t], kap)*rho_Wk(w_vec[t], zeta, kap)*events[["event"]][which(events[["times"]] == t)]
      } else {
        w_vec[t+1] = w_vec[t]*R0*phi_Wk(w_vec[t], kap)*rho_Wk(w_vec[t], zeta, kap)
      }

    }

  # Convert time series of worm burden to Reff, egg output,  and prevalence estimates with stochasticity
    Reff_vec = sapply(w_vec, function(w){
      R0*phi_Wk(w, kap)*rho_Wk(w, zeta, kap)
    })

    egg_vec = sapply(w_vec, function(w){
      mean(rnbinom(n_ppl, size = kap, 
                   mu = w*0.5*phi_Wk(w, kap)*rho_Wk(w, zeta, kap)*m))
    })

    prev_vec = 1-(2*(1+w_vec/(2*kap))^-kap)+(1+w_vec/kap)^-kap

  # Return time series of worm burden and simulated egg output
    return(data.frame(time = c(0:t_tot/t_step),
                      W = w_vec,
                      R = Reff_vec,
                      E = egg_vec,
                      P = prev_vec))
}

R0_2.kap_04.m_20.z_05 <- sim_w(w_start = 20,
                               kap = 0.4, m = 20, 
                               zeta = 0.05, n_ppl = 50,
                               t_tot = 365*3, t_step = 1,
                               events = data.frame(times = c(1:2)*365,
                                                   event = 0.06))

  R0_2.kap_04.m_20.z_05 %>% 
    gather("Var", "Val", W:P) %>% 
    ggplot(aes(x = time, y = Val)) +
      geom_line() +
      theme_bw() +
      facet_grid(Var~., scales = "free_y") +
      scale_x_continuous(breaks = c(1:3)*365,
                         labels = c(1:3),
                         name = "time (yrs)")

Looks like rebound is way too fast, maybe because the effect of negative density dependence is too strong?

R0_2.kap_04.m_20.z_01 <- sim_w(w_start = 20,
                               kap = 0.4, m = 20, 
                               zeta = 0.01, n_ppl = 50,
                               t_tot = 365*3, t_step = 1,
                               events = data.frame(times = c(1:2)*365,
                                                   event = 0.06))

  R0_2.kap_04.m_20.z_01 %>% 
    gather("Var", "Val", W:P) %>% 
    ggplot(aes(x = time, y = Val)) +
      geom_line() +
      theme_bw() +
      facet_grid(Var~., scales = "free_y") +
      scale_x_continuous(breaks = c(1:3)*365,
                         labels = c(1:3),
                         name = "time (yrs)")

Better but still pretty fast. Things are sped up due to the simplified system though. Let's try with an application to data

dt_dat <- comm_haem_sums %>% filter(school == "DT")

dt_plot_dat <- dt_dat %>% 
  mutate(time = c(0:2)*364,
         E = epmL_mean,
         P = prev,
         W = cheev_worms) %>% 
  dplyr::select(time, E, P, W) %>% 
  gather("Var", "Val", E:W)

dt_phase_plane <- expand.grid(R0 = seq(1,7,0.1),
                              W = exp_seq(1e-3, 200, 500),
                              kap = dt_dat %>% filter(year.x == 2016) %>% pull(epmL_disp),
                              zeta = 0.01) %>% 
  mutate(Intensity = mapply(Intensity_from_W_kap_zeta_m, 
                            W, kap, zeta,
                            MoreArgs = list(m = 15)),
         Prevalence = mapply(Prev_from_W_kap,
                             W, kap),
         Reff = mapply(Reff_from_R0_W, 
                       R0, W, kap, zeta))

dt_phase_plane %>% 
  ggplot(aes(x = R0, y = W, fill = Reff, z = Reff)) +
    theme_classic() +
    geom_tile() +
    geom_contour(breaks = 1, col = "white") +
    scale_y_continuous(trans = "log", 
                       breaks = c(1e-3, 1, 100), 
                       labels = c("0.001", "1", "100")) +
    scale_fill_viridis(option = "magma") +
    annotate(geom = "point",
             x = w_start_get_r0(dt_dat %>% filter(year.x == 2016) %>% pull(cheev_worms), 
                                dt_dat %>% filter(year.x == 2016) %>% pull(epmL_disp),
                                zeta = 0.01),
             y = dt_dat %>% filter(year.x == 2016) %>% pull(cheev_worms), 
             col = "white") +
    labs(title = "DT proposed Phase Plane",
         subtitle = "White point indicates endemic equilibrium")



dt_sim <- sim_w(w_start = dt_dat %>% filter(year.x == 2016) %>% pull(cheev_worms),
                kap =  dt_dat %>% filter(year.x == 2016) %>% pull(epmL_disp),
                m = 20, zeta = 0.01,
                n_ppl = dt_dat %>% filter(year.x == 2016) %>% pull(samp_size),
                t_tot = 365*3, t_step = 1,
                events = data.frame(times = c(0:1)*365+30,
                                                   event = 0.06))

dt_sim %>% 
  gather("Var", "Val", W:P) %>% 
    ggplot(aes(x = time, y = Val)) +
      geom_line() +
      theme_bw() + 
      geom_point(data = dt_plot_dat, 
                 aes(x = time, y = Val),col = "red") +
      facet_grid(Var~., scales = "free_y") +
      scale_x_continuous(breaks = c(1:3)*365,
                         labels = c(1:3),
                         name = "time (yrs)")

Relationship between egg shedding, $\mathcal{E}(W,\kappa)$, and mean worm burden, $W$

In order to reduce the dimensionality of the system, we want a simple algebraic relationship between mean egg output, $\mathcal{E}$, and mean worm burden, $W$, that incorporates assumptions of mean egg output per mated female worm and density dependencies on mating probability and fecundity.

get_egg_output <- function(W, kap, zeta, m){
  0.5*W*phi_Wk(W, kap)*rho_Wk(W, zeta, kap)*m
}

w_to_eggs <- expand.grid(W = exp_seq(1e-4, 100, 200),
                         kap = c(0.01, 0.1, 1.0),
                         zeta = c(0.01, 0.05, 0.1),
                         m = c(5, 15, 25)) %>% 
  mutate(eggs = mapply(get_egg_output, 
                       W, kap, zeta, m))

w_to_eggs %>% 
  mutate(zeta2 = factor(zeta, labels = c("zeta=0.01", "zeta=0.05", "zeta=0.1")),
         m2 = factor(m, labels = c("m=5", "m=15", "m=25"))) %>% 
  ggplot(aes(x = W, y = eggs, col = as.factor(kap))) +
    geom_line() +
    theme_bw() +
    facet_grid(zeta2~m2) +
    labs(x = "Mean worm burden (W)",
         y = "Mean egg output (eggs/10mL)",
         title = "Factors affecting mean egg output given mean worm burden")

From Senegal infection data, values of egg output range from about 1 to 150 and values of kappa range from about 0.05 to 0.9. This implies that $m\approx15$ and $\zeta\approx0.01$, but let's see how the data match up with these estimates

plot(sapply(exp_seq(2,50,200), R0_egg_kap_zeta_m, 
            kap = 0.25, zeta = 0.001, m = 35),
     exp_seq(2,50,200),
     type = "l")


sen_dat <- sen_dat %>% 
  mutate(W_from_eggs = mapply(eggs_kap_get_W,
                              nb_mu, nb_k,
                              MoreArgs = list(zeta = 0.01, m = 15)),
         R0_from_eggs = mapply(R0_egg_kap_zeta_m,
                               nb_mu, nb_k,
                               MoreArgs = list(zeta = 0.01, m = 15)))

base_r0s <- sen_dat %>% filter(year.x == 2018) %>% 
  mutate(r0_from_eggs = mapply(R0_egg_kap_zeta_m,
                               nb_mu, nb_k,
                               MoreArgs = list(zeta = 0.01, m = 15)))

w_egg_test <- expand.grid(W = exp_seq(1e-4, 200, 200),
                          kap = exp_seq(0.01, 1.0, 15)) %>% 
  mutate(eggs = mapply(get_egg_output,
                       W, kap,
                       MoreArgs = list(zeta = 0.01, m = 15)))

w_egg_test %>% 
  ggplot(aes(x = eggs, y = kap)) +
    geom_line()
Reff_Grid <- expand.grid(R0 = seq(1,7,0.1),
                         W = exp_seq(1e-3, 200, 500),
                         kap = c(0.01, 0.1, 1),
                         zeta = 0.01) %>% 
  mutate(Intensity = mapply(Intensity_from_W_kap_zeta_m, 
                            W, kap, zeta,
                            MoreArgs = list(m = 15)),
         Prevalence = mapply(Prev_from_W_kap,
                             W, kap),
         Reff = mapply(Reff_from_R0_W, 
                       R0, W, kap, zeta))

Reff_Grid %>% ggplot(aes(x = W, y = Intensity, col = as.factor(kap))) + 
  geom_line() + theme_bw() 

Reff_Grid %>% 
  ggplot(aes(x = R0, y = W, fill = Reff, z = Reff)) +
    theme_classic() +
    geom_tile() +
    geom_contour(breaks = 1, col = "white") +
    scale_y_continuous(trans = "log", 
                       breaks = c(1e-3, 1, 100, 500), 
                       labels = c("0.001", "1", "100", "500")) +
    scale_fill_viridis(option = "magma") +
    facet_grid(kap~.) +
    labs(title = "Phase planes of W/R0 relationships across values of dispersion parameter")

Is $R_0$ correlated with net change in infection intensity following MDA?

comm_wide <- comm_haem_sums %>% 
  dplyr::select(school, year.x, samp_size, epmL_mean, epmL_disp, prev, Intervention) %>% 
  spread(year.x, epmL_mean) %>% 
  group_by(school) %>% 
  summarise(w_2016 = `2016`[!is.na(`2016`)],
            w_2017 = `2017`[!is.na(`2017`)],
            w_2018 = `2018`[!is.na(`2018`)]) %>% 
  mutate(w_change_16_17 = w_2017-w_2016,
         w_change_17_18 = w_2018-w_2017,
         w_change_16_18 = w_2018-w_2016,
         post_2016 = w_2016*0.06,
         post_2017 = w_2017*0.06,
         post_2018 = w_2018*0.06,
         lambda_2016 = w_2016*(1/(5*365)),
         lambda_2017 = (w_2017-post_2016)/365,
         lambda_2018 = (w_2018-post_2017)/365,
         bbr_2017 = ((w_2017-post_2016)/(((post_2016+w_2017)/2)*365)),
         bbr_2018 = ((w_2018-post_2017)/(((post_2017+w_2018)/2)*365)),
         Reff_2017 = bbr_2017/(1/(5*365))+1,
         Reff_2018 = bbr_2018/(1/(5*365))+1) %>% 
  left_join(comm_haem_sums %>% 
              filter(year.x == 2016) %>% 
              dplyr::select(school, cheev_worms_r0))

comm_wide %>% 
  ggplot(aes(x = cheev_worms_r0, y = w_change_16_17)) +
    geom_point() +
    theme_bw()
lm(w_change_16_17 ~ cheev_worms_r0, data = comm_wide) %>% summary()
r0_mean_disp_mod <- lm(cheev_worms_r0 ~ epmL_mean + epmL_disp, data = comm_haem_sums %>% filter(year.x == 2016))
  summary(r0_mean_disp_mod)

comm_haem_sums_2016 <- comm_haem_sums %>% 
  filter(year.x == 2016) %>% 
  mutate(r0_mean_disp_pred = predict(r0_mean_disp_mod))

comm_haem_sums_2016 %>% 
  ggplot(aes(x = cheev_worms_r0, y = r0_mean_disp_pred)) +
    geom_point() +
    theme_bw() +
    geom_abline(slope = 1, intercept = 0) +
    labs(x = expression(Estimated~R[0]),
         y = expression(Modelled~R[0]),
         title = expression(Modelled~vs~Predicted~R[0]))

$R_0$ estimates derived from converting egg burden to worm burden based on relationship in cheever study with model based on mean egg burden and distribution of eggs amongst SAC, so they're not exactly independent samples...

References

r write.bibtex(file = "ideas_refs.bib")



cmhoove14/DDNTD documentation built on Nov. 23, 2019, 7:04 p.m.