knitr::opts_chunk$set(echo = TRUE) require(tidyverse) require(viridis) require(knitcitations) cleanbib() cite_options(cite.style = "numeric", citation_format = "pandoc") comm_haem_sums <- read_rds("../../data/senegal_community_haematobium_summaries.rds") devtools::load_all() #define some additional functions Reff_from_R0_W <- function(R0, W, kap, zeta){ Reff <- R0*phi_Wk(W, kap)*rho_Wk(W, zeta, kap) return(Reff) } Intensity_from_W_kap_zeta_m <- function(W, kap, zeta, m){ 0.5*W*phi_Wk(W, kap)*rho_Wk(W, zeta, kap)*m } Prev_from_W_kap <- function(W, kap){ 1-(2*(1+W/(2*kap))^-kap)+(1+W/kap)^-kap }
Explicitly model individual variability in susceptibility and cercarial exposure as in r citet("10.4269/ajtmh.14-0691")
, but link it with a snail infection model. Key phenomena to consider are extremely rare detections of both cercariae and infected snails at low human worm burdens.
Does negative binomial dist'n remain a good descriptor of population dist'n of infection? If so, what is the relationship between its parameters and control efforts? What are the implications of dynamic distributions for control and elimination efforts?
comm_haem_sums %>% mutate(base_prev_class = factor(base_prev_class, levels = c("High", "Med", "Low"))) %>% ggplot(aes(x = year.x, y = prev, col = school, group = school, lty = Intervention)) + geom_line(size = 1.1) + geom_point(aes(size = samp_size)) + theme_classic() + facet_wrap(~base_prev_class) + theme(legend.position = "bottom", strip.background = element_rect(fill = "black"), strip.text = element_text(color = "white")) + scale_x_discrete(name = "Study Year", breaks = c(2016, 2017, 2018), labels = c("Y0", "Y1", "Y2")) + ylim(c(0,1)) + labs(y = "Community prevalence", title = "S. haematobium prevalence among SAC over time", subtitle = "Stratified by high, medium, and low starting prevalence") + guides(col=guide_legend(ncol=4)) comm_haem_sums %>% mutate(base_prev_class = factor(base_prev_class, levels = c("High", "Med", "Low"))) %>% ggplot(aes(x = year.x, y = epmL_mean, col = school, group = school, lty = Intervention)) + geom_line(size = 1.2) + geom_point(aes(size = samp_size)) + theme_classic() + facet_wrap(~base_prev_class) + theme(legend.position = "bottom", strip.background = element_rect(fill = "black"), strip.text = element_text(color = "white")) + scale_x_discrete(name = "Study Year", breaks = c(2016, 2017, 2018), labels = c("Y0", "Y1", "Y2")) + labs(y = "mean infection intensity eggs per 10mL urine", title = "S. haematobium infection intensity among SAC over time", subtitle = "Stratified by high, medium, and low starting prevalence") + guides(col=guide_legend(ncol=4)) comm_haem_sums %>% filter(year.x == 2016) %>% ggplot(aes(x = cheev_worms, y = cheev_worms_r0, col = school, pch = base_prev_class, size = samp_size)) + geom_point() + theme_bw() + theme(legend.position = "bottom") + labs(x = "Mean egg burden (eggs/10mL)", y = expression(R[0]~estimate), title = "Baseline egg burdens and resulting R0 estimates", pch = "Base class") + guides(col=guide_legend(ncol=4))
sim_w <- function(w_start, kap, m, zeta, n_ppl, t_tot, t_step, events){ #Estimate R0 from starting worm burden R0 <- w_start_get_r0(w_start, kap, zeta) # Initiate vector to fill w_vec <- vector("numeric", length = t_tot/t_step) w_vec[1] <- w_start # Simulate by t_step for total time, implementing events for(t in c(1:t_tot/t_step)){ if(t %in% events[["times"]]){ w_vec[t+1] = w_vec[t]*R0*phi_Wk(w_vec[t], kap)*rho_Wk(w_vec[t], zeta, kap)*events[["event"]][which(events[["times"]] == t)] } else { w_vec[t+1] = w_vec[t]*R0*phi_Wk(w_vec[t], kap)*rho_Wk(w_vec[t], zeta, kap) } } # Convert time series of worm burden to Reff, egg output, and prevalence estimates with stochasticity Reff_vec = sapply(w_vec, function(w){ R0*phi_Wk(w, kap)*rho_Wk(w, zeta, kap) }) egg_vec = sapply(w_vec, function(w){ mean(rnbinom(n_ppl, size = kap, mu = w*0.5*phi_Wk(w, kap)*rho_Wk(w, zeta, kap)*m)) }) prev_vec = 1-(2*(1+w_vec/(2*kap))^-kap)+(1+w_vec/kap)^-kap # Return time series of worm burden and simulated egg output return(data.frame(time = c(0:t_tot/t_step), W = w_vec, R = Reff_vec, E = egg_vec, P = prev_vec)) } R0_2.kap_04.m_20.z_05 <- sim_w(w_start = 20, kap = 0.4, m = 20, zeta = 0.05, n_ppl = 50, t_tot = 365*3, t_step = 1, events = data.frame(times = c(1:2)*365, event = 0.06)) R0_2.kap_04.m_20.z_05 %>% gather("Var", "Val", W:P) %>% ggplot(aes(x = time, y = Val)) + geom_line() + theme_bw() + facet_grid(Var~., scales = "free_y") + scale_x_continuous(breaks = c(1:3)*365, labels = c(1:3), name = "time (yrs)")
Looks like rebound is way too fast, maybe because the effect of negative density dependence is too strong?
R0_2.kap_04.m_20.z_01 <- sim_w(w_start = 20, kap = 0.4, m = 20, zeta = 0.01, n_ppl = 50, t_tot = 365*3, t_step = 1, events = data.frame(times = c(1:2)*365, event = 0.06)) R0_2.kap_04.m_20.z_01 %>% gather("Var", "Val", W:P) %>% ggplot(aes(x = time, y = Val)) + geom_line() + theme_bw() + facet_grid(Var~., scales = "free_y") + scale_x_continuous(breaks = c(1:3)*365, labels = c(1:3), name = "time (yrs)")
Better but still pretty fast. Things are sped up due to the simplified system though. Let's try with an application to data
dt_dat <- comm_haem_sums %>% filter(school == "DT") dt_plot_dat <- dt_dat %>% mutate(time = c(0:2)*364, E = epmL_mean, P = prev, W = cheev_worms) %>% dplyr::select(time, E, P, W) %>% gather("Var", "Val", E:W) dt_phase_plane <- expand.grid(R0 = seq(1,7,0.1), W = exp_seq(1e-3, 200, 500), kap = dt_dat %>% filter(year.x == 2016) %>% pull(epmL_disp), zeta = 0.01) %>% mutate(Intensity = mapply(Intensity_from_W_kap_zeta_m, W, kap, zeta, MoreArgs = list(m = 15)), Prevalence = mapply(Prev_from_W_kap, W, kap), Reff = mapply(Reff_from_R0_W, R0, W, kap, zeta)) dt_phase_plane %>% ggplot(aes(x = R0, y = W, fill = Reff, z = Reff)) + theme_classic() + geom_tile() + geom_contour(breaks = 1, col = "white") + scale_y_continuous(trans = "log", breaks = c(1e-3, 1, 100), labels = c("0.001", "1", "100")) + scale_fill_viridis(option = "magma") + annotate(geom = "point", x = w_start_get_r0(dt_dat %>% filter(year.x == 2016) %>% pull(cheev_worms), dt_dat %>% filter(year.x == 2016) %>% pull(epmL_disp), zeta = 0.01), y = dt_dat %>% filter(year.x == 2016) %>% pull(cheev_worms), col = "white") + labs(title = "DT proposed Phase Plane", subtitle = "White point indicates endemic equilibrium") dt_sim <- sim_w(w_start = dt_dat %>% filter(year.x == 2016) %>% pull(cheev_worms), kap = dt_dat %>% filter(year.x == 2016) %>% pull(epmL_disp), m = 20, zeta = 0.01, n_ppl = dt_dat %>% filter(year.x == 2016) %>% pull(samp_size), t_tot = 365*3, t_step = 1, events = data.frame(times = c(0:1)*365+30, event = 0.06)) dt_sim %>% gather("Var", "Val", W:P) %>% ggplot(aes(x = time, y = Val)) + geom_line() + theme_bw() + geom_point(data = dt_plot_dat, aes(x = time, y = Val),col = "red") + facet_grid(Var~., scales = "free_y") + scale_x_continuous(breaks = c(1:3)*365, labels = c(1:3), name = "time (yrs)")
In order to reduce the dimensionality of the system, we want a simple algebraic relationship between mean egg output, $\mathcal{E}$, and mean worm burden, $W$, that incorporates assumptions of mean egg output per mated female worm and density dependencies on mating probability and fecundity.
get_egg_output <- function(W, kap, zeta, m){ 0.5*W*phi_Wk(W, kap)*rho_Wk(W, zeta, kap)*m } w_to_eggs <- expand.grid(W = exp_seq(1e-4, 100, 200), kap = c(0.01, 0.1, 1.0), zeta = c(0.01, 0.05, 0.1), m = c(5, 15, 25)) %>% mutate(eggs = mapply(get_egg_output, W, kap, zeta, m)) w_to_eggs %>% mutate(zeta2 = factor(zeta, labels = c("zeta=0.01", "zeta=0.05", "zeta=0.1")), m2 = factor(m, labels = c("m=5", "m=15", "m=25"))) %>% ggplot(aes(x = W, y = eggs, col = as.factor(kap))) + geom_line() + theme_bw() + facet_grid(zeta2~m2) + labs(x = "Mean worm burden (W)", y = "Mean egg output (eggs/10mL)", title = "Factors affecting mean egg output given mean worm burden")
From Senegal infection data, values of egg output range from about 1 to 150 and values of kappa range from about 0.05 to 0.9. This implies that $m\approx15$ and $\zeta\approx0.01$, but let's see how the data match up with these estimates
plot(sapply(exp_seq(2,50,200), R0_egg_kap_zeta_m, kap = 0.25, zeta = 0.001, m = 35), exp_seq(2,50,200), type = "l") sen_dat <- sen_dat %>% mutate(W_from_eggs = mapply(eggs_kap_get_W, nb_mu, nb_k, MoreArgs = list(zeta = 0.01, m = 15)), R0_from_eggs = mapply(R0_egg_kap_zeta_m, nb_mu, nb_k, MoreArgs = list(zeta = 0.01, m = 15))) base_r0s <- sen_dat %>% filter(year.x == 2018) %>% mutate(r0_from_eggs = mapply(R0_egg_kap_zeta_m, nb_mu, nb_k, MoreArgs = list(zeta = 0.01, m = 15))) w_egg_test <- expand.grid(W = exp_seq(1e-4, 200, 200), kap = exp_seq(0.01, 1.0, 15)) %>% mutate(eggs = mapply(get_egg_output, W, kap, MoreArgs = list(zeta = 0.01, m = 15))) w_egg_test %>% ggplot(aes(x = eggs, y = kap)) + geom_line()
Reff_Grid <- expand.grid(R0 = seq(1,7,0.1), W = exp_seq(1e-3, 200, 500), kap = c(0.01, 0.1, 1), zeta = 0.01) %>% mutate(Intensity = mapply(Intensity_from_W_kap_zeta_m, W, kap, zeta, MoreArgs = list(m = 15)), Prevalence = mapply(Prev_from_W_kap, W, kap), Reff = mapply(Reff_from_R0_W, R0, W, kap, zeta)) Reff_Grid %>% ggplot(aes(x = W, y = Intensity, col = as.factor(kap))) + geom_line() + theme_bw() Reff_Grid %>% ggplot(aes(x = R0, y = W, fill = Reff, z = Reff)) + theme_classic() + geom_tile() + geom_contour(breaks = 1, col = "white") + scale_y_continuous(trans = "log", breaks = c(1e-3, 1, 100, 500), labels = c("0.001", "1", "100", "500")) + scale_fill_viridis(option = "magma") + facet_grid(kap~.) + labs(title = "Phase planes of W/R0 relationships across values of dispersion parameter")
comm_wide <- comm_haem_sums %>% dplyr::select(school, year.x, samp_size, epmL_mean, epmL_disp, prev, Intervention) %>% spread(year.x, epmL_mean) %>% group_by(school) %>% summarise(w_2016 = `2016`[!is.na(`2016`)], w_2017 = `2017`[!is.na(`2017`)], w_2018 = `2018`[!is.na(`2018`)]) %>% mutate(w_change_16_17 = w_2017-w_2016, w_change_17_18 = w_2018-w_2017, w_change_16_18 = w_2018-w_2016, post_2016 = w_2016*0.06, post_2017 = w_2017*0.06, post_2018 = w_2018*0.06, lambda_2016 = w_2016*(1/(5*365)), lambda_2017 = (w_2017-post_2016)/365, lambda_2018 = (w_2018-post_2017)/365, bbr_2017 = ((w_2017-post_2016)/(((post_2016+w_2017)/2)*365)), bbr_2018 = ((w_2018-post_2017)/(((post_2017+w_2018)/2)*365)), Reff_2017 = bbr_2017/(1/(5*365))+1, Reff_2018 = bbr_2018/(1/(5*365))+1) %>% left_join(comm_haem_sums %>% filter(year.x == 2016) %>% dplyr::select(school, cheev_worms_r0)) comm_wide %>% ggplot(aes(x = cheev_worms_r0, y = w_change_16_17)) + geom_point() + theme_bw() lm(w_change_16_17 ~ cheev_worms_r0, data = comm_wide) %>% summary()
r0_mean_disp_mod <- lm(cheev_worms_r0 ~ epmL_mean + epmL_disp, data = comm_haem_sums %>% filter(year.x == 2016)) summary(r0_mean_disp_mod) comm_haem_sums_2016 <- comm_haem_sums %>% filter(year.x == 2016) %>% mutate(r0_mean_disp_pred = predict(r0_mean_disp_mod)) comm_haem_sums_2016 %>% ggplot(aes(x = cheev_worms_r0, y = r0_mean_disp_pred)) + geom_point() + theme_bw() + geom_abline(slope = 1, intercept = 0) + labs(x = expression(Estimated~R[0]), y = expression(Modelled~R[0]), title = expression(Modelled~vs~Predicted~R[0]))
$R_0$ estimates derived from converting egg burden to worm burden based on relationship in cheever study with model based on mean egg burden and distribution of eggs amongst SAC, so they're not exactly independent samples...
r write.bibtex(file = "ideas_refs.bib")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.