knitr::opts_chunk$set( collapse = TRUE, comment = "#>", cache = TRUE ) devtools::load_all()
## Only need to do this step once per session remove.packages("TBornotTB" ) devtools::install_github("skgallagher/TBornotTB") ## Get the latest version from github library(TBornotTB)
In this vignette, we explore the generation scheme and corresponding likelihood for clusters where the root node is unobserved.
In the previous vignette, we explored the significance of a covariate when we assumed that all infections within a cluster could be traced back to an observed infection also within that cluster. That is, we assumed there was a root node in our transmission tree in which a path could always be traced back to.
We now assume that the root node is unobserved.
We simulate outbreak data via the following procedure.
For cluster $k = 1, \dots, K.$
For i = 1, ... For j = 1, ... p[i,j] <- draw_covariate() J[i,j] <- Geometric(p[i,j]) if(J[i,j] > 0){ if(j==1){ Generate_individuals (i+1, 1), ..., (i+1, J[i,j]) }else { Generate_individuals (i+1, J[i, j-1] + 1), ..., (i+1, J[i,j-1] + J[i,j]) } Store_infector_ID() } return (indices, infector IDs, covariates)
Basically, this is a repeated coin flipping procedure where we create a new individual for every successful flip. Once the coin flip is not successful individual we move to the next indexed individual within the same generation. If there are no more individuals within the generation we move on to the first individual within the next generation. We do this until there are no new individuals.
The likelihood for a single cluster is the number is dependent on the smear status unboserved generator $p_0$ along with how many in the cluster the generator infected, $n_0$: $$ L(p_+, p_{-}; n, x) = \sum_{p_0 \in {p_+,p_{-}}}\sum_{i=n_0+1}^{x - 1 - n_0}P(p_0)P(n_0; p_0, n, x) P(i;n, x, p_0, n_0). $$ Here, $P(p_0)$ is the probability of the outside generator's smear status, and $P(n_0;p_0, n, x)$ is the probability of the outside generator infecting $n_0$ others conditioned on the cluster total size. Here $n_0$ is a doubly truncated geometric random variable, meaning that the minimum size of $n_0$ is 1 and the maximum is $n-1$, $$ P(n_0=y;p_0, n, x) = \frac{p_0^{y}(1-p_0)}{(1-p_0)\sum_{y=1}^{n-1}p_0^y } \ = \frac{p_0^y}{\sum_{y=1}^{n-1}p_0^y}. $$
The likleihood of the rest of the cluster (the number of positive transmissions from positive within-cluster nodes $i$) conditioned on the outside generator status and number of infections is then, where $x_0$ is 1 if the outside generator is positive and 0 otherwise, $$ P(i;n,x, p_0, n_0) \propto p_+^{i}p_{-}^{n - 1- x_0 - i} \ = \frac{p_+^{i}p_{-}^{n-1-x_0-i}}{\sum_{i=n_0+1}^{n-2}p_+^{i}p_{-}^{n-2-i}} $$
devtools::load_all() set.seed(2020) inf_params <- c("beta_0" = -1.5, "beta_1" = 1) ## relatively high chance of transmission smear_pos_prob <- .5 max_size <- 50 K <- 50000 ## number of clusters obs_clusters <- simulate_outbreak(K = K, inf_params = inf_params, smear_pos_prob = smear_pos_prob, max_size = max_size, start_at_outsider = TRUE) cluster_summ <- summarize_clusters(obs_clusters)
## Observed clusters cluster_summ %>% dplyr::arrange(n, n_pos) %>% head() %>% knitr::kable() cluster_summ %>% dplyr::arrange(n, n_pos) %>% tail() %>% knitr::kable() n_trials <- 1000 cluster_summary <- cluster_summ t <- proc.time()[3] sampled_trees <- tree_sampler(cluster_summary, B = n_trials, impute_generator = TRUE) print(proc.time()[3] - t)
devtools::load_all() par <- c("beta_0" = 0, "beta_1" = 0) best_pars <- optim(par, fn = loglike_fast, data = cluster_summary, sampled_trees_summary = sampled_trees, cond_on_generator = TRUE, is_truncated = TRUE, return_neg = TRUE, hessian = TRUE) best_pars$par sqrt(diag(solve(best_pars$hessian)))
## Trying a grid beta_0_seq <- seq(-5, 0, length.out = 11) beta_1_seq <- seq(-1.5, 1.5,length.out = 11) par_df <- expand.grid(beta_0 = beta_0_seq, beta_1 = beta_1_seq, like = NA) for(ii in 1:nrow(par_df)){ print(ii) par_df$like[ii] <- loglike_fast(par = c(beta_0 = par_df$beta_0[ii], beta_1 = par_df$beta_1[ii]), data = cluster_summary, sampled_trees_summary = sampled_trees, cond_on_generator = TRUE, is_truncated = FALSE, return_neg = FALSE) } library(ggplot2) ggplot(data = par_df, aes(x = beta_0, y = beta_1, fill = like )) + geom_tile() + viridis::scale_fill_viridis()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.