inst/tree_sampling/compare-tree-sampling-methods.R

## SKG
## Feb. 6, 2020
## Comparing my two cluster methods that get different answers

devtools::load_all()

## Generate partition list
part_list <- generate_part_list(n = 25)
g_weight_list <- get_weight_list(part_list)
inf_params <- c("beta_0" = -2, "beta_1" = 1.5)



cluster_summary <- data.frame(freq = 1, n = 10, n_pos = 6)


###########################################################
### EXACT SAMPLING
### ##################################################
n_trials <- 1000
n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- rep(n, each = n_trials)
n_pos_vec <- rep(n_pos, each = n_trials)
par <- c("beta_0" = 1, "beta_1" = 0)
one_init <- TRUE



## Sample trees of n people with n_pos
##sampled_trees <- simulate_many_cond_bp(K = length(n_vec),
##                                      n_vec = n_vec, n_pos_vec = n_pos_vec,
##                                       part_list = part_list,
##                                     g_weight_list = g_weight_list,
##                                     one_init = one_init)
sampled_trees1 <- simulate_many_cond_bp(K = length(n_vec),
                                        n_vec = n_vec, n_pos_vec = n_pos_vec,
                                        part_list = part_list,
                                        g_weight_list = g_weight_list,
                                        one_init = one_init)
sum_trees1 <- summarize_cond_trees(sampled_trees1)
#######################
### FAST AND LOOSE SAMPLING
n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- n
n_pos_vec <- n_pos
par <- c("beta_0" = -1, "beta_1" = 1)




## Sample trees of n people with n_pos
sampled_trees2 <- sample_many_groups(n_vec = n_vec,
                                    n_pos_vec = n_pos_vec,
                                    rep_vec = rep(n_trials, length(n_vec)))

sum_trees2 <- summarize_cond_trees(sampled_trees2)
par(mfrow = c(1, 2))
hist(sum_trees1$i_pos)
hist(sum_trees2$i_pos)
ks.test(sum_trees1$i_pos, sum_trees2$i_pos)

z <- nrow(sum_trees1)
par(mfrow = c(3,2))
plot(y = sum_trees1$i_pos + runif(z, -.15, .15), x = sum_trees1$max_g + runif(z,-.25, .25),
     xlab = "# Gens", ylab = "# Pos transmissions",
     xlim = c(0, n), ylim = c(0,n))
title(paste(n, "people", n_pos, "Smear +"))
plot( y = sum_trees2$i_pos + runif(z, -.15, .15), x= sum_trees2$max_g + runif(z,-.25, .25),
      xlab = "# Gens", ylab = "# Pos transmissions",
      xlim = c(0, n), ylim = c(0,n))
title(paste(n, "people", n_pos, "Smear +"))
hist(sum_trees1$max_g,
     xlab = "# Gens", xlim = c(0,n))
hist(sum_trees2$max_g, xlab = "# Gens",
     xlim = c(0, n))
hist(sum_trees1$i_pos, xlab = "# Pos trans",
     xlim = c(0, n-1))
hist(sum_trees2$i_pos, xlab = "# Pos trans",
     xlim = c(0, n-1))


fast2 <- optim(par = par,
               fn = loglike_obs_data,
               data = cluster_summary,
               sampled_trees_summary = sum_trees1,
               return_neg = TRUE, hessian = TRUE)
fast2$par
sqrt(diag(solve(fast2$hessian)))

fast2 <- optim(par = par,
               fn = loglike_obs_data,
               data = cluster_summary,
               sampled_trees_summary = sum_trees2,
               return_neg = TRUE, hessian = TRUE)
fast2$par
sqrt(diag(solve(fast2$hessian)))




## NEXT TRY larger n and n_pos







## SO the problem is with i_pos, which can mean a lot of things
##
skgallagher/TBornotTB documentation built on April 21, 2020, 1:19 p.m.