## SKG
## Feb. 6, 2020
## Comparing my two cluster methods that get different answers
devtools::load_all()
## Generate partition list
part_list <- generate_part_list(n = 25)
g_weight_list <- get_weight_list(part_list)
inf_params <- c("beta_0" = -2, "beta_1" = 1.5)
cluster_summary <- data.frame(freq = 1, n = 10, n_pos = 6)
###########################################################
### EXACT SAMPLING
### ##################################################
n_trials <- 1000
n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- rep(n, each = n_trials)
n_pos_vec <- rep(n_pos, each = n_trials)
par <- c("beta_0" = 1, "beta_1" = 0)
one_init <- TRUE
## Sample trees of n people with n_pos
##sampled_trees <- simulate_many_cond_bp(K = length(n_vec),
## n_vec = n_vec, n_pos_vec = n_pos_vec,
## part_list = part_list,
## g_weight_list = g_weight_list,
## one_init = one_init)
sampled_trees1 <- simulate_many_cond_bp(K = length(n_vec),
n_vec = n_vec, n_pos_vec = n_pos_vec,
part_list = part_list,
g_weight_list = g_weight_list,
one_init = one_init)
sum_trees1 <- summarize_cond_trees(sampled_trees1)
#######################
### FAST AND LOOSE SAMPLING
n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- n
n_pos_vec <- n_pos
par <- c("beta_0" = -1, "beta_1" = 1)
## Sample trees of n people with n_pos
sampled_trees2 <- sample_many_groups(n_vec = n_vec,
n_pos_vec = n_pos_vec,
rep_vec = rep(n_trials, length(n_vec)))
sum_trees2 <- summarize_cond_trees(sampled_trees2)
par(mfrow = c(1, 2))
hist(sum_trees1$i_pos)
hist(sum_trees2$i_pos)
ks.test(sum_trees1$i_pos, sum_trees2$i_pos)
z <- nrow(sum_trees1)
par(mfrow = c(3,2))
plot(y = sum_trees1$i_pos + runif(z, -.15, .15), x = sum_trees1$max_g + runif(z,-.25, .25),
xlab = "# Gens", ylab = "# Pos transmissions",
xlim = c(0, n), ylim = c(0,n))
title(paste(n, "people", n_pos, "Smear +"))
plot( y = sum_trees2$i_pos + runif(z, -.15, .15), x= sum_trees2$max_g + runif(z,-.25, .25),
xlab = "# Gens", ylab = "# Pos transmissions",
xlim = c(0, n), ylim = c(0,n))
title(paste(n, "people", n_pos, "Smear +"))
hist(sum_trees1$max_g,
xlab = "# Gens", xlim = c(0,n))
hist(sum_trees2$max_g, xlab = "# Gens",
xlim = c(0, n))
hist(sum_trees1$i_pos, xlab = "# Pos trans",
xlim = c(0, n-1))
hist(sum_trees2$i_pos, xlab = "# Pos trans",
xlim = c(0, n-1))
fast2 <- optim(par = par,
fn = loglike_obs_data,
data = cluster_summary,
sampled_trees_summary = sum_trees1,
return_neg = TRUE, hessian = TRUE)
fast2$par
sqrt(diag(solve(fast2$hessian)))
fast2 <- optim(par = par,
fn = loglike_obs_data,
data = cluster_summary,
sampled_trees_summary = sum_trees2,
return_neg = TRUE, hessian = TRUE)
fast2$par
sqrt(diag(solve(fast2$hessian)))
## NEXT TRY larger n and n_pos
## SO the problem is with i_pos, which can mean a lot of things
##
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.