## SKG
## 2/4/20
## Calculating likelhood and making trees
##
devtools::load_all()
## Generate partition list
part_list <- generate_part_list(n = 25)
g_weight_list <- get_weight_list(part_list)
K <- 10000
inf_params <- c("beta_0" = -2, "beta_1" = 1.3)
max_size <- 26
smear_pos_prob <- .5
forest <- simulate_flip_til_failure(K = K,
inf_params = inf_params,
smear_pos_prob = smear_pos_prob,
max_size = max_size)
bark <- summarize_clusters(forest)
#sum_bark <- summarize_cond_trees(forest)
#test <- sum_bark %>% dplyr::filter(n == 5, n_pos == 3)
## Make samples of data
n_trials <- 1000
cluster_summary <- bark
n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- rep(n, each = n_trials)
n_pos_vec <- rep(n_pos, each = n_trials)
par <- c("beta_0" = -3, "beta_1" = 2)
one_init <- TRUE
## Sample trees of n people with n_pos
##sampled_trees <- simulate_many_cond_bp(K = length(n_vec),
## n_vec = n_vec, n_pos_vec = n_pos_vec,
## part_list = part_list,
## g_weight_list = g_weight_list,
## one_init = one_init)
sampled_trees2 <- simulate_many_cond_bp(K = length(n_vec),
n_vec = n_vec, n_pos_vec = n_pos_vec,
part_list = part_list,
g_weight_list = g_weight_list,
one_init = one_init)
##################################################################################
sum_trees2 <- summarize_cond_trees(sampled_trees2)
fast2 <- optim(par = par,
fn = loglike_obs_data,
data = cluster_summary,
sampled_trees_summary = sum_trees2,
return_neg = TRUE, hessian = TRUE)
fast2$par
sqrt(diag(solve(fast2$hessian)))
##################################
##################################
##################################
##################################
##################################
##################################
##################################
max_g <- sampled_trees2 %>% dplyr::group_by(cluster_size, cluster_pos, cluster_id) %>%
dplyr::summarize(max_g = max(gen))
x <- 10
y <- 8
out <- max_g %>% dplyr::filter(cluster_size == x, cluster_pos == y)
hist(out$max_g)
### trees from other sampling
n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- n
n_pos_vec <- n_pos
par <- c("beta_0" = 1, "beta_1" = 0)
## Sample trees of n people with n_pos
sampled_trees1 <- sample_many_groups(n_vec = n_vec,
n_pos_vec = n_pos_vec,
rep_vec = rep(n_trials, length(n_vec)))
sum_trees1 <- summarize_cond_trees(sampled_trees1)
fast_out1 <- loglike_obs_data(par,
data = cluster_summary,
sampled_trees_summary = sum_trees1,
return_neg = TRUE)
fast1 <- optim(par = par,
fn = loglike_obs_data,
data = cluster_summary,
sampled_trees_summary = sum_trees1,
return_neg = TRUE, hessian = TRUE)
max_g <- sampled_trees2 %>% dplyr::group_by(cluster_size, cluster_pos, cluster_id) %>%
dplyr::summarize(max_g = max(gen))
bad <- rep(0, nrow(cluster_summary))
for(ii in 1:nrow(cluster_summary)){
x <- cluster_summary$n[ii]
y <- cluster_summary$n_pos[ii]
################################
out <- max_g %>% dplyr::filter(cluster_size == x, cluster_pos == y)
max_g2 <- sampled_trees %>% dplyr::group_by(cluster_size, cluster_pos, cluster_id) %>%
dplyr::summarize(max_g = max(gen))
out2 <- max_g2 %>% dplyr::filter(cluster_size == x, cluster_pos == y)
#par(mfrow = c(1,2))
#hist(out2$max_g)
#hist(out$max_g)
dog <- ks.test(out$max_g, out2$max_g)
print(dog)
if(dog$p.value < .05){
bad[ii] <- 1
}
}
## TODO: Need to sample trees of given n, n_pos and compare distributions of i_pos, g, etc.
fast2$par
sqrt(diag(solve(fast2$hessian)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.