inst/tree_sampling/tree-sample-test2.R

## SKG
## 2/4/20
## Calculating likelhood  and making trees
##


devtools::load_all()

## Generate partition list
part_list <- generate_part_list(n = 25)
g_weight_list <- get_weight_list(part_list)


K <- 10000
inf_params <- c("beta_0" = -2, "beta_1" = 1.3)
max_size <- 26
smear_pos_prob <- .5

forest <- simulate_flip_til_failure(K = K,
                                    inf_params = inf_params,
                                    smear_pos_prob = smear_pos_prob,
                                    max_size = max_size)

bark <- summarize_clusters(forest)
#sum_bark <- summarize_cond_trees(forest)
#test <- sum_bark %>% dplyr::filter(n == 5, n_pos == 3)

## Make samples of data



n_trials <- 1000


cluster_summary <- bark

n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- rep(n, each = n_trials)
n_pos_vec <- rep(n_pos, each = n_trials)
par <- c("beta_0" = -3, "beta_1" = 2)
one_init <- TRUE



## Sample trees of n people with n_pos
##sampled_trees <- simulate_many_cond_bp(K = length(n_vec),
 ##                                      n_vec = n_vec, n_pos_vec = n_pos_vec,
##                                       part_list = part_list,
  ##                                     g_weight_list = g_weight_list,
  ##                                     one_init = one_init)
sampled_trees2 <- simulate_many_cond_bp(K = length(n_vec),
                                       n_vec = n_vec, n_pos_vec = n_pos_vec,
                                       part_list = part_list,
                                       g_weight_list = g_weight_list,
                                       one_init = one_init)


##################################################################################


sum_trees2 <- summarize_cond_trees(sampled_trees2)




fast2 <- optim(par = par,
               fn = loglike_obs_data,
               data = cluster_summary,
               sampled_trees_summary = sum_trees2,
               return_neg = TRUE, hessian = TRUE)
fast2$par
sqrt(diag(solve(fast2$hessian)))



##################################
##################################
##################################
##################################
##################################
##################################
##################################


max_g <- sampled_trees2 %>% dplyr::group_by(cluster_size, cluster_pos, cluster_id) %>%
  dplyr::summarize(max_g = max(gen))

x <- 10
y <- 8

out <- max_g %>% dplyr::filter(cluster_size == x, cluster_pos == y)
hist(out$max_g)




### trees from other sampling
n <- cluster_summary$n
n_pos <- cluster_summary$n_pos
n_vec <- n
n_pos_vec <- n_pos
par <- c("beta_0" = 1, "beta_1" = 0)




## Sample trees of n people with n_pos
sampled_trees1 <- sample_many_groups(n_vec = n_vec,
                                    n_pos_vec = n_pos_vec,
                                    rep_vec = rep(n_trials, length(n_vec)))
sum_trees1 <- summarize_cond_trees(sampled_trees1)
fast_out1 <- loglike_obs_data(par,
                              data = cluster_summary,
                              sampled_trees_summary = sum_trees1,
                              return_neg = TRUE)
fast1 <- optim(par = par,
               fn = loglike_obs_data,
               data = cluster_summary,
               sampled_trees_summary = sum_trees1,
               return_neg = TRUE, hessian = TRUE)


max_g <- sampled_trees2 %>% dplyr::group_by(cluster_size, cluster_pos, cluster_id) %>%
  dplyr::summarize(max_g = max(gen))


bad <- rep(0, nrow(cluster_summary))
for(ii in 1:nrow(cluster_summary)){
  x <- cluster_summary$n[ii]
  y <- cluster_summary$n_pos[ii]
  ################################
  out <- max_g %>% dplyr::filter(cluster_size == x, cluster_pos == y)

  max_g2 <- sampled_trees %>% dplyr::group_by(cluster_size, cluster_pos, cluster_id) %>%
    dplyr::summarize(max_g = max(gen))

  out2 <- max_g2 %>% dplyr::filter(cluster_size == x, cluster_pos == y)

  #par(mfrow = c(1,2))
  #hist(out2$max_g)
  #hist(out$max_g)
  dog <- ks.test(out$max_g, out2$max_g)
  print(dog)
  if(dog$p.value < .05){
    bad[ii] <- 1
  }
}
## TODO: Need to sample trees of given n, n_pos and compare distributions of i_pos, g, etc.










fast2$par
sqrt(diag(solve(fast2$hessian)))
skgallagher/TBornotTB documentation built on April 21, 2020, 1:19 p.m.