inst/tree_sampling/i-distrib-tables.R

## SKG
## February 13, 2020
## Making tables and taking names
##

devtools::load_all()
part_list <- generate_part_list(n = 25)
g_weight_list <- get_weight_list(part_list)


m <- 25
out_list <- vector(mode = "list", length = m)
for(ii in 1:m){
  out_list[[ii]] <- matrix(0, nrow = ii+1, ncol = ii)
  for(jj in 0:ii){
    t <- proc.time()[3]
    print(paste("ii:",ii, "jj:", jj))
    n <- ii
    n_pos <- jj
    n_trials <- 10000
    n_vec <- rep(n, each = n_trials)
    n_pos_vec <- rep(n_pos, each = n_trials)
    one_init <- TRUE

    sampled_trees <- simulate_many_cond_bp(K = length(n_vec),
                                           n_vec = n_vec, n_pos_vec = n_pos_vec,
                                           part_list = part_list,
                                           g_weight_list = g_weight_list,
                                           one_init = one_init)



    sum_trees <- summarize_cond_trees(sampled_trees)
    i_distrib <- table(factor(sum_trees$i_pos, levels = 0:(ii-1)))
    out_list[[ii]][jj+1, 1:length(i_distrib)] <- i_distrib
    print(proc.time()[3] - t)
  }
}

saveRDS(out_list, "i_distrib_tables-10000.RDS")
out_list <- readRDS("inst/data/i_distrib_tables-10000.RDS")
results <- out_list
idistrib1e4 <- out_list
usethis::use_data(idistrib1e4, overwrite = TRUE)

library(ggplot2)
library(dplyr)

df_list <- lapply(out_list, function(x){
  df <- as.data.frame(x)
  colnames(df) <- paste0("V", 0:(ncol(df) - 1))
  df$size <- nrow(df) - 1
  df$n_pos <- 0:(nrow(df) - 1)
  df
})

df <- dplyr::bind_rows(df_list) %>% dplyr::select(c(size, n_pos), dplyr::everything())




library(ggplot2)
library(dplyr)
gg_df <- df %>% tidyr::pivot_longer(cols = "V0":"V24", names_to = "i")
gg_df$id <- paste(gg_df$size, gg_df$n_pos, sep = "-")
gg_df$i2 <- as.numeric(gsub("V", "", gg_df$i))
gg_df$size2 <- factor(paste("Cluster Size", gg_df$size),
                      levels = unique(paste("Cluster Size", gg_df$size)))

test_df <- gg_df %>% dplyr::filter(size == 5) %>% na.omit()
ggplot(gg_df, aes(x = n_pos, fill = i2 /(size-1), col = i2/(size-1), y = value )) +
  geom_col() + facet_wrap(~(size2), scales = "free") +
  viridis::scale_fill_viridis(name = "# positive smear Transmissions / (Cluster Size - 1)") +
  theme_minimal() +
  viridis::scale_color_viridis(name = "# positive smear Transmissions / (Cluster Size - 1)") +
  labs(x = "Number of positive smears in cluster",
       y = "Count",
       title = "Distributions of positive smear transmissions")  +
  theme("legend.position" = "bottom")


## Individuals
k <- 20
test_df <- gg_df %>% dplyr::filter(size == 20) %>% na.omit()
test_df$n_pos2 <- factor(paste("# Smear Pos +", test_df$n_pos),
                         unique(paste("# Smear Pos +", test_df$n_pos)))
ggplot(data = test_df, aes(x = i2, y = value)) + geom_col() +
  facet_wrap(~n_pos2) + labs(title = paste("Clusters of size", k),
                             x = "Number of Positive Transmissions",
                             y = "Count") +
  theme_bw()
skgallagher/TBornotTB documentation built on April 21, 2020, 1:19 p.m.