This notebook display the generated quantities of each model, with no data imput
# Import libraries set.seed(13254) # Import libraries library(tidyverse) library(magrittr) library(rstan) library(tidybayes) library(here) library(foreach) library(doParallel) registerDoParallel() # library(future) # plan(multiprocess) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) #devtools::load_all() # Sorry this costs me 30 minutes every day
Setting global variables
N = 20 G = 100 counts_source = "Acute_Myeloid_Leukemia_Primary_Blood_Derived_Cancer_-_Peripheral_Blood" counts_tidy <- load_test_data(N, G, counts_source)
Wrappers and utilities
source(here("R", "evaluation_tools.R")) check_return = function(fit){ fit %>% check_all_diagnostics() fit } sampling_check_return = function(model, cores = 4, iter = 500, ...){ sampling(model, cores = cores, iter = iter, ...) %>% check_return } # Set theme plots my_theme = theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size=12), legend.position="bottom", aspect.ratio=1, axis.text.x = element_text(angle = 90, hjust = 1), strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) )
data_for_stan = data_for_stan_from_tidy(N, G, counts_tidy)
Set model names
# model_names = # dir(path = here("stan"), pattern = ".stan") %>% # gsub("\\.stan", "", .) model_names = c( "logNormal_multinomial", "logStudent_multinomial", "negBinomial", "multinomial", "dirichlet_multinomial", "gamma_multinomial" )
Compile all models
models = foreach(model_name = model_names) %do% { library(rstan) #On Windows, %dopar%-ed code does not share the main session library(here) #On Windows, %dopar%-ed code does not share the main session stan_model(here("stan",sprintf("%s.stan", model_name))) } %>% setNames(model_names)
Run all models - MARTIN
# model_defs = tibble(model_name = model_names) %>% # crossing(tibble(is_prior_asymetric = c(0,1))) %>% # filter(model_name != "gamma_multinomial" | is_prior_asymetric == 0) #gamma_multinomial currently does not support assymetric prior # # # models_list = models[model_defs$model_name] # # control_list = model_defs %>% rowwise() %>% do(list(max_treedepth = switch( # model_name, # "logNormal_multinomial" = 12, # "logStudent_multinomial" = 13, # 10) # ) # ) # # #Copy the data for all runs # data_list = list() # for(i in 1:length(control_list)) { # data_list[[i]] = data_for_stan # # } # # fits = sampling_multi(models, data_list, control_per_item = control_list) %>% # setNames(model_names)
Run all models - STEFANO
fits = foreach(model = models, model_name = models %>% names, .combine = c) %:% foreach(is_prior_asymetric = c(0,1), .combine = c) %do% { # Trick for naming list my_list = list() my_list[[sprintf("%s_asymmetric-%s", model_name, is_prior_asymetric)]] = sampling_check_return( model, data = data_for_stan %>% lplyr::mutate(is_prior_asymetric = is_prior_asymetric), control = list( max_treedepth = switch( model_name, "logNormal_multinomial" = 12, "logStudent_multinomial" = 13, 10 ) ) ) # Return my_list } # Fit a single model fits$`logNormal_multinomial_asymmetric-1` = sampling_check_return( stan_model(here("stan",sprintf("%s.stan", "logNormal_multinomial"))), data = data_for_stan %>% lplyr::mutate(is_prior_asymetric = 1), control = list( max_treedepth = 12) )
Show speed of the models
fits %>% map_dbl(function(x) { sum(get_elapsed_time(x))})
Get generated quantities
generated_quant = foreach(fit = fits, model_name = fits %>% names, .combine = bind_rows) %dopar% { library(dplyr) #On Windows, %dopar%-ed code does not share the main session library(tidybayes) fit %>% gather_samples(counts_gen_naive[sample_idx, ens_iso_idx], counts_gen_geneWise[sample_idx, ens_iso_idx]) %>% ungroup() %>% mutate(model_name) } %>% left_join(counts_tidy) %>% rename(Observed = `read count`, Estimated = estimate)
Plots
# Overall distribution generated_quant %>% gather(`Source`, `Read count`, c("Observed", "Estimated")) %>% filter(.iteration %in% 1:50 & .chain %in% 1) %>% mutate(term = gsub("counts_gen_", "", term)) %>% # Plot { (.) %>% ggplot(aes(`Read count` + 1, color = Source)) + stat_bin(geom = "step", position = "identity", bins=50, alpha = 0.7) + facet_grid(term ~ model_name) + scale_x_log10() + scale_color_brewer(palette = "Set1") + my_theme + theme( strip.text.y = element_text(angle = 0), strip.text.x = element_text(angle = 90, size=8) ) + ggtitle("Overall read count distribution") } %>% # Save plot { ggsave(plot = (.), here("dev", "overall_read_count_distribution.png")) (.) } # Gene-wise distribution generated_quant %>% gather(`Source`, `Read count`, c("Observed", "Estimated")) %>% filter(.iteration %in% 1:50 & .chain %in% 1) %>% filter(term == "counts_gen_geneWise") %>% right_join( (.) %>% distinct(ens_iso) %>% head(n=20) ) %>% # Gene-wise plots { (.) %>% ggplot(aes(`Read count` + 1, color = Source)) + stat_bin(geom = "step", position = "identity", bins=50, size=0.2, alpha = 0.7 ) + facet_grid(model_name ~ ens_iso) + scale_x_log10() + scale_color_brewer(palette = "Set1") + #scale_size() my_theme + theme( strip.text.y = element_text(angle = 0), strip.text.x = element_text(angle = 90, size=8) ) + ggtitle("Gene-wise read count distribution") } %>% # Save plot { ggsave(plot = (.), here("dev", "gene_wise_read_count_distribution.png"), width=29, height=21, units = "cm") (.) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.