This notebook display the generated quantities of each model, with no data imput

# Import libraries

set.seed(13254)

# Import libraries
library(tidyverse)
library(magrittr)
library(rstan)
library(tidybayes)
library(here)
library(foreach)
library(doParallel)
registerDoParallel()
# library(future)
# plan(multiprocess)

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

#devtools::load_all() # Sorry this costs me 30 minutes every day

Setting global variables

N = 20
G = 100
counts_source = "Acute_Myeloid_Leukemia_Primary_Blood_Derived_Cancer_-_Peripheral_Blood"

counts_tidy <- load_test_data(N, G, counts_source)

Wrappers and utilities

source(here("R", "evaluation_tools.R"))
check_return = function(fit){
  fit %>% check_all_diagnostics()
  fit
}

sampling_check_return = function(model, cores = 4, iter = 500, ...){
  sampling(model, cores = cores, iter = iter, ...) %>% check_return
}

# Set theme plots
my_theme =  
    theme_bw() +
    theme(
        panel.border = element_blank(),
        axis.line = element_line(),
        panel.grid.major = element_line(size = 0.2),
        panel.grid.minor = element_line(size = 0.1),
        text = element_text(size=12),
        legend.position="bottom",
        aspect.ratio=1,
        axis.text.x = element_text(angle = 90, hjust = 1),
        strip.background = element_blank(),
        axis.title.x  = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.title.y  = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10))
    )
data_for_stan = data_for_stan_from_tidy(N, G, counts_tidy)

Set model names

# model_names =
#   dir(path = here("stan"), pattern = ".stan") %>%
#   gsub("\\.stan", "", .)
model_names = c(
  "logNormal_multinomial",
  "logStudent_multinomial",
  "negBinomial",
  "multinomial",
  "dirichlet_multinomial",
  "gamma_multinomial"
  )

Compile all models

models =
  foreach(model_name = model_names) %do% {
    library(rstan) #On Windows, %dopar%-ed code does not share the main session
    library(here) #On Windows, %dopar%-ed code does not share the main session
    stan_model(here("stan",sprintf("%s.stan", model_name)))
  } %>%
  setNames(model_names)

Run all models - MARTIN

# model_defs = tibble(model_name = model_names) %>% 
#   crossing(tibble(is_prior_asymetric = c(0,1))) %>%
#   filter(model_name != "gamma_multinomial" | is_prior_asymetric == 0) #gamma_multinomial currently does not support assymetric prior
# 
# 
# models_list = models[model_defs$model_name]
# 
# control_list = model_defs %>% rowwise() %>% do(list(max_treedepth = switch(
#           model_name,
#           "logNormal_multinomial" = 12,
#           "logStudent_multinomial" = 13,
#           10)
#   )
# )
# 
# #Copy the data for all runs
# data_list = list()
# for(i in 1:length(control_list)) {
#   data_list[[i]] = data_for_stan
#   
# }
# 
# fits = sampling_multi(models, data_list, control_per_item = control_list) %>%
#   setNames(model_names)

Run all models - STEFANO

fits =
  foreach(model = models, model_name = models %>% names, .combine = c) %:%
  foreach(is_prior_asymetric = c(0,1), .combine = c) %do% {

    # Trick for naming list
    my_list = list()
    my_list[[sprintf("%s_asymmetric-%s", model_name, is_prior_asymetric)]] =
      sampling_check_return(
        model,
        data =
          data_for_stan %>%
          lplyr::mutate(is_prior_asymetric = is_prior_asymetric),
        control = list(
          max_treedepth = switch(
            model_name,
            "logNormal_multinomial" = 12,
            "logStudent_multinomial" = 13,
            10
          )
        )
      )

    # Return
    my_list
  }

# Fit a single model
fits$`logNormal_multinomial_asymmetric-1` =
  sampling_check_return(
     stan_model(here("stan",sprintf("%s.stan", "logNormal_multinomial"))),
    data = data_for_stan %>% lplyr::mutate(is_prior_asymetric = 1),
    control = list( max_treedepth = 12)
)

Show speed of the models

fits %>% map_dbl(function(x) { sum(get_elapsed_time(x))})

Get generated quantities

generated_quant =
  foreach(fit = fits, model_name = fits %>% names, .combine = bind_rows) %dopar% {
    library(dplyr) #On Windows, %dopar%-ed code does not share the main session
    library(tidybayes)
    fit %>%
      gather_samples(counts_gen_naive[sample_idx, ens_iso_idx], counts_gen_geneWise[sample_idx, ens_iso_idx]) %>%
      ungroup() %>%
      mutate(model_name)
  } %>%
  left_join(counts_tidy) %>%
  rename(Observed = `read count`, Estimated = estimate)

Plots

# Overall distribution
generated_quant %>%
  gather(`Source`, `Read count`, c("Observed", "Estimated")) %>%
  filter(.iteration %in% 1:50 & .chain %in% 1) %>%
  mutate(term = gsub("counts_gen_", "", term)) %>%

  # Plot
  {
    (.) %>%
    ggplot(aes(`Read count` + 1, color = Source)) +
    stat_bin(geom = "step", position = "identity", bins=50, alpha = 0.7) +
    facet_grid(term ~ model_name) +
    scale_x_log10() +
    scale_color_brewer(palette = "Set1") +
    my_theme +
    theme(
    strip.text.y = element_text(angle = 0),
    strip.text.x = element_text(angle = 90, size=8)
    ) +
    ggtitle("Overall read count distribution")
  } %>%

  # Save plot
  {
    ggsave(plot = (.), here("dev", "overall_read_count_distribution.png"))
    (.)
  }

# Gene-wise distribution
generated_quant %>%
  gather(`Source`, `Read count`, c("Observed", "Estimated")) %>%
  filter(.iteration %in% 1:50 & .chain %in% 1) %>%
  filter(term == "counts_gen_geneWise") %>%
  right_join( (.) %>% distinct(ens_iso) %>% head(n=20) ) %>%

  # Gene-wise plots
  {
    (.) %>%
    ggplot(aes(`Read count` + 1, color = Source)) +
    stat_bin(geom = "step", position = "identity", bins=50, size=0.2, alpha = 0.7 ) +
    facet_grid(model_name ~ ens_iso) +
    scale_x_log10() +
    scale_color_brewer(palette = "Set1") +
      #scale_size()
    my_theme +
    theme(
      strip.text.y = element_text(angle = 0),
      strip.text.x = element_text(angle = 90, size=8)
    ) +
    ggtitle("Gene-wise read count distribution")
  } %>%

  # Save plot
  {
    ggsave(plot = (.), here("dev", "gene_wise_read_count_distribution.png"), width=29, height=21, units = "cm")
    (.)
  }


stemangiola/RNAseq-noise-model documentation built on Oct. 18, 2019, 1:47 p.m.