knitr::read_chunk("../../analysis/chunks.R")
library(ggplot2) library(ggsci) library(ggridges) library(ggrastr) library(dplyr) library(parallel) library(reshape2) library(dropestr) library(dropEstAnalysis) theme_set(theme_base) kPlotsFolder <- '../../output/figures/' kDataPath <- '../../data/dropest/'
# reads_per_umi_per_cell <- holder$reads_per_umi_per_cell # reads_per_umi_per_cell$reads_per_umi <- FilterNUmis(holder$reads_per_umi_per_cell$reads_per_umi) reads_per_umi_per_cell <- kDataPath %>% paste0('10x/aml035_post_transplant/est_10_20_umi_quality/reads_per_umi_per_cell.rds') %>% readRDS()
Trimming:
umi_lengths <- rep(6:10, 2) is_reverse <- c(rep(F, 5), rep(T, 5)) trimmed <- mcmapply(function(i, b) TrimUmisSummary(reads_per_umi_per_cell, i, b), umi_lengths, is_reverse, SIMPLIFY = F, mc.cores=10)
collisions_infos <- lapply(trimmed, function(d) d$collisions.info) collision_df <- mapply(function(s, l, r) data.frame(Observed=1:length(s), Adjusted=s, UmiLength=l, IsReverse=r), collisions_infos, umi_lengths, is_reverse, SIMPLIFY = F) %>% bind_rows() %>% mutate(AdjustedUniform=mapply(AdjustGeneExpressionUniform, Observed, 4^UmiLength)) collision_dfs <- split(collision_df, collision_df$IsReverse) %>% setNames(c('Reverse', 'Direct'))
rm(trimmed, reads_per_umi_per_cell, collisions_infos, collision_df) invisible(gc())
plt_guide <- guides(color=guide_legend(title='UMI length', nrow = 2, order=1), linetype=guide_legend(title='UMI distribution', order=2, keywidth = 1.5)) expand <- c(0.01, 0) fig_collisions <- ggplot(collision_dfs$Direct, aes(x=Observed, col=as.factor(UmiLength))) + geom_line(aes(y=Adjusted - Observed, linetype='Empirical'), size=0.9) + geom_line(aes(y=AdjustedUniform - Observed, linetype='Uniform'), size=0.9) + scale_x_continuous(expand = expand, breaks=seq(4000, 12000, 4000)) + scale_y_continuous(expand = expand) + labs(x='#Observed UMIs', y='#Collisions') + theme_pdf(legend.pos=c(1, 1)) + plt_guide fig_collisions <- cowplot::plot_grid(fig_collisions, labels='C') ggsave(paste0(kPlotsFolder, '/fig_collisions.pdf'), fig_collisions, height=5.5, width=3.5) print(fig_collisions) rm(fig_collisions);
gg_collisions_ext <- ggplot(mapping=aes(x=Observed, col=as.factor(UmiLength), y=Adjusted / AdjustedUniform)) + geom_smooth(data=collision_dfs$Direct, mapping=aes(linetype='Empirical, back trim'), size=0.9, se=F) + geom_smooth(data=collision_dfs$Reverse, mapping=aes(linetype='Empirical, front trim'), size=0.9, se=F) + scale_x_continuous(expand = expand) + scale_y_continuous(expand = expand) + labs(x='#Observed UMIs', y='#Collisions empirical / #Collisions uniform') + theme_pdf(legend.pos=c(1, 1)) + plt_guide rm(collision_dfs) gg_collisions_ext
holder <- readRDS(paste0(kDataPath, 'SCG71/est_01_15_umi_quality/SCG71.rds')) umi_distribution <- GetUmisDistribution(holder$reads_per_umi_per_cell$reads_per_umi) umi_probs <- umi_distribution / sum(umi_distribution)
rm(holder, umi_distribution) invisible(gc())
gene_sizes <- list(2:10, 11:50, seq(60, 300, 10), seq(300, 500, 25), seq(600, 4090, 200), c(4050, 4095)) sample_nums <- unlist(mapply(rep, c(100000, 100000, 15000, 1000, 500, 500), sapply(gene_sizes, length))) gene_sizes_f <- unlist(gene_sizes) adjacent_umis_num <- mcmapply(function(s, n) SampleNumbersOfAdjacentUmis(s, umi_probs, n, uniform=F), gene_sizes_f, sample_nums, mc.cores=30) adjacent_umis_num_unif <- mcmapply(function(s, n) SampleNumbersOfAdjacentUmis(s, umi_probs, n, uniform=T), gene_sizes_f, sample_nums, mc.cores=30)
plot_df <- data.frame(GeneSize=c(1, unlist(gene_sizes))) %>% mutate(ProbOfAdjacentUMIs=c(0, sapply(adjacent_umis_num, mean)) / GeneSize^2, ProbOfAdjacentUMIsUnif=c(0, sapply(adjacent_umis_num_unif, mean)) / GeneSize^2) axis_ratio <- 200 axis_offset <- 0.001 gg_prob_ratio <- ggplot(plot_df, aes(x=GeneSize)) + geom_line(aes(y=ProbOfAdjacentUMIs, color='Empirical'), size=2, alpha=0.8) + geom_line(aes(y=ProbOfAdjacentUMIsUnif, color='Uniform'), size=2, alpha=0.78) + geom_smooth(aes(y=ProbOfAdjacentUMIs / ProbOfAdjacentUMIsUnif / axis_ratio - axis_offset, linetype='Empirical / Uniform'), size=1.5, color='black', se=FALSE) + labs(x = '#UMIs per gene', y='Adjacent UMI probability') + scale_x_log10(expand=c(0, 0), limits=c(2, 4100)) + annotation_logticks(side='b') + scale_y_continuous(expand=c(1e-5, 1e-5), limits=c(0.002, 0.0052), sec.axis=sec_axis(trans=~.*axis_ratio + axis_offset*axis_ratio, name='Probabilities ratio')) + scale_linetype_manual(values='dashed', name='Probabilities ratio') + guides(color=guide_legend(title='UMI distribution')) + guides(linetype=guide_legend(override.aes=list(linetype=6))) + theme_pdf(legend.pos=c(0.5, 0)) + theme(legend.box='horizontal', legend.key.width=unit(0.3, 'in')) gg_prob_ratio
gg_figure <- cowplot::plot_grid(gg_collisions_ext, gg_prob_ratio, ncol=2, labels="AUTO", align='h', rel_widths=c(3.5, 4.5), label_x=0.03) ggsave(paste0(kPlotsFolder, 'supp_impact_on_collisions.pdf'), gg_figure, width=8, height=4) gg_figure
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.