knitr::read_chunk("../../analysis/chunks.R")
library(cowplot) library(ggplot2) library(ggsci) library(ggpubr) library(ggrastr) library(dplyr) library(parallel) library(reshape2) library(dropestr) library(dropEstAnalysis) theme_set(theme_base) kPlotsFolder <- '../../output/figures/' kDataPath <- '../../data/dropest/10x/aml035_post_transplant/' kTrimmedLengths <- 6:9
# holders <- mclapply(paste0(kDataPath, c('est_01_21_umi_quality/aml035ost_transplant.rds', 'est_01_21_10x_umi/aml035ost_transplant.rds')), readRDS, mc.cores=2) %>% # setNames(c('raw', '10x')) # # reads_per_umi_per_cell <- holders$raw$reads_per_umi_per_cell # reads_per_umi_per_cell$reads_per_umi <- FilterNUmis(reads_per_umi_per_cell$reads_per_umi) # # saveRDS(reads_per_umi_per_cell, paste0(kDataPath, 'reads_per_umi_per_cell.rds')) reads_per_umi_per_cell <- readRDS(paste0(kDataPath, 'reads_per_umi_per_cell.rds')) # reads_per_umi_per_cell$reads_per_umi <- reads_per_umi_per_cell$reads_per_umi[1:50000] # raw <- readRDS(paste0(kDataPath, 'raw.rds')) length(reads_per_umi_per_cell$reads_per_umi)
# # WARNING: it requires a lot of memory and CPU. Because of weird R multiprocessing, for me it took up to 650Gb RAM for 30 cores. # trimmed <- mclapply(kTrimmedLengths, function(i) # TrimAndCorrect(reads_per_umi_per_cell, umi.trim.length=i, mc.cores.large=7, # mc.cores.small=7, verbosity.level=1), mc.cores=4) # # raw <- list() # # raw$umi_distribution <- GetUmisDistribution(reads_per_umi_per_cell$reads_per_umi, smooth=10) # raw$umi_probabilities <- raw$umi_distribution / sum(raw$umi_distribution) # # max_umi_per_gene <- sapply(reads_per_umi_per_cell$reads_per_umi, length) %>% max() # raw$collisions_info <- FillCollisionsAdjustmentInfo(raw$umi_probabilities, max_umi_per_gene) # raw$correction_info <- PrepareUmiCorrectionInfo(umi.probabilities=raw$umi_probabilities, # raw$collisions_info[max_umi_per_gene]) # # raw$filt_reads_per_umi_simple <- reads_per_umi_per_cell %>% # CorrectUmiSequenceErrors(raw$umi_probabilities, method='Classic', # correction.info=raw$correction_info, # collisions.info=raw$collisions_info, # mc.cores=30, verbosity.level=2, return='reads') # # raw$filt_umis_per_gene_simple <- sapply(raw$filt_reads_per_umi_simple, length) # # raw$umis_per_gene <- sapply(reads_per_umi_per_cell$reads_per_umi, length) # # saveRDS(raw, paste0(kDataPath, 'raw5.rds')) # saveRDS(trimmed, paste0(kDataPath, 'trimmed5.rds'))
raw <- readRDS(paste0(kDataPath, 'raw5.rds')) trimmed <- readRDS(paste0(kDataPath, 'trimmed5.rds'))
umis_per_gene_df <- as.data.frame(lapply(trimmed[1:4], `[[`, 'umis.per.gene')) umis_per_gene_df$raw <- raw$umis_per_gene colnames(umis_per_gene_df)[1:4] <- paste0(kTrimmedLengths) umis_per_gene_adj_df <- trimmed[1:4] %>% lapply(function(tr) tr$collisions.info[tr$umis.per.gene]) %>% as.data.frame() colnames(umis_per_gene_adj_df) <- paste0(kTrimmedLengths) umis_per_gene_adj_classic_df <- mclapply(kTrimmedLengths, function(i) sapply( trimmed[[i - 5]]$umis.per.gene, AdjustGeneExpressionUniform, 4^i), mc.cores=5) %>% as.data.frame() colnames(umis_per_gene_adj_classic_df) <- paste0(kTrimmedLengths) plot_df <- melt(umis_per_gene_df, id.vars='raw', variable.name='UmiLen', value.name='NoAdjustment') plot_df$Adjusted <- melt(umis_per_gene_adj_df)$value plot_df$AdjustedClassic <- melt(umis_per_gene_adj_classic_df)$value plot_df <- melt(plot_df, id.vars=c('raw', 'UmiLen'), variable.name='Adjustment') %>% filter(raw > 5) plot_dfs <- split(plot_df, plot_df$UmiLen)
scale_y_low <- scale_y_continuous(limits=c(-3000, 0), breaks=seq(0, -2500, by=-2500), expand=c(0, 0)) ggs <- lapply(names(plot_dfs), function(n) ggplot(plot_dfs[[n]], aes(x=raw, y=(value - raw), linetype=Adjustment, col=UmiLen)) + geom_smooth() + scale_color_manual(values=scales::hue_pal()(4)[as.integer(n) - 5]) + scale_linetype_manual(labels=c('No adjustment', 'Empirical', 'Uniform'), values=c('solid', 'dashed', 'dotted')) + scale_x_continuous(expand=c(0, 0), limits=c(0, 12300), breaks=seq(0, 10000, 2500)) + scale_y_continuous(expand=c(0, 0), limits=c(-10000, 0)) + theme_pdf()) ggs[3:4] <- lapply(ggs[3:4], `+`, scale_y_low) ggs[[1]] <- ggs[[1]] + theme_pdf(legend.pos=c(0, 0)) + theme(legend.box='horizontal') + scale_color_manual(values=alpha(scales::hue_pal()(4), 0.7), name='Length of\ntrimmed UMI', labels=names(plot_dfs), drop=F) + guides(color=guide_legend(ncol=2, nrow=2, byrow=T)) ggs <- BuildPanel4(ggs, "", "", return.raw=T, legend.plot.id=1) ga1 <- plot_grid(plotlist=ggs[c(1,3)], nrow=2, rel_heights=c(2.5, 1), align='v') ga2 <- plot_grid(plotlist=ggs[c(2,4)], nrow=2, rel_heights=c(2.5, 1), align='v') supp_fig_collisions <- annotate_figure(ggarrange(ga1, ga2, ncol=2), bottom=text_grob("#Molecules after trimming", size=14), left = text_grob("Error after trimming (#molecules)", rot=90, size=14)) ggsave(paste0(kPlotsFolder, 'supp_umi_trim_collisions.pdf'), supp_fig_collisions, height=4, width=8) supp_fig_collisions
plots <- lapply(kTrimmedLengths, function(i) PlotTrimmedCorrections(trimmed[[i - 5]], raw, i, log=T, rast.width=4, rast.height=4.5, rast.dpi=150, heights.ratio=c(2.5, 3))) large_plots <- lapply(plots, `[[`, 'large') %>% lapply(`+`, annotation_logticks(sides='b')) small_plots <- lapply(plots, `[[`, 'small') title_theme <- theme(plot.title=element_text(size=12)) gp_large <- BuildPanel4(large_plots, xlabel="Corrected #UMIs without trimming", ylabel="Error on trimmed data, %", legend.plot.id=3, labels=NULL, plot.theme=title_theme) gp_small <- BuildPanel4(small_plots, xlabel="Corrected #UMIs without trimming", ylabel="Error on trimmed data, %", labels=NULL, plot.theme=title_theme) supp_fig_errors <- plot_grid(gp_large, gp_small, nrow=2, labels=c('A', 'B'), rel_heights=c(5, 6))
supp_fig_errors
ggsave(paste0(kPlotsFolder, 'supp_umi_trim_error.pdf'), supp_fig_errors, width=8, height=9)
umis_per_gene <- as.data.frame(lapply(trimmed, `[[`, 'umis.per.gene')) colnames(umis_per_gene) <- paste0(kTrimmedLengths) errors <- ((umis_per_gene - raw$filt_umis_per_gene_simple) / raw$filt_umis_per_gene_simple) %>% cbind(Real=raw$filt_umis_per_gene_simple) %>% melt(id.vars='Real', variable.name='UmiLength', value.name='Error') errors_sum <- errors %>% group_by(Real, UmiLength) %>% summarise(Error=mean(Error, trim=0.2))
kPlotWidth <- 8 kPlotHeight <- 7 gp1 <- ggplot(errors_sum, aes(x=Real, y=100 * Error, color=UmiLength)) + geom_point_rast(size=0.3, alpha=0.5, width=kPlotWidth / 2, height=kPlotHeight / 3) + geom_smooth() + scale_x_log10(expand=c(0.01, 0)) + annotation_logticks(sides='b') + scale_y_continuous(limits=c(-65, 60), expand=c(0, 0)) + theme_pdf(legend.pos=c(0, 0)) + scale_color_npg() + guides(color=guide_legend(title='Length of trimmed UMI', ncol=4)) gp1
fig_umi_length <- 6 trimmed_corrected_cells <- lapply(raw$filt_reads_per_umi_simple, TrimUmis, fig_umi_length)
umis_per_gene_plt <- data.frame(none=sapply(trimmed_corrected_cells, length), real=raw$filt_umis_per_gene_simple) umis_per_gene_plt$empirical <- trimmed[[fig_umi_length - 5]]$collisions.info[umis_per_gene_plt$none] umis_per_gene_plt$uniform <- sapply(umis_per_gene_plt$none, AdjustGeneExpressionUniform, 4^fig_umi_length) umis_per_gene_plt_df <- melt(as.data.frame(umis_per_gene_plt), id.vars='real', variable.name='Adjustment', value.name='value') umis_per_gene_plt_df <- umis_per_gene_plt_df %>% group_by(Adjustment, real) %>% summarize(value=mean(value, 0.2)) gp2 <- umis_per_gene_plt_df %>% ggplot(aes(x=real, y=100 * (value - real) / real, col=Adjustment)) + geom_point_rast(size=0.3, alpha=0.3, width=kPlotWidth / 2, height=kPlotHeight / 3) + geom_smooth() + scale_x_log10(expand=c(0.01, 0)) + annotation_logticks(sides='b') + scale_y_continuous(expand=c(0, 0), limits=c(-65, 11)) + scale_color_nejm() + labs(x='Corrected #molecules without trimming', y='Error, %') + guides(color = guide_legend(title='Collision adjustment', ncol=3)) + theme_pdf(legend.pos=c(0, 0)) gp2
fig_umi_length <- 7 corrected_data <- as_tibble(trimmed[[fig_umi_length - 5]]$filt_cells) colnames(corrected_data) <- c('Bayesian', 'cluster', 'cluster-neq', 'directional') corrected_data$`no correction` <- trimmed[[fig_umi_length - 5]]$umis.per.gene corrected_data$real <- raw$filt_umis_per_gene_simple plot_df <- melt(corrected_data, id.vars='real', variable.name='Correction') %>% group_by(Correction, real) %>% summarize(value=median(value)) gp3 <- ggplot(plot_df, aes(x=real, y=100 * (value - real) / real, color=Correction)) + geom_point_rast(size=0.3, alpha=0.3, width=kPlotWidth / 2, height=kPlotHeight / 3) + geom_smooth() + scale_x_log10(expand=c(0.01, 0)) + annotation_logticks(sides='b') + scale_y_continuous(limits=c(-101, 51), expand=c(0, 0)) + # scale_color_manual(values=correction_colors[2:length(correction_colors)]) + guides(color=guide_legend(ncol=3)) + theme_pdf(legend.pos=c(0, 0)) gp3
aml035_plots <- list(gp1=gp1, gp2=gp2, gp3=gp3)
# You need to run "umi_correction/umi_bmmc1.Rmd" first to produce these plots bmmc_plots_data <- readRDS('../../data/plot_data/bmmc_umi_fig_part2.rds')
colors <- alpha(bmmc_plots_data$correction_colors, 0.7) %>% setNames(names(bmmc_plots_data$correction_colors)) scale_color_short <- scale_color_manual(values=colors) figure_trim <- plot_grid( aml035_plots$gp1 + theme_pdf() + ylab('Error, %') + rremove("xlab") + theme(plot.margin=margin(b=2, unit='pt')), aml035_plots$gp2 + theme_pdf() + rremove("xlab") + theme(plot.margin=margin(b=2, unit='pt')), aml035_plots$gp3 + theme_pdf() + ylab('Error, %') + scale_color_short + guides(color=guide_legend(title='Correction (see Figure D)', label.theme=element_blank(), ncol=5, reverse=T)) + rremove("xlab") + theme(plot.margin=margin()), ncol=1, nrow=3, labels = c("A", "B", "C"), align='v') figure_trim_ann <- (figure_trim + theme(plot.margin=margin(t=10, l=0, unit='pt'))) %>% annotate_figure(bottom=text_grob("Gene expression magnitude (#molecules)", size=14, hjust=0.43))
fig_full <- plot_grid(figure_trim_ann + theme(plot.margin=margin()), bmmc_plots_data$gg_fig + theme(plot.margin=margin()), ncol=2) fig_full_ann <- annotate_figure(fig_full, fig.lab="Figure 2", fig.lab.pos="top.right") fig_full_ann
ggsave(paste0(kPlotsFolder, 'umi_correction_figure.pdf'), fig_full_ann, width=8, height=7)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.