knitr::read_chunk("../../analysis/chunks.R")
library(ggplot2) library(ggrastr) library(dropestr) library(dropEstAnalysis) library(Matrix) library(dplyr) theme_set(theme_base)
Here bam file was filtered by removing all reads, which were aligned on both mouse and human chromosomes at the same time.
# holder <- readRDS('../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k.rds') # holder_filt <- list() # holder_filt$cm_raw <- holder$cm_raw # holder_filt$reads_per_chr_per_cells <- holder$reads_per_chr_per_cells$Exon # saveRDS(holder_filt, '../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k_filt.rds') holder <- readRDS('../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k_filt.rds') kPlotDir <- '../../output/figures/'
cm_real <- holder$cm_raw cell_number <- 6500 gene_species <- ifelse(substr(rownames(cm_real), 1, 2) == "hg", 'Human', 'Mouse') %>% as.factor() umi_by_species <- lapply(levels(gene_species), function(l) cm_real[gene_species == l,] %>% Matrix::colSums()) %>% as.data.frame() %>% `colnames<-`(levels(gene_species)) %>% tibble::rownames_to_column('CB') %>% as_tibble() %>% mutate(Total = Human + Mouse, Organism=ifelse(Human > Mouse, "Human", "Mouse"), IsReal=rank(Total) >= length(Total) - cell_number) %>% filter(Total > 20) reads_per_chr <- FillNa(holder$reads_per_chr_per_cells$Exon[umi_by_species$CB,]) umi_by_species <- umi_by_species %>% mutate( MitReads = reads_per_chr$mm10_MT + reads_per_chr$hg19_MT, TotalReads = rowSums(reads_per_chr), MitochondrionFraction = MitReads / TotalReads ) umi_by_species$Type <- ifelse(umi_by_species$IsReal, umi_by_species$Organism, "Background") umi_by_species$Type[umi_by_species$Mouse > 2e3 & umi_by_species$Human > 2e3] <- 'Dublets'
gg_template <- ggplot(umi_by_species, aes(x=Mouse, y=Human)) + geom_abline(aes(slope=1, intercept=0), linetype='dashed', alpha=0.5) + scale_x_log10(limits=c(1, 2e5), name="#Mouse molecules") + scale_y_log10(name="#Human molecules") + annotation_logticks() + theme_pdf(legend.pos=c(0.97, 0.05)) + theme(legend.margin=margin(l=3, r=3, unit="pt")) gg_left <- gg_template + geom_point(aes(color=IsReal), size=0.1, alpha=0.15) + guides(color=guide_legend(override.aes=list(size=1.5, alpha=1))) gg_right <- gg_template + geom_point(aes(color=MitochondrionFraction), size=0.1, alpha=0.15) + scale_color_gradientn(colours=c("#1200ba", "#347fff", "#cc4000", "#ff3333"), values=scales::rescale(c(0, 0.1, 0.3, 0.8)), breaks=seq(0, 1.0, 0.2)) + guides(color=guide_colorbar(direction="horizontal", title.position="top", title="Mitochondrial\nfraction", barwidth=unit(1.2, units="in"))) cowplot::plot_grid(gg_left, gg_right)
ggplot(umi_by_species) + geom_point(aes(x=Total, y=pmin(Human, Mouse) / Total, color=Organism), size=0.1, alpha=0.1) + scale_x_log10(name='#Real UMIs', limits=c(10, 2e5)) + annotation_logticks() + ylab('Fraction of mixed UMIs') + guides(color=guide_legend(override.aes=list(size=1.5, alpha=1))) + theme_pdf(legend.pos=c(1, 1))
Background cells have constant fraction of mouse and human reads:
mouse_frac <- umi_by_species %>% filter(IsReal) %>% summarise(Mouse=sum(Mouse[Organism == 'Mouse']), Human=sum(Human[Organism == 'Human']), MF=Mouse / (Mouse + Human)) %>% .$MF ggplot(umi_by_species) + geom_histogram(aes(x=Mouse / Total, y=..density.., fill=IsReal), binwidth=0.005, position="identity") + geom_vline(xintercept=mouse_frac) + xlab("Fraction of mouse reads") + theme_pdf(legend.pos=c(1, 1))
Distribution of total number of molecules by background cells:
gg <- ggplot(umi_by_species %>% filter(!IsReal)) + geom_histogram(aes(x=Total), bins=100) + scale_x_continuous(limits=c(0, 600), expand=c(0, 0), name="Total #UMIs") + scale_y_continuous(limits=c(0, 6000), expand=c(0, 0), name="#Cells") + theme_pdf() gg
arrows_df <- umi_by_species %>% group_by(Type) %>% summarise(MouseEnd=median(Mouse), HumanEnd=median(Human)) %>% mutate(Mouse=c(1e1, 2e4, 7e1, 6e4), Human=c(1e3, 5e3, 7e4, 1.5e2)) gg_fig <- gg_template + geom_point_rast(aes(color=MitochondrionFraction), size=0.1, alpha=0.15, width=6, height=4, dpi=200) + scale_color_gradientn(colours=c("#1200ba", "#347fff", "#cc4000", "#ff3333"), values=scales::rescale(c(0, 0.1, 0.3, 0.8)), breaks=seq(0, 1.0, 0.2)) + guides(color=guide_colorbar(direction="horizontal", title.position="top", title="Mitochondrial\nfraction", barwidth=unit(1.2, units="in"))) + stat_ellipse(aes(group=Type), level=0.9999) + geom_segment(aes(xend=MouseEnd, yend=HumanEnd, group=Type), data=arrows_df, arrow=arrow(length = unit(0.03, "npc"))) + geom_label(aes(label=Type), data=arrows_df, fill=alpha('white', 1)) + theme(plot.margin=margin(1, 1, 1, 1)) try(invisible(dev.off()), silent=T) ggsave(paste0(kPlotDir, 'supp_human_mouse.pdf'), gg_fig, width=6, height=4)
gg_fig
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.