# Read the labelled code chunks from the shared analysis script so knitr can
# reference them from this document.
knitr::read_chunk("../../analysis/chunks.R")

# Attach dependencies, one statement per line. The attach order is the same as
# in the original, so name masking between packages is unchanged.
library(ggplot2)
library(ggrastr)
library(dropestr)
library(dropEstAnalysis)
library(Matrix)
library(dplyr)

# Use `theme_base` as the default ggplot2 theme from here on.
# NOTE(review): `theme_base` is not defined in this file; presumably it is
# provided by dropEstAnalysis -- confirm.
theme_set(theme_base)
Here, the BAM file was filtered by realigning it with kallisto 0.43
separately to the mouse and human genomes. Only reads that aligned to exactly one of the two genomes were used in dropEst.
# Load the serialized results object; its `cm_raw` and
# `reads_per_chr_per_cells` fields are used by the code below.
holder <- readRDS(
  file = "../../data/dropest/10x/hgmm_1k/est_2018_01_27_kallisto/hgmm_1k.rds"
)
# Raw count matrix; its row names are used below to assign each row (gene) to
# a species.
cm_real <- holder$cm_raw

# Number of top-ranked barcodes (by total count) to flag as real cells.
cell_number <- 1100

# Rows whose name starts with "hg" are labelled Human, all others Mouse.
gene_species <- ifelse(
  substr(rownames(cm_real), 1, 2) == "hg",
  "Human",
  "Mouse"
) %>%
  as.factor()

# One row per column of `cm_real` (identifier stored in `CB`), with the column
# sums over the Human rows and over the Mouse rows.
umi_by_species <- lapply(
  levels(gene_species),
  function(l) {
    cm_real[gene_species == l, ] %>% Matrix::colSums()
  }
) %>%
  as.data.frame() %>%
  `colnames<-`(levels(gene_species)) %>%
  tibble::rownames_to_column("CB") %>%
  as_tibble() %>%
  mutate(
    Total = Human + Mouse,
    # Label each barcode with the species that has more counts
    # (ties go to Mouse).
    Organism = ifelse(Human > Mouse, "Human", "Mouse"),
    # The rank is computed over all barcodes, i.e. before the `Total > 20`
    # filter below is applied.
    # NOTE(review): with `>=` and no ties this flags `cell_number + 1`
    # barcodes; confirm whether `>` was intended.
    IsReal = rank(Total) >= length(Total) - cell_number
  ) %>%
  filter(Total > 20)

# Per-chromosome exonic read counts for the retained barcodes, in the same row
# order as `umi_by_species`.
# NOTE(review): `FillNa` is not defined in this file; presumably it replaces
# NA entries with a default value -- confirm.
reads_per_chr <- FillNa(
  holder$reads_per_chr_per_cells$Exon[umi_by_species$CB, ]
)

# Fraction of reads on the mouse (mm10_MT) and human (hg19_MT) mitochondrial
# chromosomes among all reads counted in `reads_per_chr`.
umi_by_species <- umi_by_species %>%
  mutate(
    MitReads = reads_per_chr$mm10_MT + reads_per_chr$hg19_MT,
    TotalReads = rowSums(reads_per_chr),
    MitochondrionFraction = MitReads / TotalReads
  )
# Shared base plot: Mouse vs. Human counts for every row of `umi_by_species`
# on log10 axes, with a dashed y = x reference line.
# NOTE(review): `theme_pdf` is not defined in this file; presumably it comes
# from dropEstAnalysis -- confirm.
gg <- ggplot(umi_by_species, aes(x = Mouse, y = Human)) +
  geom_abline(
    aes(slope = 1, intercept = 0),
    linetype = "dashed",
    alpha = 0.5
  ) +
  scale_x_log10(limits = c(1, 2e5), name = "#Mouse molecules") +
  scale_y_log10(name = "#Human molecules") +
  annotation_logticks() +
  theme_pdf(legend.pos = c(0.97, 0.05)) +
  theme(legend.margin = margin(l = 3, r = 3, unit = "pt"))

# Left panel: points coloured by the `IsReal` flag. The legend keys override
# the tiny, transparent point style so they stay readable.
gg_left <- gg +
  geom_point(aes(color = IsReal), size = 0.1, alpha = 0.15) +
  guides(color = guide_legend(override.aes = list(size = 1.5, alpha = 1)))

# Right panel: points coloured by mitochondrial read fraction on a four-colour
# gradient anchored at 0, 0.1, 0.3 and 0.8.
gg_right <- gg +
  geom_point(aes(color = MitochondrionFraction), size = 0.1, alpha = 0.15) +
  scale_color_gradientn(
    colours = c("#1200ba", "#347fff", "#cc4000", "#ff3333"),
    values = scales::rescale(c(0, 0.1, 0.3, 0.8)),
    breaks = seq(0, 1.0, 0.2)
  ) +
  guides(
    color = guide_colorbar(
      direction = "horizontal",
      title.position = "top",
      title = "Mitochondrial\nfraction",
      barwidth = unit(1.2, units = "in")
    )
  )

# Show both panels side by side.
cowplot::plot_grid(gg_left, gg_right)
# Fraction of counts that belong to the minority species
# (min(Human, Mouse) / Total) against the total count per barcode, coloured by
# the assigned organism.
ggplot(umi_by_species) +
  geom_point(
    aes(x = Total, y = pmin(Human, Mouse) / Total, color = Organism),
    size = 0.1,
    alpha = 0.1
  ) +
  scale_x_log10(name = "#Real UMIs", limits = c(10, 2e5)) +
  annotation_logticks() +
  ylab("Fraction of mixed UMIs") +
  # Enlarge the legend keys; the plotted points are too small and transparent.
  guides(color = guide_legend(override.aes = list(size = 1.5, alpha = 1))) +
  theme_pdf(legend.pos = c(1, 1))
Background cells have a constant fraction of mouse and human reads:
# Reference mouse fraction computed from the barcodes flagged as real: Mouse
# counts of Mouse-labelled barcodes divided by the sum of those and the Human
# counts of Human-labelled barcodes.
mouse_frac <- umi_by_species %>%
  filter(IsReal) %>%
  summarise(
    Mouse = sum(Mouse[Organism == "Mouse"]),
    Human = sum(Human[Organism == "Human"]),
    # `Mouse` and `Human` here are the two scalar sums defined just above.
    MF = Mouse / (Mouse + Human)
  ) %>%
  .$MF

# Density histogram of the per-barcode mouse fraction, split by `IsReal` and
# overlaid (position = "identity"), with the reference fraction marked by a
# vertical line.
ggplot(umi_by_species) +
  geom_histogram(
    aes(x = Mouse / Total, y = ..density.., fill = IsReal),
    binwidth = 0.005,
    position = "identity"
  ) +
  geom_vline(xintercept = mouse_frac) +
  xlab("Fraction of mouse reads") +
  theme_pdf(legend.pos = c(1, 1))
Distribution of the total number of molecules per background cell:
# Histogram of the total count per barcode, restricted to barcodes that are
# not flagged as real. Both axes have fixed limits and no padding.
gg <- ggplot(umi_by_species %>% filter(!IsReal)) +
  geom_histogram(aes(x = Total), bins = 100) +
  scale_x_continuous(
    limits = c(0, 250),
    expand = c(0, 0),
    name = "Total #UMIs"
  ) +
  scale_y_continuous(
    limits = c(0, 9000),
    expand = c(0, 0),
    name = "#Cells"
  ) +
  theme_pdf()

# Print the plot.
gg
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.