knitr::read_chunk("../../analysis/chunks.R")



library(ggplot2)
library(ggrastr)
library(dropestr)
library(dropEstAnalysis)
library(Matrix)
library(dplyr)

theme_set(theme_base)

Load data

Here bam file was filtered by realigning it with kallisto 0.43 separately on mouse and human genome. Only reads, which were aligned only on one of them were used in dropEst.

holder <- readRDS('../../data/dropest/10x/hgmm_1k/est_2018_01_27_kallisto/hgmm_1k.rds')
cm_real <- holder$cm_raw
cell_number <- 1100

gene_species <- ifelse(substr(rownames(cm_real), 1, 2) == "hg", 'Human', 'Mouse') %>% 
  as.factor()
umi_by_species <- lapply(levels(gene_species), function(l) cm_real[gene_species == l,] %>% 
                           Matrix::colSums()) %>% as.data.frame() %>% 
  `colnames<-`(levels(gene_species)) %>% tibble::rownames_to_column('CB') %>% 
  as_tibble() %>% 
  mutate(Total = Human + Mouse, Organism=ifelse(Human > Mouse, "Human", "Mouse"), 
         IsReal=rank(Total) >= length(Total) - cell_number) %>%
  filter(Total > 20)

reads_per_chr <- FillNa(holder$reads_per_chr_per_cells$Exon[umi_by_species$CB,])

umi_by_species <- umi_by_species %>% 
  mutate(
    MitReads = reads_per_chr$mm10_MT + reads_per_chr$hg19_MT,
    TotalReads = rowSums(reads_per_chr),
    MitochondrionFraction = MitReads / TotalReads
  )

Common view

gg <- ggplot(umi_by_species, aes(x=Mouse, y=Human)) + 
  geom_abline(aes(slope=1, intercept=0), linetype='dashed', alpha=0.5) +
  scale_x_log10(limits=c(1, 2e5), name="#Mouse molecules") + 
  scale_y_log10(name="#Human molecules") + annotation_logticks() +
  theme_pdf(legend.pos=c(0.97, 0.05)) + theme(legend.margin=margin(l=3, r=3, unit="pt"))

gg_left <- gg + geom_point(aes(color=IsReal), size=0.1, alpha=0.15) +
  guides(color=guide_legend(override.aes=list(size=1.5, alpha=1)))

gg_right <- gg + geom_point(aes(color=MitochondrionFraction), size=0.1, alpha=0.15) +
  scale_color_gradientn(colours=c("#1200ba", "#347fff", "#cc4000", "#ff3333"), 
                        values=scales::rescale(c(0, 0.1, 0.3, 0.8)), 
                        breaks=seq(0, 1.0, 0.2)) +
  guides(color=guide_colorbar(direction="horizontal", title.position="top", 
                              title="Mitochondrial\nfraction", 
                              barwidth=unit(1.2, units="in")))

cowplot::plot_grid(gg_left, gg_right)
ggplot(umi_by_species) + 
  geom_point(aes(x=Total, y=pmin(Human, Mouse) / Total, color=Organism), size=0.1, 
             alpha=0.1) +
  scale_x_log10(name='#Real UMIs', limits=c(10, 2e5)) + annotation_logticks() + 
  ylab('Fraction of mixed UMIs') +
  guides(color=guide_legend(override.aes=list(size=1.5, alpha=1))) +
  theme_pdf(legend.pos=c(1, 1))

Check for constant background

Background cells have constant fraction of mouse and human reads:

mouse_frac <- umi_by_species %>% filter(IsReal) %>% 
  summarise(Mouse=sum(Mouse[Organism == 'Mouse']), Human=sum(Human[Organism == 'Human']), 
            MF=Mouse / (Mouse + Human)) %>% .$MF

ggplot(umi_by_species) + 
  geom_histogram(aes(x=Mouse / Total, y=..density.., fill=IsReal), binwidth=0.005, position="identity") + 
  geom_vline(xintercept=mouse_frac) +
  xlab("Fraction of mouse reads") +
  theme_pdf(legend.pos=c(1, 1))

Distribution of total number of molecules by background cells:

gg <- ggplot(umi_by_species %>% filter(!IsReal)) +
  geom_histogram(aes(x=Total), bins=100) +
  scale_x_continuous(limits=c(0, 250), expand=c(0, 0), name="Total #UMIs") +
  scale_y_continuous(limits=c(0,9000), expand=c(0, 0), name="#Cells") +
  theme_pdf()

gg

Session information




VPetukhov/dropEstAnalysis documentation built on Dec. 28, 2019, 8:16 p.m.