knitr::read_chunk("../../analysis/chunks.R")



library(ggplot2)
library(ggrastr)
library(dropestr)
library(dropEstAnalysis)
library(Matrix)
library(dplyr)

theme_set(theme_base)

Load data

Here bam file was filtered by removing all reads, which were aligned on both mouse and human chromosomes at the same time.

# holder <- readRDS('../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k.rds')
# holder_filt <- list()
# holder_filt$cm_raw <- holder$cm_raw
# holder_filt$reads_per_chr_per_cells <- holder$reads_per_chr_per_cells$Exon
# saveRDS(holder_filt, '../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k_filt.rds')
holder <- readRDS('../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k_filt.rds')
kPlotDir <- '../../output/figures/'
cm_real <- holder$cm_raw
cell_number <- 6500

gene_species <- ifelse(substr(rownames(cm_real), 1, 2) == "hg", 'Human', 'Mouse') %>% 
  as.factor()
umi_by_species <- lapply(levels(gene_species), function(l) cm_real[gene_species == l,] %>% 
                           Matrix::colSums()) %>% as.data.frame() %>% 
  `colnames<-`(levels(gene_species)) %>% tibble::rownames_to_column('CB') %>% 
  as_tibble() %>% 
  mutate(Total = Human + Mouse, Organism=ifelse(Human > Mouse, "Human", "Mouse"), 
         IsReal=rank(Total) >= length(Total) - cell_number) %>%
  filter(Total > 20)

reads_per_chr <- FillNa(holder$reads_per_chr_per_cells$Exon[umi_by_species$CB,])

umi_by_species <- umi_by_species %>% 
  mutate(
    MitReads = reads_per_chr$mm10_MT + reads_per_chr$hg19_MT,
    TotalReads = rowSums(reads_per_chr),
    MitochondrionFraction = MitReads / TotalReads
  )

umi_by_species$Type <- ifelse(umi_by_species$IsReal, umi_by_species$Organism, "Background")
umi_by_species$Type[umi_by_species$Mouse > 2e3 & umi_by_species$Human > 2e3] <- 'Dublets'

Common view

gg_template <- ggplot(umi_by_species, aes(x=Mouse, y=Human)) + 
  geom_abline(aes(slope=1, intercept=0), linetype='dashed', alpha=0.5) +
  scale_x_log10(limits=c(1, 2e5), name="#Mouse molecules") + 
  scale_y_log10(name="#Human molecules") + annotation_logticks() +
  theme_pdf(legend.pos=c(0.97, 0.05)) + theme(legend.margin=margin(l=3, r=3, unit="pt"))

gg_left <- gg_template + geom_point(aes(color=IsReal), size=0.1, alpha=0.15) +
  guides(color=guide_legend(override.aes=list(size=1.5, alpha=1)))

gg_right <- gg_template + geom_point(aes(color=MitochondrionFraction), size=0.1, alpha=0.15) +
  scale_color_gradientn(colours=c("#1200ba", "#347fff", "#cc4000", "#ff3333"), 
                        values=scales::rescale(c(0, 0.1, 0.3, 0.8)), 
                        breaks=seq(0, 1.0, 0.2)) +
  guides(color=guide_colorbar(direction="horizontal", title.position="top", 
                              title="Mitochondrial\nfraction", 
                              barwidth=unit(1.2, units="in")))

cowplot::plot_grid(gg_left, gg_right)
ggplot(umi_by_species) + 
  geom_point(aes(x=Total, y=pmin(Human, Mouse) / Total, color=Organism), size=0.1, 
             alpha=0.1) +
  scale_x_log10(name='#Real UMIs', limits=c(10, 2e5)) + annotation_logticks() + 
  ylab('Fraction of mixed UMIs') +
  guides(color=guide_legend(override.aes=list(size=1.5, alpha=1))) +
  theme_pdf(legend.pos=c(1, 1))

Check for constant background

Background cells have constant fraction of mouse and human reads:

mouse_frac <- umi_by_species %>% filter(IsReal) %>% 
  summarise(Mouse=sum(Mouse[Organism == 'Mouse']), Human=sum(Human[Organism == 'Human']), 
            MF=Mouse / (Mouse + Human)) %>% .$MF

ggplot(umi_by_species) + 
  geom_histogram(aes(x=Mouse / Total, y=..density.., fill=IsReal), binwidth=0.005, 
                 position="identity") + 
  geom_vline(xintercept=mouse_frac) +
  xlab("Fraction of mouse reads") +
  theme_pdf(legend.pos=c(1, 1))

Distribution of total number of molecules by background cells:

gg <- ggplot(umi_by_species %>% filter(!IsReal)) +
  geom_histogram(aes(x=Total), bins=100) +
  scale_x_continuous(limits=c(0, 600), expand=c(0, 0), name="Total #UMIs") +
  scale_y_continuous(limits=c(0, 6000), expand=c(0, 0), name="#Cells") +
  theme_pdf()

gg

Figure

arrows_df <- umi_by_species %>% group_by(Type) %>% 
  summarise(MouseEnd=median(Mouse), HumanEnd=median(Human)) %>%
  mutate(Mouse=c(1e1, 2e4, 7e1, 6e4), 
         Human=c(1e3, 5e3, 7e4, 1.5e2))


gg_fig <- gg_template + 
  geom_point_rast(aes(color=MitochondrionFraction), size=0.1, alpha=0.15, width=6, 
                  height=4, dpi=200) +
  scale_color_gradientn(colours=c("#1200ba", "#347fff", "#cc4000", "#ff3333"), 
                        values=scales::rescale(c(0, 0.1, 0.3, 0.8)), 
                        breaks=seq(0, 1.0, 0.2)) +
  guides(color=guide_colorbar(direction="horizontal", title.position="top", 
                              title="Mitochondrial\nfraction", 
                              barwidth=unit(1.2, units="in"))) + 
  stat_ellipse(aes(group=Type), level=0.9999) +
  geom_segment(aes(xend=MouseEnd, yend=HumanEnd, group=Type),  data=arrows_df, 
               arrow=arrow(length = unit(0.03, "npc"))) +
  geom_label(aes(label=Type),  data=arrows_df, fill=alpha('white', 1)) +
  theme(plot.margin=margin(1, 1, 1, 1))

try(invisible(dev.off()), silent=T)
ggsave(paste0(kPlotDir, 'supp_human_mouse.pdf'), gg_fig, width=6, height=4)
gg_fig

Session information




VPetukhov/dropEstAnalysis documentation built on Dec. 28, 2019, 8:16 p.m.