knitr::read_chunk("../../analysis/chunks.R")
library(ggplot2) library(ggrastr) library(dplyr) library(parallel) library(dropestr) library(dropEstAnalysis) library(Matrix) theme_set(theme_base)
kDataPath <- '../../data/dropest/' kTablesPath <- '../../output/tables/' k10xSubfolders <- c(poisson='est_01_14_precise/', real='est_01_14_barcodes/', unmerged='est_01_14_unmerged/', merge_all='est_01_16_merge_all/') kDropSeqSubolders <- c(poisson='est_01_16_precise/', unmerged='est_01_16_unmerged/', merge_all='est_01_16_merge_all/') kDataFiles <- list( `10x`=paste0(kDataPath, '10x/hgmm_6k/', k10xSubfolders, "hgmm6k.rds") %>% setNames(names(k10xSubfolders)), drop_seq=paste0(kDataPath, 'dropseq/thousand/', kDropSeqSubolders, "thousand.rds") %>% setNames(names(kDropSeqSubolders)) )
holders <- mclapply(kDataFiles, function(paths) mclapply(paths, readRDS, mc.cores=4), mc.cores=2) validation_data <- mclapply(holders, function(hs) list( merge_targets = lapply(hs, function(holder) unlist(holder$merge_targets)), cms_raw = lapply(hs, `[[`, 'cm_raw'), cms = lapply(hs, `[[`, 'cm') ), mc.cores=8) validation_data$`10x`$cms_raw <- lapply(validation_data$`10x`$cms_raw, function(cm) cm[grep("^[^;]+$", rownames(cm)),]) validation_data$`10x`$cms <- lapply(validation_data$`10x`$cms, function(cm) cm[grep("^[^;]+$", rownames(cm)),]) rm(holders) invisible(gc()) # saveRDS(validation_data, paste0(kDataPath, 'human_mouse_mixture_validation_data.rds')) # validation_data <- readRDS(paste0(kDataPath, 'human_mouse_mixture_validation_data.rds'))
umis_per_cb <- Matrix::colSums(validation_data$`10x`$cms$unmerged) %>% sort(decreasing=T) real_cbs <- names(umis_per_cb)[1:6000] PlotCellsNumberLine(umis_per_cb[1:10000])
GeneSpeciesFromMixture <- function(cm, org1.marker, org1.name, org2.name) { res <- ifelse(substr(rownames(cm), 1, nchar(org1.marker)) == org1.marker, org1.name, org2.name) return(as.factor(res)) } CellSpeciesFromMixture <- function(gene.species, cm) { res <- levels(gene.species) %>% lapply(function(l) cm[gene.species == l,] %>% Matrix::colSums()) res <- levels(gene.species)[as.integer(res[[1]] < res[[2]]) + 1] %>% setNames(colnames(cm)) %>% as.factor() return(res) }
gene_species <- GeneSpeciesFromMixture(validation_data$`10x`$cms_raw$unmerged, 'hg', 'Human', 'Mouse') cell_species <- CellSpeciesFromMixture(gene_species, validation_data$`10x`$cms_raw$unmerged) table(cell_species[real_cbs]) table(cell_species) / sum(table(cell_species))
merge_targets <- lapply(validation_data$`10x`$merge_targets, function(mt) mt[mt %in% real_cbs]) comparison_10x <- MergeComparisonSummary(merge_targets, cell_species, dataset="10x hgmm6k") comparison_10x$`Merge type` <- c('Poisson', 'Known barcodes', 'Simple') comparison_10x
umis_per_cb <- Matrix::colSums(validation_data$drop_seq$cms$unmerged) %>% sort(decreasing=T) real_cbs <- names(umis_per_cb)[1:1000] PlotCellsNumberLine(umis_per_cb[1:5000])
gene_species <- GeneSpeciesFromMixture(validation_data$drop_seq$cms_raw$unmerged, 'HUMAN', 'Human', 'Mouse') cell_species <- CellSpeciesFromMixture(gene_species, validation_data$drop_seq$cms_raw$unmerged) table(cell_species[real_cbs]) table(cell_species)
merge_targets <- lapply(validation_data$drop_seq$merge_targets, function(mt) mt[mt %in% real_cbs]) comparison_drop_seq <- MergeComparisonSummary(merge_targets, cell_species, dataset='Drop-seq thousand') comparison_drop_seq$`Merge type` <- c('Poisson', 'Simple') comparison_drop_seq
complete_table <- rbind(comparison_10x, comparison_drop_seq) write.csv(complete_table, paste0(kTablesPath, 'merge_comparison.csv'), row.names=F)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.