In this analysis, we demonstrate that MitoTrace identifies germline mutations in droplet-based single-cell RNA sequencing data (10X Genomics). The underlying BAM file is available from McGinnis et al. (McGinnis et al. 2019).
Load R libraries
library("data.table") library("MitoTrace") library("ggplot2")
Read the BAM files, the bam file we used could be downloaded from here (https://ucsf.app.box.com/s/vg1bycvsjgyg63gkqsputprq5rxzjl6k) or (https://drive.google.com/open?id=1y3WnnLnjLf4ZMNkyiqIqPPWzKPpgnZDP). Please use samtools to re-index the bam file
bams <- list.files("../../data/", full.names = T, pattern = ".bam$")
Read the annotation files
demuxlet <- fread("../Data/McGinnisEtAl/annotation_file/jurkat_293t_demuxlet.best")
Read the human GRCH38 Mitochondrial reference genome
fasta_loc <- "../Data/GRCH38_MT.fa"
MitoTrace calculates alternative allele counts and read coverage for each nucleotide position
mae_res <- MitoTrace(bam_list = bams, fasta = fasta_loc, chr_name = "MT", min_read = 100)
MitoTrace calculates the allele frequencies
af <- calc_allele_frequency(mae_res)
Perform unsupervised dimension reduction on highly variable alles.
ok <- intersect(colnames(af), demuxlet$BARCODE) af <- af[,ok] cell_line <- sapply(demuxlet$BEST,function(x){strsplit(x,"-")[[1]][[2]]}) cell_line <- cell_line[match(ok, demuxlet$BARCODE)] good_variants <- names(which(rowMeans(af) > 0.3)) good_variants <- names(tail(sort(apply(af, 1, var)), 20)) af_hv <- data.matrix(af[good_variants, ]) ydata <- prcomp(t(af_hv)) aframe <- data.frame(cell_line, ydata$x) ggplot(aes(PC1, PC2, color = cell_line), data = aframe) + geom_point() anno <- data.frame(cell_line) rownames(anno) <- colnames(af_hv) pheatmap::pheatmap(af_hv, show_colnames = F, annotation_col = anno)
Can we identify doublets? Build classifier on highly informative alleles.
pvals <- apply(af[which(Matrix::rowMeans(af) > 0), ], 1, function(x){ splitz <- split(x, cell_line) wilcox.test(splitz[[1]], splitz[[2]])$p.value }) good <- names(which(pvals < 1e-5)) af_hv <- data.matrix(af[good, ]) ydata <- prcomp(t(af_hv)) cell_line2 <- demuxlet$BEST[match(ok, demuxlet$BARCODE)] cell_line2[grep("DBL", cell_line2)] <- "DBL" aframe <- data.frame(cell_line2, ydata$x) ggplot(aes(PC1, PC2, color = cell_line2), data = aframe) + geom_point()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.