In this analysis, we demonstrate that MitoTrace identifies germline mutations in droplet-based single-cell RNA sequencing data (10X Genomics). The underlying BAM file is available from McGinnis et al. (McGinnis et al. 2019).

Load R libraries

library("data.table")
library("MitoTrace")
library("ggplot2")

Read the BAM files, the bam file we used could be downloaded from here (https://ucsf.app.box.com/s/vg1bycvsjgyg63gkqsputprq5rxzjl6k) or (https://drive.google.com/open?id=1y3WnnLnjLf4ZMNkyiqIqPPWzKPpgnZDP). Please use samtools to re-index the bam file

bams <- list.files("../../data/", full.names = T, pattern = ".bam$")

Read the annotation files

demuxlet <- fread("../Data/McGinnisEtAl/annotation_file/jurkat_293t_demuxlet.best")

Read the human GRCH38 Mitochondrial reference genome

fasta_loc <- "../Data/GRCH38_MT.fa"

MitoTrace calculates alternative allele counts and read coverage for each nucleotide position

mae_res <- MitoTrace(bam_list = bams, fasta = fasta_loc, chr_name = "MT", min_read = 100)

MitoTrace calculates the allele frequencies

af <- calc_allele_frequency(mae_res)

Perform unsupervised dimension reduction on highly variable alles.

ok <- intersect(colnames(af), demuxlet$BARCODE)
af <- af[,ok]
cell_line <- sapply(demuxlet$BEST,function(x){strsplit(x,"-")[[1]][[2]]})
cell_line <- cell_line[match(ok, demuxlet$BARCODE)]

good_variants <- names(which(rowMeans(af) > 0.3))
good_variants <- names(tail(sort(apply(af, 1, var)), 20))
af_hv <- data.matrix(af[good_variants, ])
ydata <- prcomp(t(af_hv))

aframe <- data.frame(cell_line, ydata$x)
ggplot(aes(PC1, PC2, color = cell_line), data = aframe) + geom_point()

anno <- data.frame(cell_line)
rownames(anno) <- colnames(af_hv)
pheatmap::pheatmap(af_hv, show_colnames = F, annotation_col = anno)

Can we identify doublets? Build classifier on highly informative alleles.

pvals <- apply(af[which(Matrix::rowMeans(af) > 0), ], 1, function(x){
  splitz <- split(x, cell_line)
  wilcox.test(splitz[[1]], splitz[[2]])$p.value 
})
good <- names(which(pvals < 1e-5))

af_hv <- data.matrix(af[good, ])
ydata <- prcomp(t(af_hv))

cell_line2 <- demuxlet$BEST[match(ok, demuxlet$BARCODE)]
cell_line2[grep("DBL", cell_line2)] <- "DBL"
aframe <- data.frame(cell_line2, ydata$x)
ggplot(aes(PC1, PC2, color = cell_line2), data = aframe) + geom_point()


lkmklsmn/MitoTrace documentation built on June 29, 2023, 5:55 a.m.