In this analysis, we will reproduce the selected colony-specific mutations in donor 1 colonies, the results is shwon by heatmap and boxplot in figures 5H & 5I from Leif et al.

Load R related libraries into R environment.

library("MitoTrace")
library("ComplexHeatmap")
library("circlize")

Read the BAM files into R, the BAM files can be downloaded from here.

bams <- list.files("../Data/LeifEtAl/bam_file", full.names = T, pattern = ".bam$")

Read the human GRCH38 Mitochondrial reference genome into R.

fasta_loc <- "../Data/GRCH38_MT.fa"

Read the sample label file

ann <- read.table("../Data/LeifEtAl/annotation_file/SRR_donors_list")

MitoTrace calculates alternative allele counts and read coverage for each nucleotide position

mae_res <- MitoTrace(bam_list = bams, fasta = fasta_loc, chr_name = "MT")

MitoTrace calculate the allele frequencies

af <- calc_allele_frequency(mae_res)
# Change the column name
colnames(af) <- unlist(lapply(colnames(af), function(x) strsplit(x, ".", fixed=T)[[1]][1]))

# Exclude cells that do not have the label
af01 <- af[ ,na.omit(ann$V1[match(colnames(af), ann$V1)])]

# Change the column name of new dataframe
colnames(af01) <- na.omit(ann$V2[match(colnames(af), ann$V1)])
donors <- unlist(lapply(colnames(af01), function(x) paste(strsplit(x, "_", fixed = T)[[1]][1:2], collapse = "_")))

Boxplot to show the selected colony-specific mutations, as it's shown in Figure 5I

# plot first row
par(mfrow = c(2,6))
boxplot(rev(split(af01["779T>C", ], donors == "Donor1_C101")), main="779 T>C",ylab="Variant heteroplasmy", names=c("C101", "Other"))
boxplot(rev(split(af01["8978T>C", ], donors == "Donor1_C103")), main="8979 T>C", names=c("C103","Other"))
boxplot(rev(split(af01["6712A>G", ], donors == "Donor1_C107")), main="6712 A>G", names=c("C107","Other"))
boxplot(rev(split(af01["1082A>G", ], donors == "Donor1_C109")), main="1082 A>G", names=c("C109","Other"))
boxplot(rev(split(af01["3776G>A", ], donors == "Donor1_C112")), main="3776 T>A", names=c("C112","Other"))
boxplot(rev(split(af01["7275T>C", ], donors == "Donor1_C114")), main="7275 T>C", names=c("C114","Other"))
# plot second row
boxplot(rev(split(af01["13093G>A", ], donors == "Donor1_C116")), main="13093 G>A", ylab="Variant heteroplasmy", names=c("C116","Other"))
boxplot(rev(split(af01["7340G>A", ], donors == "Donor1_C118")), main="7340 G>A", names=c("C118","Other"))
boxplot(rev(split(af01["7754G>A", ], donors == "Donor1_C120")), main="7755 G>A", names=c("C120","Other"))
boxplot(rev(split(af01["2646G>A", ], donors == "Donor1_C124")), main="2648 G>A", names=c("C124","Other"))
boxplot(rev(split(af01["11622A>C", ], donors == "Donor1_C132")), main="11623 T>C", names=c("C132","Other"))
boxplot(rev(split(af01["1446A>G", ], donors == "Donor1_C135")), main="1448 A>G", names=c("C135","Other"))

par(mfrow = c(1,1))

Read the target mutation

target_mutation <- read.csv("../Data/LeifEtAl/annotation_file/target_mutation.csv", header = FALSE)
selected_mutation <- af01[target_mutation$V1, ]

Read the the cell order

cell_order <- read.csv("../Data/LeifEtAl/annotation_file/target_cell.csv", header = FALSE)
selected_mutation <- selected_mutation[, cell_order$V1]

Visulized the colony-specific mutations by heapmap, as it's shown in Figure 5H

col_fun = colorRamp2(c(0, 0.1, 0.2), c("white", "firebrick", "firebrick4"))
Heatmap(as.matrix(selected_mutation), 
        col= col_fun, 
        name = "Allele\nFrequency",
        show_column_names = FALSE, 
        cluster_columns = FALSE, 
        cluster_rows = FALSE, 
        column_title="Specific colonies", 
        row_title = "Mitochondiral mutations",
        row_names_gp = gpar(fontsize = 6))


lkmklsmn/MitoTrace documentation built on June 29, 2023, 5:55 a.m.