##PBMCs benchmarking###########################################################################################################
# Create cell by bin matrix from generated .h5 (snap) file
p.x.sp = createSnap(
file="/home/zeinab/Documents/Zeinab/SIMATAC/data/benchmarking/10x_PBMC_5k/10x_PBMC_5k.snap",
sample="labels",
num.cores=1
)
p.x.sp = addBmatToSnap(p.x.sp, bin.size=5000)
# Read true cell labels with cell barcodes list
metadata <- read.table('/home/zeinab/Documents/Zeinab/SIMATAC/data/benchmarking/10x_PBMC_5k/10x_PBMC_5k_metadata.tsv', header = TRUE)
metadata$barcode <- substring(metadata$barcode, first = 1, last = 16)
label <- sapply(p.x.sp@barcode,
function(x) as.character(metadata[which(metadata$barcode == x),]$label))
p.x.sp@sample <- unlist(label)
# Define each cell group
index1 <- which(p.x.sp@sample == "1")
p.x.sp1 <- p.x.sp[index1 ,]
# p.x.sp1
index2 <- which(p.x.sp@sample == "2")
p.x.sp2 <- p.x.sp[index2,]
# p.x.sp2
index3 <- which(p.x.sp@sample == "3")
p.x.sp3 <- p.x.sp[index3,]
# p.x.sp3
index4 <- which(p.x.sp@sample == "4")
p.x.sp4 <- p.x.sp[index4,]
# p.x.sp4
index5 <- which(p.x.sp@sample == "5")
p.x.sp5 <- p.x.sp[index5,]
# p.x.sp5
index6 <- which(p.x.sp@sample == "6")
p.x.sp6 <- p.x.sp[index6,]
# p.x.sp6
index7 <- which(p.x.sp@sample == "7")
p.x.sp7 <- p.x.sp[index7,]
# p.x.sp7
index8 <- which(p.x.sp@sample == "8")
p.x.sp8 <- p.x.sp[index8,]
# p.x.sp8
# This function simulates each cell group from PBMCs separately, and saves all together in
# a .h5 file for performing evaluation.
# Inputs:
# version: An string to put all benchmarking plots and files for this simulation in a folder with
# that name.
# mean: The Gaussian noise mean to be used for simATAC simulation.
# sd: The Gaussian noise standard deviation to be used for simATAC simulation.
# Output: -
#
simulatePBMC <- function(version, mean, sd){
# Create a folder for saving simulated count matrix.
dir.create(paste("Results/PBMCs/", version, sep = ""))
id = paste("Results/PBMCs/", version, "/PBMCs", sep = "")
gc()
# Simulate each cell group with simATAC.
sim2.1 <- simulate(p.x.sp1, mean, sd, species = "human")
gc()
sim2.2 <- simulate(p.x.sp2, mean, sd, species = "human")
gc()
sim2.3 <- simulate(p.x.sp3, mean, sd, species = "human")
gc()
sim2.4 <- simulate(p.x.sp4, mean, sd, species = "human")
gc()
sim2.5 <- simulate(p.x.sp5, mean, sd, species = "human")
gc()
sim2.6 <- simulate(p.x.sp6, mean, sd, species = "human")
gc()
sim2.7 <- simulate(p.x.sp7, mean, sd, species = "human")
gc()
sim2.8 <- simulate(p.x.sp8, mean, sd, species = "human")
gc()
# Combine simulated cell groups together to save for further analysis (for performing cell type clustering analysis).
data <- rbind(t(assays(sim2.1)$counts), t(assays(sim2.2)$counts), t(assays(sim2.3)$counts), t(assays(sim2.4)$counts),
t(assays(sim2.5)$counts), t(assays(sim2.6)$counts), t(assays(sim2.7)$counts), t(assays(sim2.8)$counts))
data <- as(data, "dgCMatrix")
label <- unlist(c(p.x.sp1@sample, p.x.sp2@sample, p.x.sp3@sample, p.x.sp4@sample,
p.x.sp5@sample, p.x.sp6@sample, p.x.sp7@sample, p.x.sp8@sample))
gc()
# Save simulated matrix with labels in a h5 file.
if (file.exists(paste(id, "_sim_mat.h5", sep="")))
#Delete file if it exists
file.remove(paste(id, "_sim_mat.h5", sep=""))
mat <- summary(data)
h5createFile(paste(id, "_sim_mat.h5", sep=""))
h5write(mat, paste(id, "_sim_mat.h5", sep=""), "sim")
h5write(label, paste(id, "_sim_mat.h5", sep=""), "label")
return()
}
# This function performs benchmarking on simATAC's simulated counts with a specific version
# by plotting each cell group's original and simulated parameters and comparing them, and extracting
# statistical similarity parameters.
# Input:
# version: An string to put all benchmarking plots and files for this simulation in a folder with
# that name.
# Output: -
#
benchmarkPBMC <- function(version){
address <- "Results"
name <- "PBMCs"
# Load simulated cells for the input version parameter.
count <- readSim(paste(address, "/", name, "/", version, "/", name, "_sim_mat.h5", sep = ""),
ncol(p.x.sp@bmat),
nrow(p.x.sp@bmat))
# Define cell groups for each label.
cell.p.1 <- count[, which(colnames(count) == "1")]
cell.p.2 <- count[, which(colnames(count) == "2")]
cell.p.3 <- count[, which(colnames(count) == "3")]
cell.p.4 <- count[, which(colnames(count) == "4")]
cell.p.5 <- count[, which(colnames(count) == "5")]
cell.p.6 <- count[, which(colnames(count) == "6")]
cell.p.7 <- count[, which(colnames(count) == "7")]
cell.p.8 <- count[, which(colnames(count) == "8")]
# Perform benchmarking and plot simulated and real parameters for each cell group.
plotFigures(t(p.x.sp1@bmat), cell.p.1, address, name, "Cell1", version)
plotFigures(t(p.x.sp2@bmat), cell.p.2, address, name, "Cell2", version)
plotFigures(t(p.x.sp3@bmat), cell.p.3, address, name, "Cell3", version)
plotFigures(t(p.x.sp4@bmat), cell.p.4, address, name, "Cell4", version)
plotFigures(t(p.x.sp5@bmat), cell.p.5, address, name, "Cell5", version)
plotFigures(t(p.x.sp6@bmat), cell.p.6, address, name, "Cell6", version)
plotFigures(t(p.x.sp7@bmat), cell.p.7, address, name, "Cell7", version)
plotFigures(t(p.x.sp8@bmat), cell.p.8, address, name, "Cell8", version)
# Perform benchmarking and plot simulated and real parameters for each cell group.
p.x.sp.sim2 <- p.x.sp
p.x.sp.sim2@bmat <- rbind(t(cell.p.1), t(cell.p.2), t(cell.p.3), t(cell.p.4),
t(cell.p.5), t(cell.p.6), t(cell.p.7), t(cell.p.8))
p.x.sp.sim2@barcode <- unlist(c(p.x.sp1@barcode, p.x.sp2@barcode, p.x.sp3@barcode, p.x.sp4@barcode,
p.x.sp5@barcode, p.x.sp6@barcode, p.x.sp7@barcode, p.x.sp8@barcode))
p.x.sp.sim2@sample <- unlist(c(p.x.sp1@sample, p.x.sp2@sample, p.x.sp3@sample, p.x.sp4@sample,
p.x.sp5@sample, p.x.sp6@sample, p.x.sp7@sample, p.x.sp8@sample))
p.x.sp.sim2@file <- unlist(c(p.x.sp1@file, p.x.sp2@file, p.x.sp3@file, p.x.sp4@file,
p.x.sp5@file, p.x.sp6@file, p.x.sp7@file, p.x.sp8@file))
p.x.sp.sim2@metaData <- rbind(p.x.sp1@metaData, p.x.sp2@metaData, p.x.sp3@metaData, p.x.sp4@metaData,
p.x.sp5@metaData, p.x.sp6@metaData, p.x.sp7@metaData, p.x.sp8@metaData)
gc()
# Cell type clustering with SnapATAC.
# Adjsut the number of reduced dimensions to 5
nmi <- SnapATACClustering(p.x.sp.sim2,
"human",
paste(address, "/", name, "/", version, "/", name, "_sim.txt", sep = ""))
write(paste(name, version, as.character(nmi), sep = " "),
file=paste(address, "/", name, "/", name, "_clustering_metric.txt", sep = ""),
append=TRUE)
print(paste(name, "clustering nmi:", nmi, sep = " "))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.