#' Order Snap based on the sample information
#' This is critical for adding bmat and other related functions in SnapATAC
#' @param s snap object
#' @return snap object
#' @export
orderSnap <- function(s) {
if (all(s@sample == sort(s@sample))) {
message("No need to order the snap since samples are ordered.")
} else {
message("Need to order the snap since samples are not ordered")
s <- s[order(s@sample), ]
}
return(s)
}
#' Empty snap's KNN graph if it has.
#' @param s snap object
#' @return snap object
#' @export
emptySnapGraph <- function(s) {
ngraph <- nrow(s@graph@mat)
if (ngraph > 0) {
message(ngraph, " nodes in current snap graph, and remove it.")
s@graph@mat <- Matrix::Matrix(0, 0, 0, sparse = TRUE)
s@graph@k <- numeric()
s@graph@snn <- FALSE
s@graph@file <- character()
s@graph@snn.prune <- numeric()
} else {
message("No KNN graph is found in the snap.")
}
return(s)
}
#' Empty snap's embedding recorded in smat slot if it has.
#' @param s snap object
#' @return snap object
#' @export
emptySnapEmbedding <- function(s) {
if (nrow(s@smat@dmat) > 0) {
message("Snap has smat, now remove it.")
s@smat@dmat <- matrix(0, 0, 0)
s@smat@sdev <- numeric()
s@smat@method <- character()
} else {
message("Snap has no smat.")
}
return(s)
}
#' Convert snap object to Seurat by gmat
#'
#' @param snap snap object defined in SnapATAC package
#' snap@gmat must be not empty; snap@smat@dmat should not be empty
#' @param eigDims vector, used for choosing PCA components, default 1:50
#' @param assay characters, name used in Seurat object
#' @param pcaPrefix characters, default "SnapATAC_"
#' @param nameDelim characters, default "-"
#' @param useSnapATACEmbed bool, use SnapATAC embedding as pca
#' embeding in Seurat, default TRUE.
#' @return Seurat object
#' @import Seurat
#' @export
snapGmat2Seurat <- function(snap, eigDims = 1:50,
assay = "GeneScore",
pcaPrefix = "SnapATAC_",
nameDelim = "-",
useSnapATACEmbed = TRUE) {
# check snap@gmat
# check snap@smat@dmat
metaData <- snap@metaData
rownames(metaData) <- paste(metaData$sample, metaData$barcode, sep = ".")
gmatUse <- Matrix::t(snap@gmat)
colnames(gmatUse) <- paste(metaData$sample, metaData$barcode, sep = ".")
snapSeurat <- Seurat::CreateSeuratObject(counts = gmatUse, assay =
assay,
names.delim = nameDelim)
snapSeurat <- Seurat::AddMetaData(snapSeurat, metadata = metaData)
if (useSnapATACEmbed) {
message("Use SnapATAC Embed as pca in Seurat.")
pcaUse <- snap@smat@dmat[, eigDims]
rownames(pcaUse) <- paste(metaData$sample, metaData$barcode, sep = ".")
colnames(pcaUse) <- paste0(pcaPrefix, 1:ncol(pcaUse))
snapSeurat[["pca"]] <- methods::new(Class = "DimReduc", cell.embeddings = pcaUse,
feature.loadings = matrix(0, 0, 0),
feature.loadings.projected = matrix(0, 0, 0),
assay.used = assay, stdev = rep(1, ncol(pcaUse)),
key = pcaPrefix,
jackstraw = new(Class = "JackStrawData"), misc = list())
}
return(snapSeurat)
}
#' Consensus plot.
#' @param consensusFile characters
#' @param type characters, cell type name for title
#' @param outPDF characters
#' @param width numeric, default is 7
#' @param height numeric, default is 7
#' @return None.
#' Side effect: generate the figure file.
#' @importFrom graphics axis legend mtext par
#' @importFrom methods new
#' @importFrom utils read.table
#' @importFrom stats as.formula complete.cases
#' @importFrom grDevices dev.off pdf
#' @export
plotConsensus <- function(consensusFile, type,
outPDF, width = 7, height = 7) {
consensus <- read.table(consensusFile)
colnames(consensus) <- c("file", "Resolution", "Dispersion",
"ProportionOfAmbiguousClustering")
pdf(file = outPDF, width = width, height = height)
par(mar = c(5, 4, 4, 4) + 0.3)
plot(consensus$Resolution, consensus$Dispersion,
type = "l", pch = 16, col = "red", lwd = 4,
xlab = "Resolution", ylab = "Dispersion", cex.lab = 1.5,
main =
paste("Consensus analysis for Leiden-base clustering on",
type, "cells")
)
par(new = TRUE)
plot(consensus$Resolution, consensus$ProportionOfAmbiguousClustering,
pch = 17, col = "blue",
axes = FALSE, type = "l", lwd = 4, xlab = "", ylab = ""
)
axis(side = 4,
at = pretty(range(consensus$ProportionOfAmbiguousClustering)))
legend("topright", legend = c("Dispersion (left;red)", "PAC (right;blue)"),
col = c("red", "blue"), cex = 1, lty = c(3, 3))
mtext("Proportion Of Ambiguous Clustering (PAC)",
side = 4, line = 3, cex = 1.5)
dev.off()
}
#' Down sample snap object based on a given cluster meta.
#' @param snap SnapObject from SnapATAC
#' @param cluster vector of characters/integer
#' aligned with the cells in snap in order.
#' @param n integer, number of samples in each unique cluster id.
#' @return SnapObject
#' @export
downSampleOnSnap <- function(snap, cluster, n = 200) {
snapList <- lapply(unique(cluster), function(i) {
s <- snap[cluster %in% i, , drop = FALSE]
if(SnapATAC::nrow(s) <= n) {
return(s)
} else {
return(s[sample(SnapATAC::nrow(s), size = n, replace = F), ])
}
})
return(SnapATAC::snapListRbind(snapList = snapList))
}
#' SnapATAC landmark embedding.
#' @param snap snap object of SnapATAC, default NULL
#' @param snapFile characters name of the snap File to load, default NULL
#' @param blacklistGR GenomicRanges object, default NULL
#' @param blackListFile characters file of the blacklistGR, default NULL
#' @param binSize integer dim of bmat, default 5,000
#' @param nPC integer dim of embeding for embedding, default 50
#' @param ncores integer number of cores to use for parallel, default 1
#' @param outFile characters to which we write snap with embedding, default NULL
#' @param excludeChr characters used to filter bmat, default "random|chrM"
#' @param removeBmat bool default FALSE
#' @param removeJmat bool default FALSE
#' @return SnapObject
#' @export
landmarkEmbedding <- function(snap = NULL,
snapFile = NULL,
blacklistGR = NULL,
blackListFile = NULL,
binSize = 5000,
nPC = 50,
ncores = 1,
outFile = NULL,
excludeChr = "random|chrM",
removeBmat = FALSE,
removeJmat = FALSE) {
if (!is.null(snapFile)) {
if (grepl("\\.RData", snapFile)) {
snap <- loadRData(snapFile, check = FALSE)
} else {
snap <- readRDS(snapFile)
}
}
if (is.null(snap)) {
stop("No snap found.")
}
if (!is.null(blackListFile)) {
blackList <- read.table(blackListFile,
header = FALSE, quote = "")
blacklistGR <- GenomicRanges::GRanges(
seqnames = blackList[, 1],
ranges = IRanges::IRanges(blackList[, 2], blackList[, 3]))
}
if(is.null(blacklistGR)) {
stop("No blacklist found")
}
if (!is.null(outFile)) {
prepareOutfile(outFile)
}
## add bmat
if (nrow(snap@bmat) > 0) {
message("Snap has bmat, will skip addBmat.")
} else {
snap <- SnapATAC::addBmatToSnap(
obj = snap, bin.size = binSize, do.par = TRUE,
num.cores = ncores,
checkSnap = FALSE
)
}
## preprocess
idy <- S4Vectors::queryHits(
GenomicRanges::findOverlaps(snap@feature, blacklistGR))
if (length(idy) > 0) {
message("remove blacklist")
snap <- snap[, -idy, mat = "bmat"]
}
chr.exclude <- GenomeInfoDb::seqlevels(snap@feature)[
grep(excludeChr, GenomeInfoDb::seqlevels(snap@feature))
]
idy <- grep(paste(chr.exclude, collapse = "|"), snap@feature)
if (length(idy) > 0) {
message("remove ", excludeChr)
snap <- snap[, -idy, mat = "bmat"]
}
## filter based on bincoverage
bin.cov <- log10(Matrix::colSums(snap@bmat) + 1)
binCutoff <- quantile(bin.cov[bin.cov > 0], 0.95)
idy <- which(bin.cov <= binCutoff & bin.cov > 0)
snap <- snap[, idy, mat = "bmat"]
## binary snap bmat
if(max(snap@bmat) == 1) {
message("@bmat is already binarized.")
} else {
snap <- SnapATAC::makeBinary(snap, mat = "bmat")
}
snap <- SnapATAC::runDiffusionMaps(
obj = snap,
input.mat = "bmat",
num.eigs = nPC,
method = "RSpectra"
)
snap@metaData$landmark <- 1
if(removeBmat & (nrow(methods::slot(snap, "bmat")) != 0L)) {
message("Remove bmat.")
snap <- SnapATAC::rmBmatFromSnap(snap)
}
if(removeJmat) {
message("Remove jmat.")
snap@jmat <- SnapATAC::newJaccard()
}
if (!is.null(outFile)) {
saveRDS(object = snap, file = outFile)
}
return(snap)
}
#' SnapATAC query embedding.
#' @param snapLandmark snap object of SnapATAC, default NULL
#' @param snaplandmarkFile characters name of the snap File to load, default NULL
#' @param snapQuery snap object of SnapATAC, default NULL
#' @param snapQueryFile characters name of the snap File to load, default NULL
#' @param binSize integer dim of bmat, default 5,000
#' @param outFile characters to which we write snap with embedding, default NULL
#' @param chunkSize integer default 20,000
#' @param removeBmat bool default FALSE
#' @param removeJmat bool default FALSE
#' @return SnapObject
#' @export
queryEmbedding <- function(snapLandmark = NULL,
snapLandmarkFile = NULL,
snapQuery = NULL,
snapQueryFile = NULL,
binSize = 5000,
outFile = NULL,
chunkSize = 20000,
removeBmat = TRUE,
removeJmat = TRUE) {
if (!is.null(snapLandmarkFile)) {
snapLandmark <- readRDS(snapLandmarkFile)
}
if (is.null(snapLandmark)) {
stop("No snapLandmark found.")
}
if (!is.null(snapQueryFile)) {
snapQuery <- readRDS(snapQueryFile)
}
if (is.null(snapQuery)) {
stop("No snapQuery found.")
}
if (!is.null(outFile)) {
prepareOutfile(outFile)
}
n <- SnapATAC::nrow(snapQuery)
if (n > chunkSize) {
message(n, " has too many cells than ",
chunkSize, " chunk.")
message("now partition them into chunks.")
chunks <- split(seq(n), ceiling(seq(n) / chunkSize))
} else {
chunks <- NULL
}
message("Add bmat.")
if (is.null(chunks)) {
snapQuery <- queryEmbedding.default(
snapLandmark, snapQuery, binSize, removeBmat, removeJmat)
} else {
snapQueryList <- lapply(chunks, function(i) {
s <- snapQuery[i, , drop = FALSE]
s <- queryEmbedding.default(
snapLandmark, s, binSize, removeBmat, removeJmat
)
return(s)
})
snapQuery <- SnapATAC::snapListRbind(snapList = snapQueryList,
checkSnap = FALSE)
}
if(!is.null(outFile)) {
saveRDS(object = snapQuery, file = outFile)
}
return(snapQuery)
}
queryEmbedding.default <- function(snapLandmark,
snapQuery,
binSize = 5000,
removeBmat = TRUE,
removeJmat = TRUE) {
if (nrow(snapQuery@bmat) > 0) {
message("snapQuery has bmat, will skip addBmat for it.")
} else {
snapQuery <- SnapATAC::addBmatToSnap(snapQuery, bin.size = binSize)
}
if(max(snapQuery@bmat) == 1) {
message("@bmat is already binaryized.")
} else {
snapQuery <- SnapATAC::makeBinary(snapQuery)
}
idy <- unique(S4Vectors::queryHits(
GenomicRanges::findOverlaps(snapQuery@feature, snapLandmark@feature)))
snapQuery <- snapQuery[, idy, mat = "bmat"]
message("Run embedding for query set.")
snapQuery <- SnapATAC::runDiffusionMapsExtension(
obj1 = snapLandmark,
obj2 = snapQuery,
input.mat = "bmat"
)
snapQuery@metaData$landmark <- 0
if (removeBmat & (nrow(methods::slot(snapQuery, "bmat")) != 0L)) {
message("Remove query bmat")
snapQuery <- SnapATAC::rmBmatFromSnap(snapQuery)
}
if (removeJmat) {
message("Remove jmat.")
snapQuery@jmat <- SnapATAC::newJaccard()
}
return(snapQuery)
}
#' Run SnapATAC KNN.
#'
#' It will check snapLandmark and snapQuery firstly,
#' if snapLandmark is not NULL, will use it.
#' if snapQuery then is also NULL, will merge them.
#' if both of them are NULL, then use snapAllFile, snapAll in order.
#'
#' @param snapAll SnapObject, default NULL
#' @param snapAllFile characters, default NULL
#' @param snapLandmark SnapObject, default NULL
#' @param snapLandmarkFile characters, default NULL
#' @param snapQuery SnapObject, default NULL
#' @param snapQueryFile SnapObject, default NULL
#' @param removeBmat bool, default TRUE
#' @param removeJmat bool, default TRUE
#' @param k integer, default 50
#' @param dims integer or vector, default is 1:30
#' @param method characters, method for KNN (either "RANN" or "Annoy")
#' default "RANN"
#' @param runUMAP bool, default TRUE
#' @param umapNcores integer, default 1
#' @param outmmtxFile characters, output file of mmtx, default NULL
#' @param outSnapFile characters, default NULL
#' @return SnapObject
#' @export
runKNN <- function(snapAll = NULL,
snapAllFile = NULL,
snapLandmark = NULL,
snapLandmarkFile = NULL,
snapQuery = NULL,
snapQueryFile = NULL,
removeBmat = TRUE,
removeJmat = TRUE,
k = 50,
dims = 1:30,
method = "RANN",
runUMAP = TRUE,
umapNcores = 1,
outmmtxFile = NULL,
outSnapFile = NULL) {
if (!is.null(snapQueryFile)) {
snapQuery <- readRDS(snapQueryFile)
}
if (!is.null(snapLandmarkFile)) {
snapLandmark <- readRDS(snapLandmarkFile)
}
if (!is.null(snapLandmark)) {
if (!is.null(snapQuery)) {
snapAll <- SnapATAC::snapRbind(snapLandmark, snapQuery)
} else {
snapAll <- snapLandmark
}
}
if (!is.null(snapAllFile)) {
snapAll <- readRDS(snapAllFile)
}
if (is.null(snapAll)) {
stop("Final snapAll is NULL.")
}
if (!is.null(outmmtxFile)) {
prepareOutfile(outmmtxFile)
}
if (!is.null(outSnapFile)) {
prepareOutfile(outSnapFile)
}
if (removeBmat & (nrow(methods::slot(snapAll, "bmat")) != 0L)) {
message("Remove snapAll bmat.")
snapAll <- SnapATAC::rmBmatFromSnap(snapAll)
}
if (removeJmat) {
message("Remove snapAll jmat.")
snapAll@jmat <- SnapATAC::newJaccard()
}
if (length(dims) == 1) {
message("Dim is a scalar and convert it to vector.")
dims <- 1:dims
}
message("Run KNN with k ", k, " and size of dims ", length(dims))
message("KNN method: ", method)
snapAll <- SnapATAC::runKNN(obj = snapAll, eigs.dims = dims, k = k, method = method)
if (!is.null(outmmtxFile)) {
message("save graph as mmtx file.")
Matrix::writeMM(snapAll@graph@mat, file = outmmtxFile)
}
if (runUMAP) {
message("Run Umap.")
snapAll <- SnapATAC::runViz(
obj = snapAll, tmp.folder = tempdir(), dims = 2,
eigs.dims = dims, method = "uwot", seed.use = 2022,
num.cores = umapNcores
)
}
if (!is.null(outSnapFile)) {
saveRDS(snapAll, outSnapFile)
}
return(snapAll)
}
#' Run Leiden Algorithm.
#' @param snap SnapObject default NULL
#' @param snapFile characters default NULL
#' @param r numeric resolution paramter for leiden, default 0.5
#' @param pt characters partition type for leiden, default "RB"
#' @param seed integer default NULL
#' @param pathToPython characters where the python is, default NULL
#' @param outLeidenFile characters default NULL
#' @param outClusterMetaCSV characters default NULL
#' @param outClusterPDF characters default NULL
#' @param pdfn function used to draw figures for outClusterPDF
#' the inputs are snap object and others
#' default NULL.
#' @param colName characters used for record cluster namae in snap meta,
#' default is "cluster". if snap@metaData has this column,
#' will use [colName].1 instead
#' @param dims integer or vector for UMAP default is 1:30
#' @param umapNcores integer default 1
#' @param ... use for pdfn function
#' @return SnapObject, slot "cluster" is the result
#' @import reticulate
#' @export
runLeiden <- function(snap = NULL,
snapFile = NULL,
r = 0.5,
pt = "RB",
seed = 10,
pathToPython = NULL,
outLeidenFile = NULL,
outClusterMetaCSV = NULL,
outClusterPDF = NULL,
pdfn = NULL,
colName = "cluster",
dims = 1:30,
umapNcores = 1,
...) {
if (!is.null(snapFile)) {
snap <- readRDS(snapFile)
}
if (is.null(snap)) {
stop("Snap is NULL.")
}
invisible(lapply(c(outLeidenFile, outClusterMetaCSV, outClusterPDF),
function(i) {if (!is.null(i)) {prepareOutfile(i)}}))
message("Run Leiden algorithm with: resolution ", r,
" partition type: ", pt)
if(!is.null(pathToPython)) {
reticulate::use_python(pathToPython, required = TRUE)
message("python path: ", pathToPython)
}
setSessionTimeLimit(cpu = Inf, elapsed = Inf)
message("Loading python smmuty package.")
pymod <- reticulate::import(module = "smmuty", convert = FALSE)
message("Clustering.")
c <- as.factor(reticulate::py_to_r(
pymod$leiden(knn = reticulate::r_to_py(snap@graph@mat),
reso = r,
seed = as.integer(seed),
opt = pt)
))
## snap only accept factors
snap@cluster <- c
## Label start from 1
c <- as.integer(c)
message("Summarize the clustering result:")
print(table(c))
m <- snap@metaData
if (!("CellID" %in% colnames(m))) {
m$CellID <- paste(snap@sample, snap@barcode, sep = ".")
}
if (colName %in% colnames(m)) {
warning(colName, " is in the column of snap meta data.")
colName <- paste0(colName, ".1")
warning("Use ", colName, " instead.")
}
m[, colName] <- c
## cluster start from 1
if (!is.null(outLeidenFile)) {
message("Output Leiden result: ", outLeidenFile)
write.table(x = c, file = outLeidenFile,
row.names = FALSE, col.names = FALSE, quote = FALSE)
} else {
warning("No output of Leiden cluster file.")
}
if (!is.null(outClusterPDF)) {
if (nrow(methods::slot(snap, "umap")) == 0L) {
warning("No UMAP found, and now calculate it.")
if (length(dims) == 1) {
message("Dim is a scalar and convert it to vector.")
dims <- 1:dims
}
snap <- SnapATAC::runViz(
obj = snap, tmp.folder = tempdir(), dims = 2,
eigs.dims = dims, method = "uwot", seed.use = 2022,
num.cores = umapNcores
)
} # end of update umap
if (is.null(pdfn)) {
warning("No pdfn is found.")
warning("No output of custer pdf.")
} else {
message("Output cluster pdf: ", outClusterPDF)
withr::with_pdf(outClusterPDF, code = {
pList <- pdfn(embed = snap@umap,
meta = m,
checkRowName = FALSE,
names = c(colName, "TSSe", "log10UMI"),
discretes = c(TRUE, FALSE, FALSE),
legends = c(FALSE, TRUE, TRUE),
addLabels = c(TRUE, FALSE, FALSE),
...)
lapply(pList, function(p) {
if(!is.null(p)) {
print(p)
}
}) }, width = 10, height = 10)
}
} else {
warning("No output of custer pdf.")
}# end of outClusterPDF
if (!is.null(outClusterMetaCSV)) {
message("Output meta data to: ", outClusterMetaCSV)
if (nrow(methods::slot(snap, "umap")) > 1L) {
m$UMAP1 <- snap@umap[, 1]
m$UMAP2 <- snap@umap[, 2]
}
write.table(x = m, file = outClusterMetaCSV,
row.names = FALSE, col.names = TRUE,
sep = ",", quote = FALSE)
} else {
warning("No output of cluster meta file.")
}
return(snap)
}
#' Filter bmat
#' removing common bins and those overlapped with blacklist
#' @param snap snap object of SnapATAC, default NULL
#' @param snapFile characters name of the snap File to load, default NULL
#' @param low double threshold, default 0 (bin freq <= low will be removed)
#' @param high double threshold, default 0.99 (bin freq > high will be removed)
#' @param blacklistGR GenomicRanges object, default NULL
#' @param blackListFile characters file of the blacklistGR, default NULL
#' @param binSize integer dim of bmat, default 5,000
#' @param ncores integer number of cores to use for parallel, default 1
#' @param excludeChr characters used to filter bmat, default "random|chrM"
#' @param addBmatIfEmpty bool, default FALSE
#' @return SnapObject
#' @export
filterSnapBmat <- function(snap = NULL,
snapFile = NULL,
low = 0,
high = 0.99,
blackListFile = NULL,
blacklistGR = NULL,
excludeChr = "random|chrM",
addBmatIfEmpty = FALSE,
binSize = 5000,
ncores = 1) {
## load snap
if (!is.null(snapFile)) {
if (grepl("\\.RData", snapFile)) {
snap <- loadRData(snapFile, check = FALSE)
} else {
snap <- readRDS(snapFile)
}
}
if (is.null(snap)) {
stop("No snap found.")
}
## check bmat
if(nrow(snap@bmat) < 1) {
warning("Snap has no bmat.")
if(addBmatIfEmpty) {
message("Add bmat to snap.")
snap <- SnapATAC::addBmatToSnap(
obj = snap, bin.size = binSize, do.par = TRUE,
num.cores = ncores,
checkSnap = FALSE)
} else {
stop("Add bmat is FALSE.")
}
}
## load black list
if (!is.null(blackListFile)) {
blackList <- read.table(blackListFile,
header = FALSE, quote = "")
blacklistGR <- GenomicRanges::GRanges(
seqnames = blackList[, 1],
ranges = IRanges::IRanges(blackList[, 2], blackList[, 3]))
}
if(is.null(blacklistGR)) {
warning("Blacklist is empty.")
} else {
message("Filtering bin based on blacklist...")
idy <- S4Vectors::queryHits(
GenomicRanges::findOverlaps(snap@feature, blacklistGR))
if (length(idy) > 0) {
message("Remove blacklist-related ", length(idy), " bins.")
snap <- snap[, -idy, mat = "bmat"]
} else {
message("No blacklist-related bins.")
}
}
message("Remove bins from the excludeChr: ", excludeChr)
chr.exclude <- GenomeInfoDb::seqlevels(snap@feature)[
grep(excludeChr, GenomeInfoDb::seqlevels(snap@feature))
]
idy <- grep(paste(chr.exclude, collapse = "|"), snap@feature)
if (length(idy) > 0) {
message("Remove ", length(idy),
" bins located in ", excludeChr)
snap <- snap[, -idy, mat = "bmat"]
} else {
message("No bins located in the excludeChr: ", excludeChr)
}
message("Filter bins based on the coverage.")
bincov <- Matrix::colSums(snap@bmat) / nrow(snap@bmat)
lowIndex <- which(bincov <= low)
highIndex <- which(bincov > high)
message(length(lowIndex), " features are blow ", low,
" frequency.")
message(length(highIndex), " features are above ", high,
" frequencey.")
removeIndex <- union(lowIndex, highIndex)
if(length(removeIndex) == length(bincov)) {
message("All the features will be filtered.")
snap@bmat <- Matrix::Matrix(nrow = 0, ncol = 0, sparse = TRUE)
} else {
message(length(removeIndex), " / ", length(bincov),
" features are removed.")
snap <- snap[ , -removeIndex, mat = "bmat"]
}
return(snap)
}
#' Add bmat or gmat to snap
#'
#' @param snap snap object
#' @param cellGroup vector of characters
#' cluster labels for snap default is NULL
#' @param group characters which cluster needes
#' default is NULL
#' @param outDir characters default is NULL
#' @param matType characters default is "bmat"
#' @param gencode characters default is "vM16"
#' @param ndp integer default is -1
#' if its value is larger than 0, then down sampling will be used.
#' @param ncores integer default is 2
#' @param force bool default is TRUE
#' if add mat even the mat already exists
#' @param binSize integer default is 5000
#' @param coverFile bool default is TRUE
#' when set it TRUE, it will cover the output file if exists.
#' @param prefix characters default is "snap", which is used for output.
#' @return snap object with corresponding mat.
#' @export
addMatToSnap <- function(snap,
cellGroup = NULL,
group = NULL,
outDir = NULL,
matType = "bmat",
gencode = "vM16",
ndp = -1,
ncores = 2,
force = TRUE,
binSize = 5000,
coverFile = TRUE,
prefix = "snap") {
# * get rowIndex belonging to a group
rowIndex <- seq(nrow(snap))
if ((!is.null(cellGroup)) & (length(cellGroup) != SnapATAC::nrow(snap))) {
stop("cellGroup size does not match snap cell number.")
}
if (!is.null(cellGroup) & (!is.null(group))) {
rowIndex <- cellGroup %in% group
if (sum(rowIndex) == 0) {
warning(group, " does not exist, and skip it.")
return(NULL)
}
}
# * down sampling if needed
if ((ndp > 0) & (sum(rowIndex) > ndp)) {
message("Downsample: ", ndp)
rowIndex <- sort(sample(which(rowIndex), size = ndp, replace = FALSE))
}
# * get snap
s <- snap[rowIndex, ]
# ** double check the order of the snap
s <- pureRUtils::orderSnap(s)
# * get mat
if (!(matType %in% c("bmat", "gmat"))) {
stop("Do not support ", matType)
}
# * check if need to get mat
flag <- TRUE
n <- nrow(snap@bmat)
if (matType == "gmat") {
n <- nrow(snap@gmat)
}
if (n != 0L) {
message("The snap has already owned a ", matType)
if (!force) {
message("Skip it due to the rule is not forced.")
flag <- FALSE
}
}
# * only run when flag is TRUE
if (flag) {
message("Get ", matType)
if (matType == "bmat") {
s <- SnapATAC::addBmatToSnap(obj = s, bin.size = binSize,
do.par = TRUE, num.cores = ncores,
checkSnap = TRUE)
} else {
s <- SnapATAC::addGmatToSnap(obj = s, do.par = TRUE,
num.cores = ncores, checkSnap = TRUE)
}
}
# * output
if (is.null(outDir)) {
return(s)
}
prepareOutdir(outDir)
if (ndp > 0) {
prefix <- paste(prefix, paste("dp", ndp, sep = "-"), sep = ".")
}
suffix <- paste(matType, "rds", sep = ".")
outfile <- file.path(outDir, paste(prefix, suffix, sep = "."))
if (matType == "gmat") {
outmat <- file.path(outDir,
paste(prefix, paste("gencode", gencode, sep = "-"),
suffix, sep = "."))
}
if (file.exists(outfile) & (!coverFile)) {
message("File exist. Skip.")
} else {
message("Generate ", matType, " file: ", outfile)
saveRDS(s, outfile)
}
return(s)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.