#' Simplified runKNN in SnapATAC
#'
#' Finds the nearest neighbours of every cell with \code{RANN::nn2} and
#' returns them as an unweighted, sparse cell-by-cell adjacency matrix.
#' The original author reports having checked that this function returns
#' the same KNN matrix as SnapATAC does.
#'
#' @param smat dense matrix or matrix, cell by feature. Must have at least
#' one row.
#' @param k integer, max number of nearest neighbours, should be between
#' 10 and 50. If \code{k} is larger than the number of cells it is reset to
#' \code{ncell - 1} (but never below 1).
#' @param treetype string, "kd" or "bd". "bd" is useful for larger point
#' sets and local clusters in the dataset, which could reduce the depth of
#' the tree. Default is "kd".
#' NOTE: "bd" may have bugs on Linux (Ubuntu) but not on MacOS. When "bd" is
#' used on Linux the task was always killed no matter how much memory was
#' available (even 200GB for a small dataset: < 60,000 points).
#' This does not happen on MacOS, and "kd" works on Linux.
#' @param searchtype string, "standard", "priority", or "radius".
#' Default is "standard". No radius is forwarded to \code{RANN::nn2}, so a
#' "radius" search uses that function's own default radius; neighbours it
#' does not find are left out of the result.
#' @param nn_eps Error bound when performing nearest neighbor search using
#' RANN. The default of 0.0 implies exact nearest neighbor search.
#' @return sparseMatrix, KNN matrix, ncell by ncell, value is 1 (unweighted),
#' including the diagonal part. Row names of \code{smat}, if any, are copied
#' to the rows of the result.
#' @export
runKNN <- function(smat, k = 20, treetype = "kd",
                   searchtype = "standard",
                   nn_eps = 0.0) {
  if (!is.numeric(k) || length(k) != 1L || is.na(k) || k < 1) {
    stop("`k` must be a single number >= 1.", call. = FALSE)
  }
  message(paste("Generate KNN with", k))
  ncell <- nrow(smat)
  if (is.null(ncell) || ncell < 1) {
    stop("`smat` must be a matrix with at least one row (cell).",
         call. = FALSE)
  }
  if (ncell < k) {
    message(paste("Ncell", ncell, "is smaller than K nearst neighbor", k))
    # Never let k drop to 0 (single-cell input): the cell itself must remain
    # its own neighbour so the diagonal is present as documented.
    k <- max(ncell - 1, 1)
    message(if (k == ncell - 1) "Set k as Ncell - 1." else "Set k as 1.")
  }
  nn_idx <- RANN::nn2(data = smat, k = k,
                      treetype = treetype,
                      searchtype = searchtype,
                      eps = nn_eps)$nn.idx
  # Flatten row by row: all neighbours of cell 1, then of cell 2, ...
  j <- as.numeric(t(nn_idx))
  i <- rep(seq_len(ncell), each = ncol(nn_idx))
  # Radius searches mark "no neighbour found" with index 0, which is not a
  # valid (1-based) column index for sparseMatrix; drop those entries.
  found <- j > 0
  kmat <- Matrix::sparseMatrix(i = i[found], j = j[found], x = 1,
                               dims = c(ncell, ncell))
  if (!is.null(rownames(smat))) {
    rownames(kmat) <- rownames(smat)
  }
  return(kmat)
}
#' Run Leiden algorithm as graph-based clustering.
#'
#' Hands the KNN matrix to the \code{leiden} function of the Python module
#' \code{smmuty} (loaded through reticulate) and converts the returned labels
#' into an R factor. A frequency table of the clusters is printed before
#' returning. The session CPU and elapsed time limits are set to \code{Inf}
#' before the Python call.
#'
#' @param kmat sparseMatrix, KNN matrix generated by runKNN.
#' @param path_to_python string, path of the Python to use; when
#' \code{NULL} (default) no Python is selected explicitly.
#' @param reso double, resolution param in Leiden, default is 0.8
#' @param seed integer, used for Leiden, default is 10
#' @param partitionType string, used for Leiden, default is "RB"
#' @return vector of factor, cluster index for cells
#' @import reticulate
#' @export
runLeiden <- function(kmat,
                      path_to_python = NULL,
                      reso = 0.8, seed = 10,
                      partitionType = "RB") {
  banner <- paste("Run Leiden for clustering with resolution", reso, "and partitionType", partitionType)
  message(banner)

  # Pin the Python installation only when the caller asked for one.
  if (!is.null(path_to_python)) {
    use_python(path_to_python, required = TRUE)
    message("Use the Python located in:", path_to_python, "\n")
  }

  setSessionTimeLimit(cpu = Inf, elapsed = Inf)

  # convert = FALSE keeps Python objects as-is; conversion is done
  # explicitly with r_to_py() / py_to_r() below.
  smmuty <- import(module = "smmuty", convert = FALSE)
  py_labels <- smmuty$leiden(
    knn = r_to_py(kmat),
    reso = reso,
    seed = seed,
    opt = partitionType
  )
  cluster_ids <- as.factor(py_to_r(py_labels))

  message("Summary of clustering:")
  # Name the table dimension explicitly so the printed header is "ldCluster".
  print(table(ldCluster = cluster_ids))
  return(cluster_ids)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.