#' Cluster cells with K-means clustering and mini-batch K-means clustering
#'
#' Perform K-means clustering on cells, to obtain cell identities.
#' Based on \code{Seurat::DoKMeans}.
#'
#' K-means clustering is performed on either scaled data or reduced dimensions,
#' with a number of subpopulations set to \code{k.cells}.
#' Scaled data is used when \code{dims.use} is NULL; otherwise the cell
#' embeddings of \code{reduction.type} restricted to \code{dims.use} are used.
#' After the initial K-means clustering is performed,
#' one can use \code{EvaluateIdent} to test individual cell identities, as
#' given by K-means clustering.
#'
#' @param object Seurat object
#' @param genes.use Genes to use for clustering when scaled data is clustered
#' (default is NULL, which uses \code{object@@var.genes})
#' @param reduction.type Name of dimensional reduction technique to use in
#' k-means clustering. Only used together with \code{dims.use}; it is required
#' when \code{dims.use} is supplied. If \code{dims.use} is NULL, genes will be
#' used and a warning is raised if \code{reduction.type} was set. (default is NULL)
#' @param dims.use A vector of the dimensions to use in k-means clustering
#' (e.g. To use the first 10 PCs, pass 1:10). If NULL (default), scaled data
#' is clustered instead of a dimensional reduction.
#' @param num_init A number of times the algorithm will be run with different
#' centroid seeds. Only passed to \code{ClusterR::KMeans_rcpp}. (default is 5)
#' @param center Center the cells/rows (default is TRUE)
#' @param k.cells K value to use for clustering cells. Required; must be a
#' single number of at least 2.
#' @param k.seed Random seed, passed to \code{set.seed} before clustering
#' @param do.plot Draw heatmap of clustered genes/cells (default is FALSE).
#' @param data.cut Clip all z-scores to have an absolute value below this.
#' Reduces the effect of huge outliers in the data. Only applied when scaled
#' data is clustered. (default is NULL)
#' @param k.cols Color palette for heatmap (currently not referenced by the
#' function body)
#' @param set.ident Set the cell identity
#' class to its K-means cluster (default is TRUE)
#' @param minibatch FALSE by default. If TRUE, use the mini-batch K-means clustering implemented in the ClusterR package.
#' @param do.constrained FALSE by default. If TRUE, use the constrained K-means function implemented in the tclust package.
#' Takes precedence over \code{minibatch}.
#' @param assay.type Assay whose scaled data is clustered (default is RNA), but can be changed for multimodal analyses.
#' @param \dots Additional parameters passed to the clustering function
#' (\code{tclust::tkmeans}, \code{ClusterR::MiniBatchKmeans} or
#' \code{ClusterR::KMeans_rcpp})
#'
#' @importFrom methods new
#' @importFrom stats kmeans
#' @importFrom tclust tkmeans
#' @importFrom ClusterR MiniBatchKmeans
#' @importFrom ClusterR predict_MBatchKMeans
#' @importFrom ClusterR KMeans_rcpp
#'
#' @return Seurat object where the k-means result for cells is stored in
#' object@@kmeans (slot cell.kmeans.obj). The cluster for each cell is stored in
#' the object@@meta.data column named "kmeans.K.ident", where K is the value of
#' k.cells, and also in object@@ident (if set.ident=TRUE)
#'
#' @export
#'
#' @examples
#' pbmc_small
#' # Cluster single cells
#' pbmc_small <- ClusterCellsKmeans(pbmc_small, k.cells = 3)
ClusterCellsKmeans <- function(
  object,
  genes.use = NULL,
  reduction.type = NULL,
  dims.use = NULL,
  num_init = 5,
  center = TRUE,
  k.cells = NULL,
  k.seed = 1,
  do.plot = FALSE,
  data.cut = NULL,
  k.cols = PurpleAndYellow(),
  set.ident = TRUE,
  minibatch = FALSE,
  do.constrained = FALSE,
  assay.type = "RNA",
  ...
) {
  # Short-circuit `||` so that a NULL (the default) or NA k.cells produces the
  # intended error instead of "argument is of length zero".
  if (is.null(k.cells) || !is.numeric(k.cells) || length(k.cells) != 1L ||
      is.na(k.cells) || k.cells < 2) {
    stop("Set the proper number of clusters for cells, for evaluation of cell identities.")
  }

  if (is.null(x = dims.use)) {
    if (!is.null(reduction.type)) {
      # The choice of input is driven by dims.use; make the fallback visible.
      warning(
        "'reduction.type' is ignored because 'dims.use' is NULL; using scaled data.",
        call. = FALSE
      )
    }
    message("Using scaled data.")
    if (is.null(genes.use)) {
      genes.use <- object@var.genes
    }
    data.use <- GetAssayData(
      object = object,
      assay.type = assay.type,
      slot = "scale.data"
    )
    # rows: genes and cols: cells
    genes.use <- genes.use[genes.use %in% rownames(x = data.use)]
    if (length(genes.use) == 0) {
      stop("None of the requested genes are present in the scaled data.")
    }
    # drop = FALSE keeps a matrix even when a single gene remains
    data.use <- data.use[genes.use, , drop = FALSE]
    # rows: cells and cols: genes
    if (center) {
      # scale() centers columns, i.e. each cell, before transposing
      data.use <- t(scale(data.use, center = TRUE, scale = FALSE))
    } else {
      data.use <- t(data.use)
    }
    if (!is.null(data.cut)) {
      data.use <- MinMax(data = data.use, min = data.cut * (-1), max = data.cut)
    }
  } else {
    if (is.null(reduction.type)) {
      stop("'reduction.type' must be set when 'dims.use' is supplied.")
    }
    message(paste0("Using ", reduction.type, "."))
    data.use <- GetCellEmbeddings(
      object = object,
      reduction.type = reduction.type,
      dims.use = dims.use
    )
    # rows: cells and cols: reduced dimensions
    if (center) {
      # transpose so that scale() centers each cell, then transpose back
      data.use <- t(scale(t(data.use), center = TRUE, scale = FALSE))
    }
  }

  message(paste0("Clustering ", nrow(data.use), " cells."))
  # k-means clustering cells
  # NOTE(review): the ClusterR functions accept their own `seed` argument, so
  # set.seed(k.seed) may not control their initialisation - confirm whether
  # k.seed should be forwarded as `seed`.
  if (do.constrained) {
    message(paste0("Truncated K-means clustering."))
    set.seed(seed = k.seed)
    Seurat:::PackageCheck('tclust')
    kmeans.cell <- tclust::tkmeans(x = data.use, k = k.cells, ...)
  } else if (minibatch) {
    message(paste0("Mini-batch K-means clustering."))
    set.seed(seed = k.seed)
    Seurat:::PackageCheck('ClusterR')
    kmeans.cell <- ClusterR::MiniBatchKmeans(data = data.use, clusters = k.cells, ...)
    # MiniBatchKmeans only returns centroids; assign every cell to its nearest one
    kmeans.cell$cluster <- ClusterR::predict_MBatchKMeans(
      data = data.use,
      CENTROIDS = kmeans.cell$centroids
    )
  } else {
    message(paste0("K-means clustering with K-means++."))
    set.seed(seed = k.seed)
    kmeans.cell <- ClusterR::KMeans_rcpp(
      data = data.use,
      clusters = k.cells,
      num_init = num_init,
      ...
    )
    # KMeans_rcpp stores assignments under `clusters`; expose them as `cluster`
    # explicitly rather than relying on partial matching of `$`.
    if (is.null(kmeans.cell[["cluster"]])) {
      kmeans.cell[["cluster"]] <- kmeans.cell[["clusters"]]
    }
  }

  names(x = kmeans.cell[["cluster"]]) <- object@cell.names
  object@kmeans <- new(
    Class = "kmeans.info",
    cell.kmeans.obj = kmeans.cell
  )

  # Record the assignment in meta.data under a column that encodes k
  kmeans.code <- paste("kmeans", k.cells, "ident", sep = ".")
  object@meta.data[names(x = kmeans.cell[["cluster"]]), kmeans.code] <- kmeans.cell[["cluster"]]

  if (set.ident) {
    object <- Seurat:::SetIdent(
      object = object,
      cells.use = names(x = kmeans.cell[["cluster"]]),
      ident.use = kmeans.cell[["cluster"]]
    )
  }
  if (do.plot) {
    KMeansHeatmap(object = object)
  }
  return(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.