#' @title Clustering
#'
#' @description
#' \code{clustering} currently implements two methods. The first approach groups
#' cells into \code{xdim}x\code{ydim} clusters using \pkg{FlowSOM}, and then
#' performs metaclustering with \pkg{ConsensusClusterPlus} into \code{maxK} clusters.
#' The second approach performs dimention reduction through UMAP, constructs
#' shared nearest-neighbor graphs using \pkg{scran}, and then performs community
#' detection using the Leuvein algorithm in \pkg{igraph}. In both cases,
#' an \code{SingleCellExperiment} object with a '\code{cluster_id}' column is
#' returned.\cr
#' \code{clustering_with_masking} performs a preliminary clustering and prompts
#' the user to provide the clusters to be masked. The median expression
#' value heatmap will be generated by \code{plotClusterHeatmap}. Then the function
#' performs the main clustering only using the non-masked cells. The masked cells
#' will be assigned to a cluster ID of zero.
#'
#' @param sce A \code{\link[SingleCellExperiment]{SingleCellExperiment}} object.
#' @param features A character vector.
#' Specifies which antigens to use for clustering.
#' @param features_masking A character vector.
#' Specifies which antigens to use for the preliminary masking.
#' @param by_exprs_values A character string.
#' Specifies which assay data to use for clustering.
#' @param method A character string.
#' Specifies which clustering method to use. Currently, either "FlowSOM" or "SNNGraph" is supported.
#' @param xdim,ydim
#' Numeric. Specifies the grid size of the self-orginizing map.
#' For example, 20x20 grid will yield 400 clusters.
#' @param maxK
#' Numeric. Specifies the maximum number of clusters to evaluate
#' in the metaclustering. For \code{maxK = 40}, for example,
#' metaclustering will be performed to obtain 40 clusters.
#' @param n_components,n_neighbors,min_dist
#' Specifies parameters for UMAP and SNN-graph construction.
#' @param seed Numeric. Sets a random seed.
#'
#' @return A \code{\link{SingleCellExperiment-class}} object with a
#' '\code{cluster_id}' column. Masked cells are assigned to the cluster 0.
#' @export
#' @rdname clustering
#' @name clustering
clustering <- function(
sce,
features=rownames(sce),
by_exprs_values="normexprs",
method="FlowSOM",
xdim=20,
ydim=20,
maxK=40,
n_components=min(c(10, length(features))),
n_neighbors=10,
min_dist=0.1,
seed=12345
){
# Set the random seed
set.seed(seed)
# FlowSOM-guided clustering & metaclustering
if(identical(method, "FlowSOM")){
## FlowSOM clustering
cat("1. FlowSOM clustering...\n")
som <- FlowSOM::ReadInput(flowCore::flowFrame(t(assay(sce, by_exprs_values))))
som <- FlowSOM::BuildSOM(
som,
colsToUse=features,
xdim=xdim,
ydim=ydim,
silent=T
)
## Metaclustering
cat("2. ConsensusClusterPlus metaclustering...\n")
pdf(NULL)
mc <- suppressMessages(
ConsensusClusterPlus::ConsensusClusterPlus(
t(som$map$codes),
maxK=maxK,
reps=100,
distance="euclidean",
seed=seed,
plot=NULL
)
)
dev.off()
## Retrieve cluster metadata
k <- xdim*ydim
mcs <- seq_len(maxK)[-1]
clstCodeDF <- data.frame(seq_len(k), purrr::map(mc[-1], "consensusClass"))
clstCodeDF <- dplyr::mutate_all(clstCodeDF, function(u){factor(u, levels=sort(unique(u)))})
colnames(clstCodeDF) <- c(sprintf("som%s", k), sprintf("meta%s", mcs))
## Convert original FlowSOM cluster codes into final metacluster IDs
clstIDs <- som$map$mapping[,1]
clstCodeDF <- data.frame(
"FlowSOM"=clstCodeDF[[paste0("som", k)]],
"Metaclustering"=clstCodeDF[[paste0("meta", maxK)]],
stringsAsFactors=F
)
clstCodeDF <- clstCodeDF[which(clstCodeDF$"FlowSOM" %in% unique(clstIDs)),]
## Put the final metacluster IDs back into the original SCE object
sce$"cluster_id" <- factor(
clstIDs,
levels=clstCodeDF$"FlowSOM",
labels=clstCodeDF$"Metaclustering"
)
}
# Shared nearest-neighbor graph-based clustering
if(identical(method, "SNNGraph")){
cat("1. Dimention reduction through UMAP...\n")
SingleCellExperiment::reducedDim(sce, type="UMAP_SNN", withDimnames=T) <- uwot::umap(
t(assay(sce, by_exprs_values)),
n_components=n_components,
n_neighbors=n_neighbors,
min_dist=min_dist,
scale=T,
n_threads=parallel::detectCores(logical=F),
verbose=T
) %>%
magrittr::set_colnames(paste0("UMAP_SNN", 1:n_components))
cat("2. SNN graph construction...\n")
g <- scran::buildSNNGraph(
sce,
use.dimred="UMAP_SNN"
)
cat("3. Graph-based clustering...\n")
g_comm <- igraph::cluster_louvain(g)
sce$"cluster_id" <- factor(g_comm$"membership")
}
# Output
return(sce)
}
#' @export
#' @rdname clustering
#' @name clustering
clustering_with_masking <- function(
sce,
features=rownames(sce),
features_masking=rownames(sce),
by_exprs_values="normexprs",
method="FlowSOM",
xdim=20,
ydim=20,
maxK=40,
n_components=min(c(10, length(features))),
n_neighbors=10,
min_dist=0.1,
seed=12345
){
# Set the random seed
set.seed(seed)
# A preliminary clustering
cat("A preliminary clustering...\n")
sce <- clustering(
sce,
features=features_masking,
by_exprs_values=by_exprs_values,
method=method,
xdim=xdim,
ydim=ydim,
maxK=maxK,
n_components=min(c(n_components, length(features_masking))),
n_neighbors=n_neighbors,
min_dist=min_dist,
seed=seed
)
plt <- plotClusterHeatmap(
sce,
features=rownames(sce),
clusters=sce$"cluster_id",
by_exprs_values=by_exprs_values,
fun="median",
cluster_anno=T,
scale=T,
draw_dend=T,
draw_freqs=T
)
print(plt)
# Prompt the user to provide the cluster IDs to be masked
cat("Enter the cluster IDs to be masked (separated by comma without space)\n")
rm <- readline(prompt="Cluster IDs: ")
rm <- as.numeric(unlist(stringr::str_split(rm, ",")))
sce$"cluster_masked" <- F
sce$"cluster_masked"[sce$"cluster_id" %in% rm] <- T
# The main clustering
cat("The main clustering...\n")
clusterIDs <- rep(0, ncol(sce)) ## 0 for masked clusters
sce_sub <- clustering(
sce[,sce$"cluster_masked"==F],
features=features,
by_exprs_values=by_exprs_values,
method=method,
xdim=xdim,
ydim=ydim,
maxK=maxK,
n_components=n_components,
n_neighbors=n_neighbors,
min_dist=min_dist,
seed=seed
)
clusterIDs[which(sce$"cluster_masked"==F)] <- sce_sub$"cluster_id"
sce$"cluster_id" <- factor(
clusterIDs,
levels=sort(unique(clusterIDs))
) ## re-factorize
return(sce)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.