#' clusterSVG
#'
#' Clustering to identify major cell types as input for nnSVG
#'
#' Convenience function to perform clustering to identify major cell types as
#' input for nnSVG. Cluster labels representing major cell types generated by
#' this function can be provided to \code{nnSVG} as a matrix of covariates to
#' include them within the statistical model. \code{nnSVG} will then identify
#' spatially variable genes (SVGs) after taking variation due to major cell
#' types into account.
#'
#' Alternatively, cell types can be identified using manually guided analyses or
#' a different clustering algorithm, or \code{nnSVG} can also be run without
#' taking cell types into account (\code{x = NULL}).
#'
#' The most appropriate type of analysis (i.e. whether or not to take into
#' account variation due to cell types) will depend on the biological context of
#' your dataset.
#'
#'
#' @param spe \code{SpatialExperiment} Input data, assumed to be a
#' \code{\link{SpatialExperiment}} object with an \code{assay} slot containing
#' either deviance residuals or log-transformed normalized counts, e.g. from
#' \code{\link{preprocessSVG}}.
#'
#' @param assay_name \code{character} Name of assay containing preprocessed
#' expression values to use for clustering, i.e. either deviance residuals or
#' log-transformed normalized counts. Assumed to be either
#' \code{binomial_deviance_residuals} or \code{logcounts}. Default =
#' \code{binomial_deviance_residuals}.
#'
#' @param filter_mito \code{logical} Whether to filter mitochondrial genes.
#' Assumes the \code{rowData} slot of \code{spe} contains a column named
#' \code{gene_name}, which can be used to identify mitochondrial genes.
#' Default = TRUE. Set to FALSE to disable.
#'
#'
#' @return Returns a \code{SpatialExperiment} object with cluster labels stored
#' in a column in \code{colData}, which can then be extracted and provided to
#' \code{nnSVG} as a matrix of covariates.
#'
#'
#' @importFrom SingleCellExperiment logcounts 'colLabels<-'
#' @importFrom SummarizedExperiment assayNames
#' @importFrom scran modelGeneVar getTopHVGs buildSNNGraph
#' @importFrom scater runPCA
#' @importFrom BiocSingular RandomParam
#' @importFrom igraph cluster_walktrap
#' @importFrom methods isClass
#'
#' @export
#'
#' @examples
#' library(SpatialExperiment)
#' library(STexampleData)
#'
#' spe <- Visium_humanDLPFC()
#'
#' # subset genes for faster runtime in this example
#' set.seed(123)
#' spe <- spe[sample(seq_len(1000)), ]
#'
#' # set seed for reproducibility
#' set.seed(123)
#' spe <- preprocessSVG(spe)
#'
#' # set seed for reproducibility
#' set.seed(123)
#' spe <- clusterSVG(spe)
#'
#' # show results
#' colData(spe)
#'
clusterSVG <- function(spe,
assay_name = c("binomial_deviance_residuals", "logcounts"),
filter_mito = TRUE) {
stopifnot(isClass(spe, "SpatialExperiment"))
assay_name <- match.arg(assay_name, c("binomial_deviance_residuals", "logcounts"))
stopifnot(assay_name %in% assayNames(spe))
# filter mitochondrial genes (if not already done)
if (filter_mito) {
stopifnot("gene_name" %in% colnames(rowData(spe)))
is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$gene_name)
message("removing ", sum(is_mito), " mitochondrial genes")
spe <- spe[!is_mito, ]
}
if (assay_name == "binomial_deviance_residuals") {
# dimensionality reduction
spe <- runPCA(spe, exprs_values = "binomial_deviance_residuals",
scale = TRUE, name = "PCA", BSPARAM = RandomParam())
}
if (assay_name == "logcounts") {
# feature selection
# fit mean-variance relationship
dec <- modelGeneVar(spe)
# select top HVGs
top_hvgs <- getTopHVGs(dec, prop = 0.1)
# dimensionality reduction
# compute PCA (note: set random seed for reproducibility)
spe <- runPCA(spe, subset_row = top_hvgs)
}
# clustering
# graph-based clustering (note: set random seed for reproducibility)
g <- buildSNNGraph(spe, k = 10, use.dimred = "PCA")
g_walk <- cluster_walktrap(g)
clus <- g_walk$membership
colLabels(spe) <- factor(clus)
# return object
spe
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.