spatzli: Methods for analyzing spatially resolved transcriptomics data

Documented in clusterSVG

#' clusterSVG
#' 
#' Clustering to identify major cell types as input for nnSVG
#' 
#' Convenience function to perform clustering to identify major cell types as
#' input for nnSVG. Cluster labels representing major cell types generated by
#' this function can be provided to \code{nnSVG} as a matrix of covariates to
#' include them within the statistical model. \code{nnSVG} will then identify
#' spatially variable genes (SVGs) after taking variation due to major cell
#' types into account.
#' 
#' Alternatively, cell types can be identified using manually guided analyses or
#' a different clustering algorithm, or \code{nnSVG} can also be run without
#' taking cell types into account (\code{x = NULL}).
#' 
#' The most appropriate type of analysis (i.e. whether or not to take into
#' account variation due to cell types) will depend on the biological context of
#' your dataset.
#' 
#' 
#' @param spe \code{SpatialExperiment} Input data, assumed to be a
#'   \code{\link{SpatialExperiment}} object with an \code{assay} slot containing
#'   either deviance residuals or log-transformed normalized counts, e.g. from
#'   \code{\link{preprocessSVG}}.
#' 
#' @param assay_name \code{character} Name of assay containing preprocessed
#'   expression values to use for clustering, i.e. either deviance residuals or
#'   log-transformed normalized counts. Assumed to be either
#'   \code{binomial_deviance_residuals} or \code{logcounts}. Default =
#'   \code{binomial_deviance_residuals}.
#' 
#' @param filter_mito \code{logical} Whether to filter mitochondrial genes.
#'   Assumes the \code{rowData} slot of \code{spe} contains a column named
#'   \code{gene_name}, which can be used to identify mitochondrial genes.
#'   Default = TRUE. Set to FALSE to disable.
#' 
#' 
#' @return Returns a \code{SpatialExperiment} object with cluster labels stored
#'   in a column in \code{colData}, which can then be extracted and provided to
#'   \code{nnSVG} as a matrix of covariates.
#' 
#' 
#' @importFrom SingleCellExperiment logcounts 'colLabels<-'
#' @importFrom SummarizedExperiment assayNames
#' @importFrom scran modelGeneVar getTopHVGs buildSNNGraph
#' @importFrom scater runPCA
#' @importFrom BiocSingular RandomParam
#' @importFrom igraph cluster_walktrap
#' @importFrom methods isClass
#' 
#' @export
#' 
#' @examples
#' library(SpatialExperiment)
#' library(STexampleData)
#' 
#' spe <- Visium_humanDLPFC()
#' 
#' # subset genes for faster runtime in this example
#' set.seed(123)
#' spe <- spe[sample(seq_len(1000)), ]
#' 
#' # set seed for reproducibility
#' set.seed(123)
#' spe <- preprocessSVG(spe)
#' 
#' # set seed for reproducibility
#' set.seed(123)
#' spe <- clusterSVG(spe)
#' 
#' # show results
#' colData(spe)
#' 
clusterSVG <- function(spe, 
                       assay_name = c("binomial_deviance_residuals", "logcounts"), 
                       filter_mito = TRUE) {
  
  stopifnot(isClass(spe, "SpatialExperiment"))
  
  assay_name <- match.arg(assay_name, c("binomial_deviance_residuals", "logcounts"))
  stopifnot(assay_name %in% assayNames(spe))
  
  # filter mitochondrial genes (if not already done)
  
  if (filter_mito) {
    stopifnot("gene_name" %in% colnames(rowData(spe)))
    is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$gene_name)
    message("removing ", sum(is_mito), " mitochondrial genes")
    spe <- spe[!is_mito, ]
  }
  
  if (assay_name == "binomial_deviance_residuals") {
    # dimensionality reduction
    spe <- runPCA(spe, exprs_values = "binomial_deviance_residuals", 
                  scale = TRUE, name = "PCA", BSPARAM = RandomParam())
  }
  
  if (assay_name == "logcounts") {
    # feature selection
    # fit mean-variance relationship
    dec <- modelGeneVar(spe)
    # select top HVGs
    top_hvgs <- getTopHVGs(dec, prop = 0.1)
    
    # dimensionality reduction
    # compute PCA (note: set random seed for reproducibility)
    spe <- runPCA(spe, subset_row = top_hvgs)
  }
  
  # clustering
  
  # graph-based clustering (note: set random seed for reproducibility)
  g <- buildSNNGraph(spe, k = 10, use.dimred = "PCA")
  g_walk <- cluster_walktrap(g)
  clus <- g_walk$membership
  colLabels(spe) <- factor(clus)
  
  # return object
  
  spe
}