R/runTSNE.R

Defines functions getTSNE runQuickTSNE runTSNE

Documented in getTSNE runQuickTSNE runTSNE

#' Run t-SNE embedding with Rtsne method
#' @rdname runTSNE
#' @description T-Stochastic Neighbour Embedding (t-SNE) algorithm is commonly
#' for 2D visualization of single-cell data. This function wraps the
#' Rtsne \code{\link[Rtsne]{Rtsne}} function.
#'
#' With this funciton, users can create tSNE embedding directly from raw count
#' matrix, with necessary preprocessing including normalization, scaling,
#' dimension reduction all automated. Yet we still recommend having the PCA as
#' input, so that the result can match with the clustering based on the same
#' input PCA, and will be much faster.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param useReducedDim The low dimension representation to use for UMAP
#' computation. Default \code{"PCA"}.
#' @param useAssay Assay to use for tSNE computation. If \code{useAltExp} is
#' specified, \code{useAssay} has to exist in
#' \code{assays(altExp(inSCE, useAltExp))}. Default \code{NULL}.
#' @param useAltExp The subset to use for tSNE computation, usually for the
#' selected.variable features. Default \code{NULL}.
#' @param reducedDimName a name to store the results of the dimension
#' reductions. Default \code{"TSNE"}.
#' @param logNorm Whether the counts will need to be log-normalized prior to
#' generating the tSNE via \code{\link{scaterlogNormCounts}}. Ignored when using
#' \code{useReducedDim}. Default \code{TRUE}.
#' @param useFeatureSubset Subset of feature to use for dimension reduction. A
#' character string indicating a \code{rowData} variable that stores the logical
#' vector of HVG selection, or a vector that can subset the rows of
#' \code{inSCE}. Default \code{NULL}.
#' @param nTop Automatically detect this number of variable features to use for
#' dimension reduction. Ignored when using \code{useReducedDim} or using
#' \code{useFeatureSubset}. Default \code{2000}.
#' @param center Whether data should be centered before PCA is applied. Ignored
#' when using \code{useReducedDim}. Default \code{TRUE}.
#' @param scale Whether data should be scaled before PCA is applied. Ignored
#' when using \code{useReducedDim}. Default \code{TRUE}.
#' @param pca Whether an initial PCA step should be performed. Ignored when
#' using \code{useReducedDim}. Default \code{TRUE}.
#' @param partialPCA Whether truncated PCA should be used to calculate principal
#' components (requires the irlba package). This is faster for large input
#' matrices. Ignored when using \code{useReducedDim}. Default \code{FALSE}.
#' @param initialDims Number of dimensions from PCA to use as input in tSNE.
#' Default \code{25}.
#' @param theta Numeric value for speed/accuracy trade-off (increase for less
#' accuracy), set to \code{0.0} for exact TSNE. Default \code{0.5}.
#' @param perplexity perplexity parameter. Should not be bigger than
#' \code{3 * perplexity < ncol(inSCE) - 1}. Default \code{30}. See
#' \code{\link[Rtsne]{Rtsne}} details for interpretation.
#' @param nIterations maximum iterations. Default \code{1000}.
#' @param numThreads Integer, number of threads to use using OpenMP, Default
#' \code{1}. \code{0} corresponds to using all available cores.
#' @param seed Random seed for reproducibility of tSNE results.
#' Default \code{NULL} will use global seed in use by the R environment.
#' @return A \linkS4class{SingleCellExperiment} object with tSNE computation
#' updated in \code{reducedDim(inSCE, reducedDimName)}.
#' @export
#' @examples
#' data(scExample, package = "singleCellTK")
#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'")
#' # Run from raw counts
#' sce <- runQuickTSNE(sce)
#' \dontrun{
#' # Run from PCA
#' sce <- scaterlogNormCounts(sce, "logcounts")
#' sce <- runModelGeneVar(sce)
#' sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = 2000, 
#'                  featureSubsetName = "HVG_modelGeneVar2000")
#' sce <- scaterPCA(sce, useAssay = "logcounts",
#'                  useFeatureSubset = "HVG_modelGeneVar2000", scale = TRUE)
#' sce <- runTSNE(sce, useReducedDim = "PCA")
#' }
#' @importFrom S4Vectors metadata<-
runTSNE <- function(inSCE, useReducedDim = "PCA", useAssay = NULL, 
                    useAltExp = NULL, reducedDimName = "TSNE", logNorm = TRUE,
                    useFeatureSubset = NULL, nTop = 2000, center = TRUE,
                    scale = TRUE, pca = TRUE, partialPCA = FALSE,
                    initialDims = 25, theta = 0.5, perplexity = 30,
                    nIterations = 1000, numThreads = 1, seed = 12345) {
  params <- as.list(environment())
  params$inSCE <- NULL
  # Note: useMat = list(useAssay = useAssay, ...)
  # `.selectSCEMatrix()` does the check and corrects the conditions
  # When using useReducedDim, useAssay will be ignored
  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay,
                             useReducedDim = useReducedDim,
                             useAltExp = useAltExp,
                             returnMatrix = FALSE)$names
  params$useAssay <- useMat$useAssay
  useAssay <- useMat$useAssay
  
  if (!is.null(useAltExp)) {
    sce <- SingleCellExperiment::altExp(inSCE, useAltExp)
  } else {
    sce <- inSCE
  }
  
  if (!is.null(useAssay)) {
    if (isTRUE(logNorm)) {
      sce <- scaterlogNormCounts(sce, assayName = "logcounts", useAssay = useAssay)
      useAssay <- "logcounts"
    }
    if (!is.null(useFeatureSubset)) {
      subset_row <- .parseUseFeatureSubset(inSCE, useFeatureSubset,
                                           altExpObj = sce,
                                           returnType = "logical")
      sce <- sce[subset_row,]
    } else {
      if (!is.null(nTop) && nTop < nrow(sce)) {
        suppressMessages({
          sce <- runFeatureSelection(sce, useAssay, method = "modelGeneVar")
          sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = nTop,
                           featureSubsetName = "tsneHVG")
        })
        sce <- subsetSCERows(sce, rowData = "tsneHVG", returnAsAltExp = FALSE)
      }
    }
    mat <- t(SummarizedExperiment::assay(sce, useAssay))
    if ((!isTRUE(pca) & !isTRUE(partialPCA))) {
      initialDims <- NULL
    }
  } else {
    mat <- SingleCellExperiment::reducedDim(sce, useReducedDim)
    pca <- FALSE
    partialPCA <- FALSE
    scale <- FALSE
    center <- FALSE
    if (initialDims < ncol(mat)) {
      mat <- mat[,seq(initialDims)]
    }
  }
  
  if (is.null(perplexity)){
    perplexity <- floor(ncol(inSCE) / 5)
  }
  # Rtsne requires a matrix input
  mat <- as.matrix(mat)
  p <- paste0(date(), " ... Computing Rtsne.")
  message(p)
  withr::with_seed(seed, {
    tsneOut <- Rtsne::Rtsne(mat, pca_scale = scale, pca_center = center,
                            pca = pca, partial_pca = partialPCA,
                            perplexity = perplexity,
                            initial_dims = initialDims,
                            max_iter = nIterations, theta = theta,
                            num_threads = numThreads)
  })
  tsneOut <- tsneOut$Y
  rownames(tsneOut) <- colnames(inSCE)
  colnames(tsneOut) <- c("tSNE1", "tSNE2")
  SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- tsneOut
  metadata(inSCE)$sctk$runDimReduce$embedding[[reducedDimName]] <- params
  return(inSCE)
}

#' @rdname runTSNE
#' @param ... Other parameters to be passed to \code{runTSNE}
#' @export
runQuickTSNE <- function(inSCE, useAssay = "counts", ...) {
  args <- list(...)
  if (!is.null(args$useReducedDim)) {
    warning("Forcing `useReducedDim` to be `NULL`. Please use `runTSNE` for ",
            "using reducedDim.")
  }
  args$useReducedDim <- NULL
  args <- c(list(inSCE = inSCE, useAssay = useAssay, useReducedDim = NULL), 
            args)
  inSCE <- do.call("runTSNE", args = args)
  return(inSCE)
}

#' @rdname runTSNE
#' @export
getTSNE <- function(inSCE, useReducedDim = "PCA", useAssay = NULL, 
                    useAltExp = NULL, reducedDimName = "TSNE", logNorm = TRUE,
                    useFeatureSubset = NULL, nTop = 2000, center = TRUE,
                    scale = TRUE, pca = TRUE, partialPCA = FALSE,
                    initialDims = 25, theta = 0.5, perplexity = 30,
                    nIterations = 1000, numThreads = 1, seed = 12345) {
  .Deprecated("runTSNE")
  runTSNE(inSCE = inSCE, useReducedDim = useReducedDim, useAssay = useAssay, 
          useAltExp = useAltExp, reducedDimName = reducedDimName, 
          logNorm = logNorm, useFeatureSubset = useFeatureSubset, nTop = nTop, 
          center = center, scale = scale, pca = pca, partialPCA = partialPCA,
          initialDims = initialDims, theta = theta, perplexity = perplexity,
          nIterations = nIterations, numThreads = numThreads, seed = seed)
}
compbiomed/singleCellTK documentation built on May 8, 2024, 6:58 p.m.