#' @title Uniform Manifold Approximation and Projection (UMAP) dimension
#' reduction for celda \code{sce} object
#' @description Embeds cells in two dimensions using \link[uwot]{umap} based on
#' a celda model. For celda_C \code{sce} objects, PCA on the normalized counts
#' is used to reduce the number of features before applying UMAP. For celda_CG
#' \code{sce} object, UMAP is run on module probabilities to reduce the number
#' of features instead of using PCA. Module probabilities are square-root
#' transformed before applying UMAP.
#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
#' returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param useAssay A string specifying which \link{assay}
#' slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#' to use. Default "featureSubset".
#' @param maxCells Integer. Maximum number of cells to plot. Cells will be
#' randomly subsampled if \code{ncol(sce) > maxCells}. Larger numbers of cells
#' requires more memory. If NULL, no subsampling will be performed.
#' Default NULL.
#' @param minClusterSize Integer. Do not subsample cell clusters below this
#' threshold. Default 100.
#' @param modules Integer vector. Determines which features modules to use for
#' UMAP. If NULL, all modules will be used. Default NULL.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#' a default value of 12345 is used. If NULL, no calls to
#' \link[withr]{with_seed} are made.
#' @param nNeighbors The size of local neighborhood used for
#' manifold approximation. Larger values result in more global
#' views of the manifold, while smaller values result in more
#' local data being preserved. Default 30.
#' See \link[uwot]{umap} for more information.
#' @param minDist The effective minimum distance between embedded points.
#' Smaller values will result in a more clustered/clumped
#' embedding where nearby points on the manifold are drawn
#' closer together, while larger values will result on a more
#' even dispersal of points. Default 0.75.
#' See \link[uwot]{umap} for more information.
#' @param spread The effective scale of embedded points. In combination with
#' \code{min_dist}, this determines how clustered/clumped the
#' embedded points are. Default 1. See \link[uwot]{umap} for more information.
#' @param pca Logical. Whether to perform
#' dimensionality reduction with PCA before UMAP. Only works for celda_C
#' \code{sce} objects.
#' @param initialDims Integer. Number of dimensions from PCA to use as
#' input in UMAP. Default 50. Only works for celda_C \code{sce} objects.
#' @param normalize Character. Passed to \link{normalizeCounts} in
#' normalization step. Divides counts by the library sizes for each
#' cell. One of 'proportion', 'cpm', 'median', or 'mean'. 'proportion' uses
#' the total counts for each cell as the library size. 'cpm' divides the
#' library size of each cell by one million to produce counts per million.
#' 'median' divides the library size of each cell by the median library size
#' across all cells. 'mean' divides the library size of each cell by the mean
#' library size across all cells.
#' @param scaleFactor Numeric. Sets the scale factor for cell-level
#' normalization. This scale factor is multiplied to each cell after the
#' library size of each cell had been adjusted in \code{normalize}. Default
#' \code{NULL} which means no scale factor is applied.
#' @param transformationFun Function. Applys a transformation such as 'sqrt',
#' 'log', 'log2', 'log10', or 'log1p'. If \code{NULL}, no transformation will
#' be applied. Occurs after applying normalization and scale factor. Default
#' \code{NULL}.
#' @param cores Number of threads to use. Default 1.
#' @param ... Additional parameters to pass to \link[uwot]{umap}.
#' @return \code{sce} with UMAP coordinates
#' (columns "celda_UMAP1" & "celda_UMAP2") added to
#' \code{\link{reducedDim}(sce, "celda_UMAP")}.
#' @export
setGeneric("celdaUmap",
function(sce,
useAssay = "counts",
altExpName = "featureSubset",
maxCells = NULL,
minClusterSize = 100,
modules = NULL,
seed = 12345,
nNeighbors = 30,
minDist = 0.75,
spread = 1,
pca = TRUE,
initialDims = 50,
normalize = "proportion",
scaleFactor = NULL,
transformationFun = sqrt,
cores = 1,
...) {
standardGeneric("celdaUmap")
})
#' @rdname celdaUmap
#' @examples
#' data(sceCeldaCG)
#' umapRes <- celdaUmap(sceCeldaCG)
#' @export
setMethod("celdaUmap", signature(sce = "SingleCellExperiment"),
function(sce,
useAssay = "counts",
altExpName = "featureSubset",
maxCells = NULL,
minClusterSize = 100,
modules = NULL,
seed = 12345,
nNeighbors = 30,
minDist = 0.75,
spread = 1,
pca = TRUE,
initialDims = 50,
normalize = "proportion",
scaleFactor = NULL,
transformationFun = sqrt,
cores = 1,
...) {
if (is.null(seed)) {
sce <- .celdaUmap(sce = sce,
useAssay = useAssay,
altExpName = altExpName,
maxCells = maxCells,
minClusterSize = minClusterSize,
modules = modules,
seed = seed,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
pca = pca,
initialDims = initialDims,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun,
cores = cores,
...)
} else {
with_seed(seed,
sce <- .celdaUmap(sce = sce,
useAssay = useAssay,
altExpName = altExpName,
maxCells = maxCells,
minClusterSize = minClusterSize,
modules = modules,
seed = seed,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
pca = pca,
initialDims = initialDims,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun,
cores = cores,
...))
}
return(sce)
})
.celdaUmap <- function(sce,
useAssay,
altExpName,
maxCells,
minClusterSize,
modules,
seed,
nNeighbors,
minDist,
spread,
pca,
initialDims,
cores,
normalize,
scaleFactor,
transformationFun,
...) {
celdaMod <- celdaModel(sce, altExpName = altExpName)
altExp <- SingleCellExperiment::altExp(sce, altExpName)
if (celdaMod == "celda_C") {
res <- .celdaUmapC(sce = altExp,
useAssay = useAssay,
maxCells = maxCells,
minClusterSize = minClusterSize,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
pca = pca,
initialDims = initialDims,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun,
cores = cores,
...)
} else if (celdaMod == "celda_CG") {
res <- .celdaUmapCG(sce = altExp,
useAssay = useAssay,
maxCells = maxCells,
minClusterSize = minClusterSize,
modules = modules,
seed = seed,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun,
cores = cores,
...)
} else if (celdaMod == "celda_G") {
res <- .celdaUmapG(sce = altExp,
useAssay = useAssay,
maxCells = maxCells,
minClusterSize = minClusterSize,
modules = modules,
seed = seed,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun,
cores = cores,
...)
} else {
stop("S4Vectors::metadata(altExp(sce, altExpName))$",
"celda_parameters$model must be",
" one of 'celda_C', 'celda_G', or 'celda_CG'")
}
SingleCellExperiment::reducedDim(altExp, "celda_UMAP") <- res
SingleCellExperiment::altExp(sce, altExpName) <- altExp
return(sce)
}
.celdaUmapC <- function(sce,
useAssay,
maxCells,
minClusterSize,
nNeighbors,
minDist,
spread,
pca,
initialDims,
normalize,
scaleFactor,
transformationFun,
cores,
...) {
preparedCountInfo <- .prepareCountsForDimReductionCeldaC(sce = sce,
useAssay = useAssay,
maxCells = maxCells,
minClusterSize = minClusterSize,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun)
umapRes <- .calculateUmap(preparedCountInfo$norm,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
pca = pca,
initialDims = initialDims,
cores = cores,
...
)
final <- matrix(NA, nrow = ncol(sce), ncol = 2)
final[preparedCountInfo$cellIx, ] <- umapRes
rownames(final) <- colnames(sce)
colnames(final) <- c("celda_UMAP1", "celda_UMAP2")
return(final)
}
.celdaUmapCG <- function(sce,
useAssay,
maxCells,
minClusterSize,
modules,
seed,
nNeighbors,
minDist,
spread,
normalize,
scaleFactor,
transformationFun,
cores,
...) {
preparedCountInfo <- .prepareCountsForDimReductionCeldaCG(sce = sce,
useAssay = useAssay,
maxCells = maxCells,
minClusterSize = minClusterSize,
modules = modules,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun)
umapRes <- .calculateUmap(preparedCountInfo$norm,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
cores = cores,
...)
final <- matrix(NA, nrow = ncol(sce), ncol = 2)
final[preparedCountInfo$cellIx, ] <- umapRes
rownames(final) <- colnames(sce)
colnames(final) <- c("celda_UMAP1", "celda_UMAP2")
return(final)
}
.celdaUmapG <- function(sce,
useAssay,
maxCells,
minClusterSize,
modules,
seed,
nNeighbors,
minDist,
spread,
normalize,
scaleFactor,
transformationFun,
cores,
...) {
preparedCountInfo <- .prepareCountsForDimReductionCeldaG(sce = sce,
useAssay = useAssay,
maxCells = maxCells,
minClusterSize = minClusterSize,
modules = modules,
normalize = normalize,
scaleFactor = scaleFactor,
transformationFun = transformationFun)
umapRes <- .calculateUmap(preparedCountInfo$norm,
nNeighbors = nNeighbors,
minDist = minDist,
spread = spread,
cores = cores,
...)
final <- matrix(NA, nrow = ncol(sce), ncol = 2)
final[preparedCountInfo$cellIx, ] <- umapRes
rownames(final) <- colnames(sce)
colnames(final) <- c("celda_UMAP1", "celda_UMAP2")
return(final)
}
# Run the UMAP algorithm for dimensionality reduction
# @param norm Normalized count matrix.
# @param nNeighbors The size of local neighborhood used for
# manifold approximation. Larger values result in more global
# views of the manifold, while smaller values result in more
# local data being preserved. Default 30.
# See `?uwot::umap` for more information.
# @param minDist The effective minimum distance between embedded points.
# Smaller values will result in a more clustered/clumped
# embedding where nearby points on the manifold are drawn
# closer together, while larger values will result on a more
# even dispersal of points. Default 0.2.
# See `?uwot::umap` for more information.
# @param spread The effective scale of embedded points. In combination with
# 'min_dist', this determines how clustered/clumped the
# embedded points are. Default 1.
# See `?uwot::umap` for more information.
# @param pca Logical. Whether to perform
# dimensionality reduction with PCA before UMAP.
# @param initialDims Integer. Number of dimensions from PCA to use as
# input in UMAP. Default 50.
# @param cores Number of threads to use. Default 1.
# @param ... Other parameters to pass to `uwot::umap`.
#' @import uwot
.calculateUmap <- function(norm,
nNeighbors = 30,
minDist = 0.75,
spread = 1,
pca = FALSE,
initialDims = 50,
cores = 1,
...) {
if (isTRUE(pca)) {
doPCA <- initialDims
} else {
doPCA <- NULL
}
res <- uwot::umap(norm,
n_neighbors = nNeighbors,
min_dist = minDist, spread = spread,
n_threads = cores, n_sgd_threads = 1, pca = doPCA, ...
)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.