#' @include generics.R
#'
NULL
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Functions
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Determine statistical significance of PCA scores.
#'
#' Randomly permutes a subset of data, and calculates projected PCA scores for
#' these 'random' genes. Then compares the PCA scores for the 'random' genes
#' with the observed PCA scores to determine statistical signifance. End result
#' is a p-value for each gene's association with each principal component.
#'
#' @param object Seurat object
#' @param reduction DimReduc to use. ONLY PCA CURRENTLY SUPPORTED.
#' @param assay Assay used to calculate reduction.
#' @param dims Number of PCs to compute significance for
#' @param num.replicate Number of replicate samplings to perform
#' @param prop.freq Proportion of the data to randomly permute for each
#' replicate
#' @param verbose Print progress bar showing the number of replicates
#' that have been processed.
#' @param maxit maximum number of iterations to be performed by the irlba function of RunPCA
#'
#' @return Returns a Seurat object where JS(object = object[['pca']], slot = 'empirical')
#' represents p-values for each gene in the PCA analysis. If ProjectPCA is
#' subsequently run, JS(object = object[['pca']], slot = 'full') then
#' represents p-values for all genes.
#'
#' @importFrom methods new
#' @importFrom pbapply pblapply pbsapply
#' @importFrom future.apply future_lapply future_sapply
#' @importFrom future nbrOfWorkers
#'
#' @references Inspired by Chung et al, Bioinformatics (2014)
#'
#' @export
#'
#' @examples
#' \dontrun{
#' pbmc_small = suppressWarnings(JackStraw(pbmc_small))
#' head(JS(object = pbmc_small[['pca']], slot = 'empirical'))
#' }
#'
JackStraw <- function(
object,
reduction = "pca",
assay = NULL,
dims = 20,
num.replicate = 100,
prop.freq = 0.01,
verbose = TRUE,
maxit = 1000
) {
if (reduction != "pca") {
stop("Only pca for reduction is currently supported")
}
if (verbose && nbrOfWorkers() == 1) {
my.lapply <- pblapply
my.sapply <- pbsapply
} else {
my.lapply <- future_lapply
my.sapply <- future_sapply
}
assay <- assay %||% DefaultAssay(object = object)
if (dims > length(x = object[[reduction]])) {
dims <- length(x = object[[reduction]])
warning("Number of dimensions specified is greater than those available. Setting dims to ", dims, " and continuing", immediate. = TRUE)
}
if (dims > nrow(x = object)) {
dims <- nrow(x = object)
warning("Number of dimensions specified is greater than the number of cells. Setting dims to ", dims, " and continuing", immediate. = TRUE)
}
loadings <- Loadings(object = object[[reduction]], projected = FALSE)
reduc.features <- rownames(x = loadings)
if (length(x = reduc.features) < 3) {
stop("Too few features")
}
if (length(x = reduc.features) * prop.freq < 3) {
warning(
"Number of variable genes given ",
prop.freq,
" as the prop.freq is low. Consider including more variable genes and/or increasing prop.freq. ",
"Continuing with 3 genes in every random sampling."
)
}
data.use <- GetAssayData(object = object, assay = assay, slot = "scale.data")[reduc.features, ]
rev.pca <- object[[paste0('RunPCA.', assay)]]$rev.pca
weight.by.var <- object[[paste0('RunPCA.', assay)]]$weight.by.var
fake.vals.raw <- my.lapply(
X = 1:num.replicate,
FUN = JackRandom,
scaled.data = data.use,
prop.use = prop.freq,
r1.use = 1,
r2.use = dims,
rev.pca = rev.pca,
weight.by.var = weight.by.var,
maxit = maxit
)
fake.vals <- sapply(
X = 1:dims,
FUN = function(x) {
return(as.numeric(x = unlist(x = lapply(
X = 1:num.replicate,
FUN = function(y) {
return(fake.vals.raw[[y]][, x])
}
))))
}
)
fake.vals <- as.matrix(x = fake.vals)
jackStraw.empP <- as.matrix(
my.sapply(
X = 1:dims,
FUN = function(x) {
return(unlist(x = lapply(
X = abs(loadings[, x]),
FUN = EmpiricalP,
nullval = abs(fake.vals[,x])
)))
}
)
)
colnames(x = jackStraw.empP) <- paste0("PC", 1:ncol(x = jackStraw.empP))
jackstraw.obj <- new(
Class = "JackStrawData",
empirical.p.values = jackStraw.empP,
fake.reduction.scores = fake.vals,
empirical.p.values.full = matrix()
)
JS(object = object[[reduction]]) <- jackstraw.obj
object <- LogSeuratCommand(object = object)
return(object)
}
#' Significant genes from a PCA
#'
#' Returns a set of genes, based on the JackStraw analysis, that have
#' statistically significant associations with a set of PCs.
#'
#' @param object Seurat object
#' @param pcs.use PCS to use.
#' @param pval.cut P-value cutoff
#' @param use.full Use the full list of genes (from the projected PCA). Assumes
#' that \code{ProjectDim} has been run. Currently, must be set to FALSE.
#' @param max.per.pc Maximum number of genes to return per PC. Used to avoid genes from one PC dominating the entire analysis.
#'
#' @return A vector of genes whose p-values are statistically significant for
#' at least one of the given PCs.
#'
#' @export
#'
#' @seealso \code{\link{ProjectDim}} \code{\link{JackStraw}}
#'
#' @examples
#' PCASigGenes(pbmc_small, pcs.use = 1:2)
#'
PCASigGenes <- function(
object,
pcs.use,
pval.cut = 0.1,
use.full = FALSE,
max.per.pc = NULL
) {
# pvals.use <- GetDimReduction(object,reduction.type = "pca",slot = "jackstraw")@empirical.p.values
empirical.use <- ifelse(test = use.full, yes = 'full', no = 'empirical')
pvals.use <- JS(object = object[['pca']], slot = empirical.use)
if (length(x = pcs.use) == 1) {
pvals.min <- pvals.use[, pcs.use]
}
if (length(x = pcs.use) > 1) {
pvals.min <- apply(X = pvals.use[, pcs.use], MARGIN = 1, FUN = min)
}
names(x = pvals.min) <- rownames(x = pvals.use)
features <- names(x = pvals.min)[pvals.min < pval.cut]
if (!is.null(x = max.per.pc)) {
top.features <- TopFeatures(
object = object[['pca']],
dim = pcs.use,
nfeatures = max.per.pc,
projected = use.full,
balanced = FALSE
)
features <- intersect(x = top.features, y = features)
}
return(features)
}
#' Project Dimensional reduction onto full dataset
#'
#' Takes a pre-computed dimensional reduction (typically calculated on a subset
#' of genes) and projects this onto the entire dataset (all genes). Note that
#' the cell loadings will remain unchanged, but now there are gene loadings for
#' all genes.
#'
#' @param object Seurat object
#' @param reduction Reduction to use
#' @param assay Assay to use
#' @param dims.print Number of dims to print features for
#' @param nfeatures.print Number of features with highest/lowest loadings to print for
#' each dimension
#' @param overwrite Replace the existing data in feature.loadings
#' @param do.center Center the dataset prior to projection (should be set to TRUE)
#' @param verbose Print top genes associated with the projected dimensions
#'
#' @return Returns Seurat object with the projected values
#'
#' @export
#'
#' @examples
#' pbmc_small
#' pbmc_small <- ProjectDim(object = pbmc_small, reduction = "pca")
#' # Vizualize top projected genes in heatmap
#' DimHeatmap(object = pbmc_small, reduction = "pca", dims = 1, balanced = TRUE)
#'
ProjectDim <- function(
object,
reduction = "pca",
assay = NULL,
dims.print = 1:5,
nfeatures.print = 20,
overwrite = FALSE,
do.center = FALSE,
verbose = TRUE
) {
redeuc <- object[[reduction]]
assay <- assay %||% DefaultAssay(object = redeuc)
data.use <- GetAssayData(
object = object[[assay]],
slot = "scale.data"
)
if (do.center) {
data.use <- scale(x = as.matrix(x = data.use), center = TRUE, scale = FALSE)
}
cell.embeddings <- Embeddings(object = redeuc)
new.feature.loadings.full <- data.use %*% cell.embeddings
rownames(x = new.feature.loadings.full) <- rownames(x = data.use)
colnames(x = new.feature.loadings.full) <- colnames(x = cell.embeddings)
Loadings(object = redeuc, projected = TRUE) <- new.feature.loadings.full
if (overwrite) {
Loadings(object = redeuc, projected = FALSE) <- new.feature.loadings.full
}
object[[reduction]] <- redeuc
if (verbose) {
print(
x = redeuc,
dims = dims.print,
nfeatures = nfeatures.print,
projected = TRUE
)
}
object <- LogSeuratCommand(object = object)
return(object)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Methods for Seurat-defined generics
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @param assay Name of Assay PCA is being run on
#' @param npcs Total Number of PCs to compute and store (50 by default)
#' @param rev.pca By default computes the PCA on the cell x gene matrix. Setting
#' to true will compute it on gene x cell matrix.
#' @param weight.by.var Weight the cell embeddings by the variance of each PC
#' (weights the gene loadings if rev.pca is TRUE)
#' @param verbose Print the top genes associated with high/low loadings for
#' the PCs
#' @param ndims.print PCs to print genes for
#' @param nfeatures.print Number of genes to print for each PC
#' @param reduction.key dimensional reduction key, specifies the string before
#' the number for the dimension names. PC by default
#' @param seed.use Set a random seed. By default, sets the seed to 42. Setting
#' NULL will not set a seed.
#' @param approx Use truncated singular value decomposition to approximate PCA
#'
#' @importFrom irlba irlba
#' @importFrom stats prcomp
#' @importFrom utils capture.output
#'
#' @rdname RunPCA
#' @export
#'
RunPCA.default <- function(
object,
assay = NULL,
npcs = 50,
rev.pca = FALSE,
weight.by.var = TRUE,
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.key = "PC_",
seed.use = 42,
approx = TRUE,
...
) {
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
if (rev.pca) {
npcs <- min(npcs, ncol(x = object) - 1)
pca.results <- irlba(A = object, nv = npcs, ...)
total.variance <- sum(RowVar(x = t(x = object)))
sdev <- pca.results$d/sqrt(max(1, nrow(x = object) - 1))
if (weight.by.var) {
feature.loadings <- pca.results$u %*% diag(pca.results$d)
} else{
feature.loadings <- pca.results$u
}
cell.embeddings <- pca.results$v
}
else {
total.variance <- sum(RowVar(x = object))
if (approx) {
npcs <- min(npcs, nrow(x = object) - 1)
pca.results <- irlba(A = t(x = object), nv = npcs, ...)
feature.loadings <- pca.results$v
sdev <- pca.results$d/sqrt(max(1, ncol(object) - 1))
if (weight.by.var) {
cell.embeddings <- pca.results$u %*% diag(pca.results$d)
} else {
cell.embeddings <- pca.results$u
}
} else {
npcs <- min(npcs, nrow(x = object))
pca.results <- prcomp(x = t(object), rank. = npcs, ...)
feature.loadings <- pca.results$rotation
sdev <- pca.results$sdev
if (weight.by.var) {
cell.embeddings <- pca.results$x %*% diag(pca.results$sdev[1:npcs]^2)
} else {
cell.embeddings <- pca.results$x
}
}
}
rownames(x = feature.loadings) <- rownames(x = object)
colnames(x = feature.loadings) <- paste0(reduction.key, 1:npcs)
rownames(x = cell.embeddings) <- colnames(x = object)
colnames(x = cell.embeddings) <- colnames(x = feature.loadings)
reduction.data <- CreateDimReducObject(
embeddings = cell.embeddings,
loadings = feature.loadings,
assay = assay,
stdev = sdev,
key = reduction.key,
misc = list(total.variance = total.variance)
)
if (verbose) {
msg <- capture.output(print(
x = reduction.data,
dims = ndims.print,
nfeatures = nfeatures.print
))
message(paste(msg, collapse = '\n'))
}
return(reduction.data)
}
#' @param features Features to compute PCA on. If features=NULL, PCA will be run
#' using the variable features for the Assay. Note that the features must be present
#' in the scaled data. Any requested features that are not scaled or have 0 variance
#' will be dropped, and the PCA will be run using the remaining features.
#'
#' @rdname RunPCA
#' @export
#' @method RunPCA Assay
#'
RunPCA.Assay <- function(
object,
assay = NULL,
features = NULL,
npcs = 50,
rev.pca = FALSE,
weight.by.var = TRUE,
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.key = "PC_",
seed.use = 42,
...
) {
data.use <- PrepDR(
object = object,
features = features,
verbose = verbose
)
reduction.data <- RunPCA(
object = data.use,
assay = assay,
npcs = npcs,
rev.pca = rev.pca,
weight.by.var = weight.by.var,
verbose = verbose,
ndims.print = ndims.print,
nfeatures.print = nfeatures.print,
reduction.key = reduction.key,
seed.use = seed.use,
...
)
return(reduction.data)
}
#' @param reduction.name dimensional reduction name, pca by default
#'
#' @rdname RunPCA
#' @export
#' @method RunPCA Seurat
#'
RunPCA.Seurat <- function(
object,
assay = NULL,
features = NULL,
npcs = 50,
rev.pca = FALSE,
weight.by.var = TRUE,
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.name = "pca",
reduction.key = "PC_",
seed.use = 42,
...
) {
assay <- assay %||% DefaultAssay(object = object)
assay.data <- GetAssay(object = object, assay = assay)
reduction.data <- RunPCA(
object = assay.data,
assay = assay,
features = features,
npcs = npcs,
rev.pca = rev.pca,
weight.by.var = weight.by.var,
verbose = verbose,
ndims.print = ndims.print,
nfeatures.print = nfeatures.print,
reduction.key = reduction.key,
seed.use = seed.use,
...
)
object[[reduction.name]] <- reduction.data
object <- LogSeuratCommand(object = object)
return(object)
}
#' @param assay Name of assay that that t-SNE is being run on
#' @param seed.use Random seed for the t-SNE. If NULL, does not set the seed
#' @param tsne.method Select the method to use to compute the tSNE. Available
#' methods are:
#' \itemize{
#' \item{Rtsne: }{Use the Rtsne package Barnes-Hut implementation of tSNE (default)}
# \item{tsne: }{standard tsne - not recommended for large datasets}
#' \item{FIt-SNE: }{Use the FFT-accelerated Interpolation-based t-SNE. Based on
#' Kluger Lab code found here: https://github.com/KlugerLab/FIt-SNE}
#' }
#' @param dim.embed The dimensional space of the resulting tSNE embedding
#' (default is 2). For example, set to 3 for a 3d tSNE
#' @param reduction.key dimensional reduction key, specifies the string before the number for the dimension names. tSNE_ by default
#'
#' @importFrom Rtsne Rtsne
#'
#' @rdname RunTSNE
#' @export
#' @method RunTSNE matrix
#'
RunTSNE.matrix <- function(
object,
assay = NULL,
seed.use = 1,
tsne.method = "Rtsne",
dim.embed = 2,
reduction.key = "tSNE_",
...
) {
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
tsne.data <- switch(
EXPR = tsne.method,
'Rtsne' = Rtsne(
X = object,
dims = dim.embed,
... # PCA/is_distance
)$Y,
'FIt-SNE' = fftRtsne(X = object, dims = dim.embed, rand_seed = seed.use, ...),
stop("Invalid tSNE method: please choose from 'Rtsne' or 'FIt-SNE'")
)
colnames(x = tsne.data) <- paste0(reduction.key, 1:ncol(x = tsne.data))
rownames(x = tsne.data) <- rownames(x = object)
tsne.reduction <- CreateDimReducObject(
embeddings = tsne.data,
key = reduction.key,
assay = assay,
global = TRUE
)
return(tsne.reduction)
}
#' @param cells Which cells to analyze (default, all cells)
#' @param dims Which dimensions to use as input features
#'
#' @rdname RunTSNE
#' @export
#' @method RunTSNE DimReduc
#'
RunTSNE.DimReduc <- function(
object,
cells = NULL,
dims = 1:5,
seed.use = 1,
tsne.method = "Rtsne",
dim.embed = 2,
reduction.key = "tSNE_",
...
) {
args <- as.list(x = sys.frame(which = sys.nframe()))
args <- c(args, list(...))
args$object <- args$object[[cells, args$dims]]
args$dims <- NULL
args$cells <- NULL
args$assay <- DefaultAssay(object = object)
return(do.call(what = 'RunTSNE', args = args))
}
#' @rdname RunTSNE
#' @export
#' @method RunTSNE dist
#'
RunTSNE.dist <- function(
object,
assay = NULL,
seed.use = 1,
tsne.method = "Rtsne",
dim.embed = 2,
reduction.key = "tSNE_",
...
) {
args <- as.list(x = sys.frame(which = sys.nframe()))
args <- c(args, list(...))
args$object <- as.matrix(x = args$object)
args$is_distance <- TRUE
return(do.call(what = 'RunTSNE', args = args))
}
#' @param reduction Which dimensional reduction (e.g. PCA, ICA) to use for
#' the tSNE. Default is PCA
#' @param features If set, run the tSNE on this subset of features
#' (instead of running on a set of reduced dimensions). Not set (NULL) by default;
#' \code{dims} must be NULL to run on features
#' @param distance.matrix If set, runs tSNE on the given distance matrix
#' instead of data matrix (experimental)
#' @param reduction.name dimensional reduction name, specifies the position in the object$dr list. tsne by default
#'
#' @rdname RunTSNE
#' @export
#' @method RunTSNE Seurat
#'
RunTSNE.Seurat <- function(
object,
reduction = "pca",
cells = NULL,
dims = 1:5,
features = NULL,
seed.use = 1,
tsne.method = "Rtsne",
dim.embed = 2,
distance.matrix = NULL,
reduction.name = "tsne",
reduction.key = "tSNE_",
...
) {
cells <- cells %||% Cells(x = object)
tsne.reduction <- if (!is.null(x = distance.matrix)) {
RunTSNE(
object = distance.matrix,
assay = DefaultAssay(object = object),
seed.use = seed.use,
tsne.method = tsne.method,
dim.embed = dim.embed,
reduction.key = reduction.key,
is_distance = TRUE,
...
)
} else if (!is.null(x = dims)) {
RunTSNE(
object = object[[reduction]],
cells = cells,
dims = dims,
seed.use = seed.use,
tsne.method = tsne.method,
dim.embed = dim.embed,
reduction.key = reduction.key,
pca = FALSE,
...
)
} else if (!is.null(x = features)) {
RunTSNE(
object = t(x = as.matrix(x = GetAssayData(object = object)[features, cells])),
assay = DefaultAssay(object = object),
seed.use = seed.use,
tsne.method = tsne.method,
dim.embed = dim.embed,
reduction.key = reduction.key,
pca = FALSE,
...
)
} else {
stop("Unknown way of running tSNE")
}
object[[reduction.name]] <- tsne.reduction
object <- LogSeuratCommand(object = object)
return(object)
}
#' @importFrom uwot umap
#' @importFrom future nbrOfWorkers
#'
#' @rdname RunUMAP
#' @method RunUMAP default
#' @export
#'
RunUMAP.default <- function(
object,
assay = NULL,
n.neighbors = 30L,
n.components = 2L,
metric = 'cosine',
n.epochs = NULL,
learning.rate = 1.0,
min.dist = 0.3,
spread = 1.0,
set.op.mix.ratio = 1.0,
local.connectivity = 1L,
repulsion.strength = 1,
negative.sample.rate = 5,
a = NULL,
b = NULL,
uwot.sgd = FALSE,
seed.use = 42,
metric.kwds = NULL,
angular.rp.forest = FALSE,
reduction.key = 'UMAP_',
verbose = TRUE,
...
) {
CheckDots(...)
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
if (metric == 'correlation') {
warning(
"UWOT does not implement the correlation metric, using cosine instead",
call. = FALSE,
immediate. = TRUE
)
metric <- 'cosine'
}
umap.output <- umap(
X = object,
n_threads = nbrOfWorkers(),
n_neighbors = as.integer(x = n.neighbors),
n_components = as.integer(x = n.components),
metric = metric,
n_epochs = n.epochs,
learning_rate = learning.rate,
min_dist = min.dist,
spread = spread,
set_op_mix_ratio = set.op.mix.ratio,
local_connectivity = local.connectivity,
repulsion_strength = repulsion.strength,
negative_sample_rate = negative.sample.rate,
a = a,
b = b,
fast_sgd = uwot.sgd,
verbose = verbose
)
colnames(x = umap.output) <- paste0(reduction.key, 1:ncol(x = umap.output))
if (inherits(x = object, what = 'dist')) {
rownames(x = umap.output) <- attr(x = object, "Labels")
} else {
rownames(x = umap.output) <- rownames(x = object)
}
umap.reduction <- CreateDimReducObject(
embeddings = umap.output,
key = reduction.key,
assay = assay,
global = TRUE
)
return(umap.reduction)
}
#' @param dims Which dimensions to use as input features, used only if
#' \code{features} is NULL
#' @param reduction Which dimensional reduction (PCA or ICA) to use for the
#' UMAP input. Default is PCA
#' @param features If set, run UMAP on this subset of features (instead of running on a
#' set of reduced dimensions). Not set (NULL) by default; \code{dims} must be NULL to run
#' on features
#' @param graph Name of graph on which to run UMAP
#' @param assay Assay to pull data for when using \code{features}.
#' @param n.neighbors This determines the number of neighboring points used in
#' local approximations of manifold structure. Larger values will result in more
#' global structure being preserved at the loss of detailed local structure. In
#' general this parameter should often be in the range 5 to 50.
#' @param n.components The dimension of the space to embed into.
#' @param metric metric: This determines the choice of metric used to measure
#' distance in the input space. A wide variety of metrics are already coded, and
#' a user defined function can be passed as long as it has been JITd by numba.
#' @param n.epochs he number of training epochs to be used in optimizing the low dimensional
#' embedding. Larger values result in more accurate embeddings. If NULL is specified, a value will
#' be selected based on the size of the input dataset (200 for large datasets, 500 for small).
#' @param learning.rate The initial learning rate for the embedding optimization.
#' @param min.dist This controls how tightly the embedding is allowed compress points together.
#' Larger values ensure embedded points are moreevenly distributed, while smaller values allow the
#' algorithm to optimise more accurately with regard to local structure. Sensible values are in
#' the range 0.001 to 0.5.
#' @param spread The effective scale of embedded points. In combination with min.dist this
#' determines how clustered/clumped the embedded points are.
#' @param set.op.mix.ratio Interpolate between (fuzzy) union and intersection as the set operation
#' used to combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Both fuzzy
#' set operations use the product t-norm. The value of this parameter should be between 0.0 and
#' 1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection.
#' @param local.connectivity The local connectivity required - i.e. the number of nearest neighbors
#' that should be assumed to be connected at a local level. The higher this value the more connected
#' the manifold becomes locally. In practice this should be not more than the local intrinsic
#' dimension of the manifold.
#' @param repulsion.strength Weighting applied to negative samples in low dimensional embedding
#' optimization. Values higher than one will result in greater weight being given to negative
#' samples.
#' @param negative.sample.rate The number of negative samples to select per positive sample in the
#' optimization process. Increasing this value will result in greater repulsive force being applied,
#' greater optimization cost, but slightly more accuracy.
#' @param a More specific parameters controlling the embedding. If NULL, these values are set
#' automatically as determined by min. dist and spread. Parameter of differentiable approximation of
#' right adjoint functor.
#' @param b More specific parameters controlling the embedding. If NULL, these values are set
#' automatically as determined by min. dist and spread. Parameter of differentiable approximation of
#' right adjoint functor.
#' @param uwot.sgd Set \code{uwot::umap(fast_sgd = TRUE)}; see \code{\link[uwot]{umap}} for more details
#' @param metric.kwds A dictionary of arguments to pass on to the metric, such as the p value for
#' Minkowski distance. If NULL then no arguments are passed on.
#' @param angular.rp.forest Whether to use an angular random projection forest to initialise the
#' approximate nearest neighbor search. This can be faster, but is mostly on useful for metric that
#' use an angular style distance such as cosine, correlation etc. In the case of those metrics
#' angular forests will be chosen automatically.
#' @param reduction.name Name to store dimensional reduction under in the Seurat object
#' @param reduction.key dimensional reduction key, specifies the string before
#' the number for the dimension names. UMAP by default
#' @param seed.use Set a random seed. By default, sets the seed to 42. Setting
#' NULL will not set a seed
#' @param verbose Controls verbosity
#'
#' @rdname RunUMAP
#' @export
#' @method RunUMAP Seurat
#'
RunUMAP.Seurat <- function(
object,
dims = NULL,
reduction = 'pca',
features = NULL,
graph = NULL,
assay = 'RNA',
n.neighbors = 30L,
n.components = 2L,
metric = 'cosine',
n.epochs = NULL,
learning.rate = 1,
min.dist = 0.3,
spread = 1,
set.op.mix.ratio = 1,
local.connectivity = 1L,
repulsion.strength = 1,
negative.sample.rate = 5L,
a = NULL,
b = NULL,
uwot.sgd = FALSE,
seed.use = 42L,
metric.kwds = NULL,
angular.rp.forest = FALSE,
verbose = TRUE,
reduction.name = 'umap',
reduction.key = 'UMAP_',
...
) {
CheckDots(...)
if (sum(c(is.null(x = dims), is.null(x = features), is.null(x = graph))) < 2) {
stop("Please specify only one of the following arguments: dims, features, or graph")
}
if (!is.null(x = features)) {
data.use <- as.matrix(x = t(x = GetAssayData(object = object, slot = 'data', assay = assay)[features, , drop = FALSE]))
if (ncol(x = data.use) < n.components) {
stop(
"Please provide as many or more features than n.components: ",
length(x = features),
" features provided, ",
n.components,
" UMAP components requested",
call. = FALSE
)
}
} else if (!is.null(x = dims)) {
data.use <- Embeddings(object[[reduction]])[, dims]
assay <- DefaultAssay(object = object[[reduction]])
if (length(x = dims) < n.components) {
stop(
"Please provide as many or more dims than n.components: ",
length(x = dims),
" dims provided, ",
n.components,
" UMAP components requested",
call. = FALSE
)
}
} else if (!is.null(x = graph)) {
data.use <- object[[graph]]
} else {
stop("Please specify one of dims, features, or graph")
}
object[[reduction.name]] <- RunUMAP(
object = data.use,
assay = assay,
n.neighbors = n.neighbors,
n.components = n.components,
metric = metric,
n.epochs = n.epochs,
learning.rate = learning.rate,
min.dist = min.dist,
spread = spread,
set.op.mix.ratio = set.op.mix.ratio,
local.connectivity = local.connectivity,
repulsion.strength = repulsion.strength,
negative.sample.rate = negative.sample.rate,
a = a,
b = b,
uwot.sgd = uwot.sgd,
seed.use = seed.use,
metric.kwds = metric.kwds,
angular.rp.forest = angular.rp.forest,
reduction.key = reduction.key,
verbose = verbose
)
object <- LogSeuratCommand(object = object)
return(object)
}
#' @param dims Which dimensions to examine
#' @param score.thresh Threshold to use for the proportion test of PC
#' significance (see Details)
#'
#' @importFrom stats prop.test
#'
#' @rdname ScoreJackStraw
#' @export
#' @method ScoreJackStraw JackStrawData
#'
ScoreJackStraw.JackStrawData <- function(
object,
dims = 1:5,
score.thresh = 1e-5,
...
) {
CheckDots(...)
pAll <- JS(object = object, slot = "empirical.p.values")
pAll <- pAll[, dims, drop = FALSE]
pAll <- as.data.frame(pAll)
pAll$Contig <- rownames(x = pAll)
score.df <- NULL
for (i in dims) {
pc.score <- suppressWarnings(prop.test(
x = c(
length(x = which(x = pAll[, i] <= score.thresh)),
floor(x = nrow(x = pAll) * score.thresh)
),
n = c(nrow(pAll), nrow(pAll))
)$p.val)
if (length(x = which(x = pAll[, i] <= score.thresh)) == 0) {
pc.score <- 1
}
if (is.null(x = score.df)) {
score.df <- data.frame(PC = paste0("PC", i), Score = pc.score)
} else {
score.df <- rbind(score.df, data.frame(PC = paste0("PC", i), Score = pc.score))
}
}
score.df$PC <- dims
score.df <- as.matrix(score.df)
JS(object = object, slot = 'overall') <- score.df
return(object)
}
#' @rdname ScoreJackStraw
#' @export
#' @method ScoreJackStraw DimReduc
#'
ScoreJackStraw.DimReduc <- function(object, dims = 1:5, score.thresh = 1e-5, ...) {
JS(object = object) <- ScoreJackStraw(
object = JS(object = object),
dims = dims,
score.thresh = 1e-5,
...
)
return(object)
}
#' @param reduction Reduction associated with JackStraw to score
#' @param do.plot Show plot. To return ggplot object, use \code{JackStrawPlot} after
#' running ScoreJackStraw.
#'
#' @seealso \code{\link{JackStrawPlot}}
#'
#' @rdname ScoreJackStraw
#' @export
#' @method ScoreJackStraw Seurat
#'
ScoreJackStraw.Seurat <- function(
object,
reduction = "pca",
dims = 1:5,
score.thresh = 1e-5,
do.plot = FALSE,
...
) {
object[[reduction]] <- ScoreJackStraw(
object = object[[reduction]],
dims = dims,
...
)
if (do.plot) {
CheckDots(..., fxns = 'JackStrawPlot')
suppressWarnings(expr = print(JackStrawPlot(
object = object,
reduction = reduction,
dims = dims,
...
)))
}
object <- LogSeuratCommand(object = object)
return(object)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Methods for R-defined generics
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Internal
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Check that features are present and have non-zero variance
#
# @param data.use Feature matrix (features are rows)
# @param features Features to check
# @param object.name Name of object for message printing
# @param verbose Print warnings
#
# @return Returns a vector of features that is the subset of features
# that have non-zero variance
#
CheckFeatures <- function(
data.use,
features,
object.name,
verbose = TRUE
) {
if (any(!features %in% rownames(x = data.use))) {
missing.features <- features[!features %in% rownames(x = data.use)]
features <- setdiff(x = features, y = missing.features)
if (verbose){
warning(
paste0(
"The following ", length(x = missing.features),
" features are not scaled in ",
object.name,
": ",
paste0(missing.features, collapse = ", ")
))
}
}
if (inherits(x = data.use, what = 'dgCMatrix')) {
features.var <- SparseRowVar(mat = data.use[features, ], display_progress = F)
}
else {
features.var <- RowVar(x = data.use[features, ])
}
no.var.features <- features[features.var == 0]
if (length(x = no.var.features) > 0 && verbose) {
warning(
paste0(
"The following features have zero variance in ",
object.name,
": ",
paste0(no.var.features, collapse = ", ")
))
}
features <- setdiff(x = features, y = no.var.features)
features <- features[!is.na(x = features)]
return(features)
}
#internal
EmpiricalP <- function(x, nullval) {
return(sum(nullval > x) / length(x = nullval))
}
# FIt-SNE helper function for calling fast_tsne from R
#
# Based on Kluger Lab FIt-SNE v1.1.0 code on https://github.com/KlugerLab/FIt-SNE/blob/master/fast_tsne.R
# commit d2cf403 on Feb 8, 2019
#
#' @importFrom utils file_test
#
fftRtsne <- function(X,
dims = 2,
perplexity = 30,
theta = 0.5,
check_duplicates = TRUE,
max_iter = 1000,
fft_not_bh = TRUE,
ann_not_vptree = TRUE,
stop_early_exag_iter = 250,
exaggeration_factor = 12.0,
no_momentum_during_exag = FALSE,
start_late_exag_iter = -1.0,
late_exag_coeff = 1.0,
mom_switch_iter = 250,
momentum = 0.5,
final_momentum = 0.8,
learning_rate = 200,
n_trees = 50,
search_k = -1,
rand_seed = -1,
nterms = 3,
intervals_per_integer = 1,
min_num_intervals = 50,
K = -1,
sigma = -30,
initialization = NULL,
data_path = NULL,
result_path = NULL,
load_affinities = NULL,
fast_tsne_path = NULL,
nthreads = getOption('mc.cores', default = 1),
perplexity_list = NULL,
get_costs = FALSE,
df = 1.0,
...
) {
CheckDots(...)
if (is.null(x = data_path)) {
data_path <- tempfile(pattern = 'fftRtsne_data_', fileext = '.dat')
}
if (is.null(x = result_path)) {
result_path <- tempfile(pattern = 'fftRtsne_result_', fileext = '.dat')
}
if (is.null(x = fast_tsne_path)) {
# suppressWarnings(expr = fast_tsne_path <- system2(command = 'which', args = 'fast_tsne', stdout = TRUE))
fast_tsne_path <- SysExec(progs = ifelse(
test = .Platform$OS.type == 'windows',
yes = 'FItSNE.exe',
no = 'fast_tsne'
))
if (length(x = fast_tsne_path) == 0) {
stop("no fast_tsne_path specified and fast_tsne binary is not in the search path")
}
}
fast_tsne_path <- normalizePath(path = fast_tsne_path)
if (!file_test(op = '-x', x = fast_tsne_path)) {
stop("fast_tsne_path '", fast_tsne_path, "' does not exist or is not executable")
}
# check fast_tsne version
ft.out <- suppressWarnings(expr = system2(command = fast_tsne_path, stdout = TRUE))
if (grepl(pattern = '= t-SNE v1.1', x = ft.out[1])) {
version_number <- '1.1.0'
} else if (grepl(pattern = '= t-SNE v1.0', x = ft.out[1])) {
version_number <- '1.0'
} else {
message("First line of fast_tsne output is")
message(ft.out[1])
stop("Our FIt-SNE wrapper requires FIt-SNE v1.X.X, please install the appropriate version from github.com/KlugerLab/FIt-SNE and have fast_tsne_path point to it if it's not in your path")
}
is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) {
return(abs(x = x - round(x = x)) < tol)
}
if (version_number == '1.0' && df != 1.0) {
stop("This version of FIt-SNE does not support df!=1. Please install the appropriate version from github.com/KlugerLab/FIt-SNE")
}
if (!is.numeric(x = theta) || (theta < 0.0) || (theta > 1.0) ) {
stop("Incorrect theta.")
}
if (nrow(x = X) - 1 < 3 * perplexity) {
stop("Perplexity is too large.")
}
if (!is.matrix(x = X)) {
stop("Input X is not a matrix")
}
if (!(max_iter > 0)) {
stop("Incorrect number of iterations.")
}
if (!is.wholenumber(x = stop_early_exag_iter) || stop_early_exag_iter < 0) {
stop("stop_early_exag_iter should be a positive integer")
}
if (!is.numeric(x = exaggeration_factor)) {
stop("exaggeration_factor should be numeric")
}
if (!is.wholenumber(x = dims) || dims <= 0) {
stop("Incorrect dimensionality.")
}
if (search_k == -1) {
if (perplexity > 0) {
search_k <- n_trees * perplexity * 3
} else if (perplexity == 0) {
search_k <- n_trees * max(perplexity_list) * 3
} else {
search_k <- n_trees * K * 3
}
}
nbody_algo <- ifelse(test = fft_not_bh, yes = 2, no = 1)
if (is.null(load_affinities)) {
load_affinities <- 0
} else {
if (load_affinities == 'load') {
load_affinities <- 1
} else if (load_affinities == 'save') {
load_affinities <- 2
} else {
load_affinities <- 0
}
}
knn_algo <- ifelse(test = ann_not_vptree, yes = 1, no = 2)
f <- file(description = data_path, open = "wb")
n = nrow(x = X)
D = ncol(x = X)
writeBin(object = as.integer(x = n), con = f, size = 4)
writeBin(object = as.integer(x = D), con = f, size = 4)
writeBin(object = as.numeric(x = theta), con = f, size = 8) #theta
writeBin(object = as.numeric(x = perplexity), con = f, size = 8) #theta
if (perplexity == 0) {
writeBin(object = as.integer(x = length(x = perplexity_list)), con = f, size = 4)
writeBin(object = perplexity_list, con = f)
}
writeBin(object = as.integer(x = dims), con = f, size = 4) #theta
writeBin(object = as.integer(x = max_iter), con = f, size = 4)
writeBin(object = as.integer(x = stop_early_exag_iter), con = f, size = 4)
writeBin(object = as.integer(x = mom_switch_iter), con = f, size = 4)
writeBin(object = as.numeric(x = momentum), con = f, size = 8)
writeBin(object = as.numeric(x = final_momentum), con = f, size = 8)
writeBin(object = as.numeric(x = learning_rate), con = f, size = 8)
writeBin(object = as.integer(x = K), con = f, size = 4) #K
writeBin(object = as.numeric(x = sigma), con = f, size = 8) #sigma
writeBin(object = as.integer(x = nbody_algo), con = f, size = 4) #not barnes hut
writeBin(object = as.integer(x = knn_algo), con = f, size = 4)
writeBin(object = as.numeric(x = exaggeration_factor), con = f, size = 8) #compexag
writeBin(object = as.integer(x = no_momentum_during_exag), con = f, size = 4)
writeBin(object = as.integer(x = n_trees), con = f, size = 4)
writeBin(object = as.integer(x = search_k), con = f, size = 4)
writeBin(object = as.integer(x = start_late_exag_iter), con = f, size = 4)
writeBin(object = as.numeric(x = late_exag_coeff), con = f, size = 8)
writeBin(object = as.integer(x = nterms), con = f, size = 4)
writeBin(object = as.numeric(x = intervals_per_integer), con = f, size = 8)
writeBin(object = as.integer(x = min_num_intervals), con = f, size = 4)
tX = c(t(X))
writeBin(object = tX, con = f)
writeBin(object = as.integer(x = rand_seed), con = f, size = 4)
if (version_number != "1.0") {
writeBin(object = as.numeric(x = df), con = f, size = 8)
}
writeBin(object = as.integer(x = load_affinities), con = f, size = 4)
if (!is.null(x = initialization)) {
writeBin(object = c(t(x = initialization)), con = f)
}
close(con = f)
if (version_number == "1.0") {
flag <- system2(
command = fast_tsne_path,
args = c(data_path, result_path, nthreads)
)
} else {
flag <- system2(
command = fast_tsne_path,
args = c(version_number, data_path, result_path, nthreads)
)
}
if (flag != 0) {
stop('tsne call failed')
}
f <- file(description = result_path, open = "rb")
n <- readBin(con = f, what = integer(), n = 1, size = 4)
d <- readBin(con = f, what = integer(), n = 1, size = 4)
Y <- readBin(con = f, what = numeric(), n = n * d)
Y <- t(x = matrix(Y, nrow = d))
if (get_costs) {
tmp <- readBin(con = f, what = integer(), n = 1, size = 4)
costs <- readBin(con = f, what = numeric(), n = max_iter, size = 8)
Yout <- list(Y = Y, costs = costs)
} else {
Yout <- Y
}
close(con = f)
file.remove(data_path)
file.remove(result_path)
return(Yout)
}
#internal
#
JackRandom <- function(
scaled.data,
prop.use = 0.01,
r1.use = 1,
r2.use = 5,
seed.use = 1,
rev.pca = FALSE,
weight.by.var = weight.by.var,
maxit = 1000
) {
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
rand.genes <- sample(
x = rownames(x = scaled.data),
size = nrow(x = scaled.data) * prop.use
)
# make sure that rand.genes is at least 3
if (length(x = rand.genes) < 3) {
rand.genes <- sample(x = rownames(x = scaled.data), size = 3)
}
data.mod <- scaled.data
data.mod[rand.genes, ] <- MatrixRowShuffle(x = scaled.data[rand.genes, ])
temp.object <- RunPCA(
object = data.mod,
assay = "temp",
npcs = r2.use,
features = rownames(x = data.mod),
rev.pca = rev.pca,
weight.by.var = weight.by.var,
verbose = FALSE,
maxit = maxit
)
return(Loadings(temp.object)[rand.genes, r1.use:r2.use])
}
# Calculates the l2-norm of a vector
#
# Modified from PMA package
# @references Witten, Tibshirani, and Hastie, Biostatistics 2009
# @references \url{https://github.com/cran/PMA/blob/master/R/PMD.R}
#
# @param vec numeric vector
#
# @return returns the l2-norm.
#
L2Norm <- function(vec) {
a <- sqrt(x = sum(vec ^ 2))
if (a == 0) {
a <- .05
}
return(a)
}
# Prep data for dimensional reduction
#
# Common checks and preparatory steps before running certain dimensional
# reduction techniques
#
# @param object Assay object
# @param features Features to use as input for the dimensional reduction technique.
# Default is variable features
# @ param verbose Print messages and warnings
#
#
PrepDR <- function(
object,
features = NULL,
verbose = TRUE
) {
if (length(x = VariableFeatures(object = object)) == 0 && is.null(x = features)) {
stop("Variable features haven't been set. Run FindVariableFeatures() or provide a vector of feature names.")
}
data.use <- GetAssayData(object = object, slot = "scale.data")
if (nrow(x = data.use ) == 0) {
stop("Data has not been scaled. Please run ScaleData and retry")
}
features <- features %||% VariableFeatures(object = object)
features.keep <- unique(x = features[features %in% rownames(x = data.use)])
if (length(x = features.keep) < length(x = features)) {
features.exclude <- setdiff(x = features, y = features.keep)
if (verbose) {
warning(paste0("The following ", length(x = features.exclude), " features requested have not been scaled (running reduction without them): ", paste0(features.exclude, collapse = ", ")))
}
}
features <- features.keep
features.var <- apply(X = data.use[features, ], MARGIN = 1, FUN = var)
features.keep <- features[features.var > 0]
if (length(x = features.keep) < length(x = features)) {
features.exclude <- setdiff(x = features, y = features.keep)
if (verbose) {
warning(paste0("The following ", length(x = features.exclude), " features requested have zero variance (running reduction without them): ", paste0(features.exclude, collapse = ", ")))
}
}
features <- features.keep
features <- features[!is.na(x = features)]
data.use <- data.use[features, ]
return(data.use)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.