#' @include generics.R
#'
NULL
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Functions
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Determine statistical significance of PCA scores.
#'
#' Randomly permutes a subset of data, and calculates projected PCA scores for
#' these 'random' genes. Then compares the PCA scores for the 'random' genes
#' with the observed PCA scores to determine statistical signifance. End result
#' is a p-value for each gene's association with each principal component.
#'
#' @param object Seurat object
#' @param reduction DimReduc to use. ONLY PCA CURRENTLY SUPPORTED.
#' @param assay Assay used to calculate reduction.
#' @param dims Number of PCs to compute significance for
#' @param num.replicate Number of replicate samplings to perform
#' @param prop.freq Proportion of the data to randomly permute for each
#' replicate
#' @param verbose Print progress bar showing the number of replicates
#' that have been processed.
#' @param maxit maximum number of iterations to be performed by the irlba function of RunPCA
#'
#' @return Returns a Seurat object where JS(object = object[['pca']], slot = 'empirical')
#' represents p-values for each gene in the PCA analysis. If ProjectPCA is
#' subsequently run, JS(object = object[['pca']], slot = 'full') then
#' represents p-values for all genes.
#'
#' @importFrom methods new
#' @importFrom pbapply pblapply pbsapply
#' @importFrom future.apply future_lapply future_sapply
#'
#' @references Inspired by Chung et al, Bioinformatics (2014)
#'
#' @export
#'
#' @examples
#' \dontrun{
#' pbmc_small = suppressWarnings(JackStraw(pbmc_small))
#' head(JS(object = pbmc_small[['pca']], slot = 'empirical'))
#' }
#'
JackStraw <- function(
object,
reduction = "pca",
assay = NULL,
dims = 20,
num.replicate = 100,
prop.freq = 0.01,
verbose = TRUE,
maxit = 1000
) {
if (reduction != "pca") {
stop("Only pca for reduction is currently supported")
}
if (verbose && PlanThreads() == 1) {
my.lapply <- pblapply
my.sapply <- pbsapply
} else {
my.lapply <- future_lapply
my.sapply <- future_sapply
}
assay <- assay %||% DefaultAssay(object = object)
if (dims > length(x = object[[reduction]])) {
dims <- length(x = object[[reduction]])
warning("Number of dimensions specified is greater than those available. Setting dims to ", dims, " and continuing", immediate. = TRUE)
}
if (dims > ncol(x = object)) {
dims <- ncol(x = object)
warning("Number of dimensions specified is greater than the number of cells. Setting dims to ", dims, " and continuing", immediate. = TRUE)
}
loadings <- Loadings(object = object[[reduction]], projected = FALSE)
reduc.features <- rownames(x = loadings)
if (length(x = reduc.features) < 3) {
stop("Too few features")
}
if (length(x = reduc.features) * prop.freq < 3) {
warning(
"Number of variable genes given ",
prop.freq,
" as the prop.freq is low. Consider including more variable genes and/or increasing prop.freq. ",
"Continuing with 3 genes in every random sampling."
)
}
data.use <- GetAssayData(object = object, assay = assay, slot = "scale.data")[reduc.features, ]
rev.pca <- object[[paste0('RunPCA.', assay)]]$rev.pca
weight.by.var <- object[[paste0('RunPCA.', assay)]]$weight.by.var
fake.vals.raw <- my.lapply(
X = 1:num.replicate,
FUN = JackRandom,
scaled.data = data.use,
prop.use = prop.freq,
r1.use = 1,
r2.use = dims,
rev.pca = rev.pca,
weight.by.var = weight.by.var,
maxit = maxit
)
fake.vals <- sapply(
X = 1:dims,
FUN = function(x) {
return(as.numeric(x = unlist(x = lapply(
X = 1:num.replicate,
FUN = function(y) {
return(fake.vals.raw[[y]][, x])
}
))))
}
)
fake.vals <- as.matrix(x = fake.vals)
jackStraw.empP <- as.matrix(
my.sapply(
X = 1:dims,
FUN = function(x) {
return(unlist(x = lapply(
X = abs(loadings[, x]),
FUN = EmpiricalP,
nullval = abs(fake.vals[,x])
)))
}
)
)
colnames(x = jackStraw.empP) <- paste0("PC", 1:ncol(x = jackStraw.empP))
jackstraw.obj <- new(
Class = "JackStrawData",
empirical.p.values = jackStraw.empP,
fake.reduction.scores = fake.vals,
empirical.p.values.full = matrix()
)
JS(object = object[[reduction]]) <- jackstraw.obj
object <- LogSeuratCommand(object = object)
return(object)
}
#' L2-normalization
#'
#' Perform l2 normalization on given dimensional reduction
#'
#' @param object Seurat object
#' @param reduction Dimensional reduction to normalize
#' @param new.dr name of new dimensional reduction to store
#' (default is olddr.l2)
#' @param new.key name of key for new dimensional reduction
#'
#' @return Returns a \code{\link{Seurat}} object
#'
#' @export
#'
L2Dim <- function(object, reduction, new.dr = NULL, new.key = NULL){
l2.norm <- L2Norm(mat = Embeddings(object[[reduction]]))
if(is.null(new.dr)){
new.dr <- paste0(reduction, ".l2")
}
if(is.null(new.key)){
new.key <- paste0("L2", Key(object[[reduction]]))
}
colnames(x = l2.norm) <- paste0(new.key, 1:ncol(x = l2.norm))
l2.dr <- CreateDimReducObject(
embeddings = l2.norm,
loadings = Loadings(object = object[[reduction]]),
projected = Loadings(object = object[[reduction]]),
assay = DefaultAssay(object = object),
stdev = slot(object = object[[reduction]], name = 'stdev'),
key = new.key,
jackstraw = slot(object = object[[reduction]], name = 'jackstraw'),
misc = slot(object = object[[reduction]], name = 'misc')
)
object[[new.dr]] <- l2.dr
return(object)
}
#' L2-Normalize CCA
#'
#' Perform l2 normalization on CCs
#'
#' @param object Seurat object
#' @param \dots Additional parameters to L2Dim.
#'
#' @export
#'
L2CCA <- function(object, ...){
return(L2Dim(object = object, reduction = "cca", ...))
}
#' Significant genes from a PCA
#'
#' Returns a set of genes, based on the JackStraw analysis, that have
#' statistically significant associations with a set of PCs.
#'
#' @param object Seurat object
#' @param pcs.use PCS to use.
#' @param pval.cut P-value cutoff
#' @param use.full Use the full list of genes (from the projected PCA). Assumes
#' that \code{ProjectDim} has been run. Currently, must be set to FALSE.
#' @param max.per.pc Maximum number of genes to return per PC. Used to avoid genes from one PC dominating the entire analysis.
#'
#' @return A vector of genes whose p-values are statistically significant for
#' at least one of the given PCs.
#'
#' @export
#'
#' @seealso \code{\link{ProjectDim}} \code{\link{JackStraw}}
#'
#' @examples
#' PCASigGenes(pbmc_small, pcs.use = 1:2)
#'
PCASigGenes <- function(
object,
pcs.use,
pval.cut = 0.1,
use.full = FALSE,
max.per.pc = NULL
) {
# pvals.use <- GetDimReduction(object,reduction.type = "pca",slot = "jackstraw")@empirical.p.values
empirical.use <- ifelse(test = use.full, yes = 'full', no = 'empirical')
pvals.use <- JS(object = object[['pca']], slot = empirical.use)
if (length(x = pcs.use) == 1) {
pvals.min <- pvals.use[, pcs.use]
}
if (length(x = pcs.use) > 1) {
pvals.min <- apply(X = pvals.use[, pcs.use], MARGIN = 1, FUN = min)
}
names(x = pvals.min) <- rownames(x = pvals.use)
features <- names(x = pvals.min)[pvals.min < pval.cut]
if (!is.null(x = max.per.pc)) {
top.features <- TopFeatures(
object = object[['pca']],
dim = pcs.use,
nfeatures = max.per.pc,
projected = use.full,
balanced = FALSE
)
features <- intersect(x = top.features, y = features)
}
return(features)
}
#' Project Dimensional reduction onto full dataset
#'
#' Takes a pre-computed dimensional reduction (typically calculated on a subset
#' of genes) and projects this onto the entire dataset (all genes). Note that
#' the cell loadings will remain unchanged, but now there are gene loadings for
#' all genes.
#'
#' @param object Seurat object
#' @param reduction Reduction to use
#' @param assay Assay to use
#' @param dims.print Number of dims to print features for
#' @param nfeatures.print Number of features with highest/lowest loadings to print for
#' each dimension
#' @param overwrite Replace the existing data in feature.loadings
#' @param do.center Center the dataset prior to projection (should be set to TRUE)
#' @param verbose Print top genes associated with the projected dimensions
#'
#' @return Returns Seurat object with the projected values
#'
#' @export
#'
#' @examples
#' pbmc_small
#' pbmc_small <- ProjectDim(object = pbmc_small, reduction = "pca")
#' # Vizualize top projected genes in heatmap
#' DimHeatmap(object = pbmc_small, reduction = "pca", dims = 1, balanced = TRUE)
#'
ProjectDim <- function(
object,
reduction = "pca",
assay = NULL,
dims.print = 1:5,
nfeatures.print = 20,
overwrite = FALSE,
do.center = FALSE,
verbose = TRUE
) {
redeuc <- object[[reduction]]
assay <- assay %||% DefaultAssay(object = redeuc)
data.use <- GetAssayData(
object = object[[assay]],
slot = "scale.data"
)
if (do.center) {
data.use <- scale(x = as.matrix(x = data.use), center = TRUE, scale = FALSE)
}
cell.embeddings <- Embeddings(object = redeuc)
new.feature.loadings.full <- FastMatMult(m1 = data.use, m2 = cell.embeddings)
rownames(x = new.feature.loadings.full) <- rownames(x = data.use)
colnames(x = new.feature.loadings.full) <- colnames(x = cell.embeddings)
Loadings(object = redeuc, projected = TRUE) <- new.feature.loadings.full
if (overwrite) {
Loadings(object = redeuc, projected = FALSE) <- new.feature.loadings.full
}
object[[reduction]] <- redeuc
if (verbose) {
print(
x = redeuc,
dims = dims.print,
nfeatures = nfeatures.print,
projected = TRUE
)
}
object <- LogSeuratCommand(object = object)
return(object)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Methods for Seurat-defined generics
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @param standardize Standardize matrices - scales columns to have unit variance
#' and mean 0
#' @param num.cc Number of canonical vectors to calculate
#' @param verbose ...
#' @param use.cpp ...
#'
#' @importFrom irlba irlba
#'
#' @rdname RunCCA
#' @export
#'
RunCCA.default <- function(
object1,
object2,
standardize = TRUE,
num.cc = 20,
verbose = FALSE,
use.cpp = TRUE,
...
) {
set.seed(seed = 42)
cells1 <- colnames(x = object1)
cells2 <- colnames(x = object2)
if (standardize) {
object1 <- Standardize(mat = object1, display_progress = FALSE)
object2 <- Standardize(mat = object2, display_progress = FALSE)
}
if (as.numeric(x = max(dim(x = object1))) * as.numeric(x = max(dim(x = object2))) > .Machine$integer.max) {
# if the returned matrix from FastMatMult has more than 2^31-1 entries, throws an error due to
# storage of certain attributes as ints, force usage of R version
use.cpp <- FALSE
}
if (use.cpp == TRUE) {
mat3 <- FastMatMult(m1 = t(x = object1), m2 = object2)
}
else {
mat3 <- crossprod(x = object1, y = object2)
}
cca.svd <- irlba(A = mat3, nv = num.cc)
cca.data <- rbind(cca.svd$u, cca.svd$v)
colnames(x = cca.data) <- paste0("CC", 1:num.cc)
rownames(cca.data) <- c(cells1, cells2)
cca.data <- apply(
X = cca.data,
MARGIN = 2,
FUN = function(x) {
if (sign(x[1]) == -1) {
x <- x * -1
}
return(x)
}
)
return(list(ccv = cca.data, d = cca.svd$d))
}
#' @param assay1,assay2 Assays to pull from in the first and second objects, respectively
#' @param features Set of genes to use in CCA. Default is the union of both
#' the variable features sets present in both objects.
#' @param renormalize Renormalize raw data after merging the objects. If FALSE,
#' merge the data matrices also.
#' @param rescale Rescale the datasets prior to CCA. If FALSE, uses existing data in the scale data slots.
#' @param compute.gene.loadings Also compute the gene loadings. NOTE - this will
#' scale every gene in the dataset which may impose a high memory cost.
#' @param add.cell.id1,add.cell.id2 Add ...
#' @param ... Extra parameters (passed onto MergeSeurat in case with two objects
#' passed, passed onto ScaleData in case with single object and rescale.groups
#' set to TRUE)
#'
#' @rdname RunCCA
#' @export
#' @method RunCCA Seurat
#'
RunCCA.Seurat <- function(
object1,
object2,
assay1 = NULL,
assay2 = NULL,
num.cc = 20,
features = NULL,
renormalize = FALSE,
rescale = FALSE,
compute.gene.loadings = TRUE,
add.cell.id1 = NULL,
add.cell.id2 = NULL,
verbose = TRUE,
use.cpp = TRUE,
...
) {
assay1 <- assay1 %||% DefaultAssay(object = object1)
assay2 <- assay2 %||% DefaultAssay(object = object2)
if (assay1 != assay2) {
warning("Running CCA on different assays")
}
if (is.null(x = features)) {
if (length(x = VariableFeatures(object = object1, assay = assay1)) == 0) {
stop(paste0("VariableFeatures not computed for the ", assay1, " assay in object1"))
}
if (length(x = VariableFeatures(object = object2, assay = assay2)) == 0) {
stop(paste0("VariableFeatures not computed for the ", assay2, " assay in object2"))
}
features <- union(x = VariableFeatures(object = object1), y = VariableFeatures(object = object2))
if (length(x = features) == 0) {
stop("Zero features in the union of the VariableFeature sets ")
}
}
nfeatures <- length(x = features)
if (!(rescale)) {
data.use1 <- GetAssayData(object = object1, assay = assay1, slot = "scale.data")
data.use2 <- GetAssayData(object = object2, assay = assay2, slot = "scale.data")
features <- CheckFeatures(data.use = data.use1, features = features, object.name = "object1", verbose = FALSE)
features <- CheckFeatures(data.use = data.use2, features = features, object.name = "object2", verbose = FALSE)
data1 <- data.use1[features, ]
data2 <- data.use2[features, ]
}
if (rescale) {
data.use1 <- GetAssayData(object = object1, assay = assay1, slot = "data")
data.use2 <- GetAssayData(object = object2, assay = assay2, slot = "data")
features <- CheckFeatures(data.use = data.use1, features = features, object.name = "object1", verbose = FALSE)
features <- CheckFeatures(data.use = data.use2, features = features, object.name = "object2", verbose = FALSE)
data1 <- data.use1[features,]
data2 <- data.use2[features,]
if (verbose) message("Rescaling groups")
data1 <- FastRowScale(as.matrix(data1))
dimnames(data1) <- list(features, colnames(x = object1))
data2 <- FastRowScale(as.matrix(data2))
dimnames(data2) <- list(features, colnames(x = object2))
}
if (length(x = features) / nfeatures < 0.1 & verbose) {
warning("More than 10% of provided features filtered out. Please check that the given features are present in the scale.data slot for both the assays provided here and that they have non-zero variance.")
}
if (length(x = features) < 50) {
warning("Fewer than 50 features used as input for CCA.")
}
if (verbose) {
message("Running CCA")
}
cca.results <- RunCCA(
object1 = data1,
object2 = data2,
standardize = TRUE,
num.cc = num.cc,
verbose = verbose,
use.cpp = use.cpp
)
if (verbose) {
message("Merging objects")
}
combined.object <- merge(
x = object1,
y = object2,
merge.data = TRUE,
...
)
combined.object[['cca']] <- CreateDimReducObject(
embeddings = cca.results$ccv[colnames(combined.object), ],
assay = assay1,
key = "CC_"
)
combined.object[['cca']]@assay.used <- DefaultAssay(combined.object)
if (ncol(combined.object) != (ncol(object1) + ncol(object2))) {
warning("Some cells removed after object merge due to minimum feature count cutoff")
}
combined.scale <- cbind(data1,data2)
combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data")
if (renormalize) {
combined.object <- NormalizeData(
object = combined.object,
assay = assay1,
normalization.method = object1[[paste0("NormalizeData.", assay1)]]$normalization.method,
scale.factor = object1[[paste0("NormalizeData.", assay1)]]$scale.factor
)
}
if (compute.gene.loadings) {
combined.object <- ProjectDim(
object = combined.object,
reduction = "cca",
verbose = FALSE,
overwrite = TRUE)
}
return(combined.object)
}
#' @param assay Name of Assay ICA is being run on
#' @param nics Number of ICs to compute
#' @param rev.ica By default, computes the dimensional reduction on the cell x
#' feature matrix. Setting to true will compute it on the transpose (feature x cell
#' matrix).
#' @param ica.function ICA function from ica package to run (options: icafast,
#' icaimax, icajade)
#' @param verbose Print the top genes associated with high/low loadings for
#' the ICs
#' @param ndims.print ICs to print genes for
#' @param nfeatures.print Number of genes to print for each IC
#' @param reduction.key dimensional reduction key, specifies the string before
#' the number for the dimension names.
#' @param seed.use Set a random seed. Setting NULL will not set a seed.
#' @param \dots Additional arguments to be passed to fastica
#'
#' @importFrom ica icafast icaimax icajade
#'
#' @rdname RunICA
#' @export
#' @method RunICA default
#'
RunICA.default <- function(
object,
assay = NULL,
nics = 50,
rev.ica = FALSE,
ica.function = "icafast",
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.name = "ica",
reduction.key = "ica_",
seed.use = 42,
...
) {
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
nics <- min(nics, ncol(x = object))
ica.fxn <- eval(expr = parse(text = ica.function))
if (rev.ica) {
ica.results <- ica.fxn(object, nc = nics,...)
cell.embeddings <- ica.results$M
} else {
ica.results <- ica.fxn(t(x = object), nc = nics,...)
cell.embeddings <- ica.results$S
}
feature.loadings <- (as.matrix(x = object ) %*% as.matrix(x = cell.embeddings))
colnames(x = feature.loadings) <- paste0(reduction.key, 1:ncol(x = feature.loadings))
colnames(x = cell.embeddings) <- paste0(reduction.key, 1:ncol(x = cell.embeddings))
reduction.data <- CreateDimReducObject(
embeddings = cell.embeddings,
loadings = feature.loadings,
assay = assay,
key = reduction.key
)
if (verbose) {
print(x = reduction.data, dims = ndims.print, nfeatures = nfeatures.print)
}
return(reduction.data)
}
#' @param features Features to compute ICA on
#'
#' @rdname RunICA
#' @export
#' @method RunICA Assay
#'
RunICA.Assay <- function(
object,
assay = NULL,
features = NULL,
nics = 50,
rev.ica = FALSE,
ica.function = "icafast",
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.name = "ica",
reduction.key = "ica_",
seed.use = 42,
...
) {
data.use <- PrepDR(
object = object,
features = features,
verbose = verbose
)
reduction.data <- RunICA(
object = data.use,
assay = assay,
nics = nics,
rev.ica = rev.ica,
ica.function = ica.function,
verbose = verbose,
ndims.print = ndims.print,
nfeatures.print = nfeatures.print,
reduction.key = reduction.key,
seed.use = seed.use,
...
)
return(reduction.data)
}
#' @param reduction.name dimensional reduction name
#'
#' @rdname RunICA
#' @method RunICA Seurat
#' @export
#'
RunICA.Seurat <- function(
object,
assay = NULL,
features = NULL,
nics = 50,
rev.ica = FALSE,
ica.function = "icafast",
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.name = "ica",
reduction.key = "IC_",
seed.use = 42,
...
) {
assay <- assay %||% DefaultAssay(object = object)
assay.data <- GetAssay(object = object, assay = assay)
reduction.data <- RunICA(
object = assay.data,
assay = assay,
features = features,
nics = nics,
rev.ica = rev.ica,
ica.function = ica.function,
verbose = verbose,
ndims.print = ndims.print,
nfeatures.print = nfeatures.print,
reduction.key = reduction.key,
seed.use = seed.use,
...
)
object[[reduction.name]] <- reduction.data
object <- LogSeuratCommand(object = object)
return(object)
}
#' @param assay Name of Assay PCA is being run on
#' @param npcs Total Number of PCs to compute and store (50 by default)
#' @param rev.pca By default computes the PCA on the cell x gene matrix. Setting
#' to true will compute it on gene x cell matrix.
#' @param weight.by.var Weight the cell embeddings by the variance of each PC
#' (weights the gene loadings if rev.pca is TRUE)
#' @param verbose Print the top genes associated with high/low loadings for
#' the PCs
#' @param ndims.print PCs to print genes for
#' @param nfeatures.print Number of genes to print for each PC
#' @param reduction.key dimensional reduction key, specifies the string before
#' the number for the dimension names. PC by default
#' @param seed.use Set a random seed. By default, sets the seed to 42. Setting
#' NULL will not set a seed.
#' @param approx Use truncated singular value decomposition to approximate PCA
#'
#' @importFrom irlba irlba
#' @importFrom stats prcomp
#'
#' @rdname RunPCA
#' @export
#'
RunPCA.default <- function(
object,
assay = NULL,
npcs = 50,
rev.pca = FALSE,
weight.by.var = TRUE,
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.key = "PC_",
seed.use = 42,
approx = TRUE,
...
) {
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
if (rev.pca) {
npcs <- min(npcs, ncol(x = object) - 1)
pca.results <- irlba(A = object, nv = npcs, ...)
sdev <- pca.results$d/sqrt(max(1, nrow(x = object) - 1))
if (weight.by.var) {
feature.loadings <- pca.results$u %*% diag(pca.results$d)
} else{
feature.loadings <- pca.results$u
}
cell.embeddings <- pca.results$v
}
else {
if (approx) {
npcs <- min(npcs, nrow(x = object) - 1)
pca.results <- irlba(A = t(x = object), nv = npcs, ...)
feature.loadings <- pca.results$v
sdev <- pca.results$d/sqrt(max(1, ncol(object) - 1))
if (weight.by.var) {
cell.embeddings <- pca.results$u %*% diag(pca.results$d)
} else {
cell.embeddings <- pca.results$u
}
} else {
npcs <- min(npcs, nrow(x = object))
pca.results <- prcomp(x = t(object), rank. = npcs, ...)
feature.loadings <- pca.results$rotation
sdev <- pca.results$sdev
if (weight.by.var) {
cell.embeddings <- pca.results$x %*% diag(pca.results$sdev[1:npcs]^2)
} else {
cell.embeddings <- pca.results$x
}
}
}
rownames(x = feature.loadings) <- rownames(x = object)
colnames(x = feature.loadings) <- paste0(reduction.key, 1:npcs)
rownames(x = cell.embeddings) <- colnames(x = object)
colnames(x = cell.embeddings) <- colnames(x = feature.loadings)
reduction.data <- CreateDimReducObject(
embeddings = cell.embeddings,
loadings = feature.loadings,
assay = assay,
stdev = sdev,
key = reduction.key
)
if (verbose) {
print(x = reduction.data, dims = ndims.print, nfeatures = nfeatures.print)
}
return(reduction.data)
}
#' @param features Features to compute PCA on
#'
#' @rdname RunPCA
#' @export
#' @method RunPCA Assay
#'
RunPCA.Assay <- function(
object,
assay = NULL,
features = NULL,
npcs = 50,
rev.pca = FALSE,
weight.by.var = TRUE,
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.key = "PC_",
seed.use = 42,
...
) {
data.use <- PrepDR(
object = object,
features = features,
verbose = verbose
)
reduction.data <- RunPCA(
object = data.use,
assay = assay,
pc.features = features,
npcs = npcs,
rev.pca = rev.pca,
weight.by.var = weight.by.var,
verbose = verbose,
ndims.print = ndims.print,
nfeatures.print = nfeatures.print,
reduction.key = reduction.key,
seed.use = seed.use,
...
)
return(reduction.data)
}
#' @param reduction.name dimensional reduction name, pca by default
#'
#' @rdname RunPCA
#' @export
#' @method RunPCA Seurat
#'
RunPCA.Seurat <- function(
object,
assay = NULL,
features = NULL,
npcs = 50,
rev.pca = FALSE,
weight.by.var = TRUE,
verbose = TRUE,
ndims.print = 1:5,
nfeatures.print = 30,
reduction.name = "pca",
reduction.key = "PC_",
seed.use = 42,
...
) {
assay <- assay %||% DefaultAssay(object = object)
assay.data <- GetAssay(object = object, assay = assay)
reduction.data <- RunPCA(
object = assay.data,
assay = assay,
features = features,
npcs = npcs,
rev.pca = rev.pca,
weight.by.var = weight.by.var,
verbose = verbose,
ndims.print = ndims.print,
nfeatures.print = nfeatures.print,
reduction.key = reduction.key,
seed.use = seed.use,
...
)
object[[reduction.name]] <- reduction.data
object <- LogSeuratCommand(object = object)
return(object)
}
#' @param assay Name of assay that that t-SNE is being run on
#' @param seed.use Random seed for the t-SNE
#' @param tsne.method Select the method to use to compute the tSNE. Available
#' methods are:
#' \itemize{
#' \item{Rtsne: }{Use the Rtsne package Barnes-Hut implementation of tSNE (default)}
# \item{tsne: }{standard tsne - not recommended for large datasets}
#' \item{FIt-SNE: }{Use the FFT-accelerated Interpolation-based t-SNE. Based on
#' Kluger Lab code found here: https://github.com/KlugerLab/FIt-SNE}
#' }
#' @param add.iter If an existing tSNE has already been computed, uses the
#' current tSNE to seed the algorithm and then adds additional iterations on top
#' of this
#' @param dim.embed The dimensional space of the resulting tSNE embedding
#' (default is 2). For example, set to 3 for a 3d tSNE
#' @param reduction.key dimensional reduction key, specifies the string before the number for the dimension names. tSNE_ by default
#'
#' @importFrom tsne tsne
#' @importFrom Rtsne Rtsne
#'
#' @rdname RunTSNE
#' @export
#' @method RunTSNE matrix
#'
RunTSNE.matrix <- function(
object,
assay = NULL,
seed.use = 1,
tsne.method = "Rtsne",
add.iter = 0,
dim.embed = 2,
reduction.key = "tSNE_",
...
) {
set.seed(seed = seed.use)
tsne.data <- switch(
EXPR = tsne.method,
'Rtsne' = Rtsne(
X = object,
dims = dim.embed,
... # PCA/is_distance
)$Y,
'FIt-SNE' = fftRtsne(X = object, dims = dim.embed, rand_seed = seed.use, ...),
stop("Invalid tSNE method: please choose from 'Rtsne' or 'FIt-SNE'")
)
if (add.iter > 0) {
tsne.data <- tsne(
X = object,
initial_config = as.matrix(x = tsne.data),
max_iter = add.iter,
...
)
}
colnames(x = tsne.data) <- paste0(reduction.key, 1:ncol(x = tsne.data))
rownames(x = tsne.data) <- rownames(x = object)
tsne.reduction <- CreateDimReducObject(
embeddings = tsne.data,
key = reduction.key,
assay = assay
)
return(tsne.reduction)
}
#' @param cells Which cells to analyze (default, all cells)
#' @param dims Which dimensions to use as input features
#'
#' @rdname RunTSNE
#' @export
#' @method RunTSNE DimReduc
#'
RunTSNE.DimReduc <- function(
object,
cells = NULL,
dims = 1:5,
seed.use = 1,
tsne.method = "Rtsne",
add.iter = 0,
dim.embed = 2,
reduction.key = "tSNE_",
...
) {
tsne.reduction <- RunTSNE(
object = object[[, dims]],
assay = DefaultAssay(object = object),
seed.use = seed.use,
tsne.method = tsne.method,
add.iter = add.iter,
dim.embed = dim.embed,
reduction.key = reduction.key,
...
)
return(tsne.reduction)
}
#' @param reduction Which dimensional reduction (e.g. PCA, ICA) to use for
#' the tSNE. Default is PCA
#' @param features If set, run the tSNE on this subset of features
#' (instead of running on a set of reduced dimensions). Not set (NULL) by default;
#' \code{dims} must be NULL to run on features
#' @param distance.matrix If set, runs tSNE on the given distance matrix
#' instead of data matrix (experimental)
#' @param reduction.name dimensional reduction name, specifies the position in the object$dr list. tsne by default
#'
#' @rdname RunTSNE
#' @export
#' @method RunTSNE Seurat
#'
RunTSNE.Seurat <- function(
object,
reduction = "pca",
cells = NULL,
dims = 1:5,
features = NULL,
seed.use = 1,
tsne.method = "Rtsne",
add.iter = 0,
dim.embed = 2,
distance.matrix = NULL,
reduction.name = "tsne",
reduction.key = "tSNE_",
...
) {
tsne.reduction <- if (!is.null(x = distance.matrix)) {
RunTSNE(
object = as.matrix(x = distance.matrix),
assay = DefaultAssay(object = object),
seed.use = seed.use,
tsne.method = tsne.method,
add.iter = add.iter,
dim.embed = dim.embed,
reduction.key = reduction.key,
is_distance = TRUE,
...
)
} else if (!is.null(x = dims)) {
RunTSNE(
object = object[[reduction]],
dims = dims,
seed.use = seed.use,
tsne.method = tsne.method,
add.iter = add.iter,
dim.embed = dim.embed,
reduction.key = reduction.key,
pca = FALSE,
...
)
} else if (!is.null(x = features)) {
RunTSNE(
object = as.matrix(x = GetAssayData(object = object)[features, ]),
assay = DefaultAssay(object = object),
seed.use = seed.use,
tsne.method = tsne.method,
add.iter = add.iter,
dim.embed = dim.embed,
reduction.key = reduction.key,
pca = FALSE,
...
)
} else {
stop("Unknown way of running tSNE")
}
object[[reduction.name]] <- tsne.reduction
object <- LogSeuratCommand(object = object)
return(object)
}
#' @importFrom reticulate py_module_available py_set_seed import
#'
#' @rdname RunUMAP
#' @method RunUMAP default
#' @export
#'
RunUMAP.default <- function(
object,
assay = NULL,
n.neighbors = 30L,
n.components = 2L,
metric = "correlation",
n.epochs = NULL,
learning.rate = 1.0,
min.dist = 0.3,
spread = 1.0,
set.op.mix.ratio = 1.0,
local.connectivity = 1L,
repulsion.strength = 1,
negative.sample.rate = 5,
a = NULL,
b = NULL,
seed.use = 42,
metric.kwds = NULL,
angular.rp.forest = FALSE,
reduction.key = 'UMAP_',
verbose = TRUE,
...
) {
if (!py_module_available(module = 'umap')) {
stop("Cannot find UMAP, please install through pip (e.g. pip install umap-learn).")
}
if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
py_set_seed(seed = seed.use)
}
umap_import <- import(module = "umap", delay_load = TRUE)
umap <- umap_import$UMAP(
n_neighbors = as.integer(x = n.neighbors),
n_components = as.integer(x = n.components),
metric = metric,
n_epochs = n.epochs,
learning_rate = learning.rate,
min_dist = min.dist,
spread = spread,
set_op_mix_ratio = set.op.mix.ratio,
local_connectivity = local.connectivity,
repulsion_strength = repulsion.strength,
negative_sample_rate = negative.sample.rate,
a = a,
b = b,
metric_kwds = metric.kwds,
angular_rp_forest = angular.rp.forest,
verbose = verbose
)
umap_output <- umap$fit_transform(as.matrix(x = object))
colnames(x = umap_output) <- paste0(reduction.key, 1:ncol(x = umap_output))
rownames(x = umap_output) <- rownames(object)
umap.reduction <- CreateDimReducObject(
embeddings = umap_output,
key = reduction.key,
assay = assay
)
return(umap.reduction)
}
#' @importFrom reticulate py_module_available import
#'
#' @rdname RunUMAP
#' @method RunUMAP Graph
#' @export
#'
RunUMAP.Graph <- function(
object,
assay = NULL,
n.components = 2L,
metric = "correlation",
n.epochs = 0L,
learning.rate = 1,
min.dist = 0.3,
spread = 1,
repulsion.strength = 1,
negative.sample.rate = 5L,
a = NULL,
b = NULL,
seed.use = 42L,
metric.kwds = NULL,
verbose = TRUE,
reduction.key = 'UMAP_',
...
) {
if (!py_module_available(module = 'umap')) {
stop("Cannot find UMAP, please install through pip (e.g. pip install umap-learn).")
}
if (!py_module_available(module = 'numpy')) {
stop("Cannot find numpy, please install through pip (e.g. pip install numpy).")
}
if (!py_module_available(module = 'sklearn')) {
stop("Cannot find sklearn, please install through pip (e.g. pip install scikit-learn).")
}
if (!py_module_available(module = 'scipy')) {
stop("Cannot find scipy, please install through pip (e.g. pip install scipy).")
}
np <- import("numpy", delay_load = TRUE)
sp <- import("scipy", delay_load = TRUE)
sklearn <- import("sklearn", delay_load = TRUE)
umap <- import("umap", delay_load = TRUE)
diag(x = object) <- 0
data <- object
object <- sp$sparse$coo_matrix(arg1 = object)
ab.params <- umap$umap_$find_ab_params(spread = spread, min_dist = min.dist)
a <- a %||% ab.params[[1]]
b <- b %||% ab.params[[2]]
n.epochs <- n.epochs %||% 0L
random.state <- sklearn$utils$check_random_state(seed = as.integer(x = seed.use))
embeddings <- umap$umap_$simplicial_set_embedding(
data = data,
graph = object,
n_components = n.components,
initial_alpha = learning.rate,
a = a,
b = b,
gamma = repulsion.strength,
negative_sample_rate = negative.sample.rate,
n_epochs = n.epochs,
random_state = random.state,
init = "spectral",
metric = metric,
metric_kwds = metric.kwds,
verbose = verbose
)
rownames(x = embeddings) <- colnames(x = data)
colnames(x = embeddings) <- paste0("UMAP_", 1:n.components)
# center the embeddings on zero
embeddings <- scale(x = embeddings, scale = FALSE)
umap <- CreateDimReducObject(embeddings = embeddings, key = reduction.key, assay = assay)
return(umap)
}
#' @param dims Which dimensions to use as input features, used only if
#' \code{features} is NULL
#' @param reduction Which dimensional reduction (PCA or ICA) to use for the
#' UMAP input. Default is PCA
#' @param features If set, run UMAP on this subset of features (instead of running on a
#' set of reduced dimensions). Not set (NULL) by default; \code{dims} must be NULL to run
#' on features
#' @param graph Name of graph on which to run UMAP
#' @param assay Assay to pull data for when using \code{features}, or assay used to construct Graph
#' if running UMAP on a Graph
#' @param n.neighbors This determines the number of neighboring points used in
#' local approximations of manifold structure. Larger values will result in more
#' global structure being preserved at the loss of detailed local structure. In
#' general this parameter should often be in the range 5 to 50.
#' @param n.components The dimension of the space to embed into.
#' @param metric metric: This determines the choice of metric used to measure
#' distance in the input space. A wide variety of metrics are already coded, and
#' a user defined function can be passed as long as it has been JITd by numba.
#' @param n.epochs he number of training epochs to be used in optimizing the low dimensional
#' embedding. Larger values result in more accurate embeddings. If NULL is specified, a value will
#' be selected based on the size of the input dataset (200 for large datasets, 500 for small).
#' @param learning.rate The initial learning rate for the embedding optimization.
#' @param min.dist This controls how tightly the embedding is allowed compress points together.
#' Larger values ensure embedded points are moreevenly distributed, while smaller values allow the
#' algorithm to optimise more accurately with regard to local structure. Sensible values are in
#' the range 0.001 to 0.5.
#' @param spread The effective scale of embedded points. In combination with min.dist this
#' determines how clustered/clumped the embedded points are.
#' @param set.op.mix.ratio Interpolate between (fuzzy) union and intersection as the set operation
#' used to combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Both fuzzy
#' set operations use the product t-norm. The value of this parameter should be between 0.0 and
#' 1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection.
#' @param local.connectivity The local connectivity required – i.e. the number of nearest neighbors
#' that should be assumed to be connected at a local level. The higher this value the more connected
#' the manifold becomes locally. In practice this should be not more than the local intrinsic
#' dimension of the manifold.
#' @param repulsion.strength Weighting applied to negative samples in low dimensional embedding
#' optimization. Values higher than one will result in greater weight being given to negative
#' samples.
#' @param negative.sample.rate The number of negative samples to select per positive sample in the
#' optimization process. Increasing this value will result in greater repulsive force being applied,
#' greater optimization cost, but slightly more accuracy.
#' @param a More specific parameters controlling the embedding. If NULL, these values are set
#' automatically as determined by min. dist and spread. Parameter of differentiable approximation of
#' right adjoint functor.
#' @param b More specific parameters controlling the embedding. If NULL, these values are set
#' automatically as determined by min. dist and spread. Parameter of differentiable approximation of
#' right adjoint functor.
#' @param metric.kwds A dictionary of arguments to pass on to the metric, such as the p value for
#' Minkowski distance. If NULL then no arguments are passed on.
#' @param angular.rp.forest Whether to use an angular random projection forest to initialise the
#' approximate nearest neighbor search. This can be faster, but is mostly on useful for metric that
#' use an angular style distance such as cosine, correlation etc. In the case of those metrics
#' angular forests will be chosen automatically.
#' @param reduction.name Name to store dimensional reduction under in the Seurat object
#' @param reduction.key dimensional reduction key, specifies the string before
#' the number for the dimension names. UMAP by default
#' @param seed.use Set a random seed. By default, sets the seed to 42. Setting
#' NULL will not set a seed
#' @param verbose Controls verbosity
#'
#' @rdname RunUMAP
#' @export
#' @method RunUMAP Seurat
#'
RunUMAP.Seurat <- function(
object,
dims = NULL,
reduction = 'pca',
features = NULL,
graph = NULL,
assay = 'RNA',
n.neighbors = 30L,
n.components = 2L,
metric = "correlation",
n.epochs = NULL,
learning.rate = 1,
min.dist = 0.3,
spread = 1,
set.op.mix.ratio = 1,
local.connectivity = 1L,
repulsion.strength = 1,
negative.sample.rate = 5L,
a = NULL,
b = NULL,
seed.use = 42L,
metric.kwds = NULL,
angular.rp.forest = FALSE,
verbose = TRUE,
reduction.name = "umap",
reduction.key = "UMAP_",
...
) {
if (sum(c(is.null(x = dims), is.null(x = features), is.null(x = graph))) < 2) {
stop("Please specify only one of the following arguments: dims, features, or graph")
}
if (!is.null(x = features)) {
data.use <- t(x = GetAssayData(object = object, slot = 'data', assay = assay)[features, ])
} else if (!is.null(x = dims)) {
data.use <- Embeddings(object[[reduction]])[, dims]
assay <- DefaultAssay(object = object[[reduction]])
} else if (!is.null(x = graph)) {
data.use <- object[[graph]]
} else {
stop("Please specify one of dims, features, or graph")
}
object[[reduction.name]] <- RunUMAP(
object = data.use,
assay = assay,
n.neighbors = n.neighbors,
n.components = n.components,
metric = metric,
n.epochs = n.epochs,
learning.rate = learning.rate,
min.dist = min.dist,
spread = spread,
set.op.mix.ratio = set.op.mix.ratio,
local.connectivity = local.connectivity,
repulsion.strength = repulsion.strength,
negative.sample.rate = negative.sample.rate,
a = a,
b = b,
seed.use = seed.use,
metric.kwds = metric.kwds,
angular.rp.forest = angular.rp.forest,
reduction.key = reduction.key,
verbose = verbose
)
object <- LogSeuratCommand(object = object)
return(object)
}
#' @param dims Which dimensions to examine
#' @param score.thresh Threshold to use for the proportion test of PC
#' significance (see Details)
#'
#' @importFrom stats prop.test
#'
#' @rdname ScoreJackStraw
#' @export
#' @method ScoreJackStraw JackStrawData
#'
ScoreJackStraw.JackStrawData <- function(
object,
dims = 1:5,
score.thresh = 1e-5,
...
) {
pAll <- JS(object = object, slot = "empirical.p.values")
pAll <- pAll[, dims, drop = FALSE]
pAll <- as.data.frame(pAll)
pAll$Contig <- rownames(x = pAll)
score.df <- NULL
for (i in dims) {
pc.score <- suppressWarnings(prop.test(
x = c(
length(x = which(x = pAll[, i] <= score.thresh)),
floor(x = nrow(x = pAll) * score.thresh)
),
n = c(nrow(pAll), nrow(pAll))
)$p.val)
if (length(x = which(x = pAll[, i] <= score.thresh)) == 0) {
pc.score <- 1
}
if (is.null(x = score.df)) {
score.df <- data.frame(PC = paste0("PC", i), Score = pc.score)
} else {
score.df <- rbind(score.df, data.frame(PC = paste0("PC", i), Score = pc.score))
}
}
score.df$PC <- dims
score.df <- as.matrix(score.df)
JS(object = object, slot = 'overall') <- score.df
return(object)
}
#' @rdname ScoreJackStraw
#' @export
#' @method ScoreJackStraw DimReduc
#'
ScoreJackStraw.DimReduc <- function(object, dims = 1:5, score.thresh = 1e-5, ...) {
JS(object = object) <- ScoreJackStraw(
object = JS(object = object),
dims = dims,
score.thresh = 1e-5,
...
)
return(object)
}
#' @param reduction Reduction associated with JackStraw to score
#' @param do.plot Show plot. To return ggplot object, use \code{JackStrawPlot} after
#' running ScoreJackStraw.
#'
#' @seealso \code{\link{JackStrawPlot}}
#'
#' @rdname ScoreJackStraw
#' @export
#' @method ScoreJackStraw Seurat
#'
ScoreJackStraw.Seurat <- function(
object,
reduction = "pca",
dims = 1:5,
score.thresh = 1e-5,
do.plot = FALSE,
...
) {
object[[reduction]] <- ScoreJackStraw(
object = object[[reduction]],
dims = dims,
...
)
if (do.plot) {
suppressWarnings(expr = print(JackStrawPlot(
object = object,
reduction = reduction,
dims = dims,
...
)))
}
object <- LogSeuratCommand(object = object)
return(object)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Internal
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Check that features are present and have non-zero variance
#
# @param data.use Feature matrix (features are rows)
# @param features Features to check
# @param object.name Name of object for message printing
# @param verbose Print warnings
#
# @return Returns a vector of features that is the subset of features
# that have non-zero variance
#
CheckFeatures <- function(
data.use,
features,
object.name,
verbose = TRUE
) {
if (any(!features %in% rownames(x = data.use))) {
missing.features <- features[!features %in% rownames(x = data.use)]
features <- setdiff(x = features, y = missing.features)
if (verbose){
warning(
paste0(
"The following ", length(x = missing.features),
" features are not scaled in ",
object.name,
": ",
paste0(missing.features, collapse = ", ")
))
}
}
if (inherits(x = data.use, what = 'dgCMatrix')) {
features.var <- SparseRowVar(mat = data.use[features, ], display_progress = F)
}
else {
features.var <- RowVar(x = data.use[features, ])
}
no.var.features <- features[features.var == 0]
if (length(x = no.var.features) > 0 && verbose) {
warning(
paste0(
"The following features have zero variance in ",
object.name,
": ",
paste0(no.var.features, collapse = ", ")
))
}
features <- setdiff(x = features, y = no.var.features)
features <- features[!is.na(x = features)]
return(features)
}
#internal
EmpiricalP <- function(x, nullval) {
return(sum(nullval > x) / length(x = nullval))
}
# FIt-SNE helper function for calling fast_tsne from R
#
# Based on Kluger Lab code on https://github.com/KlugerLab/FIt-SNE/blob/master/fast_tsne.R
# commit 7a5212e on Oct 27, 2018
#
#' @importFrom utils file_test
#
fftRtsne <- function(
X,
dims = 2,
perplexity = 30,
theta = 0.5,
check_duplicates = TRUE,
max_iter = 1000,
fft_not_bh = TRUE,
ann_not_vptree = TRUE,
stop_early_exag_iter = 250,
exaggeration_factor = 12.0,
no_momentum_during_exag = FALSE,
start_late_exag_iter = -1.0,
late_exag_coeff = 1.0,
mom_switch_iter = 250,
momentum = 0.5,
final_momentum = 0.8,
learning_rate = 200,
n_trees = 50,
search_k = -1,
rand_seed = -1,
nterms = 3,
intervals_per_integer = 1,
min_num_intervals = 50,
K = -1,
sigma = -30,
initialization = NULL,
data_path = NULL,
result_path = NULL,
load_affinities = NULL,
fast_tsne_path = NULL,
nthreads = getOption('mc.cores', default = 1),
perplexity_list = NULL,
get_costs = FALSE,
...
) {
if (is.null(x = data_path)) {
data_path <- tempfile(pattern = 'fftRtsne_data_', fileext = '.dat')
}
if (is.null(x = result_path)) {
result_path <- tempfile(pattern = 'fftRtsne_result_', fileext = '.dat')
}
if (is.null(x = fast_tsne_path)) {
suppressWarnings(expr = fast_tsne_path <- system2(command = 'which', args = 'fast_tsne', stdout = TRUE))
if (length(x = fast_tsne_path) == 0) {
stop("no fast_tsne_path specified and fast_tsne binary is not in the search path")
}
}
fast_tsne_path <- normalizePath(path = fast_tsne_path)
if (!file_test(op = '-x', x = fast_tsne_path)) {
stop("fast_tsne_path '", fast_tsne_path, "' does not exist or is not executable")
}
# check fast_tsne version
ft.out <- system2(command = fast_tsne_path, stdout = TRUE)
if (!grepl('= t-SNE v1.', ft.out[1])) {
message('First line of fast_tsne output is')
message(ft.out[1])
stop("Our FIt-SNE wrapper requires FIt-SNE v1, please install the appropriate version from github.com/KlugerLab/FIt-SNE and have fast_tsne_path point to it if it's not in your path")
}
is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) {
return(abs(x = x - round(x = x)) < tol)
}
if (!is.numeric(x = theta) || (theta < 0.0) || (theta > 1.0) ) {
stop("Incorrect theta.")
}
if (nrow(x = X) - 1 < 3 * perplexity) {
stop("Perplexity is too large.")
}
if (!is.matrix(x = X)) {
stop("Input X is not a matrix")
}
if (!(max_iter > 0)) {
stop("Incorrect number of iterations.")
}
if (!is.wholenumber(x = stop_early_exag_iter) || stop_early_exag_iter < 0) {
stop("stop_early_exag_iter should be a positive integer")
}
if (!is.numeric(x = exaggeration_factor)) {
stop("exaggeration_factor should be numeric")
}
if (!is.wholenumber(x = dims) || dims <= 0) {
stop("Incorrect dimensionality.")
}
if (search_k == -1) {
if (perplexity>0) {
search_k <- n_trees * perplexity * 3
} else if (perplexity==0) {
search_k <- n_trees * max(perplexity_list) * 3
} else {
search_k <- n_trees * K * 3
}
}
nbody_algo <- ifelse(test = fft_not_bh, yes = 2, no = 1)
if (is.null(load_affinities)) {
load_affinities <- 0
} else {
if (load_affinities == 'load') {
load_affinities <- 1
} else if (load_affinities == 'save') {
load_affinities <- 2
} else {
load_affinities <- 0;
}
}
knn_algo <- ifelse(test = ann_not_vptree, yes = 1, no = 2)
f <- file(data_path, "wb")
n = nrow(x = X)
D = ncol(x = X)
writeBin(object = as.integer(x = n), con = f, size = 4)
writeBin(object = as.integer(x = D), con = f, size = 4)
writeBin(object = as.numeric(x = theta), con = f, size = 8) #theta
writeBin(object = as.numeric(x = perplexity), con = f, size = 8) #theta
if (perplexity == 0) {
writeBin(object = as.integer(x = length(x = perplexity_list)), con = f, size = 4)
writeBin(object = perplexity_list, con = f)
}
writeBin(object = as.integer(x = dims), con = f, size = 4) #theta
writeBin(object = as.integer(x = max_iter), con = f, size = 4)
writeBin(object = as.integer(x = stop_early_exag_iter), con = f, size = 4)
writeBin(object = as.integer(x = mom_switch_iter), con = f, size = 4)
writeBin(object = as.numeric(x = momentum), con = f, size = 8)
writeBin(object = as.numeric(x = final_momentum), con = f, size = 8)
writeBin(object = as.numeric(x = learning_rate), con = f, size = 8)
writeBin(object = as.integer(x = K), con = f, size = 4) #K
writeBin(object = as.numeric(x = sigma), con = f, size = 8) #sigma
writeBin(object = as.integer(x = nbody_algo), con = f, size = 4) #not barnes hut
writeBin(object = as.integer(x = knn_algo), con = f, size = 4)
writeBin(object = as.numeric(x = exaggeration_factor), con = f, size = 8) #compexag
writeBin(object = as.integer(x = no_momentum_during_exag), con = f, size = 4)
writeBin(object = as.integer(x = n_trees), con = f, size = 4)
writeBin(object = as.integer(x = search_k), con = f, size = 4)
writeBin(object = as.integer(x = start_late_exag_iter), con = f, size = 4)
writeBin(object = as.numeric(x = late_exag_coeff), con = f, size = 8)
writeBin(object = as.integer(x = nterms), con = f, size = 4)
writeBin(object = as.numeric(x = intervals_per_integer), con = f, size = 8)
writeBin(object = as.integer(x = min_num_intervals), con = f, size = 4)
tX = c(t(X))
writeBin(object = tX, con = f)
writeBin(object = as.integer(x = rand_seed), con = f, size = 4)
writeBin(object = as.integer(x = load_affinities), con = f, size = 4)
if (!is.null(x = initialization)) {
writeBin(object = c(t(x = initialization)), con = f)
}
close(con = f)
flag <- system2(command = fast_tsne_path, args = c(data_path, result_path, nthreads))
if (flag != 0) {
stop('tsne call failed')
}
f <- file(description = result_path, open = "rb")
n <- readBin(con = f, what = integer(), n = 1, size = 4)
d <- readBin(con = f, what = integer(), n = 1, size = 4)
Y <- readBin(con = f, what = numeric(), n = n * d)
Y <- t(x = matrix(Y, nrow = d))
if (get_costs) {
costs <- readBin(con = f, what = numeric(), n = max_iter, size = 8)
Yout <- list(Y = Y, costs = costs)
}else {
Yout <- Y
}
close(con = f)
file.remove(data_path)
file.remove(result_path)
return(Yout)
}
#internal
#
JackRandom <- function(
scaled.data,
prop.use = 0.01,
r1.use = 1,
r2.use = 5,
seed.use = 1,
rev.pca = FALSE,
weight.by.var = weight.by.var,
maxit = 1000
) {
set.seed(seed = seed.use)
rand.genes <- sample(
x = rownames(x = scaled.data),
size = nrow(x = scaled.data) * prop.use
)
# make sure that rand.genes is at least 3
if (length(x = rand.genes) < 3) {
rand.genes <- sample(x = rownames(x = scaled.data), size = 3)
}
data.mod <- scaled.data
data.mod[rand.genes, ] <- MatrixRowShuffle(x = scaled.data[rand.genes, ])
temp.object <- RunPCA(
object = data.mod,
assay = "temp",
pcs.compute = r2.use,
features = rownames(x = data.mod),
rev.pca = rev.pca,
weight.by.var = weight.by.var,
verbose = FALSE,
maxit = maxit
)
return(Loadings(temp.object)[rand.genes, r1.use:r2.use])
}
# Calculates the l2-norm of a vector
#
# Modified from PMA package
# @references Witten, Tibshirani, and Hastie, Biostatistics 2009
# @references \url{https://github.com/cran/PMA/blob/master/R/PMD.R}
#
# @param vec numeric vector
#
# @return returns the l2-norm.
#
L2Norm <- function(vec){
a <- sqrt(x = sum(vec ^ 2))
if (a == 0) {
a <- .05
}
return(a)
}
# Prep data for dimensional reduction
#
# Common checks and preparatory steps before running certain dimensional
# reduction techniques
#
# @param object Assay object
# @param features Features to use as input for the dimensional reduction technique.
# Default is variable features
# @ param verbose Print messages and warnings
#
#
PrepDR <- function(
object,
features = NULL,
verbose = TRUE
) {
if (length(x = VariableFeatures(object = object)) == 0 && is.null(x = features)) {
stop("Variable features haven't been set. Run FindVariableFeatures() or provide a vector of feature names.")
}
data.use <- GetAssayData(object = object, slot = "scale.data")
if (nrow(x = data.use ) == 0) {
stop("Data has not been scaled. Please run ScaleData and retry")
}
features <- features %||% VariableFeatures(object = object)
features.keep <- unique(x = features[features %in% rownames(x = data.use)])
if (length(x = features.keep) < length(x = features)) {
features.exclude <- setdiff(x = features, y = features.keep)
if (verbose) {
warning(paste0("The following ", length(x = features.exclude), " features requested have not been scaled (running reduction without them): ", paste0(features.exclude, collapse = ", ")))
}
}
features <- features.keep
features.var <- apply(X = data.use[features, ], MARGIN = 1, FUN = var)
features.keep <- features[features.var > 0]
if (length(x = features.keep) < length(x = features)) {
features.exclude <- setdiff(x = features, y = features.keep)
if (verbose) {
warning(paste0("The following ", length(x = features.exclude), " features requested have zero variance (running reduction without them): ", paste0(features.exclude, collapse = ", ")))
}
}
features <- features.keep
features <- features[!is.na(x = features)]
data.use <- data.use[features, ]
return(data.use)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.