#' Evaluate the cell identities using clustering
#'
#' This function is a simple wrapper for \code{EvaluateIdentKmeans},
#' facilitating unsupervised evaluation of cell identities.
#'
#' First, run K-means clustering or mini-batch K-means clustering using \code{ClusterCellsKmeans},
#' which computes computationally defined cell identities, \code{ident} in the Seurat object.
#' Second, run this function with the aforementioned Seurat object.
#' Ensure that the same parameters (\code{reduction.type}, \code{dims.use}, \code{k.param}, ...)
#' are used.
#'
#' @param object Seurat object
#' @param genes.use Genes to use for clustering
#' @param clustering Clustering method to use
#' @param data.cut Clip all z-scores to have an absolute value below this
#' Reduces the effect of huge outliers in the data. (default is NULL)
#' @param do.plot Draw histograms of statistics (default is FALSE)
#' @param seed Random seed
#' @param \dots Additional parameters passed to \code{EvaluateIdentKmeans} and related functions
#'
#' @seealso EvaluateIdentKmeans
#'
#' @importFrom tidyr gather
#' @import ggplot2
#' @importFrom methods new
#' @importFrom stats kmeans
#'
#' @return Seurat object where the unsupervised evaluation for cell identities is stored in
#' object@ident_prob. Furthermore, chosen probabilities (PIP, adjusted p-values, or etc) are
#' stored in object@meta.data["ident_prob"]. See the optional argument \code{prob.use}.
#'
#' @export EvaluateIdent
#'
EvaluateIdent <- function(
object,
genes.use = NULL,
clustering = "kmeans",
data.cut = NULL,
do.plot = FALSE,
seed = 1,
...
) {
if("kmeans" %in% tolower(clustering)) {
return(EvaluateIdentKmeans(
object = object,
genes.use = genes.use,
seed = seed,
data.cut = data.cut,
do.plot = do.plot,
minibatch = FALSE,
...
))
} else if(any(c("minibatch","minibatchkmeans") %in% tolower(clustering))) {
return(EvaluateIdentKmeans(
object = object,
genes.use = genes.use,
seed = seed,
data.cut = NULL,
do.plot = do.plot,
minibatch = TRUE,
...
))
} else {
stop("Please select clustering algorithms that are supported by SeuratAddon.")
}
}
#' Evaluate the cell identities using K-means clustering
#'
#' Evaluate the cell identities that were determined from k-means clustering
#'
#' Use \code{data.cut} to trim outliers, as done in \code{DoKMeans2}.
#' Ensure that these parameters are identical to the original clustering step.
#'
#' @param object Seurat object
#' @param prob.use Probabilities saved to object@meta.data["ident_prob"]; To be used in downstream analysis by default
#' @param p.adjust.methods A p-value correction method for multiple comparisons
#' @param genes.use Genes to use for clustering
#' @param reduction.type Name of dimensional reduction technique (e.g. "pca", "ica")
#' @param dims.use A vector of the dimensions to use (e.g. To use the first 10 PCs, pass 1:10)
#' @param center Center the cells
#' @param data.cut Clip all z-scores to have an absolute value below this.
#' Reduces the effect of huge outliers in the data. (default is NULL)
#' @param minibatch FALSE by default. If TRUE, use the mini-batch K-means clustering implemented in the ClusterR package.
#' @param assay.type Type of assay to fetch data for (default is RNA)
#' @param do.plot Draw histograms of statistics (default is FALSE)
#' @param seed Random seed
#' @param verbose Print the computational progress. TRUE, by default.
#' @param return.jackstraw FALSE by default. If TRUE, return a list of jackstraw-related statistics, instead of a Seurat object,
#' @param \dots Additional parameters passed to jackstraw.kmeans
#'
#' @seealso DoKMeans2
#'
#' @importFrom tidyr gather
#' @import ggplot2
#' @importFrom methods new
#' @importFrom stats kmeans
#'
#' @return Seurat object where the unsupervised evaluation for cell identities is stored in
#' object@ident_prob. Furthermore, chosen probabilities (PIP, adjusted p-values, or etc) are
#' stored in object@meta.data["ident_prob"]. See the optional argument \code{prob.use}.
#'
#' @export EvaluateIdentKmeans
#'
#' @examples
#' \dontrun{
#' set.seed(1234)
#' require(Seurat)
#' pbmc_small <- DoKMeans2(pbmc_small, k.cells = 3)
#' pbmc_small2 <- EvaluateIdentKmeans(pbmc_small)
#' }
#'
EvaluateIdentKmeans <- function(
object,
prob.use = "pip",
p.adjust.methods = "BH",
genes.use = NULL,
reduction.type = NULL,
dims.use = NULL,
center = TRUE,
data.cut = NULL,
minibatch = FALSE,
assay.type="RNA",
do.plot = TRUE,
seed = 1,
verbose = TRUE,
return.jackstraw = FALSE,
...
) {
# Make sure k-means clustering has been applied
if(is.null(object@kmeans)) {
stop("Run k-means clustering on the Seurat object using DoKMeans2() with k.cells > 0.")
} else {
if(is.null(object@kmeans@cell.kmeans.obj)) {
stop("Run k-means clustering on the Seurat object using DoKMeans2() with k.cells > 0.")
}
}
k.cells = length(unique(object@kmeans@cell.kmeans.obj$cluster))
# Get and prepare data
if(is.null(genes.use)) { genes.use <- Seurat:::SetIfNull(x = genes.use, default = object@var.genes) }
if (is.null(x = dims.use)) {
message("Using scaled data.")
data.use <- GetAssayData(
object = object,
assay.type = assay.type,
slot = "scale.data"
)
# rows: genes and cols: cells
genes.use <- genes.use[genes.use %in% rownames(x = data.use)]
data.use <- data.use[genes.use,]
# rows: cells and cols: genes
if(center) {
data.use <- t(scale(data.use, center = TRUE, scale = FALSE))
} else {
data.use <- t(data.use)
}
if(!is.null(data.cut)) { data.use <- MinMax(data = data.use, min = data.cut * (-1), max = data.cut) }
} else {
message(paste0("Using ", reduction.type, "."))
data.use <- GetCellEmbeddings(object = object,
reduction.type = reduction.type,
dims.use = dims.use)
# rows: cells and cols: genes
if(center) {
data.use <- t(scale(t(data.use), center = TRUE, scale = FALSE))
}
}
if(minibatch) {
jackstraw.ident = jackstraw_MiniBatchKmeans(data.use, MiniBatchKmeans.output=object@kmeans@cell.kmeans.obj, verbose=verbose, center=center, ...)
} else {
jackstraw.ident = jackstraw_kmeanspp(data.use, kmeans.dat=object@kmeans@cell.kmeans.obj, verbose=verbose, center=center, ...)
}
jackstraw.ident$pi0 = tryCatch(pi0est(jackstraw.ident$p.F$pi0),
error = function(e) {
message(paste("Default pi0 estimation failed. Setting pi0 = 1 as the conservative option."));
1}
)
jackstraw.ident$pip <- pip(jackstraw.ident$p.F, pi0=jackstraw.ident$pi0)
jackstraw.ident$p.adj <- p.adjust(jackstraw.ident$p.F, method=p.adjust.methods)
if (any(c("PIP","pip","posterior") %in% prob.use)) {
object@meta.data["ident_prob"] = unlist(jackstraw.ident$pip)
} else if (any(c("p.adjust","p.adj") %in% prob.use)) {
object@meta.data["ident_prob"] = unlist(jackstraw.ident$p.adj)
} else if (any(c("p.value","pvalue") %in% prob.use)) {
object@meta.data["ident_prob"] = unlist(jackstraw.ident$p.F)
}
if (do.plot) {
plot_data <- gather(data.frame(AdjustedPvalue = jackstraw.ident$p.adj, PIP = jackstraw.ident$pip))
ggplot(plot_data) + geom_histogram(aes(value)) + facet_wrap(~ key) + theme_bw()
}
if(return.jackstraw) {
return(jackstraw.ident)
} else {
return(object)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.