#' SJ expression markers of identity classes
#'
#' Finds markers (differentially expressed SJs) for identity classes
#' @title FindMarkers
#' @name FindMarkers
#' @param object ICASDataSet object
#' @param ident.1 Identity class to define markers for
#' @param ident.2 A second identity class for comparison. If NULL (default) -
#' use all other cells for comparison.
#' @param genes.use SJs to test. Default is to use all SJs
#' @param delta.threshold Limit testing to SJs which show, on average, at least
#' delta.threshold between the two groups of samples. Default is 0.1
#' Increasing delta.threshold speeds up the function, but can miss weaker signals.
#' @param test.use Denotes which test to use. Available options are:
##' \itemize{
##' \item{"wilcox"} : Wilcoxon rank sum test
##' \item{"bimod"} : Likelihood-ratio test for SJ expression,
##' (McDavid et al., Bioinformatics, 2013)
##' \item{"roc"} : Standard AUC classifier
##' \item{"t"} : Student's t-test
##' \item{"WD"} : waldtest of variance model
##' \item{"tobit"} : Tobit-test for differential SJ expression (Trapnell et
##' al., Nature Biotech, 2014) (default)
##' \item{"poisson"} : Likelihood ratio test assuming an underlying poisson
##' distribution. Use only for UMI-based datasets
##' \item{"negbinom"} : Likelihood ratio test assuming an underlying negative
##' binomial distribution. Use only for UMI-based datasets
##' \item{"MAST"} : GLM-framework that treates cellular detection rate as a
##' covariate (Finak et al, Genome Biology, 2015)
##' \item{"Hyper"} : DE based on Hypergeometric test
##' \item{"BB"} : DE based on a generalized linear models using betabinomial distribution
##' }
#' @param min.pct only test SJs that are detected in a minimum fraction of
#' min.pct cells in either of the two populations. Meant to speed up the function
#' by not testing SJs that are very infrequently expressed. Default is 0.1
#' @param min.diff.pct only test SJs that show a minimum difference in the
#' fraction of detection between the two groups. Set to -Inf by default
#' @param only.pos Only return positive markers (FALSE by default)
#' @param print.bar Print a progress bar once expression testing begins (uses
#' pbapply to do this)
#' @param max.cells.per.ident Down sample each identity class to a max number.
#' Default is no downsampling. Not activated by default (set to Inf)
#' @param random.seed Random seed for downsampling
#' @param min.cells.gene Minimum number of cells expressing the SJ in at least one
#' of the two groups, currently only used for poisson and negative binomial tests
#' @param maxit (Only for test.use is BB) maximum number of (usually Fisher-scoring) iterations allowed.
#' Decreasing maxit speeds up the function, but can weaken statistical reliability.
#' @param confounder (Only for test.use is BB) The confounder to regress out.
#' @param min.cells.group Minimum number of cells in one of the groups
#' @param NT cores for parallel (Currently only support roc test)
#' @param \dots Additional parameters to pass to specific DE functions
#' @seealso \code{\link{MASTDETest}}, and \code{\link{DESeq2DETest}} for more information on these methods
#' @return Matrix containing a ranked list of putative markers, and associated
#' statistics (p-values, ROC score, etc.)
#' @details p-value adjustment is performed using bonferroni correction based on
#' the total number of SJs in the dataset. Other correction methods are not
#' recommended, as ICAS pre-filters SJs using the arguments above, reducing
#' the number of tests performed. Lastly, as Aaron Lun has pointed out, p-values
#' should be interpreted cautiously, as the SJs used for clustering are the
#' same SJs tested for differential expression.
#' @import pbapply
#' @importFrom lmtest lrtest
#' @importFrom Matrix rowMeans
#' @importFrom matrixStats rowSds
#'
#' @seealso \code{\link{NegBinomDETest}}
#' @seealso Seurat
#' @references Seurat
#'
#' @export
#'
globalVariables(names = 'deltaPSI', package = 'ICAS', add = TRUE)
FindMarkers <- function(
object,
ident.1,
ident.2 = NULL,
genes.use = NULL,
delta.threshold = 0.1,
test.use = "tobit",
min.pct = 0.1,
min.diff.pct = -Inf,
print.bar = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
min.cells.gene = 3,
min.cells.group = 3,
maxit = 10,
confounder = NULL,
NT = 1,
...
) {
if(!is(object, "ICASDataSet"))
stop("The object must be a ICASDataSet data")
data.use <- psi(object = object)
genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.use))
if(!all(genes.use %in% rownames(x = data.use))) {
stop("genes.use are not in object@psi")
}
if(!ident.1 %in% levels(object@colData[[object@design]])) {
stop("ident.1 are not in levels of design")
}
if(!is.null(ident.2)) {
if(any(!ident.2 %in% levels(object@colData[[object@design]]))) {
stop("ident.2 are not in levels of design")
}
}
cells.1 <- WhichCells(object = object, ident = ident.1)
# if NULL for ident.2, use all other cells
if(is.null(ident.2)) {
cells.2 <- setdiff(colnames(data.use), cells.1)
} else {
cells.2 <- unlist(lapply(ident.2, FUN = function(x) WhichCells(object = object, ident = x)))
}
# error checking
if (length(x = cells.1) == 0) {
message(paste("Cell group 1 is empty - no cells with identity class", ident.1))
return(NULL)
}
if (length(x = cells.2) == 0) {
message(paste("Cell group 2 is empty - no cells with identity class", ident.2))
return(NULL)
}
if (length(cells.1) < min.cells.group) {
stop(paste("Cell group 1 has fewer than", as.character(min.cells.group), "cells in identity class", ident.1))
}
if (length(cells.2) < min.cells.group) {
stop(paste("Cell group 2 has fewer than", as.character(min.cells.group), " cells in identity class", ident.2))
}
if(is.null(NT)) {
NT <- 1
}
if(NT < 1) {
stop("parallel must be a positive interger")
}
# gene selection (based on percent expressed)
thresh.min <- 0
data.temp1 <- round(
x = apply(
X = data.use[genes.use, cells.1, drop = F],
MARGIN = 1,
FUN = function(x) {
return(sum(x > thresh.min, na.rm = TRUE) / length(x = x))
# return(length(x = x[x>thresh.min]) / length(x = x))
}
),
digits = 3
)
data.temp2 <- round(
x = apply(
X = data.use[genes.use, cells.2, drop = F],
MARGIN = 1,
FUN = function(x) {
return(sum(x > thresh.min, na.rm = TRUE) / length(x = x))
# return(length(x = x[x > thresh.min]) / length(x = x))
}
),
digits = 3
)
data.alpha <- cbind(data.temp1, data.temp2)
colnames(x = data.alpha) <- c("pct.1","pct.2")
alpha.min <- apply(X = data.alpha, MARGIN = 1, FUN = min)
names(x = alpha.min) <- rownames(x = data.alpha)
genes.use <- names(x = which(x = alpha.min > min.pct))
if (length(x = genes.use) == 0) {
stop("No SJs pass min.pct threshold")
}
alpha.diff <- apply(X = data.alpha, MARGIN = 1, FUN = max) - alpha.min
genes.use <- names(
x = which(x = alpha.min > min.pct & alpha.diff > min.diff.pct)
)
if (length(x = genes.use) == 0) {
stop("No SJs pass min.diff.pct threshold")
}
#gene selection (based on average difference)
data.1 <- Matrix::rowMeans(data.use[genes.use, cells.1, drop = F], na.rm = TRUE)
data.2 <- Matrix::rowMeans(data.use[genes.use, cells.2, drop = F], na.rm = TRUE)
sd.1 <- matrixStats::rowSds(data.use[genes.use, cells.1, drop = F], na.rm = TRUE)
sd.2 <- matrixStats::rowSds(data.use[genes.use, cells.2, drop = F], na.rm = TRUE)
data.mean <- data.frame(mean.1 = data.1, mean.2 = data.2, row.names = genes.use)
data.sd <- data.frame(sd.1 = sd.1, sd.2 = sd.2, row.names = genes.use)
total.diff <- data.1 - data.2
if (!only.pos) genes.diff <- names(x = which(x = abs(x = total.diff) >= delta.threshold))
if (only.pos) genes.diff <- names(x = which(x = total.diff >= delta.threshold))
genes.use <- intersect(x = genes.use, y = genes.diff)
if (length(x = genes.use) == 0) {
stop("No SJs pass delta.threshold threshold")
}
if (max.cells.per.ident < Inf) {
set.seed(seed = random.seed)
if (length(cells.1) > max.cells.per.ident) cells.1 = sample(x = cells.1, size = max.cells.per.ident)
if (length(cells.2) > max.cells.per.ident) cells.2 = sample(x = cells.2, size = max.cells.per.ident)
}
if (test.use == "bimod") {
to.return <- DiffExpTest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
NT = NT
)
}
if (test.use == "roc") {
to.return <- varImportances(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
NT = NT
)
}
if (test.use == "t") {
to.return <- DiffTTest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
NT = NT
)
}
if (test.use == "tobit") {
to.return <- TobitTest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
NT = NT
)
}
if (test.use == "negbinom") {
to.return <- NegBinomDETest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
min.cells = min.cells.gene,
NT = NT
)
}
if (test.use == "poisson") {
to.return <- PoissonDETest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
min.cells = min.cells.gene,
NT = NT
)
}
if (test.use == "MAST") {
to.return <- MASTDETest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
...
)
}
if (test.use == "wilcox") {
to.return <- WilcoxDETest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
NT = NT,
...
)
}
if (test.use == "WD") {
to.return <- LRDETest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
NT = NT,
...
)
}
if(test.use == "Hyper") {
to.return <- phyperTest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
NT = NT
)
}
if(test.use == "BB") {
to.return <- bbTest(
object = object,
cells.1 = cells.1,
cells.2 = cells.2,
genes.use = genes.use,
print.bar = print.bar,
maxit = maxit,
confounder = confounder,
NT = NT
)
}
#return results
to.return[, "deltaPSI"] <- total.diff[rownames(x = to.return)]
to.return <- cbind(to.return,
data.alpha[rownames(x = to.return), , drop = FALSE],
data.mean[rownames(x = to.return), , drop = FALSE],
data.sd[rownames(x = to.return), , drop = FALSE])
if(test.use != "roc") {
to.return$p_val_adj = p.adjust(p = to.return$p_val)
}
if (test.use == "roc") {
to.return <- to.return[order(-to.return$Importance, -to.return$deltaPSI), ]
} else {
to.return <- to.return[order(to.return$p_val, -to.return$deltaPSI), ]
}
if (only.pos) {
to.return <- subset(x = to.return, subset = deltaPSI > 0)
}
return(to.return)
}
globalVariables(
names = c('myAUC', 'p_val', 'deltaPSI'),
package = 'ICAS',
add = TRUE
)
#' @title FindAllMarkers
#' SJ expression markers for all identity classes
#'
#' Finds markers (differentially expressed SJs) for each of the identity classes in a dataset
#'
#' @inheritParams FindMarkers
#' @param print.bar Print a progress bar once expression testing begins (uses pbapply to do this)
#' @param max.cells.per.ident Down sample each identity class to a max number. Default is no downsampling.
#' @param random.seed Random seed for downsampling
#' @param return.thresh Only return markers that have a p-value < return.thresh, or a power > return.thresh (if the test is ROC)
#' @param min.cells.gene Minimum number of cells expressing the SJ in at least one
#' of the two groups, currently only used for poisson and negative binomial tests
#' @param min.cells.group Minimum number of cells in one of the groups
#' @param \dots Additional parameters to pass to specific DE functions
#'
#' @return Matrix containing a ranked list of putative markers, and associated
#' statistics (p-values, ROC score, etc.)
#'
#' @export
#'
FindAllMarkers <- function(
object,
genes.use = NULL,
delta.threshold = 0.1,
test.use = "tobit",
min.pct = 0.1,
min.diff.pct = -Inf,
print.bar = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
return.thresh = 1e-2,
random.seed = 1,
maxit = 10,
confounder = NULL,
min.cells.gene = 3,
min.cells.group = 3,
NT = 1,
...
) {
data.1 <- psi(object)
genes.use <- SetIfNull(x = genes.use, default = rownames(x = data.1))
if ((test.use == "roc") && (return.thresh == 1e-2)) {
return.thresh = 0.7
}
idents.all <- levels(colData(object)[[object@design]])
genes.de <- list()
for (i in seq_along(idents.all)) {
genes.de[[i]] <- tryCatch(
{
FindMarkers(
object = object,
ident.1 = idents.all[i],
ident.2 = NULL,
genes.use = genes.use,
delta.threshold = delta.threshold,
test.use = test.use,
min.pct = min.pct,
min.diff.pct = min.diff.pct,
print.bar = print.bar,
min.cells.gene = min.cells.gene,
min.cells.group = min.cells.group,
max.cells.per.ident = max.cells.per.ident,
maxit = maxit,
confounder = confounder,
NT = NT,
...
)
},
error = function(cond){
return(NULL)
}
)
}
gde.all <- data.frame()
for (i in seq_along(idents.all)) {
if (is.null(x = unlist(x = genes.de[i]))) {
next
}
gde <- genes.de[[i]]
if (nrow(x = gde) > 0) {
if (test.use == "roc") {
gde <- gde
} else {
gde <- gde[order(gde$p_val, -gde$deltaPSI), ]
gde <- subset(x = gde, subset = p_val <= return.thresh)
}
if (nrow(x = gde) > 0) {
gde$cluster <- idents.all[i]
gde$gene <- rownames(x = gde)
}
if (nrow(x = gde) > 0) {
gde.all <- rbind(gde.all, gde)
}
}
}
if ((only.pos) && nrow(gde.all) > 0) {
return(subset(x = gde.all, subset = deltaPSI > 0))
}
rownames(x = gde.all) <- make.unique(names = as.character(x = gde.all$gene))
if (nrow(gde.all) == 0) {
warning("No DE SJs identified.")
}
return(gde.all)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.