#' MAST
#'
#' Run and visualize MAST analysis on a SCtkExperiment object.
#'
#' @param inSCE Input SCtkExperiment object. Required
#' @param useAssay The assay to use for the MAST calculations. The default is
#' "logcounts"
#' @param condition select variable (from the colData) that is used for the
#' model.
#' @param interest.level If the condition of interest has more than two factors,
#' indicate which level should be used to compare to all other samples.
#' @param freqExpressed Filter genes that are expressed in at least this
#' fraction of cells. The default is expression in 0.1 of samples.
#' @param fcThreshold Minimum fold change for differentially expressed gene.
#' @param p.value p values for selecting the hurdle result, default is 0.05
#' @param useThresh Use adaptive thresholding to filter genes. The default is
#' FALSE.
#'
#' @describeIn MAST Run MAST analysis.
#'
#' @return MAST(): A data.frame of differentially expressed genes with p-values.
#' @export
MAST <- function(inSCE, condition = NULL, interest.level = NULL,
freqExpressed = 0.1, fcThreshold=log2(1.5), p.value = 0.05,
useThresh=FALSE, useAssay = "logcounts"){
if (is.null(condition)){
stop("specify the condition of interest")
}
if (length(unique(SingleCellExperiment::colData(inSCE)[, condition])) == 1) {
stop("only one level is in the condition")
}
if (is.null(interest.level) & length(
unique(SingleCellExperiment::colData(inSCE)[, condition])) > 2){
stop("You must specify a level of interest when more than 2 levels are in",
" the condition")
}
# Create MAST SingleCellAssay
pdata <- SingleCellExperiment::colData(inSCE)
expres <- SummarizedExperiment::assay(inSCE, useAssay)
fdata <- SingleCellExperiment::rowData(inSCE)
SCENew <- MAST::FromMatrix(expres, pdata, fdata)
#Caculate CDR for zlm model
SummarizedExperiment::colData(SCENew)$cngeneson <-
scale(colSums(SummarizedExperiment::assay(SCENew) > 0))
if (useThresh){
SCENew <- SCENew[which(MAST::freq(SCENew) > 0), ]
invisible(utils::capture.output(thresh <- MAST::thresholdSCRNACountMatrix(
SummarizedExperiment::assay(SCENew), nbins = 20, min_per_bin = 30)))
SummarizedExperiment::assays(SCENew) <-
list(thresh = thresh$counts_threshold,
tpm = SummarizedExperiment::assay(SCENew))
}
# filter based on frequency of expression across samples
if (sum(MAST::freq(SCENew) > freqExpressed) <= 1){
stop("Not enough genes pass frequency expressed filter of 1")
}
SCENewSample <- SCENew[which(MAST::freq(SCENew) > freqExpressed), ]
# if the condition of interest is numeric, to change it to a factor
if (is.numeric(SummarizedExperiment::colData(SCENewSample)[, condition])){
SummarizedExperiment::colData(SCENewSample)[, condition] <-
as.factor(SummarizedExperiment::colData(SCENewSample)[, condition])
}
#Check for NAs, if true throw error
if (any(is.na(SingleCellExperiment::colData(inSCE)[, condition]))){
stop("Annotation data has NA values. Filter them to continue.")
}
# >2 levels in the condition
if (!is.null(interest.level) &
length(unique(SingleCellExperiment::colData(inSCE)[, condition])) > 2){
levels(SummarizedExperiment::colData(SCENewSample)[, condition]) <-
c(levels(SummarizedExperiment::colData(SCENewSample)[, condition]),
paste0("no_", interest.level))
SummarizedExperiment::colData(SCENewSample)[, condition][
SummarizedExperiment::colData(SCENewSample)[, condition] !=
interest.level] <- paste0("no_", interest.level)
SummarizedExperiment::colData(SCENewSample)[, condition] <-
droplevels(as.factor(
SummarizedExperiment::colData(SCENewSample)[, condition]))
hurdle1 <- MAST::zlm(stats::as.formula(paste0("~", condition,
"+cngeneson")), SCENewSample)
summaryh1 <- MAST::summary(hurdle1, doLRT = paste0(condition, "no_",
interest.level))
summaryDT <- summaryh1[["datatable"]]
fcHurdle <- merge(
summaryDT[summaryDT$contrast == paste0(condition, "no_", interest.level) &
summaryDT$component == "H", c("primerid", "Pr(>Chisq)")],
summaryDT[summaryDT$contrast == paste0(condition, "no_", interest.level) &
summaryDT$component == "logFC", c("primerid", "coef", "ci.hi",
"ci.lo")]
)
} else {
SummarizedExperiment::colData(SCENewSample)[, condition] <-
droplevels(as.factor(
SummarizedExperiment::colData(SCENewSample)[, condition]))
level.cond <- levels(
SummarizedExperiment::colData(SCENewSample)[, condition])
hurdle1 <- MAST::zlm(stats::as.formula(paste0("~", condition,
"+cngeneson")), SCENewSample)
summaryh1 <- MAST::summary(hurdle1, doLRT = paste0(condition,
level.cond[2]))
summaryDT <- summaryh1[["datatable"]]
fcHurdle <- merge(
summaryDT[summaryDT$contrast == paste0(condition, level.cond[2]) &
summaryDT$component == "H", c("primerid", "Pr(>Chisq)")],
summaryDT[summaryDT$contrast == paste0(condition, level.cond[2]) &
summaryDT$component == "logFC", c("primerid", "coef", "ci.hi",
"ci.lo")]
)
}
# Use p-value correction method, here we use fdr
fcHurdle$fdr <- stats::p.adjust(fcHurdle$"Pr(>Chisq)", "fdr")
# Filter the data again by the adjusted pvalue and coef
fcHurdleSig <- fcHurdle[fcHurdle$fdr < p.value & abs(fcHurdle$coef) >
fcThreshold & !is.nan(fcHurdle$coef), ]
colnames(fcHurdleSig)[1] <- "Gene"
data.table::setorder(fcHurdleSig, "fdr")
return(fcHurdleSig)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.