# Load packages suppressPackageStartupMessages({ library(snseq.stats) library(dplyr) library(ggplot2) library(SingleCellExperiment) library(splatter) library(MAST) library(splattdr) }) # Load an example dataset with simulated data data(params) sim.dose = c(0, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30) dose.prob = rep(1/9, 9) sim = splatSimulateDR(params, dose.names = sim.dose, dose.prob = dose.prob, verbose = FALSE) # Remove low abundance genes from dataset (expressed in at least 5% cells in # each dose group) sim = filterLow(sim, pct.expressed = 0.05)
As part of this single-cell RNAseq differential expression testing package we have implemented several existing tests best suited for the analysis of continuous groups. Tests include the following:
We also implemented a non-Bayesian formulation on which our new Bayesian test was derived from. These are:
Describe
LRT.linear = DETest(sim, method = 'LRT.linear') LRT.multiple = DETest(sim, method = 'LRT.multiple') head(LRT.linear[[1]]) head(LRT.multiple[[1]])
Describe ChiSQ
chi.square = DETest(sim, method = 'CHISQ') head(chi.square[[1]])
All differential expression tests included in this package can be run using the
DETest
function with the method paramer All
. Here we run all tests on a
simulated dataset. Depending on the number of genes, running all tests can take
a large amount of time.
DGEAnalysis = DETest(sim, method = 'All', verbose = TRUE) names(DGEAnalysis)
We implemented numerous tests in order to benchmark the Bayesian Hurdle Model (BHM) test. We can use simulated data with known differentially expressed genes to verify the ability of tests to accuarately identified genes with altered expression from those which are not differentially expressed.
1) We must first extract the truth from the simulated dataset. See Splattdr
for details on simulating dose-response data. Here will do not apply any
fold-change threshold (fc.threshold
) or percent expressing cells
(pct.expressed
) as truth is known.
trueDEGs = TruthFromSim(sim, fc.threshold = 0.0, pct.expressed = 0)
2) Next we identify all differentially expressed genes from the individual
tests. This can be accomplished using the getDEGs
function specifying the
p-value threshold (threshold
), fold-change threshold (fc.threshold
), Bayes
threshold (bayes.threshold
), and minimum percent expressing cells
(pct.expressed
). We use standard parameters for p-value and Bayes threshold.
No fold-change threshold is initially applied to evaluate the performance
in absence of any other filtering method. Very low abundance genes are filtered
out to be expressed in at least 5% of cells in any group.
significantGenes = getDEGs(sim, DGEAnalysis, threshold = 0.05, fc.threshold = 0.0, bayes.threshold = 1/3, pct.expressed = 0.05)
benchmarkConfusionMat = benchmarkDETests(sim, trueDEGs, significantGenes) head(benchmarkConfusionMat$classification) head(benchmarkConfusionMat$results)
ggplot(benchmarkConfusionMat$classification, aes(fill = result, y = value, x = tname)) + geom_text(aes(label = value), position = position_dodge(width = 1), size = 3, vjust = -0.5) + geom_bar(position = "dodge", stat = "identity", color = 'black') + #scale_fill_manual(values = c('#e34242', '#c70000', '#58fc5b', '#04cc08')) + theme_bw()
ggplot(benchmarkConfusionMat$results, aes(y = value, x = test)) + facet_wrap(~metric) + geom_point() + theme_bw()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.