# Load packages
suppressPackageStartupMessages({
  library(snseq.stats)
  library(dplyr)
  library(ggplot2)
  library(SingleCellExperiment)
  library(splatter)
  library(MAST)
  library(splattdr)
})

# Load an example dataset with simulated data
data(params)
sim.dose = c(0, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30)
dose.prob = rep(1/9, 9)
sim = splatSimulateDR(params, 
                      dose.names = sim.dose, 
                      dose.prob = dose.prob, 
                      verbose = FALSE)

# Remove low abundance genes from dataset (expressed in at least 5% cells in 
# each dose group)
sim = filterLow(sim, pct.expressed = 0.05)

DETesting

As part of this single-cell RNAseq differential expression testing package we have implemented several existing tests best suited for the analysis of continuous groups. Tests include the following:

We also implemented a non-Bayesian formulation on which our new Bayesian test was derived from. These are:

LRT_linear

Describe

LRT.linear = DETest(sim, method = 'LRT.linear')
LRT.multiple = DETest(sim, method = 'LRT.multiple')
head(LRT.linear[[1]])
head(LRT.multiple[[1]])

Describe ChiSQ

chi.square = DETest(sim, method = 'CHISQ')
head(chi.square[[1]])

All differential expression tests included in this package can be run using the DETest function with the method paramer All. Here we run all tests on a simulated dataset. Depending on the number of genes, running all tests can take a large amount of time.

DGEAnalysis = DETest(sim, method = 'All', verbose = TRUE)
names(DGEAnalysis)

Benchmarking

We implemented numerous tests in order to benchmark the Bayesian Hurdle Model (BHM) test. We can use simulated data with known differentially expressed genes to verify the ability of tests to accuarately identified genes with altered expression from those which are not differentially expressed.

1) We must first extract the truth from the simulated dataset. See Splattdr for details on simulating dose-response data. Here will do not apply any fold-change threshold (fc.threshold) or percent expressing cells (pct.expressed) as truth is known.

trueDEGs = TruthFromSim(sim, fc.threshold = 0.0, pct.expressed = 0)

2) Next we identify all differentially expressed genes from the individual tests. This can be accomplished using the getDEGs function specifying the p-value threshold (threshold), fold-change threshold (fc.threshold), Bayes threshold (bayes.threshold), and minimum percent expressing cells (pct.expressed). We use standard parameters for p-value and Bayes threshold. No fold-change threshold is initially applied to evaluate the performance in absence of any other filtering method. Very low abundance genes are filtered out to be expressed in at least 5% of cells in any group.

significantGenes = getDEGs(sim, DGEAnalysis, threshold = 0.05, fc.threshold = 0.0, bayes.threshold = 1/3, pct.expressed = 0.05)
benchmarkConfusionMat = benchmarkDETests(sim, trueDEGs, significantGenes)
head(benchmarkConfusionMat$classification)
head(benchmarkConfusionMat$results)
ggplot(benchmarkConfusionMat$classification, aes(fill = result, y = value, x = tname)) +
  geom_text(aes(label = value), position = position_dodge(width = 1),
            size = 3, vjust = -0.5) +
  geom_bar(position = "dodge", stat = "identity", color = 'black') + 
  #scale_fill_manual(values = c('#e34242', '#c70000', '#58fc5b', '#04cc08')) +
  theme_bw()
ggplot(benchmarkConfusionMat$results, aes(y = value, x = test)) +
  facet_wrap(~metric) +
  geom_point() + 
  theme_bw()


satabdisaha1288/scBT documentation built on June 1, 2025, 4:06 p.m.