Nothing
# This tests that the C++ code for emptyDrops does what it says it does.
# library(DropletUtils); library(testthat); source("test-empty.R")
test_that("estimateAmbience works correctly", {
# Mocking up some counts.
set.seed(1000)
my.counts <- DropletUtils:::simCounts()
limit <- 100
out <- estimateAmbience(my.counts)
keep <- colSums(my.counts) <= limit
naive <- rowSums(my.counts[,keep])
expect_false(is.unsorted(out[order(naive)]))
expect_identical(naive, estimateAmbience(my.counts, good.turing=FALSE))
my.counts2 <- my.counts
rownames(my.counts2) <- NULL
out2 <- estimateAmbience(my.counts2)
expect_identical(unname(out), out2)
# Performs correctly with the rank-based method.
out.r <- estimateAmbience(my.counts, by.rank=1000)
expect_false(identical(out, out.r))
out.r2 <- estimateAmbience(my.counts, by.rank=1000, good.turing=FALSE)
expect_identical(out.r2, rowSums(my.counts[,rank(-colSums(my.counts), ties.method="first") > 1000]))
# Deals with zeroes properly.
my.counts3 <- rbind(0,0,0,my.counts)
out3 <- estimateAmbience(my.counts3)
expect_identical(out, tail(out3, -3))
})
test_that("Good-Turing protection works correctly", {
# Protection gives the same estimates for zeroes when a count
# of 1 is redistributed somewhere else.
out <- DropletUtils:::.safe_good_turing(c(0,0,2,4))
ref <- DropletUtils:::.safe_good_turing(c(0,0,1,2,3))
expect_equal(sum(ref), 1)
expect_equal(sum(out), 1)
expect_false(any(ref==0))
expect_false(any(out==0))
expect_equal(out[1:2], ref[1:2])
# Continues to work with loads of observations.
out <- DropletUtils:::.safe_good_turing(c(0,0,2,4,10000))
ref <- DropletUtils:::.safe_good_turing(c(0,0,1,2,3,10000))
expect_equal(out[1:2], ref[1:2])
})
test_that("multinomial calculations are correct without alpha", {
set.seed(1000)
prop <- runif(100)
X <- matrix(rpois(100000, lambda=prop*2), nrow=length(prop))
# DropletUtils assumes that proportions sum to unity!
prop <- prop/sum(prop)
# Checking it's the same regardless of the input matrix.
dense.out <- DropletUtils:::.compute_multinom_prob_data(X, prop)
sparse.out1 <- DropletUtils:::.compute_multinom_prob_data(as(X, "dgCMatrix"), prop)
expect_equal(dense.out, sparse.out1)
sparse.out2 <- DropletUtils:::.compute_multinom_prob_data(as(X, "dgTMatrix"), prop)
expect_equal(dense.out, sparse.out2)
# Checking it's the same compared to a reference.
ref <- apply(X, 2, dmultinom, prob=prop, log=TRUE)
combined <- dense.out + DropletUtils:::.compute_multinom_prob_rest(colSums(X))
expect_equal(ref, combined)
})
ddirmultinom <- function(x, prob, alpha)
# A helper function for computing the Dirichlet-multionial PDF.
{
size <- sum(x)
lfactorial(size) + lgamma(alpha) - lgamma(size + alpha) +
sum(lgamma(x + prob * alpha) - lfactorial(x) - lgamma(prob * alpha))
}
test_that("multinomial calculations are correct with alpha", {
set.seed(2000)
prop <- runif(100)
X <- matrix(rpois(100000, lambda=prop*2), nrow=length(prop))
prop <- prop/sum(prop)
# Checking it's the same regardless of the input matrix.
for (alpha in c(1, 10, 100)) {
dense.out <- DropletUtils:::.compute_multinom_prob_data(X, prop, alpha=alpha)
sparse.out1 <- DropletUtils:::.compute_multinom_prob_data(as(X, "dgCMatrix"), prop, alpha=alpha)
expect_equal(dense.out, sparse.out1)
sparse.out2 <- DropletUtils:::.compute_multinom_prob_data(as(X, "dgTMatrix"), prop, alpha=alpha)
expect_equal(dense.out, sparse.out2)
# Checking it's the same compared to the reference.
ref <- apply(X, 2, FUN=ddirmultinom, prob=prop, alpha=alpha)
combined <- dense.out + DropletUtils:::.compute_multinom_prob_rest(colSums(X), alpha=alpha)
expect_equal(ref, combined)
}
})
rdirmultinom <- function(ambient, totals, alpha) {
true.prob <- ambient * alpha
collected <- vector("list", length(totals))
for (i in seq_along(collected)) {
prob <- rgamma(length(true.prob), true.prob) # pre-simulation of the Dirichlet
sim <- rmultinom(1, totals[i], prob=prob)
collected[[i]] <- sim
}
collected
}
set.seed(100010010)
test_that("dispersion calculation is close to correct", {
totals <- sample(10000, 100, replace=TRUE)
ambient <- runif(2000)
ambient <- ambient/sum(ambient)
simulated <- rdirmultinom(ambient, totals, alpha=5)
mat <- do.call(cbind, simulated)
x <- DropletUtils:::.estimate_alpha(mat, ambient, totals)
expect_true(abs(x - 5) <= 0.2)
simulated <- rdirmultinom(ambient, totals, alpha=20)
mat <- do.call(cbind, simulated)
x <- DropletUtils:::.estimate_alpha(mat, ambient, totals)
expect_true(abs(x - 20) <= 1)
simulated <- rdirmultinom(ambient, totals, alpha=100)
mat <- do.call(cbind, simulated)
x <- DropletUtils:::.estimate_alpha(mat, ambient, totals)
expect_true(abs(x - 100) <= 5)
})
UNICHECKER <- function(values, tol=1.25) {
for (thresh in c(0.01, 0.05, 0.1, 0.2, 0.5)) {
# Generous boundaries for thresholds.
observed <- mean(values < thresh)
expect_true(observed < thresh * tol)
expect_true(observed > thresh / tol)
}
invisible(NULL)
}
test_that("p-value calculations are correct without alpha", {
# Simulating some multinomial probabilities.
SIMSTUFF <- function(nbarcodes, ngenes) {
totals <- sample(1000, nbarcodes, replace=TRUE)
ambient <- runif(ngenes)
probs <- numeric(length(totals))
for (i in seq_along(totals)) {
sim <- rmultinom(1, totals[i], prob=ambient)
probs[i] <- sum(sim * log(ambient) - lfactorial(sim))
}
return(list(totals=totals, probs=probs, ambient=ambient))
}
# Checking for uniformity:
set.seed(0)
sim <- SIMSTUFF(10000, 1000)
totals <- sim$totals
probs <- sim$probs
ambient.prof <- sim$ambient
set.seed(100)
N <- 10000
stats <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, iter=N)
UNICHECKER(stats/N)
# Same result for multiple cores (set BPPARAM first as MulticoreParam redefines the seed!)
BPPARAM <- if (.Platform$OS.type=="windows") BiocParallel::SerialParam() else BiocParallel::MulticoreParam(3)
set.seed(100)
stats2 <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, BPPARAM=BPPARAM, iter=N)
expect_identical(stats, stats2)
# Different result for different seeds.
set.seed(101)
stats3 <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, iter=N)
expect_false(identical(stats, stats3))
UNICHECKER(stats3/N)
# Worker splitting behaves for near-zero or no jobs.
almost_none <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, BPPARAM=BPPARAM, iter=1L)
expect_true(all(almost_none %in% 0:1))
none <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, BPPARAM=BPPARAM, iter=0L)
expect_identical(none, integer(length(totals)))
})
test_that("p-value calculations are correct with alpha", {
# Simulating some Dirichlet-multinomial probabilities.
SIMSTUFF <- function(nbarcodes, ngenes, alpha) {
totals <- sample(1000, nbarcodes, replace=TRUE)
ambient <- runif(ngenes)
simulated <- rdirmultinom(ambient, totals, alpha)
true.prob <- ambient * alpha
probs <- numeric(length(totals))
for (i in seq_along(simulated)) {
sim <- simulated[[i]]
probs[i] <- sum(lgamma(sim + true.prob) - lfactorial(sim) - lgamma(true.prob))
}
list(totals=totals, probs=probs, ambient=ambient)
}
# Checking for uniformity.
set.seed(0)
sim <- SIMSTUFF(10000, 800, alpha=10)
totals <- sim$totals
probs <- sim$probs
ambient.prof <- sim$ambient
set.seed(100)
N <- 10000
stats <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, iter=N, alpha=10)
UNICHECKER(stats/N)
# Same result for multiple cores (set BPPARAM first as it redefines the seed!)
BPPARAM <- if (.Platform$OS.type=="windows") BiocParallel::SerialParam() else BiocParallel::MulticoreParam(3)
set.seed(100)
stats2 <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, BPPARAM=BPPARAM, iter=N, alpha=10)
expect_identical(stats, stats2)
# Different result for different seeds.
set.seed(101)
stats3 <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, iter=N, alpha=10)
expect_false(identical(stats, stats3))
UNICHECKER(stats3/N)
# Trying with a different alpha.
set.seed(0)
sim <- SIMSTUFF(10000, 800, alpha=5)
totals <- sim$totals
probs <- sim$probs
ambient.prof <- sim$ambient
set.seed(100)
stats4 <- DropletUtils:::.permute_counter(totals, probs, ambient.prof, iter=N, alpha=5)
UNICHECKER(stats4/N)
expect_false(identical(stats, stats4))
# Triggering a check on 'alpha'.
expect_error(DropletUtils:::.permute_counter(totals, probs, ambient.prof, iter=N, alpha=0), "must be positive")
})
test_that("emptyDrops runs to completion", {
# Mocking up some counts.
set.seed(1000)
my.counts <- DropletUtils:::simCounts()
limit <- 100
e.out <- emptyDrops(my.counts, lower=limit, alpha=Inf)
totals <- Matrix::colSums(my.counts)
expect_identical(as.integer(totals), e.out$Total)
expect_true(all(is.na(e.out$LogProb[totals<=limit])))
expect_true(all(!is.na(e.out$LogProb[totals>limit])))
# Checking that the log-probability calculation is correct.
ambient.prop <- edgeR::goodTuringProportions(Matrix::rowSums(my.counts[,totals<= limit]))
valid <- which(totals > limit)
collected <- numeric(nrow(e.out))
for (v in valid) {
collected[v] <- dmultinom(my.counts[,v], size=totals[v], prob=ambient.prop, log=TRUE)
}
expect_equal(e.out$LogProb[valid], collected[valid])
# Checking ambient tests.
e.out2 <- emptyDrops(my.counts, lower=limit, test.ambient=TRUE, alpha=Inf)
expect_identical(e.out$Total, e.out2$Total)
expect_identical(e.out$LogProb[totals>limit], e.out2$LogProb[totals>limit])
expect_true(all(!is.na(e.out2$LogProb[totals>0])))
# Checking the ignore argument works correctly.
set.seed(1001)
e.out3a <- emptyDrops(my.counts, lower=limit, ignore=200, alpha=Inf)
e.out3a$FDR <- NULL
set.seed(1001)
survivors <- totals <= 100 | totals > 200
new.counts <- my.counts[,survivors]
e.out3b <- emptyDrops(new.counts, lower=limit, ignore=NULL, alpha=Inf)
e.out3b$FDR <- NULL
expect_equal(e.out3a[survivors,], e.out3b)
# Checking retention options.
K <- metadata(barcodeRanks(my.counts, lower=limit))$knee
expect_true(all(e.out$FDR[totals >= K]==0))
expect_true(!all(e.out$FDR[totals < K]==0))
e.outK <- emptyDrops(my.counts, retain=K*0.6, alpha=Inf)
expect_true(all(e.outK$FDR[totals >= K*0.6]==0))
expect_true(!all(e.outK$FDR[totals < K*0.6]==0))
})
test_that("emptyDrops works correctly with alpha estimation", {
# Mocking up some counts.
set.seed(7000)
my.counts <- DropletUtils:::simCounts() * 2
limit <- 100
e.out <- emptyDrops(my.counts, lower=limit, alpha=NULL)
# Pulling out the alpha.
totals <- Matrix::colSums(my.counts)
discarded <- totals <= limit
lostmat <- my.counts[,discarded,drop=FALSE]
ambient.prop <- metadata(e.out)$ambient
alpha0 <- metadata(e.out)$alpha
alphaT <- DropletUtils:::.estimate_alpha(as(lostmat, "dgTMatrix"), ambient.prop, totals[discarded])
expect_identical(alpha0, alphaT)
# Checking that the total probability calculations are correct.
valid <- which(totals > limit)
collected <- numeric(nrow(e.out))
for (v in valid) {
collected[v] <- ddirmultinom(my.counts[,v], prob=ambient.prop, alpha=alpha0)
}
expect_equal(e.out$LogProb[valid], collected[valid])
# Checking that it responds to the specified value of alpha.
e.out2 <- emptyDrops(my.counts, lower=limit, alpha=5)
expect_false(isTRUE(all.equal(e.out2$PValue, e.out$PValue)))
})
test_that("emptyDrops automatically rounds non-integer values", {
# Mocking up some counts.
set.seed(7001)
my.counts <- DropletUtils:::simCounts() * 1.2
expect_identical(round(my.counts), DropletUtils:::.rounded_to_integer(my.counts))
set.seed(7002)
out <- emptyDrops(my.counts)
set.seed(7002)
ref <- emptyDrops(round(my.counts))
expect_identical(out,ref)
})
set.seed(80001)
test_that("emptyDrops works with by.rank=TRUE", {
my.counts <- DropletUtils:::simCounts()
set.seed(999)
out <- emptyDrops(my.counts, by.rank=1000)
expect_false(metadata(out)$lower==100)
set.seed(999)
ref <- emptyDrops(my.counts, lower=metadata(out)$lower, by.rank=NULL)
expect_identical(out, ref)
})
test_that("emptyDrops fails when you don't give it low counts", {
y <- matrix(rpois(100000, lambda=100), ncol=10000)
expect_error(emptyDrops(y), "no counts available")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.