# This tests the post-hoc clustering methods.
# library(csaw); library(testthat); source("test-cluster.R")
compCFDR <- function(ids, threshold, weights) {
if (is.null(weights)) {
obs.sizes <- table(ids)
} else {
obs.sizes <- sapply(split(weights, ids), FUN=sum)
}
obs.sizes <- sort(obs.sizes)
num.fp <- sum(cumsum(obs.sizes) <= sum(obs.sizes) * threshold)
num.fp/length(obs.sizes)
}
set.seed(100)
test_that('clusterFDR works correctly', {
for (mu in c(5, 10, 20)) {
for (size in c(1, 10, 20)) {
ids <- rnbinom(100, mu=mu, size=size)
for (threshold in c(0.05, 0.1)) {
out <- compCFDR(ids, threshold, NULL)
test.fdr <- clusterFDR(ids, threshold)
expect_identical(out, test.fdr)
w <- runif(100)
out <- compCFDR(ids, threshold, w)
test.fdr <- clusterFDR(ids, threshold, weights=w)
expect_identical(out, test.fdr)
}
}
}
# Silly input checks.
expect_identical(clusterFDR(integer(0), 0.05), 0)
expect_error(clusterFDR(integer(0), 0.05, weights=1), "must be the same")
expect_identical(clusterFDR(runif(100), 0), 0) # threshold of zero => FDR of zero.
expect_identical(clusterFDR(1, numeric(0)), 0)
})
set.seed(101)
test_that("weighted p-value calculations are correct", {
# Beta-distributed, with and without weights.
pvals <- rbeta(1000, 1, 20)
expect_equal(p.adjust(pvals, method="BH"), csaw:::.weightedFDR(pvals, rep(1, length(pvals))))
weight <- sample(5, length(pvals), replace=TRUE)
exp.p <- rep(pvals, weight)
exp.bh <- p.adjust(exp.p, method="BH")
expect_equal(exp.bh[cumsum(weight)], csaw:::.weightedFDR(pvals, weight))
# Uniformly-distributed, with and without weights.
pvals <- runif(1000)
expect_equal(p.adjust(pvals, method="BH"), csaw:::.weightedFDR(pvals, rep(1, length(pvals))))
weight <- sample(5, length(pvals), replace=TRUE)
exp.p <- rep(pvals, weight)
exp.bh <- p.adjust(exp.p, method="BH")
expect_equal(exp.bh[cumsum(weight)], csaw:::.weightedFDR(pvals, weight))
# Other bits and pieces.
pvals <- rep(1, 1000)
expect_equal(p.adjust(pvals, method="BH"), csaw:::.weightedFDR(pvals, rep(1, length(pvals))))
expect_equal(numeric(0), csaw:::.weightedFDR(numeric(0), numeric(0)))
})
##################################################
set.seed(102)
test_that("controlClusterFDR works as expected", {
nfalse <- 1000
ntrue <- 100
for (nsites in c(5, 10, 20)) {
for (target in c(0.01, 0.05, 0.1)) {
# Setting up situations with small and large clusters, where the latter are always detected (strong true DB).
# This provides the maximal chance that the window- and cluster-level FDRs are different.
ids <- c(seq_len(nfalse), nfalse + sample(nsites, ntrue, replace=TRUE))
p <- c(runif(nfalse), numeric(ntrue))
FUN <- function(is.sig) ids[is.sig]
out <- controlClusterFDR(target=target, adjp=p, FUN=FUN)
expect_true(out$FDR <= target)
expect_true(out$threshold <= target)
expect_identical(clusterFDR(FUN(p <= out$threshold), out$threshold), out$FDR)
# Exceeding the window-level threshold should generally increase the cluster-level FDR
# (note: in theory, it is possible to get failures here as this depends on resolution).
for (up in c(1.05, 1.1, 1.5, 2)) {
a.bit.up <- out$threshold*up
expect_true(clusterFDR(FUN(p <= a.bit.up), a.bit.up) > target || a.bit.up > target)
}
}
}
# Caps at the target.
expect_identical(controlClusterFDR(target=0.05, adjp=0, FUN=function(is.sig) { 1 })$threshold, 0.05)
expect_identical(controlClusterFDR(target=0.1, adjp=0, FUN=function(is.sig) { 1 })$threshold, 0.1)
expect_identical(controlClusterFDR(target=0.01, adjp=0, FUN=function(is.sig) { 1 })$threshold, 0.01)
# Checking correct behaviour for empty inputs.
out <- controlClusterFDR(target=0.05, adjp=numeric(0), FUN=FUN)
expect_identical(out$threshold, 0.05)
expect_identical(out$FDR, 0)
})
##################################################
set.seed(103)
test_that("clusterWindows works as expected", {
windows <- GRanges("chrA", IRanges(1:1000, 1:1000))
test.p <- runif(1000)
test.p[rep(1:2, 100) + rep(0:99, each=2) * 10] <- 0 # every 10 bases contains 2 true positives.
tab <- data.frame(PValue=test.p, logFC=rnorm(length(test.p)))
target <- 0.05
out.0 <- clusterWindows(windows, tab, target=target, tol=0)
expect_true(out.0$FDR <= target)
adjp <- p.adjust(test.p, method="BH")
keep <- !is.na(out.0$id)
presumed.threshold <- max(adjp[keep])
expect_identical(keep, adjp <= presumed.threshold)
merged <- mergeWindows(windows[keep], tol=0)
expect_identical(merged$region, out.0$region)
expect_identical(merged$id, out.0$id[keep])
# Statistics are correctly transformed.
expect_null(out.0$statistics$FDR)
expect_identical(out.0$stats$rep.logFC, tab$logFC[out.0$stats$rep.test])
expect_identical(out.0$stats$num.tests, out.0$stats$num.up.logFC + out.0$stats$num.down.logFC)
# Trying with a higher tolerance - this should merge everything.
out.10 <- clusterWindows(windows, tab, target=target, tol=10)
expect_identical(start(out.10$region), 1L)
expect_identical(end(out.10$region), max(which(test.p==0)))
expect_identical(out.10$FDR, 0)
expect_identical(!is.na(out.10$id), adjp <= target)
# Checking that the frequency weights work.
weight <- sample(3, length(windows), replace=TRUE)
expand <- rep(seq_along(weight), weight)
out <- clusterWindows(windows, tab, target=target, tol=0, weights=weight)
ref <- clusterWindows(windows[expand], tab[expand,], target=target, tol=0)
expect_identical(out$FDR, ref$FDR)
expect_identical(out$region, ref$region)
expect_identical(out$id, ref$id[!duplicated(expand)])
# Responsive to the sign.
lfc <- rnorm(length(windows))
out <- clusterWindows(windows, data.frame(PValue=test.p, logFC=lfc), target=target, tol=0, signs=TRUE)
expect_true(out$FDR <= target)
expect_true(all(lengths(lapply(split(lfc > 0, out$id), unique))==1L))
# Trying with empty and other silly inputs.
out <- clusterWindows(windows[0], tab[0,], target=target, tol=0)
expect_identical(out$id, integer(0))
expect_identical(out$region, windows[0])
expect_identical(out$FDR, 0)
expect_error(clusterWindows(windows[0], tab, target=target, tol=0), "number of ranges")
expect_warning(clusterWindows(windows[1], tab[1,], target=target), "'tol'")
expect_warning(clusterWindows(windows[1], tab[1,], tol=100), "set to 0.05")
})
##################################################
checkResults <- function(data.list, result.list, target, pval.col="PValue", ..., true.pos) {
out <- clusterWindowsList(data.list, result.list, pval.col=pval.col, target=target, ...)
expect_true(out$FDR <= target)
# Checking that the clustering is fine.
all.ids <- out$ids
ref <- splitAsList(do.call(c, data.list), all.ids)
names(ref) <- NULL
expect_identical(unlist(range(ref)), out$regions)
# Checking that the right windows were chosen.
all.ps <- unlist(lapply(result.list, FUN=function(x) { x[,pval.col] }))
was.sig <- !is.na(all.ids)
if (any(was.sig) && any(!was.sig)) {
expect_true(max(all.ps[was.sig]) < min(all.ps[!was.sig]))
}
# Comparing the observed and estimated FDRs (far too fragile for use).
# np <- out$region[!overlapsAny(out$region, true.pos),]
# expect_true(length(np)/length(out$region) <= out$FDR)
# print(length(np)/length(out$region))
return(NULL)
}
set.seed(104)
test_that("clusterWindowsList works as expected", {
windows <- GRanges("chrA", IRanges(1:2000, 1:2000))
test.p <- runif(2000)
test.p[rep(1:2, 50) + rep(0:49, each=2) * 40] <- 0
true.pos <- windows[test.p==0]
checkResults(list(windows, windows[1:10]), list(data.frame(PValue=test.p), data.frame(PValue=test.p[1:10])),
tol=0, target=0.05, true.pos=true.pos)
checkResults(list(windows, windows[1:10]), list(data.frame(PValue=test.p), data.frame(PValue=test.p[1:10])),
equiweight=FALSE, tol=0, target=0.05, true.pos=true.pos)
checkResults(list(windows, windows[1:10]), list(data.frame(PValue=test.p), data.frame(PValue=test.p[1:10])),
tol=5, target=0.05, true.pos=true.pos)
checkResults(list(windows, windows[1:10]), list(data.frame(PValue=test.p), data.frame(PValue=test.p[1:10])),
tol=0, target=0.1, true.pos=true.pos)
# Adding sign information.
signs <- ifelse(rbinom(1000, 1, 0.5)!=0L, 1, -1)
checkResults(list(windows, windows[1:10]),
list(data.frame(PValue=test.p, logFC=signs), data.frame(PValue=test.p[1:10], logFC=signs[1:10])),
tol=0, fc.col="logFC", target=0.05, true.pos=true.pos)
# Checking behaviour when empty.
out <- clusterWindowsList(list(windows[0]), list(data.frame(PValue=numeric(0))), tol=0, target=0.05)
expect_identical(out$ids, integer(0))
expect_identical(out$FDR, 0)
expect_s4_class(out$region, "GRanges")
expect_identical(length(out$region), 0L)
expect_error(
clusterWindowsList(list(windows, windows[1:10]), list(data.frame(PValue=test.p)), tol=0, target=0.1),
"should have equal length")
expect_error(
clusterWindowsList(list(windows, windows[1:10]), list(data.frame(PValue=test.p), data.frame(PValue=test.p))),
"elements of .* should have equal length")
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.