# This tests the trainSingleR function.
# library(testthat); library(SingleR); source("setup.R"); source("test-train.R")
test_that("trainSingleR works correctly for genes='de'", {
out <- trainSingleR(training, training$label)
expect_identical(out$ref, logcounts(training))
expect_identical(out$labels$full, training$label)
expect_identical(out$labels$unique, sort(unique(training$label)))
expect_identical(sort(out$markers$unique), sort(unique(unlist(out$markers$full))))
# Checking the structure of the DE gene set.
expect_identical(names(out$markers$full), sort(unique(training$label)))
for (u in names(out$markers$full)) {
expect_identical(names(out$markers$full[[u]]), names(out$markers$full))
expect_identical(out$markers$full[[u]][[u]], character(0))
# Genes in opposite directions should not intersect.
for (j in names(out$markers$full)) {
combined <- intersect(out$markers$full[[u]][[j]], out$markers$full[[j]][[u]])
expect_identical(combined, character(0))
}
}
})
test_that("trainSingleR works correctly for a list of lists of genes", {
collected <- SingleR:::.get_genes_by_de(logcounts(training), training$label, de.n=13)
expect_identical(sort(names(collected)), sort(unique(training$label)))
in.names <- unique(lapply(collected, names))
expect_identical(length(in.names), 1L)
expect_identical(in.names[[1]], names(collected))
out <- trainSingleR(training, training$label, genes=collected)
expect_identical(out$markers$full, collected)
})
test_that("trainSingleR works correctly for a list of genes", {
collected <- SingleR:::.get_genes_by_de(logcounts(training), training$label)
re.collected <- lapply(collected, unlist, use.names=FALSE)
out <- trainSingleR(training, training$label, genes=re.collected)
expect_identical(sort(out$markers$unique), sort(unique(unlist(out$markers$full))))
for (u in names(out$markers$full)) {
for (j in names(out$markers$full)) {
if (u == j) {
expect_equal(out$markers$full[[u]][[u]], unique(re.collected[[u]]))
} else {
expect_identical(out$markers$full[[u]][[j]], character(0))
}
}
}
})
test_that("trainSingleR fails correctly for a list of lists of genes", {
# Fails when a weird gene set input is provided.
expect_error(trainSingleR(training, training$label, genes=list(A=list(), B=character(0))), "'genes' must be")
expect_error(trainSingleR(training, training$label, genes=list(A=list(), B=list())), "for each label")
empty <- rep(list(list()), length(unique(training$label)))
names(empty) <- unique(training$label)
expect_error(trainSingleR(training, training$label, genes=empty), "between each pair of labels")
})
test_that("trainSingleR works correctly for other DE testing methods", {
# For Wilcox.
by.t <- scran::pairwiseWilcox(logcounts(training), training$label, direction="up")
markers <- scran::getTopMarkers(by.t[[1]], by.t[[2]], n=10)
ref <- trainSingleR(training, training$label, genes='de', de.method="wilcox")
trained <- trainSingleR(training, training$label, genes=markers)
expect_identical(ref$markers, trained$markers)
# For t-tests.
by.t <- scran::pairwiseTTests(logcounts(training), training$label, direction="up")
markers <- scran::getTopMarkers(by.t[[1]], by.t[[2]], n=10)
ref <- trainSingleR(training, training$label, genes='de', de.method="t")
trained <- trainSingleR(training, training$label, genes=markers)
expect_identical(ref$markers, trained$markers)
# Responds to the requested number of genes.
by.t <- scran::pairwiseTTests(logcounts(training), training$label, direction="up", lfc=1)
markers <- scran::getTopMarkers(by.t[[1]], by.t[[2]], n=20)
ref <- trainSingleR(training, training$label, genes='de', de.method="t", de.n=20, de.args=list(lfc=1))
trained <- trainSingleR(training, training$label, genes=markers)
expect_identical(ref$markers, trained$markers)
})
test_that("trainSingleR is robust to non-character labels", {
ids <- sample(1:5, ncol(training), replace=TRUE)
out <- trainSingleR(training, ids)
ref <- trainSingleR(training, as.character(ids))
expect_equal(out$labels, ref$labels)
})
test_that("trainSingleR works on various expression matrices", {
out <- trainSingleR(training, training$label)
alt <- trainSingleR(logcounts(training), training$label)
expect_identical(out$ref, alt$ref)
# assay.type= works.
blah <- training
assays(blah, withDimnames=FALSE) <- list(stuff=matrix(0, nrow(blah), ncol(blah)), whee=logcounts(training))
re.alt <- trainSingleR(blah, blah$label, assay.type="whee")
expect_identical(out$ref, re.alt$ref)
# robust to invariant transformations.
sce <- training
assay(sce, "shifted") <- logcounts(sce) + 1
assay(sce, "scaled") <- logcounts(sce) * 2
out <- trainSingleR(sce, sce$label)
alt <- trainSingleR(sce, sce$label, assay.type="shifted")
expect_identical(out$markers, alt$markers)
alt <- trainSingleR(sce, sce$label, assay.type="scaled")
expect_identical(out$markers, alt$markers)
})
test_that("trainSingleR strips out NAs", {
sce <- training
logcounts(sce)[1,1] <- NA
out <- trainSingleR(sce, sce$label)
ref <- trainSingleR(sce[-1,], sce$label)
expect_identical(as.matrix(out$ref), ref$ref)
expect_identical(out$markers, ref$markers)
})
test_that("trainSingleR behaves with multiple references, plus recomputation", {
training1 <- training2 <- training
training1 <- training1[sample(nrow(training1)),]
rownames(training1) <- rownames(training)
ref1 <- trainSingleR(training1, training1$label)
ref2 <- trainSingleR(training2, training2$label)
out <- trainSingleR(list(training1, training2), list(training1$label, training2$label))
except.built <- setdiff(names(ref1), "built")
expect_identical(ref1[except.built], out[[1]][except.built])
expect_identical(ref2[except.built], out[[2]][except.built])
# Same result with names.
out <- trainSingleR(list(foo=training1, bar=training2), list(training1$label, training2$label))
expect_identical(names(out), c("foo", "bar"))
})
test_that("trainSingleR behaves with aggregation turned on", {
set.seed(10000)
suppressWarnings(out <- trainSingleR(training, training$label, aggr.ref=TRUE))
expect_true(ncol(out$ref) <= ncol(training))
set.seed(10000)
suppressWarnings(out2 <- trainSingleR(ref=list(training, training), label=list(training$label, training$label), aggr.ref=TRUE))
expect_identical(out2[[1]]$ref, out$ref)
expect_false(identical(out2[[2]]$ref, out$ref)) # different k-means initialization.
})
test_that("trainSingleR behaves with silly inputs", {
expect_error(out <- trainSingleR(training[,0], training$label[0]), "at least one column")
out <- trainSingleR(training[0,], training$label)
expect_identical(length(out$markers$unique), 0L)
unnamed <- unname(training)
expect_error(trainSingleR(unnamed, unnamed$label), "must have row names")
})
test_that("trainSingleR works when 'genes' contains markers outside of the reference", {
train.sub <- head(training, 90)
collected <- SingleR:::.get_genes_by_de(logcounts(training), training$label)
genes <- unique(unlist(collected))
# Make sure more genes than ref
expect_false(all(genes %in% row.names(train.sub)))
expect_error(out <- SingleR::trainSingleR(train.sub, training$label, genes = collected), NA)
# Behaves the same as if those genes were intersected.
collected2 <- lapply(collected, function(l) lapply(l, intersect, y=rownames(train.sub)))
ref <- SingleR::trainSingleR(train.sub, training$label, genes = collected2)
expect_identical(out$markers, ref$markers)
})
test_that("trainSingleR works when restricting", {
keep <- c(letters, head(rownames(training), 90))
expect_error(out <- SingleR::trainSingleR(training, training$label, restrict=keep), NA)
# Behaves the same as if those genes were intersected.
ref <- SingleR::trainSingleR(head(training, 90), training$label)
expect_identical(out$markers, ref$markers)
})
test_that("trainSingleR auto-eliminates NA labels", {
populate <- rbinom(length(training$label), 1, 0.2)==1
training$label[populate] <- NA
out <- trainSingleR(training, training$label)
ref <- trainSingleR(training[,!populate], training$label[!populate])
expect_identical(out$labels, ref$labels)
expect_identical(out$ref, ref$ref)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.