Nothing
# tests for cell-based pre-processing functions.
# library(scuttle); library(testthat); source("setup.R"); source("test-sum-across-cells.R")
library(Matrix)
library(DelayedArray)
##########################################################
set.seed(10003)
test_that("we can summarise counts at cell cluster level", {
ids <- sample(ncol(sce)/2, ncol(sce), replace=TRUE)
out <- sumCountsAcrossCells(sce, ids)
expect_identical(assay(out), colsum(counts(sce), ids))
expect_identical(colnames(out), as.character(sort(unique(ids)))) # numeric ordering is preserved.
expect_identical(sort(unique(ids)), out$ids)
out2 <- sumCountsAcrossCells(counts(sce), ids)
expect_identical(out, out2)
# Robust to column names.
copy <- sce
colnames(copy) <- paste0("CELL", seq_len(ncol(copy)))
expect_identical(sumCountsAcrossCells(copy, ids), out)
# assay.type= works correctly.
alt <- sce
assayNames(alt) <- "whee"
out2 <- sumCountsAcrossCells(alt, ids, assay.type="whee")
expect_identical(out, out2)
# Respects levels properly.
fids <- factor(ids, levels=rev(sort(unique(ids))))
fout <- sumCountsAcrossCells(sce, fids)
fout <- fout[,ncol(fout):1]
fout$ids <- as.integer(levels(fout$ids))[fout$ids]
expect_identical(out, fout)
# Drops unused levels.
fids <- factor(ids, levels=1:100)
fout <- sumCountsAcrossCells(sce, fids)
expect_identical(fout$ids, sort(unique(fids)))
fout$ids <- as.integer(fout$ids)
expect_identical(out, fout)
# Handles NA's correctly.
ids2 <- sample(LETTERS, ncol(sce), replace=TRUE)
out2 <- sumCountsAcrossCells(sce, ids2)
ids3 <- ids2
ids3[ids3=="A"] <- NA
out3 <- sumCountsAcrossCells(sce, ids3)
expect_identical(out2[,setdiff(colnames(out2), "A")], out3)
all.na <- ids
all.na[] <- NA
out3 <- sumCountsAcrossCells(sce, all.na)
expect_identical(ncol(out3), 0L)
expect_identical(rownames(out3), rownames(sce))
# Doesn't store numbers fi we don't ask.
out <- sumCountsAcrossCells(sce, ids, store.number=NULL)
expect_null(out$ncells)
})
set.seed(10004)
test_that("by-cell count summarization behaves with other classes", {
ids <- sample(ncol(sce)/2, ncol(sce), replace=TRUE)
ref <- sumCountsAcrossCells(sce, ids)
# Handles sparse matrices properly.
sparsified <- sce
counts(sparsified) <- as(counts(sparsified), "dgCMatrix")
spack <- sumCountsAcrossCells(sparsified, ids)
expect_identical(ref, spack)
unknown <- sce
counts(unknown) <- as(counts(unknown), "dgTMatrix")
spack <- sumCountsAcrossCells(unknown, ids)
expect_identical(ref, spack)
# Handles DelayedArrays properly.
delayed <- sce
counts(delayed) <- DelayedArray(counts(delayed))
dack <- sumCountsAcrossCells(delayed, ids)
expect_equivalent(ref, dack)
# Handles _sparse_ DelayedArrays properly.
sdelayed <- sce
counts(sdelayed) <- DelayedArray(as(counts(sce), "dgCMatrix"))
sdack <- sumCountsAcrossCells(sdelayed, ids)
expect_equivalent(ref, sdack)
})
set.seed(100041)
test_that("by-cell count summarization handles parallelization properly", {
ids <- sample(ncol(sce)/2, ncol(sce), replace=TRUE)
ref <- sumCountsAcrossCells(sce, ids)
alt <- sumCountsAcrossCells(sce, ids, BPPARAM=safeBPParam(2))
expect_identical(alt, ref)
alt <- sumCountsAcrossCells(sce, ids, BPPARAM=safeBPParam(3))
expect_identical(alt, ref)
})
set.seed(10004001)
test_that("by-cell count summarization behaves with subsetting", {
ids <- sample(LETTERS[1:5], ncol(sce), replace=TRUE)
expect_identical(sumCountsAcrossCells(counts(sce), ids, subset.row=10:1),
sumCountsAcrossCells(counts(sce), ids)[10:1,])
expect_identical(sumCountsAcrossCells(counts(sce), ids, subset.col=2:15),
sumCountsAcrossCells(counts(sce)[,2:15], ids[2:15]))
})
set.seed(100040002)
test_that("by-cell count summarization works with various average types", {
ids <- sample(LETTERS[1:5], ncol(sce), replace=TRUE)
ref <- sumCountsAcrossCells(sce, ids)
# Handles vanilla averaging:
out2 <- sumCountsAcrossCells(sce, ids, average=TRUE)
expect_identical(assay(out2), t(t(colsum(counts(sce), ids))/as.integer(table(ids))))
expect_identical(colData(out2), colData(ref))
out3 <- sumCountsAcrossCells(sce, ids, average="mean")
expect_identical(out2, out3)
# Handles medianizing.
out4 <- sumCountsAcrossCells(sce, ids, average="median")
expect_false(identical(out2, out4))
# 'none' is the same as FALSE.
expect_identical(ref, sumCountsAcrossCells(sce, ids, average="none"))
})
set.seed(1000401)
test_that("Aggregation across cells works correctly with DFs", {
# One factor.
ids <- sample(ncol(sce)/2, ncol(sce), replace=TRUE)
ref <- sumCountsAcrossCells(sce, ids)
out <- sumCountsAcrossCells(sce, DataFrame(X=ids))
expect_identical(colnames(ref), as.character(out$X))
expect_equivalent(assay(ref), assay(out))
expect_identical(out$ncells, ref$ncells)
expect_identical(out$ncells, as.integer(table(ids)))
# Two factors.
extra <- sample(LETTERS[1:3], ncol(sce), replace=TRUE)
combined <- paste0(ids, "-", extra)
ref <- sumCountsAcrossCells(sce, combined)
df <- DataFrame(X=ids, Y=extra)
out <- sumCountsAcrossCells(sce, df)
post.combined <- paste0(out$X, "-", out$Y)
expect_identical(sort(colnames(ref)), sort(post.combined))
m <- match(colnames(ref), post.combined)
expect_equivalent(assay(ref), assay(out)[,m])
expect_identical(order(colData(out)), seq_len(ncol(out))) # output is ordered.
expect_identical(out$ncells, as.integer(table(selfmatch(sort(df)))))
ref <- sumCountsAcrossCells(sce, combined, average=TRUE)
out <- sumCountsAcrossCells(sce, df, average=TRUE)
expect_equivalent(assay(ref), assay(out)[,m])
# Handles NAs correctly.
extra[1] <- NA
ids[2] <- NA
df <- DataFrame(X=ids, Y=extra)
ref <- sumCountsAcrossCells(sce[,-(1:2)], df[-(1:2),])
out <- sumCountsAcrossCells(sce, df)
expect_equal(assay(ref), assay(out))
expect_equal(colData(ref), colData(out))
out2 <- sumCountsAcrossCells(sce, df, subset.col=-(1:2))
expect_equal(out, out2)
})
##########################################################
set.seed(100041)
test_that("Aggregation across cells works correctly for SCEs", {
ids <- paste0("CLUSTER_", sample(ncol(sce)/2, ncol(sce), replace=TRUE))
alt <- aggregateAcrossCells(sce, ids)
expect_identical(colnames(alt), sort(unique(ids)))
ref <- sumCountsAcrossCells(counts(sce), ids)
expect_identical(counts(alt), assay(ref))
expect_identical(alt$ncells, ref$ncells)
expect_identical(alt$ids, ref$ids)
# Behaves in the presence of multiple assays.
normcounts(sce) <- normalizeCounts(sce, log=FALSE)
alt2 <- aggregateAcrossCells(sce, ids)
expect_identical(alt, alt2)
sce <- logNormCounts(sce, log=FALSE)
alt3 <- aggregateAcrossCells(sce, ids, use.assay.type=c("counts", "normcounts"))
expect_identical(counts(alt), counts(alt3))
ref <- sumCountsAcrossCells(sce, ids, assay.type="normcounts")
expect_identical(normcounts(alt3), assay(ref))
# Works when the count matrix is not the first.
sce2 <- sce
assays(sce2) <- assays(sce2)[2:1]
alt4 <- aggregateAcrossCells(sce2, ids)
ref <- sumCountsAcrossCells(counts(sce), ids)
expect_identical(counts(alt4), assay(ref))
})
set.seed(1000401)
test_that("Aggregation across cells works correctly with altExps", {
ids <- paste0("CLUSTER_", sample(10, ncol(sce), replace=TRUE))
copy <- sce
altExp(copy, "THING") <- sce
counts(altExp(copy)) <- counts(altExp(copy)) * 2
agg <- aggregateAcrossCells(copy, ids)
expect_identical(counts(altExp(agg, "THING")), counts(agg)*2)
agg0 <- aggregateAcrossCells(sce, ids, use.altexps=FALSE)
expect_identical(counts(agg0), counts(agg))
expect_identical(altExpNames(agg0), character(0))
# Subsetting only affects the main experiment.
agg2 <- aggregateAcrossCells(copy, ids, subset.row=1:5)
expect_equal(agg[1:5,], agg2)
# Other arguments are passed down.
agg3 <- aggregateAcrossCells(copy, ids, average=TRUE)
ref <- sumCountsAcrossCells(copy, ids, average=TRUE)
expect_identical(counts(agg3), assay(ref))
expect_identical(counts(altExp(agg3)), assay(ref)*2)
# Behaves correctly with NAs.
ids2 <- ids
failed <- ids2==ids2[1]
ids2[failed] <- NA
expect_identical(
aggregateAcrossCells(copy, ids2, average=TRUE),
aggregateAcrossCells(copy[,!failed], ids[!failed], average=TRUE)
)
# Other options work correctly.
agg4 <- aggregateAcrossCells(copy, ids, use.altexps=1)
expect_identical(altExpNames(agg4), "THING")
expect_error(aggregateAcrossCells(copy, ids, use.altexps=10), 'use.altexps')
agg5 <- aggregateAcrossCells(copy, ids, use.altexps="THING")
expect_identical(altExpNames(agg5), "THING")
expect_error(aggregateAcrossCells(copy, ids, use.altexps="WASDA"), 'use.altexps')
# We get the same behavior for deeply nested alternative experiments.
altExp(altExp(copy, "THING"), "BLAH") <- sce[1:10,]
agg6 <- aggregateAcrossCells(copy, ids)
expect_identical(assay(altExp(altExp(agg6))), assay(agg6)[1:10,])
})
set.seed(1000401)
test_that("Aggregation across cells works correctly with reducedDims", {
ids <- paste0("CLUSTER_", sample(20, ncol(sce), replace=TRUE))
copy <- sce
reducedDim(copy, "PCA") <- t(assay(sce)[1:3,])
reducedDim(copy, "TSNE") <- t(assay(sce)[1:10,])
# Responds to the average settings.
agg <- aggregateAcrossCells(copy, ids, average=TRUE)
expect_identical(reducedDim(agg, "PCA"), t(assay(agg)[1:3,]))
expect_identical(reducedDim(agg, "TSNE"), t(assay(agg)[1:10,]))
agg2 <- aggregateAcrossCells(copy, ids, average=FALSE)
expect_identical(reducedDims(agg2), reducedDims(agg))
agg3 <- aggregateAcrossCells(copy, ids, average="median", dimred.stats="median")
expect_identical(reducedDim(agg3, "PCA"), t(assay(agg3)[1:3,]))
expect_identical(reducedDim(agg3, "TSNE"), t(assay(agg3)[1:10,]))
expect_false(identical(agg3, agg))
agg4 <- aggregateAcrossCells(copy, ids, average="mean")
expect_true(identical(agg4, agg))
# Behaves with NAs.
ids2 <- ids
failed <- ids2==ids2[1]
ids2[failed] <- NA
expect_identical(
aggregateAcrossCells(copy, ids2, average=TRUE),
aggregateAcrossCells(copy[,!failed], ids[!failed], average=TRUE)
)
# Other options work correctly.
agg1 <- aggregateAcrossCells(copy, ids, use.dimred=1)
expect_identical(reducedDimNames(agg1), "PCA")
expect_error(aggregateAcrossCells(copy, ids, use.dimred=10), 'use.dimred')
agg2 <- aggregateAcrossCells(copy, ids, use.dimred="TSNE")
expect_identical(reducedDimNames(agg2), "TSNE")
expect_error(aggregateAcrossCells(copy, ids, use.dimred="WHEE"), 'use.dimred')
agg0 <- aggregateAcrossCells(sce, ids, use.dimred=FALSE)
expect_identical(reducedDimNames(agg0), character(0))
})
set.seed(1000411)
test_that("Aggregation across cells works correctly for SCEs with DFs", {
ids <- paste0("CLUSTER_", sample(ncol(sce)/2, ncol(sce), replace=TRUE))
extra <- sample(LETTERS[1:3], ncol(sce), replace=TRUE)
combined <- DataFrame(X=ids, Y=extra)
agg <- aggregateAcrossCells(sce, combined)
ref <- sumCountsAcrossCells(counts(sce), combined)
expect_identical(counts(agg), assay(ref))
expect_identical(agg$X, ref$X)
expect_identical(agg$Y, ref$Y)
expect_identical(agg$ncells, ref$ncells)
# Same for alternative experiments.
copy <- sce
altExp(copy, "THING") <- sce
counts(altExp(copy)) <- counts(altExp(copy)) * 2
agg <- aggregateAcrossCells(copy, combined)
expect_identical(counts(agg), assay(ref))
expect_identical(counts(altExp(agg, "THING")), assay(ref)*2)
expect_identical(ref$X, altExp(agg)$X)
expect_identical(ref$Y, altExp(agg)$Y)
expect_identical(agg$ncells, ref$ncells)
expect_identical(altExp(agg)$ncells, ref$ncells)
# Same for reduced dimensions.
copy <- sce
reducedDim(copy, "PCA") <- t(assay(sce)[1:3,])
agg <- aggregateAcrossCells(copy, combined, average=TRUE)
expect_identical(reducedDim(agg, "PCA"), t(assay(agg)[1:3,]))
})
set.seed(1000412)
test_that("Aggregation across cells works correctly with custom coldata acquisition", {
ids <- paste0("CLUSTER_", sample(ncol(sce)/2, ncol(sce), replace=TRUE))
sce$thing <- seq_len(ncol(sce))
# Defaults to partial NA's.
alt <- aggregateAcrossCells(sce, ids)
tab <- table(ids)
expect_identical(colnames(alt)[is.na(alt$thing)], names(tab)[tab > 1])
expect_equivalent(alt$thing[!is.na(alt$thing)], sce$thing[match(names(tab)[tab==1], ids)])
# Defaults to a sensible value if we enforce identity within each group.
sce$thing2 <- ids
alt <- aggregateAcrossCells(sce, ids)
expect_equivalent(colnames(alt), alt$thing2)
# Responds to taking the first.
alt <- aggregateAcrossCells(sce, ids, coldata_merge=function(x) head(x, 1))
expect_equivalent(alt$thing, as.integer(by(sce$thing, ids, head, n=1)))
expect_equivalent(alt$Mutation_Status, as.character(
by(data.frame(sce$Mutation_Status, stringsAsFactors=FALSE), ids, FUN=head, n=1))
)
expect_identical(colnames(alt), sort(unique(ids)))
# Responds to taking the sum.
alt <- aggregateAcrossCells(sce, ids, coldata_merge=list(thing=sum))
expect_equivalent(alt$thing, as.integer(by(sce$thing, ids, sum)))
expect_identical(colnames(alt), sort(unique(ids)))
alt <- aggregateAcrossCells(sce, ids, coldata_merge=list(Cell_Cycle=function(x) paste(x, collapse="")))
expect_type(alt$Cell_Cycle, "character")
# Setting FALSE works corectly.
alt <- aggregateAcrossCells(sce, ids, coldata_merge=FALSE)
expect_identical(colnames(colData(alt)), c("ids", "ncells"))
alt <- aggregateAcrossCells(sce, ids, coldata_merge=list(thing=FALSE))
expect_identical(alt$thing, NULL)
})
set.seed(1000412)
test_that("Aggregation across cells works correctly with different colData types", {
ids <- paste0("CLUSTER_", sample(ncol(sce)/2, ncol(sce), replace=TRUE))
# Handles factors correctly.
alt <- sce
alt$Cell_Cycle <- factor(alt$Cell_Cycle)
alt <- aggregateAcrossCells(alt, factor(ids))
expect_true(is.factor(alt$Cell_Cycle))
expect_true(is.factor(alt$ids))
alt <- sce
alt$Cell_Cycle <- factor(alt$Cell_Cycle)
alt <- aggregateAcrossCells(alt, rep(NA, ncol(alt)))
expect_true(is.factor(alt$Cell_Cycle))
# Handles nested DFs correctly.
alt <- sce
alt$nested <- DataFrame(ID=ids)
alt <- aggregateAcrossCells(alt, ids)
expect_s4_class(alt$nested, "DFrame")
expect_identical(alt$nested$ID, alt$ids)
alt <- sce
alt$nested <- DataFrame(ID=runif(ncol(alt)))
alt <- aggregateAcrossCells(alt, ids)
expect_s4_class(alt$nested, "DFrame")
expect_type(alt$nested$ID, "double")
expect_true(any(is.na(alt$nested$ID)))
alt <- sce
alt$nested <- DataFrame(ID=runif(ncol(alt)))
alt <- aggregateAcrossCells(alt, rep(NA, ncol(alt)))
expect_s4_class(alt$nested, "DFrame")
expect_type(alt$nested$ID, "double")
})
set.seed(10004121)
test_that("Aggregation across cells works correctly with suffixing", {
ids <- paste0("CLUSTER_", sample(ncol(sce)/2, ncol(sce), replace=TRUE))
alt <- aggregateAcrossCells(sce, ids, suffix=TRUE)
expect_identical(assayNames(alt), "counts.sum")
sce <- logNormCounts(sce)
alt <- aggregateAcrossCells(sce, ids, statistics=c("mean", "median"), use.assay.type=TRUE)
expect_identical(assayNames(alt), c("counts.mean", "counts.median", "logcounts.mean", "logcounts.median"))
# Works for reddims.
reducedDims(sce) <- list(PCA=matrix(runif(ncol(sce)*2), ncol=2), TSNE=matrix(rnorm(ncol(sce)*2), ncol=2))
alt <- aggregateAcrossCells(sce, ids)
expect_identical(reducedDimNames(alt), reducedDimNames(sce))
alt <- aggregateAcrossCells(sce, ids, suffix=TRUE)
expect_identical(reducedDimNames(alt), paste0(reducedDimNames(sce), ".mean"))
expect_identical(assayNames(alt), "counts.sum") # passed along.
alt <- aggregateAcrossCells(sce, ids, use.dimred="PCA", dimred.stats=c("mean", "median"))
expect_identical(reducedDimNames(alt), c("PCA.mean", "PCA.median"))
})
set.seed(1000413)
test_that("Aggregation across cells doesn't choke on syntactically invalid names", {
ids <- paste0("CLUSTER_", sample(ncol(sce)/2, ncol(sce), replace=TRUE))
sce$`batman and robin` <- 2
alt <- aggregateAcrossCells(sce, ids)
expect_identical(sum(alt$`batman and robin`==2), ncol(alt))
})
set.seed(100042)
test_that("Aggregation across cells works correctly for SEs", {
ids <- paste0("CLUSTER_", sample(ncol(sce)/2, ncol(sce), replace=TRUE))
alt <- aggregateAcrossCells(sce, ids)
expect_identical(colnames(alt), sort(unique(ids)))
ref <- sumCountsAcrossCells(counts(sce), ids)
expect_identical(counts(alt), assay(ref))
expect_identical(alt$ids, ref$ids)
expect_identical(alt$ncells, ref$ncells)
})
##########################################################
test_that("numDetectedAcrossCells works as expected", {
ids <- sample(LETTERS[1:5], ncol(sce), replace=TRUE)
out <- numDetectedAcrossCells(counts(sce), ids)
expect_equal(assay(out), colsum((counts(sce) > 0)+0, ids))
expect_identical(out$ids, colnames(out))
out <- numDetectedAcrossCells(counts(sce), ids, average=TRUE)
expect_identical(assay(out), t(t(colsum((counts(sce) > 0)+0, ids))/as.integer(table(ids))))
# Checking that it works direclty with SCEs.
expect_equal(numDetectedAcrossCells(counts(sce), ids),
numDetectedAcrossCells(sce, ids))
expect_equal(numDetectedAcrossCells(counts(sce), ids, average=TRUE),
numDetectedAcrossCells(sce, ids, average=TRUE))
# Checking that subsetting works.
expect_identical(numDetectedAcrossCells(counts(sce), ids, subset.row=10:1),
numDetectedAcrossCells(counts(sce), ids)[10:1,])
expect_identical(numDetectedAcrossCells(counts(sce), ids, subset.col=2:15),
numDetectedAcrossCells(counts(sce)[,2:15], ids[2:15]))
ids[c(1,3,5,6)] <- NA
expect_identical(numDetectedAcrossCells(counts(sce), ids),
numDetectedAcrossCells(counts(sce)[,!is.na(ids)], ids[!is.na(ids)]))
# Comparing to sumCountsAcrossCells.
expect_equal(numDetectedAcrossCells(counts(sce), ids),
sumCountsAcrossCells((counts(sce) > 0)+0, ids))
expect_equal(numDetectedAcrossCells(counts(sce), ids, average=TRUE),
sumCountsAcrossCells((counts(sce) > 0)+0, ids, average=TRUE))
})
test_that("numDetectedAcrossCells handles other matrix classes", {
thing <- matrix(rpois(2000, lambda=0.5), ncol=100, nrow=20)
ids <- sample(LETTERS[1:6], ncol(thing), replace=TRUE)
ref <- numDetectedAcrossCells(thing, ids)
expect_equal(rowSums(assay(ref)), rowSums(thing > 0)) # basic sanity check.
sparse <- as(thing, 'dgCMatrix')
expect_equal(numDetectedAcrossCells(sparse, ids), ref)
delayed <- DelayedArray(thing)
expect_equal(numDetectedAcrossCells(delayed, ids), ref)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.