tests/testthat/test-find-top-corr.R

# This tests the findTopCorrelations function.
# library(testthat); library(mumosa); source("setup.R"); source("test-find-top-corr.R")

set.seed(109109100)
test_that("findTopCorrelations works for the top self-correlations", {
    library(scuttle)
    sce <- mockSCE(ngenes=100)
    sce <- logNormCounts(sce)

    # Setting d to the max, so that the search is basically exact.
    df <- findTopCorrelations(sce, number=20, d=nrow(sce), BSPARAM=BiocSingular::ExactParam()) 
    df2 <- findTopCorrelations(sce, number=20, d=NA)
    expect_identical(df, df2)

    # Performing a reference computation.
    all.cor <- cor(t(logcounts(sce)), method="spearman")
    collected.best <- collected.worst <- collected.best.cor <- collected.worst.cor <- vector("list", nrow(all.cor))
    for (i in seq_len(nrow(all.cor))) {
        current <- setdiff(order(all.cor[i,], decreasing=TRUE), i)

        my.best <- head(current, 20)
        collected.best[[i]] <- colnames(all.cor)[my.best]
        collected.best.cor[[i]] <- all.cor[i,my.best]

        my.worst <- rev(tail(current, 20))
        collected.worst[[i]] <- colnames(all.cor)[my.worst]
        collected.worst.cor[[i]] <- all.cor[i,my.worst]
    }

    expect_identical(df$positive$feature2, unlist(collected.best))
    expect_equal(df$positive$rho, unname(unlist(collected.best.cor)))
    expect_identical(df$negative$feature2, unlist(collected.worst))
    expect_equal(df$negative$rho, unname(unlist(collected.worst.cor)))

    # Expect the same results from one setting of direction.
    dfp <- findTopCorrelations(sce, number=20, d=nrow(sce), direction="positive", BSPARAM=BiocSingular::ExactParam()) 
    expect_identical(dfp, df["positive"])

    dfn <- findTopCorrelations(sce, number=20, d=nrow(sce), direction="negative", BSPARAM=BiocSingular::ExactParam()) 
    expect_identical(dfn, df["negative"])

    # Handles subset.cols correctly.
    sub <- findTopCorrelations(sce, number=5, d=NA, subset.cols=5:150)
    ref <- findTopCorrelations(sce[,5:150], number=5, d=NA)
    expect_identical(sub, ref)
})

test_that("findTopCorrelations p-value is correctly computed", {
    mat <- matrix(runif(2000), ncol=100)
    df <- findTopCorrelations(mat, number=1, d=NA)

    up.p <- down.p <- numeric(nrow(df$positive))
    for (x in seq_len(nrow(df$positive))) {
        up.p[x] <- cor.test(mat[x,], mat[df$positive$feature2[x],], method="spearman", alternative="greater", exact=FALSE)$p.value
        down.p[x] <- cor.test(mat[x,], mat[df$negative$feature2[x],], method="spearman", alternative="less", exact=FALSE)$p.value
    }

    expect_equal(up.p, df$positive$p.value)
    expect_equal(down.p, df$negative$p.value)

    # This should not be true.
    expect_false(isTRUE(all.equal(p.adjust(df$p.value, method="BH"), df$FDR)))
})

test_that("findTopCorrelations correctly excludes self from negative correlations", {
    mat <- matrix(1:5, 10, 5, byrow=TRUE)
    df <- suppressWarnings(findTopCorrelations(mat, number=20, d=ncol(mat), BSPARAM=BiocSingular::ExactParam()) )

    expect_identical(nrow(df$positive), nrow(mat) * (nrow(mat)  - 1L))
    expect_identical(nrow(df$positive), nrow(df$negative))
    expect_true(all(df$negative$feature1!=df$negative$feature2))
})

set.seed(109109101)
test_that("findTopCorrelations approximations work well enough", {
    thingy <- matrix(runif(2000), ncol=100)
    mat <- rbind(thingy, thingy)

    df <- suppressWarnings(findTopCorrelations(mat, number=1, d=NA))
    set.seed(1000)
    df2 <- suppressWarnings(findTopCorrelations(mat, number=1, d=5))
    expect_identical(df$positive, df2$positive)

    mat <- rbind(thingy, -thingy)
    df <- suppressWarnings(findTopCorrelations(mat, number=1, d=NA))
    set.seed(1000)
    df2 <- suppressWarnings(findTopCorrelations(mat, number=1, d=5))
    expect_identical(df$negative, df2$negative)
})

set.seed(109109102)
test_that("findTopCorrelations works for the top cross-correlations", {
    sce1 <- mockSCE(ngenes=100)
    sce1 <- logNormCounts(sce1)

    sce2 <- mockSCE(ngenes=200)
    sce2 <- logNormCounts(sce2)

    df <- findTopCorrelations(sce1, y=sce2, number=20, d=ncol(sce1), BSPARAM=BiocSingular::ExactParam()) 
    df2 <- findTopCorrelations(sce1, y=sce2, number=20, d=NA)
    expect_identical(df, df2)

    # Performing a reference computation.
    all.cor <- cor(t(logcounts(sce1)), t(logcounts(sce2)), method="spearman")
    collected.best <- collected.worst <- collected.best.cor <- collected.worst.cor <- vector("list", nrow(all.cor))
    for (i in seq_len(nrow(all.cor))) {
        current <- order(all.cor[i,], decreasing=TRUE)

        my.best <- head(current, 20)
        collected.best[[i]] <- colnames(all.cor)[my.best]
        collected.best.cor[[i]] <- all.cor[i,my.best]

        my.worst <- rev(tail(current, 20))
        collected.worst[[i]] <- colnames(all.cor)[my.worst]
        collected.worst.cor[[i]] <- all.cor[i,my.worst]
    }

    expect_identical(df$positive$feature2, unlist(collected.best))
    expect_equal(df$positive$rho, unname(unlist(collected.best.cor)))
    expect_identical(df$negative$feature2, unlist(collected.worst))
    expect_equal(df$negative$rho, unname(unlist(collected.worst.cor)))

    # Expect the same results from one setting of direction.
    dfp <- findTopCorrelations(sce1, y=sce2, number=20, d=ncol(sce1), direction="positive", BSPARAM=BiocSingular::ExactParam()) 
    expect_identical(dfp, df["positive"])

    dfn <- findTopCorrelations(sce1, y=sce2, number=20, d=ncol(sce2), direction="negative", BSPARAM=BiocSingular::ExactParam()) 
    expect_identical(dfn, df["negative"])

    # Handles subset.cols correctly.
    sub <- findTopCorrelations(sce1, y=sce2, number=5, d=NA, subset.cols=5:150)
    ref <- findTopCorrelations(sce1[,5:150], y=sce2[,5:150], number=5, d=NA)
    expect_identical(sub, ref)
})

set.seed(109109102)
test_that("findTopCorrelations works with blocked self-correlations", {
    library(scuttle)
    sce <- mockSCE(ngenes=100)
    sce <- logNormCounts(sce)

    ref <- findTopCorrelations(sce, number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    out <- findTopCorrelations(sce, number=20, d=10, block=rep(1, ncol(sce)), BSPARAM=BiocSingular::ExactParam()) 
    expect_identical(ref,out)

    # Equiweighting preserves the results when blocks are unbalanced.
    block <- rep(1:2, each=ncol(sce)/2)
    ref <- findTopCorrelations(sce, number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam())

    expanded <- c(seq_len(ncol(sce)), seq_len(ncol(sce)/2))
    out <- findTopCorrelations(sce[,expanded], number=20, d=10, block=block[expanded], BSPARAM=BiocSingular::ExactParam()) 

    ref$positive <- ref$positive[,1:3]
    ref$positive <- ref$positive[do.call(order, as.list(ref$positive)),]
    out$positive <- out$positive[,1:3]
    out$positive <- out$positive[do.call(order, as.list(out$positive)),]
    ref$negative <- ref$negative[,1:3]
    ref$negative <- ref$negative[do.call(order, as.list(ref$negative)),]
    out$negative <- out$negative[,1:3]
    out$negative <- out$negative[do.call(order, as.list(out$negative)),]
    expect_equal(ref, out)

    # Same results without equiweighting, for balanced blocks.
    weight <- findTopCorrelations(sce, number=20, d=NA, block=block, BSPARAM=BiocSingular::ExactParam())
    noweight <- findTopCorrelations(sce, number=20, d=NA, block=block, equiweight=FALSE, BSPARAM=BiocSingular::ExactParam())
    expect_equal(weight, noweight)

    # Not true of unweighted and unbalanced blocks.    
    block0 <- rep(1:2, c(20, ncol(sce)-20))
    weight <- findTopCorrelations(sce, number=20, d=NA, block=block0, BSPARAM=BiocSingular::ExactParam())
    noweight <- findTopCorrelations(sce, number=20, d=NA, block=block0, equiweight=FALSE, BSPARAM=BiocSingular::ExactParam())
    expect_false(isTRUE(all.equal(weight, noweight)))

    # Handles subset.cols correctly.
    sub <- findTopCorrelations(sce, number=5, block=block, d=NA, subset.cols=5:150)
    ref <- findTopCorrelations(sce[,5:150], block=block[5:150], number=5, d=NA)
    expect_identical(sub, ref)
})

set.seed(109109103)
test_that("findTopCorrelations works with blocked cross-correlations", {
    sce1 <- mockSCE(ngenes=100)
    sce1 <- logNormCounts(sce1)

    sce2 <- mockSCE(ngenes=200)
    sce2 <- logNormCounts(sce2)

    ref <- findTopCorrelations(sce1, y=sce2, number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    out <- findTopCorrelations(sce1, y=sce2, number=20, d=10, block=rep(1, ncol(sce1)), BSPARAM=BiocSingular::ExactParam()) 
    expect_identical(ref,out)

    # Equiweighting preserves the results when blocks are unbalanced.
    block <- rep(1:2, each=ncol(sce1)/2)
    ref <- findTopCorrelations(sce1, y=sce2, number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam())

    expanded <- c(seq_len(ncol(sce1)), seq_len(ncol(sce1)/2))
    out <- findTopCorrelations(sce1[,expanded], y=sce2[,expanded], number=20, d=10, 
        block=block[expanded], BSPARAM=BiocSingular::ExactParam()) 

    ref$positive <- ref$positive[,1:3]
    ref$positive <- ref$positive[do.call(order, as.list(ref$positive)),]
    out$positive <- out$positive[,1:3]
    out$positive <- out$positive[do.call(order, as.list(out$positive)),]
    ref$negative <- ref$negative[,1:3]
    ref$negative <- ref$negative[do.call(order, as.list(ref$negative)),]
    out$negative <- out$negative[,1:3]
    out$negative <- out$negative[do.call(order, as.list(out$negative)),]
    expect_equal(ref, out)

    # Same results without equiweighting, for balanced blocks.
    weight <- findTopCorrelations(sce1, y=sce2, number=20, d=NA, block=block, BSPARAM=BiocSingular::ExactParam())
    noweight <- findTopCorrelations(sce1, y=sce2, number=20, d=NA, block=block, equiweight=FALSE, BSPARAM=BiocSingular::ExactParam())
    expect_equal(weight, noweight)

    # Not true of unweighted and unbalanced blocks.    
    block0 <- rep(1:2, c(20, ncol(sce1)-20))
    weight <- findTopCorrelations(sce1, y=sce2, number=20, d=NA, block=block0, BSPARAM=BiocSingular::ExactParam())
    noweight <- findTopCorrelations(sce1, y=sce2, number=20, d=NA, block=block0, equiweight=FALSE, BSPARAM=BiocSingular::ExactParam())
    expect_false(isTRUE(all.equal(weight, noweight)))

    # Handles subset.cols correctly.
    sub <- findTopCorrelations(sce1, y=sce2, number=5, block=block, d=NA, subset.cols=5:150)
    ref <- findTopCorrelations(sce1[,5:150], y=sce2[,5:150], block=block[5:150], number=5, d=NA)
    expect_identical(sub, ref)
})

set.seed(109109104)
test_that("findTopCorrelations works with unblocked zero-variance genes", {
    sce1 <- mockSCE(ngenes=100)
    vals1 <- counts(sce1)
    vals1[1:10,] <- 0:9

    ref <- findTopCorrelations(vals1, number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    out <- findTopCorrelations(vals1[-(1:10),], number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    expect_identical(ref, out)

    sce2 <- mockSCE(ngenes=100)
    vals2 <- counts(sce2)
    vals2[1:10,] <- 0:9

    ref <- findTopCorrelations(vals1, y=vals2, number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    out <- findTopCorrelations(vals1[-(1:10),], y=vals2[-(1:10),], number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    expect_identical(ref, out)
})

set.seed(109109105)
test_that("findTopCorrelations (self) works with blocked zero-variance genes", {
    sce1 <- mockSCE(ngenes=100)
    vals1 <- counts(sce1)

    block <- sample(2, ncol(sce1), replace=TRUE)
    vals1[1:5,block==1] <- 0
    vals1[6:10,block==2] <- 0

    out <- suppressWarnings(findTopCorrelations(vals1[1:10,], number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam()))
    ref1 <- suppressWarnings(findTopCorrelations(vals1[1:5,block!=1], number=20, d=10, BSPARAM=BiocSingular::ExactParam()))
    ref2 <- suppressWarnings(findTopCorrelations(vals1[6:10,block!=2], number=20, d=10, BSPARAM=BiocSingular::ExactParam()))
    expect_identical(out$positive[,1:4], rbind(ref1$positive[,1:4], ref2$positive[,1:4]))
    expect_identical(out$negative[,1:4], rbind(ref1$negative[,1:4], ref2$negative[,1:4]))

    others <- 11:nrow(vals1)
    refo <- findTopCorrelations(vals1[others,], number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam())
    ref1o <- findTopCorrelations(vals1[1:5,block!=1], y=vals1[others,block!=1], number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    refo1 <- suppressWarnings(findTopCorrelations(vals1[others,block!=1], y=vals1[1:5,block!=1], number=20, d=10, BSPARAM=BiocSingular::ExactParam()))
    ref2o <- findTopCorrelations(vals1[6:10,block!=2], y=vals1[others,block!=2], number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    refo2 <- suppressWarnings(findTopCorrelations(vals1[others,block!=2], y=vals1[6:10,block!=2], number=20, d=10, BSPARAM=BiocSingular::ExactParam()))

    refp <- rbind(ref1$positive, ref2$positive, refo$positive, ref1o$positive, refo1$positive, ref2o$positive, refo2$positive)
    refp <- refp[order(refp$feature1, refp$p.value),]
    refp <- unlist(heads(S4Vectors::split(refp, refp$feature1), 20), use.names=FALSE)
    refn <- rbind(ref1$negative, ref2$negative, refo$negative, ref1o$negative, refo1$negative, ref2o$negative, refo2$negative)
    refn <- refn[order(refn$feature1, refn$p.value),]
    refn <- unlist(heads(S4Vectors::split(refn, refn$feature1), 20), use.names=FALSE)

    out <- suppressWarnings(findTopCorrelations(vals1, number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam()))
    expect_identical(out$positive[,1:4], refp[,1:4])
    expect_identical(out$negative[,1:4], refn[,1:4])
})

set.seed(109109106)
test_that("findTopCorrelations (self) works with blocked zero-variance genes", {
    library(scuttle)
    sce1 <- mockSCE(ngenes=100)
    vals1 <- counts(sce1)
    sce2 <- mockSCE(ngenes=200)
    vals2 <- counts(sce2)

    block <- sample(2, ncol(sce1), replace=TRUE)
    vals1[1:5,block==1] <- 0
    vals2[6:10,block==2] <- 0

    out <- suppressWarnings(findTopCorrelations(vals1[1:5,], y=vals2[6:10,], number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam()))
    expect_identical(nrow(out$positive), 0L)
    expect_identical(nrow(out$negative), 0L)

    others1 <- seq_len(nrow(vals1))[-(1:5)]
    others2 <- seq_len(nrow(vals2))[-(6:10)]
    refo <- findTopCorrelations(vals1[others1,], y=vals2[others2,], number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam())
    ref1 <- findTopCorrelations(vals1[1:5,block!=1], y=vals2[others2,block!=1], number=20, d=10, BSPARAM=BiocSingular::ExactParam())
    ref2 <- suppressWarnings(findTopCorrelations(vals1[others1,block!=2], y=vals2[6:10,block!=2], number=20, d=10, BSPARAM=BiocSingular::ExactParam()))

    refp <- rbind(ref1$positive, ref2$positive, refo$positive)
    refp <- refp[order(refp$feature1, refp$p.value),]
    refp <- unlist(heads(S4Vectors::split(refp, refp$feature1), 20), use.names=FALSE)
    refn <- rbind(ref1$negative, ref2$negative, refo$negative)
    refn <- refn[order(refn$feature1, refn$p.value),]
    refn <- unlist(heads(S4Vectors::split(refn, refn$feature1), 20), use.names=FALSE)

    out <- suppressWarnings(findTopCorrelations(vals1, y=vals2, number=20, d=10, block=block, BSPARAM=BiocSingular::ExactParam()))
    expect_identical(out$positive[,1:4], refp[,1:4])
    expect_identical(out$negative[,1:4], refn[,1:4])
})
LTLA/mumosa documentation built on March 10, 2024, 1:20 a.m.