tests/testthat/test-enrichment.R

## Tests for functions related to enrichment analyses - Aug 2023
set.seed(123)

test_that("`hyperg_test()` -- returns an appropriate p value", {
    expect_is(tmp_p <- hyperg_test(term_genes = LETTERS[1:10], chosen_genes = LETTERS[2:5],
        background_genes = LETTERS), "numeric")
    expect_true(tmp_p >= 0 & tmp_p <= 1)

    expect_is(tmp_p2 <- hyperg_test(term_genes = LETTERS[1:4], chosen_genes = LETTERS[3:10],
        background_genes = LETTERS), "numeric")
    expect_true(tmp_p2 >= 0 & tmp_p2 <= 1)
    expect_true(tmp_p2 > tmp_p)
})

test_that("`hyperg_test()` -- argument checks work", {
    expect_error(hyperg_test(term_genes = list()), "`term_genes` should be a vector")
    expect_error(hyperg_test(term_genes = LETTERS, chosen_genes = list()), "`chosen_genes` should be a vector")
    expect_error(hyperg_test(term_genes = LETTERS, chosen_genes = LETTERS[1:2], background_genes = list()),
        "`background_genes` should be a vector")
    expect_error(hyperg_test(term_genes = c(LETTERS, LETTERS), chosen_genes = LETTERS[1:3],
        background_genes = LETTERS), "`term_genes` cannot be larger than `background_genes`!")
    expect_error(hyperg_test(term_genes = LETTERS[1:10], chosen_genes = c(LETTERS,
        LETTERS), background_genes = LETTERS), "`chosen_genes` cannot be larger than `background_genes`!")
})

test_that("`enrichment()` -- returns a data frame", {
    expected_num_significant <- 10
    gsets <- example_pathfindR_output$ID[1:50]
    p_val_vec <- c(runif(expected_num_significant, min = 1e-05, max = 0.001), runif(length(gsets) -
        expected_num_significant, min = 0.05, max = 1))
    names(p_val_vec) <- gsets
    mock_vapply <- mockery::mock(p_val_vec, 5, 2, cycle = TRUE)
    mockery::stub(enrichment, "vapply", mock_vapply)
    mockery::stub(enrichment, "base::setdiff", c("RPS6KA2", "HSPA2", "SCN4B", "PPP2R1B",
        "PTCH1", "CASP10", "TIRAP", "BEX3", "KIF5C", "TNFSF13B"))

    # default
    expect_is(enr_res <- enrichment(input_genes = example_pathfindR_input$Gene.symbol,
        sig_genes_vec = c("DummyGene"), background_genes = c("DummyGene")), "data.frame")
    expect_equal(nrow(enr_res), expected_num_significant)
    expect_true(any(enr_res$non_Signif_Snw_Genes != ""))
    expect_true(all(enr_res$Fold_Enrichment == 2.5))

    # higher threshold - no filter
    expect_is(enr_res2 <- enrichment(input_genes = example_pathfindR_input$Gene.symbol,
        sig_genes_vec = c("DummyGene"), background_genes = c("DummyGene"), enrichment_threshold = 1), "data.frame")
    expect_equal(nrow(enr_res2), 50)
    expect_true(any(enr_res2$non_Signif_Snw_Genes != ""))

    # no enrichment case
    mockery::stub(enrichment, "stats::p.adjust", rep(1, 50))
    expect_null(enr_res3 <- enrichment(input_genes = example_pathfindR_input$Gene.symbol,
        sig_genes_vec = c("DummyGene"), background_genes = c("DummyGene")))
})

test_that("`enrichment()` -- argument checks work", {
    tmp_input_genes <- example_pathfindR_input$Gene.symbol[1:6]
    tmp_sig_vec <- example_pathfindR_input$Gene.symbol[1:3]
    ## input genes
    expect_error(enrichment(input_genes = list(), sig_genes_vec = "PER1", background_genes = unlist(kegg_genes)),
        "`input_genes` should be a vector of gene symbols")

    ## gene sets data
    expect_error(enrichment(input_genes = tmp_input_genes, genes_by_term = "INVALID",
        sig_genes_vec = tmp_sig_vec, background_genes = unlist(kegg_genes)), "`genes_by_term` should be a list of term gene sets")
    expect_error(enrichment(input_genes = tmp_input_genes, genes_by_term = list(1:3),
        sig_genes_vec = tmp_sig_vec, background_genes = unlist(kegg_genes)), "`genes_by_term` should be a named list \\(names are gene set IDs\\)")

    expect_error(enrichment(input_genes = tmp_input_genes, term_descriptions = list(),
        sig_genes_vec = tmp_sig_vec, background_genes = unlist(kegg_genes)), "`term_descriptions` should be a vector of term gene descriptions")
    expect_error(enrichment(input_genes = tmp_input_genes, term_descriptions = 1:3,
        sig_genes_vec = tmp_sig_vec, background_genes = unlist(kegg_genes)), "`term_descriptions` should be a named vector \\(names are gene set IDs\\)")

    expect_error(enrichment(input_genes = tmp_input_genes, genes_by_term = list(A = 1:3),
        term_descriptions = c(A = "a", B = "b"), sig_genes_vec = tmp_sig_vec, background_genes = unlist(kegg_genes)),
        "The lengths of `genes_by_term` and `term_descriptions` should be the same")
    expect_error(enrichment(input_genes = tmp_input_genes, genes_by_term = list(A = 1:3,
        X = 1:3), term_descriptions = c(A = "a", B = "b"), sig_genes_vec = tmp_sig_vec,
        background_genes = unlist(kegg_genes)), "The names of `genes_by_term` and `term_descriptions` should all be the same")

    ## enrichment threshold
    expect_error(enrichment(input_genes = tmp_input_genes, sig_genes_vec = tmp_sig_vec,
        background_genes = unlist(kegg_genes), enrichment_threshold = "INVALID"),
        "`enrichment_threshold` should be a numeric value between 0 and 1")

    expect_error(enrichment(input_genes = tmp_input_genes, sig_genes_vec = tmp_sig_vec,
        background_genes = unlist(kegg_genes), enrichment_threshold = -1), "`enrichment_threshold` should be between 0 and 1")

    ## signif. genes and background (universal set) genes
    expect_error(enrichment(input_genes = tmp_input_genes, sig_genes_vec = list(),
        background_genes = unlist(kegg_genes)), "`sig_genes_vec` should be a vector")
    expect_error(enrichment(input_genes = tmp_input_genes, sig_genes_vec = tmp_sig_vec,
        background_genes = list()), "`background_genes` should be a vector")
})

tmp_gset_genes <- kegg_genes[example_pathfindR_output$ID[order(example_pathfindR_output$support,
    decreasing = TRUE)[1:10]]]
tmp_gset_desc <- kegg_descriptions[names(tmp_gset_genes)]

all_iter_enr_res <- list(NULL, NULL, NULL)
subnw_start_idx <- 1:3
for (idx in seq_along(subnw_start_idx)) {
    j <- subnw_start_idx[idx]
    res <- enrichment_analyses(snws = example_active_snws[j:j + 2], sig_genes_vec = example_pathfindR_input$Gene.symbol,
        genes_by_term = tmp_gset_genes, term_descriptions = tmp_gset_desc, list_active_snw_genes = TRUE)
    if (!is.null(res)) {
        all_iter_enr_res[[idx]] <- res
    }
}

combined_res <- do.call(rbind, all_iter_enr_res)

test_that("`enrichment_analyses()` -- returns a data frame", {
    toy_pin <- data.frame(V1 = paste("Gene", sample(1:50, 10)), V2 = "pp", V3 = paste("Gene",
        sample(1:50, 10)))
    mockery::stub(enrichment_analyses, "return_pin_path", NULL)
    mockery::stub(enrichment_analyses, "utils::read.delim", toy_pin)

    mock_lapply <- mockery::mock(c(), all_iter_enr_res, cycle = TRUE)
    mockery::stub(enrichment_analyses, "lapply", mock_lapply)

    # default
    expect_is(enr_res1 <- enrichment_analyses(snws = example_active_snws[1:3], sig_genes_vec = example_pathfindR_input$Gene.symbol,
        list_active_snw_genes = FALSE), "data.frame")
    total <- sum(vapply(all_iter_enr_res, function(x) ifelse(is.null(x), 0, nrow(x)),
        1))
    expect_true(nrow(enr_res1) <= total)

    # list active snw genes
    expect_is(enr_res2 <- enrichment_analyses(snws = example_active_snws[1:3], sig_genes_vec = example_pathfindR_input$Gene.symbol,
        list_active_snw_genes = TRUE), "data.frame")
    expect_true(ncol(enr_res2) == ncol(enr_res1) + 1)
})

test_that("`enrichment_analyses()` -- argument check works", {
    expect_error(enrichment_analyses(snws = example_active_snws, list_active_snw_genes = "INVALID"),
        "`list_active_snw_genes` should be either TRUE or FALSE")
})

test_that("`summarize_enrichment_results()` -- returns summarized enrichment results",
    {
        # default
        expect_is(summ_res <- summarize_enrichment_results(enrichment_res = combined_res[,
            -6]), "data.frame")
        expect_equal(ncol(summ_res), 7)
        expect_false("non_Signif_Snw_Genes" %in% colnames(summ_res))
        expect_true(nrow(summ_res) <= nrow(combined_res))

        # list active snw genes
        expect_is(summ_res2 <- summarize_enrichment_results(enrichment_res = combined_res,
            list_active_snw_genes = TRUE), "data.frame")
        expect_equal(ncol(summ_res2), 8)
        expect_true("non_Signif_Snw_Genes" %in% colnames(summ_res2))
        expect_true(nrow(summ_res2) <= nrow(combined_res))
    })

test_that("`summarize_enrichment_results()` -- argument checks work", {
    expect_error(summarize_enrichment_results(enrichment_res = combined_res, list_active_snw_genes = "INVALID"),
        "`list_active_snw_genes` should be either TRUE or FALSE")

    expect_error(summarize_enrichment_results(enrichment_res = list()), "`enrichment_res` should be a data frame")

    # list_active_snw_genes = FALSE
    nec_cols <- c("ID", "Term_Description", "Fold_Enrichment", "p_value", "adj_p",
        "support")

    expect_error(summarize_enrichment_results(enrichment_res = data.frame()), paste0("`enrichment_res` should have exactly ",
        length(nec_cols), " columns"))

    tmp <- as.data.frame(matrix(nrow = 1, ncol = length(nec_cols), dimnames = list(NULL,
        letters[seq_along(nec_cols)])))
    expect_error(summarize_enrichment_results(enrichment_res = tmp), paste0("`enrichment_res` should have column names ",
        paste(dQuote(nec_cols), collapse = ", ")))

    # list_active_snw_genes = TRUE
    nec_cols <- c("ID", "Term_Description", "Fold_Enrichment", "p_value", "adj_p",
        "support", "non_Signif_Snw_Genes")

    expect_error(summarize_enrichment_results(enrichment_res = data.frame(), list_active_snw_genes = TRUE),
        paste0("`enrichment_res` should have exactly ", length(nec_cols), " columns"))

    tmp <- as.data.frame(matrix(nrow = 1, ncol = length(nec_cols), dimnames = list(NULL,
        letters[seq_along(nec_cols)])))
    expect_error(summarize_enrichment_results(enrichment_res = tmp, list_active_snw_genes = TRUE),
        paste0("`enrichment_res` should have column names ", paste(dQuote(nec_cols),
            collapse = ", ")))
})
egeulgen/pathfindR documentation built on May 8, 2024, 8:40 a.m.