tests/testthat/test-clustering.R

## Tests for enriched term clustering functions - Aug 2023

enrichment_res <- example_pathfindR_output[1:5, ]
input_kappa_mat <- create_kappa_matrix(enrichment_res)

test_that("`create_kappa_matrix()` -- creates kappa matrix", {
    input_df <- enrichment_res
    kappa_mat <- create_kappa_matrix(input_df)
    expect_true(isSymmetric.matrix(kappa_mat))
    expect_true(all(kappa_mat >= 0 & kappa_mat <= 1 | kappa_mat >= -1 & kappa_mat <=
        0))
    expect_identical(colnames(kappa_mat), rownames(kappa_mat))
    expect_identical(colnames(kappa_mat), input_df$ID)

    # zero length excluded
    input_df2 <- input_df
    input_df2$Down_regulated[1] <- input_df2$Up_regulated[1] <- ""
    kappa_mat2 <- create_kappa_matrix(input_df2)
    expect_true(isSymmetric.matrix(kappa_mat2))
    expect_false(input_df2$ID[1] %in% colnames(kappa_mat2))

    input_df$non_Signif_Snw_Genes <- c("GeneA, GeneB", "GeneA", "GeneC", "GeneB, GeneC",
        "")
    kappa_mat3 <- create_kappa_matrix(input_df, use_active_snw_genes = TRUE)
    expect_true(isSymmetric.matrix(kappa_mat3))
    expect_true(!all(kappa_mat3 != kappa_mat))
})

test_that("`create_kappa_matrix()` -- argument checks works", {
    expect_error(create_kappa_matrix(example_pathfindR_output, use_description = "INVALID"),
        "`use_description` should be TRUE or FALSE")
    expect_error(create_kappa_matrix(example_pathfindR_output, use_active_snw_genes = "INVALID"),
        "`use_active_snw_genes` should be TRUE or FALSE")
    expect_error(create_kappa_matrix(list()), "`enrichment_res` should be a data frame of enrichment results")
    expect_error(create_kappa_matrix(example_pathfindR_output[1, ]), "`enrichment_res` should contain at least 2 rows")

    cr_cols <- function(use_description = FALSE, use_active_snw_genes = FALSE) {
        nec_cols <- c("Down_regulated", "Up_regulated")
        if (use_description) {
            nec_cols <- c("Term_Description", nec_cols)
        } else {
            nec_cols <- c("ID", nec_cols)
        }
        if (use_active_snw_genes) {
            nec_cols <- c(nec_cols, "non_Signif_Snw_Genes")
        }
        return(nec_cols)
    }

    # desc F
    nec_cols <- cr_cols()
    valid_res <- enrichment_res[, -2]
    expect_silent(create_kappa_matrix(valid_res))
    invalid_res <- enrichment_res[, -1]
    expect_error(create_kappa_matrix(invalid_res), paste0("`enrichment_res` should contain all of ",
        paste(dQuote(nec_cols), collapse = ", ")))
    # desc T
    nec_cols <- cr_cols(use_description = TRUE)
    valid_res <- enrichment_res[, -1]
    expect_silent(create_kappa_matrix(valid_res, use_description = TRUE))
    invalid_res <- enrichment_res[, -2]
    expect_error(create_kappa_matrix(invalid_res, use_description = TRUE), paste0("`enrichment_res` should contain all of ",
        paste(dQuote(nec_cols), collapse = ", ")))
    # snw_g T
    nec_cols <- cr_cols(use_active_snw_genes = TRUE)
    valid_res <- enrichment_res
    valid_res$non_Signif_Snw_Genes <- ""
    expect_silent(create_kappa_matrix(valid_res, use_active_snw_genes = TRUE))
    expect_error(create_kappa_matrix(enrichment_res, use_active_snw_genes = TRUE),
        paste0("`enrichment_res` should contain all of ", paste(dQuote(nec_cols),
            collapse = ", ")))
})

test_that("`hierarchical_term_clustering()` -- returns integer vector", {
    m <- mockery::mock(NULL, cycle = TRUE)
    mockery::stub(hierarchical_term_clustering, "graphics::plot", m)
    mockery::stub(hierarchical_term_clustering, "stats::heatmap", m)
    mockery::stub(hierarchical_term_clustering, "stats::rect.hclust", m)

    expected_message_regex <- "The maximum average silhouette width was -?(0\\.?\\d{0,2}|1) for k = \\d+ \n\n"
    expect_message(clu_res <- hierarchical_term_clustering(input_kappa_mat, enrichment_res,
        plot_hmap = TRUE, plot_dend = TRUE), expected_message_regex)
    expect_is(clu_res, "integer")
    expect_true(max(clu_res) <= nrow(input_kappa_mat))
    expect_identical(rownames(input_kappa_mat), names(clu_res))
})

test_that("`hierarchical_term_clustering()` -- `num_clusters` works", {
    for (selected_num_clusters in seq_len(nrow(enrichment_res))) {
        expect_is(res <- hierarchical_term_clustering(input_kappa_mat, enrichment_res,
            num_clusters = selected_num_clusters, plot_hmap = FALSE, plot_dend = FALSE),
            "integer")
        expect_equal(max(res), selected_num_clusters)
    }
})

test_that("`hierarchical_term_clustering()` -- `kseq` (sequence of number of clusters to try) is determined appropriately",
    {
        mockery::stub(hierarchical_term_clustering, "stats::hclust", NULL)
        mockery::stub(hierarchical_term_clustering, "isSymmetric.matrix", TRUE)

        mock_cutree <- function(tree, k, h = NULL) {
            return(k)
        }
        mockery::stub(hierarchical_term_clustering, "stats::cutree", mock_cutree)

        for (num_terms in c(3, 15, 153, 200, 204, 432)) {
            kmax <- max(num_terms%/%2, 2)
            num_expected_calls <- ifelse(kmax <= 20, kmax - 1, ifelse(kmax <= 100,
                18 + kmax%/%10 - 1, 26 + kmax%/%50 - 1))
            target_k <- ifelse(kmax <= 20, kmax, ifelse(kmax <= 100, round(kmax%/%10) *
                10, round(kmax%/%50) * 50))

            tmp_enr_res <- example_pathfindR_output[seq_len(num_terms), ]
            tmp_kappa_mat <- matrix(NA, nrow = num_terms, ncol = num_terms, dimnames = list(tmp_enr_res$ID,
                tmp_enr_res$ID))

            silwidth_out_vec <- vector("list", num_expected_calls)
            for (idx in seq_len(num_expected_calls)) {
                if (idx == length(silwidth_out_vec)) {
                  silwidth_out_vec[[idx]] <- list(avg.silwidth = 100)
                } else {
                  silwidth_out_vec[[idx]] <- list(avg.silwidth = -100)
                }
            }
            mock_cluster.stats <- do.call(mockery::mock, silwidth_out_vec)
            mockery::stub(hierarchical_term_clustering, "fpc::cluster.stats", mock_cluster.stats)

            expected_message <- paste0("The maximum average silhouette width was 100 for k = ",
                target_k, " \n\n")
            expect_message(res_k <- hierarchical_term_clustering(tmp_kappa_mat, tmp_enr_res,
                plot_hmap = FALSE, plot_dend = FALSE), expected_message)
            expect_equal(res_k, target_k)
            mockery::expect_called(mock_cluster.stats, num_expected_calls)
        }
    })

test_that("`hierarchical_term_clustering()` -- argument checks work", {
    expect_error(hierarchical_term_clustering(kappa_mat = list(), enrichment_res = data.frame()),
        "`kappa_mat` should be a symmetric matrix")
    expect_error(hierarchical_term_clustering(kappa_mat = matrix(nrow = 1, ncol = 2),
        enrichment_res = data.frame()), "`kappa_mat` should be a symmetric matrix")

    mat <- matrix(nrow = 3, ncol = 3, dimnames = list(1:3, 1:3))
    expect_error(hierarchical_term_clustering(kappa_mat = mat, enrichment_res = data.frame(ID = 4:5)),
        "All terms in `kappa_mat` should be present in `enrichment_res`")
    expect_error(hierarchical_term_clustering(kappa_mat = mat, enrichment_res = data.frame(ID = 1:3),
        plot_hmap = "INVALID"), "`plot_hmap` should be TRUE or FALSE")
    expect_error(hierarchical_term_clustering(kappa_mat = mat, enrichment_res = data.frame(ID = 1:3),
        plot_dend = "INVALID"), "`plot_dend` should be TRUE or FALSE")
})

test_that("`fuzzy_term_clustering()` -- returns matrix of cluster memberships", {
    expect_is(res_mat <- fuzzy_term_clustering(create_kappa_matrix(example_pathfindR_output[1:25,
        ]), example_pathfindR_output[1:25, ], kappa_threshold = 0.1), "matrix")
    expect_true(is.logical(res_mat))
})

test_that("`fuzzy_term_clustering()` -- argument checks work", {
    expect_error(fuzzy_term_clustering(kappa_mat = list(), enrichment_res = data.frame()),
        "`kappa_mat` should be a symmetric matrix")
    expect_error(fuzzy_term_clustering(kappa_mat = matrix(nrow = 1, ncol = 2), enrichment_res = data.frame()),
        "`kappa_mat` should be a symmetric matrix")

    mat <- matrix(nrow = 3, ncol = 3, dimnames = list(1:3, 1:3))
    expect_error(fuzzy_term_clustering(kappa_mat = mat, enrichment_res = data.frame(ID = 4:5)),
        "All terms in `kappa_mat` should be present in `enrichment_res`")
    expect_error(fuzzy_term_clustering(kappa_mat = mat, enrichment_res = data.frame(ID = 1:3),
        kappa_threshold = "INVALID"), "`kappa_threshold` should be numeric")
    expect_error(fuzzy_term_clustering(kappa_mat = mat, enrichment_res = data.frame(ID = 1:3),
        kappa_threshold = 1.5), "`kappa_threshold` should be at most 1 as kappa statistic is always <= 1")
})

test_that("`cluster_graph_vis()` -- graph visualization of clusters works OK", {
    mockery::stub(hierarchical_term_clustering, "graphics::plot", NULL)
    mockery::stub(hierarchical_term_clustering, "stats::rect.hclust", NULL)
    mock_plot.igraph <- mockery::mock(NULL, cycle = TRUE)
    mockery::stub(cluster_graph_vis, "igraph::plot.igraph", mock_plot.igraph)
    ## use_description = FALSE
    for (clustering_func in c(hierarchical_term_clustering, fuzzy_term_clustering)) {
        clu_obj <- clustering_func(input_kappa_mat, enrichment_res)
        expect_silent(cluster_graph_vis(clu_obj, input_kappa_mat, enrichment_res))
    }
    mockery::expect_called(mock_plot.igraph, 2)
})

test_that("`cluster_graph_vis()` -- coloring of 'extra' clusters work", {
    mockery::stub(cluster_graph_vis, "igraph::plot.igraph", NULL)
    ### more than 41 clusters (number of colors available)
    selected_num_terms <- 45
    clu_input_df <- example_pathfindR_output[seq_len(selected_num_terms), ]
    mock_kappa_mat <- matrix(NA, nrow = selected_num_terms, ncol = selected_num_terms,
        dimnames = list(clu_input_df$ID, clu_input_df$ID))

    # dummy hierarchical result
    hierarchical_clu_obj <- seq_len(selected_num_terms)
    names(hierarchical_clu_obj) <- clu_input_df$ID
    expect_silent(cluster_graph_vis(hierarchical_clu_obj, mock_kappa_mat, clu_input_df))

    # dummy fuzzy result
    fuzzy_clu_obj <- matrix(FALSE, nrow = selected_num_terms, ncol = selected_num_terms,
        dimnames = list(clu_input_df$ID, seq_len(selected_num_terms)))
    diag(fuzzy_clu_obj) <- TRUE
    expect_silent(cluster_graph_vis(fuzzy_clu_obj, mock_kappa_mat, clu_input_df))
})

test_that("`cluster_graph_vis()` -- check errors are raised appropriately", {
    expect_error(cluster_graph_vis(list(), matrix(), data.frame(ID = 1)), "Invalid class for `clu_obj`!")

    # hierarchical - missing terms in kappa matrix
    clu_obj <- hierarchical_term_clustering(input_kappa_mat, enrichment_res, plot_dend = FALSE)
    expect_error(cluster_graph_vis(c(clu_obj, EXTRA = 1L), input_kappa_mat, enrichment_res),
        "Not all terms in `clu_obj` present in `kappa_mat`!")

    # fuzzy - missing terms in kappa matrix
    clu_obj <- fuzzy_term_clustering(input_kappa_mat, enrichment_res)
    expect_error(cluster_graph_vis(rbind(clu_obj, EXTRA = rep(FALSE, ncol(clu_obj))),
        input_kappa_mat, enrichment_res), "Not all terms in `clu_obj` present in `kappa_mat`!")
})

test_that("`cluster_enriched_terms()` -- returns the input data frame
          with the additional columns `Cluster` and `Status`",
    {
        set.seed(123)
        num_clusters <- 3
        available_clus <- seq_len(num_clusters)
        toy_hierarchical_clu_obj <- sample(available_clus, size = nrow(enrichment_res) -
            1, replace = TRUE)
        missing_clu <- setdiff(available_clus, toy_hierarchical_clu_obj)
        toy_hierarchical_clu_obj <- c(toy_hierarchical_clu_obj, missing_clu)
        toy_hierarchical_clu_obj <- sample(toy_hierarchical_clu_obj)
        names(toy_hierarchical_clu_obj) <- enrichment_res$ID

        toy_fuzzy_clu_obj <- matrix(FALSE, nrow = nrow(enrichment_res), ncol = num_clusters,
            dimnames = list(enrichment_res$ID, available_clus))
        for (row_idx in seq_len(nrow(toy_fuzzy_clu_obj))) {
            num_memberships <- sample(available_clus, 1)
            new_cols <- sample(available_clus, size = num_memberships)
            toy_fuzzy_clu_obj[row_idx, new_cols] <- TRUE
        }

        mock_doCall <- function(...) {
            arguments <- list(...)
            if (arguments[1] == "hierarchical_term_clustering") {
                return(toy_hierarchical_clu_obj)
            }
            if (arguments[1] == "fuzzy_term_clustering") {
                return(toy_fuzzy_clu_obj)
            }
            return(NULL)
        }

        mockery::stub(cluster_enriched_terms, "create_kappa_matrix", input_kappa_mat)
        mockery::stub(cluster_enriched_terms, "R.utils::doCall", mock_doCall)

        # hierarchical
        expect_is(h_clu_res <- cluster_enriched_terms(enrichment_res), "data.frame")
        expect_true(all(c("Cluster", "Status") %in% colnames(h_clu_res)))
        expect_equal(max(h_clu_res$Cluster), num_clusters)
        # expect to have same number of rep. terms as the number of clusters
        expect_equal(max(h_clu_res$Cluster), sum(h_clu_res$Status == "Representative"))

        ## fuzzy
        expect_is(fuzzy_clu_res <- cluster_enriched_terms(enrichment_res, method = "fuzzy"),
            "data.frame")
        expect_true(all(c("Cluster", "Status") %in% colnames(fuzzy_clu_res)))
        expect_true(max(fuzzy_clu_res$Cluster) <= sum(fuzzy_clu_res$Status == "Representative"))
    })

test_that("`cluster_enriched_terms()` argument checks work", {
    expect_error(cluster_enriched_terms(enrichment_res, method = "INVALID"), "the clustering `method` must either be \"hierarchical\" or \"fuzzy\"")
    expect_error(cluster_enriched_terms(enrichment_res, plot_clusters_graph = "INVALID"),
        "`plot_clusters_graph` must be logical!")
})
egeulgen/pathfindR documentation built on May 8, 2024, 8:40 a.m.