tests/testthat/test-data_generation.R

## Tests for functions related to data generation - Aug 2023

set.seed(123)
gene_pool <- paste0("Gene", 1:100)
toy_biogrid_pin <- data.frame(A = sample(gene_pool, 25), B = sample(gene_pool, 25))
colnames(toy_biogrid_pin) <- c("Official Symbol Interactor A", "Official Symbol Interactor B")

test_that("`process_pin()` -- removes self-interactions and duplicated interactions",
    {
        input_pin_df <- toy_biogrid_pin
        colnames(input_pin_df) <- c("Interactor_A", "Interactor_B")
        input_pin_df <- rbind(input_pin_df, data.frame(Interactor_A = input_pin_df$Interactor_B[1:5],
            Interactor_B = input_pin_df$Interactor_A[1:5]))

        processed_df <- process_pin(input_pin_df)

        expect_true(nrow(processed_df) < nrow(input_pin_df))
    })

test_that("`get_biogrid_pin()` -- returns a path to a valid PIN file", {
    mockery::stub(get_biogrid_pin, "utils::download.file", NULL)
    mockery::stub(get_biogrid_pin, "utils::unzip", list(Name = "BIOGRID-ORGANISM-Homo_sapiens-4.4.211.tab3.txt"))
    mockery::stub(get_biogrid_pin, "utils::read.delim", toy_biogrid_pin)

    expected_biogrid_pin_df <- toy_biogrid_pin
    colnames(expected_biogrid_pin_df) <- c("Interactor_A", "Interactor_B")
    expected_biogrid_pin_df <- process_pin(expected_biogrid_pin_df)
    expected_biogrid_pin_df <- data.frame(V1 = expected_biogrid_pin_df$Interactor_A,
        V2 = "pp", V3 = expected_biogrid_pin_df$Interactor_B)

    pin_path <- get_biogrid_pin(release = "4.4.211")
    pin_df <- read.delim(pin_path, header = FALSE)
    expect_true(ncol(pin_df) == 3)
    expect_true(all(pin_df[, 2] == "pp"))
    expect_identical(pin_df, expected_biogrid_pin_df)
})

test_that("`get_biogrid_pin()` -- determines and downloads the latest version", {
  mockery::stub(get_biogrid_pin, "utils::download.file", NULL)
  mockery::stub(get_biogrid_pin, "utils::unzip", list(Name = "BIOGRID-ORGANISM-Homo_sapiens-X.X.X.tab3.txt"))
  mockery::stub(get_biogrid_pin, "utils::read.delim", toy_biogrid_pin)

  expected_biogrid_pin_df <- toy_biogrid_pin
  colnames(expected_biogrid_pin_df) <- c("Interactor_A", "Interactor_B")
  expected_biogrid_pin_df <- process_pin(expected_biogrid_pin_df)
  expected_biogrid_pin_df <- data.frame(V1 = expected_biogrid_pin_df$Interactor_A,
                                        V2 = "pp", V3 = expected_biogrid_pin_df$Interactor_B)

  pin_path <- get_biogrid_pin()
  pin_df <- read.delim(pin_path, header = FALSE)
  expect_true(ncol(pin_df) == 3)
  expect_true(all(pin_df[, 2] == "pp"))
  expect_identical(pin_df, expected_biogrid_pin_df)
})

test_that("`get_biogrid_pin()` -- error check works", {
    # invalid organism error
    expect_error(get_biogrid_pin(org = "Hsapiens"), paste("Hsapiens is not a valid Biogrid organism.",
        "Available organisms are listed on: https://wiki.thebiogrid.org/doku.php/statistics"))
})

test_that("`get_pin_file()` -- works as expected", {
    with_mocked_bindings({
        pin_path <- get_pin_file()
        expect_identical(pin_path, "/path/to/some/PIN/file")
    }, get_biogrid_pin = function(...) "/path/to/some/PIN/file", .package = "pathfindR")

    expect_error(get_pin_file(source = "STRING"), "As of this version, this function is implemented to get data from BioGRID only")
})

test_that("`gset_list_from_gmt()` -- works as expected", {
    gmt_list <- list(GSA = sample(gene_pool, 80), GSB = sample(gene_pool, 100), GSC = sample(gene_pool,
        33))
    description_vec <- c(GSA = "gene set A", GSB = "gene set B", GSC = "gene set C")

    gmt_df <- c()
    for (gset in names(gmt_list)) {
        tmp <- c(gset, description_vec[gset])
        tmp <- c(tmp, gmt_list[[gset]], rep("", 100 - length(gmt_list[[gset]])))
        gmt_df <- rbind(gmt_df, tmp)
    }

    path2gmt <- tempfile()
    write.table(gmt_df, path2gmt, sep = "\t", col.names = FALSE, row.names = FALSE,
        quote = FALSE)

    expect_is(res <- gset_list_from_gmt(path2gmt), "list")
    expect_identical(res$gene_sets, gmt_list)
    expect_identical(res$descriptions, description_vec)
})


test_that("`get_kegg_gsets()` -- works as expected", {
  skip_on_cran()
  mock_response <- "eco00010\tdescription\neco00071\tdescription2"

  # function to manage sequential responses
  mock_content <- function(...) {
    return(mock_response)
  }


  with_mock(`httr::content` = mock_content, {
    expect_is(toy_eco_kegg <- pathfindR:::get_kegg_gsets(), "list")
  })

  expect_length(toy_eco_kegg, 2)
  expect_true(all(names(toy_eco_kegg) == c("gene_sets", "descriptions")))
  expect_true(all(names(toy_eco_kegg[["gene_sets"]]) %in% names(toy_eco_kegg[["descriptions"]])))
  expect_length(toy_eco_kegg[["gene_sets"]], 2)
  expect_length(toy_eco_kegg[["descriptions"]], 2)

  expect_true(toy_eco_kegg[["descriptions"]]["eco00010"] == "description")
  expect_true(toy_eco_kegg[["descriptions"]]["eco00071"] == "description2")

  expect_length(toy_eco_kegg[["gene_sets"]][["eco00010"]], 47)
  expect_length(toy_eco_kegg[["gene_sets"]][["eco00071"]], 15)
})

test_that("`get_reactome_gsets()` -- works as expected", {
  skip_on_cran()

  pw1 <- "Pathway1"
  pw2 <- "Pathway2"
  desc1 <- "Description1"
  desc2 <- "Description2"
  genes1 <- c("GeneA", "GeneB")
  genes2 <- c("GeneC", "GeneD", "GeneE")

  gmt_content <- paste(
    c(
      paste(c(desc1, pw1, genes1), collapse = "\t"),
      paste(c(desc2, pw2, genes2), collapse = "\t")
    ),
    collapse = "\n"
  )

  mockery::stub(get_reactome_gsets, "utils::download.file", NULL)

  unz_mock <- function(zipfile, filename, ...) {
    textConnection(gmt_content)
  }
  mockery::stub(get_reactome_gsets, "unz", unz_mock)

  expected_gsets <- list(genes1, genes2)
  names(expected_gsets) <- c(pw1, pw2)
  expect_descriptions <- c(desc1, desc2)
  names(expect_descriptions) <- c(pw1, pw2)

  expect_is(reactome <- get_reactome_gsets(), "list")
  expect_length(reactome, 2)
  expect_length(reactome$gene_sets, 2)
  expect_length(reactome$descriptions, 2)
  expect_equal(names(reactome$gene_sets), names(reactome$descriptions))
  expect_equal(reactome$gene_sets, expected_gsets)
  expect_equal(reactome$descriptions, expect_descriptions)
})

test_that("`get_mgsigdb_gsets()` -- works as expected", {
    toy_msigdb_df <- c()
    for (gs_idx in 1:5) {
        toy_msigdb_df <- rbind(toy_msigdb_df, data.frame(gene_symbol = sample(gene_pool,
            sample(25:75, 1)), gs_id = paste0("GS", gs_idx), gs_name = paste("Gene Set",
            gs_idx)))
    }
    mockery::stub(get_mgsigdb_gsets, "msigdbr::msigdbr", toy_msigdb_df)

    expect_is(res_msig_db <- get_mgsigdb_gsets(collection = "C1"), "list")
    expect_length(res_msig_db, 2)
    expect_true(all(names(res_msig_db) == c("gene_sets", "descriptions")))
    expect_true(all(names(res_msig_db[["gene_sets"]] %in% names(res_msig_db[["descriptions"]]))))
})

test_that("`get_mgsigdb_gsets()` -- error works", {
    all_collections <- c("H", "C1", "C2", "C3", "C4", "C5", "C6", "C7")
    expect_error(pathfindR:::get_mgsigdb_gsets(collection = "INVALID"), paste0("`collection` should be one of ",
        paste(dQuote(all_collections), collapse = ", ")))
})

test_that("`get_gene_sets_list()` works", {
    expect_error(gsets <- get_gene_sets_list("Wiki"), "As of this version, this function is implemented to get data from KEGG, Reactome and MSigDB only")

    mockery::stub(get_gene_sets_list, "get_kegg_gsets", NULL)
    mockery::stub(get_gene_sets_list, "get_reactome_gsets", NULL)
    mockery::stub(get_gene_sets_list, "get_mgsigdb_gsets", NULL)
    expect_silent(kegg <- get_gene_sets_list(org_code = "vcn"))
    expect_message(rctm <- get_gene_sets_list("Reactome"))
    expect_silent(msig <- get_gene_sets_list("MSigDB", species = "Mus musculus",
        collection = "C3", subcollection = "MIR:MIR_Legacy"))
})
egeulgen/pathfindR documentation built on May 8, 2024, 8:40 a.m.