tests/testthat/test-vimp_table.R

testthat::skip_on_cran()
# power.transform is also required for transformation_method = "none".
if (!rlang::is_installed("power.transform")) testthat::skip()

# Basic tests ------------------------------------------------------------------

data <- familiar:::test_create_synthetic_correlated_data(
  outcome_type = "continuous",
  n_numeric = 2,
  cluster_size = c(1, 1, 2, 3))

vimp_object <- familiar:::prepare_vimp_object(
  data = data,
  vimp_method = "pearson",
  outcome_type = "continuous",
  transformation_method = "none",
  normalisation_method = "none",
  cluster_method = "hclust",
  cluster_cut_method = "fixed_cut",
  cluster_similarity_metric = "mcfadden_r2",
  cluster_similarity_threshold = 0.99,
  imputation_method = "simple")

vimp_table_object <- suppressWarnings(familiar:::.vimp(vimp_object, data))

testthat::test_that("Test that the variable importance table in its initial state is correct.", {
  vimp_table <- familiar:::get_vimp_table(vimp_table_object, "initial")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name"))

  # Assert that there are two features that end with cluster.
  testthat::expect_equal(sum(sapply(vimp_table$name, endsWith, "cluster")), 2L)

  # Assert that there are three entries for feature_1.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_1")), 3L)

  # Assert that there are four entries for feature_2.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_2")), 4L)
})


testthat::test_that("Test that the variable importance table in its encoded state is correct.", {
  vimp_table <- familiar:::get_vimp_table(vimp_table_object, "decoded")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name"))

  # Assert that there are two features that end with cluster.
  testthat::expect_equal(sum(sapply(vimp_table$name, endsWith, "cluster")), 2L)

  # Assert that there is one entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 1L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)
})


testthat::test_that("Test that the variable importance table in its declustered state is correct.", {
  vimp_table <- familiar:::get_vimp_table(vimp_table_object, "declustered")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name"))

  # Assert that there is one entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 1L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)

  # Assert that there are two entries for feature_3.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_3")), 2L)

  # Assert that there are three entries for feature_4.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_4")), 3L)
})


testthat::test_that("Test that the variable importance table in its re-clustered state is correct.", {
  # Compute reclustered vimp table.
  reclustered_vimp_table_object <- familiar:::recluster_vimp_table(vimp_table_object)
  reclustered_vimp_table <- familiar:::get_vimp_table(reclustered_vimp_table_object, "reclustered")

  # Compute decoded variable importance table. The reclustered table and the
  # decoded table should have the same contents.
  vimp_table <- familiar:::get_vimp_table(vimp_table_object, "decoded")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name"))

  # Assert that there are two features that end with cluster.
  testthat::expect_equal(sum(sapply(vimp_table$name, endsWith, "cluster")), 2L)

  # Assert that there is one entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 1L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)

  # Assert that scores match.
  testthat::expect_equal(
    reclustered_vimp_table[order(name)]$score,
    vimp_table[order(name)]$score)
})


testthat::test_that("Test that the variable importance table in its ranked state is correct.", {
  vimp_table <- familiar:::get_vimp_table(vimp_table_object, "ranked")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name", "rank"))

  # Assert that there is one entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 1L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)

  # Assert that there are two entries for feature_3.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_3")), 2L)

  # Assert that there are three entries for feature_4.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_4")), 3L)

  # Assert that there 4 ranks.
  testthat::expect_equal(sort(unique(vimp_table$rank)), seq_len(4L))

  # Assert that feature_3 has the same rank for each entry.
  testthat::expect_equal(data.table::uniqueN(vimp_table[startsWith(name, "feature_3")]$rank), 1L)

  # Assert that feature_4 has the same rank for each entry.
  testthat::expect_equal(data.table::uniqueN(vimp_table[startsWith(name, "feature_4")]$rank), 1L)
})

testthat::test_that("Test that the re-clustered variable importance table in its ranked state is correct.", {
  reclustered_vimp_table_object <- familiar:::recluster_vimp_table(vimp_table_object)
  vimp_table <- familiar:::get_vimp_table(reclustered_vimp_table_object, "ranked")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name", "rank"))

  # Assert that there are two features that end with cluster.
  testthat::expect_equal(sum(sapply(vimp_table$name, endsWith, "cluster")), 2L)

  # Assert that there is one entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 1L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)
})

# Test updating with different cluster table -----------------------------------

data_2 <- familiar:::test_create_synthetic_correlated_data(
  outcome_type = "continuous",
  n_numeric = 2,
  cluster_size = c(1, 1, 4, 2))

vimp_object_2 <- familiar:::prepare_vimp_object(
  data = data_2,
  vimp_method = "pearson",
  outcome_type = "continuous",
  transformation_method = "none",
  normalisation_method = "none",
  cluster_method = "hclust",
  cluster_cut_method = "fixed_cut",
  cluster_similarity_metric = "mcfadden_r2",
  cluster_similarity_threshold = 0.99,
  imputation_method = "simple")

reference_cluster_table <- familiar:::.create_clustering_table(
  vimp_object_2@feature_info)

# Update the variable importance object using the reference cluster table.
updated_vimp_table_object <- familiar:::update_vimp_table_to_reference(
  vimp_table_object,
  reference_cluster_table)

testthat::test_that("Updating variable importance tables functions correctly.", {
  vimp_table <- familiar:::get_vimp_table(updated_vimp_table_object, "ranked")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name", "rank"))

  # Assert that there is one entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 1L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)

  # Assert that there are two entries for feature_3.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_3")), 4L)

  # Assert that there are three entries for feature_4.
  testthat::expect_equal(sum(sapply(vimp_table$name, startsWith, "feature_4")), 2L)

  # Assert that there 4 ranks.
  testthat::expect_equal(sort(unique(vimp_table$rank)), seq_len(4L))

  # Assert that feature_3 has the same rank for each entry.
  testthat::expect_equal(data.table::uniqueN(vimp_table[startsWith(name, "feature_3")]$rank), 1L)

  # Assert that feature_4 has the same rank for each entry.
  testthat::expect_equal(data.table::uniqueN(vimp_table[startsWith(name, "feature_4")]$rank), 1L)
})

testthat::test_that("Updating variable importance tables with reclustering functions correctly.", {
  reclustered_vimp_table_object <- familiar:::recluster_vimp_table(
    updated_vimp_table_object)
  vimp_table <- familiar:::get_vimp_table(reclustered_vimp_table_object, "ranked")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name", "rank"))

  # Assert that there are two features that end with cluster.
  testthat::expect_equal(sum(sapply(vimp_table$name, endsWith, "cluster")), 2L)

  # Assert that there is one entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 1L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)
})

# Test updating table where all features are different -------------------------

data_3 <- familiar:::test_create_synthetic_correlated_data(
  outcome_type = "continuous",
  n_numeric = 2,
  cluster_size = c(1, 1, 1, 1))

# Rename features.
data.table::setnames(
  x = data_3@data,
  old = c("feature_1", "feature_2", "feature_3", "feature_4"),
  new = c("feature_A", "feature_B", "feature_C", "feature_D"))

vimp_object_3 <- familiar:::prepare_vimp_object(
  data = data_3,
  vimp_method = "pearson",
  outcome_type = "continuous",
  transformation_method = "none",
  normalisation_method = "none",
  cluster_method = "hclust",
  cluster_cut_method = "fixed_cut",
  cluster_similarity_metric = "mcfadden_r2",
  cluster_similarity_threshold = 0.99,
  imputation_method = "simple")

reference_cluster_table <- familiar:::.create_clustering_table(
  vimp_object_3@feature_info)

# Update the variable importance object using the reference cluster table.
updated_vimp_table_object <- familiar:::update_vimp_table_to_reference(
  vimp_table_object,
  reference_cluster_table)

testthat::test_that("Updating variable importance tables with mismatching reference cluster tables.", {
  vimp_table <- familiar:::get_vimp_table(updated_vimp_table_object, "ranked")

  testthat::expect_equal(familiar:::is_empty(updated_vimp_table_object), TRUE)

  testthat::expect_equal(familiar:::is_empty(vimp_table), TRUE)
})


testthat::test_that("Updating variable importance tables with reclustering functions correctly.", {
  reclustered_vimp_table_object <- familiar:::recluster_vimp_table(updated_vimp_table_object)
  vimp_table <- familiar:::get_vimp_table(reclustered_vimp_table_object, "ranked")

  testthat::expect_equal(familiar:::is_empty(updated_vimp_table_object), TRUE)

  testthat::expect_equal(familiar:::is_empty(vimp_table), TRUE)
})

# Test updating table where the variable importance table could not be created. -----

empty_vimp_table_object <- vimp_table_object
empty_vimp_table_object@vimp_table <- NULL

# Update the variable importance object using the reference cluster table.
updated_vimp_table_object <- familiar:::update_vimp_table_to_reference(
  empty_vimp_table_object,
  reference_cluster_table)

testthat::test_that("Updating variable importance tables with mismatching reference cluster tables.", {
  vimp_table <- familiar:::get_vimp_table(updated_vimp_table_object, "ranked")

  testthat::expect_equal(familiar:::is_empty(updated_vimp_table_object), TRUE)

  testthat::expect_equal(familiar:::is_empty(vimp_table), TRUE)
})

testthat::test_that("Updating variable importance tables with reclustering functions correctly.", {
  reclustered_vimp_table_object <- familiar:::recluster_vimp_table(updated_vimp_table_object)
  vimp_table <- familiar:::get_vimp_table(reclustered_vimp_table_object, "ranked")

  testthat::expect_equal(familiar:::is_empty(updated_vimp_table_object), TRUE)

  testthat::expect_equal(familiar:::is_empty(vimp_table), TRUE)
})

# Test with signature features -------------------------------------------------

data <- familiar:::test_create_synthetic_correlated_data(
  outcome_type = "continuous",
  n_numeric = 2,
  cluster_size = c(1, 1, 1, 1))

vimp_object <- familiar:::prepare_vimp_object(
  data = data,
  vimp_method = "pearson",
  outcome_type = "continuous",
  transformation_method = "none",
  normalisation_method = "none",
  cluster_method = "hclust",
  cluster_cut_method = "fixed_cut",
  cluster_similarity_metric = "mcfadden_r2",
  cluster_similarity_threshold = 0.99,
  signature = c("feature_1"),
  imputation_method = "simple")

vimp_table_object <- suppressWarnings(familiar:::.vimp(vimp_object, data))

testthat::test_that("A signature feature does not appear in the variable importance table.", {
  vimp_table <- familiar:::get_vimp_table(vimp_table_object, "ranked")

  # Assert that colnames are correct.
  testthat::expect_setequal(colnames(vimp_table), c("score", "name", "rank"))

  # Assert that there is no entry for feature_1.
  testthat::expect_equal(sum(vimp_table$name == "feature_1"), 0L)

  # Assert that there is one entry for feature_2.
  testthat::expect_equal(sum(vimp_table$name == "feature_2"), 1L)

  # Assert that there is one entry for feature_3.
  testthat::expect_equal(sum(vimp_table$name == "feature_3"), 1L)

  # Assert that there is one entry for feature_4.
  testthat::expect_equal(sum(vimp_table$name == "feature_4"), 1L)

  # Assert that there 3 ranks.
  testthat::expect_equal(sort(unique(vimp_table$rank)), seq_len(3L))
})

# Test with all signature features ---------------------------------------------

vimp_object <- familiar:::prepare_vimp_object(
  data = data,
  vimp_method = "pearson",
  outcome_type = "continuous",
  transformation_method = "none",
  normalisation_method = "none",
  cluster_method = "hclust",
  cluster_cut_method = "fixed_cut",
  cluster_similarity_metric = "mcfadden_r2",
  cluster_similarity_threshold = 0.99,
  signature = c("feature_1", "feature_2", "feature_3", "feature_4"),
  imputation_method = "simple")

vimp_table_object <- suppressWarnings(familiar:::.vimp(vimp_object, data))

testthat::test_that("A signature feature does not appear in the variable importance table.", {
  vimp_table <- familiar:::get_vimp_table(vimp_table_object, "ranked")

  # Assert that both the variable importance table object and the variable
  # importance table are empty.
  testthat::expect_equal(familiar:::is_empty(vimp_table_object), TRUE)
  testthat::expect_equal(familiar:::is_empty(vimp_table), TRUE)
})

Try the familiar package in your browser

Any scripts or data that you put into this service are public.

familiar documentation built on Sept. 30, 2024, 9:18 a.m.