tests/testthat/test-checks.R

test_df <- data.frame(
  outcome = c("normal", "normal", "cancer"),
  var1 = 1:3,
  var2 = 4:6
)

test_df_novar <- data.frame(
  outcome = c("normal", "normal", "normal"),
  var1 = 1:3,
  var2 = 4:6
)

test_df_na <- data.frame(
  outcome = c("normal", NA, "cancer"),
  var1 = 1:3,
  var2 = 4:6
)

test_df_empty <- data.frame(
  outcome = c("", "", "cancer"),
  var1 = 1:3,
  var2 = 4:6
)

test_df_numeric <- data.frame(
  outcome = c(0, 1, 2),
  var1 = 1:3,
  var2 = 4:6
)

test_that("check_dataset works", {
  expect_true(is.null(check_dataset(test_df)))
  expect_error(
    check_dataset("not_a_df"),
    "The dataset must be a `data.frame` or `tibble`"
  )
  expect_error(
    check_dataset(data.frame(outcome = c(), var1 = c())),
    "No rows detected in dataset."
  )
  expect_error(
    check_dataset(data.frame(outcome = 1:3)),
    "1 or fewer columns detected in dataset. There should be an outcome column and at least one feature column."
  )
})

test_that("check_method works", {
  expect_error(
    check_method("not_a_method", NULL),
    "Method 'not_a_method' is not officially supported by mikropml."
  )
  expect_null(check_method("glmnet", NULL))
})

test_that("check_outcome_column works", {
  expect_message(
    expect_equal(
      check_outcome_column(test_df, NULL),
      "outcome"
    ),
    "Using"
  )
  expect_error(
    check_outcome_column(test_df, "not_a_column"),
    "Outcome 'not_a_column' not in column names of data."
  )
})

test_that("check_outcome_value works", {
  expect_null(check_outcome_value(test_df, "outcome"))
  expect_error(
    check_outcome_value(test_df_na, "outcome"),
    "Missing data in the output variable is not allowed, but the outcome variable has"
  )
  expect_warning(
    check_outcome_value(test_df_empty, "outcome"),
    "Possible missing data in the output variable: "
  )
  expect_error(
    check_outcome_value(test_df_novar, "outcome"),
    "A binary or multi-class outcome variable is required, but this dataset has"
  )
  expect_warning(
    expect_null(check_outcome_value(test_df_numeric, "outcome")),
    "Data is being considered numeric, but all outcome values are integers. If you meant to code your values as categorical, please use character values."
  )
})

test_that("check_permute works", {
  expect_true(is.null(check_permute(TRUE)))
  expect_true(is.null(check_permute(FALSE)))
  expect_error(
    check_permute("not_a_logical"),
    "`permute` must be TRUE or FALSE"
  )
})

test_that("check_kfold works", {
  expect_true(is.null(check_kfold(2, test_df)))
  expect_error(check_kfold(1, test_df), "`kfold` must be an integer between 1 and the number of features in the data.")
  expect_error(
    check_kfold(10, test_df),
    "`kfold` must be an integer"
  )
  expect_error(
    check_kfold(0, test_df),
    "`kfold` must be an integer"
  )
  expect_warning(expect_error(
    check_kfold("not_an_int", test_df),
    "`kfold` must be an integer"
  ), "NAs introduced by coercion")
})

test_that("check_training_frac works", {
  expect_true(is.null(check_training_frac(0.8)))
  expect_error(
    check_training_frac("not_a_number"),
    "`training_frac` must be a numeric between 0 and 1."
  )
  expect_error(
    check_training_frac(1),
    "`training_frac` must be a numeric between 0 and 1."
  )
  expect_error(
    check_training_frac(0),
    "`training_frac` must be a numeric between 0 and 1."
  )
  expect_warning(
    check_training_frac(0.499),
    "`training_frac` is less than 0.5. The training set will be smaller than the testing set."
  )
})
test_that("check_training_indices works", {
  dat <- data.frame(a = 1:3, b = 2:4)
  expect_warning(
    check_training_indices(c(2.8, 1), dat),
    "The training indices vector contains non-integer numbers."
  )
  expect_error(
    check_training_indices(c(1, 12312, 1), dat),
    "The training indices vector contains a value that is too large"
  )
  expect_error(
    check_training_indices(c(-1, 2, 3), dat),
    "The training indices vector contains a value less than 1."
  )
  expect_error(
    check_training_indices(c(1:5), dat),
    "The training indices vector contains too many values for the size of the dataset."
  )
})
test_that("check_seed works", {
  expect_true(is.null(check_seed(NA)))
  expect_true(is.null(check_seed(10)))
  expect_error(
    check_seed("not_a_number"),
    "`seed` must be `NA` or numeric."
  )
})

test_that("check_all works", {
  expect_null(check_all(
    otu_small, "glmnet", TRUE, as.integer(5), 0.8,
    NULL, NULL, NULL,
    NULL, NULL, NA, NULL
  ))
})

test_that("check_packages_installed works", {
  expect_equal(all(check_packages_installed("caret")), TRUE)
  expect_equal(all(check_packages_installed("this_is_not_a_package")), FALSE)
  expect_equal(all(check_packages_installed("caret", "this_is_not_a_package")), FALSE)
  expect_equal(all(check_packages_installed(c("caret", "this_is_not_a_package"))), FALSE)
})

test_that("check_features works", {
  expect_true(is.null(check_features(test_df)))
  expect_true(is.null(check_features(dplyr::as_tibble(test_df))))
  expect_error(check_features(NULL))
  expect_true(is.null(check_features(test_df_na, check_missing = FALSE)))
  expect_warning(
    expect_true(is.null(check_features(test_df_empty))),
    "ossible missing data in the features: "
  )
  expect_error(
    check_features(test_df_na, check_missing = TRUE),
    "Missing data in the features is not allowed, but the features have"
  )
})

test_that("check_groups works", {
  expect_null(check_groups(mikropml::otu_mini_bin, NULL, 2))
  expect_null(check_groups(mikropml::otu_mini_bin, sample(LETTERS, nrow(mikropml::otu_mini_bin), replace = T), 2))
  expect_error(check_groups(mikropml::otu_mini_bin, c(1, 2), 2), "group should be a vector that is the same length as the number of rows in the dataset")
  expect_error(check_groups(mikropml::otu_mini_bin, data.frame(x = c(1, 2)), 2), "group should be either a vector or NULL, but group is class")
  expect_error(check_groups(mikropml::otu_mini_bin, c(rep(1, 199), NA), 2), "No NA values are allowed in group, but ")
  expect_error(check_groups(mikropml::otu_mini_bin, c(rep(1, 200)), 2), "The total number of groups should be greater than 1. If all samples are from the same group, use `group=NULL`")
})

test_that("check_group_partitions works", {
  set.seed(20211104)
  sample_groups <- sample(LETTERS[1:8], nrow(otu_mini_bin), replace = TRUE)
  group_part <- list(train = c("A", "B"), test = c("C", "D"))

  expect_null(check_group_partitions(otu_mini_bin, sample_groups, group_part))
  expect_error(
    check_group_partitions(otu_mini_bin, sample_groups, list(what = c("A", "B"))),
    "Unrecognized name\\(s\\) in `group_partitions`: what"
  )
  expect_error(
    check_group_partitions(
      otu_mini_bin, sample_groups, list(train = c("X"))
    ),
    "`group_partitions` contains group names not in groups vector"
  )
})

test_that("check_corr_thresh works", {
  expect_null(check_corr_thresh(1))
  expect_null(check_corr_thresh(0.8))
  expect_null(check_corr_thresh(NULL))
  expect_error(check_corr_thresh(2019), "`corr_thresh` must be `NULL` or numeric between 0 and 1 inclusive.
    You provided: ")
  expect_error(check_corr_thresh(corr_thresh = "a"), "`corr_thresh` must be `NULL` or numeric between 0 and 1 inclusive.
    You provided:")
})

test_that("check_perf_metric_function works", {
  expect_null(check_perf_metric_function(caret::defaultSummary))
  expect_null(check_perf_metric_function(NULL))
  expect_error(check_perf_metric_function("a"), "`perf_metric_function` must be `NULL` or a function.
    You provided:")
})

test_that("check_perf_metric_name works", {
  expect_null(check_perf_metric_name("a"))
  expect_null(check_perf_metric_name(NULL))
  expect_error(check_perf_metric_name(1), "`perf_metric_name` must be `NULL` or a character\n    You provided: 1")
})

test_that("check_cat_feats works", {
  expect_null(check_cat_feats(test_df[, 2:3]))
  expect_error(check_cat_feats(test_df), "No categorical features can be used when performing permutation importance. Please change these features to numeric. One option is to use `preprocess_data`.")
})

test_that("check_remove_var works", {
  expect_null(check_remove_var(NULL))
  expect_null(check_remove_var("nzv"))
  expect_error(check_remove_var("asdf"), "`remove_var` must be one of: NULL, 'nzv','zv'. You provided:")
})

test_that("check_ntree works", {
  expect_null(check_ntree(NULL)) %>% expect_warning("'check_ntree' is deprecated.")
  expect_null(check_ntree(1000)) %>% expect_warning("'check_ntree' is deprecated.")
  expect_error(check_ntree("asdf"), "`ntree` must be of length 1 and class numeric. You provided: ") %>% expect_warning("'check_ntree' is deprecated.")
  expect_error(check_ntree(-10), "`ntree` must be greater than zero. You provided: ") %>% expect_warning("'check_ntree' is deprecated.")
  expect_error(check_ntree(c(0, 1)), "`ntree` must be of length 1 and class numeric. You provided: ") %>% expect_warning("'check_ntree' is deprecated.")
})

test_that("abort_packages_not_installed works", {
  testfun <- function(...) abort_packages_not_installed(...)
  expect_null(testfun("utils"))
  expect_error(
    testfun("not_a_package"),
    "The following package\\(s\\) are required for `testfun\\(\\)` but are not installed:"
  )
})
SchlossLab/mikropml documentation built on Aug. 24, 2023, 9:51 p.m.