tests/testthat/test_select_metrics.R

library(cvms)
context("select_metrics()")


test_that("select_metrics() works with output from cross-validation", {

  testthat::skip_on_cran()

  # Create data ####

  xpectr::set_test_seed(1)
  dat <- participant.scores %>%
    groupdata2::fold(k = 3, cat_col = "diagnosis")
  mdata <- musicians %>%
    groupdata2::fold(k = 3, cat_col = "Class")

  cv_gauss <- suppressMessages(
    cross_validate_fn(
      data = dat,
      formulas = "score ~ diagnosis + (1|session)",
      model_fn = model_functions("lmer"),
      predict_fn = predict_functions("lmer"),
      hyperparameters = list("REML" = FALSE),
      fold_cols = ".folds",
      metrics = list("all" = TRUE),
      type = "gaussian"
    ))

  cv_binom <- suppressMessages(
    cross_validate_fn(
      data = dat,
      formulas = "diagnosis ~ score + (1|session)",
      model_fn = model_functions("glmer_binomial"),
      predict_fn = predict_functions("glmer_binomial"),
      fold_cols = ".folds",
      metrics = list("all" = TRUE),
      type = "binomial"
    ))

  cv_multinom <- suppressMessages(
    cross_validate_fn(
      data = mdata,
      formulas = "Class ~ Height + Bass + Guitar + Keys + Vocals + (1|Drums)",
      model_fn = model_functions("svm_multinomial"),
      predict_fn = predict_functions("svm_multinomial"),
      hyperparameters = list("kernel" = "linear", "cost" = 10),
      fold_cols = ".folds",
      metrics = list("all" = TRUE),
      type = "multinomial"
    ))

  # Test select_metrics() ####

  gaussian_metrics <- select_metrics(cv_gauss)
  binomial_metrics <- select_metrics(cv_binom)
  multinomial_metrics <- select_metrics(cv_multinom)
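  # Each selection should be a one-row tibble per formula, containing only the
  # metric columns and the formula columns (Fixed, Dependent, Random, and
  # HParams where hyperparameters were supplied), as asserted below.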


  ## Testing 'gaussian_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(gaussian_metrics),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    gaussian_metrics[["Fixed"]],
    "diagnosis",
    fixed = TRUE)
  expect_equal(
    gaussian_metrics[["RMSE"]],
    9.41959,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAE"]],
    7.72296,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(RNG)"]],
    0.16782,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(IQR)"]],
    0.46732,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(STD)"]],
    0.49816,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(AVG)"]],
    0.23755,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RMSLE"]],
    0.25275,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MALE"]],
    0.20454,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RAE"]],
    0.51833,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RSE"]],
    0.28456,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RRSE"]],
    0.52511,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAPE"]],
    0.21814,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MSE"]],
    100.48666,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TAE"]],
    77.22961,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TSE"]],
    1004.86659,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["r2m"]],
    0.28373,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["r2c"]],
    0.81539,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["AIC"]],
    155.75007,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["AICc"]],
    158.41674,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["BIC"]],
    159.733,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["Dependent"]],
    "score",
    fixed = TRUE)
  expect_equal(
    gaussian_metrics[["Random"]],
    "(1|session)",
    fixed = TRUE)
  # Testing column names
  expect_equal(
    names(gaussian_metrics),
    c("Fixed", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)",
      "NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE",
      "MSE", "TAE", "TSE", "r2m", "r2c", "AIC", "AICc", "BIC", "Dependent",
      "Random", "HParams"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(gaussian_metrics),
    c("character", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "character", "character",
      ifelse(is_dplyr_1(), "vctrs_list_of", "list")),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(gaussian_metrics),
    c("character", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "character", "character", "list"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(gaussian_metrics),
    c(1L, 24L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(gaussian_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'gaussian_metrics'                                    ####


  ## Testing 'binomial_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(binomial_metrics),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    binomial_metrics[["Fixed"]],
    "score",
    fixed = TRUE)
  expect_equal(
    binomial_metrics[["Balanced Accuracy"]],
    0.73611,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Accuracy"]],
    0.76667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["F1"]],
    0.82051,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Sensitivity"]],
    0.88889,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Specificity"]],
    0.58333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Pos Pred Value"]],
    0.7619,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Neg Pred Value"]],
    0.77778,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["AUC"]],
    0.8287,
    tolerance = 1e-2) # 1e-2 for Windows compatibility
  expect_equal(
    binomial_metrics[["Lower CI"]],
    0.68274,
    tolerance = 1e-2) # 1e-2 for Windows compatibility
  expect_equal(
    binomial_metrics[["Upper CI"]],
    0.97467,
    tolerance = 1e-2) # 1e-2 for Windows compatibility
  expect_equal(
    binomial_metrics[["Kappa"]],
    0.49275,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["MCC"]],
    0.50483,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Rate"]],
    0.53333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Prevalence"]],
    0.7,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Prevalence"]],
    0.6,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Neg Rate"]],
    0.11111,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Pos Rate"]],
    0.41667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Discovery Rate"]],
    0.2381,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Omission Rate"]],
    0.22222,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Threat Score"]],
    0.695652,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["AIC"]],
    22.92391,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["AICc"]],
    24.42391,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["BIC"]],
    25.9111,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Dependent"]],
    "diagnosis",
    fixed = TRUE)
  expect_equal(
    binomial_metrics[["Random"]],
    "(1|session)",
    fixed = TRUE)
  # Testing column names
  expect_equal(
    names(binomial_metrics),
    c("Fixed", "Balanced Accuracy", "Accuracy", "F1", "Sensitivity",
      "Specificity", "Pos Pred Value", "Neg Pred Value", "AUC", "Lower CI",
      "Upper CI", "Kappa", "MCC", "Detection Rate", "Detection Prevalence",
      "Prevalence", "False Neg Rate", "False Pos Rate", "False Discovery Rate",
      "False Omission Rate", "Threat Score", "AIC", "AICc", "BIC",
      "Dependent", "Random"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(binomial_metrics),
    c("character", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "character", "character"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(binomial_metrics),
    c("character", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "character", "character"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(binomial_metrics),
    c(1L, 26L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(binomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'binomial_metrics'                                    ####


  ## Testing 'multinomial_metrics'                                          ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(multinomial_metrics),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    multinomial_metrics[["Fixed"]],
    "Height+Bass+Guitar+Keys+Vocals",
    fixed = TRUE)
  expect_equal(
    multinomial_metrics[["Overall Accuracy"]],
    0.26667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Balanced Accuracy"]],
    0.51111,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Balanced Accuracy"]],
    0.51111,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Accuracy"]],
    0.63333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Accuracy"]],
    0.63333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["F1"]],
    0.23715,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted F1"]],
    0.23715,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Sensitivity"]],
    0.26667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Sensitivity"]],
    0.26667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Specificity"]],
    0.75556,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Specificity"]],
    0.75556,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Pos Pred Value"]],
    0.22435,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Pos Pred Value"]],
    0.22435,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Neg Pred Value"]],
    0.76044,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Neg Pred Value"]],
    0.76044,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["AUC"]],
    0.62741,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Kappa"]],
    0.00472,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Kappa"]],
    0.00472,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["MCC"]],
    0.02285,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Rate"]],
    0.06667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Rate"]],
    0.06667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Neg Rate"]],
    0.73333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Neg Rate"]],
    0.73333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Pos Rate"]],
    0.24444,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Pos Rate"]],
    0.24444,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Discovery Rate"]],
    0.77565,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Discovery Rate"]],
    0.77565,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Omission Rate"]],
    0.23956,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Omission Rate"]],
    0.23956,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Threat Score"]],
    0.143494,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Threat Score"]],
    0.1434946,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["AIC"]],
    NA_real_,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["AICc"]],
    NA_real_,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["BIC"]],
    NA_real_,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Dependent"]],
    "Class",
    fixed = TRUE)
  expect_equal(
    multinomial_metrics[["Random"]],
    "(1|Drums)",
    fixed = TRUE)
  # Testing column names
  expect_equal(
    names(multinomial_metrics),
    c("Fixed", "Overall Accuracy", "Balanced Accuracy", "Weighted Balanced Accuracy",
      "Accuracy", "Weighted Accuracy", "F1", "Weighted F1", "Sensitivity",
      "Weighted Sensitivity", "Specificity", "Weighted Specificity",
      "Pos Pred Value", "Weighted Pos Pred Value", "Neg Pred Value",
      "Weighted Neg Pred Value", "AUC", "Kappa", "Weighted Kappa",
      "MCC", "Detection Rate", "Weighted Detection Rate", "Detection Prevalence",
      "Weighted Detection Prevalence", "Prevalence", "Weighted Prevalence",
      "False Neg Rate", "Weighted False Neg Rate", "False Pos Rate",
      "Weighted False Pos Rate", "False Discovery Rate", "Weighted False Discovery Rate",
      "False Omission Rate", "Weighted False Omission Rate", "Threat Score",
      "Weighted Threat Score", "AIC", "AICc", "BIC", "Dependent",
      "Random", "HParams"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::smpl(xpectr::element_classes(multinomial_metrics), n = 30),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "character",
      ifelse(is_dplyr_1(), "vctrs_list_of", "list")),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::smpl(xpectr::element_types(multinomial_metrics), n = 30),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "character", "list"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(multinomial_metrics),
    c(1L, 42L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(multinomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'multinomial_metrics'                                 ####

})

test_that("select_metrics() works with output from baseline", {

  testthat::skip_on_cran()

  # Create data ####
  xpectr::set_test_seed(1)
  train_test <- groupdata2::partition(musicians, p = 0.5, cat_col = "Class")
  train <- train_test[[1]]
  test <- train_test[[2]]
  bsl_gauss <-
    baseline(
      test_data = test,
      train_data = train,
      dependent_col = "Age",
      family = "gaussian",
      n = 4,
      metrics = "all"
    )
  bsl_binom <- baseline(
    test_data = test,
    dependent_col = "Drums",
    family = "binomial",
    n = 4,
    metrics = "all"
  )
  bsl_multinom <- baseline(
    test_data = test,
    dependent_col = "Class",
    family = "multinomial",
    n = 4,
    metrics = "all"
  )

  # Test select_metrics() ####

  gaussian_metrics <- select_metrics(bsl_gauss$random_evaluations)
  binomial_metrics <- select_metrics(bsl_binom$random_evaluations)
  multinomial_metrics <- select_metrics(bsl_multinom$random_evaluations)
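  # baseline()'s `random_evaluations` holds one row per random model (n = 4 above),
  # so each selection should have four rows and no Random or HParams columns.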


  ## Testing 'gaussian_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(gaussian_metrics),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    gaussian_metrics[["Fixed"]],
    c("1", "1", "1", "1"),
    fixed = TRUE)
  expect_equal(
    gaussian_metrics[["RMSE"]],
    c(14.01785, 14.03277, 14.0827, 15.83953),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAE"]],
    c(12.5, 12.54044, 12.58437, 13.25781),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(RNG)"]],
    c(0.29825, 0.29857, 0.29963, 0.33701),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(IQR)"]],
    c(0.59023, 0.59085, 0.59296, 0.66693),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(STD)"]],
    c(0.98425, 0.9853, 0.9888, 1.11216),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(AVG)"]],
    c(0.35943, 0.35981, 0.36109, 0.40614),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RMSLE"]],
    c(0.37328, 0.37078, 0.36884, 0.3928),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MALE"]],
    c(0.32866, 0.32967, 0.3308, 0.35018),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RAE"]],
    c(1, 1.00324, 1.00675, 1.06063),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RSE"]],
    c(1, 1.00213, 1.00927, 1.2768),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RRSE"]],
    c(1, 1.00106, 1.00463, 1.12995),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAPE"]],
    c(0.38123, 0.37595, 0.3702, 0.32969),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MSE"]],
    c(196.5, 196.91869, 198.3225, 250.89063),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TAE"]],
    c(400, 401.29412, 402.7, 424.25),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TSE"]],
    c(6288, 6301.39792, 6346.32, 8028.5),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["r2m"]],
    c(0, 0, 0, 0),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["r2c"]],
    c(0, 0, 0, 0),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["AIC"]],
    c(60.68841, 147.40183, 168.28416, 65.16688),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["AICc"]],
    c(63.68841, 148.25897, 168.99005, 67.56688),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["BIC"]],
    c(60.58023, 149.06825, 170.27563, 65.32577),
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["Dependent"]],
    c("Age", "Age", "Age", "Age"),
    fixed = TRUE)
  # Testing column names
  expect_equal(
    names(gaussian_metrics),
    c("Fixed", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)",
      "NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE",
      "MSE", "TAE", "TSE", "r2m", "r2c", "AIC", "AICc", "BIC", "Dependent"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(gaussian_metrics),
    c("character", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "character"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(gaussian_metrics),
    c("character", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "character"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(gaussian_metrics),
    c(4L, 22L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(gaussian_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'gaussian_metrics'                                    ####



  ## Testing 'binomial_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(binomial_metrics),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    binomial_metrics[["Balanced Accuracy"]],
    c(0.48178, 0.46761, 0.36235, 0.51619),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Accuracy"]],
    c(0.5, 0.46875, 0.34375, 0.46875),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["F1"]],
    c(0.57895, 0.51429, 0.32258, 0.37037),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Sensitivity"]],
    c(0.57895, 0.47368, 0.26316, 0.26316),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Specificity"]],
    c(0.38462, 0.46154, 0.46154, 0.76923),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Pos Pred Value"]],
    c(0.57895, 0.5625, 0.41667, 0.625),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Neg Pred Value"]],
    c(0.38462, 0.375, 0.3, 0.41667),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["AUC"]],
    c(0.51417, 0.41296, 0.35223, 0.61134),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Lower CI"]],
    c(0.30661, 0.19913, 0.13439, 0.39675),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Upper CI"]],
    c(0.72173, 0.62678, 0.57006, 0.82593),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Kappa"]],
    c(-0.03644, -0.0625, -0.25373, 0.02857),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["MCC"]],
    c(-0.03644, -0.06363, -0.27929, 0.03674),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Rate"]],
    c(0.34375, 0.28125, 0.15625, 0.15625),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Prevalence"]],
    c(0.59375, 0.5, 0.375, 0.25),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Prevalence"]],
    c(0.59375, 0.59375, 0.59375, 0.59375),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Neg Rate"]],
    c(0.42105, 0.52632, 0.73684, 0.73684),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Pos Rate"]],
    c(0.61538, 0.53846, 0.53846, 0.23077),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Discovery Rate"]],
    c(0.42105, 0.4375, 0.58333, 0.375),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Omission Rate"]],
    c(0.61538, 0.625, 0.7, 0.58333),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Threat Score"]],
    c(0.407407, 0.346153, 0.192307, 0.22727),
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Dependent"]],
    c("Drums", "Drums", "Drums", "Drums"),
    fixed = TRUE)
  # Testing column names
  expect_equal(
    names(binomial_metrics),
    c("Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity",
      "Pos Pred Value", "Neg Pred Value", "AUC", "Lower CI", "Upper CI",
      "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence",
      "False Neg Rate", "False Pos Rate", "False Discovery Rate",
      "False Omission Rate", "Threat Score", "Dependent"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(binomial_metrics),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "character"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(binomial_metrics),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "character"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(binomial_metrics),
    c(4L, 21L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(binomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'binomial_metrics'                                    ####



  ## Testing 'multinomial_metrics'                                          ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(multinomial_metrics),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    multinomial_metrics[["Overall Accuracy"]],
    c(0.21875, 0.21875, 0.375, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Balanced Accuracy"]],
    c(0.47917, 0.47917, 0.58333, 0.5),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Balanced Accuracy"]],
    c(0.47917, 0.47917, 0.58333, 0.5),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Accuracy"]],
    c(0.60938, 0.60938, 0.6875, 0.625),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Accuracy"]],
    c(0.60938, 0.60938, 0.6875, 0.625),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["F1"]],
    c(0.21808, 0.21586, 0.36685, 0.23016),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted F1"]],
    c(0.21808, 0.21586, 0.36685, 0.23016),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Sensitivity"]],
    c(0.21875, 0.21875, 0.375, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Sensitivity"]],
    c(0.21875, 0.21875, 0.375, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Specificity"]],
    c(0.73958, 0.73958, 0.79167, 0.75),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Specificity"]],
    c(0.73958, 0.73958, 0.79167, 0.75),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Pos Pred Value"]],
    c(0.22879, 0.21528, 0.36587, 0.23333),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Pos Pred Value"]],
    c(0.22879, 0.21528, 0.36587, 0.23333),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Neg Pred Value"]],
    c(0.73788, 0.73976, 0.79337, 0.75315),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Neg Pred Value"]],
    c(0.73788, 0.73976, 0.79337, 0.75315),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["AUC"]],
    c(0.42057, 0.53516, 0.57812, 0.48177),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Kappa"]],
    c(-0.03619, -0.04273, 0.16129, -0.00899),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Kappa"]],
    c(-0.03619, -0.04273, 0.16129, -0.00899),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["MCC"]],
    c(-0.04239, -0.04183, 0.16776, 0),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Rate"]],
    c(0.05469, 0.05469, 0.09375, 0.0625),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Rate"]],
    c(0.05469, 0.05469, 0.09375, 0.0625),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Prevalence"]],
    c(0.25, 0.25, 0.25, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Prevalence"]],
    c(0.25, 0.25, 0.25, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Prevalence"]],
    c(0.25, 0.25, 0.25, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Prevalence"]],
    c(0.25, 0.25, 0.25, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Neg Rate"]],
    c(0.78125, 0.78125, 0.625, 0.75),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Neg Rate"]],
    c(0.78125, 0.78125, 0.625, 0.75),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Pos Rate"]],
    c(0.26042, 0.26042, 0.20833, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Pos Rate"]],
    c(0.26042, 0.26042, 0.20833, 0.25),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Discovery Rate"]],
    c(0.77121, 0.78472, 0.63413, 0.76667),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Discovery Rate"]],
    c(0.77121, 0.78472, 0.63413, 0.76667),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Omission Rate"]],
    c(0.26212, 0.26024, 0.20663, 0.24685),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Omission Rate"]],
    c(0.26212, 0.26024, 0.20663, 0.24685),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Threat Score"]],
    c(0.12316, 0.12414, 0.22847, 0.13999),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Threat Score"]],
    c(0.12316, 0.12414, 0.22847, 0.13999),
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Dependent"]],
    c("Class", "Class", "Class", "Class"),
    fixed = TRUE)
  # Testing column names
  expect_equal(
    names(multinomial_metrics),
    c("Overall Accuracy", "Balanced Accuracy", "Weighted Balanced Accuracy",
      "Accuracy", "Weighted Accuracy", "F1", "Weighted F1", "Sensitivity",
      "Weighted Sensitivity", "Specificity", "Weighted Specificity",
      "Pos Pred Value", "Weighted Pos Pred Value", "Neg Pred Value",
      "Weighted Neg Pred Value", "AUC", "Kappa", "Weighted Kappa",
      "MCC", "Detection Rate", "Weighted Detection Rate", "Detection Prevalence",
      "Weighted Detection Prevalence", "Prevalence", "Weighted Prevalence",
      "False Neg Rate", "Weighted False Neg Rate", "False Pos Rate",
      "Weighted False Pos Rate", "False Discovery Rate", "Weighted False Discovery Rate",
      "False Omission Rate", "Weighted False Omission Rate", "Threat Score",
      "Weighted Threat Score", "Dependent"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::smpl(xpectr::element_classes(multinomial_metrics), n = 30),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "character"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::smpl(xpectr::element_types(multinomial_metrics), n = 30),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "character"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(multinomial_metrics),
    c(4L, 36L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(multinomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'multinomial_metrics'                                 ####


})

test_that("select_metrics() works with output from evaluate", {

  # Perform evaluations ####
  xpectr::set_test_seed(1)
  evl_gauss <-
    evaluate(
      musicians,
      target_col = "Drums",
      prediction_cols = "Guitar",
      type = "gaussian",
      metrics = "all"
    )
  evl_binom <- evaluate(
    musicians,
    target_col = "Drums",
    prediction_cols = "Guitar",
    type = "binomial",
    metrics = "all"
  )
  evl_multinom <- evaluate(
    musicians %>% dplyr::mutate(Class = as.character(Class), Pred = sample(Class)),
    target_col = "Class",
    prediction_cols = "Pred",
    type = "multinomial",
    metrics = "all"
  )

  # Test select_metrics() ####

  gaussian_metrics <- select_metrics(evl_gauss)
  binomial_metrics <- select_metrics(evl_binom)
  multinomial_metrics <- select_metrics(evl_multinom)
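  # evaluate() returns `eval_results` tibbles; select_metrics() should preserve
  # that class and keep only the metric columns (no formula columns here).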


  ## Testing 'gaussian_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(gaussian_metrics),
    c("eval_results", "tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    gaussian_metrics[["RMSE"]],
    0.67082,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAE"]],
    0.45,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(RNG)"]],
    0.67082,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(IQR)"]],
    0.67082,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(STD)"]],
    1.36818,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(AVG)"]],
    1.08782,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RMSLE"]],
    0.46498,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MALE"]],
    0.31192,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RAE"]],
    0.95182,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RSE"]],
    1.90364,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RRSE"]],
    1.37973,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAPE"]],
    NaN,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MSE"]],
    0.45,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TAE"]],
    27,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TSE"]],
    27,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(gaussian_metrics),
    c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)",
      "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE",
      "TSE"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(gaussian_metrics),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(gaussian_metrics),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(gaussian_metrics),
    c(1L, 15L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(gaussian_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'gaussian_metrics'                                    ####



  ## Testing 'binomial_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(binomial_metrics),
    c("eval_results", "tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    binomial_metrics[["Balanced Accuracy"]],
    0.54465,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Accuracy"]],
    0.55,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["F1"]],
    0.6087,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Sensitivity"]],
    0.56757,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Specificity"]],
    0.52174,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Pos Pred Value"]],
    0.65625,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Neg Pred Value"]],
    0.42857,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["AUC"]],
    0.54465,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Lower CI"]],
    0.41259,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Upper CI"]],
    0.67671,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Kappa"]],
    0.08578,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["MCC"]],
    0.08704,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Rate"]],
    0.35,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Prevalence"]],
    0.53333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Prevalence"]],
    0.61667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Neg Rate"]],
    0.43243,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Pos Rate"]],
    0.47826,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Discovery Rate"]],
    0.34375,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Omission Rate"]],
    0.57143,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Threat Score"]],
    0.4375,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(binomial_metrics),
    c("Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity",
      "Pos Pred Value", "Neg Pred Value", "AUC", "Lower CI", "Upper CI",
      "Kappa", "MCC", "Detection Rate", "Detection Prevalence", "Prevalence",
      "False Neg Rate", "False Pos Rate", "False Discovery Rate",
      "False Omission Rate", "Threat Score"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(binomial_metrics),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(binomial_metrics),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(binomial_metrics),
    c(1L, 20L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(binomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'binomial_metrics'                                    ####



  ## Testing 'multinomial_metrics'                                          ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(multinomial_metrics),
    c("eval_results", "tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    multinomial_metrics[["Overall Accuracy"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Balanced Accuracy"]],
    0.5,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Balanced Accuracy"]],
    0.5,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Accuracy"]],
    0.625,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Accuracy"]],
    0.625,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["F1"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted F1"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Sensitivity"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Sensitivity"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Specificity"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Specificity"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Pos Pred Value"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Pos Pred Value"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Neg Pred Value"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Neg Pred Value"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["AUC"]],
    0.5,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Kappa"]],
    0,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Kappa"]],
    0,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["MCC"]],
    0,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Rate"]],
    0.0625,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Rate"]],
    0.0625,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Prevalence"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Neg Rate"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Neg Rate"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Pos Rate"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Pos Rate"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Discovery Rate"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Discovery Rate"]],
    0.75,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Omission Rate"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Omission Rate"]],
    0.25,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Threat Score"]],
    0.14316,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Threat Score"]],
    0.14316,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(multinomial_metrics),
    c("Overall Accuracy", "Balanced Accuracy", "Weighted Balanced Accuracy",
      "Accuracy", "Weighted Accuracy", "F1", "Weighted F1", "Sensitivity",
      "Weighted Sensitivity", "Specificity", "Weighted Specificity",
      "Pos Pred Value", "Weighted Pos Pred Value", "Neg Pred Value",
      "Weighted Neg Pred Value", "AUC", "Kappa", "Weighted Kappa",
      "MCC", "Detection Rate", "Weighted Detection Rate", "Detection Prevalence",
      "Weighted Detection Prevalence", "Prevalence", "Weighted Prevalence",
      "False Neg Rate", "Weighted False Neg Rate", "False Pos Rate",
      "Weighted False Pos Rate", "False Discovery Rate", "Weighted False Discovery Rate",
      "False Omission Rate", "Weighted False Omission Rate", "Threat Score",
      "Weighted Threat Score"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::smpl(xpectr::element_classes(multinomial_metrics), n = 30),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::smpl(xpectr::element_types(multinomial_metrics), n = 30),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(multinomial_metrics),
    c(1L, 35L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(multinomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'multinomial_metrics'                                 ####


})

test_that("select_metrics() works with output from evaluate_residuals", {
  res_gauss <-
    evaluate_residuals(
      musicians,
      prediction_col = "Drums",
      target_col = "Guitar",
      metrics = "all"
    )

  gaussian_metrics <- select_metrics(res_gauss)
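  # evaluate_residuals() returns only the gaussian residual metrics, so the
  # selection should be a one-row tibble with the 15 metric columns.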


  ## Testing 'gaussian_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(gaussian_metrics),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    gaussian_metrics[["RMSE"]],
    0.67082,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAE"]],
    0.45,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(RNG)"]],
    0.67082,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(IQR)"]],
    0.67082,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(STD)"]],
    1.33338,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["NRMSE(AVG)"]],
    1.25779,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RMSLE"]],
    0.46498,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MALE"]],
    0.31192,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RAE"]],
    0.90402,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RSE"]],
    1.80804,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["RRSE"]],
    1.34463,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MAPE"]],
    NaN,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["MSE"]],
    0.45,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TAE"]],
    27,
    tolerance = 1e-4)
  expect_equal(
    gaussian_metrics[["TSE"]],
    27,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(gaussian_metrics),
    c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)",
      "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE",
      "TSE"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(gaussian_metrics),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(gaussian_metrics),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(gaussian_metrics),
    c(1L, 15L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(gaussian_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'gaussian_metrics'                                    ####


})

test_that("select_metrics() works with confusion matrix", {
  cfm_binom <-
    confusion_matrix(
      targets = c(1, 0, 1, 0, 1, 0),
      predictions = c(0, 1, 1, 0, 0, 1),
      metrics = "all"
    )
  cfm_multinom <- confusion_matrix(
    targets = c(1, 0, 1, 0, 1, 0, 2, 2, 2),
    predictions = c(0, 1, 2, 0, 0, 1, 0, 2, 2),
    metrics = "all"
  )

  binomial_metrics <- select_metrics(cfm_binom)
  multinomial_metrics <- select_metrics(cfm_multinom)
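  # confusion_matrix() outputs keep their `cfm_*` classes; the selected metrics
  # should contain only the (weighted) classification metrics, without AUC or
  # formula columns.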


  ## Testing 'binomial_metrics'                                             ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(binomial_metrics),
    c("cfm_results", "cfm_binomial", "tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    binomial_metrics[["Balanced Accuracy"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Accuracy"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["F1"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Sensitivity"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Specificity"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Pos Pred Value"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Neg Pred Value"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Kappa"]],
    -0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["MCC"]],
    -0.33333,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Rate"]],
    0.16667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Detection Prevalence"]],
    0.5,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Prevalence"]],
    0.5,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Neg Rate"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Pos Rate"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Discovery Rate"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["False Omission Rate"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    binomial_metrics[["Threat Score"]],
    0.2,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(binomial_metrics),
    c("Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity",
      "Pos Pred Value", "Neg Pred Value", "Kappa", "MCC", "Detection Rate",
      "Detection Prevalence", "Prevalence", "False Neg Rate", "False Pos Rate",
      "False Discovery Rate", "False Omission Rate", "Threat Score"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(binomial_metrics),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(binomial_metrics),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(binomial_metrics),
    c(1L, 17L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(binomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'binomial_metrics'                                    ####



  ## Testing 'multinomial_metrics'                                          ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(multinomial_metrics),
    c("cfm_results", "cfm_multinomial", "tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    multinomial_metrics[["Overall Accuracy"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Balanced Accuracy"]],
    0.5,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Balanced Accuracy"]],
    0.5,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Accuracy"]],
    0.55556,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Accuracy"]],
    0.55556,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["F1"]],
    NaN,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted F1"]],
    NaN,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Sensitivity"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Sensitivity"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Specificity"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Specificity"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Pos Pred Value"]],
    0.30556,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Pos Pred Value"]],
    0.30556,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Neg Pred Value"]],
    0.66825,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Neg Pred Value"]],
    0.66825,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Kappa"]],
    -0.00583,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Kappa"]],
    -0.00583,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["MCC"]],
    0,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Rate"]],
    0.11111,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Rate"]],
    0.11111,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Detection Prevalence"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Detection Prevalence"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Prevalence"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Prevalence"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Neg Rate"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Neg Rate"]],
    0.66667,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Pos Rate"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Pos Rate"]],
    0.33333,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Discovery Rate"]],
    0.69444,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Discovery Rate"]],
    0.69444,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["False Omission Rate"]],
    0.33175,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted False Omission Rate"]],
    0.33175,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Threat Score"]],
    0.22222,
    tolerance = 1e-4)
  expect_equal(
    multinomial_metrics[["Weighted Threat Score"]],
    0.22222,
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(multinomial_metrics),
    c("Overall Accuracy", "Balanced Accuracy", "Weighted Balanced Accuracy",
      "Accuracy", "Weighted Accuracy", "F1", "Weighted F1", "Sensitivity",
      "Weighted Sensitivity", "Specificity", "Weighted Specificity",
      "Pos Pred Value", "Weighted Pos Pred Value", "Neg Pred Value",
      "Weighted Neg Pred Value", "Kappa", "Weighted Kappa", "MCC",
      "Detection Rate", "Weighted Detection Rate", "Detection Prevalence",
      "Weighted Detection Prevalence", "Prevalence", "Weighted Prevalence",
      "False Neg Rate", "Weighted False Neg Rate", "False Pos Rate",
      "Weighted False Pos Rate", "False Discovery Rate", "Weighted False Discovery Rate",
      "False Omission Rate", "Weighted False Omission Rate", "Threat Score",
      "Weighted Threat Score"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::smpl(xpectr::element_classes(multinomial_metrics), n = 30),
    c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::smpl(xpectr::element_types(multinomial_metrics), n = 30),
    c("double", "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double", "double",
      "double", "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(multinomial_metrics),
    c(1L, 34L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(multinomial_metrics)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'multinomial_metrics'                                 ####


})