library(cvms)
context("evaluate()")
test_that("multinomial evaluations are correct in evaluate()", {
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 5,
num_observations = 20,
    apply_softmax = FALSE # TODO: test with TRUE as well
)
expect_equal(sum(random_probabilities), 51.78471, tolerance = 1e-5)
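  # With apply_softmax = FALSE the cells are raw random draws (roughly
  # uniform on (0, 1), an assumption), so the 5 x 20 cells should sum
  # to about 5 * 20 * 0.5 = 50, matching the seeded 51.78471 above.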
data_ <- random_probabilities %>%
dplyr::mutate(
cl = as.factor(rep(1:5, each = 4)),
cl_char = paste0("cl_", cl)
)
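  # The target levels ("1".."5") do not appear among the prediction
  # column names ("class_1".."class_5"), so evaluate() should complain: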
expect_error(
evaluate(
data = data_,
target_col = "cl",
prediction_cols = paste0("class_", 1:5),
type = "multinomial",
apply_softmax = TRUE
),
"Not all levels in 'target_col' was found in 'prediction_cols'.",
    fixed = TRUE
)
data_ <- data_ %>%
dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5))
mn_eval_1 <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE,
metrics = list("AUC" = TRUE)
)
# TODO Add more tests
expect_equal(mn_eval_1$`Overall Accuracy`, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_1$`Balanced Accuracy`, 0.5, tolerance = 1e-4)
expect_equal(
mn_eval_1$`Balanced Accuracy`,
mean(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`)
)
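  # Derived check, assuming macro metrics are unweighted means of the
  # class level values (as with `Balanced Accuracy` above):
  expect_equal(
    mn_eval_1$Sensitivity,
    mean(mn_eval_1$`Class Level Results`[[1]]$Sensitivity)
  )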
expect_equal(mn_eval_1$F1, NaN)
expect_equal(mn_eval_1$Sensitivity, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_1$Specificity, 0.8, tolerance = 1e-4)
expect_equal(mn_eval_1$`Pos Pred Value`, 0.23, tolerance = 1e-4)
expect_equal(mn_eval_1$`Neg Pred Value`, 0.7991667, tolerance = 1e-4)
expect_equal(mn_eval_1$AUC, 0.49375)
expect_equal(mn_eval_1$Kappa, 0.008653846, tolerance = 1e-4)
expect_equal(mn_eval_1$MCC, 0.0, tolerance = 1e-4)
expect_equal(mn_eval_1$`Detection Rate`, 0.04, tolerance = 1e-4)
expect_equal(mn_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_1$Prevalence, 0.2, tolerance = 1e-4)
expect_equal(as.numeric(mn_eval_1$ROC[[1]]$auc),
0.49375,
tolerance = 1e-4
)
expect_equal(
names(mn_eval_1$ROC[[1]]$rocs),
c(
"cl_1/cl_2", "cl_1/cl_3", "cl_1/cl_4", "cl_1/cl_5", "cl_2/cl_3",
"cl_2/cl_4", "cl_2/cl_5", "cl_3/cl_4", "cl_3/cl_5", "cl_4/cl_5"
)
)
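  # One-vs-one ROC yields one entry per unordered class pair,
  # i.e. choose(5, 2) = 10, matching the names above.
  expect_equal(length(mn_eval_1$ROC[[1]]$rocs), choose(5, 2))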
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$sensitivities,
c(1, 0.75, 0.75, 0.5, 0.25, 0.25, 0.25, 0, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$specificities,
c(0, 0, 0.25, 0.25, 0.25, 0.5, 0.75, 0.75, 1)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$sensitivities,
c(1, 0.75, 0.5, 0.5, 0.5, 0.5, 0.25, 0, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$specificities,
c(0, 0, 0, 0.25, 0.5, 0.75, 0.75, 0.75, 1)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$sensitivities,
c(1, 0.75, 0.75, 0.5, 0.25, 0.25, 0, 0, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$specificities,
c(0, 0, 0.25, 0.25, 0.25, 0.5, 0.5, 0.75, 1)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$sensitivities,
c(1, 0.75, 0.5, 0.25, 0.25, 0, 0, 0, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$specificities,
c(0, 0, 0, 0, 0.25, 0.25, 0.5, 0.75, 1)
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(0.50000, 0.37500, 0.59375, 0.50000, 0.53125),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$F1,
c(0.2222222, NaN, 0.3333333, 0.2222222, 0.2500000),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Sensitivity,
c(0.25, 0.0, 0.25, 0.25, 0.25),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Specificity,
c(0.7500, 0.7500, 0.9375, 0.7500, 0.8125),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.20, 0.00, 0.50, 0.20, 0.25),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`,
c(0.80, 0.750, 0.8333333, 0.80, 0.81250),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Kappa,
c(-3.172066e-16, -2.500000e-01, 2.307692e-01, -3.172066e-16, 6.250000e-02),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Rate`,
c(0.05, 0.00, 0.05, 0.05, 0.05),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.25, 0.20, 0.10, 0.25, 0.20),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Prevalence,
c(0.2, 0.2, 0.2, 0.2, 0.2),
tolerance = 1e-4
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$Support,
c(4, 4, 4, 4, 4)
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(12, 4, 3, 1)
)
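  # Derived check: the one-vs-all 2x2 counts should cover all 20 observations.
  expect_equal(
    sum(mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N),
    20
  )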
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N,
c(12, 4, 4, 0)
)
expect_equal(
colnames(mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]),
c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N")
)
expect_equal(
colnames(mn_eval_1$`Confusion Matrix`[[1]]),
c("Prediction", "Target", "N")
)
# Test Weighted metrics, and metrics == "all"
xpectr::set_test_seed(1)
mn_eval_2 <- evaluate(
data = data_ %>% dplyr::sample_n(17),
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE,
metrics = "all"
)
# Create manual weighted mean function
manual_weighted_mean <- function(x, w) {
sum(x * w) / sum(w)
}
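  # Equivalent to stats::weighted.mean(x, w); with equal weights it
  # reduces to mean(x).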
# Test manual weighted mean function
expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(1, 1)), 0.5, tolerance = 1e-4)
expect_equal(manual_weighted_mean(x = c(0.3, 0.7), w = c(1, 1)), 0.5, tolerance = 1e-4)
expect_equal(manual_weighted_mean(x = c(0.5, 0.5), w = c(1, 1)), 0.5, tolerance = 1e-4)
expect_equal(manual_weighted_mean(x = c(0.2), w = c(4)), 0.2)
expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(4, 1)), 0.32, tolerance = 1e-4)
expect_equal(mn_eval_2$`Overall Accuracy`, 0.2352941, tolerance = 1e-4)
expect_equal(mn_eval_2$`Balanced Accuracy`, 0.5380586, tolerance = 1e-4)
expect_equal(
mn_eval_2$`Balanced Accuracy`,
mean(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`)
)
expect_equal(mn_eval_2$`Weighted Balanced Accuracy`, 0.5236264, tolerance = 1e-4)
expect_equal(
mn_eval_2$`Weighted Balanced Accuracy`,
manual_weighted_mean(
x = mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`,
w = mn_eval_2$`Class Level Results`[[1]]$Support
)
)
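  # Analogous derived check, assuming the other `Weighted *` metrics are
  # also support-weighted means of the class level values:
  expect_equal(
    mn_eval_2$`Weighted Sensitivity`,
    manual_weighted_mean(
      x = mn_eval_2$`Class Level Results`[[1]]$Sensitivity,
      w = mn_eval_2$`Class Level Results`[[1]]$Support
    )
  )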
expect_equal(mn_eval_2$Accuracy, 0.6941176, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Accuracy`, 0.6851211, tolerance = 1e-4)
expect_equal(mn_eval_2$F1, NaN)
expect_equal(mn_eval_2$`Weighted F1`, NaN)
expect_equal(mn_eval_2$Sensitivity, 0.2666667, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Sensitivity`, 0.2352941, tolerance = 1e-4)
expect_equal(mn_eval_2$Specificity, 0.8094505, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Specificity`, 0.8119586, tolerance = 1e-4)
expect_equal(mn_eval_2$`Pos Pred Value`, 0.2666667, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Pos Pred Value`, 0.2696078, tolerance = 1e-4)
expect_equal(mn_eval_2$`Neg Pred Value`, 0.8094505, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Neg Pred Value`, 0.7939237, tolerance = 1e-4)
expect_equal(mn_eval_2$AUC, 0.528125, tolerance = 1e-4)
expect_equal(mn_eval_2$Kappa, 0.06428773, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Kappa`, 0.04481687, tolerance = 1e-4)
expect_equal(mn_eval_2$MCC, 0.0526315789473684, tolerance = 1e-4)
expect_equal(mn_eval_2$`Detection Rate`, 0.04705882, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Detection Rate`, 0.0449827, tolerance = 1e-4)
expect_equal(mn_eval_2$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Detection Prevalence`, 0.1937716, tolerance = 1e-4)
expect_equal(mn_eval_2$Prevalence, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Prevalence`, 0.2110727, tolerance = 1e-4)
expect_equal(as.numeric(mn_eval_2$ROC[[1]]$auc), 0.528125, tolerance = 1e-4)
  expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$direction, ">")
expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$sensitivities,
c(1, 0.75, 0.75, 0.5, 0.25, 0.25, 0.25, 0, 0),
tolerance = 1e-4
)
expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$specificities,
c(0, 0, 0.25, 0.25, 0.25, 0.5, 0.75, 0.75, 1),
tolerance = 1e-4
)
  expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$direction, ">")
expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$sensitivities,
c(1, 1, 1, 1, 1, 0.666666666666667, 0.333333333333333, 0),
tolerance = 1e-4
)
expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$specificities,
c(0, 0.25, 0.5, 0.75, 1, 1, 1, 1),
tolerance = 1e-4
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(0.5480769, 0.3461538, 0.5865385, 0.5595238, 0.6500000),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$F1,
c(0.2857143, NaN, 0.3333333, 0.2857143, 0.3333333),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Sensitivity,
c(0.250, 0.00, 0.250, 0.3333333, 0.500),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Specificity,
c(0.8461538, 0.6923077, 0.9230769, 0.7857143, 0.80),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.3333333, 0.0, 0.50, 0.25, 0.25),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Neg Pred Value`,
c(0.7857143, 0.6923077, 0.80, 0.8461538, 0.9230769),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Kappa,
c(0.1052632, -0.3076923, 0.2093023, 0.1052632, 0.2093023),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Detection Rate`,
c(0.05882353, 0.00, 0.05882353, 0.05882353, 0.05882353),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.1764706, 0.2352941, 0.1176471, 0.2352941, 0.2352941),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Prevalence,
c(0.2352941, 0.2352941, 0.2352941, 0.1764706, 0.1176471),
tolerance = 1e-4
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$Support,
c(4, 4, 4, 3, 2)
)
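  # Support should sum to the 17 sampled rows.
  expect_equal(sum(mn_eval_2$`Class Level Results`[[1]]$Support), 17)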
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(11, 2, 3, 1)
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N,
c(9, 4, 4, 0)
)
expect_equal(
colnames(mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]),
c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N")
)
expect_equal(
colnames(mn_eval_2$`Confusion Matrix`[[1]]),
c("Prediction", "Target", "N")
)
# Enabling and disabling a few metrics
xpectr::set_test_seed(1)
mn_eval_3 <- evaluate(
data = data_ %>% dplyr::sample_n(17),
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE,
metrics = list("Accuracy" = TRUE, "Overall Accuracy" = FALSE, "F1" = FALSE)
)
expect_true("Accuracy" %in% colnames(mn_eval_3))
expect_equal(mn_eval_3$Accuracy, 0.6941176, tolerance = 1e-4)
expect_true("Accuracy" %in% colnames(mn_eval_3$`Class Level Results`[[1]]))
expect_equal(mn_eval_3$`Class Level Results`[[1]]$Accuracy,
c(0.7058824, 0.5294118, 0.7647059, 0.7058824, 0.7647059),
tolerance = 1e-4
)
expect_true("Overall Accuracy" %ni% colnames(mn_eval_3))
expect_true("F1" %ni% colnames(mn_eval_3))
expect_true("F1" %ni% colnames(mn_eval_3$`Class Level Results`[[1]]))
# TODO
# ID level
data_ <- data_ %>%
dplyr::mutate(id = factor(rep(1:10, each = 2)))
xpectr::set_test_seed(9)
suppressWarnings(
mn_id_eval_1 <- evaluate(
data = data_ %>% dplyr::sample_n(13),
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
id_col = "id",
id_method = "mean",
type = "multinomial",
apply_softmax = TRUE,
metrics = "all"
)
)
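  # With id_method = "mean", the sampled rows are first averaged per ID,
  # so the evaluation effectively runs on one row per sampled ID
  # (9 of the 10 IDs appear in this sample of 13 rows; see below).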
expect_equal(mn_id_eval_1$`Overall Accuracy`, 0.222222, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Balanced Accuracy`, 0.5535714, tolerance = 1e-4)
expect_equal(
mn_id_eval_1$`Balanced Accuracy`,
mean(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`)
)
expect_equal(mn_id_eval_1$`Weighted Balanced Accuracy`, 0.5178571, tolerance = 1e-4)
expect_equal(
mn_id_eval_1$`Weighted Balanced Accuracy`,
manual_weighted_mean(
x = mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`,
w = mn_id_eval_1$`Class Level Results`[[1]]$Support
)
)
expect_equal(mn_id_eval_1$Accuracy, 0.688888889, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Accuracy`, 0.6790123, tolerance = 1e-4)
expect_equal(mn_id_eval_1$F1, NaN)
expect_equal(mn_id_eval_1$`Weighted F1`, NaN)
expect_equal(mn_id_eval_1$Sensitivity, 0.3, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Sensitivity`, 0.222222, tolerance = 1e-4)
expect_equal(mn_id_eval_1$Specificity, 0.8071429, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Specificity`, 0.8134921, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Pos Pred Value`, NaN)
expect_equal(mn_id_eval_1$`Weighted Pos Pred Value`, NaN)
expect_equal(mn_id_eval_1$`Neg Pred Value`, 0.8126984, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Neg Pred Value`, 0.7918871, tolerance = 1e-4)
expect_equal(mn_id_eval_1$AUC, 0.6, tolerance = 1e-4)
expect_equal(mn_id_eval_1$Kappa, 0.03714286, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Kappa`, -0.003174603, tolerance = 1e-4)
expect_equal(mn_id_eval_1$MCC, 0.0484122918275927, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Detection Rate`, 0.04444444, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Detection Rate`, 0.03703704, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Detection Prevalence`, 0.1851852, tolerance = 1e-4)
expect_equal(mn_id_eval_1$Prevalence, 0.2, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Prevalence`, 0.2098765, tolerance = 1e-4)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(0.6785714, 0.3571429, 0.3571429, 0.8750000, 0.5000000),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$F1,
c(0.5, NaN, NaN, 0.5, NA),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Sensitivity,
c(0.5, 0.0, 0.0, 1.0, 0.0),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Specificity,
c(0.8571429, 0.7142857, 0.7142857, 0.7500000, 1.0000000),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.5000000, 0.0000000, 0.0000000, 0.3333333, NaN),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`,
c(0.8571429, 0.7142857, 0.7142857, 1.0000000, 0.7777778),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Kappa,
c(0.3571429, -0.2857143, -0.2857143, 0.4000000, 0.0000000),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Rate`,
c(0.1111111, 0.0000000, 0.0000000, 0.1111111, 0.0000000),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.2222222, 0.2222222, 0.2222222, 0.3333333, 0.0000000),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Prevalence,
c(0.2222222, 0.2222222, 0.2222222, 0.1111111, 0.2222222),
tolerance = 1e-4
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$Support,
c(2, 2, 2, 1, 2)
)
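  # After ID aggregation, Support counts IDs rather than rows, so it
  # should sum to the 9 sampled IDs.
  expect_equal(sum(mn_id_eval_1$`Class Level Results`[[1]]$Support), 9)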
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction,
c("0", "1", "0", "1")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target,
c("0", "0", "1", "1")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(6, 1, 1, 1)
)
expect_equal(
colnames(mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]),
c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N")
)
expect_equal(
colnames(mn_id_eval_1$`Confusion Matrix`[[1]]),
c("Prediction", "Target", "N")
)
expect_equal(
colnames(mn_id_eval_1$Predictions[[1]]),
c("Target", "Prediction", "SD", "Predicted Class", "id", "id_method")
)
preds <-
tidyr::unnest(
mn_id_eval_1$Predictions[[1]],
cols = c("Prediction", "SD"),
names_sep = c("_")
)
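  # Note: the SD_* values are NA for IDs from which only a single row
  # was sampled, as sd() of one value is NA.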
## Testing 'preds' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(preds),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
preds[["Target"]],
c("cl_1", "cl_1", "cl_2", "cl_2", "cl_3", "cl_3", "cl_4", "cl_5",
"cl_5"),
fixed = TRUE)
expect_equal(
preds[["Prediction_cl_1"]],
c(0.15567, 0.24832, 0.21023, 0.23344, 0.21542, 0.17221, 0.17573,
0.31819, 0.1845),
tolerance = 1e-4)
expect_equal(
preds[["Prediction_cl_2"]],
c(0.20081, 0.17465, 0.16815, 0.17671, 0.27402, 0.22698, 0.20845,
0.13146, 0.18244),
tolerance = 1e-4)
expect_equal(
preds[["Prediction_cl_3"]],
c(0.23579, 0.23094, 0.23456, 0.19429, 0.23884, 0.22597, 0.118, 0.19822,
0.1765),
tolerance = 1e-4)
expect_equal(
preds[["Prediction_cl_4"]],
c(0.20689, 0.1759, 0.19107, 0.25941, 0.12493, 0.19673, 0.26071,
0.17429, 0.24663),
tolerance = 1e-4)
expect_equal(
preds[["Prediction_cl_5"]],
c(0.20084, 0.17018, 0.19599, 0.13616, 0.14679, 0.17811, 0.23711,
0.17783, 0.20994),
tolerance = 1e-4)
expect_equal(
preds[["SD_cl_1"]],
c(0.07539, 0.23713, 0.49265, NA, NA, NA, NA, NA, 0.28101),
tolerance = 1e-4)
expect_equal(
preds[["SD_cl_2"]],
c(0.51093, 0.37202, 0.08407, NA, NA, NA, NA, NA, 0.22093),
tolerance = 1e-4)
expect_equal(
preds[["SD_cl_3"]],
c(0.12296, 0.16256, 0.18359, NA, NA, NA, NA, NA, 0.18044),
tolerance = 1e-4)
expect_equal(
preds[["SD_cl_4"]],
c(0.43789, 0.08957, 0.27779, NA, NA, NA, NA, NA, 0.12961),
tolerance = 1e-4)
expect_equal(
preds[["SD_cl_5"]],
c(0.19647, 0.05278, 0.39202, NA, NA, NA, NA, NA, 0.14562),
tolerance = 1e-4)
expect_equal(
preds[["Predicted Class"]],
c("cl_3", "cl_1", "cl_3", "cl_4", "cl_2", "cl_2", "cl_4", "cl_1",
"cl_4"),
fixed = TRUE)
expect_equal(
preds[["id"]],
structure(c(1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "factor"))
expect_equal(
preds[["id_method"]],
c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean"),
fixed = TRUE)
# Testing column names
expect_equal(
names(preds),
c("Target", "Prediction_cl_1", "Prediction_cl_2", "Prediction_cl_3",
"Prediction_cl_4", "Prediction_cl_5", "SD_cl_1", "SD_cl_2",
"SD_cl_3", "SD_cl_4", "SD_cl_5", "Predicted Class", "id", "id_method"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(preds),
c("character", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "character",
"factor", "character"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(preds),
c("character", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "character",
"integer", "character"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(preds),
c(9L, 14L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(preds)),
character(0),
fixed = TRUE)
## Finished testing 'preds' ####
# Test grouping vars
data_2 <- data_ %>%
dplyr::mutate(fold_ = 1) %>%
dplyr::bind_rows(data_ %>% dplyr::mutate(fold_ = 2))
mn_id_eval_2 <- evaluate(
data = data_2 %>% dplyr::group_by(fold_),
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
id_col = "id",
id_method = "majority",
type = "multinomial",
apply_softmax = TRUE
)
expect_equal(mn_id_eval_2$fold_, c(1, 2))
expect_equal(dplyr::bind_rows(mn_id_eval_2$`Class Level Results`)$fold_, rep(1:2, each = 5))
expect_equal(
colnames(mn_id_eval_2),
c(
"fold_", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value",
"Kappa", "MCC", "Detection Rate", "Detection Prevalence",
"Prevalence", "Predictions", "Confusion Matrix", "Class Level Results",
"Process"
)
)
expect_equal(
colnames(mn_id_eval_2$`Class Level Results`[[1]]),
c(
"fold_", "Class", "Balanced Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value",
"Kappa", "Detection Rate", "Detection Prevalence",
"Prevalence", "Support", "Confusion Matrix"
)
)
expect_equal(
mn_id_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]][, -1], # remove fold_
mn_id_eval_2$`Class Level Results`[[2]]$`Confusion Matrix`[[1]][, -1]
)
expect_equal(
colnames(mn_id_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]),
c("fold_", "Class", "Prediction", "Target", "Pos_0", "Pos_1", "N")
)
  # What happens when a class (here cl_5) is not in the targets but has a probability column?
data_3 <- random_probabilities %>%
dplyr::mutate(
cl = as.factor(rep(1:4, each = 5)),
cl_char = paste0("cl_", cl)
) %>%
dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5))
# Testing multinomial
expect_warning(mb_eval <- evaluate(
data = data_3,
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
apply_softmax = TRUE,
type = "multinomial",
metrics = list("AUC" = TRUE)
), "The following classes were not found in 'response': cl_5.",
fixed = TRUE
)
expect_equal(mb_eval$`Overall Accuracy`, 0.3)
expect_equal(mb_eval$`Balanced Accuracy`, NaN)
expect_equal(mb_eval$F1, NaN)
expect_equal(mb_eval$Sensitivity, NaN)
expect_equal(mb_eval$Specificity, 0.8266667, tolerance = 1e-4)
expect_equal(mb_eval$`Pos Pred Value`, 0.31, tolerance = 1e-4)
expect_equal(mb_eval$`Neg Pred Value`, 0.825555555555556, tolerance = 1e-4)
expect_equal(mb_eval$AUC, 0.49, tolerance = 1e-4)
expect_equal(mb_eval$Kappa, 0.1133333, tolerance = 1e-4)
expect_equal(mb_eval$MCC, 0.130327042490215, tolerance = 1e-4)
expect_equal(mb_eval$`Detection Rate`, 0.06, tolerance = 1e-4)
expect_equal(mb_eval$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mb_eval$Prevalence, 0.2, tolerance = 1e-4)
expect_true("cl_5" %ni% mb_eval$Predictions[[1]]$Target)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$Target,
rep(paste0("cl_", 1:5), each = 5)
)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$Prediction,
rep(paste0("cl_", 1:5), 5)
)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$N,
c(
1L, 1L, 1L, 0L, 2L, 2L, 1L, 0L, 2L, 0L, 1L, 2L, 1L, 0L, 1L,
1L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L
)
)
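  # The last five counts (Target == "cl_5") are all zero, as cl_5 never
  # occurs as a target in `data_3`.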
expect_equal(
mb_eval$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(
0.466666666666667, 0.5, 0.566666666666667,
0.733333333333333, NaN
)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$F1,
c(0.2, 0.222222222222222, 0.285714285714286, 0.6, NaN)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$Sensitivity,
c(0.2, 0.2, 0.2, 0.6, NA)
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Specificity,
c(
0.733333333333333, 0.8, 0.933333333333333,
0.866666666666667, 0.8
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.2, 0.25, 0.5, 0.6, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Neg Pred Value`,
c(
0.733333333333333, 0.75, 0.777777777777778,
0.866666666666667, 1
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Kappa,
c(
-0.0666666666666667, -3.17206578464331e-16, 0.166666666666666,
0.466666666666667, 0
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Rate`,
c(0.05, 0.05, 0.05, 0.15, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.25, 0.2, 0.1, 0.25, 0.2),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Prevalence,
c(0.25, 0.25, 0.25, 0.25, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Support,
c(5L, 5L, 5L, 5L, NaN),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(11, 4, 4, 1),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[5]]$N,
c(16, 4, 0, 0),
tolerance = 1e-4
)
# TODO test that group_by and evaluate work correctly together
})
test_that("multinomial evaluations with one predicted class column is correctly unpacked in evaluate()", {
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 5,
num_observations = 20,
    apply_softmax = FALSE # TODO: test with TRUE as well
)
expect_equal(sum(random_probabilities), 51.78471, tolerance = 1e-5)
random_classes <- argmax(random_probabilities)
data_ <- random_probabilities %>%
dplyr::mutate(
cl = as.factor(rep(1:5, each = 4)),
cl_char = paste0("cl_", cl),
pred_cl = random_classes
)
data_ <- data_ %>%
dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5)) %>%
dplyr::mutate(pred_cl_char = paste0("cl_", pred_cl))
data_classes <- data_ %>%
base_select(c("cl", "cl_char", "pred_cl", "pred_cl_char"))
mn_eval_1 <- evaluate(
data = data_classes,
target_col = "cl_char",
prediction_cols = "pred_cl_char",
type = "multinomial",
apply_softmax = FALSE,
metrics = list("AUC" = TRUE)
)
# TODO Add more tests
expect_equal(mn_eval_1$`Overall Accuracy`, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_1$`Balanced Accuracy`, 0.5, tolerance = 1e-4)
expect_equal(
mn_eval_1$`Balanced Accuracy`,
mean(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`)
)
expect_equal(mn_eval_1$F1, NaN)
expect_equal(mn_eval_1$Sensitivity, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_1$Specificity, 0.8, tolerance = 1e-4)
expect_equal(mn_eval_1$`Pos Pred Value`, 0.23, tolerance = 1e-4)
expect_equal(mn_eval_1$`Neg Pred Value`, 0.7991667, tolerance = 1e-4)
expect_equal(mn_eval_1$AUC, 0.5)
expect_equal(mn_eval_1$Kappa, 0.008653846, tolerance = 1e-4)
expect_equal(mn_eval_1$MCC, 0.0, tolerance = 1e-4)
expect_equal(mn_eval_1$`Detection Rate`, 0.04, tolerance = 1e-4)
expect_equal(mn_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_1$Prevalence, 0.2, tolerance = 1e-4)
expect_equal(as.numeric(mn_eval_1$ROC[[1]]$auc),
0.5,
tolerance = 1e-4
)
expect_equal(
names(mn_eval_1$ROC[[1]]$rocs),
c(
"cl_1/cl_2", "cl_1/cl_3", "cl_1/cl_4", "cl_1/cl_5", "cl_2/cl_3",
"cl_2/cl_4", "cl_2/cl_5", "cl_3/cl_4", "cl_3/cl_5", "cl_4/cl_5"
)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$sensitivities,
c(1, 0.5, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[1]]$specificities,
c(0, 0.25, 1)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$sensitivities,
c(1, 0.75, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_1/cl_2`[[2]]$specificities,
c(0, 0, 1)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$sensitivities,
c(1, 0.5, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[1]]$specificities,
c(0, 0.25, 1)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$sensitivities,
c(1, 0.75, 0)
)
expect_equal(
mn_eval_1$ROC[[1]]$rocs$`cl_4/cl_5`[[2]]$specificities,
c(0, 0.25, 1)
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(0.50000, 0.37500, 0.59375, 0.50000, 0.53125),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$F1,
c(0.2222222, NaN, 0.3333333, 0.2222222, 0.2500000),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Sensitivity,
c(0.25, 0.0, 0.25, 0.25, 0.25),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Specificity,
c(0.7500, 0.7500, 0.9375, 0.7500, 0.8125),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.20, 0.00, 0.50, 0.20, 0.25),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`,
c(0.80, 0.750, 0.8333333, 0.80, 0.81250),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Kappa,
c(-3.172066e-16, -2.500000e-01, 2.307692e-01, -3.172066e-16, 6.250000e-02),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Rate`,
c(0.05, 0.00, 0.05, 0.05, 0.05),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.25, 0.20, 0.10, 0.25, 0.20),
tolerance = 1e-4
)
expect_equal(mn_eval_1$`Class Level Results`[[1]]$Prevalence,
c(0.2, 0.2, 0.2, 0.2, 0.2),
tolerance = 1e-4
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$Support,
c(4, 4, 4, 4, 4)
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(12, 4, 3, 1)
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N,
c(12, 4, 4, 0)
)
expect_equal(
colnames(mn_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]),
c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N")
)
expect_equal(
colnames(mn_eval_1$`Confusion Matrix`[[1]]),
c("Prediction", "Target", "N")
)
# Test Weighted metrics, and metrics == "all"
# Sampled data frame
if (TRUE) {
xpectr::set_test_seed(1)
mn_eval_2 <- evaluate(
data = data_classes %>% dplyr::sample_n(17),
target_col = "cl_char",
prediction_cols = "pred_cl_char",
type = "multinomial",
apply_softmax = FALSE,
metrics = "all"
)
# Create manual weighted mean function
manual_weighted_mean <- function(x, w) {
sum(x * w) / sum(w)
}
# Test manual weighted mean function
expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(1, 1)), 0.5, tolerance = 1e-4)
expect_equal(manual_weighted_mean(x = c(0.3, 0.7), w = c(1, 1)), 0.5, tolerance = 1e-4)
expect_equal(manual_weighted_mean(x = c(0.5, 0.5), w = c(1, 1)), 0.5, tolerance = 1e-4)
expect_equal(manual_weighted_mean(x = c(0.2), w = c(4)), 0.2)
expect_equal(manual_weighted_mean(x = c(0.2, 0.8), w = c(4, 1)), 0.32, tolerance = 1e-4)
expect_equal(mn_eval_2$`Overall Accuracy`, 0.2352941, tolerance = 1e-4)
expect_equal(mn_eval_2$`Balanced Accuracy`, 0.5380586, tolerance = 1e-4)
expect_equal(
mn_eval_2$`Balanced Accuracy`,
mean(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`)
)
expect_equal(mn_eval_2$`Weighted Balanced Accuracy`, 0.5236264, tolerance = 1e-4)
expect_equal(
mn_eval_2$`Weighted Balanced Accuracy`,
manual_weighted_mean(
x = mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`,
w = mn_eval_2$`Class Level Results`[[1]]$Support
)
)
expect_equal(mn_eval_2$Accuracy, 0.6941176, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Accuracy`, 0.6851211, tolerance = 1e-4)
expect_equal(mn_eval_2$F1, NaN)
expect_equal(mn_eval_2$`Weighted F1`, NaN)
expect_equal(mn_eval_2$Sensitivity, 0.2666667, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Sensitivity`, 0.2352941, tolerance = 1e-4)
expect_equal(mn_eval_2$Specificity, 0.8094505, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Specificity`, 0.8119586, tolerance = 1e-4)
expect_equal(mn_eval_2$`Pos Pred Value`, 0.2666667, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Pos Pred Value`, 0.2696078, tolerance = 1e-4)
expect_equal(mn_eval_2$`Neg Pred Value`, 0.8094505, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Neg Pred Value`, 0.7939237, tolerance = 1e-4)
expect_equal(mn_eval_2$AUC, 0.5416667, tolerance = 1e-4)
expect_equal(mn_eval_2$Kappa, 0.06428773, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Kappa`, 0.04481687, tolerance = 1e-4)
expect_equal(mn_eval_2$MCC, 0.0526315789473684, tolerance = 1e-4)
expect_equal(mn_eval_2$`Detection Rate`, 0.04705882, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Detection Rate`, 0.0449827, tolerance = 1e-4)
expect_equal(mn_eval_2$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Detection Prevalence`, 0.1937716, tolerance = 1e-4)
expect_equal(mn_eval_2$Prevalence, 0.2, tolerance = 1e-4)
expect_equal(mn_eval_2$`Weighted Prevalence`, 0.2110727, tolerance = 1e-4)
expect_equal(as.numeric(mn_eval_2$ROC[[1]]$auc), 0.5416667, tolerance = 1e-4)
  expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$direction, ">")
expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$sensitivities,
c(1, 1, 0),
tolerance = 1e-4
)
expect_equal(mn_eval_2$ROC[[1]]$rocs$`cl_3/cl_4`[[1]]$specificities,
c(0.00, 0.25, 1.00),
tolerance = 1e-4
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(0.5480769, 0.3461538, 0.5865385, 0.5595238, 0.6500000),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$F1,
c(0.2857143, NaN, 0.3333333, 0.2857143, 0.3333333),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Sensitivity,
c(0.250, 0.00, 0.250, 0.3333333, 0.500),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Specificity,
c(0.8461538, 0.6923077, 0.9230769, 0.7857143, 0.80),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.3333333, 0.0, 0.50, 0.25, 0.25),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Neg Pred Value`,
c(0.7857143, 0.6923077, 0.80, 0.8461538, 0.9230769),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Kappa,
c(0.1052632, -0.3076923, 0.2093023, 0.1052632, 0.2093023),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Detection Rate`,
c(0.05882353, 0.00, 0.05882353, 0.05882353, 0.05882353),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.1764706, 0.2352941, 0.1176471, 0.2352941, 0.2352941),
tolerance = 1e-4
)
expect_equal(mn_eval_2$`Class Level Results`[[1]]$Prevalence,
c(0.2352941, 0.2352941, 0.2352941, 0.1764706, 0.1176471),
tolerance = 1e-4
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$Support,
c(4, 4, 4, 3, 2)
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(11, 2, 3, 1)
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Prediction,
as.character(c(0, 1, 0, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Target,
as.character(c(0, 0, 1, 1))
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[2]]$N,
c(9, 4, 4, 0)
)
expect_equal(
colnames(mn_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]),
c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N")
)
expect_equal(
colnames(mn_eval_2$`Confusion Matrix`[[1]]),
c("Prediction", "Target", "N")
)
}
# Enabling and disabling a few metrics
# TODO
# ID level
if (TRUE) {
data_classes <- data_classes %>%
dplyr::mutate(id = factor(rep(1:10, each = 2)))
xpectr::set_test_seed(9)
suppressWarnings(
mn_id_eval_1 <- evaluate(
data = data_classes %>% dplyr::sample_n(13),
target_col = "cl_char",
prediction_cols = "pred_cl_char",
id_col = "id",
id_method = "mean",
type = "multinomial",
apply_softmax = FALSE,
metrics = "all"
)
)
expect_equal(mn_id_eval_1$`Overall Accuracy`, 0.222222, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Balanced Accuracy`, 0.5535714, tolerance = 1e-4)
expect_equal(
mn_id_eval_1$`Balanced Accuracy`,
mean(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`)
)
expect_equal(mn_id_eval_1$`Weighted Balanced Accuracy`, 0.5178571, tolerance = 1e-4)
expect_equal(
mn_id_eval_1$`Weighted Balanced Accuracy`,
manual_weighted_mean(
x = mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`,
w = mn_id_eval_1$`Class Level Results`[[1]]$Support
)
)
expect_equal(mn_id_eval_1$Accuracy, 0.688888889, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Accuracy`, 0.6790123, tolerance = 1e-4)
expect_equal(mn_id_eval_1$F1, NaN)
expect_equal(mn_id_eval_1$`Weighted F1`, NaN)
expect_equal(mn_id_eval_1$Sensitivity, 0.3, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Sensitivity`, 0.222222, tolerance = 1e-4)
expect_equal(mn_id_eval_1$Specificity, 0.8071429, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Specificity`, 0.8134921, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Pos Pred Value`, NaN)
expect_equal(mn_id_eval_1$`Weighted Pos Pred Value`, NaN)
expect_equal(mn_id_eval_1$`Neg Pred Value`, 0.81111, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Neg Pred Value`, 0.7901235, tolerance = 1e-4)
expect_equal(mn_id_eval_1$AUC, 0.575, tolerance = 1e-4)
expect_equal(mn_id_eval_1$Kappa, 0.043636, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Kappa`, 0.00404, tolerance = 1e-4)
expect_equal(mn_id_eval_1$MCC, 0.0510310363079829, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Detection Rate`, 0.04444444, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Detection Rate`, 0.03703704, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Detection Prevalence`, 0.1851852, tolerance = 1e-4)
expect_equal(mn_id_eval_1$Prevalence, 0.2, tolerance = 1e-4)
expect_equal(mn_id_eval_1$`Weighted Prevalence`, 0.2098765, tolerance = 1e-4)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(0.607142857142857, 0.285714285714286, 0.5, 0.875, 0.5),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$F1,
c(0.4, NaN, NaN, 0.5, NA),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Sensitivity,
c(0.5, 0.0, 0.0, 1.0, 0.0),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Specificity,
c(0.714285714285714, 0.571428571428571, 1, 0.75, 1),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.333333333333333, 0, NaN, 0.333333333333333, NaN),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Neg Pred Value`,
c(
0.833333333333333, 0.666666666666667, 0.777777777777778, 1,
0.777777777777778
),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Kappa,
c(0.181818181818182, -0.363636363636363, 0, 0.4, 0),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Rate`,
c(0.1111111, 0.0000000, 0.0000000, 0.1111111, 0.0000000),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.3333333, 0.3333333, 0, 0.3333333, 0),
tolerance = 1e-4
)
expect_equal(mn_id_eval_1$`Class Level Results`[[1]]$Prevalence,
c(0.2222222, 0.2222222, 0.2222222, 0.1111111, 0.2222222),
tolerance = 1e-4
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$Support,
c(2, 2, 2, 1, 2)
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Prediction,
c("0", "1", "0", "1")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Target,
c("0", "0", "1", "1")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_0,
c("TP", "FN", "FP", "TN")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$Pos_1,
c("TN", "FP", "FN", "TP")
)
expect_equal(
mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(5, 2, 1, 1)
)
expect_equal(
colnames(mn_id_eval_1$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]),
c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N")
)
expect_equal(
colnames(mn_id_eval_1$`Confusion Matrix`[[1]]),
c("Prediction", "Target", "N")
)
preds <- tidyr::unnest(mn_id_eval_1$Predictions[[1]], cols = c(Prediction))
expect_equal(
preds$Target,
c(
"cl_1", "cl_1", "cl_2", "cl_2", "cl_3", "cl_3", "cl_4", "cl_5",
"cl_5"
)
)
expect_equal(
preds$`Predicted Class`,
c(
"cl_2", "cl_1", "cl_1", "cl_4", "cl_2", "cl_2", "cl_4", "cl_1",
"cl_4"
)
)
expect_equal(
preds$id,
structure(
c(1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L),
.Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"),
class = "factor"
)
)
expect_equal(preds$id_method, rep("mean", 9))
expect_equal(
preds$cl_1,
c(
0.158794620741017, 0.261808068888474, 0.261808068888474, 0.148847581202078,
0.148847581202078, 0.148847581202078, 0.148847581202078, 0.40460967519169,
0.158794620741017
)
)
expect_equal(
preds$cl_2,
c(
0.261808068888474, 0.158794620741017, 0.158794620741017, 0.148847581202078,
0.40460967519169, 0.40460967519169, 0.148847581202078, 0.148847581202078,
0.158794620741017
)
)
expect_equal(
preds$cl_3,
c(
0.158794620741017, 0.261808068888474, 0.158794620741017, 0.148847581202078,
0.148847581202078, 0.148847581202078, 0.148847581202078, 0.148847581202078,
0.158794620741017
)
)
expect_equal(
preds$cl_4,
c(
0.158794620741017, 0.158794620741017, 0.158794620741017, 0.40460967519169,
0.148847581202078, 0.148847581202078, 0.40460967519169, 0.148847581202078,
0.261808068888474
)
)
expect_equal(
preds$cl_5,
c(
0.261808068888474, 0.158794620741017, 0.261808068888474, 0.148847581202078,
0.148847581202078, 0.148847581202078, 0.148847581202078, 0.148847581202078,
0.261808068888474
)
)
}
# Test grouping vars
data_2 <- data_classes %>%
dplyr::mutate(fold_ = 1) %>%
dplyr::bind_rows(data_classes %>% dplyr::mutate(fold_ = 2))
mn_id_eval_2 <- evaluate(
data = data_2 %>% dplyr::group_by(fold_),
target_col = "cl_char",
prediction_cols = "pred_cl_char",
id_col = "id",
id_method = "majority",
type = "multinomial",
apply_softmax = FALSE
)
expect_equal(mn_id_eval_2$fold_, c(1, 2))
expect_equal(dplyr::bind_rows(mn_id_eval_2$`Class Level Results`)$fold_, rep(1:2, each = 5))
expect_equal(
colnames(mn_id_eval_2),
c(
"fold_", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value",
"Kappa", "MCC", "Detection Rate", "Detection Prevalence",
"Prevalence", "Predictions", "Confusion Matrix", "Class Level Results",
"Process"
)
)
expect_equal(
colnames(mn_id_eval_2$`Class Level Results`[[1]]),
c(
"fold_", "Class", "Balanced Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value",
"Kappa", "Detection Rate", "Detection Prevalence",
"Prevalence", "Support", "Confusion Matrix"
)
)
expect_equal(
mn_id_eval_2$`Class Level Results`[[1]]$`Confusion Matrix`[[1]][, -1],
mn_id_eval_2$`Class Level Results`[[2]]$`Confusion Matrix`[[1]][, -1]
)
  # What happens when a class (here cl_5) is not in the targets but still appears among the predictions?
data_3 <- data_classes %>%
dplyr::mutate(
cl = as.factor(rep(1:4, each = 5)),
cl_char = paste0("cl_", cl)
)
# Testing multinomial
expect_warning(mb_eval <- evaluate(
data = data_3,
target_col = "cl_char",
prediction_cols = "pred_cl_char",
apply_softmax = FALSE,
type = "multinomial",
metrics = list("AUC" = TRUE)
), "The following classes were not found in 'response': cl_5.",
fixed = TRUE
)
expect_equal(mb_eval$`Overall Accuracy`, 0.3)
expect_equal(mb_eval$`Balanced Accuracy`, NaN)
expect_equal(mb_eval$F1, NaN)
expect_equal(mb_eval$Sensitivity, NaN)
expect_equal(mb_eval$Specificity, 0.8266667, tolerance = 1e-4)
expect_equal(mb_eval$`Pos Pred Value`, 0.31, tolerance = 1e-4)
expect_equal(mb_eval$`Neg Pred Value`, 0.825555555555556, tolerance = 1e-4)
expect_equal(mb_eval$AUC, 0.5666667, tolerance = 1e-4)
expect_equal(mb_eval$Kappa, 0.1133333, tolerance = 1e-4)
expect_equal(mb_eval$MCC, 0.1303270, tolerance = 1e-4)
expect_equal(mb_eval$`Detection Rate`, 0.06, tolerance = 1e-4)
expect_equal(mb_eval$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mb_eval$Prevalence, 0.2, tolerance = 1e-4)
expect_true("cl_5" %ni% mb_eval$Predictions[[1]]$Target)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$Target,
rep(paste0("cl_", 1:5), each = 5)
)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$Prediction,
rep(paste0("cl_", 1:5), 5)
)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$N,
c(
1L, 1L, 1L, 0L, 2L, 2L, 1L, 0L, 2L, 0L, 1L, 2L, 1L, 0L, 1L,
1L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L
)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(
0.466666666666667, 0.5, 0.566666666666667,
0.733333333333333, NaN
)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$F1,
c(0.2, 0.222222222222222, 0.285714285714286, 0.6, NaN)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$Sensitivity,
c(0.2, 0.2, 0.2, 0.6, NA)
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Specificity,
c(
0.733333333333333, 0.8, 0.933333333333333,
0.866666666666667, 0.8
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.2, 0.25, 0.5, 0.6, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Neg Pred Value`,
c(
0.733333333333333, 0.75, 0.777777777777778,
0.866666666666667, 1
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Kappa,
c(
-0.0666666666666667, -3.17206578464331e-16, 0.166666666666666,
0.466666666666667, 0
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Rate`,
c(0.05, 0.05, 0.05, 0.15, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.25, 0.2, 0.1, 0.25, 0.2),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Prevalence,
c(0.25, 0.25, 0.25, 0.25, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Support,
c(5L, 5L, 5L, 5L, NaN),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(11, 4, 4, 1),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[5]]$N,
c(16, 4, 0, 0),
tolerance = 1e-4
)
# What if a target class is never predicted?
mb_eval <- evaluate(
data = data_3,
target_col = "pred_cl_char",
prediction_cols = "cl_char",
apply_softmax = FALSE,
type = "multinomial",
metrics = list("AUC" = TRUE)
)
expect_equal(mb_eval$`Overall Accuracy`, 0.3, tolerance = 1e-4)
expect_equal(mb_eval$`Balanced Accuracy`, 0.5677778, tolerance = 1e-4)
expect_equal(mb_eval$F1, NaN)
expect_equal(mb_eval$Sensitivity, 0.31)
expect_equal(mb_eval$Specificity, 0.8255556, tolerance = 1e-4)
expect_equal(mb_eval$`Pos Pred Value`, NaN, tolerance = 1e-4)
expect_equal(mb_eval$`Neg Pred Value`, 0.8266667, tolerance = 1e-4)
expect_equal(mb_eval$AUC, 0.56875, tolerance = 1e-4)
expect_equal(mb_eval$Kappa, 0.1133333, tolerance = 1e-4)
expect_equal(mb_eval$MCC, 0.130327042490215, tolerance = 1e-4)
expect_equal(mb_eval$`Detection Rate`, 0.06, tolerance = 1e-4)
expect_equal(mb_eval$`Detection Prevalence`, 0.2, tolerance = 1e-4)
expect_equal(mb_eval$Prevalence, 0.2, tolerance = 1e-4)
expect_true("cl_5" %in% mb_eval$Predictions[[1]]$Target)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$Target,
rep(paste0("cl_", 1:5), each = 5)
)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$Prediction,
rep(paste0("cl_", 1:5), 5)
)
expect_equal(
mb_eval$`Confusion Matrix`[[1]]$N,
c(
1L, 2L, 1L, 1L, 0L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 2L, 0L, 3L, 0L, 2L, 0L, 1L, 1L, 0L
)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$Class,
c("cl_1", "cl_2", "cl_3", "cl_4", "cl_5")
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(
0.466666666666667, 0.5, 0.638888888888889,
0.733333333333333, 0.5
)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$F1,
c(0.2, 0.222222222222222, 0.285714285714286, 0.6, NaN)
)
expect_equal(
mb_eval$`Class Level Results`[[1]]$Sensitivity,
c(0.2, 0.25, 0.5, 0.6, 0)
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Specificity,
c(
0.733333333333333, 0.75, 0.777777777777778,
0.866666666666667, 1
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Pos Pred Value`,
c(0.2, 0.2, 0.2, 0.6, NaN),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Neg Pred Value`,
c(
0.733333333333333, 0.8, 0.933333333333333, 0.866666666666667,
0.8
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Kappa,
c(
-0.0666666666666667, -3.17206578464331e-16, 0.166666666666666,
0.466666666666667, 0
),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Rate`,
c(0.05, 0.05, 0.05, 0.15, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Detection Prevalence`,
c(0.25, 0.25, 0.25, 0.25, 0),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Prevalence,
c(0.25, 0.2, 0.1, 0.25, 0.2),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$Support,
c(5L, 4L, 2L, 5L, 4),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]$N,
c(11, 4, 4, 1),
tolerance = 1e-4
)
expect_equal(mb_eval$`Class Level Results`[[1]]$`Confusion Matrix`[[5]]$N,
c(16, 0, 4, 0),
tolerance = 1e-4
)
#
  # In grouped multinomial evaluation, when the subsets do not contain
  # the same classes, test that only the classes available in each
  # subset are used
#
xpectr::set_test_seed(4)
# group a: a,b,c,d
# group b: a,b,c
dd <- dplyr::bind_rows(
data.frame("target" = sample(c("a", "b", "c", "d"), 20, replace=T),
"prediction" = sample(c("a", "b", "c", "d"), 20, replace=T),
"group" = "a",
stringsAsFactors = FALSE),
data.frame("target" = sample(c("a", "b", "c"), 20, replace=T),
"prediction" = sample(c("a", "b", "c"), 20, replace=T),
"group" = "b",
stringsAsFactors = FALSE)
)
res <- dd %>%
dplyr::group_by(.data$group) %>%
evaluate(target_col = "target", prediction_cols = "prediction", type="multinomial")
expect_equal(
res$Process[[1]]$Classes,
c("a", "b", "c", "d"),
fixed = TRUE)
expect_equal(
res$Process[[2]]$Classes,
c("a", "b", "c"),
fixed = TRUE)
expect_equal(
res$`Class Level Results`[[1]]$Class,
c("a", "b", "c", "d"),
fixed = TRUE)
expect_equal(
res$`Class Level Results`[[2]]$Class,
c("a", "b", "c"),
fixed = TRUE)
expect_equal(
unique(res$`Confusion Matrix`[[1]]$Target),
c("a", "b", "c", "d"),
fixed = TRUE)
expect_equal(
unique(res$`Confusion Matrix`[[2]]$Target),
c("a", "b", "c"),
fixed = TRUE)
## Testing 'res' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(res),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
res[["group"]],
c("a", "b"),
fixed = TRUE)
expect_equal(
res[["Overall Accuracy"]],
c(0.25, 0.25),
tolerance = 1e-4)
expect_equal(
res[["Balanced Accuracy"]],
c(0.51335, 0.4472),
tolerance = 1e-4)
expect_equal(
res[["F1"]],
c(NaN, 0.2552),
tolerance = 1e-4)
expect_equal(
res[["Sensitivity"]],
c(0.28125, 0.27727),
tolerance = 1e-4)
expect_equal(
res[["Specificity"]],
c(0.74545, 0.61713),
tolerance = 1e-4)
expect_equal(
res[["Pos Pred Value"]],
c(0.2125, 0.26389),
tolerance = 1e-4)
expect_equal(
res[["Neg Pred Value"]],
c(0.75417, 0.63095),
tolerance = 1e-4)
expect_equal(
res[["Kappa"]],
c(-0.01188, -0.08712),
tolerance = 1e-4)
expect_equal(
res[["MCC"]],
c(0, -0.1117),
tolerance = 1e-4)
expect_equal(
res[["Detection Rate"]],
c(0.0625, 0.08333),
tolerance = 1e-4)
expect_equal(
res[["Detection Prevalence"]],
c(0.25, 0.33333),
tolerance = 1e-4)
expect_equal(
res[["Prevalence"]],
c(0.25, 0.33333),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(res),
c("group", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa",
"MCC", "Detection Rate", "Detection Prevalence", "Prevalence",
"Predictions", "Confusion Matrix", "Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(res),
c("character", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(res),
c("character", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "double",
"double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(res),
c(2L, 17L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(res)),
character(0),
fixed = TRUE)
## Finished testing 'res' ####
# TODO test that group_by and evaluate work correctly together
})
test_that("nested tibbles are correctly added to grouped multinomial results in evaluate()", {
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 3,
num_observations = 60,
    apply_softmax = FALSE # TODO: test with TRUE as well
)
expect_equal(sum(random_probabilities), 92.56257, tolerance = 1e-5)
random_classes <- argmax(random_probabilities)
data_ <- random_probabilities %>%
dplyr::mutate(
cl = as.factor(rep(1:3, each = 20)),
cl_char = paste0("cl_", cl),
pred_cl = random_classes
)
data_ <- data_ %>%
dplyr::rename_at(dplyr::vars(paste0("class_", 1:3)), .funs = ~ paste0("cl_", 1:3)) %>%
dplyr::mutate(pred_cl_char = paste0("cl_", pred_cl))
# add grouping vars
data_[["gk1_char"]] <- as.character(rep(1:3, each = 20))
data_[["gk2_int"]] <- rep(rep(1:5, each = 4), 3)
data_ <- data_ %>%
dplyr::bind_rows(data_ %>%
dplyr::mutate(
gk1_char = "10",
gk2_int = 10
))
data_ <- data_ %>%
dplyr::sample_frac()
mn_eval_1 <-
data_ %>%
dplyr::group_by(.data$gk1_char, .data$gk2_int) %>%
evaluate(
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:3),
type = "multinomial",
apply_softmax = TRUE
) %>%
.[, c("gk1_char", "gk2_int", "Predictions", "Confusion Matrix", "Class Level Results")]
clr_grkeys <- plyr::ldply(mn_eval_1$`Class Level Results`, function(clr) {
tibble::tibble(
"gk1_char" = unique(clr[["gk1_char"]]),
"gk2_int" = unique(clr[["gk2_int"]])
)
})
expect_equal(mn_eval_1$gk1_char, clr_grkeys$gk1_char)
expect_equal(mn_eval_1$gk2_int, clr_grkeys$gk2_int)
clr_cnfm_grkeys <- plyr::ldply(mn_eval_1$`Class Level Results`, function(clr) {
bn_confmat <- dplyr::bind_rows(clr[["Confusion Matrix"]])
tibble::tibble(
"gk1_char" = unique(bn_confmat[["gk1_char"]]),
"gk2_int" = unique(bn_confmat[["gk2_int"]])
)
})
expect_equal(mn_eval_1$gk1_char, clr_cnfm_grkeys$gk1_char)
expect_equal(mn_eval_1$gk2_int, clr_cnfm_grkeys$gk2_int)
pred_grkeys <- plyr::ldply(mn_eval_1$Predictions, function(clr) {
tibble::tibble(
"gk1_char" = unique(clr[["gk1_char"]]),
"gk2_int" = unique(clr[["gk2_int"]])
)
})
expect_equal(mn_eval_1$gk1_char, pred_grkeys$gk1_char)
expect_equal(mn_eval_1$gk2_int, pred_grkeys$gk2_int)
cnfm_grkeys <- plyr::ldply(mn_eval_1$`Confusion Matrix`, function(clr) {
tibble::tibble(
"gk1_char" = unique(clr[["gk1_char"]]),
"gk2_int" = unique(clr[["gk2_int"]])
)
})
expect_equal(mn_eval_1$gk1_char, cnfm_grkeys$gk1_char)
expect_equal(mn_eval_1$gk2_int, cnfm_grkeys$gk2_int)
})
test_that("nested tibbles are correctly added to grouped binomial results in evaluate()", {
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 1,
num_observations = 60,
apply_softmax = FALSE # Also test with TRUE
)
expect_equal(sum(random_probabilities), 30.72525, tolerance = 1e-5)
random_classes <- ifelse(random_probabilities$class_1 > 0.5, 1, 0)
data_ <- random_probabilities %>%
dplyr::mutate(
pred_cl = random_classes,
cl = as.factor(rep(c(0, 1), each = 30))
)
data_ <- data_ %>%
dplyr::sample_frac()
# add grouping vars
data_[["gk1_char"]] <- as.character(rep(1:3, each = 20))
data_[["gk2_int"]] <- rep(rep(1:2, each = 10), 3)
data_ <- data_ %>%
dplyr::bind_rows(data_ %>%
dplyr::mutate(
gk1_char = "10",
gk2_int = 10
))
data_ <- data_ %>%
dplyr::sample_frac()
bn_eval_1 <-
data_ %>%
dplyr::group_by(.data$gk1_char, .data$gk2_int) %>%
evaluate(
target_col = "cl",
prediction_cols = "class_1",
type = "binomial",
) %>%
.[, c("gk1_char", "gk2_int", "Predictions", "Confusion Matrix")]
pred_grkeys <- plyr::ldply(bn_eval_1$Predictions, function(clr) {
tibble::tibble(
"gk1_char" = unique(clr[["gk1_char"]]),
"gk2_int" = unique(clr[["gk2_int"]])
)
})
expect_equal(bn_eval_1$gk1_char, pred_grkeys$gk1_char)
expect_equal(bn_eval_1$gk2_int, pred_grkeys$gk2_int)
cnfm_grkeys <- plyr::ldply(bn_eval_1$`Confusion Matrix`, function(clr) {
tibble::tibble(
"gk1_char" = unique(clr[["gk1_char"]]),
"gk2_int" = unique(clr[["gk2_int"]])
)
})
expect_equal(bn_eval_1$gk1_char, cnfm_grkeys$gk1_char)
expect_equal(bn_eval_1$gk2_int, cnfm_grkeys$gk2_int)
})
test_that("nested tibbles are correctly added to grouped gaussian results in evaluate()", {
#### ####
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 2,
num_observations = 60,
apply_softmax = FALSE # Also test with TRUE
)
expect_equal(sum(random_probabilities), 61.59519, tolerance = 1e-5)
data_ <- random_probabilities
# add grouping vars
data_[["gk1_char"]] <- as.character(rep(1:3, each = 20))
data_[["gk2_int"]] <- rep(rep(1:2, each = 10), 3)
data_ <- data_ %>%
dplyr::bind_rows(data_ %>%
dplyr::mutate(
gk1_char = "10",
gk2_int = 10
))
data_ <- data_ %>%
dplyr::sample_frac()
gs_eval_1 <-
data_ %>%
dplyr::group_by(.data$gk1_char, .data$gk2_int) %>%
evaluate(
target_col = "class_1",
prediction_cols = "class_2",
type = "gaussian",
) %>%
.[, c("gk1_char", "gk2_int", "Predictions")]
pred_grkeys <- plyr::ldply(gs_eval_1$Predictions, function(clr) {
tibble::tibble(
"gk1_char" = unique(clr[["gk1_char"]]),
"gk2_int" = unique(clr[["gk2_int"]])
)
})
expect_equal(gs_eval_1$gk1_char, pred_grkeys$gk1_char)
expect_equal(gs_eval_1$gk2_int, pred_grkeys$gk2_int)
})
test_that("specific multinomial predictions yield correct results in evaluate()", {
#### ####
data <- tibble::tibble(
"target" = c(
"1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1",
"2", "1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3",
"1", "2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2",
"1", "2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1",
"2", "2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1",
"2", "3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2",
"2", "3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2",
"3", "3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2",
"3", "1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3",
"3", "3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3",
"1", "3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3",
"3", "2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1",
"3", "3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3",
"2", "1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3",
"3", "2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2",
"1", "1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3",
"2", "2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1",
"1", "3", "3", "1", "3", "1", "2", "2", "3", "1", "3", "3", "2",
"2", "1", "1", "1", "2", "1", "2", "3", "3", "3", "2", "1", "1",
"3", "3", "1", "3"
),
"1" = c(
0.362824302965311, 0.360903717826462, 0.285822146515266, 0.37477371815703,
0.381700937144022, 0.273724077239358, 0.341941761553692, 0.304958692149421,
0.296272111439159, 0.42404336282095, 0.293116571086624, 0.435758286831625,
0.28272698563188, 0.224804797891925, 0.198163481343272, 0.222945305755905,
0.300594211265473, 0.3278586388128, 0.414855759724269, 0.33840681107911,
0.407904832070239, 0.329062099428566, 0.347623656119297, 0.436145034077774,
0.335017655729982, 0.331364482540279, 0.283536882101457, 0.281143446843595,
0.425995399790343, 0.261537001684637, 0.237313955938272, 0.23261151589263,
0.385748271928449, 0.334411930525475, 0.401249217427923, 0.420235849517611,
0.425289710559998, 0.262465444718002, 0.336697104880243, 0.231858151646129,
0.210092588546093, 0.47741912516683, 0.244565586519055, 0.286947978160956,
0.417209987822813, 0.409276670773936, 0.354307298336307, 0.276564090289706,
0.330862775865476, 0.44695225847507, 0.317576338796576, 0.382847300413345,
0.224081474342672, 0.308365835600141, 0.302015399930661, 0.313332459936172,
0.251934064646274, 0.23915184388103, 0.277538091890923, 0.471891254622384,
0.40754972988395, 0.345372198209239, 0.33466402327811, 0.462461145964111,
0.266579885476856, 0.352899631920573, 0.421312945065338, 0.200478726356666,
0.309794998292503, 0.380501567619241, 0.370761060364264, 0.286594888362904,
0.361067815387182, 0.245947918320429, 0.389260130880553, 0.321714471097802,
0.331089372813525, 0.256210536793289, 0.285625126260833, 0.27568485403864,
0.274822151212381, 0.389484563107126, 0.212979233666992, 0.296561920614094,
0.200225006332582, 0.242554965054235, 0.401234678799669, 0.330009524801572,
0.348663184860405, 0.411214822720279, 0.307151655834041, 0.204995757155556,
0.351402327921561, 0.374094844465352, 0.248711786514304, 0.336708510840726,
0.408292124109811, 0.357562258085327, 0.34181138284562, 0.323252856286123,
0.447544862966328, 0.429915527750429, 0.363601070683015, 0.333552425710284,
0.398225671426714, 0.221696833964976, 0.271265251552504, 0.481294916218702,
0.328149707350169, 0.450855293083027, 0.237104558776676, 0.374383211394644,
0.443948913175648, 0.494051191743219, 0.256273986260961, 0.489484795353784,
0.329464982964633, 0.220032486448317, 0.408557119092641, 0.319230850919545,
0.355237403525459, 0.23455484947749, 0.382020061725786, 0.267110221515588,
0.449602783872994, 0.202164855975727, 0.236445026776993, 0.319748604954957,
0.293320401651886, 0.287711207487059, 0.358419477510523, 0.346556723204159,
0.206074092549024, 0.22717751457188, 0.397646700390622, 0.446688182141175,
0.25620775941098, 0.501804532497241, 0.408063047760274, 0.443251289188255,
0.249444482951462, 0.383829461406706, 0.526313287884649, 0.452849242524878,
0.253013631607335, 0.334191437619488, 0.210413611589952, 0.23421023559276,
0.321507174883409, 0.509517153778936, 0.315671789411075, 0.255620649439559,
0.239331881508991, 0.322754669547894, 0.435042212296461, 0.251412865189562,
0.282387104005381, 0.245973141871465, 0.341257410449456, 0.277527088361948,
0.399220690874693, 0.439926753580167, 0.345819178673885, 0.305382501740128,
0.418275547531307, 0.357628745014896, 0.294197703135495, 0.438696916274104,
0.316517159370203, 0.357792733303972, 0.28831445694109, 0.327860691995961,
0.22281288859293, 0.314065020723387, 0.459321941470143, 0.387693577991577,
0.363268586267414, 0.303950826699894, 0.372655080723904, 0.339529039157154,
0.224621024989129, 0.296482850978123, 0.314332611351849, 0.234267899937577,
0.437859055024521, 0.269731707168111, 0.331690324355338, 0.280473038011755,
0.399824753263675, 0.383719422449624, 0.330850303521819, 0.225277208971775,
0.359553396121816, 0.328615974470287, 0.231945476475709, 0.214223736176299,
0.20582242433687, 0.375727645283227, 0.235524783117192, 0.286268463291008,
0.229673192858615, 0.283772599798294, 0.219611172339202, 0.295826542146669,
0.285141592656138, 0.192558055771125, 0.355721326100929, 0.409545410065343,
0.331835694337497, 0.230954644322117, 0.340582165139984, 0.294251623668547,
0.298768624646445, 0.291985965689547, 0.300172335564108, 0.318523203157749,
0.237566842343213, 0.37261470439223, 0.331390765035546, 0.364342409833174,
0.397916841690445, 0.217399738068907, 0.264193767365295, 0.208427577921687,
0.420006306243977, 0.292308232373888, 0.281443833338, 0.30947189334135,
0.257203806304276, 0.295150877610198, 0.439630183471502, 0.321730635029117,
0.383315062595601, 0.225583239939362, 0.309872283286329, 0.255887220229597,
0.316939046835727, 0.449868348167623, 0.304614105760487, 0.248742717566446,
0.305293159179038, 0.222558427746317, 0.244342581208601, 0.503534783305527,
0.286383868562386, 0.428804177911175, 0.295404759872436, 0.30687255200497,
0.445596166797302, 0.334706962656109
),
"2" = c(
0.385780580440307, 0.342604539124775, 0.423080439134247, 0.225175445397002,
0.312989908016585, 0.475217543730522, 0.224916968457944, 0.30476258690942,
0.482054751251862, 0.214841434476723, 0.344321198446437, 0.333138708845406,
0.292804142075078, 0.31480368331023, 0.453026287138725, 0.4719140606185,
0.338078757628025, 0.296611945042962, 0.25365258932653, 0.305362051955673,
0.240571861737913, 0.333693522274202, 0.27392009695986, 0.28042156343828,
0.377909676645575, 0.279959262495121, 0.2122887561223, 0.470196696415608,
0.239862567097843, 0.370221172919109, 0.25443866213747, 0.477173210501813,
0.259642635838842, 0.322066433258374, 0.353453695410528, 0.1765751153703,
0.285504011914052, 0.424456315084771, 0.288460995364288, 0.35958455461334,
0.478761722406255, 0.250631249168976, 0.445446891169545, 0.369385615626211,
0.349831286201674, 0.234976115878086, 0.378701320138235, 0.431201840261207,
0.350278234492975, 0.262288382369503, 0.307058606700166, 0.320533642024935,
0.43236586844692, 0.292897655407014, 0.272621596850078, 0.270996516799443,
0.410085989322878, 0.444842557489891, 0.388276130257391, 0.235166674305724,
0.285112014266039, 0.437273128065374, 0.454306056457757, 0.193441477963145,
0.420457813840421, 0.20658635492773, 0.266523286854865, 0.398245797218399,
0.397335280211415, 0.214226033921459, 0.334049449261309, 0.465337959942035,
0.233723758733514, 0.31472699114585, 0.190099685465158, 0.336264164127875,
0.228217420461845, 0.478951976081042, 0.445589110407979, 0.429453925922651,
0.472930553187392, 0.332138049773757, 0.431357175765288, 0.227158863320801,
0.414490874557254, 0.492841238109576, 0.234359938505408, 0.446090237254051,
0.35319859961771, 0.345280017404155, 0.327026905651897, 0.315397998189469,
0.341892635859671, 0.363505947910766, 0.404023366558927, 0.276862535515932,
0.294742179813213, 0.294768056642531, 0.266684517736902, 0.311820705852887,
0.274265040914617, 0.245816171241433, 0.440341510685916, 0.421104352741149,
0.388918784994157, 0.421278084276545, 0.240879703622285, 0.289683431392311,
0.339677067961078, 0.208333263474704, 0.437464387830416, 0.367535015967519,
0.226639265821188, 0.311401754535334, 0.372083534671264, 0.250419612561257,
0.299632515128755, 0.32824246642957, 0.297400161614639, 0.313227906367457,
0.300459136297168, 0.303048382688999, 0.423041978939619, 0.21995462356869,
0.220422428036373, 0.405812939731575, 0.295769536850216, 0.276168343895395,
0.265750665287833, 0.253402549071951, 0.411972235353439, 0.351927605924981,
0.485986422454677, 0.321551112338582, 0.206970975070247, 0.200850312668311,
0.372875934652805, 0.234374628487236, 0.245897425556106, 0.253770902739585,
0.468416469360073, 0.410120970528605, 0.238800068178443, 0.280904468157008,
0.359143089366176, 0.407538360613741, 0.463482198596469, 0.346052750154407,
0.287160871624961, 0.227514884062728, 0.240774210253124, 0.468578579358505,
0.353317373812137, 0.349640083059674, 0.369950508544139, 0.380002971753307,
0.289234489357681, 0.395516529035609, 0.467797098876064, 0.338410210733615,
0.331006277488809, 0.266926876246406, 0.198909961692134, 0.477956034101944,
0.304153252664601, 0.269291217911081, 0.42201242766336, 0.236603843824688,
0.323467921547914, 0.352132946072107, 0.369703528563403, 0.329596532065509,
0.327226373739197, 0.462085224932779, 0.189039991684081, 0.314951443633383,
0.284435887439821, 0.366590341963145, 0.395601714171088, 0.335225687954128,
0.5311988845767, 0.346007208388757, 0.446522267443117, 0.343863071903928,
0.292892871402239, 0.382830131827548, 0.385357786983514, 0.214142145338179,
0.376626067248013, 0.389178684389126, 0.338344548396213, 0.403001958525644,
0.290654812925342, 0.247101836432825, 0.370492901531161, 0.461281265139314,
0.375012631382266, 0.363470643270294, 0.226067381809261, 0.313047833074042,
0.479443190691509, 0.490484639496364, 0.391240933517263, 0.478047191943444,
0.356438854507902, 0.343453444225452, 0.216202187035667, 0.237739153185802,
0.301640124052444, 0.247512173039711, 0.309589249125308, 0.308972701838745,
0.358423819908425, 0.304335319964878, 0.448013834885817, 0.378099789829676,
0.541697788505534, 0.268360413749574, 0.331964563898707, 0.26728121083406,
0.225911392328278, 0.533006780442077, 0.342769074491857, 0.347630476342447,
0.345065351570056, 0.35598982747141, 0.453613405495365, 0.191197557332096,
0.401269074847496, 0.438308839883431, 0.239209391932476, 0.29631107556132,
0.178586958336824, 0.478858385133731, 0.475817691964852, 0.427658658905656,
0.298338105256703, 0.272983994354953, 0.308124320381545, 0.471773233423636,
0.373331904052012, 0.260774223314073, 0.326618966011962, 0.252950597054979,
0.399516649309411, 0.258469593402028, 0.328387994212459, 0.263157498352965,
0.180860493953652, 0.325672027376522
),
"3" = c(
0.251395116594382, 0.296491743048763, 0.291097414350488, 0.400050836445967,
0.305309154839393, 0.25105837903012, 0.433141269988364, 0.390278720941159,
0.221673137308979, 0.361115202702327, 0.362562230466938, 0.231103004322969,
0.424468872293042, 0.460391518797845, 0.348810231518003, 0.305140633625595,
0.361327031106502, 0.375529416144238, 0.331491650949201, 0.356231136965217,
0.351523306191848, 0.337244378297231, 0.378456246920842, 0.283433402483946,
0.287072667624444, 0.3886762549646, 0.504174361776243, 0.248659856740797,
0.334142033111814, 0.368241825396254, 0.508247381924258, 0.290215273605557,
0.354609092232709, 0.343521636216152, 0.245297087161549, 0.403189035112089,
0.28920627752595, 0.313078240197227, 0.374841899755469, 0.408557293740532,
0.311145689047652, 0.271949625664194, 0.3099875223114, 0.343666406212833,
0.232958725975514, 0.355747213347978, 0.266991381525458, 0.292234069449087,
0.318858989641549, 0.290759359155428, 0.375365054503257, 0.29661905756172,
0.343552657210408, 0.398736508992845, 0.425363003219261, 0.415671023264385,
0.337979946030848, 0.316005598629079, 0.334185777851686, 0.292942071071892,
0.30733825585001, 0.217354673725387, 0.211029920264134, 0.344097376072744,
0.312962300682723, 0.440514013151697, 0.312163768079797, 0.401275476424934,
0.292869721496081, 0.4052723984593, 0.295189490374428, 0.24806715169506,
0.405208425879304, 0.439325090533721, 0.420640183654289, 0.342021364774323,
0.440693206724629, 0.264837487125669, 0.268785763331188, 0.294861220038709,
0.252247295600227, 0.278377387119117, 0.35566359056772, 0.476279216065105,
0.385284119110164, 0.264603796836189, 0.364405382694923, 0.223900237944377,
0.298138215521885, 0.243505159875566, 0.365821438514061, 0.479606244654974,
0.306705036218769, 0.262399207623883, 0.347264846926769, 0.386428953643342,
0.296965696076976, 0.347669685272142, 0.391504099417478, 0.364926437860989,
0.278190096119055, 0.324268301008138, 0.196057418631069, 0.245343221548567,
0.212855543579129, 0.357025081758479, 0.487855044825211, 0.229021652388988,
0.332173224688753, 0.340811443442268, 0.325431053392908, 0.258081772637838,
0.329411821003164, 0.194547053721447, 0.371642479067775, 0.260095592084959,
0.370902501906612, 0.451725047122112, 0.29404271929272, 0.367541242712998,
0.344303460177374, 0.462396767833511, 0.194937959334594, 0.512935154915721,
0.329974788090632, 0.392022204292698, 0.467785436372791, 0.404083051149648,
0.440928933060281, 0.45888624344099, 0.229608287136038, 0.30151567087086,
0.307939484996299, 0.451271373089538, 0.395382324539131, 0.352461505190514,
0.370916305936214, 0.263820839015524, 0.34603952668362, 0.30297780807216,
0.282139047688465, 0.206049568064689, 0.234886643936908, 0.266246289318114,
0.387843279026488, 0.258270201766771, 0.326104189813578, 0.419737014252832,
0.39133195349163, 0.262967962158336, 0.443554000335801, 0.275800771201936,
0.407350744678872, 0.327605247392432, 0.1950072791594, 0.36858416305713,
0.428378406636938, 0.358510329092926, 0.190945490674481, 0.384062700904437,
0.269773031636498, 0.293146370173427, 0.455270859633981, 0.216661464157928,
0.277571199804092, 0.373080037074023, 0.283789869201145, 0.324699239901208,
0.360014919081883, 0.290074320623921, 0.341982014495507, 0.34254277593853,
0.449960737667873, 0.223849754343834, 0.351638066845776, 0.29735497837504,
0.352295526292765, 0.329458831336961, 0.231743205105008, 0.325245272888717,
0.24418009043417, 0.35750994063312, 0.239145121205034, 0.421869028158495,
0.269248073573241, 0.347438161004341, 0.282951888661148, 0.505384816650066,
0.223549179488312, 0.22710189316125, 0.330805148081968, 0.371720832502581,
0.349791790952842, 0.424282189096888, 0.39756162199313, 0.324494998684386,
0.419164944280864, 0.260801711446479, 0.538407835073546, 0.400683703634949,
0.290883616449876, 0.225742760705341, 0.389147894143535, 0.226126265909887,
0.35841955283596, 0.463988500003423, 0.428076486863404, 0.352715436748856,
0.366524181610059, 0.521533182638172, 0.349828585734708, 0.396775674492708,
0.34280755544513, 0.403678714345574, 0.251813829550075, 0.303377007012575,
0.220735369151253, 0.359024881858196, 0.336644671065747, 0.368376379332766,
0.376171765981278, 0.249593481489016, 0.393037158142848, 0.443941945735865,
0.234928342185967, 0.351701940154702, 0.264942761166635, 0.499330549326554,
0.341527118848228, 0.266540282506371, 0.321160424596023, 0.381958289409563,
0.438097979067575, 0.295558374926908, 0.214310024748819, 0.316454120864747,
0.38472284790757, 0.277147657477424, 0.387261573857968, 0.279484049009918,
0.32137493676895, 0.51666734893961, 0.429038452779437, 0.243514619639494,
0.314099482128203, 0.312726228686797, 0.376207245915106, 0.429969949642065,
0.373543339249046, 0.339621009967369
),
".groups" = rep(1:10, each = 25)
)
evals <- evaluate(
data = data %>% dplyr::group_by(.data$.groups),
target_col = "target",
prediction_cols = c("1", "2", "3"),
metrics = list("AUC" = TRUE),
type = "multinomial"
)
expect_equal(
colnames(evals),
c(
".groups", "Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value", "AUC",
"Kappa", "MCC", "Detection Rate", "Detection Prevalence",
"Prevalence", "Predictions", "ROC", "Confusion Matrix", "Class Level Results",
"Process"
)
)
expect_equal(
evals$`Overall Accuracy`,
c(0.56, 0.36, 0.64, 0.28, 0.52, 0.24, 0.44, 0.4, 0.32, 0.44)
)
expect_equal(evals$`Balanced Accuracy`,
c(
0.665178571428571, 0.523974867724868, 0.73776455026455, 0.466104497354497,
0.636243386243386, 0.427744708994709, 0.580687830687831, 0.552910052910053,
0.498015873015873, 0.581845238095238
),
tolerance = 1e-5
)
expect_equal(evals$F1,
c(
0.551190476190476, 0.347276688453159, 0.638304093567251, 0.276960784313726,
0.511111111111111, 0.237745098039216, 0.439814814814815, 0.401960784313726,
0.313186813186813, 0.423411469851098
),
tolerance = 1e-5
)
expect_equal(evals$Sensitivity,
c(
0.55026455026455, 0.365079365079365, 0.656084656084656, 0.291005291005291,
0.513227513227513, 0.232804232804233, 0.439153439153439, 0.402116402116402,
0.338624338624339, 0.439153439153439
),
tolerance = 1e-5
)
expect_equal(evals$Specificity,
c(
0.780092592592593, 0.68287037037037, 0.819444444444444, 0.641203703703704,
0.759259259259259, 0.622685185185185, 0.722222222222222, 0.703703703703704,
0.657407407407407, 0.724537037037037
),
tolerance = 1e-5
)
expect_equal(evals$`Pos Pred Value`,
c(
0.562770562770563, 0.340740740740741, 0.644444444444444, 0.272619047619048,
0.531746031746032, 0.24537037037037, 0.44973544973545, 0.41547619047619,
0.314814814814815, 0.433333333333333
),
tolerance = 1e-5
)
expect_equal(evals$`Neg Pred Value`,
c(
0.78042328042328, 0.687426900584795, 0.824780701754386, 0.645315904139434,
0.761283550757235, 0.620098039215686, 0.719907407407407, 0.700871459694989,
0.656669719169719, 0.727777777777778
),
tolerance = 1e-5
)
expect_equal(evals$AUC,
c(
0.666960611405056, 0.529100529100529, 0.766901822457378, 0.496766607877719,
0.563786008230453, 0.397119341563786, 0.6222810111699, 0.481187536743092,
0.468841857730747, 0.583774250440917
),
tolerance = 1e-5
)
expect_equal(evals$Kappa,
c(
0.332217431748538, 0.0411382701447752, 0.462128481918942, -0.0736440767694057,
0.274254384977631, -0.1375345264999, 0.163202892673696, 0.108837675055412,
-0.0101585873493696, 0.160109639592244
),
tolerance = 1e-5
)
expect_equal(evals$MCC,
c(0.339040525810531, 0.0486630968728153, 0.469598884822668, -0.0726394381146181,
0.281551008833778, -0.137349796341982, 0.164251207729469, 0.108959157171927,
-0.0175562702416747, 0.172015615514047),
tolerance = 1e-5
)
expect_equal(evals$`Detection Rate`,
c(
0.186666666666667, 0.12, 0.213333333333333, 0.0933333333333333,
0.173333333333333, 0.08, 0.146666666666667, 0.133333333333333,
0.106666666666667, 0.146666666666667
),
tolerance = 1e-5
)
expect_equal(evals$`Detection Prevalence`,
c(
0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333,
0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333,
0.333333333333333, 0.333333333333333
),
tolerance = 1e-5
)
expect_equal(evals$Prevalence,
c(
0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333,
0.333333333333333, 0.333333333333333, 0.333333333333333, 0.333333333333333,
0.333333333333333, 0.333333333333333
),
tolerance = 1e-5
)
expect_equal(
evals$.groups,
1:10
)
expect_equal(names(evals$ROC[[1]]$rocs), c("1/2", "1/3", "2/3"))
expect_equal(evals$ROC[[1]]$rocs$`1/2`[[1]]$sensitivities,
c(
1, 1, 1, 1, 0.857142857142857, 0.857142857142857, 0.857142857142857,
0.857142857142857, 0.714285714285714, 0.714285714285714, 0.571428571428571,
0.428571428571429, 0.428571428571429, 0.285714285714286, 0.142857142857143,
0.142857142857143, 0
),
tolerance = 1e-5
)
expect_equal(evals$ROC[[1]]$rocs$`1/2`[[2]]$sensitivities,
c(
1, 1, 1, 1, 0.888888888888889, 0.777777777777778, 0.777777777777778,
0.666666666666667, 0.555555555555556, 0.444444444444444, 0.333333333333333,
0.333333333333333, 0.333333333333333, 0.222222222222222, 0.222222222222222,
0.111111111111111, 0
),
tolerance = 1e-5
)
expect_equal(evals$ROC[[1]]$rocs$`1/2`[[1]]$specificities,
c(
0, 0.111111111111111, 0.222222222222222, 0.333333333333333,
0.333333333333333, 0.444444444444444, 0.555555555555556, 0.666666666666667,
0.666666666666667, 0.777777777777778, 0.777777777777778, 0.777777777777778,
0.888888888888889, 0.888888888888889, 0.888888888888889, 1, 1
),
tolerance = 1e-5
)
expect_equal(evals$ROC[[1]]$rocs$`1/2`[[2]]$specificities,
c(
0, 0.142857142857143, 0.285714285714286, 0.428571428571429,
0.428571428571429, 0.428571428571429, 0.571428571428571, 0.571428571428571,
0.571428571428571, 0.571428571428571, 0.571428571428571, 0.714285714285714,
0.857142857142857, 0.857142857142857, 1, 1, 1
),
tolerance = 1e-5
)
# class level
expect_equal(evals$`Class Level Results`[[1]]$Class, as.character(c(1, 2, 3)))
expect_equal(evals$`Class Level Results`[[1]]$`Balanced Accuracy`,
c(0.715277777777778, 0.603174603174603, 0.677083333333333),
tolerance = 1e-5
)
expect_equal(evals$`Class Level Results`[[1]]$F1,
c(0.625, 0.428571428571429, 0.6),
tolerance = 1e-5
)
expect_equal(
evals$`Class Level Results`[[1]]$Support,
c(9, 7, 9)
)
expect_equal(
evals$`Class Level Results`[[1]]$`Confusion Matrix`[[1]],
structure(list(
.groups = c(1L, 1L, 1L, 1L),
Class = c("1", "1", "1", "1"),
Prediction = c("0", "1", "0", "1"),
Target = c("0", "0", "1", "1"),
Pos_0 = c("TP", "FN", "FP", "TN"),
Pos_1 = c("TN", "FP", "FN", "TP"),
N = c(14L, 2L, 4L, 5L)
),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L)
)
)
})
# TODO Add a test that the majority-vote id_method works when some class is never the most-predicted class for any ID
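# Illustrative sketch (not part of the test suite): a hand-rolled majority vote
# per ID, for reference. The toy data and the `vote()` helper are assumptions,
# not cvms internals. Kept inside `if (FALSE)` so it never runs.
if (FALSE) {
  toy <- data.frame(
    id = rep(1:2, each = 3),
    predicted_class = c("cl_1", "cl_1", "cl_2", "cl_2", "cl_2", "cl_1")
  )
  vote <- function(x) names(which.max(table(x)))
  tapply(toy$predicted_class, toy$id, vote)
  # id 1 -> "cl_1", id 2 -> "cl_2"
}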
test_that("arguments throw proper errors and warnings in evaluate()", {
#### ####
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 5,
num_observations = 20,
apply_softmax = FALSE # Also test with TRUE
)
data_ <- random_probabilities %>%
dplyr::mutate(
cl = as.factor(rep(1:5, each = 4)),
cl_char = paste0("cl_", cl)
) %>%
dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5))
# Testing 'metrics'
expect_error(
xpectr::strip_msg(evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE,
metrics = "none"
)),
xpectr::strip(paste0(
"1 assertions failed:\n * Variable 'metrics': Must be of typ",
"e 'list', not 'character'."
)),
fixed = TRUE
)
expect_error(
xpectr::strip_msg(evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE,
metrics = list(TRUE, FALSE)
)),
xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must have names."),
fixed = TRUE
)
# TODO add more
# Testing gaussian
expect_error(evaluate(
data = data_,
target_col = "cl_1",
prediction_cols = "cl_2",
type = "gaussian",
id_col = "cl",
id_method = "mean"
), paste0(
"The targets must be constant within the IDs with the current ID method. ",
"These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5."
),
fixed = TRUE
)
# Only one class in target column for binomial
data_3 <- data.frame(
"target" = c(1, 1, 1, 1, 1),
"prediction" = c(0.1, 0.2, 0.7, 0.8, 0.9)
)
expect_error(evaluate(
data = data_3,
target_col = "target",
prediction_cols = "prediction",
cutoff = 0.5,
type = "binomial"
), "found less than 2 levels in the target column.",
fixed = TRUE
)
# Test that pROC::roc returns the expected error
# when there are only observations for one level in the target column ("response")
expect_error(
pROC::roc(
data.frame(
"target" = c(1, 1, 1),
preds = c(0.01, 0.01, 1 - 0.02)
),
response = "target",
predictor = "preds",
levels = c(0, 1)
),
"No control observation.",
fixed = TRUE
)
})
test_that("binomial evaluation works in evaluate()", {
#### ####
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 1,
num_observations = 20,
apply_softmax = FALSE # Also test with TRUE
)
expect_equal(sum(random_probabilities), 11.10334, tolerance = 1e-5)
data_ <- random_probabilities %>%
dplyr::rename(prediction = class_1) %>%
dplyr::mutate(
cl = as.factor(rep(1:2, each = 10)),
cl_char = paste0("cl_", cl),
predicted_class = ifelse(prediction > 0.5,
"cl_2",
"cl_1"
),
inv_prediction = 1 - prediction,
inv_predicted_class = ifelse(inv_prediction > 0.5,
"cl_2",
"cl_1"
)
)
bn_eval_1 <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = "prediction",
type = "binomial",
apply_softmax = TRUE,
metrics = list("Accuracy" = TRUE)
)
bn_eval_1_inv <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = "inv_prediction",
type = "binomial",
apply_softmax = TRUE,
metrics = list("Accuracy" = TRUE)
)
expect_equal(
bn_eval_1$Accuracy,
mean(data_$cl_char == data_$predicted_class)
)
expect_equal(
bn_eval_1$Accuracy,
0.45
)
expect_equal(
bn_eval_1$`Balanced Accuracy`,
0.45
)
expect_equal(
bn_eval_1_inv$Accuracy,
mean(data_$cl_char == data_$inv_predicted_class)
)
expect_equal(
bn_eval_1_inv$Accuracy,
0.55
)
expect_equal(
bn_eval_1_inv$`Balanced Accuracy`,
0.55
)
expect_equal(bn_eval_1$F1,
0.4761905,
tolerance = 1e-4
)
expect_equal(bn_eval_1$Sensitivity,
0.5,
tolerance = 1e-4
)
expect_equal(bn_eval_1$Specificity,
0.4,
tolerance = 1e-4
)
expect_equal(bn_eval_1$`Pos Pred Value`,
0.4545455,
tolerance = 1e-4
)
expect_equal(bn_eval_1$`Neg Pred Value`,
0.4444444,
tolerance = 1e-4
)
expect_equal(bn_eval_1$AUC,
0.53,
tolerance = 1e-4
)
expect_equal(bn_eval_1$AUC,
bn_eval_1$ROC[[1]]$auc[[1]],
tolerance = 1e-4
)
expect_equal(bn_eval_1$`Lower CI`,
0.2573215,
tolerance = 1e-4
)
expect_equal(bn_eval_1$`Upper CI`,
0.8026785,
tolerance = 1e-4
)
expect_equal(bn_eval_1$Kappa,
-0.1,
tolerance = 1e-4
)
expect_equal(bn_eval_1$MCC,
-0.1005038,
tolerance = 1e-4
)
expect_equal(bn_eval_1$`Detection Rate`,
0.25,
tolerance = 1e-4
)
expect_equal(bn_eval_1$`Detection Prevalence`,
0.55,
tolerance = 1e-4
)
expect_equal(bn_eval_1$Prevalence,
0.5,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$F1,
0.5263158,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$Sensitivity,
0.5,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$Specificity,
0.6,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$`Pos Pred Value`,
0.5555556,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$`Neg Pred Value`,
0.5454545,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$AUC,
0.47,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$AUC,
bn_eval_1_inv$ROC[[1]]$auc[[1]],
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$`Lower CI`,
0.1973215,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$`Upper CI`,
0.7426785,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$Kappa,
0.1,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$MCC,
0.1005038,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$`Detection Rate`,
0.25,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$`Detection Prevalence`,
0.45,
tolerance = 1e-4
)
expect_equal(bn_eval_1_inv$Prevalence,
0.5,
tolerance = 1e-4
)
expect_true(!identical(bn_eval_1$ROC[[1]], bn_eval_1_inv$ROC[[1]]))
expect_equal(
bn_eval_1$ROC[[1]]$sensitivities,
c(
1, 1, 0.9, 0.9, 0.8, 0.8, 0.8, 0.7, 0.6, 0.5, 0.5, 0.5, 0.5,
0.4, 0.3, 0.2, 0.1, 0.1, 0.1, 0.1, 0
)
)
expect_equal(
bn_eval_1$ROC[[1]]$specificities,
c(
0, 0.1, 0.1, 0.2, 0.2, 0.3, 0.4, 0.4, 0.4, 0.4, 0.5, 0.6, 0.7,
0.7, 0.7, 0.7, 0.7, 0.8, 0.9, 1, 1
)
)
expect_equal(
bn_eval_1_inv$ROC[[1]]$sensitivities,
c(
1, 0.9, 0.9, 0.9, 0.9, 0.8, 0.7, 0.6, 0.5, 0.5, 0.5, 0.5, 0.4,
0.3, 0.2, 0.2, 0.2, 0.1, 0.1, 0, 0
)
)
expect_equal(
bn_eval_1_inv$ROC[[1]]$specificities,
c(
0, 0, 0.1, 0.2, 0.3, 0.3, 0.3, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6,
0.6, 0.6, 0.7, 0.8, 0.8, 0.9, 0.9, 1
)
)
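# Minimal sketch of where the AUC values above come from: a direct pROC::roc()
# call on the same data. The `levels` and `direction` arguments are assumptions
# about how cvms sets up the ROC, not confirmed internals. Kept inside
# `if (FALSE)` so it never runs.
if (FALSE) {
  roc_obj <- pROC::roc(
    response = data_$cl_char,
    predictor = data_$prediction,
    levels = c("cl_1", "cl_2"),
    direction = "<"
  )
  as.numeric(roc_obj$auc) # expected to match bn_eval_1$AUC (0.53)
}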
bn_eval_2 <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = "prediction",
type = "binomial",
apply_softmax = TRUE,
positive = "cl_1",
metrics = list("Accuracy" = TRUE)
)
expect_equal(
bn_eval_2$Accuracy,
mean(data_$cl_char == data_$predicted_class)
)
expect_equal(
bn_eval_2$Accuracy,
0.45
)
expect_equal(
bn_eval_2$`Balanced Accuracy`,
0.45
)
expect_equal(bn_eval_2$F1,
0.4210526,
tolerance = 1e-4
)
expect_equal(bn_eval_2$Sensitivity,
0.4,
tolerance = 1e-4
)
expect_equal(bn_eval_2$Specificity,
0.5,
tolerance = 1e-4
)
expect_equal(bn_eval_2$`Pos Pred Value`,
0.4444444,
tolerance = 1e-4
)
expect_equal(bn_eval_2$`Neg Pred Value`,
0.4545455,
tolerance = 1e-4
)
expect_equal(bn_eval_2$AUC,
0.53,
tolerance = 1e-4
)
expect_equal(bn_eval_2$`Lower CI`,
0.2573215,
tolerance = 1e-4
)
expect_equal(bn_eval_2$`Upper CI`,
0.8026785,
tolerance = 1e-4
)
expect_equal(bn_eval_2$Kappa,
-0.1,
tolerance = 1e-4
)
expect_equal(bn_eval_2$MCC,
-0.1005038,
tolerance = 1e-4
)
expect_equal(bn_eval_2$`Detection Rate`,
0.2,
tolerance = 1e-4
)
expect_equal(bn_eval_2$`Detection Prevalence`,
0.45,
tolerance = 1e-4
)
expect_equal(bn_eval_2$Prevalence,
0.5,
tolerance = 1e-4
)
expect_equal(bn_eval_2$Process[[1]]$`Positive Class`, "cl_1")
# Not including predictions in the output
bn_eval_2_no_preds <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = "prediction",
type = "binomial",
apply_softmax = TRUE,
positive = "cl_1",
metrics = list("Accuracy" = TRUE),
include_predictions = FALSE
)
expect_equal(bn_eval_2_no_preds$Process[[1]]$`Positive Class`, "cl_1")
expect_equal(
colnames(bn_eval_2_no_preds),
c(
"Balanced Accuracy", "Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value", "AUC",
"Lower CI", "Upper CI", "Kappa", "MCC",
"Detection Rate", "Detection Prevalence", "Prevalence",
"ROC", "Confusion Matrix", "Process"
)
)
expect_identical(bn_eval_2_no_preds, bn_eval_2 %>% dplyr::select(-dplyr::one_of("Predictions")))
# TODO Create proper expected tests: curate a dataset and an aggregated version (for all ID methods)
# and make sure the results are identical in all settings.
# ID level
data_ <- data_ %>%
dplyr::mutate(id = factor(rep(1:10, each = 2)))
bn_eval_3 <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = "prediction",
id_col = "id",
id_method = "mean",
type = "binomial",
apply_softmax = TRUE
)
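# Sketch of what id_method = "mean" does to the predictions before evaluation:
# the probabilities are averaged within each ID and the mean is evaluated once
# per ID. A hand-rolled version for reference (an assumption about the
# aggregation, consistent with the Prediction/SD columns tested below; kept
# inside `if (FALSE)` so it never runs):
if (FALSE) {
  mean_preds <- tapply(data_$prediction, data_$id, mean)
  # These means should match bn_eval_3$Predictions[[1]][["Prediction"]]
}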
## Testing 'bn_eval_3' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(bn_eval_3),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
bn_eval_3[["Balanced Accuracy"]],
0.6,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["F1"]],
0.66667,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Sensitivity"]],
0.8,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Specificity"]],
0.4,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Pos Pred Value"]],
0.57143,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Neg Pred Value"]],
0.66667,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["AUC"]],
0.52,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Lower CI"]],
0.10515,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Upper CI"]],
0.93485,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Kappa"]],
0.2,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["MCC"]],
0.21822,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Detection Rate"]],
0.4,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Detection Prevalence"]],
0.7,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Prevalence"]],
0.5,
tolerance = 1e-4)
expect_equal(
bn_eval_3[["Process"]][[1]][["Positive Class"]],
"cl_2",
fixed = TRUE)
# Testing column names
expect_equal(
names(bn_eval_3),
c("Balanced Accuracy", "Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value",
"Neg Pred Value", "AUC", "Lower CI", "Upper CI", "Kappa", "MCC",
"Detection Rate", "Detection Prevalence", "Prevalence", "Predictions",
"ROC", "Confusion Matrix", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(bn_eval_3),
c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(bn_eval_3),
c("double", "double", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "double",
"double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(bn_eval_3),
c(1L, 19L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(bn_eval_3)),
character(0),
fixed = TRUE)
## Finished testing 'bn_eval_3' ####
## Testing 'bn_eval_3$Predictions[[1]]' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(bn_eval_3$Predictions[[1]]),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
bn_eval_3$Predictions[[1]][["Target"]],
c("cl_1", "cl_1", "cl_1", "cl_1", "cl_1", "cl_2", "cl_2", "cl_2",
"cl_2", "cl_2"),
fixed = TRUE)
expect_equal(
bn_eval_3$Predictions[[1]][["Prediction"]],
c(0.31882, 0.74053, 0.55004, 0.80274, 0.34545, 0.19127, 0.53556,
0.63377, 0.85476, 0.57874),
tolerance = 1e-4)
expect_equal(
bn_eval_3$Predictions[[1]][["SD"]],
c(0.07539, 0.23713, 0.49265, 0.20073, 0.40116, 0.0208, 0.2142, 0.19243,
0.19395, 0.28101),
tolerance = 1e-4)
expect_equal(
bn_eval_3$Predictions[[1]][["Predicted Class"]],
c("cl_1", "cl_2", "cl_2", "cl_2", "cl_1", "cl_1", "cl_2", "cl_2",
"cl_2", "cl_2"),
fixed = TRUE)
expect_equal(
bn_eval_3$Predictions[[1]][["id"]],
structure(1:10, .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10"), class = "factor"))
expect_equal(
bn_eval_3$Predictions[[1]][["id_method"]],
c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean"),
fixed = TRUE)
# Testing column names
expect_equal(
names(bn_eval_3$Predictions[[1]]),
c("Target", "Prediction", "SD", "Predicted Class", "id", "id_method"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(bn_eval_3$Predictions[[1]]),
c("character", "numeric", "numeric", "character", "factor", "character"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(bn_eval_3$Predictions[[1]]),
c("character", "double", "double", "character", "integer", "character"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(bn_eval_3$Predictions[[1]]),
c(10L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(bn_eval_3$Predictions[[1]])),
character(0),
fixed = TRUE)
## Finished testing 'bn_eval_3$Predictions[[1]]' ####
## Testing 'bn_eval_3$`Confusion Matrix`[[1]]' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Assigning output
output_19148 <- bn_eval_3$`Confusion Matrix`[[1]]
# Testing class
expect_equal(
class(output_19148),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19148[["Prediction"]],
c("cl_1", "cl_2", "cl_1", "cl_2"),
fixed = TRUE)
expect_equal(
output_19148[["Target"]],
c("cl_1", "cl_1", "cl_2", "cl_2"),
fixed = TRUE)
expect_equal(
output_19148[["Pos_cl_1"]],
c("TP", "FN", "FP", "TN"),
fixed = TRUE)
expect_equal(
output_19148[["Pos_cl_2"]],
c("TN", "FP", "FN", "TP"),
fixed = TRUE)
expect_equal(
output_19148[["N"]],
c(2, 3, 1, 4),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19148),
c("Prediction", "Target", "Pos_cl_1", "Pos_cl_2", "N"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19148),
c("character", "character", "character", "character", "integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19148),
c("character", "character", "character", "character", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19148),
4:5)
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19148)),
character(0),
fixed = TRUE)
## Finished testing 'bn_eval_3$`Confusion Matrix`[[1]]' ####
bn_eval_3_no_preds <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = "prediction",
id_col = "id",
id_method = "mean",
type = "binomial",
apply_softmax = TRUE,
include_predictions = FALSE
)
expect_equal(
colnames(bn_eval_3_no_preds),
c(
"Balanced Accuracy", "Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value", "AUC",
"Lower CI", "Upper CI", "Kappa", "MCC",
"Detection Rate", "Detection Prevalence", "Prevalence",
"ROC", "Confusion Matrix", "Process"
)
)
# TODO Add tests here!
# Majority vote
bn_eval_4 <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = "prediction",
id_col = "id",
id_method = "majority",
type = "binomial",
apply_softmax = FALSE
)
expect_equal(
colnames(bn_eval_4),
c(
"Balanced Accuracy", "Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value", "AUC",
"Lower CI", "Upper CI", "Kappa", "MCC",
"Detection Rate", "Detection Prevalence", "Prevalence",
"Predictions", "ROC", "Confusion Matrix", "Process"
)
)
expect_equal(bn_eval_4$`Balanced Accuracy`, 0.5)
expect_equal(bn_eval_4$F1, 0.6153846, tolerance = 1e-4)
expect_equal(bn_eval_4$Sensitivity, 0.8, tolerance = 1e-4)
expect_equal(bn_eval_4$Specificity, 0.2, tolerance = 1e-4)
expect_equal(bn_eval_4$`Pos Pred Value`, 0.5, tolerance = 1e-4)
expect_equal(bn_eval_4$`Neg Pred Value`, 0.5, tolerance = 1e-4)
expect_equal(bn_eval_4$AUC, 0.42, tolerance = 1e-4)
expect_equal(bn_eval_4$`Lower CI`, 0.05437346, tolerance = 1e-4)
expect_equal(bn_eval_4$`Upper CI`, 0.7856265, tolerance = 1e-4)
expect_equal(bn_eval_4$Kappa, 0, tolerance = 1e-4)
expect_equal(bn_eval_4$MCC, 0, tolerance = 1e-4)
expect_equal(bn_eval_4$`Detection Rate`, 0.4, tolerance = 1e-4)
expect_equal(bn_eval_4$`Detection Prevalence`, 0.8, tolerance = 1e-4)
expect_equal(bn_eval_4$Prevalence, 0.5, tolerance = 1e-4)
expect_equal(bn_eval_4[["Process"]][[1]]$`Positive Class`, "cl_2")
expect_equal(length(bn_eval_4$Predictions), 1, tolerance = 1e-4)
expect_equal(bn_eval_4$Predictions[[1]]$Target,
c(
"cl_1", "cl_1", "cl_1", "cl_1", "cl_1",
"cl_2", "cl_2", "cl_2", "cl_2", "cl_2"
),
tolerance = 1e-4
)
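# With id_method = "majority", the per-ID probabilities appear collapsed to the
# vote share: ~0 (1e-40) when both of an ID's rows predict cl_1, 0.5 on a tie,
# and 1 when both rows predict cl_2 (see the expected values below).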
expect_equal(bn_eval_4$Predictions[[1]]$Prediction,
c(
1e-40, 1e+00, 5e-01, 1e+00, 5e-01,
1e-40, 5e-01, 5e-01, 1e+00, 5e-01
),
tolerance = 1e-4
)
expect_equal(bn_eval_4$Predictions[[1]]$`Predicted Class`,
c(
"cl_1", "cl_2", "cl_2", "cl_2", "cl_2",
"cl_1", "cl_2", "cl_2", "cl_2", "cl_2"
),
tolerance = 1e-4
)
expect_equal(bn_eval_4$Predictions[[1]]$id,
factor(1:10),
tolerance = 1e-4
)
expect_equal(bn_eval_4$Predictions[[1]]$id_method,
rep("majority", 10),
tolerance = 1e-4
)
data_2 <- data_ %>%
dplyr::mutate(fold_ = 1) %>%
dplyr::bind_rows(data_ %>% dplyr::mutate(fold_ = 2))
bn_eval_5 <- evaluate(
data = data_2 %>% dplyr::group_by(fold_),
target_col = "cl_char",
prediction_cols = "prediction",
id_col = "id",
id_method = "majority",
type = "binomial",
apply_softmax = FALSE
)
# TODO Add tests that grouped data frames work with binomial!
# Errors
expect_error(evaluate(
data = data_ %>% dplyr::mutate(
cl = ifelse(dplyr::row_number() == 5, 3, cl),
cl_char = ifelse(dplyr::row_number() == 5, "cl_3", cl_char)
),
target_col = "cl_char",
prediction_cols = "prediction",
type = "binomial",
apply_softmax = TRUE,
metrics = list("Accuracy" = TRUE)
),
"The target column must maximally contain 2 levels.",
fixed = TRUE
)
})
test_that("softmax works in multiclass_probability_tibble()", {
# Test softmax was applied correctly in multiclass_probability_tibble
xpectr::set_test_seed(1)
random_probabilities_1 <- multiclass_probability_tibble(
num_classes = 3,
num_observations = 20,
apply_softmax = TRUE
)
xpectr::set_test_seed(1)
random_probabilities_2 <- multiclass_probability_tibble(
num_classes = 3,
num_observations = 20,
apply_softmax = FALSE
) %>% softmax()
expect_equal(sum(random_probabilities_1), sum(random_probabilities_2))
expect_equal(sum(random_probabilities_1), 20) # due to softmax, each row sums to 1
expect_equal(sum(softmax_row(c(1, 2, 3, 4))), 1)
expect_equal(as.vector(t(softmax_row(c(1, 2, 3, 4)))),
c(0.03205860, 0.08714432, 0.23688282, 0.64391426),
tolerance = 1e-4
)
expect_equal(colnames(softmax_row(c(1, 2, 3, 4))), c("V1", "V2", "V3", "V4"))
})
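# Minimal illustration of the softmax identity tested above:
# softmax(x)_i = exp(x_i) / sum_j exp(x_j), so each row sums to 1.
# The `manual_softmax()` helper is an assumption for illustration, not a cvms
# function. Kept inside `if (FALSE)` so it never runs.
if (FALSE) {
  manual_softmax <- function(x) exp(x) / sum(exp(x))
  manual_softmax(c(1, 2, 3, 4))
  # 0.03205860 0.08714432 0.23688282 0.64391426 -- matches softmax_row() above
}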
test_that("probability nesting works in multinomial evaluate", {
xpectr::set_test_seed(1)
random_probabilities_1 <- multiclass_probability_tibble(
num_classes = 3,
num_observations = 20,
apply_softmax = TRUE
)
system.time({
manually_nested_probs <- random_probabilities_1 %>%
dplyr::mutate(ind = 1:20) %>%
dplyr::group_by(ind) %>%
legacy_nest(1:3) %>%
dplyr::pull(.data$data)
})
# nest_rowwise() should do essentially the same as the manual nesting above
system.time({
package_nested_probs <- random_probabilities_1 %>%
nest_rowwise()
})
# group_nest adds an attribute
attr(package_nested_probs, "ptype") <- NULL
expect_true(identical(manually_nested_probs, as.list(package_nested_probs)))
unnested <- package_nested_probs %>%
dplyr::bind_rows()
expect_true(identical(random_probabilities_1, unnested))
})
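# Sketch of what "row-wise nesting" means here: each row of the probability
# tibble becomes its own one-row tibble. A generic base-R version, assumed
# equivalent to nest_rowwise() for this input (kept inside `if (FALSE)` so it
# never runs):
if (FALSE) {
  manual_rowwise <- lapply(
    seq_len(nrow(random_probabilities_1)),
    function(i) random_probabilities_1[i, ]
  )
  # dplyr::bind_rows(manual_rowwise) reproduces random_probabilities_1
}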
test_that("gaussian evaluations are correct in evaluate()", {
xpectr::set_test_seed(1)
score_model_1 <- lm("score ~ diagnosis", data = participant.scores)
score_model_2 <- lm("score ~ diagnosis+age", data = participant.scores)
# summary(score_model_1)
score_predictions <- stats::predict(score_model_1, participant.scores,
type = "response", allow.new.levels = TRUE
)
eval_data <- participant.scores
eval_data[["score_predictions"]] <- score_predictions
expect_error(evaluate(eval_data,
target_col = "score",
prediction_cols = "score_predictions",
models = list(score_model_1),
type = "gaussian",
metrics = "all"
),
class = "lifecycle_error_deprecated"
)
e1 <- evaluate(eval_data,
target_col = "score",
prediction_cols = "score_predictions",
type = "gaussian",
metrics = "all"
)
expect_equal(
colnames(e1),
c("RMSE", "MAE", "NRMSE(RNG)","NRMSE(IQR)","NRMSE(STD)","NRMSE(AVG)",
"RSE", "RRSE", "RAE",
"RMSLE", "MALE",
"MAPE", "MSE", "TAE", "TSE",
"Predictions", "Process"
)
)
expect_equal(e1$RMSE, 16.16881, tolerance = 1e-4)
expect_equal(e1$MAE, 13.47778, tolerance = 1e-4)
expect_equal(e1$`NRMSE(RNG)`, 0.2277298, tolerance = 1e-4)
expect_equal(e1$`NRMSE(IQR)`, 0.5774577, tolerance = 1e-4)
expect_equal(e1$`NRMSE(STD)`, 0.8380279, tolerance = 1e-4)
expect_equal(e1$`NRMSE(AVG)`, 0.417080334230652, tolerance = 1e-4)
expect_equal(e1$RMSLE, 0.4677011, tolerance = 1e-4)
expect_equal(e1$MALE, 0.3768815, tolerance = 1e-4)
expect_equal(e1$RAE, 0.8666762, tolerance = 1e-4)
expect_equal(e1$RSE, 0.7265077, tolerance = 1e-4)
expect_equal(e1$RRSE, 0.8523542, tolerance = 1e-4)
expect_equal(e1$MAPE, 0.4836546, tolerance = 1e-4)
expect_equal(e1$MSE, 261.4306, tolerance = 1e-4)
expect_equal(e1$TAE, 404.3333, tolerance = 1e-4)
expect_equal(e1$TSE, 7842.917, tolerance = 1e-4)
expect_equal(e1$Predictions[[1]][["Target"]], eval_data$score)
expect_equal(e1$Predictions[[1]][["Prediction"]], eval_data$score_predictions)
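# Sketch of the two headline metrics checked above, hand-rolled from their
# standard definitions for reference (kept inside `if (FALSE)` so it never runs):
if (FALSE) {
  t <- eval_data$score
  p <- eval_data$score_predictions
  sqrt(mean((t - p)^2)) # RMSE, ~16.16881
  mean(abs(t - p))      # MAE,  ~13.47778
}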
# Not passing a model
# This should exclude the metrics that depend on a model being passed
e2 <- evaluate(eval_data,
target_col = "score",
prediction_cols = "score_predictions",
type = "gaussian",
metrics = "all"
)
expect_equal(
colnames(e2),
c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)",
"RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE",
"TSE", "Predictions", "Process")
)
expect_equal(e2$RMSE, 16.16881, tolerance = 1e-4)
expect_equal(e2$MAE, 13.47778, tolerance = 1e-4)
expect_equal(e2$`NRMSE(RNG)`, 0.227729778737206, tolerance = 1e-4)
expect_equal(e2$`NRMSE(IQR)`, 0.577457653226487, tolerance = 1e-4)
expect_equal(e2$`NRMSE(STD)`, 0.838027891023239, tolerance = 1e-4)
expect_equal(e2$`NRMSE(AVG)`, 0.417080334230652, tolerance = 1e-4)
expect_equal(e2$RMSLE, 0.4677011, tolerance = 1e-4)
expect_equal(e2$MALE, 0.3768815, tolerance = 1e-4)
expect_equal(e2$Predictions[[1]][["Target"]], eval_data$score)
expect_equal(e2$Predictions[[1]][["Prediction"]], eval_data$score_predictions)
# Grouped with multiple models
eval_data_2 <- eval_data
score_predictions_2 <- stats::predict(score_model_2, participant.scores,
type = "response", allow.new.levels = TRUE
)
eval_data_2[["score_predictions"]] <- score_predictions_2
e3 <- evaluate(eval_data_2,
target_col = "score",
prediction_cols = "score_predictions",
type = "gaussian",
metrics = "all"
)
expect_equal(
colnames(e3),
c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)",
"RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE",
"TSE", "Predictions", "Process")
)
eval_data_3 <- dplyr::bind_rows(
eval_data %>% dplyr::mutate(fold_ = 1),
eval_data_2 %>% dplyr::mutate(fold_ = 2)
) %>%
dplyr::group_by(fold_)
# eval_data_3 %>% dplyr::group_keys()
# eval_data_3 %>% dplyr::group_indices()
e4 <- evaluate(eval_data_3,
target_col = "score",
prediction_cols = "score_predictions",
type = "gaussian",
metrics = "all"
) %>%
dplyr::mutate(fold_ = as.factor(.data$fold_))
expect_equal(
colnames(e4),
c("fold_", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)",
"NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE",
"MSE", "TAE", "TSE", "Predictions", "Process")
)
e1_e3 <- dplyr::bind_rows(e1, e3) %>%
dplyr::mutate(fold_ = factor(1:2)) %>%
dplyr::select("fold_", dplyr::everything())
e1_e3$Predictions[[1]] <- e1_e3$Predictions[[1]] %>%
dplyr::as_tibble() %>%
tibble::add_column("fold_" = 1, .before = "Target")
e1_e3$Predictions[[2]] <- e1_e3$Predictions[[2]] %>%
dplyr::as_tibble() %>%
tibble::add_column("fold_" = 2, .before = "Target")
expect_true(length(setdiff(colnames(e4), colnames(e1_e3))) == 0)
expect_identical(e4, e1_e3)
expect_equal(e4$fold_, factor(c(1, 2)))
expect_equal(e4$RMSE, c(16.16881, 16.12762), tolerance = 1e-4)
expect_equal(e4$MAE, c(13.47778, 13.28942), tolerance = 1e-4)
expect_equal(e4$`NRMSE(RNG)`, c(0.227729778737206, 0.227149512023389), tolerance = 1e-4)
expect_equal(e4$`NRMSE(IQR)`, c(0.577457653226487, 0.575986262630736), tolerance = 1e-4)
expect_equal(e4$`NRMSE(STD)`, c(0.838027891023239, 0.835892554603436), tolerance = 1e-4)
expect_equal(e4$`NRMSE(AVG)`, c(0.417080334230652, 0.416017592957711), tolerance = 1e-4)
expect_equal(e4$RMSLE, c(0.4677011, 0.4666284), tolerance = 1e-4)
expect_equal(e4$MALE, c(0.3768815, 0.3723774), tolerance = 1e-4)
expect_equal(e4$RAE, c(0.866676193198057, 0.8545638890023), tolerance = 1e-4)
expect_equal(e4$RSE, c(0.852354191878764, 0.850182351337433)^2, tolerance = 1e-4)
expect_equal(e4$RRSE, c(0.852354191878764, 0.850182351337433), tolerance = 1e-4)
expect_equal(e4$MAPE, c(0.483654620140199, 0.478764805777454), tolerance = 1e-4)
expect_equal(e4$MSE, c(261.430555555556, 260.099976995629), tolerance = 1e-4)
expect_equal(e4$TAE, c(404.333333333333, 398.68253968254), tolerance = 1e-4)
expect_equal(e4$TSE, c(7842.91666666667, 7802.99930986888), tolerance = 1e-4)
expect_equal(e4$Predictions[[1]]$Target,
c(
10, 24, 45, 24, 40, 67, 15, 30, 40, 35, 50, 78, 24, 54, 62,
14, 25, 30, 11, 35, 41, 16, 32, 44, 33, 53, 66, 29, 55, 81
),
tolerance = 1e-4
)
expect_equal(e4$Predictions[[2]]$Target,
c(
10, 24, 45, 24, 40, 67, 15, 30, 40, 35, 50, 78, 24, 54, 62,
14, 25, 30, 11, 35, 41, 16, 32, 44, 33, 53, 66, 29, 55, 81
),
tolerance = 1e-4
)
expect_equal(e4$Predictions[[1]]$Prediction,
c(
30.66667, 30.66667, 30.66667, 50.91667, 50.91667,
50.91667, 30.66667, 30.66667, 30.66667, 50.91667,
50.91667, 50.91667, 30.66667, 30.66667, 30.66667,
30.66667, 30.66667, 30.66667, 30.66667, 30.66667,
30.66667, 30.66667, 30.66667, 30.66667, 50.91667,
50.91667, 50.91667, 50.91667, 50.91667, 50.91667
),
tolerance = 1e-4
)
expect_equal(e4$Predictions[[2]]$Prediction,
c(
29.17288, 29.17288, 29.17288, 50.16977, 50.16977,
50.16977, 30.33471, 30.33471, 30.33471, 49.83782,
49.83782, 49.83782, 31.16460, 31.16460, 31.16460,
30.99862, 30.99862, 30.99862, 32.99034, 32.99034,
32.99034, 29.33885, 29.33885, 29.33885, 51.99551,
51.99551, 51.99551, 51.66356, 51.66356, 51.66356
),
tolerance = 1e-4
)
# ID evaluation
age_model_1 <- lm("age ~ diagnosis", participant.scores)
age_model_2 <- lm("age ~ diagnosis + score", participant.scores)
age_predictions_1 <- stats::predict(age_model_1, participant.scores,
type = "response", allow.new.levels = TRUE
)
age_predictions_2 <- stats::predict(age_model_2, participant.scores,
type = "response", allow.new.levels = TRUE
)
id_eval_data_4 <- participant.scores %>%
dplyr::mutate(
fold_ = 1,
predicted_age = age_predictions_1
) %>%
dplyr::bind_rows(participant.scores %>%
dplyr::mutate(
fold_ = 2,
predicted_age = age_predictions_2
)) %>%
dplyr::group_by(fold_)
e5 <- evaluate(id_eval_data_4,
target_col = "age",
prediction_cols = "predicted_age",
id_col = "participant",
id_method = "mean",
type = "gaussian",
metrics = "all"
)
## Testing 'e5' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(e5),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
e5[["fold_"]],
c(1, 2),
tolerance = 1e-4)
expect_equal(
e5[["RMSE"]],
c(6.94982, 6.91723),
tolerance = 1e-4)
expect_equal(
e5[["MAE"]],
c(6, 5.9356),
tolerance = 1e-4)
expect_equal(
e5[["NRMSE(RNG)"]],
c(0.30217, 0.30075),
tolerance = 1e-4)
expect_equal(
e5[["NRMSE(IQR)"]],
c(0.66189, 0.65878),
tolerance = 1e-4)
expect_equal(
e5[["NRMSE(STD)"]],
c(0.94342, 0.939),
tolerance = 1e-4)
expect_equal(
e5[["NRMSE(AVG)"]],
c(0.24471, 0.24356),
tolerance = 1e-4)
expect_equal(
e5[["RSE"]],
c(0.98894, 0.97969),
tolerance = 1e-4)
expect_equal(
e5[["RRSE"]],
c(0.99446, 0.98979),
tolerance = 1e-4)
expect_equal(
e5[["RAE"]],
c(1, 0.98927),
tolerance = 1e-4)
expect_equal(
e5[["RMSLE"]],
c(0.23417, 0.23293),
tolerance = 1e-4)
expect_equal(
e5[["MALE"]],
c(0.20614, 0.20397),
tolerance = 1e-4)
expect_equal(
e5[["MAPE"]],
c(0.22259, 0.22016),
tolerance = 1e-4)
expect_equal(
e5[["MSE"]],
c(48.3, 47.8481),
tolerance = 1e-4)
expect_equal(
e5[["TAE"]],
c(60, 59.35604),
tolerance = 1e-4)
expect_equal(
e5[["TSE"]],
c(483, 478.48096),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(e5),
c("fold_", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)",
"NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE",
"MSE", "TAE", "TSE", "Predictions", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(e5),
c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(e5),
c("double", "double", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "double",
"double", "double", "double", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(e5),
c(2L, 18L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(e5)),
character(0),
fixed = TRUE)
## Finished testing 'e5' ####
## Testing 'e5$Predictions[[1]]' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(e5$Predictions[[1]]),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
e5$Predictions[[1]][["fold_"]],
c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[1]][["Target"]],
c(20, 23, 27, 21, 32, 31, 43, 21, 34, 32),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[1]][["Prediction"]],
c(29, 27.5, 29, 27.5, 29, 29, 29, 29, 27.5, 27.5),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[1]][["SD"]],
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[1]][["participant"]],
structure(1:10, .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10"), class = "factor"))
expect_equal(
e5$Predictions[[1]][["id_method"]],
c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean"),
fixed = TRUE)
# Testing column names
expect_equal(
names(e5$Predictions[[1]]),
c("fold_", "Target", "Prediction", "SD", "participant", "id_method"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(e5$Predictions[[1]]),
c("numeric", "numeric", "numeric", "numeric", "factor", "character"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(e5$Predictions[[1]]),
c("double", "double", "double", "double", "integer", "character"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(e5$Predictions[[1]]),
c(10L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(e5$Predictions[[1]])),
character(0),
fixed = TRUE)
## Finished testing 'e5$Predictions[[1]]' ####
## Testing 'e5$Predictions[[2]]' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(e5$Predictions[[2]]),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
e5$Predictions[[2]][["fold_"]],
c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[2]][["Target"]],
c(20, 23, 27, 21, 32, 31, 43, 21, 34, 32),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[2]][["Prediction"]],
c(28.86712, 27.27768, 28.92845, 27.60477, 29.49063, 28.7649, 28.94889,
29, 27.49233, 27.62521),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[2]][["SD"]],
c(0.5402, 0.66644, 0.38585, 0.66926, 0.61431, 0.251, 0.48679, 0.43076,
0.50975, 0.79728),
tolerance = 1e-4)
expect_equal(
e5$Predictions[[2]][["participant"]],
structure(1:10, .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10"), class = "factor"))
expect_equal(
e5$Predictions[[2]][["id_method"]],
c("mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean"),
fixed = TRUE)
# Testing column names
expect_equal(
names(e5$Predictions[[2]]),
c("fold_", "Target", "Prediction", "SD", "participant", "id_method"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(e5$Predictions[[2]]),
c("numeric", "numeric", "numeric", "numeric", "factor", "character"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(e5$Predictions[[2]]),
c("double", "double", "double", "double", "integer", "character"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(e5$Predictions[[2]]),
c(10L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(e5$Predictions[[2]])),
character(0),
fixed = TRUE)
## Finished testing 'e5$Predictions[[2]]' ####
# Not including predictions in the output
e6 <- evaluate(id_eval_data_4,
target_col = "age",
prediction_cols = "predicted_age",
id_col = "participant",
id_method = "mean",
type = "gaussian",
metrics = "all",
include_predictions = FALSE
)
expect_equal(
colnames(e6),
c("fold_", "RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)",
"NRMSE(AVG)", "RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE",
"MSE", "TAE", "TSE", "Process")
)
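# With include_predictions = FALSE, the `Predictions` list column is
# absent from the output; only the metric columns and `Process` remain.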
})
test_that("evaluate() treats dfs and tbls the same", {
# Gaussian
# Binomial
# Multinomial
xpectr::set_test_seed(1)
random_probabilities <- multiclass_probability_tibble(
num_classes = 5,
num_observations = 20,
apply_softmax = FALSE # softmax is applied in evaluate() below
)
expect_equal(sum(random_probabilities), 51.78471, tolerance = 1e-5)
data_ <- random_probabilities %>%
dplyr::mutate(
cl = as.factor(rep(1:5, each = 4)),
cl_char = paste0("cl_", cl)
) %>%
dplyr::rename_at(dplyr::vars(paste0("class_", 1:5)), .funs = ~ paste0("cl_", 1:5))
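# For reference: with `apply_softmax = TRUE`, evaluate() normalizes each row
# of the prediction columns to sum to 1. A minimal sketch of such a row-wise
# softmax (our own illustration, not the package internals):
softmax_row <- function(x) exp(x) / sum(exp(x))
stopifnot(abs(sum(softmax_row(unlist(data_[1, paste0("cl_", 1:5)]))) - 1) < 1e-8)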
mn_eval_1_tbl <- evaluate(
data = data_,
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE
)
mn_eval_1_df <- evaluate(
data = as.data.frame(data_),
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE
)
mn_eval_1_dt <- evaluate( # TODO: Test this for gaussian and binomial as well
data = data.table::as.data.table(data_),
target_col = "cl_char",
prediction_cols = paste0("cl_", 1:5),
type = "multinomial",
apply_softmax = TRUE
)
expect_identical(
mn_eval_1_df$`Class Level Results`[[1]]$`Confusion Matrix`,
mn_eval_1_tbl$`Class Level Results`[[1]]$`Confusion Matrix`
)
expect_identical(
mn_eval_1_df$`Confusion Matrix`,
mn_eval_1_tbl$`Confusion Matrix`
)
expect_identical(mn_eval_1_tbl, mn_eval_1_df)
# TODO: Find out why the group_nest attribute is only added for the data.table input
# attr(mn_eval_1_dt$Predictions[[1]]$Prediction, "ptype") <- NULL
# An `attr(*, ".internal.selfref")=<externalptr>` attribute is added to the
# predictions list when the input is a data.table.
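# A minimal illustrative workaround (our own helper, not used below): drop
# the data.table self-reference attribute before identity comparisons.
strip_selfref <- function(x) {
  attr(x, ".internal.selfref") <- NULL
  x
}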
expect_identical(
as.list(mn_eval_1_tbl$Predictions[[1]]$Prediction),
as.list(mn_eval_1_dt$Predictions[[1]]$Prediction)
)
mn_eval_1_dt$Predictions <- NULL
mn_eval_1_tbl$Predictions <- NULL
expect_identical(mn_eval_1_dt, mn_eval_1_tbl)
})
test_that("evaluate() works with wines dataset", {
xpectr::set_test_seed(1)
testthat::skip_on_cran()
# Load wines dataset
w <- wines
varieties <- unique(as.character(w$Variety))
## Create All_x one-vs-all evaluations
to_evaluate <- plyr::llply(varieties, function(vary) {
d <- w
d[["label"]] <- ifelse(as.character(d$Variety) == vary, 1, 0)
d[["current_variety"]] <- vary
d[["probability"]] <- matrix(0, nrow = 1, ncol = length(varieties)) %>%
as.data.frame() %>%
setNames(varieties) %>%
dplyr::mutate_at(.vars = vary, .funs = ~1) %>%
list()
d
}) %>%
dplyr::bind_rows() %>%
dplyr::mutate(prediction = 1)
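# Each row of `to_evaluate` now carries a one-row `probability` data frame
# with a 1 in the current variety's column and 0s elsewhere, plus a hard
# `prediction` of 1, i.e. the positive class is always predicted.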
evaluations <- to_evaluate %>%
dplyr::group_by(current_variety) %>%
evaluate(
target_col = "label",
prediction_cols = "prediction",
type = "binomial",
metrics = list("Accuracy" = TRUE)
) %>%
dplyr::arrange(current_variety)
expect_equal(sort(evaluations$current_variety), sort(varieties))
expect_equal(evaluations$`Balanced Accuracy`, rep(0.5, length(varieties)))
expect_equal(evaluations$Accuracy,
c(
0.0271739130434783, 0.0217391304347826, 0.0380434782608696,
0.0108695652173913, 0.00815217391304348, 0.0135869565217391,
0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913
),
tolerance = 1e-4
)
expect_equal(evaluations$F1,
c(
0.0529100529100529, 0.0425531914893617, 0.0732984293193717,
0.021505376344086, 0.0161725067385445, 0.0268096514745308, 0.678635547576302,
0.107969151670951, 0.334841628959276, 0.196078431372549
),
tolerance = 1e-4
)
expect_equal(evaluations$Sensitivity, c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), tolerance = 1e-4)
expect_equal(evaluations$Specificity, c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), tolerance = 1e-4)
expect_equal(evaluations$`Pos Pred Value`,
c(
0.0271739130434783, 0.0217391304347826, 0.0380434782608696,
0.0108695652173913, 0.00815217391304348, 0.0135869565217391,
0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913
),
tolerance = 1e-4
)
expect_equal(evaluations$`Neg Pred Value`,
c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN),
tolerance = 1e-4
)
expect_equal(evaluations$AUC,
c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
tolerance = 1e-4
)
expect_equal(evaluations$`Lower CI`,
c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
tolerance = 1e-4
)
expect_equal(evaluations$`Upper CI`,
c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
tolerance = 1e-4
)
expect_equal(evaluations$Kappa,
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
tolerance = 1e-4
)
expect_equal(evaluations$MCC,
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
tolerance = 1e-4
)
expect_equal(evaluations$`Detection Rate`,
c(
0.0271739130434783, 0.0217391304347826, 0.0380434782608696,
0.0108695652173913, 0.00815217391304348, 0.0135869565217391,
0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913
),
tolerance = 1e-4
)
expect_equal(evaluations$`Detection Prevalence`,
c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
tolerance = 1e-4
)
expect_equal(evaluations$Prevalence,
c(
0.0271739130434783, 0.0217391304347826, 0.0380434782608696,
0.0108695652173913, 0.00815217391304348, 0.0135869565217391,
0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913
),
tolerance = 1e-4
)
expect_equal(evaluations$`Confusion Matrix`[[1]]$N,
c(0L, 358L, 0L, 10L),
tolerance = 1e-4
)
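# Note: the four counts plausibly follow the 2x2 (Prediction, Target) cell
# order; with `prediction = 1` everywhere, only the prediction-1 cells are
# non-zero (358 negatives, 10 positives for the first variety).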
## Create All_x multinomial evaluations
mn_evaluations <- to_evaluate %>%
legacy_unnest(probability) %>%
dplyr::group_by(current_variety) %>%
evaluate(
target_col = "Variety",
prediction_cols = varieties,
type = "multinomial",
metrics = list("Accuracy" = TRUE)
) %>%
dplyr::arrange(current_variety)
expect_equal(
mn_evaluations$`Class Level Results`[[1]]$Accuracy,
c(0.0271739130434783, 0.978260869565217, 0.96195652173913, 0.989130434782609,
0.991847826086957, 0.986413043478261, 0.486413043478261, 0.942934782608696,
0.798913043478261, 0.891304347826087),
tolerance = 1e-4
)
expect_equal(
mn_evaluations$`Class Level Results`[[1]]$F1,
c(0.0529100529100529, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,
NaN),
tolerance = 1e-4
)
expect_equal(
mn_evaluations$`Overall Accuracy`,
c(
0.0271739130434783, 0.0217391304347826, 0.0380434782608696,
0.0108695652173913, 0.00815217391304348, 0.0135869565217391,
0.513586956521739, 0.0570652173913043, 0.201086956521739, 0.108695652173913
),
tolerance = 1e-4
)
expect_equal(mn_evaluations$`Balanced Accuracy`,
c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
tolerance = 1e-4
)
expect_equal(mn_evaluations$Accuracy,
c(
0.805434782608696, 0.804347826086957, 0.807608695652174, 0.802173913043478,
0.801630434782609, 0.802717391304348, 0.902717391304348, 0.811413043478261,
0.840217391304348, 0.821739130434783
),
tolerance = 1e-4
)
expect_equal(mn_evaluations$F1,
c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN),
tolerance = 1e-4
)
expect_equal(mn_evaluations$Sensitivity,
c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
tolerance = 1e-4
)
expect_equal(mn_evaluations$Specificity,
c(0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9),
tolerance = 1e-4
)
expect_equal(mn_evaluations$`Pos Pred Value`,
c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN),
tolerance = 1e-4
)
expect_equal(mn_evaluations$`Neg Pred Value`,
c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN),
tolerance = 1e-4
)
expect_equal(mn_evaluations$Kappa,
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
tolerance = 1e-4
)
expect_equal(mn_evaluations$MCC,
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
tolerance = 1e-4
)
expect_equal(mn_evaluations$`Detection Rate`,
c(
0.00271739130434783, 0.00217391304347826, 0.00380434782608696,
0.00108695652173913, 0.000815217391304348, 0.00135869565217391,
0.0513586956521739, 0.00570652173913043, 0.0201086956521739,
0.0108695652173913
),
tolerance = 1e-4
)
expect_equal(mn_evaluations$`Detection Prevalence`,
c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
tolerance = 1e-4
)
expect_equal(mn_evaluations$Prevalence,
c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
tolerance = 1e-4
)
expect_equal(mn_evaluations$`Confusion Matrix`[[1]]$N,
c(
10L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 189L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 21L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 74L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 40L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
),
tolerance = 1e-4
)
# Check that these All_x evaluations are the same in baseline()
xpectr::set_test_seed(1)
suppressWarnings(bsl <- baseline(
w,
dependent_col = "Variety",
n = 1,
family = "multinomial",
metrics = list("Accuracy" = TRUE)
))
all_x_baseline_results <- bsl$summarized_metrics[bsl$summarized_metrics$Measure %in% paste0("All_", varieties), ]
expect_equal(all_x_baseline_results$`Overall Accuracy`, evaluations$Accuracy, tolerance = 1e-4)
expect_equal(mn_evaluations$`Overall Accuracy`, evaluations$Accuracy, tolerance = 1e-4)
expect_equal(all_x_baseline_results$`Balanced Accuracy`, mn_evaluations$`Balanced Accuracy`, tolerance = 1e-4)
expect_equal(all_x_baseline_results$Accuracy, mn_evaluations$Accuracy, tolerance = 1e-4)
expect_equal(all_x_baseline_results$F1, mn_evaluations$F1, tolerance = 1e-4)
expect_equal(all_x_baseline_results$Sensitivity, mn_evaluations$Sensitivity, tolerance = 1e-4)
expect_equal(all_x_baseline_results$Specificity, mn_evaluations$Specificity, tolerance = 1e-4)
expect_equal(all_x_baseline_results$`Pos Pred Value`, mn_evaluations$`Pos Pred Value`, tolerance = 1e-4)
expect_equal(all_x_baseline_results$`Neg Pred Value`, mn_evaluations$`Neg Pred Value`, tolerance = 1e-4)
expect_equal(all_x_baseline_results$Kappa, mn_evaluations$Kappa, tolerance = 1e-4)
expect_equal(all_x_baseline_results$MCC, mn_evaluations$MCC, tolerance = 1e-4)
expect_equal(all_x_baseline_results$`Detection Rate`, mn_evaluations$`Detection Rate`, tolerance = 1e-4)
expect_equal(all_x_baseline_results$`Detection Prevalence`, mn_evaluations$`Detection Prevalence`, tolerance = 1e-4)
expect_equal(all_x_baseline_results$Prevalence, mn_evaluations$Prevalence, tolerance = 1e-4)
})
test_that("evaluate() is agnostic about the order of the input data", {
dat <- data.frame(
"target" = c(2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1),
"prediction" = c(
0.7, 0.3, 0.1, 0.1, 0.9, 0.3, 0.8, 0.7, 0.9, 0.1, 0.9, 0.1,
0.2, 0.4, 0.2, 0.6, 0.1, 0.6, 0.1, 0.2, 0.1, 0.8, 0.3
),
".group" = paste0("cl_", c(1, 3, 2, 2, 3, 1, 3, 1, 2, 3, 2, 1, 2, 1, 1, 3, 3, 2, 1, 3, 2, 1, 1))
)
eval_1 <- dat %>%
dplyr::group_by(.data$.group) %>%
evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial")
eval_2 <- dat %>%
dplyr::arrange(.data$.group) %>%
dplyr::group_by(.data$.group) %>%
evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial")
eval_3 <- dat %>%
dplyr::arrange(dplyr::desc(.data$.group)) %>%
dplyr::group_by(.data$.group) %>%
evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial")
expect_identical(eval_1[, 1:15], eval_2[, 1:15])
expect_identical(eval_1[, 1:15], eval_3[, 1:15])
eval_group_1 <- dat %>%
dplyr::filter(.data$.group == "cl_1") %>%
evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial")
eval_group_2 <- dat %>%
dplyr::filter(.data$.group == "cl_2") %>%
evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial")
eval_group_3 <- dat %>%
dplyr::filter(.data$.group == "cl_3") %>%
evaluate(target_col = "target", prediction_cols = "prediction", type = "binomial")
eval_groups <- dplyr::bind_rows(
eval_group_1, eval_group_2, eval_group_3
)
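# The grouped evaluation has the `.group` key as its first column, hence
# the one-column offset in the comparison below.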
expect_equal(eval_1[, 2:15], eval_groups[, 1:14])
})
test_that("evaluate() and confusion_matrix() has same metric values", {
# Binomial
xpectr::set_test_seed(42)
df_binom <- tibble::tibble(
"target" = as.character(sample(0:1, 50, replace = TRUE)),
"prediction" = as.character(sample(0:1, 50, replace = TRUE))
)
eval_binom <- evaluate(
df_binom, target_col = "target",
prediction_cols = "prediction",
type = "binomial",
positive = "0",
metrics = "all")
cfm_binom <- confusion_matrix(df_binom$target,
df_binom$prediction,
positive = "0",
metrics = "all")
# Compare shared metric columns (names, order, and values)
shared_cols <- intersect(colnames(eval_binom), colnames(cfm_binom))
eval_binom_shared <- eval_binom %>%
base_deselect(cols = c(
"Confusion Matrix", "Process",
setdiff(colnames(eval_binom), shared_cols)
))
cfm_binom_shared <- cfm_binom %>%
base_deselect(cols = c(
"Confusion Matrix",
setdiff(colnames(cfm_binom), shared_cols)
))
expect_equal(colnames(eval_binom_shared),
colnames(cfm_binom_shared))
expect_equal(unlist(eval_binom_shared), unlist(cfm_binom_shared))
expect_equal(eval_binom$`Confusion Matrix`,
cfm_binom$`Confusion Matrix`)
expect_equal(eval_binom[["Process"]][[1]]$`Positive Class`, "0")
# Multinomial
xpectr::set_test_seed(42)
df_multinom <- tibble::tibble(
"target" = as.character(sample(0:3, 50, replace = TRUE)),
"prediction" = as.character(sample(0:3, 50, replace = TRUE))
)
eval_multinom <- evaluate(
df_multinom, target_col = "target",
prediction_cols = "prediction",
type = "multinomial",
metrics = "all")
cfm_multinom <- confusion_matrix(
df_multinom$target,
df_multinom$prediction,
metrics = "all")
# Compare shared metric columns (names, order, and values)
shared_cols <- intersect(colnames(eval_multinom), colnames(cfm_multinom))
eval_multinom_shared <- eval_multinom %>%
base_deselect(cols = c(
"Confusion Matrix", "Class Level Results",
setdiff(colnames(eval_multinom), shared_cols)
))
cfm_multinom_shared <- cfm_multinom %>%
base_deselect(cols = c(
"Confusion Matrix", "Class Level Results",
setdiff(colnames(cfm_multinom), shared_cols)
))
expect_equal(colnames(eval_multinom_shared),
colnames(cfm_multinom_shared))
expect_equal(unlist(eval_multinom_shared), unlist(cfm_multinom_shared))
expect_equal(eval_multinom$`Confusion Matrix`,
cfm_multinom$`Confusion Matrix`)
# Class level results
clr_eval_multinom <- eval_multinom$`Class Level Results`[[1]]
clr_cfm_multinom <- cfm_multinom$`Class Level Results`[[1]]
clr_eval_multinom_cfm <- clr_eval_multinom$`Confusion Matrix`
clr_eval_multinom$`Confusion Matrix` <- NULL
clr_cfm_multinom_cfm <- clr_cfm_multinom$`Confusion Matrix`
clr_cfm_multinom$`Confusion Matrix` <- NULL
shared_cols <- intersect(colnames(clr_eval_multinom), colnames(clr_cfm_multinom))
expect_equal(clr_eval_multinom[, shared_cols],
clr_cfm_multinom[, shared_cols])
clr_eval_multinom_cfm <- lapply(clr_eval_multinom_cfm, function(x){base_deselect(x, "Class")})
expect_equal(clr_eval_multinom_cfm, clr_cfm_multinom_cfm)
})
test_that("evaluate() and evaluate_residuals() has same metric values", {
xpectr::set_test_seed(42)
df <- tibble::tibble(
"target" = runif(50),
"prediction" = runif(50)
)
eval <- evaluate(
df, target_col = "target",
prediction_cols = "prediction",
type = "gaussian",
metrics = "all")
resid_eval <- evaluate_residuals(
df, prediction_col = "prediction",
target_col = "target",
metrics = "all")
# Compare shared metric columns (names, order, and values)
shared_cols <- intersect(colnames(eval), colnames(resid_eval))
eval_shared <- eval %>%
base_deselect(cols = c(
setdiff(colnames(eval), shared_cols)
))
resid_eval_shared <- resid_eval %>%
base_deselect(cols = c(
setdiff(colnames(resid_eval), shared_cols)
))
expect_equal(colnames(eval_shared), colnames(resid_eval_shared))
expect_equal(unlist(eval_shared), unlist(resid_eval_shared))
})
# TODO: Check that evaluate() and (cross_)validate*() all have the same
# metric values for the same data
test_that("the different prediction formats work properly in Gaussian evaluate()", {
testthat::skip_on_cran()
xpectr::set_test_seed(42)
# Gaussian
gauss <- tibble::tibble(
"ID" = rep(c(1:5), each = 3),
"Target_1" = runif(15),
"Prediction_1" = runif(15),
"Target_const" = rep(c(1,2,1,3,2), each = 3),
"Prediction_2" = runif(15) * 10 / 3
) %>%
dplyr::mutate(
ID_fct = factor(ID),
ID_chr = as.character(ID),
Target_1_fct = factor(Target_1),
Target_1_chr = as.character(Target_1),
Prediction_1_fct = factor(Prediction_1),
Prediction_1_chr = as.character(Prediction_1)
)
# Generate expectations for 'evaluate'
# Tip: the gxs_function() call below is kept commented out
# so the expectations are easy to regenerate
xpectr::set_test_seed(42)
# xpectr::gxs_function(
# fn = evaluate,
# args_values = list(
# "data" = list(gauss),
# "target_col" = list("Target_1", "Target_1_chr", "Target_1_fct", "Target_const"),
# "prediction_cols" = list("Prediction_1", "Prediction_1_chr", "Prediction_1_fct", "Prediction_2",
# c("Prediction_1", "Prediction_2")),
# "type" = list("gaussian"),
# "id_col" = list(NULL, "ID", "ID_fct", "ID_chr", "Target_1"),
# "id_method" = list("mean"),
# "metrics" = list(list("all" = FALSE, "RMSE" = TRUE))
# ),
# extra_combinations = list(
# list("target_col" = "Target_const", "prediction_cols" = "Prediction_2", "id_col" = "ID_fct")
# ),
# indentation = 2
# )
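# For reference, the RMSE expectations below reduce to the standard
# definition for a single prediction column (a sketch, not the internals):
rmse_sketch <- function(targets, predictions) sqrt(mean((targets - predictions)^2))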
## Testing 'evaluate' ####
## Initially generated by xpectr
# Testing different combinations of argument values
# Testing evaluate(data = gauss, target_col = "Target_...
xpectr::set_test_seed(42)
# Assigning output
output_19148 <- evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE))
# Testing class
expect_equal(
class(output_19148),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19148[["RMSE"]],
0.3794,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19148),
c("RMSE", "Predictions", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19148),
c("numeric", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19148),
c("double", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19148),
c(1, 3))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19148)),
character(0),
fixed = TRUE)
# Testing evaluate(data = NULL, target_col = "Target_1...
# Changed from baseline: data = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19370 <- xpectr::capture_side_effects(evaluate(data = NULL, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19370[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19370[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: target_col = "Target_...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_12861 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1_chr", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_12861[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'data[[target_col]]': Must be of type 'numeric', not 'character'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_12861[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: target_col = "Target_...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_18304 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1_fct", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_18304[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'data[[target_col]]': Must be of type 'numeric', not 'factor'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_18304[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: target_col = "Target_...
xpectr::set_test_seed(42)
# Assigning output
output_16417 <- evaluate(data = gauss, target_col = "Target_const", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE))
# Testing class
expect_equal(
class(output_16417),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_16417[["RMSE"]],
1.49652,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_16417),
c("RMSE", "Predictions", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_16417),
c("numeric", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_16417),
c("double", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_16417),
c(1, 3))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_16417)),
character(0),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = NULL, pr...
# Changed from baseline: target_col = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_15190 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = NULL, prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_15190[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'target_col': Must be of type 'string', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_15190[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: target_col, predictio...
xpectr::set_test_seed(42)
# Assigning output
output_17365 <- evaluate(data = gauss, target_col = "Target_const", prediction_cols = "Prediction_2", type = "gaussian", id_col = "ID_fct", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE))
# Testing class
expect_equal(
class(output_17365),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_17365[["RMSE"]],
0.85334,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_17365),
c("RMSE", "Predictions", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_17365),
c("numeric", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_17365),
c("double", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_17365),
c(1, 3))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_17365)),
character(0),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_11346 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1_chr", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_11346[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'data[[prediction_col]]': Must be of type 'numeric', not 'character'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_11346[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_16569 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1_fct", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_16569[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'data[[prediction_col]]': Must be of type 'numeric', not 'factor'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_16569[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Assigning output
output_17050 <- evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_2", type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE))
# Testing class
expect_equal(
class(output_17050),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_17050[["RMSE"]],
1.50669,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_17050),
c("RMSE", "Predictions", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_17050),
c("numeric", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_17050),
c("double", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_17050),
c(1, 3))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_17050)),
character(0),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: prediction_cols = c("...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14577 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = c("Prediction_1", "Prediction_2"), type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14577[['error']]),
xpectr::strip("1 assertions failed:\n * When 'type' is 'gaussian', 'prediction_cols' must have length 1."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14577[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: prediction_cols = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_17191 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = NULL, type = "gaussian", id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_17191[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'prediction_cols': Must be of type 'character', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_17191[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: type = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19346 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = NULL, id_col = NULL, id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19346[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'type': Must be a subset of {'gaussian','binomial','multinomial'}, not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19346[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: id_col = "ID_chr"
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_12554 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "ID_chr", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_12554[['error']]),
xpectr::strip("The targets must be constant within the IDs with the current ID method. These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_12554[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: id_col = "Target_1"
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14622 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "Target_1", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14622[['error']]),
xpectr::strip("1 assertions failed:\n * 'id_col' and 'target_col' cannot be identical."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14622[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: id_col = "ID"
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19400 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "ID", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19400[['error']]),
xpectr::strip("The targets must be constant within the IDs with the current ID method. These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19400[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: id_col = "ID_fct"
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19782 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = "ID_fct", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19782[['error']]),
xpectr::strip("The targets must be constant within the IDs with the current ID method. These IDs had more than one unique value in the target column: 1, 2, 3, 4, 5."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19782[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: id_method = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_11174 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = NULL, metrics = list(all = FALSE, RMSE = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_11174[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'id_method': Must be a subset of {'mean','majority'}, not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_11174[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = gauss, target_col = "Target_...
# Changed from baseline: metrics = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14749 <- xpectr::capture_side_effects(evaluate(data = gauss, target_col = "Target_1", prediction_cols = "Prediction_1", type = "gaussian", id_col = NULL, id_method = "mean", metrics = NULL), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14749[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must be of type 'list', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14749[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
## Finished testing 'evaluate' ####
xpectr::set_test_seed(42)
# Assigning output
output_17365 <- evaluate(data = gauss, target_col = "Target_const", prediction_cols = "Prediction_2", type = "gaussian", id_col = "ID_fct", id_method = "mean", metrics = list(all = FALSE, RMSE = TRUE))
## Testing 'output_17365$Predictions[[1]]' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(output_17365$Predictions[[1]]),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_17365$Predictions[[1]][["Target"]],
c(1, 2, 1, 3, 2),
tolerance = 1e-4)
expect_equal(
output_17365$Predictions[[1]][["Prediction"]],
c(2.15195, 1.69115, 1.24622, 1.58568, 1.60302),
tolerance = 1e-4)
expect_equal(
output_17365$Predictions[[1]][["SD"]],
c(0.75329, 1.4739, 1.5737, 0.40383, 1.56662),
tolerance = 1e-4)
expect_equal(
output_17365$Predictions[[1]][["ID_fct"]],
structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
expect_equal(
output_17365$Predictions[[1]][["id_method"]],
c("mean", "mean", "mean", "mean", "mean"),
fixed = TRUE)
# Testing column names
expect_equal(
names(output_17365$Predictions[[1]]),
c("Target", "Prediction", "SD", "ID_fct", "id_method"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_17365$Predictions[[1]]),
c("numeric", "numeric", "numeric", "factor", "character"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_17365$Predictions[[1]]),
c("double", "double", "double", "integer", "character"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_17365$Predictions[[1]]),
c(5L, 5L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_17365$Predictions[[1]])),
character(0),
fixed = TRUE)
## Finished testing 'output_17365$Predictions[[1]]' ####
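# For reference, `id_method = "mean"` averages the predictions within each
# ID before evaluation. A dplyr sketch of the expected aggregation
# (illustrative only; `id_mean_sketch` is our own name, and SD is taken as
# the within-ID standard deviation):
id_mean_sketch <- gauss %>%
  dplyr::group_by(ID_fct) %>%
  dplyr::summarise(
    Target = unique(Target_const),
    Prediction = mean(Prediction_2),
    SD = sd(Prediction_2),
    .groups = "drop"
  )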
})
test_that("the different prediction formats work properly in multinomial evaluate()", {
testthat::skip_on_cran()
xpectr::set_test_seed(42)
multinom <- tibble::tibble(
"ID" = rep(1:5, each = 3),
"Target" = rep(c(1,2,1,3,3), each = 3),
"PredictedClass" = sample(c(1,2,3), 15, replace = TRUE),
"1" = runif(15),
"2" = runif(15),
"3" = runif(15),
"4" = runif(15)
) %>%
dplyr::mutate(
PredictedClass_chr = as.character(PredictedClass),
PredictedClass_fct = as.factor(PredictedClass),
Target_chr = as.character(Target),
Target_fct = as.factor(Target),
`1_chr` = as.character(`1`),
Target_with_1chr = ifelse(Target == 1, "1_chr", Target),
ID_fct = as.factor(ID),
ID_chr = as.character(ID)
)
# Generate expectations for 'evaluate'
# Tip: the gxs_function() call below is kept commented out
# so the expectations are easy to regenerate
xpectr::set_test_seed(42)
# xpectr::gxs_function(
# fn = evaluate,
# args_values = list(
# "data" = list(multinom),
# "target_col" = list("Target_chr", "Target", "Target_fct", "4"),
# "prediction_cols" = list(c("1","2","3"), c("1","2","3","4"), c("1","2"),
# "PredictedClass", "PredictedClass_chr", "PredictedClass_fct"),
# "type" = list("multinomial"),
# "id_col" = list(NULL, "ID", "ID_fct", "ID_chr"),
# "id_method" = list("mean"),
# "apply_softmax" = list(TRUE, FALSE),
# "include_predictions" = list(TRUE),
# "metrics" = list(list("all" = FALSE, "Overall Accuracy" = TRUE, "Sensitivity" = TRUE))
# ),
# extra_combinations = list(
# list("target_col" = "Target_with_1chr", "prediction_cols" = c("1_chr","2","3")),
# list("target_col" = "Target_with_1chr", "prediction_cols" = "PredictedClass_chr", "apply_softmax" = FALSE)
# ),
# indentation = 2
# )
## Testing 'evaluate' ####
## Initially generated by xpectr
# Testing different combinations of argument values
# Testing evaluate(data = multinom, target_col = "Targ...
xpectr::set_test_seed(42)
# Assigning output
output_19148 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_19148),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19148[["Overall Accuracy"]],
0.33333,
tolerance = 1e-4)
expect_equal(
output_19148[["Sensitivity"]],
0.27778,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19148),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19148),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19148),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19148),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19148)),
character(0),
fixed = TRUE)
# Testing evaluate(data = NULL, target_col = "Target_c...
# Changed from baseline: data = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19370 <- xpectr::capture_side_effects(evaluate(data = NULL, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19370[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19370[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: target_col = "Target"
xpectr::set_test_seed(42)
# Assigning output
output_12861 <- evaluate(data = multinom, target_col = "Target", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_12861),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_12861[["Overall Accuracy"]],
0.33333,
tolerance = 1e-4)
expect_equal(
output_12861[["Sensitivity"]],
0.27778,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_12861),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_12861),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_12861),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_12861),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_12861)),
character(0),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: target_col = "Target_...
xpectr::set_test_seed(42)
# Assigning output
output_18304 <- evaluate(data = multinom, target_col = "Target_fct", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_18304),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_18304[["Overall Accuracy"]],
0.33333,
tolerance = 1e-4)
expect_equal(
output_18304[["Sensitivity"]],
0.27778,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_18304),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_18304),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_18304),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_18304),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_18304)),
character(0),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "4", ...
# Changed from baseline: target_col = "4"
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_16417 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "4", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_16417[['error']]),
xpectr::strip("Not all levels in 'target_col' was found in 'prediction_cols'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_16417[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = NULL,...
# Changed from baseline: target_col = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_15190 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = NULL, prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_15190[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'target_col': Must be of type 'string', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_15190[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: target_col, predictio...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_17365 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_with_1chr", prediction_cols = c("1_chr", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_17365[['error']]),
xpectr::strip("the prediction columns must be numeric."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_17365[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: target_col, predictio...
xpectr::set_test_seed(42)
# Assigning output
output_11346 <- evaluate(data = multinom, target_col = "Target_with_1chr", prediction_cols = "PredictedClass_chr", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = FALSE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_11346),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_11346[["Overall Accuracy"]],
0.33333,
tolerance = 1e-4)
expect_equal(
output_11346[["Sensitivity"]],
NaN,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_11346),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_11346),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_11346),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_11346),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_11346)),
character(0),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: prediction_cols = c("...
xpectr::set_test_seed(42)
# Assigning output
output_16569 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3", "4"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_16569),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_16569[["Overall Accuracy"]],
0.26667,
tolerance = 1e-4)
expect_equal(
output_16569[["Sensitivity"]],
NaN,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_16569),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_16569),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_16569),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_16569),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_16569)),
character(0),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: prediction_cols = c("...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_17050 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_17050[['error']]),
xpectr::strip("Not all levels in 'target_col' was found in 'prediction_cols'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_17050[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14577 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = "PredictedClass", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14577[['error']]),
xpectr::strip("2 assertions failed:\n * When 'type' is 'multinomial' and 'prediction_cols' has length 1, both 'data[[target_col]]' and 'data[[prediction_cols]]'\n * must have type character.\n * When passing 'prediction_cols' as single column with multiple classes, 'apply_softmax' should be 'FALSE'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14577[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_17191 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = "PredictedClass_chr", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_17191[['error']]),
xpectr::strip("1 assertions failed:\n * When passing 'prediction_cols' as single column with multiple classes, 'apply_softmax' should be 'FALSE'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_17191[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19346 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = "PredictedClass_fct", type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19346[['error']]),
xpectr::strip("2 assertions failed:\n * When 'type' is 'multinomial' and 'prediction_cols' has length 1, both 'data[[target_col]]' and 'data[[prediction_cols]]'\n * must have type character.\n * When passing 'prediction_cols' as single column with multiple classes, 'apply_softmax' should be 'FALSE'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19346[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: prediction_cols = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_12554 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = NULL, type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_12554[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'prediction_cols': Must be of type 'character', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_12554[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: type = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14622 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = NULL, id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14622[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'type': Must be a subset of {'gaussian','binomial','multinomial'}, not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14622[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: id_col = "ID_fct"
xpectr::set_test_seed(42)
# Assigning output
output_19400 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = "ID_fct", id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_19400),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19400[["Overall Accuracy"]],
0.4,
tolerance = 1e-4)
expect_equal(
output_19400[["Sensitivity"]],
0.33333,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19400),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19400),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19400),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19400),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19400)),
character(0),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: id_col = "ID_chr"
xpectr::set_test_seed(42)
# Assigning output
output_19782 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = "ID_chr", id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_19782),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19782[["Overall Accuracy"]],
0.4,
tolerance = 1e-4)
expect_equal(
output_19782[["Sensitivity"]],
0.33333,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19782),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19782),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19782),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19782),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19782)),
character(0),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: id_col = "ID"
xpectr::set_test_seed(42)
# Assigning output
output_11174 <- evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = "ID", id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE))
# Testing class
expect_equal(
class(output_11174),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_11174[["Overall Accuracy"]],
0.4,
tolerance = 1e-4)
expect_equal(
output_11174[["Sensitivity"]],
0.33333,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_11174),
c("Overall Accuracy", "Sensitivity", "Predictions", "Confusion Matrix",
"Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_11174),
c("numeric", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_11174),
c("double", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_11174),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_11174)),
character(0),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: id_method = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14749 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = NULL, apply_softmax = TRUE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14749[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'id_method': Must be a subset of {'mean','majority'}, not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14749[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: apply_softmax = FALSE
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_15603 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = FALSE, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_15603[['error']]),
xpectr::strip("'multinomial' evaluate(): Not all probabilities added up to 1 row-wise (tolerance of 5 decimals)."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_15603[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
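# Note: with 'apply_softmax = FALSE', the probability columns must already
# sum to 1 row-wise. A minimal sketch of normalizing non-negative scores
# beforehand (illustrative only, not part of the generated expectations):
# probs <- multinom[, c("1", "2", "3")]
# multinom[, c("1", "2", "3")] <- probs / rowSums(probs)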
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: apply_softmax = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19040 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = NULL, include_predictions = TRUE, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19040[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'apply_softmax': Must be of type 'logical flag', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19040[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: include_predictions =...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_11387 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = NULL, metrics = list(all = FALSE, `Overall Accuracy` = TRUE, Sensitivity = TRUE)), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_11387[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'include_predictions': Must be of type 'logical flag', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_11387[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = multinom, target_col = "Targ...
# Changed from baseline: metrics = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19888 <- xpectr::capture_side_effects(evaluate(data = multinom, target_col = "Target_chr", prediction_cols = c("1", "2", "3"), type = "multinomial", id_col = NULL, id_method = "mean", apply_softmax = TRUE, include_predictions = TRUE, metrics = NULL), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19888[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must be of type 'list', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19888[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
## Finished testing 'evaluate' ####
#
})
test_that("the different prediction formats work properly in binomial evaluate()", {
testthat::skip_on_cran()
xpectr::set_test_seed(42)
binom <- tibble::tibble(
"ID" = rep(1:5, each = 3),
"Target" = rep(c(1,2,1,1,2), each = 3),
"PredictedClass" = sample(c(1,2), 15, replace = TRUE),
"Prediction" = runif(15),
"1" = runif(15),
"2" = runif(15)
) %>%
dplyr::mutate(
PredictedClass_chr = as.character(PredictedClass),
PredictedClass_fct = as.factor(PredictedClass),
Target_chr = as.character(Target),
Target_fct = as.factor(Target),
ID_fct = as.factor(ID),
ID_chr = as.character(ID)
)
# Generate expectations for 'evaluate'
# Tip: keep the gxs_function() call below commented out,
# so the tests are easy to regenerate
xpectr::set_test_seed(42)
# xpectr::gxs_function(
# fn = evaluate,
# args_values = list(
# "data" = list(binom),
# "target_col" = list("Target", "Target_chr", "Target_fct"),
# "prediction_cols" = list("Prediction", c("1", "2"), "PredictedClass", "PredictedClass_chr", "PredictedClass_fct"),
# "type" = list("binomial", "multinomial"),
# "id_col" = list(NULL, "ID", "ID_fct", "ID_chr", "Target_fct"),
# "id_method" = list("mean"),
# "cutoff" = list(0.5),
# "positive" = list(2, 1, "2"),
# "metrics" = list(list("all" = FALSE, "Balanced Accuracy" = TRUE, "Sensitivity" = TRUE)),
# "include_predictions" = list(TRUE)
# ),
# extra_combinations = list(
# list("prediction_cols" = "PredictedClass_chr", "target_col" = "Target_chr", "id_col" = "ID_chr")
# ),
# indentation = 2
# )
## Testing 'evaluate' ####
## Initially generated by xpectr
# Testing different combinations of argument values
# Testing evaluate(data = binom, target_col = "Target"...
xpectr::set_test_seed(42)
# Assigning output
output_19148 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_19148),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19148[["Balanced Accuracy"]],
0.55556,
tolerance = 1e-4)
expect_equal(
output_19148[["Sensitivity"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_19148[["ROC"]],
NA)
expect_equal(
output_19148[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_19148),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19148),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19148),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19148),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19148)),
character(0),
fixed = TRUE)
# Testing evaluate(data = NULL, target_col = "Target",...
# Changed from baseline: data = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19370 <- xpectr::capture_side_effects(evaluate(data = NULL, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19370[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19370[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target_...
# Changed from baseline: target_col = "Target_...
xpectr::set_test_seed(42)
# Assigning output
output_12861 <- evaluate(data = binom, target_col = "Target_chr", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_12861),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_12861[["Balanced Accuracy"]],
0.55556,
tolerance = 1e-4)
expect_equal(
output_12861[["Sensitivity"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_12861[["ROC"]],
NA)
expect_equal(
output_12861[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_12861),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_12861),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_12861),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_12861),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_12861)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target_...
# Changed from baseline: target_col = "Target_...
xpectr::set_test_seed(42)
# Assigning output
output_18304 <- evaluate(data = binom, target_col = "Target_fct", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_18304),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_18304[["Balanced Accuracy"]],
0.55556,
tolerance = 1e-4)
expect_equal(
output_18304[["Sensitivity"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_18304[["ROC"]],
NA)
expect_equal(
output_18304[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_18304),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_18304),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_18304),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_18304),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_18304)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = NULL, pr...
# Changed from baseline: target_col = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_16417 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = NULL, prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_16417[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'target_col': Must be of type 'string', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_16417[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target_...
# Changed from baseline: target_col, predictio...
xpectr::set_test_seed(42)
# Assigning output
output_15190 <- evaluate(data = binom, target_col = "Target_chr", prediction_cols = "PredictedClass_chr", type = "binomial", id_col = "ID_chr", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_15190),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_15190[["Balanced Accuracy"]],
0.25,
tolerance = 1e-4)
expect_equal(
output_15190[["Sensitivity"]],
0.5,
tolerance = 1e-4)
expect_equal(
output_15190[["ROC"]],
NA)
expect_equal(
output_15190[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_15190),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_15190),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_15190),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_15190),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_15190)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: prediction_cols = c("...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_17365 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = c("1", "2"), type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_17365[['error']]),
xpectr::strip("1 assertions failed:\n * When 'type' is 'binomial', 'prediction_cols' must have length 1."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_17365[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
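# For 'type = "binomial"', a single column is expected, holding the
# probability of one class (by cvms convention, the alphabetically second
# class). An illustrative sketch using just the "2" column:
# evaluate(data = binom, target_col = "Target", prediction_cols = "2",
#          type = "binomial", positive = 2)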
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_11346 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "PredictedClass", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_11346[['error']]),
xpectr::strip("1 assertions failed:\n * When 'type' is 'binomial' and 'data[[prediction_cols]]' is numeric, the values in 'data[[prediction_cols]]' must be\n * between 0 and 1."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_11346[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
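# Hard numeric class labels (1/2) are rejected above; one illustrative
# workaround (a sketch, not part of the generated tests) is converting them
# to a 0/1 probability of the positive class first:
# binom %>%
#   dplyr::mutate(prob_2 = as.numeric(PredictedClass == 2)) %>%
#   evaluate(target_col = "Target", prediction_cols = "prob_2",
#            type = "binomial", positive = 2)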
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Assigning output
output_16569 <- evaluate(data = binom, target_col = "Target", prediction_cols = "PredictedClass_chr", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_16569),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_16569[["Balanced Accuracy"]],
0.5,
tolerance = 1e-4)
expect_equal(
output_16569[["Sensitivity"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_16569[["ROC"]],
NA)
expect_equal(
output_16569[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_16569),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_16569),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_16569),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_16569),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_16569)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: prediction_cols = "Pr...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_17050 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "PredictedClass_fct", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_17050[['error']]),
xpectr::strip("1 assertions failed:\n * When 'type' is 'binomial', 'data[[prediction_cols]]' must be either numeric or character."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_17050[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: prediction_cols = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14577 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = NULL, type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14577[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'prediction_cols': Must be of type 'character', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14577[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: type = "multinomial"
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_17191 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "multinomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_17191[['error']]),
xpectr::strip("1 assertions failed:\n * When 'type' is 'multinomial' and 'prediction_cols' has length 1, both 'data[[target_col]]' and 'data[[prediction_cols]]'\n * must have type character."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_17191[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
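# With 'type = "multinomial"' and a single prediction column, both columns
# must be character (predicted class names). A sketch reusing the character
# columns from 'binom' (illustrative only):
# evaluate(data = binom, target_col = "Target_chr",
#          prediction_cols = "PredictedClass_chr", type = "multinomial")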
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: type = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19346 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = NULL, id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19346[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'type': Must be a subset of {'gaussian','binomial','multinomial'}, not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19346[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: id_col = "ID_chr"
xpectr::set_test_seed(42)
# Assigning output
output_12554 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "ID_chr", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_12554),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_12554[["Balanced Accuracy"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_12554[["Sensitivity"]],
1,
tolerance = 1e-4)
expect_equal(
output_12554[["ROC"]],
NA)
expect_equal(
output_12554[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_12554),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_12554),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_12554),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_12554),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_12554)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: id_col = "Target_fct"
xpectr::set_test_seed(42)
# Assigning output
output_14622 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "Target_fct", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_14622),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_14622[["Balanced Accuracy"]],
0.5,
tolerance = 1e-4)
expect_equal(
output_14622[["Sensitivity"]],
1,
tolerance = 1e-4)
expect_equal(
output_14622[["ROC"]],
NA)
expect_equal(
output_14622[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_14622),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_14622),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_14622),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_14622),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_14622)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: id_col = "ID"
xpectr::set_test_seed(42)
# Assigning output
output_19400 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "ID", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_19400),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19400[["Balanced Accuracy"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_19400[["Sensitivity"]],
1,
tolerance = 1e-4)
expect_equal(
output_19400[["ROC"]],
NA)
expect_equal(
output_19400[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_19400),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19400),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19400),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19400),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19400)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: id_col = "ID_fct"
xpectr::set_test_seed(42)
# Assigning output
output_19782 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = "ID_fct", id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_19782),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19782[["Balanced Accuracy"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_19782[["Sensitivity"]],
1,
tolerance = 1e-4)
expect_equal(
output_19782[["ROC"]],
NA)
expect_equal(
output_19782[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_19782),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19782),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19782),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19782),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19782)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: id_method = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_11174 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = NULL, cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_11174[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'id_method': Must be a subset of {'mean','majority'}, not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_11174[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: cutoff = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_14749 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = NULL, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_14749[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'cutoff': Must be of type 'number', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_14749[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: positive = 1
xpectr::set_test_seed(42)
# Assigning output
output_15603 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 1, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_15603),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_15603[["Balanced Accuracy"]],
0.55556,
tolerance = 1e-4)
expect_equal(
output_15603[["Sensitivity"]],
0.44444,
tolerance = 1e-4)
expect_equal(
output_15603[["ROC"]],
NA)
expect_equal(
output_15603[["Process"]][[1]][["Positive Class"]],
"1",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_15603),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_15603),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_15603),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_15603),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_15603)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: positive = "2"
xpectr::set_test_seed(42)
# Assigning output
output_19040 <- evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = "2", metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE)
# Testing class
expect_equal(
class(output_19040),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19040[["Balanced Accuracy"]],
0.55556,
tolerance = 1e-4)
expect_equal(
output_19040[["Sensitivity"]],
0.66667,
tolerance = 1e-4)
expect_equal(
output_19040[["ROC"]],
NA)
expect_equal(
output_19040[["Process"]][[1]][["Positive Class"]],
"2",
fixed = TRUE)
# Testing column names
expect_equal(
names(output_19040),
c("Balanced Accuracy", "Sensitivity", "Predictions", "ROC", "Confusion Matrix",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19040),
c("numeric", "numeric", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19040),
c("double", "double", "list", "logical", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19040),
c(1L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19040)),
character(0),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: positive = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_11387 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = NULL, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_11387[['error']]),
xpectr::strip("Assertion failed. One of the following must apply:\n * checkmate::check_choice(positive): Must be a subset of {'1','2'}, not 'NULL'\n * checkmate::check_string(positive): Must be of type 'string', not 'NULL'"),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_11387[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: metrics = NULL
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19888 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = NULL, include_predictions = TRUE), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19888[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'metrics': Must be of type 'list', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19888[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
# Testing evaluate(data = binom, target_col = "Target"...
# Changed from baseline: include_predictions =...
xpectr::set_test_seed(42)
# Testing side effects
# Assigning side effects
side_effects_19466 <- xpectr::capture_side_effects(evaluate(data = binom, target_col = "Target", prediction_cols = "Prediction", type = "binomial", id_col = NULL, id_method = "mean", cutoff = 0.5, positive = 2, metrics = list(all = FALSE, `Balanced Accuracy` = TRUE, Sensitivity = TRUE), include_predictions = NULL), reset_seed = TRUE)
expect_equal(
xpectr::strip(side_effects_19466[['error']]),
xpectr::strip("1 assertions failed:\n * Variable 'include_predictions': Must be of type 'logical flag', not 'NULL'."),
fixed = TRUE)
expect_equal(
xpectr::strip(side_effects_19466[['error_class']]),
xpectr::strip(c("simpleError", "error", "condition")),
fixed = TRUE)
## Finished testing 'evaluate' ####
#
})
test_that("testing 2-class multinomial evaluation works", {
xpectr::set_test_seed(42)
data_ <- multiclass_probability_tibble(
num_classes = 2,
num_observations = 60,
apply_softmax = FALSE # Softmax is applied by evaluate() below
) %>%
dplyr::mutate(
cl = as.factor(rep(c("class_1", "class_2"), each = 30))
) %>%
dplyr::sample_frac()
expect_equal(sum(data_[, c("class_1", "class_2")]), 63.33427, tolerance = 1e-5)
mnm_eval_2class <- evaluate(
data_, target_col = "cl",
prediction_cols = c("class_1", "class_2"),
type = "multinomial", apply_softmax = TRUE)
## Testing 'mnm_eval_2class' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(mnm_eval_2class),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
mnm_eval_2class[["Overall Accuracy"]],
0.6,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Balanced Accuracy"]],
0.6,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["F1"]],
0.59956,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Sensitivity"]],
0.6,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Specificity"]],
0.6,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Pos Pred Value"]],
0.60045,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Neg Pred Value"]],
0.60045,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Kappa"]],
0.2,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["MCC"]],
0.20045,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Detection Rate"]],
0.3,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Detection Prevalence"]],
0.5,
tolerance = 1e-4)
expect_equal(
mnm_eval_2class[["Prevalence"]],
0.5,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(mnm_eval_2class),
c("Overall Accuracy", "Balanced Accuracy", "F1", "Sensitivity",
"Specificity", "Pos Pred Value", "Neg Pred Value", "Kappa",
"MCC", "Detection Rate", "Detection Prevalence", "Prevalence",
"Predictions", "Confusion Matrix", "Class Level Results", "Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(mnm_eval_2class),
c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(mnm_eval_2class),
c("double", "double", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "list", "list",
"list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(mnm_eval_2class),
c(1L, 16L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(mnm_eval_2class)),
character(0),
fixed = TRUE)
## Finished testing 'mnm_eval_2class' ####
## Testing 'mnm_eval_2class$`Class Level Results`[[1]]' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Assigning output
output_19148 <- mnm_eval_2class$`Class Level Results`[[1]]
# Testing class
expect_equal(
class(output_19148),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19148[["Class"]],
c("class_1", "class_2"),
fixed = TRUE)
expect_equal(
output_19148[["Balanced Accuracy"]],
c(0.6, 0.6),
tolerance = 1e-4)
expect_equal(
output_19148[["F1"]],
c(0.6129, 0.58621),
tolerance = 1e-4)
expect_equal(
output_19148[["Sensitivity"]],
c(0.63333, 0.56667),
tolerance = 1e-4)
expect_equal(
output_19148[["Specificity"]],
c(0.56667, 0.63333),
tolerance = 1e-4)
expect_equal(
output_19148[["Pos Pred Value"]],
c(0.59375, 0.60714),
tolerance = 1e-4)
expect_equal(
output_19148[["Neg Pred Value"]],
c(0.60714, 0.59375),
tolerance = 1e-4)
expect_equal(
output_19148[["Kappa"]],
c(0.2, 0.2),
tolerance = 1e-4)
expect_equal(
output_19148[["Detection Rate"]],
c(0.31667, 0.28333),
tolerance = 1e-4)
expect_equal(
output_19148[["Detection Prevalence"]],
c(0.53333, 0.46667),
tolerance = 1e-4)
expect_equal(
output_19148[["Prevalence"]],
c(0.5, 0.5),
tolerance = 1e-4)
expect_equal(
output_19148[["Support"]],
c(30, 30),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19148),
c("Class", "Balanced Accuracy", "F1", "Sensitivity", "Specificity",
"Pos Pred Value", "Neg Pred Value", "Kappa", "Detection Rate",
"Detection Prevalence", "Prevalence", "Support", "Confusion Matrix"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19148),
c("character", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "integer",
"list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19148),
c("character", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "integer",
"list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19148),
c(2L, 13L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19148)),
character(0),
fixed = TRUE)
## Finished testing 'mnm_eval_2class$`Class Level Results`[[1]]' ####
## Testing 'mnm_eval_2class$`Class Level Results`[[1]]$`...' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Assigning output
output_19148 <- mnm_eval_2class$`Class Level Results`[[1]]$`Confusion Matrix`[[1]]
# Testing class
expect_equal(
class(output_19148),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19148[["Class"]],
c("class_1", "class_1", "class_1", "class_1"))
expect_equal(
output_19148[["Prediction"]],
c("0", "1", "0", "1"),
fixed = TRUE)
expect_equal(
output_19148[["Target"]],
c("0", "0", "1", "1"),
fixed = TRUE)
expect_equal(
output_19148[["Pos_0"]],
c("TP", "FN", "FP", "TN"),
fixed = TRUE)
expect_equal(
output_19148[["Pos_1"]],
c("TN", "FP", "FN", "TP"),
fixed = TRUE)
expect_equal(
output_19148[["N"]],
c(17, 13, 11, 19),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19148),
c("Class", "Prediction", "Target", "Pos_0", "Pos_1", "N"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19148),
c("character", "character", "character", "character", "character",
"integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19148),
c("character", "character", "character", "character", "character",
"integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19148),
c(4L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19148)),
character(0),
fixed = TRUE)
## Finished testing 'mnm_eval_2class$`Class Level Results`[[1]]$`...' ####
## Testing 'mnm_eval_2class$`Confusion Matrix`[[1]]' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Assigning output
output_19148 <- mnm_eval_2class$`Confusion Matrix`[[1]]
# Testing class
expect_equal(
class(output_19148),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
output_19148[["Prediction"]],
c("class_1", "class_2", "class_1", "class_2"),
fixed = TRUE)
expect_equal(
output_19148[["Target"]],
c("class_1", "class_1", "class_2", "class_2"),
fixed = TRUE)
expect_equal(
output_19148[["Pos_class_1"]],
c("TP", "FN", "FP", "TN"),
fixed = TRUE)
expect_equal(
output_19148[["Pos_class_2"]],
c("TN", "FP", "FN", "TP"),
fixed = TRUE)
expect_equal(
output_19148[["N"]],
c(19, 11, 13, 17),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(output_19148),
c("Prediction", "Target", "Pos_class_1", "Pos_class_2", "N"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(output_19148),
c("character", "character", "character", "character", "integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(output_19148),
c("character", "character", "character", "character", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(output_19148),
4:5)
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(output_19148)),
character(0),
fixed = TRUE)
## Finished testing 'mnm_eval_2class$`Confusion Matrix`[[1]]' ####
})
test_that("testing different number of classes in grouped multinomial evaluate()", {
xpectr::set_test_seed(42)
df <- plyr::ldply(1:3, function(i){
multiclass_probability_tibble(
num_classes = 2+i,
num_observations = 15,
apply_softmax = TRUE,
add_predicted_classes = TRUE,
add_targets = TRUE
) %>%
dplyr::mutate(TheGroup = paste0("gr", i))
}) %>%
dplyr::select(`Predicted Class`, "Target", "TheGroup", dplyr::everything())
# Different numbers of classes per group
# Prediction col is the predicted class
eval_1 <- df %>%
dplyr::group_by(.data$TheGroup) %>%
evaluate(
target_col = "Target",
prediction_cols = "Predicted Class",
type = "multinomial",
metrics = list("all" = FALSE, "Accuracy" = TRUE))
## Testing 'eval_1' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(eval_1),
c("eval_results", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
eval_1[["TheGroup"]],
c("gr1", "gr2", "gr3"),
fixed = TRUE)
expect_equal(
eval_1[["Accuracy"]],
c(0.55556, 0.63333, 0.70667),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(eval_1),
c("TheGroup", "Accuracy", "Predictions", "Confusion Matrix", "Class Level Results",
"Process"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(eval_1),
c("character", "numeric", "list", "list", "list", "list"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(eval_1),
c("character", "double", "list", "list", "list", "list"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(eval_1),
c(3L, 6L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(eval_1)),
character(0),
fixed = TRUE)
## Finished testing 'eval_1' ####
# Classes reported in 'Process'
expect_equal(
eval_1$Process[[1]]$Classes,
c("class_1", "class_2", "class_3"),
fixed = TRUE)
expect_equal(
eval_1$Process[[2]]$Classes,
c("class_1", "class_2", "class_3", "class_4"),
fixed = TRUE)
expect_equal(
eval_1$Process[[3]]$Classes,
c("class_1", "class_2", "class_3", "class_4", "class_5"),
fixed = TRUE)
# Nested predictions
expect_equal(
unlist(eval_1$Predictions[[1]]$Prediction[[1]]),
c(class_1 = 0, class_2 = 1, class_3 = 0),
tolerance = 1e-4)
expect_equal(
unlist(eval_1$Predictions[[2]]$Prediction[[1]]),
c(class_1 = 0, class_2 = 0, class_3 = 0, class_4 = 1),
tolerance = 1e-4)
expect_equal(
unlist(eval_1$Predictions[[3]]$Prediction[[1]]),
c(class_1 = 0, class_2 = 0, class_3 = 0, class_4 = 1, class_5 = 0),
tolerance = 1e-4)
# Confusion matrices
expect_equal(nrow(eval_1$`Confusion Matrix`[[1]]), 3^2)
expect_equal(nrow(eval_1$`Confusion Matrix`[[2]]), 4^2)
expect_equal(nrow(eval_1$`Confusion Matrix`[[3]]), 5^2)
# Class level results
expect_equal(nrow(eval_1$`Class Level Results`[[1]]), 3)
expect_equal(nrow(eval_1$`Class Level Results`[[2]]), 4)
expect_equal(nrow(eval_1$`Class Level Results`[[3]]), 5)
# Different numbers of classes per group
# Prediction cols are the probabilities
# First set NAs to 0
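# ('plyr::ldply()' row-binds via 'rbind.fill()', so probability columns
# missing from a group are NA-filled; zeroing them lets all five columns be
# passed as 'prediction_cols' for every group.)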
df[is.na(df)] <- 0.0
eval_2 <- df %>%
dplyr::group_by(.data$TheGroup) %>%
evaluate(
target_col = "Target",
prediction_cols = paste0("class_", 1:5),
type = "multinomial",
metrics = list("all" = FALSE, "Accuracy" = TRUE))
expect_equal(
eval_1[["Accuracy"]],
eval_2[["Accuracy"]]
)
expect_equal(
eval_1[["TheGroup"]],
eval_2[["TheGroup"]]
)
# Nested predictions
expect_equal(
unlist(eval_2$Predictions[[1]]$Prediction[[1]]),
c(class_1 = 0.349267814, class_2 = 0.35818423, class_3 = 0.29254794),
tolerance = 1e-4)
expect_equal(
unlist(eval_2$Predictions[[2]]$Prediction[[1]]),
c(class_1 = 0.228885461, class_2 = 0.239121137,
class_3 = 0.227020643, class_4 = 0.304972757),
tolerance = 1e-4)
expect_equal(
unlist(eval_2$Predictions[[3]]$Prediction[[1]]),
c(class_1 = 0.20777347, class_2 = 0.23889069, class_3 = 0.1392378,
class_4 = 0.29139622, class_5 = 0.12270179),
tolerance = 1e-4)
# Confusion matrices
expect_equal(nrow(eval_2$`Confusion Matrix`[[1]]), 3^2)
expect_equal(nrow(eval_2$`Confusion Matrix`[[2]]), 4^2)
expect_equal(nrow(eval_2$`Confusion Matrix`[[3]]), 5^2)
# Class level results
expect_equal(nrow(eval_2$`Class Level Results`[[1]]), 3)
expect_equal(nrow(eval_2$`Class Level Results`[[2]]), 4)
expect_equal(nrow(eval_2$`Class Level Results`[[3]]), 5)
})
# commented out ####
# test_that("profiling",{
#
# # Load file with prepared predictions and hparams
# load(file="")
#
# evals <- predictions %>%
# dplyr::group_by(results_folder, epoch) %>%
# cvms:::evaluate(target_col = "target_string",
# prediction_cols = current_hparams %>%
# dplyr::arrange(class_indices_map_values) %>%
# dplyr::pull(class_names) %>%
# as.character(),
# type = "multinomial",
# apply_softmax = FALSE,
# parallel = TRUE)
#
#
# })