# Tests for pr_auc() / pr_auc_vec() metric calculations
test_that("Calculations are correct - two class", {
  # Known-good PR AUC value for the two class example data.
  expected <- 0.9464467
  expect_equal(
    pr_auc_vec(two_class_example$truth, two_class_example$Class1),
    expected
  )
})
test_that("Calculations are correct - multi class", {
  hpc_f1 <- data_hpc_fold1()
  # Each averaging estimator is checked against its hand-rolled
  # one-vs-all reference computed from the binary metric.
  reference_fns <- list(
    macro = hpc_fold1_macro_metric,
    macro_weighted = hpc_fold1_macro_weighted_metric
  )
  for (estimator in names(reference_fns)) {
    expect_equal(
      pr_auc(hpc_f1, obs, VF:L, estimator = estimator)[[".estimate"]],
      reference_fns[[estimator]](pr_auc_binary)
    )
  }
})
test_that("Calculations handles NAs", {
  # Assignment creates a test-local copy of `hpc_cv`; the global
  # dataset is untouched.
  hpc_cv$VF[seq_len(10)] <- NA
  with_na_removed <- pr_auc(hpc_cv, obs, VF:L)
  expect_equal(with_na_removed[[".estimate"]], 0.62197342)
  with_na_kept <- pr_auc(hpc_cv, obs, VF:L, na_rm = FALSE)
  expect_equal(with_na_kept[[".estimate"]], NA_real_)
})
test_that("Case weights calculations are correct", {
  # Reference curve/AUC computed by scikit-learn with the same weights.
  curve <- read_pydata("py-pr-curve")$case_weight$binary
  expected <- auc(curve$recall, curve$precision)
  two_class_example$weight <- read_weights_two_class_example()
  result <- pr_auc(two_class_example, truth, Class1, case_weights = weight)
  expect_equal(result[[".estimate"]], expected)
})
test_that("works with hardhat case weights", {
  df <- two_class_example
  n <- nrow(df)
  # Both hardhat weight classes must be accepted without error.
  expect_no_error(
    pr_auc_vec(
      df$truth,
      df$Class1,
      case_weights = hardhat::importance_weights(seq_len(n))
    )
  )
  expect_no_error(
    pr_auc_vec(
      df$truth,
      df$Class1,
      case_weights = hardhat::frequency_weights(seq_len(n))
    )
  )
})
test_that("errors with class_pred input", {
  skip_if_not_installed("probably")
  # `class_pred` truth vectors are not supported and must error with a
  # clear, snapshotted message.
  cp_truth <- probably::as_class_pred(two_class_example$truth, which = 1)
  estimate <- two_class_example$Class1
  # NOTE(review): removed unused `fct_truth` local (it was created and
  # mutated but never referenced by any expectation).
  expect_snapshot(
    error = TRUE,
    pr_auc_vec(cp_truth, estimate)
  )
})
test_that("na_rm argument check", {
  # A non-logical `na_rm` must trigger a snapshotted validation error.
  expect_snapshot(error = TRUE, pr_auc_vec(1, 1, na_rm = "yes"))
})
test_that("`event_level = 'second'` works", {
  df <- two_class_example
  # Releveling the factor and flipping `event_level` should be a no-op.
  flipped <- df
  flipped$truth <- stats::relevel(flipped$truth, "Class2")
  expect_equal(
    pr_auc_vec(df$truth, df$Class1),
    pr_auc_vec(flipped$truth, flipped$Class1, event_level = "second")
  )
})
test_that("sklearn equivalent", {
  # Note that these values are different from `MLmetrics::PRAUC()`,
  # see #93 about how duplicates and end points are handled
  curve <- read_pydata("py-pr-curve")$binary
  expected <- auc(curve$recall, curve$precision)
  # Both string and bare-name column selection must agree with sklearn.
  estimates <- c(
    pr_auc(two_class_example, truth = "truth", "Class1")[[".estimate"]],
    pr_auc(two_class_example, truth, Class1)[[".estimate"]]
  )
  expect_equal(estimates[[1]], expected)
  expect_equal(estimates[[2]], expected)
})
test_that("grouped multiclass (one-vs-all) weighted example matches expanded equivalent", {
  # Weighting a row by `w` must be identical to repeating that row `w`
  # times in an unweighted computation, for every estimator.
  hpc_cv$weight <- rep(1, times = nrow(hpc_cv))
  hpc_cv$weight[c(100, 200, 150, 2)] <- 5
  hpc_cv <- dplyr::group_by(hpc_cv, Resample)
  expanded <- hpc_cv[
    vec_rep_each(seq_len(nrow(hpc_cv)), times = hpc_cv$weight),
  ]
  for (estimator in c("macro", "macro_weighted")) {
    expect_identical(
      pr_auc(hpc_cv, obs, VF:L, case_weights = weight, estimator = estimator),
      pr_auc(expanded, obs, VF:L, estimator = estimator)
    )
  }
})
test_that("range values are correct", {
  # Derive the perfect/worst scores from the metric's declared
  # direction and range, rather than hard-coding them.
  direction <- metric_direction(pr_auc)
  range <- metric_range(pr_auc)
  minimizing <- direction == "minimize"
  perfect <- if (minimizing) range[1] else range[2]
  worst <- if (minimizing) range[2] else range[1]
  df <- tibble::tibble(
    truth = factor(c("a", "a", "a", "b", "b"), levels = c("a", "b")),
    perfect = c(1, 1, 1, 0, 0),
    off = rep(0.5, 5)
  )
  expect_equal(pr_auc_vec(df$truth, df$perfect), perfect)
  off_score <- pr_auc_vec(df$truth, df$off)
  # An uninformative estimate must land strictly inside the range on
  # the "perfect" side and within bounds on the "worst" side.
  if (minimizing) {
    expect_gt(off_score, perfect)
    expect_lte(off_score, worst)
  }
  if (!minimizing) {
    expect_lt(off_score, perfect)
    expect_gte(off_score, worst)
  }
})
# (Removed scraped documentation-site boilerplate that was not part of
# this test file.)