Nothing
test_that("Calculations are correct", {
df <- data.frame(
truth = factor(c("Yes", "Yes", "No", "Yes", "No"), levels = c("Yes", "No")),
estimate = c(0.9, 0.8, 0.7, 0.68, 0.5)
)
# caret::lift(truth ~ estimate, df)
perc_found <- c(
0,
33.3333333333333,
66.6666666666667,
66.6666666666667,
100,
100
)
expect_equal(
gain_curve(df, truth, estimate)$.percent_found,
perc_found
)
})
test_that("na_rm = FALSE errors if missing values are present", {
df <- two_class_example
df$Class1[1] <- NA
expect_snapshot(
error = TRUE,
gain_curve_vec(df$truth, df$Class1, na_rm = FALSE)
)
})
test_that("Case weights calculations are correct", {
# binary, ideally, frequency weights
df <- data.frame(
truth = factor(c("Yes", "Yes", "No", "Yes", "No"), levels = c("Yes", "No")),
estimate = c(0.9, 0.8, 0.7, 0.68, 0.5),
weight = c(2, 1, 1, 3, 2)
)
out <- gain_curve(df, truth, estimate, case_weights = weight)
# Manually computed and verified
expect <- dplyr::tibble(
.n = c(0, 2, 3, 4, 7, 9),
.n_events = c(0, 2, 3, 3, 6, 6),
.percent_tested = .n / 9 * 100,
.percent_found = .n_events / 6 * 100
)
class(expect) <- class(out)
expect_s3_class(out, "gain_df")
expect_identical(out, expect)
# multiclass, ideally, frequency weights
df <- data.frame(
truth = factor(
c("Yes", "Yes", "No", "Maybe", "Yes", "Maybe", "No"),
levels = c("Yes", "No", "Maybe")
),
Yes = c(0.9, 0.8, 0.7, 0.68, 0.5, 0.7, 0.3),
No = c(0.05, 0.05, 0.2, 0.2, 0.2, 0.1, 0.6),
Maybe = c(0.05, 0.15, 0.1, 0.12, 0.3, 0.2, 0.1),
weight = c(2, 1, 1, 3, 2, 5, 2)
)
out <- gain_curve(df, truth, Yes, No, Maybe, case_weights = weight)
# Manually computed and verified
expect <- tibble::tibble(
.level = rep(c("Yes", "No", "Maybe"), c(7L, 5L, 7L)),
.n = c(0, 2, 3, 9, 12, 14, 16, 0, 2, 8, 13, 16, 0, 2, 7, 8, 11, 14, 16),
.n_events = c(0, 2, 3, 3, 3, 5, 5, 0, 2, 3, 3, 3, 0, 0, 5, 5, 8, 8, 8),
)
expect <- dplyr::group_by(expect, .level)
expect <- dplyr::mutate(
expect,
.percent_tested = .n / max(.n) * 100,
.percent_found = .n_events / max(.n_events) * 100
)
expect <- dplyr::ungroup(expect)
class(expect) <- class(out)
expect_s3_class(out, "gain_df")
expect_identical(out, expect)
# case weights scales `.n` and `.n_events`
skip_if_not_installed("ggplot2")
# This is required for the `autoplot()` method
df <- data.frame(
truth = factor(c("Yes", "Yes", "No", "Yes", "No"), levels = c("Yes", "No")),
estimate = c(0.9, 0.8, 0.7, 0.68, 0.5),
weight = c(2, 1, 1, 3, 2)
)
out <- gain_curve(df, truth, estimate, case_weights = weight)
plot <- ggplot2::autoplot(out)
data <- ggplot2::ggplot_build(plot)
grey_overlay_data <- data$data[[1]]
expect_equal(grey_overlay_data$x, c(0, 2 / 3 * 100, 100))
expect_equal(grey_overlay_data$y, c(0, 100, 100))
})
test_that("works with hardhat case weights", {
df <- data.frame(
truth = factor(c("Yes", "Yes", "No", "Yes", "No"), levels = c("Yes", "No")),
estimate = c(0.9, 0.8, 0.7, 0.68, 0.5),
weight = c(2, 1, 1, 3, 2)
)
curve1 <- gain_curve(df, truth, estimate, case_weights = weight)
df$weight <- hardhat::frequency_weights(df$weight)
curve2 <- gain_curve(df, truth, estimate, case_weights = weight)
expect_identical(curve1, curve2)
})
test_that("errors with class_pred input", {
skip_if_not_installed("probably")
cp_truth <- probably::as_class_pred(two_class_example$truth, which = 1)
fct_truth <- two_class_example$truth
fct_truth[1] <- NA
estimate <- two_class_example$Class1
expect_snapshot(
error = TRUE,
gain_curve_vec_vec(cp_truth, estimate)
)
})
test_that("na_rm argument check", {
expect_snapshot(
error = TRUE,
gain_curve_vec(1, 1, na_rm = "yes")
)
})
test_that("`event_level = 'second'` works", {
df <- two_class_example
df_rev <- df
df_rev$truth <- stats::relevel(df_rev$truth, "Class2")
expect_equal(
gain_curve_vec(df$truth, df$Class1),
gain_curve_vec(df_rev$truth, df_rev$Class1, event_level = "second")
)
})
test_that("duplicates are removed", {
# known answer
dup_estimate <- c(0.9, 0.9, 0.7, 0.68, 0.68)
dup_truth <- factor(
c("Yes", "Yes", "No", "Yes", "No"),
levels = c("Yes", "No")
)
dup_df <- data.frame(estimate = dup_estimate, truth = dup_truth)
gain_df <- gain_curve(dup_df, truth, estimate)
expect_equal(nrow(gain_df), 4L)
# .n_events should be 2 for the 2 .9+Yes predictions
expect_equal(gain_df$.n_events[2], 2)
})
test_that("ordering of `truth` values within duplicated `estimate` groups doesn't affect the result", {
dup_estimate <- c(0.9, 0.9, 0.7, 0.68, 0.68)
dup_truth <- factor(
c("Yes", "Yes", "No", "Yes", "No"),
levels = c("Yes", "No")
)
dup_df1 <- data.frame(estimate = dup_estimate, truth = dup_truth)
# Flip the order of the .68 estimate values
# From c(Yes, No) to c(No, Yes)
dup_df2 <- dup_df1[c(1, 2, 3, 5, 4), ]
curve1 <- gain_curve(dup_df1, truth, estimate)
curve2 <- gain_curve(dup_df2, truth, estimate)
# If we didn't take unique values, these would generate different curves
# and therefore different plots
expect_identical(curve1, curve2)
})
test_that("Multiclass structure is correct", {
res_gain <- gain_curve(hpc_cv, obs, VF:L)
expect_true(".level" %in% colnames(res_gain))
expect_s3_class(res_gain, "gain_df")
})
test_that("Grouped structure is correct", {
hpc_g <- dplyr::group_by(hpc_cv, Resample)
res_gain <- gain_curve(hpc_g, obs, VF:L)
expect_true("Resample" %in% colnames(res_gain))
expect_s3_class(res_gain, "grouped_gain_df")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.