# These tests reference results generated by tune 1.3.0. The code used to
# generate them (and the results themselves) is found in the `inst` directory.
test_that("verifying loop_over_all_stages, no submodels, tuning, no estimation", {
skip_if_not_installed("modeldata")
skip_if_not_installed("kknn")
skip_if_not_installed("probably")
load(system.file(
"regression_tests",
"simple_example.RData",
package = "tune"
))
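  # `simple_example.RData` supplies `reg_max` (the postprocessor used in the
  # workflow below) and `simple_metrics`, the reference results from tune 1.3.0.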
  # ------------------------------------------------------------------------------
  set.seed(1)
  dat <- modeldata::sim_regression(1000)
  rs <- vfold_cv(dat)
  rs_split <- rs$splits[[1]]
  rs_args <- rsample::.get_split_args(rs)
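  # A one-row slice of the resamples with an added `.seeds` column; this is the
  # per-resample input passed to `loop_over_all_stages()` below.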
  rs_iter <- tune:::vec_list_rowwise(rs) |>
    purrr::pluck(1) |>
    mutate(
      .seeds = tune:::get_parallel_seeds(1)
    )
  # ------------------------------------------------------------------------------
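  # KNN with a fixed number of neighbors; the kernel (`weight_func`) is tuned
  # along with the postprocessor's `upper_limit` parameter.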
  mod <- nearest_neighbor(neighbors = 11, weight_func = tune()) |>
    set_mode("regression")
  wflow <- workflow(outcome ~ ., mod, reg_max)
  max_param <-
    wflow |>
    extract_parameter_set_dials() |>
    update(upper_limit = upper_limit(c(0, 1)))
  grd <- max_param |> grid_regular(levels = c(3, 2))
  upper_vals <- sort(unique(grd$upper_limit))
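  # Collect the components that do not vary across resamples, then add the data
  # subsets for this particular split.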
  static_1 <- tune:::make_static(
    wflow,
    param_info = max_param,
    grid = grd,
    metrics = metric_set(rmse, rsq),
    eval_time = NULL,
    split_args = rs_args,
    control = control_grid()
  )
  data_1 <- tune:::get_data_subsets(wflow, rs_split, rs_args)
  static_1 <- tune:::update_static(static_1, data_1)
  static_1$y_name <- "outcome"
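  # Evaluate the grid for this single resample.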
  simple_res <- tune:::loop_over_all_stages(rs_iter, grd, static_1)
  expect_true(!is.null(simple_res$.metrics[[1]]))
  expect_named(simple_res, c(".metrics", ".notes", "outcome_names", "id"))
  expect_true(nrow(simple_res) == 1)
  # Loop over upper limits and check rmse
  exp_rmse_mtr <-
    simple_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "rmse") |>
    dplyr::select(weight_func, raw_rmse = .estimate)
  obs_rmse_simple_mtr <-
    simple_res$.metrics[[1]] |>
    dplyr::filter(.metric == "rmse")
  # The postprocessor restricts the predictions (via `upper_limit`), so rmse
  # should be worse than the raw (un-postprocessed) reference values
  for (cut in upper_vals) {
    obs_rmse <- obs_rmse_simple_mtr |> dplyr::filter(upper_limit == cut)
    diff_rmse <- inner_join(obs_rmse, exp_rmse_mtr, by = "weight_func")
    expect_true(all(diff_rmse$.estimate > diff_rmse$raw_rmse))
  }
})
test_that("verifying loop_over_all_stages, submodels, tuning, no estimation", {
skip_if_not_installed("modeldata")
skip_if_not_installed("kknn")
skip_if_not_installed("probably")
load(system.file(
"regression_tests",
"submodel_example.RData",
package = "tune"
))
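  # `submodel_example.RData` supplies `reg_max` and the reference metrics
  # `submodel_metrics`.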
  ctrl <- tune::control_grid()
  # ------------------------------------------------------------------------------
  set.seed(1)
  dat <- modeldata::sim_regression(1000)
  rs <- vfold_cv(dat)
  rs_split <- rs$splits[[1]]
  rs_args <- rsample::.get_split_args(rs)
  rs_iter <- tune:::vec_list_rowwise(rs) |>
    purrr::pluck(1) |>
    mutate(
      .seeds = tune:::get_parallel_seeds(1)
    )
  # ------------------------------------------------------------------------------
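  # Recipe + KNN where the number of neighbors (`k`) can be handled as a
  # submodel parameter; the PCA components, the kernel, and the postprocessor's
  # `upper_limit` are also tuned.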
  rec <- recipe(outcome ~ ., data = dat) |>
    step_pca(all_numeric_predictors(), num_comp = tune())
  mod <- nearest_neighbor(neighbors = tune("k"), weight_func = tune()) |>
    set_mode("regression")
  submodel_wflow <- workflow(rec, mod, reg_max)
  max_param <-
    submodel_wflow |>
    extract_parameter_set_dials() |>
    update(upper_limit = upper_limit(c(0, 1)))
  upper_vals <- 0:1
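  # An irregular, manually specified grid: `k` varies within each combination of
  # `weight_func` and `num_comp` (so submodel predictions can be used), crossed
  # with the postprocessor's upper limits.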
  # fmt: skip
  submodel_grid <-
    tibble::tribble(
      ~k,  ~weight_func,   ~num_comp,
      9L,  "rectangular",  2L,
      14L, "rectangular",  2L,
      20L, "rectangular",  2L,
      4L,  "triangular",   2L,
      9L,  "triangular",   2L,
      14L, "triangular",   2L,
      20L, "triangular",   2L,
      4L,  "epanechnikov", 2L,
      9L,  "epanechnikov", 2L,
      14L, "epanechnikov", 2L,
      20L, "epanechnikov", 2L,
      4L,  "rectangular",  10L,
      9L,  "rectangular",  10L,
      14L, "rectangular",  10L,
      20L, "rectangular",  10L,
      4L,  "triangular",   10L,
      9L,  "triangular",   10L,
      14L, "triangular",   10L,
      20L, "triangular",   10L,
      4L,  "epanechnikov", 10L,
      9L,  "epanechnikov", 10L,
      14L, "epanechnikov", 10L,
      20L, "epanechnikov", 10L
    ) |>
    tidyr::crossing(upper_limit = upper_vals)
  # ------------------------------------------------------------------------------
  static_1 <- tune:::make_static(
    submodel_wflow,
    param_info = max_param,
    grid = submodel_grid,
    metrics = metric_set(rmse),
    eval_time = NULL,
    split_args = rs_args,
    control = ctrl
  )
  data_1 <- tune:::get_data_subsets(submodel_wflow, rs_split, rs_args)
  static_1 <- tune:::update_static(static_1, data_1)
  static_1$y_name <- "outcome"
  submodel_res <- tune:::loop_over_all_stages(rs_iter, submodel_grid, static_1)
  expect_named(submodel_res, c(".metrics", ".notes", "outcome_names", "id"))
  expect_true(nrow(submodel_res) == 1)
  # As in the previous test, the post-processed rmse should be worse than the
  # raw reference values
  exp_rmse_mtr <-
    submodel_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "rmse") |>
    dplyr::select(raw_rmse = .estimate, weight_func, k, num_comp)
  obs_rmse_submodel_mtr <-
    submodel_res$.metrics[[1]] |>
    dplyr::filter(.metric == "rmse")
  for (cut in upper_vals) {
    obs_rmse <- obs_rmse_submodel_mtr |> dplyr::filter(upper_limit == cut)
    diff_rmse <- inner_join(
      obs_rmse,
      exp_rmse_mtr,
      by = join_by(k, weight_func, num_comp)
    )
    expect_true(all(diff_rmse$.estimate > diff_rmse$raw_rmse))
  }
})
test_that("verifying loop_over_all_stages, submodels only, tuning, no estimation", {
skip_if_not_installed("modeldata")
skip_if_not_installed("kknn")
skip_if_not_installed("probably")
load(system.file(
"regression_tests",
"submodel_only_example.RData",
package = "tune"
))
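  # `submodel_only_example.RData` supplies the classification postprocessor
  # `cls_post` and the reference metrics `submodel_only_metrics`.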
  ctrl <- tune::control_grid()
  # ------------------------------------------------------------------------------
  set.seed(1)
  dat <- modeldata::sim_classification(1000)
  rs <- vfold_cv(dat)
  rs_split <- rs$splits[[1]]
  rs_args <- rsample::.get_split_args(rs)
  rs_iter <- tune:::vec_list_rowwise(rs) |>
    purrr::pluck(1) |>
    mutate(
      .seeds = tune:::get_parallel_seeds(1)
    )
  # ------------------------------------------------------------------------------
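  # Only the submodel parameter (`neighbors`) and the postprocessor's `cut`
  # threshold are tuned; the kernel is fixed.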
  mod <- nearest_neighbor(neighbors = tune(), weight_func = "triangular") |>
    set_mode("classification")
  submodel_only_wflow <- workflow(class ~ ., mod, cls_post)
  cut_vals <- c(.1, .9)
  submodel_only_grid <- tidyr::crossing(neighbors = 3:10, cut = cut_vals)
  # ------------------------------------------------------------------------------
  static_1 <- tune:::make_static(
    submodel_only_wflow,
    param_info = submodel_only_wflow |> extract_parameter_set_dials(),
    grid = submodel_only_grid,
    metrics = metric_set(accuracy, roc_auc, brier_class),
    eval_time = NULL,
    split_args = rs_args,
    control = ctrl
  )
  data_1 <- tune:::get_data_subsets(submodel_only_wflow, rs_split, rs_args)
  static_1 <- tune:::update_static(static_1, data_1)
  static_1$y_name <- "class"
  submodel_only_res <- tune:::loop_over_all_stages(
    rs_iter,
    submodel_only_grid,
    static_1
  )
  expect_named(
    submodel_only_res,
    c(".metrics", ".notes", "outcome_names", "id")
  )
  expect_true(nrow(submodel_only_res) == 1)
  # Changing the `cut` threshold alters the hard class predictions, so accuracy
  # should differ from the reference values; probability-based metrics such as
  # the Brier score should be unchanged
  exp_acc_mtr <-
    submodel_only_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "accuracy") |>
    dplyr::select(raw = .estimate, neighbors)
  exp_prob_mtr <-
    submodel_only_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "brier_class") |>
    dplyr::select(raw = .estimate, neighbors, .metric)
  obs_acc_submodel_only_mtr <-
    submodel_only_res$.metrics[[1]] |>
    dplyr::filter(.metric == "accuracy")
  obs_prob_submodel_only_mtr <-
    submodel_only_res$.metrics[[1]] |>
    dplyr::filter(.metric == "brier_class")
  for (thrsh in cut_vals) {
    obs_acc <- obs_acc_submodel_only_mtr |> dplyr::filter(cut == thrsh)
    diff_acc <- inner_join(obs_acc, exp_acc_mtr, by = join_by(neighbors))
    expect_true(all(diff_acc$.estimate != diff_acc$raw))
    obs_prob <- obs_prob_submodel_only_mtr |> dplyr::filter(cut == thrsh)
    diff_prob <- inner_join(
      obs_prob,
      exp_prob_mtr,
      by = join_by(neighbors, .metric)
    )
    expect_equal(diff_prob$.estimate, diff_prob$raw, tolerance = 0.01)
  }
})