# These tests reference results generated by tune 1.3.0. The code used to
# generate them (and the results themselves) is in the `inst` directory.

test_that("verifying loop_over_all_stages, no submodels, no post estimation or tuning", {
skip_if_not_installed("modeldata")
skip_if_not_installed("kknn")
load(system.file(
"regression_tests",
"simple_example.RData",
package = "tune"
))
# ------------------------------------------------------------------------------
set.seed(1)
dat <- modeldata::sim_regression(1000)
rs <- vfold_cv(dat)
rs_split <- rs$splits[[1]]
rs_args <- rsample::.get_split_args(rs)
rs_iter <- tune:::vec_list_rowwise(rs) |>
purrr::pluck(1) |>
mutate(
.seeds = tune:::get_parallel_seeds(1)
)
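
  # Note (assumption, not part of the original test): `tune:::vec_list_rowwise()`
  # and `tune:::get_parallel_seeds()` are internal helpers, so their exact
  # behavior is not documented here. `rs_iter` appears to be a one-row version
  # of the resampling object (the first fold) with a column of pre-generated
  # RNG seeds, i.e. the unit of work a single worker would receive.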

  # ------------------------------------------------------------------------------

  mod <- nearest_neighbor(neighbors = 11, weight_func = tune()) |>
    set_mode("regression")
  wflow <- workflow(outcome ~ ., mod, reg_post)
  grd <- wflow |> extract_parameter_set_dials() |> grid_regular()

  static_1 <- tune:::make_static(
    wflow,
    param_info = wflow |> extract_parameter_set_dials(),
    grid = grd,
    metrics = metric_set(rmse, rsq),
    eval_time = NULL,
    split_args = rs_args,
    control = control_grid()
  )

  data_1 <- tune:::get_data_subsets(wflow, rs_split, rs_args)
  static_1 <- tune:::update_static(static_1, data_1)
  static_1$y_name <- "outcome"
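
  # Note (assumption, not part of the original test): judging by its arguments,
  # `make_static()` appears to bundle the pieces that do not change across
  # resamples (workflow, grid, parameter set, metrics, and control settings),
  # while `get_data_subsets()` and `update_static()` presumably attach the
  # analysis/assessment data for this split. The outcome name is set manually
  # because the test calls the internal loop directly rather than going through
  # tune_grid().
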
  simple_res <- tune:::loop_over_all_stages(rs_iter, grd, static_1)

  expect_named(simple_res, c(".metrics", ".notes", "outcome_names", "id"))
  expect_true(nrow(simple_res) == 1)

  # A linear transformation so R^2 should be the same
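  # For intuition (illustrative sketch, not part of the original test): R^2 is
  # the squared correlation between the observed and predicted values, and
  # correlation is invariant to a positive linear rescaling of the predictions.
  # So a post-processor that only shifts/scales predictions leaves rsq equal to
  # the reference while changing rmse, e.g.:
  #   y <- rnorm(20); pred <- y + rnorm(20, sd = 0.1)
  #   all.equal(cor(y, pred)^2, cor(y, 2 * pred + 1)^2)  # TRUE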
  exp_rsq_mtr <-
    simple_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "rsq") |>
    dplyr::select(all_of(names(simple_res$.metrics[[1]]))) |>
    arrange(weight_func) |>
    dplyr::select(-.config)

  obs_rsq_simple_mtr <-
    simple_res$.metrics[[1]] |>
    dplyr::filter(.metric == "rsq") |>
    arrange(weight_func) |>
    dplyr::select(-.config)

  expect_equal(obs_rsq_simple_mtr, exp_rsq_mtr)

  # rmse should be worse than the reference values
  exp_rmse_mtr <-
    simple_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "rmse") |>
    dplyr::select(all_of(names(simple_res$.metrics[[1]]))) |>
    arrange(weight_func) |>
    dplyr::select(expected = .estimate, weight_func)

  obs_rmse_simple_mtr <-
    simple_res$.metrics[[1]] |>
    dplyr::filter(.metric == "rmse") |>
    arrange(weight_func) |>
    dplyr::select(obs = .estimate, weight_func)

  rmse_diff <-
    full_join(exp_rmse_mtr, obs_rmse_simple_mtr, by = "weight_func") |>
    mutate(diffs = obs - expected)

  expect_true(all(rmse_diff$diffs > 0))
})
test_that("verifying loop_over_all_stages, submodels, no post estimation or tuning", {
skip_if_not_installed("modeldata")
skip_if_not_installed("kknn")
load(system.file(
"regression_tests",
"submodel_example.RData",
package = "tune"
))
ctrl <- tune::control_grid()
# ------------------------------------------------------------------------------
set.seed(1)
dat <- modeldata::sim_regression(1000)
rs <- vfold_cv(dat)
rs_split <- rs$splits[[1]]
rs_args <- rsample::.get_split_args(rs)
rs_iter <- tune:::vec_list_rowwise(rs) |>
purrr::pluck(1) |>
mutate(
.seeds = tune:::get_parallel_seeds(1)
)

  # ------------------------------------------------------------------------------

  rec <- recipe(outcome ~ ., data = dat) |>
    step_pca(all_numeric_predictors(), num_comp = tune())
  mod <- nearest_neighbor(neighbors = tune("k"), weight_func = tune()) |>
    set_mode("regression")
  submodel_wflow <- workflow(rec, mod, reg_post)
  # fmt: skip
  submodel_grid <-
    tibble::tribble(
       ~k, ~weight_func,    ~num_comp,
       9L, "rectangular",         2L,
      14L, "rectangular",         2L,
      20L, "rectangular",         2L,
       4L, "triangular",          2L,
       9L, "triangular",          2L,
      14L, "triangular",          2L,
      20L, "triangular",          2L,
       4L, "epanechnikov",        2L,
       9L, "epanechnikov",        2L,
      14L, "epanechnikov",        2L,
      20L, "epanechnikov",        2L,
       4L, "rectangular",        10L,
       9L, "rectangular",        10L,
      14L, "rectangular",        10L,
      20L, "rectangular",        10L,
       4L, "triangular",         10L,
       9L, "triangular",         10L,
      14L, "triangular",         10L,
      20L, "triangular",         10L,
       4L, "epanechnikov",       10L,
       9L, "epanechnikov",       10L,
      14L, "epanechnikov",       10L,
      20L, "epanechnikov",       10L
    )
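
  # Note (assumption, not part of the original test): within each combination
  # of `weight_func` and `num_comp`, the candidates differ only in `k` (the
  # number of neighbors), which parsnip treats as a submodel parameter for
  # kknn. The grid can therefore presumably be evaluated by fitting one model
  # per weight_func/num_comp pair and predicting the other `k` values from it.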

  # ------------------------------------------------------------------------------

  static_1 <- tune:::make_static(
    submodel_wflow,
    param_info = submodel_wflow |> extract_parameter_set_dials(),
    grid = submodel_grid,
    metrics = metric_set(rmse, rsq),
    eval_time = NULL,
    split_args = rs_args,
    control = ctrl
  )

  data_1 <- tune:::get_data_subsets(submodel_wflow, rs_split, rs_args)
  static_1 <- tune:::update_static(static_1, data_1)
  static_1$y_name <- "outcome"

  submodel_res <- tune:::loop_over_all_stages(rs_iter, submodel_grid, static_1)

  expect_named(submodel_res, c(".metrics", ".notes", "outcome_names", "id"))
  expect_true(nrow(submodel_res) == 1)

  # A linear transformation so R^2 should be the same
  exp_rsq_mtr <-
    submodel_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "rsq") |>
    dplyr::select(all_of(names(submodel_res$.metrics[[1]]))) |>
    arrange(weight_func, k, num_comp) |>
    dplyr::select(-.config)

  obs_rsq_submodel_mtr <-
    submodel_res$.metrics[[1]] |>
    dplyr::filter(.metric == "rsq") |>
    arrange(weight_func, k, num_comp) |>
    dplyr::select(-.config)

  expect_equal(obs_rsq_submodel_mtr, exp_rsq_mtr)

  # rmse should be worse than the reference values
  exp_rmse_mtr <-
    submodel_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "rmse") |>
    dplyr::select(all_of(names(submodel_res$.metrics[[1]]))) |>
    arrange(weight_func, k, num_comp) |>
    dplyr::select(expected = .estimate, weight_func, k, num_comp)

  obs_rmse_submodel_mtr <-
    submodel_res$.metrics[[1]] |>
    dplyr::filter(.metric == "rmse") |>
    arrange(weight_func, k, num_comp) |>
    dplyr::select(obs = .estimate, weight_func, k, num_comp)

  rmse_diff <-
    full_join(
      exp_rmse_mtr,
      obs_rmse_submodel_mtr,
      by = c("weight_func", "k", "num_comp")
    ) |>
    mutate(diffs = obs - expected)

  expect_true(all(rmse_diff$diffs > 0))
})
test_that("verifying loop_over_all_stages, submodels only, no post estimation or tuning", {
skip_if_not_installed("modeldata")
skip_if_not_installed("kknn")
skip_if_not_installed("probably")
load(system.file(
"regression_tests",
"submodel_only_example.RData",
package = "tune"
))
ctrl <- tune::control_grid()
# ------------------------------------------------------------------------------
set.seed(1)
dat <- modeldata::sim_classification(1000)
rs <- vfold_cv(dat)
rs_split <- rs$splits[[1]]
rs_args <- rsample::.get_split_args(rs)
rs_iter <- tune:::vec_list_rowwise(rs) |>
purrr::pluck(1) |>
mutate(
.seeds = tune:::get_parallel_seeds(1)
)

  # ------------------------------------------------------------------------------

  mod <- nearest_neighbor(neighbors = tune(), weight_func = "triangular") |>
    set_mode("classification")
  submodel_only_wflow <- workflow(class ~ ., mod, cls_tenth)
  submodel_only_grid <- tibble(neighbors = 3:10)
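
  # Note (assumption, not part of the original test): only `neighbors` is tuned
  # here, so every candidate is presumably a submodel prediction from a single
  # kknn fit (hence "submodels only" in the test name). `cls_tenth` is not
  # defined in this file; it presumably comes from the .RData loaded above or a
  # test helper and, judging by its name and the expectations below, appears to
  # be a probability-threshold post-processor.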

  # ------------------------------------------------------------------------------

  static_1 <- tune:::make_static(
    submodel_only_wflow,
    param_info = submodel_only_wflow |> extract_parameter_set_dials(),
    grid = submodel_only_grid,
    metrics = metric_set(accuracy, roc_auc, brier_class),
    eval_time = NULL,
    split_args = rs_args,
    control = ctrl
  )

  data_1 <- tune:::get_data_subsets(submodel_only_wflow, rs_split, rs_args)
  static_1 <- tune:::update_static(static_1, data_1)
  static_1$y_name <- "class"

  submodel_only_res <- tune:::loop_over_all_stages(
    rs_iter,
    submodel_only_grid,
    static_1
  )

  expect_named(
    submodel_only_res,
    c(".metrics", ".notes", "outcome_names", "id")
  )
  expect_true(nrow(submodel_only_res) == 1)

  # Thresholding so accuracy should be different and Brier and ROC should be
  # unchanged
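  # For intuition (illustrative, not part of the original test): accuracy is
  # computed from hard class predictions, which change when the probability
  # threshold moves away from 0.5, while ROC AUC and the Brier score are
  # computed from the class probabilities themselves and so are unaffected by
  # the threshold.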
  exp_prob_mtr <-
    submodel_only_metrics |>
    dplyr::filter(id == "Fold01" & .metric != "accuracy") |>
    dplyr::select(all_of(names(submodel_only_res$.metrics[[1]]))) |>
    arrange(neighbors, .metric) |>
    dplyr::select(-.config)

  obs_prob_submodel_only_mtr <-
    submodel_only_res$.metrics[[1]] |>
    dplyr::filter(.metric != "accuracy") |>
    arrange(neighbors, .metric) |>
    dplyr::select(-.config)

  expect_equal(obs_prob_submodel_only_mtr, exp_prob_mtr)

  # accuracy should be worse
  exp_acc_mtr <-
    submodel_only_metrics |>
    dplyr::filter(id == "Fold01" & .metric == "accuracy") |>
    dplyr::select(all_of(names(submodel_only_res$.metrics[[1]]))) |>
    arrange(neighbors, .metric) |>
    dplyr::select(expected = .estimate, neighbors, .metric)
  obs_acc_submodel_only_mtr <-
    submodel_only_res$.metrics[[1]] |>
    dplyr::filter(.metric == "accuracy") |>
    arrange(neighbors, .metric) |>
    dplyr::select(obs = .estimate, neighbors, .metric)

  acc_diff <-
    full_join(
      exp_acc_mtr,
      obs_acc_submodel_only_mtr,
      by = c("neighbors", ".metric")
    ) |>
    mutate(diffs = obs - expected)

  expect_true(all(acc_diff$diffs < 0))
})