test_that("tune recipe only", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod_no_tune)
pset <- hardhat::extract_parameter_set_dials(wflow) %>%
update(num_comp = dials::num_comp(c(1, 3)))
grid <- dials::grid_regular(pset, levels = 3)
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(extract = identity)
metrics <- cluster_metric_set(sse_within_total, sse_total)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
res_est <- tune::collect_metrics(res)
res_workflow <- res$.extracts[[1]]$.extracts[[1]]
# Ensure tunable parameters in recipe are finalized
num_comp <- res_workflow$pre$actions$recipe$recipe$steps[[2]]$num_comp
expect_equal(res$id, folds$id)
expect_equal(nrow(res_est), nrow(grid) * 2)
expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid) * 2))
expect_false(identical(num_comp, expr(tune())))
expect_true(res_workflow$trained)
})
test_that("tune model only (with recipe)", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_no_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow)
grid <- dials::grid_regular(pset, levels = 3)
grid$num_clusters <- grid$num_clusters + 1
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(extract = identity)
metrics <- cluster_metric_set(sse_within_total, sse_total)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
res_est <- tune::collect_metrics(res)
res_workflow <- res$.extracts[[1]]$.extracts[[1]]
# Ensure tunable parameters in spec are finalized
num_clusters_quo <- res_workflow$fit$fit$spec$args$num_clusters
num_clusters <- rlang::quo_get_expr(num_clusters_quo)
expect_equal(res$id, folds$id)
expect_equal(nrow(res_est), nrow(grid) * 2)
expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid) * 2))
expect_false(identical(num_clusters, expr(tune())))
expect_true(res_workflow$trained)
})
test_that("tune model only (with variables)", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_variables(outcomes = NULL, predictors = everything()) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow)
grid <- dials::grid_regular(pset, levels = 3)
folds <- rsample::vfold_cv(mtcars, v = 2)
metrics <- cluster_metric_set(sse_total, sse_within_total)
res <- tune_cluster(wflow, resamples = folds, grid = grid, metrics = metrics)
expect_equal(res$id, folds$id)
res_est <- tune::collect_metrics(res)
expect_equal(nrow(res_est), nrow(grid) * 2)
expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid) * 2))
})
test_that("tune model only (with formula)", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_formula(~.) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow)
grid <- dials::grid_regular(pset, levels = 3)
folds <- rsample::vfold_cv(mtcars, v = 2)
metrics <- cluster_metric_set(sse_total, sse_within_total)
res <- tune_cluster(wflow, resamples = folds, grid = grid, metrics = metrics)
expect_equal(res$id, folds$id)
res_est <- tune::collect_metrics(res)
expect_equal(nrow(res_est), nrow(grid) * 2)
expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid) * 2))
})
test_that("tune model and recipe", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow) %>%
update(num_comp = dials::num_comp(c(1, 3)))
grid <- dials::grid_regular(pset, levels = 3)
grid$num_clusters <- grid$num_clusters + 1
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(extract = identity)
metrics <- cluster_metric_set(sse_within_total, sse_total)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
res_est <- tune::collect_metrics(res)
res_workflow <- res$.extracts[[1]]$.extracts[[1]]
# Ensure tunable parameters in spec are finalized
num_clusters_quo <- res_workflow$fit$fit$spec$args$num_clusters
num_clusters <- rlang::quo_get_expr(num_clusters_quo)
# Ensure tunable parameters in recipe are finalized
num_comp <- res_workflow$pre$actions$recipe$recipe$steps[[2]]$num_comp
expect_equal(res$id, folds$id)
expect_equal(
colnames(res$.metrics[[1]]),
c(
"num_clusters",
"num_comp",
".metric",
".estimator",
".estimate",
".config"
)
)
expect_equal(nrow(res_est), nrow(grid) * 2)
expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid) * 2))
expect_false(identical(num_clusters, expr(tune())))
expect_false(identical(num_comp, expr(tune())))
expect_true(res_workflow$trained)
})
test_that("verbose argument works", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow) %>%
update(num_comp = dials::num_comp(c(1, 3)))
grid <- dials::grid_regular(pset, levels = 3)
grid$num_clusters <- grid$num_clusters + 1
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(extract = identity, verbose = TRUE)
metrics <- cluster_metric_set(sse_within_total, sse_total)
expect_snapshot(
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
)
})
test_that('tune model and recipe (parallel_over = "everything")', {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow) %>%
update(num_comp = dials::num_comp(c(1, 3)))
grid <- dials::grid_regular(pset, levels = 3)
grid$num_clusters <- grid$num_clusters + 1
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(
extract = identity,
parallel_over = "everything"
)
metrics <- cluster_metric_set(sse_within_total, sse_total)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
res_est <- tune::collect_metrics(res)
expect_equal(res$id, folds$id)
expect_equal(
colnames(res$.metrics[[1]]),
c(
"num_clusters",
"num_comp",
".metric",
".estimator",
".estimate",
".config"
)
)
expect_equal(nrow(res_est), nrow(grid) * 2)
expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid) * 2))
})
test_that("tune model only - failure in formula is caught elegantly", {
helper_objects <- helper_objects_tidyclust()
set.seed(7898)
data_folds <- rsample::vfold_cv(mtcars, v = 2)
cars_grid <- tibble::tibble(num_clusters = 2)
# these terms don't exist!
expect_snapshot(
cars_res <- tune_cluster(
helper_objects$kmeans_mod,
~z,
resamples = data_folds,
grid = cars_grid,
control = tune::control_grid(
extract = function(x) {
1
},
save_pred = TRUE
)
)
)
notes <- cars_res$.notes
note <- notes[[1]]$note
extracts <- cars_res$.extracts
predictions <- cars_res$.predictions
expect_length(notes, 2L)
# formula failed - no models run
expect_equal(extracts, list(NULL, NULL))
expect_equal(predictions, list(NULL, NULL))
})
test_that("argument order gives errors for recipes", {
helper_objects <- helper_objects_tidyclust()
expect_snapshot(error = TRUE, {
tune_cluster(
helper_objects$rec_tune_1,
helper_objects$kmeans_mod_no_tune,
rsample::vfold_cv(mtcars, v = 2)
)
})
})
test_that("argument order gives errors for formula", {
helper_objects <- helper_objects_tidyclust()
expect_snapshot(error = TRUE, {
tune_cluster(
mpg ~ .,
helper_objects$kmeans_mod_no_tune,
rsample::vfold_cv(mtcars, v = 2)
)
})
})
test_that("metrics can be NULL", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod_no_tune)
pset <- hardhat::extract_parameter_set_dials(wflow) %>%
update(num_comp = dials::num_comp(c(1, 3)))
grid <- dials::grid_regular(pset, levels = 3)
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(extract = identity)
metrics <- cluster_metric_set(sse_within_total, sse_total)
set.seed(4400)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control
)
set.seed(4400)
res1 <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
expect_identical(res$.metrics, res1$.metrics)
})
test_that("tune recipe only", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod_no_tune)
pset <- hardhat::extract_parameter_set_dials(wflow) %>%
update(num_comp = dials::num_comp(c(1, 3)))
grid <- dials::grid_regular(pset, levels = 3)
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(extract = identity)
metrics <- cluster_metric_set(sse_within_total)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
res_est <- tune::collect_metrics(res)
res_workflow <- res$.extracts[[1]]$.extracts[[1]]
# Ensure tunable parameters in recipe are finalized
num_comp <- res_workflow$pre$actions$recipe$recipe$steps[[2]]$num_comp
expect_equal(res$id, folds$id)
expect_equal(nrow(res_est), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid)))
expect_false(identical(num_comp, expr(tune())))
expect_true(res_workflow$trained)
})
test_that("ellipses with tune_cluster", {
helper_objects <- helper_objects_tidyclust()
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod_no_tune)
folds <- rsample::vfold_cv(mtcars, v = 2)
expect_snapshot(
tune_cluster(wflow, resamples = folds, grid = 3, something = "wrong")
)
})
test_that("determining the grid type", {
grid_1 <- expand.grid(a = 1:20, b = letters[1:2])
expect_true(tune:::is_regular_grid(grid_1))
expect_true(tune:::is_regular_grid(grid_1[-(1:2), ]))
expect_false(tune:::is_regular_grid(grid_1[-(1:20), ]))
set.seed(1932)
grid_2 <- data.frame(a = runif(length(letters)), b = letters)
expect_false(tune:::is_regular_grid(grid_2))
})
test_that("retain extra attributes", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_no_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow)
grid <- dials::grid_regular(pset, levels = 3)
grid$num_clusters <- grid$num_clusters + 1
folds <- rsample::vfold_cv(mtcars, v = 2)
metrics <- cluster_metric_set(sse_within_total, sse_total)
res <- tune_cluster(wflow, resamples = folds, grid = grid, metrics = metrics)
att <- attributes(res)
att_names <- names(att)
expect_true(any(att_names == "metrics"))
expect_true(any(att_names == "parameters"))
expect_true(inherits(att$parameters, "parameters"))
expect_true(inherits(att$metrics, "cluster_metric_set"))
})
test_that("select_best() and show_best() works", {
helper_objects <- helper_objects_tidyclust()
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(helper_objects$rec_no_tune_1) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow)
grid <- dials::grid_regular(pset, levels = 2)
grid$num_clusters <- grid$num_clusters + 1
folds <- rsample::vfold_cv(mtcars, v = 2)
control <- tune::control_grid(extract = identity)
metrics <- cluster_metric_set(sse_within_total, sse_total)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
expect_snapshot(tmp <- tune::show_best(res))
expect_equal(
tune::show_best(res, metric = "sse_within_total"),
tune::collect_metrics(res) %>%
dplyr::filter(.metric == "sse_within_total") %>%
dplyr::slice_min(mean, n = 5, with_ties = FALSE)
)
expect_equal(
tune::show_best(res, metric = "sse_total"),
tune::collect_metrics(res) %>%
dplyr::filter(.metric == "sse_total") %>%
dplyr::slice_min(mean, n = 5, with_ties = FALSE)
)
expect_snapshot(tmp <- tune::select_best(res))
expect_equal(
tune::select_best(res, metric = "sse_within_total"),
tune::collect_metrics(res) %>%
dplyr::filter(.metric == "sse_within_total") %>%
dplyr::slice_min(mean, n = 1, with_ties = FALSE) %>%
dplyr::select(num_clusters, .config)
)
expect_equal(
tune::select_best(res, metric = "sse_total"),
tune::collect_metrics(res) %>%
dplyr::filter(.metric == "sse_total") %>%
dplyr::slice_min(mean, n = 1, with_ties = FALSE) %>%
dplyr::select(num_clusters, .config)
)
})
test_that("doesn't error if recipes uses id variables", {
helper_objects <- helper_objects_tidyclust()
mtcars_id <- mtcars %>%
tibble::rownames_to_column(var = "model")
rec_id <- recipes::recipe(~., data = mtcars_id) %>%
recipes::update_role(model, new_role = "id variable") %>%
recipes::step_normalize(recipes::all_numeric_predictors())
set.seed(4400)
wflow <- workflows::workflow() %>%
workflows::add_recipe(rec_id) %>%
workflows::add_model(helper_objects$kmeans_mod)
pset <- hardhat::extract_parameter_set_dials(wflow) %>%
update(num_clusters = dials::num_clusters(c(1, 3)))
grid <- dials::grid_regular(pset, levels = 3)
folds <- rsample::vfold_cv(mtcars_id, v = 2)
control <- tune::control_grid(extract = identity)
metrics <- cluster_metric_set(sse_within_total, sse_total)
res <- tune_cluster(
wflow,
resamples = folds,
grid = grid,
control = control,
metrics = metrics
)
res_est <- tune::collect_metrics(res)
res_workflow <- res$.extracts[[1]]$.extracts[[1]]
expect_equal(res$id, folds$id)
expect_equal(nrow(res_est), nrow(grid) * 2)
expect_equal(sum(res_est$.metric == "sse_total"), nrow(grid))
expect_equal(sum(res_est$.metric == "sse_within_total"), nrow(grid))
expect_equal(res_est$n, rep(2, nrow(grid) * 2))
expect_true(res_workflow$trained)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.