# Tests for benchmark() and BenchmarkResult.
tmp = tsk("iris", id = "iris_small")$select("Sepal.Length")
tasks = c(mlr_tasks$mget(c("iris", "sonar")), list(tmp))
learners = mlr_learners$mget(c("classif.featureless", "classif.rpart"))
resamplings = rsmp("cv", folds = 3)
design = benchmark_grid(tasks, learners, resamplings)
bmr = benchmark(design)
test_that("Basic benchmarking", {
expect_benchmark_result(bmr)
expect_names(names(as.data.table(bmr)), permutation.of = c(mlr_reflections$rr_names, "uhash"))
tab = as.data.table(bmr)
expect_data_table(tab, nrows = 18L, ncols = 6L)
expect_names(names(tab), permutation.of = c("uhash", mlr_reflections$rr_names))
measures = list(msr("classif.acc"))
tab = bmr$score(measures, ids = FALSE)
expect_data_table(tab, nrows = 18L, ncols = 7L + length(measures))
expect_names(names(tab), must.include = c("nr", "uhash", mlr_reflections$rr_names, ids(measures)))
expect_list(tab$prediction, "Prediction")
tab = bmr$tasks
expect_data_table(tab, nrows = 3L, any.missing = FALSE)
expect_names(names(tab), identical.to = c("task_hash", "task_id", "task"))
expect_hash(tab$task_hash, len = 3L)
tab = bmr$learners
expect_data_table(tab, nrows = 2L, any.missing = FALSE)
expect_names(names(tab), identical.to = c("learner_hash", "learner_id", "learner"))
expect_hash(tab$learner_hash, len = 2L)
qexpectr(map(tab$learner, "state"), "0")
tab = bmr$resamplings
expect_data_table(tab, nrows = 3L, any.missing = FALSE)
expect_names(names(tab), permutation.of = c("resampling_hash", "resampling", "resampling_id"))
expect_hash(tab$resampling_hash, len = 3L)
tab = bmr$aggregate(measures)
expect_data_table(tab, nrows = 6L)
expect_names(names(tab), type = "unique",
identical.to = c("nr", "resample_result", "task_id", "learner_id", "resampling_id", "iters", ids(measures)))
m = measures[[1L]]
expect_numeric(tab[[m$id]], any.missing = FALSE, lower = m$range[1], upper = m$range[2])
})
test_that("ResampleResult / hash", {
m = msr("classif.ce")
aggr = bmr$aggregate(m, uhashes = TRUE)
nr = aggr$nr
expect_integer(nr, len = 6L, any.missing = FALSE, unique = TRUE)
for (i in nr) {
rr = aggr$resample_result[[i]]
expect_resample_result(rr)
expect_equal(unname(rr$aggregate(m)), aggr[["classif.ce"]][i])
expect_equal(bmr$uhashes[i], rr$uhash)
}
})
test_that("discarding model", {
bmr2 = benchmark(benchmark_grid(tasks[1L], learners[1L], resamplings), store_models = FALSE)
expect_benchmark_result(bmr2)
expect_true(every(map(as.data.table(bmr2)$learner, "model"), is.null))
bmr2 = benchmark(benchmark_grid(tasks[1L], learners[1L], resamplings), store_models = TRUE)
expect_benchmark_result(bmr2)
expect_false(every(map(as.data.table(bmr2)$learner, "model"), is.null))
})
test_that("bmr$combine()", {
bmr_new = benchmark(benchmark_grid(mlr_tasks$mget("pima"), learners, resamplings))
combined = list(
bmr$clone(deep = TRUE)$combine(bmr_new),
c(bmr, bmr_new)
)
for (bmr_combined in combined) {
expect_benchmark_result(bmr)
expect_benchmark_result(bmr_new)
expect_benchmark_result(bmr_combined)
expect_data_table(get_private(bmr)$.data$data$fact, nrows = 18L)
expect_data_table(get_private(bmr_new)$.data$data$fact, nrows = 6L)
expect_data_table(get_private(bmr_combined)$.data$data$fact, nrows = 24L)
expect_false("pima" %in% bmr$tasks$task_id)
expect_true("pima" %in% bmr_new$tasks$task_id)
expect_true("pima" %in% bmr_combined$tasks$task_id)
}
rr = resample(tsk("zoo"), lrn("classif.rpart"), rsmp("holdout"))
bmr2 = c(combined[[1]], rr)
expect_benchmark_result(bmr2)
expect_data_table(get_private(bmr2)$.data$data$fact, nrows = 25L)
})
test_that("empty bmr", {
bmr_new = BenchmarkResult$new()
expect_benchmark_result(bmr_new)
bmr_new$combine(NULL)
expect_benchmark_result(bmr_new)
bmr_new$combine(bmr)
expect_benchmark_result(bmr_new)
expect_data_table(get_private(bmr_new)$.data$data$fact, nrows = nrow(get_private(bmr)$.data))
})
test_that("bmr$resample_result()", {
uhashes = bmr$uhashes
expect_resample_result(bmr$resample_result(1L))
expect_resample_result(bmr$resample_result(uhash = uhashes[1]))
expect_error(bmr$resample_result(0))
expect_error(bmr$resample_result(100))
expect_error(bmr$resample_result(uhash = "a"))
expect_error(bmr$resample_result(i = 1, uhash = uhashes[1]))
})
test_that("inputs are cloned", {
task = tsk("iris")
learner = lrn("classif.featureless")
resampling = rsmp("holdout")
expect_error(benchmark(data.table(task = list(task), learner = list(learner), resampling = list(resampling))), "instantiated")
bmr = benchmark(design = data.table(task = list(task), learner = list(learner), resampling = list(resampling$instantiate(task))))
rr = bmr$resample_result(1)
expect_different_address(task, rr$task)
expect_different_address(learner, get_private(rr)$.data$data$learner_components$learner[[1L]])
expect_different_address(resampling, rr$resampling)
})
test_that("memory footprint", {
expect_equal(uniqueN(map_chr(design$task, address)), 3L)
expect_equal(uniqueN(map_chr(design$learner, address)), 2L)
expect_equal(uniqueN(map_chr(design$resampling, address)), 3L)
x = as.data.table(bmr)
expect_equal(uniqueN(map_chr(x$task, address)), 3L)
expect_equal(uniqueN(map_chr(x$learner, address)), 18L)
expect_equal(uniqueN(map_chr(x$resampling, address)), 3L)
})
test_that("multiple measures", {
tasks = list(tsk("iris"), tsk("sonar"))
learner = lrn("classif.featureless")
measures = list(msr("classif.ce"), msr("classif.acc"))
bmr = benchmark(design = benchmark_grid(tasks, learner, rsmp("cv", folds = 3)))
expect_subset(c("classif.ce", "classif.acc"), names(bmr$aggregate(measures)))
})
test_that("predict_type is checked", {
task = tsk("sonar")
learner = lrn("classif.featureless")
resampling = rsmp("cv", folds = 3L)
design = benchmark_grid(task, learner, resampling)
bmr = benchmark(design)
expect_error(bmr$aggregate("classif.auc", "predict_type"))
})
test_that("custom resampling (#245)", {
task_boston = tsk("boston_housing")
lrn = lrn("regr.featureless")
rdesc = rsmp("custom")
train_sets = list((1:200), (1:300), (1:400))
test_sets = list((201:301), (301:401), (401:501))
rdesc$instantiate(task_boston, train_sets, test_sets)
expect_resample_result(mlr3::resample(task_boston, lrn, rdesc))
design = data.table(task = list(task_boston), learner = list(lrn), resampling = list(rdesc))
bmr = benchmark(design)
expect_benchmark_result(bmr)
# Issue #451
design = benchmark_grid(tasks = task_boston, learners = lrn, resamplings = rdesc)
expect_data_table(design, nrows = 1)
})
test_that("extract params", {
# some params, some not
lrns = list(
lrn("classif.rpart", id = "rp1", xval = 0),
lrn("classif.rpart", id = "rp2", xval = 0, cp = 0.2, minsplit = 2),
lrn("classif.rpart", id = "rp3", xval = 0, cp = 0.1)
)
bmr = benchmark(benchmark_grid(tsk("wine"), lrns, rsmp("cv", folds = 3)))
aggr = bmr$aggregate(params = TRUE)
setorder(aggr, "learner_id")
expect_list(aggr$params[[1]], names = "unique", len = 1L)
expect_list(aggr$params[[2]], names = "unique", len = 3L)
expect_list(aggr$params[[3]], names = "unique", len = 2L)
scores = bmr$score()
pvs = map(scores$learner, function(l) l$param_set$values)
expect_true(all(sapply(split(lengths(pvs), scores$nr), uniqueN) == 1))
expect_set_equal(lengths(pvs), 1:3)
# only one params
lrns = mlr_learners$mget("classif.featureless")
bmr = benchmark(benchmark_grid(tsk("wine"), lrns, rsmp("cv", folds = 3)))
aggr = bmr$aggregate(params = TRUE)
expect_list(aggr$params[[1]], names = "unique", len = 1L)
# no params
lrns = mlr_learners$mget("classif.debug")
lrns$classif.debug$param_set$values = list()
bmr = benchmark(benchmark_grid(tsk("wine"), lrns, rsmp("cv", folds = 3)))
aggr = bmr$aggregate(params = TRUE)
expect_list(aggr$params[[1]], names = "unique", len = 0L)
expect_true(all(c("warnings", "errors") %in% names(bmr$score(conditions = TRUE))))
})
test_that("benchmark_grid", {
learner = lrn("classif.rpart")
tasks = tsks(c("iris", "iris"))
resamp = rsmp("cv")$instantiate(tasks[[1]])
expect_data_table(benchmark_grid(tasks, learner, resamp))
tasks = tsks(c("iris", "sonar"))
resamp = rsmp("cv")$instantiate(tasks[[1]])
expect_error(benchmark_grid(tasks, learner, resamp), "rows")
})
test_that("filter", {
tasks = lapply(c("iris", "sonar"), tsk)
learners = lapply(c("classif.featureless", "classif.rpart"), lrn)
resamplings = list(rsmp("cv", folds = 3), rsmp("holdout"))
design = benchmark_grid(tasks, learners, resamplings)
bmr = benchmark(design)
expect_data_table(get_private(bmr)$.data$data$fact, nrows = 16)
bmr$filter(task_ids = "sonar")
expect_data_table(get_private(bmr)$.data$data$fact, nrows = 8)
expect_resultdata(get_private(bmr)$.data, TRUE)
bmr$filter(learner_ids = "classif.rpart")
expect_data_table(get_private(bmr)$.data$data$fact, nrows = 4)
expect_resultdata(get_private(bmr)$.data, TRUE)
bmr$filter(resampling_ids = "cv")
expect_data_table(get_private(bmr)$.data$data$fact, nrows = 3)
expect_resultdata(get_private(bmr)$.data, TRUE)
expect_benchmark_result(bmr)
})
test_that("aggregated performance values are calculated correctly (#555)", {
task = tsk("spam")
learner1 = lrn("classif.featureless")
learner2 = lrn("classif.rpart")
resampling = rsmp("subsampling", repeats = 2)
design = benchmark_grid(task, list(learner1, learner2), resampling)
bmr = benchmark(design = design, store_models = TRUE)
y = bmr$aggregate()$classif.ce
expect_gt(y[1], y[2])
y = c(
bmr$resample_result(1)$aggregate(msr("classif.ce")),
bmr$resample_result(2)$aggregate(msr("classif.ce"))
)
expect_gt(y[1], y[2])
})
test_that("save/load roundtrip", {
path = tempfile()
saveRDS(bmr, file = path)
bmr2 = readRDS(path)
expect_benchmark_result(bmr2)
})
test_that("debug branch", {
tmp = tsk("iris", id = "iris_small")$select("Sepal.Length")
tasks = c(mlr_tasks$mget(c("iris", "sonar")), list(tmp))
learners = mlr_learners$mget(c("classif.featureless", "classif.rpart"))
resamplings = rsmp("cv", folds = 2)
design = benchmark_grid(tasks, learners, resamplings)
bmr = invoke(benchmark, design, .opts = list(mlr3.debug = TRUE))
expect_benchmark_result(bmr)
})
test_that("encapsulatiion", {
learners = list(lrn("classif.debug", error_train = 1), lrn("classif.rpart"))
grid = benchmark_grid(tasks, learners, resamplings)
expect_error(benchmark(grid), "classif.debug->train()")
bmr = benchmark(grid, encapsulate = "evaluate")
aggr = bmr$aggregate(conditions = TRUE)
expect_true(all(aggr[learner_id == "classif.debug", errors] == 3L))
expect_true(all(aggr[learner_id != "classif.debug", errors] == 0L))
for (learner in bmr$learners$learner) {
expect_class(learner$fallback, "LearnerClassifFeatureless")
expect_equal(learner$encapsulate[["train"]], "evaluate")
expect_equal(learner$encapsulate[["predict"]], "evaluate")
}
})
test_that("disable cloning", {
grid = benchmark_grid(
tasks = tsk("iris"),
learners = lrn("classif.featureless"),
resamplings = rsmp("holdout")
)
task = grid$task[[1L]]
learner = grid$learner[[1L]]
resampling = grid$resampling[[1L]]
bmr = benchmark(grid, clone = c())
expect_same_address(task, bmr$tasks$task[[1]])
expect_same_address(learner, get_private(bmr)$.data$data$learners$learner[[1]])
expect_same_address(resampling, bmr$resamplings$resampling[[1]])
expect_identical(task$hash, bmr$tasks$task[[1]]$hash)
expect_identical(learner$hash, bmr$learners$learner[[1]]$hash)
expect_identical(resampling$hash, bmr$resamplings$resampling[[1]]$hash)
})
test_that("task and learner assertions", {
grid = benchmark_grid(
tasks = tsks(c("iris", "boston_housing")),
learners = lrn("classif.rpart"),
resamplings = rsmp("holdout")
)
expect_error(benchmark(grid), "task types")
grid = benchmark_grid(
tasks = tsk("iris"),
learners = lrns(c("classif.rpart", "regr.rpart")),
resamplings = rsmp("holdout")
)
expect_error(benchmark(grid), "learner types")
grid = benchmark_grid(
tasks = tsk("iris"),
learners = lrn("regr.rpart"),
resamplings = rsmp("holdout")
)
expect_error(benchmark(grid), "not match type")
})
test_that("benchmark_grid works if paired = TRUE", {
tasks = mlr3::tsks(c("pima", "iris"))
learners = lrns(c("classif.featureless", "classif.rpart"))
resampling = rsmp("cv")
resamplings = pmap(
list(tasks, rsmps(c("cv", "holdout"))),
function(task, resampling) resampling$instantiate(task)
)
design = benchmark_grid(tasks, learners, resamplings, paired = TRUE)
expect_class(design, "benchmark_grid")
# design[, identical(task), by = task]]
# expect(identical(design$resampling[class(learner)[[1]] ==)]))
expect_true(nrow(design) == 4L) #
expect_true(identical(design$task[[1]], design$task[[2]]))
expect_true(identical(design$task[[3]], design$task[[4]]))
expect_false(identical(design$task[[1]], design$task[[3]]))
expect_true(identical(design$resampling[[1]], design$resampling[[2]]))
expect_true(identical(design$resampling[[3]], design$resampling[[4]]))
expect_false(identical(design$resampling[[1]], design$resampling[[3]]))
expect_true(identical(design$learner[[1]], design$learner[[3]]))
expect_true(identical(design$learner[[2]], design$learner[[4]]))
expect_false(identical(design$learner[[2]], design$learner[[3]]))
# Resamplings must be instantiated
tasks = tsks(c("pima", "iris"))
learners = lrns(c("classif.featureless", "classif.rpart"))
resamplings = mlr3::rsmps(c("cv", "holdout"))
expect_error(benchmark_grid(tasks, learners, resamplings, paired = TRUE))
# Resamplings and tasks must have the same length
tasks = tsks(c("pima", "iris"))
learners = lrns(c("classif.featureless", "classif.rpart"))
resamplings = pmap(
list(tasks, mlr3::rsmps(c("cv", "holdout"))),
function(task, resampling) resampling$instantiate(task)
)
resamplings = c(resamplings, resamplings)
expect_error(benchmark_grid(tasks, learners, resamplings, paired = TRUE))
# Resamplings and tasks must have corresponding hashes
tasks = tsks(c("pima", "iris"))
learners = lrns(c("classif.featureless", "classif.rpart"))
resamplings = pmap(
list(tasks, mlr3::rsmps(c("cv", "holdout"))),
function(task, resampling) resampling$instantiate(task)
)
resamplings = rev(resamplings)
expect_error(benchmark_grid(tasks, learners, resamplings, paired = TRUE))
})
test_that("param_values in benchmark", {
# setup
tasks = tsks("iris")
resamplings = list(rsmp("cv", folds = 3)$instantiate(tasks[[1]]))
learners = lrns("classif.debug")
# single parameter set via manual design
design = data.table(task = tasks, learner = learners, resampling = resamplings, param_values = list(list(list(x = 1))))
bmr = benchmark(design)
expect_benchmark_result(bmr)
expect_equal(bmr$n_resample_results, 1)
expect_equal(nrow(as.data.table(bmr)), 3)
learner = bmr$resample_result(1)$learner
expect_equal(learner$param_set$values$x, 1)
expect_equal(nrow(as.data.table(bmr)), 3)
# multiple parameters set via manual design
design = data.table(task = tasks, learner = learners, resampling = resamplings, param_values = list(list(list(x = 1), list(x = 0.5))))
bmr = benchmark(design)
expect_benchmark_result(bmr)
expect_equal(bmr$n_resample_results, 2)
expect_equal(nrow(as.data.table(bmr)), 6)
learner = bmr$resample_result(1)$learner
expect_equal(learner$param_set$values$x, 1)
learner = bmr$resample_result(2)$learner
expect_equal(learner$param_set$values$x, 0.5)
# benchmark grid does not attach param_values if empty
design = benchmark_grid(tasks, learners, resamplings)
expect_names(names(design), permutation.of = c("task", "learner", "resampling"))
# benchmark grid with param_values
design = benchmark_grid(tasks, learners, resamplings, param_values = list(list(list(x = 1))))
expect_data_table(design, nrows = 1)
expect_names(names(design), permutation.of = c("task", "learner", "resampling", "param_values"))
bmr = benchmark(design)
expect_benchmark_result(bmr)
# benchmark grid with param_values and paired = TRUE
design = benchmark_grid(tasks, learners, resamplings, param_values = list(list(list(x = 1))), paired = TRUE)
expect_data_table(design, nrows = 1)
bmr = benchmark(design)
expect_benchmark_result(bmr)
expect_equal(bmr$n_resample_results, 1)
# benchmark grid with multiple params
design = benchmark_grid(tasks, learners, resamplings, param_values = list(list(list(x = 1), list(x = 0.5))))
expect_data_table(design, nrows = 1)
bmr = benchmark(design)
expect_benchmark_result(bmr)
expect_equal(bmr$n_resample_results, 2)
# benchmark grid with multiple params and multiple learners
design = benchmark_grid(tasks, lrns(c("classif.debug", "classif.debug")), rsmp("holdout"), param_values = list(list(list(x = 1), list(x = 0.5)), list()))
bmr = benchmark(design)
expect_benchmark_result(bmr)
expect_equal(bmr$n_resample_results, 3)
# constant values are inserted
learners = lrns("classif.rpart", minsplit = 12)
design = data.table(task = tasks, learner = learners, resampling = resamplings, param_values = list(list(list(cp = 0.1), list(minbucket = 2))))
bmr = benchmark(design)
sortnames = function(x) {
if (!is.null(names(x))) {
x <- x[order(names(x))]
}
x
}
trained = bmr$learners$learner
ii = which(map_lgl(trained, function(x) "cp" %in% names(x$param_set$values))) # find learner with cp
expect_count(ii)
expect_equal(sortnames(bmr$learners$learner[-ii][[1]]$param_set$values), list(minbucket = 2, minsplit = 12, xval = 0))
expect_equal(sortnames(bmr$learners$learner[[ii]]$param_set$values), list(cp = 0.1, minsplit = 12, xval = 0))
})
test_that("learner's validate cannot be 'test' if internal_valid_set is present", {
# otherwise, predict_set = "internal_valid" would be ambiguous
learner = lrn("classif.debug", validate = "test", predict_sets = c("train", "internal_valid"))
task = tsk("iris")$divide(ids = 1)
expect_error(benchmark(benchmark_grid(task, learner, rsmp("holdout"))), "cannot be set to ")
})
test_that("learner's validate cannot be a ratio if internal_valid_set is present", {
# otherwise, predict_set = "internal_valid" would be ambiguous
learner = lrn("classif.debug", validate = 0.5, predict_sets = c("train", "internal_valid"))
task = tsk("iris")$divide(ids = 1)
expect_error(benchmark(benchmark_grid(task, learner, rsmp("holdout"))), "cannot be set to ")
})
test_that("properties are also checked on validation task", {
task = tsk("iris")
row = task$data(1)
row[[1]][1] = NA
row$..row_id = 151
task$rbind(row)
task$divide(ids = 151)
learner = lrn("classif.debug", validate = "predefined")
learner$properties = setdiff(learner$properties, "missings")
expect_error(benchmark(benchmark_grid(task, learner, rsmp("holdout"))), "missing values")
})
test_that("parallel execution automatically triggers marshaling", {
learner = lrn("classif.debug", count_marshaling = TRUE)
task = tsk("iris")
resampling = rsmp("holdout")
design = benchmark_grid(task, learner, resampling)
bmr = with_future(future::multisession, {
benchmark(design, store_models = TRUE, unmarshal = TRUE)
})
expect_equal(bmr$resample_result(1)$learners[[1]]$model$marshal_count, 1)
expect_false(bmr$resample_result(1)$learners[[1]]$marshaled)
})
test_that("sequential execution does not trigger marshaling", {
learner = lrn("classif.debug", count_marshaling = TRUE)
task = tsk("iris")
resampling = rsmp("holdout")
design = benchmark_grid(task, learner, resampling)
bmr = with_future(future::sequential, {
benchmark(design, store_models = TRUE, unmarshal = TRUE)
})
expect_equal(bmr$resample_result(1)$learners[[1]]$model$marshal_count, 0)
})
test_that("parallel execution and callr marshal once", {
learner = lrn("classif.debug", count_marshaling = TRUE, encapsulate = c(train = "callr"))
task = tsk("iris")
resampling = rsmp("holdout")
design = benchmark_grid(task, learner, resampling)
bmr = with_future(future::multisession, {
benchmark(design, store_models = TRUE, unmarshal = TRUE)
})
expect_equal(bmr$resample_result(1)$learners[[1]]$model$marshal_count, 1)
expect_false(bmr$resample_result(1)$learners[[1]]$marshaled)
})
test_that("unmarshal parameter is respected", {
learner = lrn("classif.debug", count_marshaling = TRUE, encapsulate = c(train = "callr"))
task = tsk("iris")
resampling = rsmp("holdout")
design = benchmark_grid(task, learner, resampling)
bmr = with_future(future::multisession, {
list(
marshaled = benchmark(design, store_models = TRUE, unmarshal = FALSE),
unmarshaled = benchmark(design, store_models = TRUE, unmarshal = TRUE)
)
})
expect_false(bmr$unmarshaled$resample_result(1)$learners[[1]]$marshaled)
expect_true(bmr$marshaled$resample_result(1)$learners[[1]]$marshaled)
})
test_that("BenchmarkResult can be (un)marshaled", {
bmr = benchmark(benchmark_grid(tsk("iris"), lrn("classif.debug"), rsmp("holdout")), store_models = TRUE)
expect_false(bmr$resample_result(1)$learners[[1]]$marshaled)
bmr$marshal()
expect_true(bmr$resample_result(1)$learners[[1]]$marshaled)
bmr$unmarshal()
expect_false(bmr$resample_result(1)$learners[[1]]$marshaled)
# also works with non-marshalable learner
bmr1 = benchmark(benchmark_grid(tsk("iris"), lrn("classif.featureless"), rsmp("holdout")), store_models = TRUE)
model = bmr1$resample_result(1)$learners[[1]]$model
bmr1$unmarshal()
expect_equal(bmr1$resample_result(1)$learners[[1]]$model, model)
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.