Nothing
# tests/testthat/test-misl2.R
# Run with: devtools::test()
# ── Shared test fixtures ──────────────────────────────────────────────────────
make_cont_data <- function(n = 50, seed = 1) {
set.seed(seed)
df <- data.frame(x1 = rnorm(n), x2 = rnorm(n), y = rnorm(n))
df[sample(n, 10), "y"] <- NA
df
}
make_bin_data <- function(n = 50, seed = 2) {
set.seed(seed)
df <- data.frame(x1 = rnorm(n), x2 = rnorm(n), y = rbinom(n, 1, 0.5))
df[sample(n, 10), "y"] <- NA
df
}
make_cat_data <- function(n = 50, seed = 3) {
set.seed(seed)
df <- data.frame(
x1 = rnorm(n),
x2 = rnorm(n),
y = factor(sample(c("a", "b", "c"), n, replace = TRUE))
)
df[sample(n, 10), "y"] <- NA
df
}
make_ord_data <- function(n = 50, seed = 6) {
set.seed(seed)
df <- data.frame(
x1 = rnorm(n),
x2 = rnorm(n),
y = factor(sample(c("low", "med", "high"), n, replace = TRUE),
levels = c("low", "med", "high"), ordered = TRUE)
)
df[sample(n, 10), "y"] <- NA
df
}
make_mixed_data <- function(n = 80, seed = 4) {
set.seed(seed)
df <- data.frame(
cont = rnorm(n),
bin = rbinom(n, 1, 0.5),
cat = factor(sample(c("x", "y", "z"), n, replace = TRUE)),
pred = rnorm(n)
)
df[sample(n, 10), "cont"] <- NA
df[sample(n, 10), "bin"] <- NA
df[sample(n, 10), "cat"] <- NA
df
}
make_factor_bin_data <- function(n = 50, seed = 5) {
set.seed(seed)
df <- data.frame(
x = rnorm(n),
y = factor(sample(c("Yes", "No"), n, replace = TRUE))
)
df[sample(n, 10), "y"] <- NA
df
}
# ── check_dataset() ───────────────────────────────────────────────────────────
test_that("check_dataset() errors on non-data-frame input", {
expect_error(check_dataset("a string"), "data frame or matrix")
expect_error(check_dataset(1:10), "data frame or matrix")
expect_error(check_dataset(list(a = 1:5)), "data frame or matrix")
})
test_that("check_dataset() errors on empty input", {
expect_error(check_dataset(data.frame()), "at least one row")
expect_error(check_dataset(data.frame(a = integer())), "at least one row")
})
test_that("check_dataset() errors on complete dataset", {
expect_error(check_dataset(datasets::iris), "complete")
})
test_that("check_dataset() passes silently on valid incomplete data", {
df <- make_cont_data()
expect_invisible(check_dataset(df))
})
# ── check_datatype() ──────────────────────────────────────────────────────────
test_that("check_datatype() identifies categorical columns", {
expect_equal(check_datatype(factor(c("a", "b", "c"))), "categorical")
})
test_that("check_datatype() identifies ordinal columns", {
expect_equal(
check_datatype(factor(c("low", "med", "high"), ordered = TRUE)),
"ordinal"
)
})
test_that("check_datatype() identifies two-level factor as binomial", {
expect_equal(check_datatype(factor(c("Yes", "No", "Yes", NA))), "binomial")
expect_equal(check_datatype(factor(c("A", "B"))), "binomial")
})
test_that("check_datatype() identifies binomial columns", {
expect_equal(check_datatype(c(0, 1, 0, 1, NA)), "binomial")
expect_equal(check_datatype(c(0, 0, 0)), "binomial")
})
test_that("check_datatype() identifies continuous columns", {
expect_equal(check_datatype(c(1.5, 2.3, 3.7)), "continuous")
expect_equal(check_datatype(c(1L, 2L, 3L)), "continuous")
expect_equal(check_datatype(c(0, 1, 2)), "continuous")
})
# ── list_learners() ───────────────────────────────────────────────────────────
test_that("list_learners() returns a tibble with expected columns", {
result <- list_learners()
expect_s3_class(result, "tbl_df")
expect_true(all(c("learner", "description", "package", "installed") %in%
colnames(result)))
})
test_that("list_learners() filters by outcome type", {
cont <- list_learners("continuous")
bin <- list_learners("binomial")
cat <- list_learners("categorical")
ord <- list_learners("ordinal")
expect_true("glm" %in% cont$learner)
expect_true("glm" %in% bin$learner)
expect_false("glm" %in% cat$learner)
expect_false("glm" %in% ord$learner)
expect_true("multinom_reg" %in% cat$learner)
expect_false("multinom_reg" %in% cont$learner)
expect_false("multinom_reg" %in% bin$learner)
expect_false("multinom_reg" %in% ord$learner)
expect_true("polr" %in% ord$learner)
expect_false("polr" %in% cont$learner)
expect_false("polr" %in% bin$learner)
expect_false("polr" %in% cat$learner)
expect_true("rand_forest" %in% ord$learner)
expect_true("boost_tree" %in% ord$learner)
expect_false("continuous" %in% colnames(cont))
expect_false("binomial" %in% colnames(bin))
expect_false("categorical" %in% colnames(cat))
expect_false("ordinal" %in% colnames(ord))
})
test_that("list_learners() installed_only returns only installed learners", {
result <- list_learners(installed_only = TRUE)
expect_true(all(result$installed))
})
test_that("list_learners() errors on invalid outcome_type", {
expect_error(list_learners("invalid"), "should be one of")
})
# ── misl() -- output structure ────────────────────────────────────────────────
test_that("misl() returns a list of length m", {
result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1)
expect_type(result, "list")
expect_length(result, 2)
})
test_that("misl() each element has $datasets and $trace", {
result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1)
for (i in seq_along(result)) {
expect_named(result[[i]], c("datasets", "trace"))
}
})
test_that("misl() imputed datasets have same dimensions as input", {
df <- make_cont_data()
result <- misl(df, m = 2, maxit = 1, con_method = "glm", seed = 1)
for (i in seq_along(result)) {
expect_equal(dim(result[[i]]$datasets), dim(df))
}
})
# ── misl() -- no remaining missingness ───────────────────────────────────────
test_that("misl() produces no NAs for continuous outcome", {
result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
expect_false(anyNA(result[[1]]$datasets))
expect_false(anyNA(result[[2]]$datasets))
})
test_that("misl() produces no NAs for binomial outcome", {
result <- misl(make_bin_data(), m = 2, maxit = 2, bin_method = "glm", seed = 2)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() produces no NAs for categorical outcome", {
result <- misl(make_cat_data(), m = 2, maxit = 2,
cat_method = "rand_forest", seed = 3)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() produces no NAs for ordinal outcome", {
skip_if_not_installed("MASS")
result <- misl(make_ord_data(), m = 2, maxit = 2,
ord_method = "polr", seed = 6)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() produces no NAs for mixed outcome types", {
result <- misl(
make_mixed_data(), m = 2, maxit = 2,
con_method = "glm", bin_method = "glm", cat_method = "rand_forest",
seed = 4
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() produces no NAs for factor-coded binary outcome", {
result <- misl(make_factor_bin_data(), m = 2, maxit = 2,
bin_method = "glm", seed = 5)
expect_false(anyNA(result[[1]]$datasets))
})
# ── misl() -- imputed values are plausible ────────────────────────────────────
test_that("misl() binary imputations are only 0 or 1", {
df <- make_bin_data()
result <- misl(df, m = 2, maxit = 2, bin_method = "glm", seed = 2)
for (i in seq_along(result)) {
expect_true(all(result[[i]]$datasets$y %in% c(0, 1)))
}
})
test_that("misl() categorical imputations stay within observed levels", {
df <- make_cat_data()
result <- misl(df, m = 2, maxit = 2, cat_method = "rand_forest", seed = 3)
for (i in seq_along(result)) {
expect_true(all(result[[i]]$datasets$y %in% levels(df$y)))
}
})
test_that("misl() ordinal imputations stay within observed levels and remain ordered", {
skip_if_not_installed("MASS")
df <- make_ord_data()
result <- misl(df, m = 2, maxit = 2, ord_method = "polr", seed = 6)
for (i in seq_along(result)) {
expect_true(all(result[[i]]$datasets$y %in% levels(df$y)))
expect_true(is.ordered(result[[i]]$datasets$y))
}
})
test_that("misl() factor-coded binary imputations stay within observed levels", {
df <- make_factor_bin_data()
result <- misl(df, m = 2, maxit = 2, bin_method = "glm", seed = 5)
for (i in seq_along(result)) {
expect_true(all(result[[i]]$datasets$y %in% levels(df$y)))
expect_s3_class(result[[i]]$datasets$y, "factor")
}
})
test_that("misl() does not alter observed values", {
df <- make_cont_data()
result <- misl(df, m = 2, maxit = 2, con_method = "glm", seed = 1)
obs_idx <- !is.na(df$y)
for (i in seq_along(result)) {
expect_equal(result[[i]]$datasets$y[obs_idx], df$y[obs_idx])
}
})
# ── misl() -- trace plot ──────────────────────────────────────────────────────
test_that("misl() trace has expected columns", {
result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
expect_named(
result[[1]]$trace,
c("statistic", "variable", "m", "iteration", "value")
)
})
test_that("misl() trace has mean and sd rows for continuous columns", {
result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
trace <- result[[1]]$trace
y_rows <- trace[trace$variable == "y", ]
expect_true("mean" %in% y_rows$statistic)
expect_true("sd" %in% y_rows$statistic)
})
test_that("misl() trace is NA for factor-coded binary columns", {
result <- misl(make_factor_bin_data(), m = 1, maxit = 2,
bin_method = "glm", seed = 5)
trace <- result[[1]]$trace
y_rows <- trace[trace$variable == "y", ]
expect_true(all(is.na(y_rows$value)))
})
test_that("misl() trace is NA for ordinal columns", {
skip_if_not_installed("MASS")
result <- misl(make_ord_data(), m = 1, maxit = 2,
ord_method = "polr", seed = 6)
trace <- result[[1]]$trace
y_rows <- trace[trace$variable == "y", ]
expect_true(all(is.na(y_rows$value)))
})
# ── misl() -- cv_folds parameter ─────────────────────────────────────────────
test_that("misl() runs with cv_folds = 3", {
skip_if_not_installed("ranger")
result <- misl(
make_cont_data(), m = 2, maxit = 1,
con_method = c("glm", "rand_forest"),
cv_folds = 3, seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() cv_folds is ignored for single learner", {
result <- misl(
make_cont_data(), m = 2, maxit = 1,
con_method = "glm", cv_folds = 3, seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() errors on invalid cv_folds", {
df <- make_cont_data()
expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = 1), "'cv_folds' must be an integer >= 2.")
expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = 1.5), "'cv_folds' must be an integer >= 2.")
expect_error(misl(df, m = 1, maxit = 1, con_method = "glm", cv_folds = "3"), "'cv_folds' must be an integer >= 2.")
})
# ── misl() -- binomial bootstrap guard ───────────────────────────────────────
test_that("misl() handles imbalanced binary columns without error", {
set.seed(99)
n <- 100
df <- data.frame(
x = rnorm(n),
y = c(rep(1, 95), rep(0, 5))
)
df[sample(which(df$y == 1), 5), "y"] <- NA
expect_no_error(
misl(df, m = 2, maxit = 1, bin_method = "glm", seed = 99)
)
})
# ── misl() -- ignore_predictors ──────────────────────────────────────────────
test_that("misl() respects ignore_predictors", {
df <- make_cont_data()
expect_no_error(
misl(df, m = 1, maxit = 1, con_method = "glm",
ignore_predictors = "x1", seed = 1)
)
})
# ── misl() -- single vs multiple learners ────────────────────────────────────
test_that("misl() runs with a single learner (no stacking)", {
result <- misl(make_cont_data(), m = 2, maxit = 1, con_method = "glm", seed = 1)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() runs with multiple learners (stacking)", {
skip_if_not_installed("ranger")
result <- misl(
make_cont_data(), m = 2, maxit = 1,
con_method = c("glm", "rand_forest"), seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
# ── misl() -- multinom_reg learner ───────────────────────────────────────────
test_that("misl() runs with multinom_reg for categorical outcome", {
result <- misl(
make_cat_data(), m = 2, maxit = 1,
cat_method = "multinom_reg", seed = 3
)
expect_false(anyNA(result[[1]]$datasets))
expect_true(all(result[[1]]$datasets$y %in% levels(make_cat_data()$y)))
})
# ── misl() -- polr learner ────────────────────────────────────────────────────
test_that("misl() runs with polr for ordinal outcome", {
skip_if_not_installed("MASS")
result <- misl(
make_ord_data(), m = 2, maxit = 1,
ord_method = "polr", seed = 6
)
expect_false(anyNA(result[[1]]$datasets))
expect_true(all(result[[1]]$datasets$y %in% levels(make_ord_data()$y)))
expect_true(is.ordered(result[[1]]$datasets$y))
})
# ── misl() -- custom parsnip specs ───────────────────────────────────────────
test_that("misl() accepts a parsnip spec for continuous outcome", {
skip_if_not_installed("ranger")
custom_spec <- parsnip::rand_forest(trees = 50) |>
parsnip::set_engine("ranger")
result <- misl(
make_cont_data(), m = 2, maxit = 1,
con_method = list(custom_spec), seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() accepts a mixed list of strings and parsnip specs", {
skip_if_not_installed("ranger")
custom_spec <- parsnip::rand_forest(trees = 50) |>
parsnip::set_engine("ranger")
result <- misl(
make_cont_data(), m = 2, maxit = 1,
con_method = list("glm", custom_spec),
cv_folds = 3, seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() accepts a parsnip spec for binary outcome", {
skip_if_not_installed("ranger")
custom_spec <- parsnip::rand_forest(trees = 50) |>
parsnip::set_engine("ranger")
result <- misl(
make_bin_data(), m = 2, maxit = 1,
bin_method = list(custom_spec), seed = 2
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() accepts a parsnip spec for categorical outcome", {
custom_spec <- parsnip::multinom_reg() |>
parsnip::set_engine("nnet")
result <- misl(
make_cat_data(), m = 2, maxit = 1,
cat_method = list(custom_spec), seed = 3
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() accepts a parsnip spec for ordinal outcome", {
skip_if_not_installed("MASS")
# polr is handled via MASS::polr() directly, passed as the named string
result <- misl(
make_ord_data(), m = 2, maxit = 1,
ord_method = list("polr"), seed = 6
)
expect_false(anyNA(result[[1]]$datasets))
expect_true(is.ordered(result[[1]]$datasets$y))
})
test_that("misl() enforces mode on user-supplied parsnip spec", {
skip_if_not_installed("ranger")
# Supply a spec with the wrong mode - misl should correct it
wrong_mode_spec <- parsnip::rand_forest(trees = 50) |>
parsnip::set_engine("ranger") |>
parsnip::set_mode("classification") # wrong for continuous
result <- misl(
make_cont_data(), m = 2, maxit = 1,
con_method = list(wrong_mode_spec), seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
test_that("misl() backwards compatible: character vector still works", {
result <- misl(
make_cont_data(), m = 2, maxit = 1,
con_method = c("glm"), seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
# ── plot_misl_trace() ─────────────────────────────────────────────────────────
test_that("plot_misl_trace() runs without error", {
skip_if_not_installed("ggplot2")
skip_if_not_installed("ggforce")
result <- misl(make_cont_data(), m = 3, maxit = 3, con_method = "glm", seed = 1)
expect_no_error(plot_misl_trace(result))
})
test_that("plot_misl_trace() returns trace data invisibly", {
skip_if_not_installed("ggplot2")
skip_if_not_installed("ggforce")
result <- misl(make_cont_data(), m = 3, maxit = 3, con_method = "glm", seed = 1)
out <- plot_misl_trace(result)
expect_s3_class(out, "data.frame")
expect_true(all(c("statistic", "variable", "m", "iteration", "value") %in%
colnames(out)))
})
test_that("plot_misl_trace() drops categorical variables silently", {
skip_if_not_installed("ggplot2")
skip_if_not_installed("ggforce")
result <- misl(make_mixed_data(), m = 2, maxit = 2,
con_method = "glm", bin_method = "glm",
cat_method = "rand_forest", seed = 4)
out <- plot_misl_trace(result)
# cat column should not appear in trace output since it has no trace values
expect_false("cat" %in% unique(out$variable))
})
test_that("plot_misl_trace() returns message when no trace data available", {
skip_if_not_installed("ggplot2")
skip_if_not_installed("ggforce")
result <- misl(make_cat_data(), m = 2, maxit = 2,
cat_method = "rand_forest", seed = 3)
expect_message(plot_misl_trace(result), "No trace data available")
})
test_that("plot_misl_trace() accepts custom ncol and nrow", {
skip_if_not_installed("ggplot2")
skip_if_not_installed("ggforce")
result <- misl(make_cont_data(), m = 3, maxit = 3, con_method = "glm", seed = 1)
expect_no_error(plot_misl_trace(result, ncol = 1, nrow = 2))
})
test_that("plot_misl_trace() errors when ggplot2 not available", {
result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
local_mocked_bindings(
requireNamespace = function(pkg, ...) if (pkg == "ggplot2") FALSE else TRUE,
.package = "base"
)
expect_error(plot_misl_trace(result), "ggplot2")
})
test_that("plot_misl_trace() errors when ggforce not available", {
result <- misl(make_cont_data(), m = 2, maxit = 2, con_method = "glm", seed = 1)
local_mocked_bindings(
requireNamespace = function(pkg, ...) if (pkg == "ggforce") FALSE else TRUE,
.package = "base"
)
expect_error(plot_misl_trace(result), "ggforce")
})
test_that("misl() errors with a helpful message for unknown learner", {
suppressWarnings(
expect_error(
misl(make_cont_data(), m = 1, maxit = 1, con_method = "lasso"),
"Unknown learner"
)
)
})
test_that("misl() errors with install hint for missing backend package", {
local_mocked_bindings(
requireNamespace = function(pkg, ...) if (pkg == "xgboost") FALSE else TRUE,
.package = "base"
)
suppressWarnings(
expect_error(
misl(make_cont_data(), m = 1, maxit = 1, con_method = "boost_tree"),
"install.packages"
)
)
})
test_that("misl() errors with helpful message for invalid learner type", {
suppressWarnings(
expect_error(
misl(make_cont_data(), m = 1, maxit = 1, con_method = list(123)),
"named character string or a parsnip model spec"
)
)
})
test_that("misl() completes successfully with a valid user-supplied parsnip spec", {
skip_if_not_installed("ranger")
valid_spec <- parsnip::rand_forest(trees = 10) |>
parsnip::set_engine("ranger")
result <- misl(
make_cont_data(), m = 1, maxit = 1,
con_method = list(valid_spec), seed = 1
)
expect_false(anyNA(result[[1]]$datasets))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.