Nothing
# Tests for KnockoffSampler
test_that("KnockoffSampler initialization works", {
skip_if_not_installed("knockoff")
n = 100
task = tgen("friedman1")$generate(n = n)
sampler = KnockoffSampler$new(task)
expect_s3_class(sampler, "KnockoffSampler")
expect_equal(sampler$label, "Knockoff sampler")
expect_s3_class(sampler$param_set, "ParamSet")
expect_true("knockoff_fun" %in% sampler$param_set$ids())
# Check that knockoff matrix was created
expect_true(data.table::is.data.table(sampler$x_tilde))
expect_equal(nrow(sampler$x_tilde), n)
# x_tilde has feature columns + internal ..row_id column
expect_equal(ncol(sampler$x_tilde), length(task$feature_names) + 1L)
expect_equal(setdiff(names(sampler$x_tilde), "..row_id"), task$feature_names)
})
test_that("KnockoffSampler sampling works", {
skip_if_not_installed("knockoff")
task = tgen("friedman1")$generate(n = 100)
sampler = KnockoffSampler$new(task)
data = task$data()
# Test single feature sampling
sampled_data = sampler$sample("important1")
expect_sampler_output_structure(sampled_data, task, nrows = 100)
expect_feature_type_consistency(sampled_data, task)
# Sampled feature values are from knockoff matrix
expect_identical(sampled_data$important1, sampler$x_tilde$important1)
})
test_that("KnockoffSampler handles multiple features", {
skip_if_not_installed("knockoff")
task = tgen("friedman1")$generate(n = 100)
sampler = KnockoffSampler$new(task)
data = task$data()
features = c("important1", "important2")
sampled_data = sampler$sample(features)
expect_sampler_output_structure(sampled_data, task, nrows = 100)
expect_feature_type_consistency(sampled_data, task)
# Check that sampled feature values are from knockoff matrix
for (feat in features) {
expect_identical(sampled_data[[feat]], sampler$x_tilde[[feat]])
}
})
test_that("KnockoffSampler works with different numeric tasks", {
skip_if_not_installed("knockoff")
# Friedman1 regression task (all numeric)
task_friedman = tgen("friedman1")$generate(n = 100)
sampler_friedman = KnockoffSampler$new(task_friedman)
sampled_friedman = sampler_friedman$sample("important1")
expect_sampler_output_structure(sampled_friedman, task_friedman, nrows = 100)
# Circle task with specified dimensions (all numeric)
task_circle = tgen("circle", d = 4)$generate(n = 80)
sampler_circle = KnockoffSampler$new(task_circle)
sampled_circle = sampler_circle$sample("x1")
expect_sampler_output_structure(sampled_circle, task_circle, nrows = 80)
# Custom numeric task
set.seed(123)
custom_data = data.table::data.table(
x1 = rnorm(50),
x2 = runif(50),
x3 = rexp(50),
y = rnorm(50)
)
task_custom = as_task_regr(custom_data, target = "y")
sampler_custom = KnockoffSampler$new(task_custom)
sampled_custom = sampler_custom$sample("x1")
expect_sampler_output_structure(sampled_custom, task_custom, nrows = 50)
})
test_that("KnockoffSampler knockoff matrix properties", {
skip_if_not_installed("knockoff")
task = tgen("friedman1")$generate(n = 100)
sampler = KnockoffSampler$new(task)
# Knockoff matrix should have same dimensions as feature matrix (plus ..row_id column)
feature_data = task$data(cols = task$feature_names)
expect_equal(nrow(sampler$x_tilde), nrow(feature_data))
expect_equal(ncol(sampler$x_tilde), ncol(feature_data) + 1L)
expect_equal(setdiff(names(sampler$x_tilde), "..row_id"), names(feature_data))
# Knockoff values should be numeric (since task is all numeric)
expect_true(all(sapply(sampler$x_tilde, is.numeric)))
# Knockoff values should be finite
expect_true(all(sapply(sampler$x_tilde, function(x) all(is.finite(x)))))
})
test_that("KnockoffSampler custom knockoff function", {
skip_if_not_installed("knockoff")
task = tgen("friedman1")$generate(n = 50)
# Custom knockoff function that returns scaled versions
custom_knockoff_fun = function(x) {
x_matrix = as.matrix(x)
noise = matrix(rnorm(prod(dim(x_matrix)), sd = 0.1), nrow = nrow(x_matrix))
return(x_matrix + noise)
}
sampler = KnockoffSampler$new(task, knockoff_fun = custom_knockoff_fun)
expect_true(is.function(sampler$param_set$values$knockoff_fun))
expect_true(data.table::is.data.table(sampler$x_tilde))
expect_equal(nrow(sampler$x_tilde), 50)
expect_equal(ncol(sampler$x_tilde), length(task$feature_names) + 1L)
data = task$data()
sampled_data = sampler$sample("important1")
expect_true(data.table::is.data.table(sampled_data))
expect_equal(nrow(sampled_data), 50)
expect_false(identical(sampled_data$important1, data$important1))
expect_identical(sampled_data$important1, sampler$x_tilde$important1)
})
test_that("KnockoffSampler reproducibility", {
skip_if_not_installed("knockoff")
task = tgen("friedman1")$generate(n = 80)
set.seed(123)
sampler1 = KnockoffSampler$new(task)
set.seed(123)
sampler2 = KnockoffSampler$new(task)
# Dimensions should be identical
expect_equal(dim(sampler1$x_tilde), dim(sampler2$x_tilde))
expect_equal(names(sampler1$x_tilde), names(sampler2$x_tilde))
expect_true("..row_id" %in% names(sampler1$x_tilde))
# Sampling should be consistent within each sampler
sampled1a = sampler1$sample("important1")
sampled1b = sampler1$sample("important1")
expect_identical(sampled1a$important1, sampled1b$important1)
})
test_that("KnockoffSampler edge cases", {
skip_if_not_installed("knockoff")
# Test with minimum viable task (single feature)
single_feature_data = data.table::data.table(
x1 = rnorm(30),
y = rnorm(30)
)
task_single = as_task_regr(single_feature_data, target = "y")
expect_warning({
sampler_single = KnockoffSampler$new(task_single)
})
expect_equal(ncol(sampler_single$x_tilde), 2L) # x1 + ..row_id
expect_equal(setdiff(names(sampler_single$x_tilde), "..row_id"), "x1")
sampled_single = sampler_single$sample("x1")
expect_true(data.table::is.data.table(sampled_single))
expect_equal(nrow(sampled_single), 30)
})
test_that("KnockoffSampler fails with non-numeric features", {
skip_if_not_installed("knockoff")
# Factor features
data_with_factor = data.table::data.table(
x1 = rnorm(50),
x2 = factor(sample(c("A", "B", "C"), 50, replace = TRUE)),
x3 = rnorm(50),
y = rnorm(50)
)
task_factor = as_task_regr(data_with_factor, target = "y")
expect_error(KnockoffSampler$new(task_factor), class = "error")
# Character features
data_with_char = data.table::data.table(
x1 = rnorm(50),
x2 = sample(letters[1:5], 50, replace = TRUE),
x3 = rnorm(50),
y = rnorm(50)
)
task_char = as_task_regr(data_with_char, target = "y")
expect_error(KnockoffSampler$new(task_char), class = "error")
})
test_that("KnockoffGaussianSampler works with all-numeric features", {
skip_if_not_installed("knockoff")
task = tgen("friedman1")$generate(n = 100)
sampler = KnockoffGaussianSampler$new(task)
expect_s3_class(sampler, "KnockoffGaussianSampler")
expect_s3_class(sampler, "KnockoffSampler")
expect_equal(sampler$label, "Gaussian Knockoff sampler")
expect_true(data.table::is.data.table(sampler$x_tilde))
expect_equal(nrow(sampler$x_tilde), 100)
expect_equal(ncol(sampler$x_tilde), length(task$feature_names) + 1L)
data = task$data()
sampled_data = sampler$sample("important1")
expect_sampler_output_structure(sampled_data, task, nrows = 100)
expect_identical(sampled_data$important1, sampler$x_tilde$important1)
# Test row_ids
expect_equal(nrow(sampler$sample("important1", row_ids = 1:10)), 10)
expect_equal(nrow(sampler$sample("important1", row_ids = 32)), 1)
# Non-sampled features unchanged
non_sampled = c(
"important2",
"important3",
"important4",
"important5",
"unimportant1",
"unimportant2",
"unimportant3",
"unimportant4",
"unimportant5"
)
expect_equal(
sampler$sample("important1", row_ids = 32)[, ..non_sampled],
data[32, ..non_sampled]
)
# Multiple features
features = c("important1", "important2", "important3")
sampled_multi = sampler$sample(features)
for (feat in features) {
expect_false(identical(sampled_multi[[feat]], data[[feat]]))
expect_identical(sampled_multi[[feat]], sampler$x_tilde[[feat]])
}
})
test_that("KnockoffGaussianSampler fails with factor features", {
skip_if_not_installed("knockoff")
data_with_factor = data.table::data.table(
x1 = rnorm(50),
x2 = factor(sample(c("A", "B", "C"), 50, replace = TRUE)),
x3 = rnorm(50),
y = rnorm(50)
)
task_factor = as_task_regr(data_with_factor, target = "y")
expect_error(
KnockoffGaussianSampler$new(task_factor),
regexp = "following unsupported feature types"
)
})
test_that("KnockoffSampler works with multiple iterations", {
skip_if_not_installed("knockoff")
task = sim_dgp_ewald(n = 100)
withr::with_seed(42, {
sampler_default = KnockoffSampler$new(task = task)
})
withr::with_seed(42, {
sampler_1 = KnockoffSampler$new(task = task, iters = 1)
})
sampler_5 = KnockoffSampler$new(task = task, iters = 5)
# x_tilde generated with same seed, same length
expect_equal(
sampler_default$sample("x1"),
sampler_1$sample("x1")
)
x_orig = task$data(rows = 2)
x_sampled = sampler_1$sample("x1", row_ids = 2)
# Other values are the same (excluding sampled feature x1)
non_sampled_cols = setdiff(names(x_sampled), "x1")
expect_equal(x_orig[, ..non_sampled_cols], x_sampled[, ..non_sampled_cols])
# Multiple samples
x_orig = task$data(rows = c(1, 1, 1))
x_sampled = sampler_5$sample("x1", row_ids = c(1, 1, 1))
expect_equal(x_sampled$x2, x_orig$x2)
# Sampling more than available
expect_warning(
sampled_1_repeats <- sampler_1$sample("x1", row_ids = c(1, 1, 1)),
regexp = "sample with replacement"
)
expect_equal(
sampled_1_repeats,
data.table::rbindlist(replicate(3, sampler_1$sample("x1", row_ids = 1), simplify = FALSE))
)
})
test_that("KnockoffGaussianSampler preserves feature types", {
skip_if_not_installed("knockoff")
test_sampler_feature_types(KnockoffGaussianSampler)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.