tests/testthat/test-KnockoffSampler.R

# Tests for KnockoffSampler

test_that("KnockoffSampler initialization works", {
	skip_if_not_installed("knockoff")

	n = 100
	task = tgen("friedman1")$generate(n = n)
	sampler = KnockoffSampler$new(task)

	expect_s3_class(sampler, "KnockoffSampler")
	expect_equal(sampler$label, "Knockoff sampler")
	expect_s3_class(sampler$param_set, "ParamSet")
	expect_true("knockoff_fun" %in% sampler$param_set$ids())

	# Check that knockoff matrix was created
	expect_true(data.table::is.data.table(sampler$x_tilde))
	expect_equal(nrow(sampler$x_tilde), n)
	# x_tilde has feature columns + internal ..row_id column
	expect_equal(ncol(sampler$x_tilde), length(task$feature_names) + 1L)
	expect_equal(setdiff(names(sampler$x_tilde), "..row_id"), task$feature_names)
})

test_that("KnockoffSampler sampling works", {
	skip_if_not_installed("knockoff")

	task = tgen("friedman1")$generate(n = 100)
	sampler = KnockoffSampler$new(task)
	data = task$data()

	# Test single feature sampling
	sampled_data = sampler$sample("important1")

	expect_sampler_output_structure(sampled_data, task, nrows = 100)
	expect_feature_type_consistency(sampled_data, task)

	# Sampled feature values are from knockoff matrix
	expect_identical(sampled_data$important1, sampler$x_tilde$important1)
})

test_that("KnockoffSampler handles multiple features", {
	skip_if_not_installed("knockoff")

	task = tgen("friedman1")$generate(n = 100)
	sampler = KnockoffSampler$new(task)
	data = task$data()

	features = c("important1", "important2")
	sampled_data = sampler$sample(features)

	expect_sampler_output_structure(sampled_data, task, nrows = 100)
	expect_feature_type_consistency(sampled_data, task)

	# Check that sampled feature values are from knockoff matrix
	for (feat in features) {
		expect_identical(sampled_data[[feat]], sampler$x_tilde[[feat]])
	}
})

test_that("KnockoffSampler works with different numeric tasks", {
	skip_if_not_installed("knockoff")

	# Friedman1 regression task (all numeric)
	task_friedman = tgen("friedman1")$generate(n = 100)
	sampler_friedman = KnockoffSampler$new(task_friedman)
	sampled_friedman = sampler_friedman$sample("important1")
	expect_sampler_output_structure(sampled_friedman, task_friedman, nrows = 100)

	# Circle task with specified dimensions (all numeric)
	task_circle = tgen("circle", d = 4)$generate(n = 80)
	sampler_circle = KnockoffSampler$new(task_circle)
	sampled_circle = sampler_circle$sample("x1")
	expect_sampler_output_structure(sampled_circle, task_circle, nrows = 80)

	# Custom numeric task
	set.seed(123)
	custom_data = data.table::data.table(
		x1 = rnorm(50),
		x2 = runif(50),
		x3 = rexp(50),
		y = rnorm(50)
	)
	task_custom = as_task_regr(custom_data, target = "y")
	sampler_custom = KnockoffSampler$new(task_custom)
	sampled_custom = sampler_custom$sample("x1")
	expect_sampler_output_structure(sampled_custom, task_custom, nrows = 50)
})

test_that("KnockoffSampler knockoff matrix properties", {
	skip_if_not_installed("knockoff")

	task = tgen("friedman1")$generate(n = 100)
	sampler = KnockoffSampler$new(task)

	# Knockoff matrix should have same dimensions as feature matrix (plus ..row_id column)
	feature_data = task$data(cols = task$feature_names)
	expect_equal(nrow(sampler$x_tilde), nrow(feature_data))
	expect_equal(ncol(sampler$x_tilde), ncol(feature_data) + 1L)
	expect_equal(setdiff(names(sampler$x_tilde), "..row_id"), names(feature_data))

	# Knockoff values should be numeric (since task is all numeric)
	expect_true(all(sapply(sampler$x_tilde, is.numeric)))

	# Knockoff values should be finite
	expect_true(all(sapply(sampler$x_tilde, function(x) all(is.finite(x)))))
})

test_that("KnockoffSampler custom knockoff function", {
	skip_if_not_installed("knockoff")

	task = tgen("friedman1")$generate(n = 50)

	# Custom knockoff function that returns scaled versions
	custom_knockoff_fun = function(x) {
		x_matrix = as.matrix(x)
		noise = matrix(rnorm(prod(dim(x_matrix)), sd = 0.1), nrow = nrow(x_matrix))
		return(x_matrix + noise)
	}

	sampler = KnockoffSampler$new(task, knockoff_fun = custom_knockoff_fun)

	expect_true(is.function(sampler$param_set$values$knockoff_fun))
	expect_true(data.table::is.data.table(sampler$x_tilde))
	expect_equal(nrow(sampler$x_tilde), 50)
	expect_equal(ncol(sampler$x_tilde), length(task$feature_names) + 1L)

	data = task$data()
	sampled_data = sampler$sample("important1")

	expect_true(data.table::is.data.table(sampled_data))
	expect_equal(nrow(sampled_data), 50)
	expect_false(identical(sampled_data$important1, data$important1))
	expect_identical(sampled_data$important1, sampler$x_tilde$important1)
})

test_that("KnockoffSampler reproducibility", {
	skip_if_not_installed("knockoff")

	task = tgen("friedman1")$generate(n = 80)

	set.seed(123)
	sampler1 = KnockoffSampler$new(task)

	set.seed(123)
	sampler2 = KnockoffSampler$new(task)

	# Dimensions should be identical
	expect_equal(dim(sampler1$x_tilde), dim(sampler2$x_tilde))
	expect_equal(names(sampler1$x_tilde), names(sampler2$x_tilde))
	expect_true("..row_id" %in% names(sampler1$x_tilde))

	# Sampling should be consistent within each sampler
	sampled1a = sampler1$sample("important1")
	sampled1b = sampler1$sample("important1")
	expect_identical(sampled1a$important1, sampled1b$important1)
})

test_that("KnockoffSampler edge cases", {
	skip_if_not_installed("knockoff")

	# Test with minimum viable task (single feature)
	single_feature_data = data.table::data.table(
		x1 = rnorm(30),
		y = rnorm(30)
	)
	task_single = as_task_regr(single_feature_data, target = "y")

	expect_warning({
		sampler_single = KnockoffSampler$new(task_single)
	})

	expect_equal(ncol(sampler_single$x_tilde), 2L) # x1 + ..row_id
	expect_equal(setdiff(names(sampler_single$x_tilde), "..row_id"), "x1")

	sampled_single = sampler_single$sample("x1")
	expect_true(data.table::is.data.table(sampled_single))
	expect_equal(nrow(sampled_single), 30)
})

test_that("KnockoffSampler fails with non-numeric features", {
	skip_if_not_installed("knockoff")

	# Factor features
	data_with_factor = data.table::data.table(
		x1 = rnorm(50),
		x2 = factor(sample(c("A", "B", "C"), 50, replace = TRUE)),
		x3 = rnorm(50),
		y = rnorm(50)
	)
	task_factor = as_task_regr(data_with_factor, target = "y")
	expect_error(KnockoffSampler$new(task_factor), class = "error")

	# Character features
	data_with_char = data.table::data.table(
		x1 = rnorm(50),
		x2 = sample(letters[1:5], 50, replace = TRUE),
		x3 = rnorm(50),
		y = rnorm(50)
	)
	task_char = as_task_regr(data_with_char, target = "y")
	expect_error(KnockoffSampler$new(task_char), class = "error")
})

test_that("KnockoffGaussianSampler works with all-numeric features", {
	skip_if_not_installed("knockoff")

	task = tgen("friedman1")$generate(n = 100)
	sampler = KnockoffGaussianSampler$new(task)

	expect_s3_class(sampler, "KnockoffGaussianSampler")
	expect_s3_class(sampler, "KnockoffSampler")
	expect_equal(sampler$label, "Gaussian Knockoff sampler")

	expect_true(data.table::is.data.table(sampler$x_tilde))
	expect_equal(nrow(sampler$x_tilde), 100)
	expect_equal(ncol(sampler$x_tilde), length(task$feature_names) + 1L)

	data = task$data()
	sampled_data = sampler$sample("important1")

	expect_sampler_output_structure(sampled_data, task, nrows = 100)
	expect_identical(sampled_data$important1, sampler$x_tilde$important1)

	# Test row_ids
	expect_equal(nrow(sampler$sample("important1", row_ids = 1:10)), 10)
	expect_equal(nrow(sampler$sample("important1", row_ids = 32)), 1)

	# Non-sampled features unchanged
	non_sampled = c(
		"important2",
		"important3",
		"important4",
		"important5",
		"unimportant1",
		"unimportant2",
		"unimportant3",
		"unimportant4",
		"unimportant5"
	)
	expect_equal(
		sampler$sample("important1", row_ids = 32)[, ..non_sampled],
		data[32, ..non_sampled]
	)

	# Multiple features
	features = c("important1", "important2", "important3")
	sampled_multi = sampler$sample(features)

	for (feat in features) {
		expect_false(identical(sampled_multi[[feat]], data[[feat]]))
		expect_identical(sampled_multi[[feat]], sampler$x_tilde[[feat]])
	}
})

test_that("KnockoffGaussianSampler fails with factor features", {
	skip_if_not_installed("knockoff")

	data_with_factor = data.table::data.table(
		x1 = rnorm(50),
		x2 = factor(sample(c("A", "B", "C"), 50, replace = TRUE)),
		x3 = rnorm(50),
		y = rnorm(50)
	)
	task_factor = as_task_regr(data_with_factor, target = "y")

	expect_error(
		KnockoffGaussianSampler$new(task_factor),
		regexp = "following unsupported feature types"
	)
})

test_that("KnockoffSampler works with multiple iterations", {
	skip_if_not_installed("knockoff")

	task = sim_dgp_ewald(n = 100)

	withr::with_seed(42, {
		sampler_default = KnockoffSampler$new(task = task)
	})
	withr::with_seed(42, {
		sampler_1 = KnockoffSampler$new(task = task, iters = 1)
	})
	sampler_5 = KnockoffSampler$new(task = task, iters = 5)

	# x_tilde generated with same seed, same length
	expect_equal(
		sampler_default$sample("x1"),
		sampler_1$sample("x1")
	)

	x_orig = task$data(rows = 2)
	x_sampled = sampler_1$sample("x1", row_ids = 2)

	# Other values are the same (excluding sampled feature x1)
	non_sampled_cols = setdiff(names(x_sampled), "x1")
	expect_equal(x_orig[, ..non_sampled_cols], x_sampled[, ..non_sampled_cols])

	# Multiple samples
	x_orig = task$data(rows = c(1, 1, 1))
	x_sampled = sampler_5$sample("x1", row_ids = c(1, 1, 1))

	expect_equal(x_sampled$x2, x_orig$x2)

	# Sampling more than available
	expect_warning(
		sampled_1_repeats <- sampler_1$sample("x1", row_ids = c(1, 1, 1)),
		regexp = "sample with replacement"
	)

	expect_equal(
		sampled_1_repeats,
		data.table::rbindlist(replicate(3, sampler_1$sample("x1", row_ids = 1), simplify = FALSE))
	)
})

test_that("KnockoffGaussianSampler preserves feature types", {
	skip_if_not_installed("knockoff")
	test_sampler_feature_types(KnockoffGaussianSampler)
})

Try the xplainfi package in your browser

Any scripts or data that you put into this service are public.

xplainfi documentation built on Feb. 27, 2026, 1:08 a.m.