Nothing
# test-criterion_fn.R
# TDD tests for custom criterion callback feature (Objective 27)
#
# Covers custom criterion_fn parameter in summary.irt_results() that allows
# users to compute item-level custom performance criteria, augmenting the
# built-in Morris et al. (2019) criteria.
# =============================================================================
# Helper: Build a minimal irt_results fixture for fast tests
# =============================================================================
make_fixture_results <- function(
n_items = 3,
sample_sizes = c(100, 200),
iterations = 5,
model = "1PL",
seed = 42
) {
# Build item_results with deterministic estimates
rows <- list()
set.seed(seed)
for (ss in sample_sizes) {
for (item in seq_len(n_items)) {
for (param in "b") {
tv <- seq(-2, 2, length.out = n_items)[item]
ests <- tv + rnorm(iterations, mean = 0.01, sd = 0.1)
rows[[length(rows) + 1]] <- data.frame(
iteration = seq_len(iterations),
sample_size = rep(ss, iterations),
item = rep(item, iterations),
param = rep(param, iterations),
true_value = rep(tv, iterations),
estimate = ests,
se = rep(0.1, iterations),
ci_lower = ests - 0.2,
ci_upper = ests + 0.2,
converged = rep(TRUE, iterations),
stringsAsFactors = FALSE
)
}
}
}
item_results <- do.call(rbind, rows)
rownames(item_results) <- NULL
# Build theta_results
theta_results <- data.frame(
iteration = rep(seq_len(iterations), each = length(sample_sizes)),
sample_size = rep(sample_sizes, times = iterations),
theta_cor = runif(iterations * length(sample_sizes), 0.8, 0.99),
theta_rmse = runif(iterations * length(sample_sizes), 0.2, 0.5),
converged = TRUE,
stringsAsFactors = FALSE
)
# Build design and study stubs
design_stub <- list(
model = model,
n_items = n_items,
item_params = list(b = seq(-2, 2, length.out = n_items))
)
class(design_stub) <- "irt_design"
study_stub <- list(
design = design_stub,
sample_sizes = sample_sizes,
missing_type = "none"
)
class(study_stub) <- "irt_study"
structure(
list(
item_results = item_results,
theta_results = theta_results,
study = study_stub,
iterations = iterations,
seed = seed,
elapsed = 0.1
),
class = "irt_results"
)
}
# =============================================================================
# Test 1: Happy path — single named scalar
# =============================================================================
test_that("criterion_fn returning single named scalar creates correct column", {
results <- make_fixture_results()
# Simple callback: return constant value
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(my_stat = 0.5)
}
s <- summary(results, criterion_fn = my_fn)
# Should have my_stat column
expect_true("my_stat" %in% names(s$item_summary))
# All values should be 0.5 (since callback always returns 0.5)
expect_true(all(s$item_summary$my_stat == 0.5, na.rm = TRUE))
# Standard columns should still exist
expect_true("bias" %in% names(s$item_summary))
expect_true("sample_size" %in% names(s$item_summary))
})
# =============================================================================
# Test 2: Happy path — multiple named scalars
# =============================================================================
test_that("criterion_fn returning multiple named scalars creates all columns", {
results <- make_fixture_results()
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(stat_a = 1.0, stat_b = 2.5)
}
s <- summary(results, criterion_fn = my_fn)
# Both columns should exist
expect_true("stat_a" %in% names(s$item_summary))
expect_true("stat_b" %in% names(s$item_summary))
# Check values
expect_true(all(s$item_summary$stat_a == 1.0, na.rm = TRUE))
expect_true(all(s$item_summary$stat_b == 2.5, na.rm = TRUE))
})
# =============================================================================
# Test 3: Backward compatibility — no criterion_fn produces identical output
# =============================================================================
test_that("summary with no criterion_fn identical to criterion_fn = NULL", {
results <- make_fixture_results()
s1 <- summary(results)
s2 <- summary(results, criterion_fn = NULL)
# Same columns
expect_equal(names(s1$item_summary), names(s2$item_summary))
# Same data
expect_equal(s1$item_summary, s2$item_summary)
})
test_that("backward compat: no custom columns without criterion_fn", {
results <- make_fixture_results()
s <- summary(results)
# Should have only built-in columns (no custom columns)
expected_cols <- c(
"sample_size", "item", "param", "true_value",
"bias", "empirical_se", "mse", "rmse", "coverage",
"mcse_bias", "mcse_mse", "n_converged"
)
expect_equal(sort(names(s$item_summary)), sort(expected_cols))
})
# =============================================================================
# Test 4: Callback receives correct data per cell
# =============================================================================
test_that("callback receives correct estimates, true_value, CIs, converged", {
results <- make_fixture_results(n_items = 1, sample_sizes = 100, iterations = 3)
# Capture inputs in a parent-scope list
captured <- list()
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
# Verify structure
expect_true(is.numeric(estimates))
expect_true(is.numeric(true_value))
expect_length(true_value, 1)
expect_true(is.numeric(ci_lower))
expect_true(is.numeric(ci_upper))
expect_true(is.logical(converged))
# Verify lengths match
expect_length(estimates, length(converged))
expect_length(ci_lower, length(converged))
expect_length(ci_upper, length(converged))
c(test_stat = 0)
}
s <- summary(results, criterion_fn = my_fn)
# If we got here, all expect_true calls inside the callback passed
expect_true(TRUE)
})
# =============================================================================
# Test 5: Error — non-function criterion_fn
# =============================================================================
test_that("criterion_fn = string raises informative error", {
results <- make_fixture_results()
expect_error(
summary(results, criterion_fn = "not_a_function"),
"criterion_fn.*function"
)
})
test_that("criterion_fn = list raises informative error", {
results <- make_fixture_results()
expect_error(
summary(results, criterion_fn = list(a = 1)),
"criterion_fn.*function"
)
})
# =============================================================================
# Test 6: Error — callback returns non-numeric
# =============================================================================
test_that("callback returning character vector raises error with cell context", {
results <- make_fixture_results(n_items = 1, sample_sizes = 100)
bad_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(x = "hello")
}
expect_error(
summary(results, criterion_fn = bad_fn),
"numeric.*sample_size.*item.*param"
)
})
test_that("callback returning list raises error", {
results <- make_fixture_results(n_items = 1, sample_sizes = 100)
bad_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
list(x = 1)
}
expect_error(
summary(results, criterion_fn = bad_fn),
"numeric"
)
})
# =============================================================================
# Test 7: Error — callback returns unnamed vector
# =============================================================================
test_that("callback returning unnamed numeric vector raises error", {
results <- make_fixture_results(n_items = 1, sample_sizes = 100)
bad_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(1, 2, 3)
}
expect_error(
summary(results, criterion_fn = bad_fn),
"named"
)
})
test_that("callback with some empty names raises error", {
results <- make_fixture_results(n_items = 1, sample_sizes = 100)
bad_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(a = 1, 2)
}
expect_error(
summary(results, criterion_fn = bad_fn),
"named"
)
})
# =============================================================================
# Test 8: Error — inconsistent names across cells
# =============================================================================
test_that("callback returns inconsistent names across cells raises error", {
results <- make_fixture_results(n_items = 2, sample_sizes = c(100, 200))
call_count <- 0
bad_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
call_count <<- call_count + 1
# First few calls return 'a', later calls return 'b'
if (call_count <= 3) {
c(a = 1)
} else {
c(b = 2)
}
}
expect_error(
summary(results, criterion_fn = bad_fn),
"consistent names|reference"
)
})
# =============================================================================
# Test 9: Error — callback throws
# =============================================================================
test_that("callback throwing error wrapped with cell context", {
results <- make_fixture_results(n_items = 1, sample_sizes = 100)
bad_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
stop("oh no", call. = FALSE)
}
expect_error(
summary(results, criterion_fn = bad_fn),
"oh no"
)
# Error should also include cell identifier
expect_error(
summary(results, criterion_fn = bad_fn),
"sample_size.*item.*param"
)
})
# =============================================================================
# Test 10: Compatibility with criterion filter
# =============================================================================
test_that("criterion filter keeps custom columns even when filtering built-ins", {
results <- make_fixture_results()
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(my_custom = 99.9)
}
s <- summary(results, criterion = "bias", criterion_fn = my_fn)
# Built-in columns should be filtered
expect_true("bias" %in% names(s$item_summary))
expect_false("mse" %in% names(s$item_summary))
# But custom column should still be present
expect_true("my_custom" %in% names(s$item_summary))
})
# =============================================================================
# Test 11: Compatibility with param filter
# =============================================================================
test_that("param filter works with criterion_fn", {
results <- make_fixture_results(n_items = 3)
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(custom_stat = 42)
}
# Only summarize "b" parameter (which is all we have in this fixture)
s <- summary(results, param = "b", criterion_fn = my_fn)
# All rows should be param "b"
expect_true(all(s$item_summary$param == "b"))
# Custom column should still exist
expect_true("custom_stat" %in% names(s$item_summary))
})
# =============================================================================
# Test 12: Compatibility with model misspecification (cross-fit)
# =============================================================================
test_that("criterion_fn works with cross-fit (gen=1PL, est=2PL)", {
# Build a 1PL design and 2PL estimation model
design <- irt_design(
model = "1PL",
n_items = 3,
item_params = list(b = c(-1, 0, 1))
)
study <- irt_study(
design,
sample_sizes = 100,
estimation_model = "2PL"
)
results <- irt_simulate(study, iterations = 3, seed = 42, progress = FALSE)
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(cross_fit_stat = mean(estimates, na.rm = TRUE))
}
# Should not error and should produce custom column
s <- summary(results, criterion_fn = my_fn)
expect_true("cross_fit_stat" %in% names(s$item_summary))
# Estimation model is 2PL, so extract_params() returns both a and b for each
# of the 3 items — 6 rows per sample_size. Obj 26's
# build_true_params_for_estimation() adds a=1 rows (Rasch constraint) so true
# values exist for all estimated params.
expect_equal(nrow(s$item_summary), 6) # 3 items × 2 params (a + b)
})
# =============================================================================
# Test 13: theta_summary unchanged regardless of criterion_fn
# =============================================================================
test_that("theta_summary unaffected by criterion_fn", {
results <- make_fixture_results()
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(custom = 1.5)
}
s1 <- summary(results)
s2 <- summary(results, criterion_fn = my_fn)
# theta_summary should be identical
expect_equal(s1$theta_summary, s2$theta_summary)
})
# =============================================================================
# Test 14: Callback can use ... to accept future parameters
# =============================================================================
test_that("callback with ... accepts named args without error", {
results <- make_fixture_results()
# Function with ... for future-proofing
flexible_fn <- function(estimates, true_value, ...) {
c(flexible_stat = 3.14)
}
expect_no_error(
summary(results, criterion_fn = flexible_fn)
)
})
# =============================================================================
# Test 15: Custom columns positioned correctly in output
# =============================================================================
test_that("custom columns appended after n_converged", {
results <- make_fixture_results()
my_fn <- function(estimates, true_value, ci_lower, ci_upper, converged, ...) {
c(custom_a = 1, custom_b = 2)
}
s <- summary(results, criterion_fn = my_fn)
col_names <- names(s$item_summary)
# Find positions
n_converged_pos <- match("n_converged", col_names)
custom_a_pos <- match("custom_a", col_names)
custom_b_pos <- match("custom_b", col_names)
# Custom columns should come after n_converged
expect_true(custom_a_pos > n_converged_pos)
expect_true(custom_b_pos > n_converged_pos)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.