Nothing
# test-irt_simulate.R
# TDD tests for irt_simulate() — Objective 8
# These tests define the expected behavior BEFORE implementation exists.
#
# irt_simulate() is the core simulation engine. It takes an irt_study object
# and runs a Monte Carlo simulation loop:
# for each iteration:
# for each sample_size:
# generate data → apply missing → fit model → extract parameters
#
# Design decision (Approach A): irt_simulate() stores RAW per-iteration
# parameter estimates (point estimates, SEs, CIs, convergence). Criterion
# computation (MSE, bias, RMSE, coverage, etc.) happens at summary() time,
# not inside irt_simulate(). This follows the simhelpers/SimDesign/Morris
# et al. (2019) convention.
#
# Per-iteration storage:
# iteration | sample_size | item | param | true_value | estimate | se |
# ci_lower | ci_upper | converged
#
# For theta-level summaries (correlation with true theta, RMSE), a separate
# theta_results data.frame stores per-iteration summary stats:
# iteration | sample_size | theta_cor | theta_rmse | converged
# --- Helper: reusable designs and studies ------------------------------------
make_1pl_study <- function(n_items = 10,
sample_sizes = c(100, 250),
missing = "none") {
design <- irt_design(
model = "1PL",
n_items = n_items,
item_params = list(b = seq(-2, 2, length.out = n_items))
)
irt_study(design, sample_sizes = sample_sizes, missing = missing)
}
make_2pl_study <- function(n_items = 10,
sample_sizes = c(100, 250),
missing = "none") {
design <- irt_design(
model = "2PL",
n_items = n_items,
item_params = list(
a = rep(1.2, n_items),
b = seq(-2, 2, length.out = n_items)
)
)
irt_study(design, sample_sizes = sample_sizes, missing = missing)
}
make_grm_study <- function(n_items = 8,
n_categories = 4,
sample_sizes = c(200, 500)) {
n_thresh <- n_categories - 1L
b_mat <- matrix(
seq(-2, 2, length.out = n_items * n_thresh),
nrow = n_items, ncol = n_thresh
)
if (n_thresh > 1L) {
b_mat <- t(apply(b_mat, 1, sort))
}
design <- irt_design(
model = "GRM",
n_items = n_items,
item_params = list(a = rep(1.0, n_items), b = b_mat)
)
irt_study(design, sample_sizes = sample_sizes)
}
# =============================================================================
# 1. Return Type and S3 Class
# =============================================================================
test_that("irt_simulate returns an irt_results S3 object", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_s3_class(res, "irt_results")
})
test_that("irt_results contains required top-level elements", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
# Must contain: item_results, theta_results, study, iterations, seed, timings
expect_true("item_results" %in% names(res))
expect_true("theta_results" %in% names(res))
expect_true("study" %in% names(res))
expect_true("iterations" %in% names(res))
expect_true("seed" %in% names(res))
})
test_that("irt_results$study is the original irt_study object", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_identical(res$study, study)
})
# =============================================================================
# 2. item_results Structure
# =============================================================================
test_that("item_results is a data.frame with expected columns", {
study <- make_1pl_study(n_items = 10, sample_sizes = c(100, 250))
res <- irt_simulate(study, iterations = 3, seed = 42)
ir <- res$item_results
expect_s3_class(ir, "data.frame")
expected_cols <- c(
"iteration", "sample_size", "item", "param",
"true_value", "estimate", "se", "ci_lower", "ci_upper", "converged"
)
for (col in expected_cols) {
expect_true(col %in% names(ir), info = paste("Missing column:", col))
}
})
test_that("item_results has correct number of rows for 1PL", {
n_items <- 10
sample_sizes <- c(100, 250)
n_ss <- length(sample_sizes)
iterations <- 3
study <- make_1pl_study(n_items = n_items, sample_sizes = sample_sizes)
res <- irt_simulate(study, iterations = iterations, seed = 42)
ir <- res$item_results
# 1PL: one parameter per item (b/difficulty)
# Rows = iterations × sample_sizes × items × params_per_item
# 1PL has 1 param (b) per item
n_params_per_item <- 1L
expected_rows <- iterations * n_ss * n_items * n_params_per_item
expect_equal(nrow(ir), expected_rows)
})
test_that("item_results has correct number of rows for 2PL", {
n_items <- 10
sample_sizes <- c(100, 250)
n_ss <- length(sample_sizes)
iterations <- 3
study <- make_2pl_study(n_items = n_items, sample_sizes = sample_sizes)
res <- irt_simulate(study, iterations = iterations, seed = 42)
ir <- res$item_results
# 2PL: two parameters per item (a, b)
n_params_per_item <- 2L
expected_rows <- iterations * n_ss * n_items * n_params_per_item
expect_equal(nrow(ir), expected_rows)
})
test_that("item_results iteration column contains 1:iterations", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 5, seed = 42)
expect_equal(sort(unique(res$item_results$iteration)), 1:5)
})
test_that("item_results sample_size column matches study sample_sizes", {
study <- make_1pl_study(sample_sizes = c(100, 250))
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_equal(sort(unique(res$item_results$sample_size)), c(100L, 250L))
})
test_that("item_results item column indexes all items", {
n_items <- 10
study <- make_1pl_study(n_items = n_items)
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_equal(sort(unique(res$item_results$item)), 1:n_items)
})
test_that("item_results param column contains 'b' for 1PL", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_equal(unique(res$item_results$param), "b")
})
test_that("item_results param column contains 'a' and 'b' for 2PL", {
study <- make_2pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_true(setequal(unique(res$item_results$param), c("a", "b")))
})
test_that("item_results true_value matches design parameters", {
n_items <- 10
b_true <- seq(-2, 2, length.out = n_items)
study <- make_1pl_study(n_items = n_items)
res <- irt_simulate(study, iterations = 3, seed = 42)
ir <- res$item_results
# For each item, the true b value should match the design
b_rows <- ir[ir$param == "b", ]
for (i in seq_len(n_items)) {
item_true <- unique(b_rows$true_value[b_rows$item == i])
expect_equal(item_true, b_true[i], tolerance = 1e-10,
info = paste("Item", i, "true_value mismatch"))
}
})
test_that("item_results estimate values are numeric and finite (converged)", {
study <- make_1pl_study(sample_sizes = c(250))
res <- irt_simulate(study, iterations = 5, seed = 42)
ir <- res$item_results
converged_rows <- ir[ir$converged, ]
# Estimates should be numeric and finite for converged replications
expect_true(is.numeric(converged_rows$estimate))
expect_true(all(is.finite(converged_rows$estimate)))
})
test_that("item_results se values are non-negative for converged iterations", {
study <- make_1pl_study(sample_sizes = c(250))
res <- irt_simulate(study, iterations = 5, seed = 42)
ir <- res$item_results
converged_rows <- ir[ir$converged, ]
expect_true(all(converged_rows$se >= 0))
})
test_that("item_results ci_lower <= estimate <= ci_upper for converged rows", {
study <- make_2pl_study(sample_sizes = c(250))
res <- irt_simulate(study, iterations = 5, seed = 42)
ir <- res$item_results
# Filter to converged rows that have non-NA CIs (SEM SE computation
# can fail for individual parameters even when the model converges)
cr <- ir[ir$converged & !is.na(ir$ci_lower) & !is.na(ir$ci_upper), ]
expect_true(nrow(cr) > 0) # at least some rows have CIs
expect_true(all(cr$ci_lower <= cr$estimate))
expect_true(all(cr$estimate <= cr$ci_upper))
})
# =============================================================================
# 3. theta_results Structure
# =============================================================================
test_that("theta_results is a data.frame with expected columns", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
tr <- res$theta_results
expect_s3_class(tr, "data.frame")
expected_cols <- c(
"iteration", "sample_size", "theta_cor", "theta_rmse", "converged"
)
for (col in expected_cols) {
expect_true(col %in% names(tr), info = paste("Missing column:", col))
}
})
test_that("theta_results has one row per iteration × sample_size", {
sample_sizes <- c(100, 250)
iterations <- 5
study <- make_1pl_study(sample_sizes = sample_sizes)
res <- irt_simulate(study, iterations = iterations, seed = 42)
tr <- res$theta_results
expected_rows <- iterations * length(sample_sizes)
expect_equal(nrow(tr), expected_rows)
})
test_that("theta_cor is between -1 and 1 for converged iterations", {
study <- make_1pl_study(sample_sizes = c(500))
res <- irt_simulate(study, iterations = 5, seed = 42)
tr <- res$theta_results
cr <- tr[tr$converged, ]
expect_true(all(cr$theta_cor >= -1 & cr$theta_cor <= 1))
})
test_that("theta_rmse is non-negative for converged iterations", {
study <- make_1pl_study(sample_sizes = c(500))
res <- irt_simulate(study, iterations = 5, seed = 42)
tr <- res$theta_results
cr <- tr[tr$converged, ]
expect_true(all(cr$theta_rmse >= 0))
})
# =============================================================================
# 4. Seed Reproducibility
# =============================================================================
test_that("irt_simulate is reproducible with the same seed", {
study <- make_1pl_study(sample_sizes = c(100))
res1 <- irt_simulate(study, iterations = 3, seed = 42)
res2 <- irt_simulate(study, iterations = 3, seed = 42)
expect_identical(res1$item_results, res2$item_results)
expect_identical(res1$theta_results, res2$theta_results)
})
test_that("irt_simulate produces different results with different seeds", {
study <- make_1pl_study(sample_sizes = c(100))
res1 <- irt_simulate(study, iterations = 3, seed = 1)
res2 <- irt_simulate(study, iterations = 3, seed = 2)
expect_false(identical(res1$item_results, res2$item_results))
})
# =============================================================================
# 5. Non-Convergence Handling
# =============================================================================
test_that("non-convergence is tracked in item_results$converged", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 5, seed = 42)
expect_true(is.logical(res$item_results$converged))
})
test_that("non-convergence is tracked in theta_results$converged", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 5, seed = 42)
expect_true(is.logical(res$theta_results$converged))
})
test_that("non-converged iterations have NA estimates", {
# We can't force non-convergence easily, but we can test the structural
# contract: if converged == FALSE, estimate/se/ci should be NA
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 5, seed = 42)
ir <- res$item_results
non_conv <- ir[!ir$converged, ]
if (nrow(non_conv) > 0) {
expect_true(all(is.na(non_conv$estimate)))
expect_true(all(is.na(non_conv$se)))
expect_true(all(is.na(non_conv$ci_lower)))
expect_true(all(is.na(non_conv$ci_upper)))
} else {
# All converged — that's fine for a well-specified simple model
expect_true(TRUE)
}
})
test_that("non-converged iterations are never silently dropped", {
# Total row count must equal iterations × sample_sizes × items × params
# regardless of convergence
n_items <- 10
sample_sizes <- c(100, 250)
iterations <- 5
study <- make_1pl_study(n_items = n_items, sample_sizes = sample_sizes)
res <- irt_simulate(study, iterations = iterations, seed = 42)
n_params_per_item <- 1L # 1PL
expected_rows <- iterations * length(sample_sizes) * n_items * n_params_per_item
expect_equal(nrow(res$item_results), expected_rows)
expected_theta_rows <- iterations * length(sample_sizes)
expect_equal(nrow(res$theta_results), expected_theta_rows)
})
# =============================================================================
# 6. Input Validation
# =============================================================================
test_that("irt_simulate errors for non-irt_study input", {
expect_error(
irt_simulate("not a study", iterations = 3, seed = 42),
"irt_study"
)
})
test_that("irt_simulate errors for non-positive iterations", {
study <- make_1pl_study()
expect_error(irt_simulate(study, iterations = 0, seed = 42), "iterations")
expect_error(irt_simulate(study, iterations = -5, seed = 42), "iterations")
})
test_that("irt_simulate errors for non-integer iterations", {
study <- make_1pl_study()
expect_error(irt_simulate(study, iterations = 3.5, seed = 42), "iterations")
})
test_that("irt_simulate errors when seed is missing", {
study <- make_1pl_study()
# seed is required for reproducibility
expect_error(irt_simulate(study, iterations = 3), "seed")
})
# =============================================================================
# 7. Model-Specific Behavior
# =============================================================================
test_that("irt_simulate works end-to-end for 1PL", {
study <- make_1pl_study(n_items = 10, sample_sizes = c(200))
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_s3_class(res, "irt_results")
expect_equal(unique(res$item_results$param), "b")
expect_true(nrow(res$item_results) > 0)
})
test_that("irt_simulate works end-to-end for 2PL", {
study <- make_2pl_study(n_items = 10, sample_sizes = c(200))
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_s3_class(res, "irt_results")
expect_true(setequal(unique(res$item_results$param), c("a", "b")))
expect_true(nrow(res$item_results) > 0)
})
test_that("irt_simulate works end-to-end for GRM", {
study <- make_grm_study(n_items = 8, n_categories = 4, sample_sizes = c(300))
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_s3_class(res, "irt_results")
# GRM has a and indexed threshold parameters (b1, b2, b3, ...)
expect_true("a" %in% unique(res$item_results$param))
b_params <- grep("^b\\d+$", unique(res$item_results$param), value = TRUE)
expect_true(length(b_params) >= 1)
expect_true(nrow(res$item_results) > 0)
})
# =============================================================================
# 8. Missing Data Integration
# =============================================================================
test_that("irt_simulate works with MCAR missing data", {
design <- irt_design(
model = "1PL",
n_items = 10,
item_params = list(b = seq(-2, 2, length.out = 10))
)
study <- irt_study(design, sample_sizes = c(200),
missing = "mcar", missing_rate = 0.1)
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_s3_class(res, "irt_results")
expect_true(nrow(res$item_results) > 0)
})
# =============================================================================
# 9. Parameter Recovery Sanity Checks
# =============================================================================
# These are NOT precision tests — they're sanity checks that the simulation
# loop is wired correctly (generating, fitting, extracting in the right order).
# Actual criterion precision testing belongs in objectives 10-11.
test_that("1PL b estimates are in a reasonable range", {
# True b in [-2, 2], estimates should be in roughly [-4, 4]
study <- make_1pl_study(n_items = 10, sample_sizes = c(500))
res <- irt_simulate(study, iterations = 5, seed = 42)
ir <- res$item_results
cr <- ir[ir$converged & ir$param == "b", ]
expect_true(all(cr$estimate > -6))
expect_true(all(cr$estimate < 6))
})
test_that("2PL a estimates are positive", {
study <- make_2pl_study(n_items = 10, sample_sizes = c(500))
res <- irt_simulate(study, iterations = 5, seed = 42)
ir <- res$item_results
cr <- ir[ir$converged & ir$param == "a", ]
# Discrimination estimates should be positive
expect_true(all(cr$estimate > 0))
})
test_that("mean b estimates are close to true values with large N", {
# With N = 1000, mean estimates across iterations should be near truth
n_items <- 10
b_true <- seq(-2, 2, length.out = n_items)
study <- make_1pl_study(n_items = n_items, sample_sizes = c(1000))
res <- irt_simulate(study, iterations = 10, seed = 42)
ir <- res$item_results
cr <- ir[ir$converged & ir$param == "b", ]
# Check mean estimate per item — should be within 0.5 of truth
for (i in seq_len(n_items)) {
item_ests <- cr$estimate[cr$item == i]
if (length(item_ests) > 0) {
mean_est <- mean(item_ests)
expect_lt(abs(mean_est - b_true[i]), 0.5,
label = paste("Item", i, ": |mean_est - true| =",
round(abs(mean_est - b_true[i]), 3)))
}
}
})
# =============================================================================
# 10. Print Method
# =============================================================================
test_that("print.irt_results produces output without error", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_output(print(res))
})
test_that("print.irt_results shows iterations and convergence rate", {
study <- make_1pl_study()
res <- irt_simulate(study, iterations = 3, seed = 42)
output <- capture.output(print(res))
combined <- paste(output, collapse = "\n")
# Should mention iterations count
expect_match(combined, "3", fixed = TRUE)
# Should mention convergence
expect_match(combined, "[Cc]onverg")
})
# =============================================================================
# 11. Timing Information
# =============================================================================
test_that("irt_results contains elapsed time information", {
study <- make_1pl_study(sample_sizes = c(100))
res <- irt_simulate(study, iterations = 3, seed = 42)
expect_true("elapsed" %in% names(res))
expect_true(is.numeric(res$elapsed))
expect_true(res$elapsed >= 0)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.