Nothing
# test-grm_integration.R
# Objective 14: Full pipeline integration test for GRM
# Exercises: irt_design → irt_study → irt_simulate → summary → plot → recommended_n
# ---- Helper: build a standard GRM study for integration tests ----
make_grm_integration_study <- function(n_items = 5,
n_categories = 4,
sample_sizes = c(200, 500),
missing = "none",
missing_rate = NULL) {
n_thresh <- n_categories - 1L
b_mat <- matrix(
seq(-2, 2, length.out = n_items * n_thresh),
nrow = n_items, ncol = n_thresh
)
if (n_thresh > 1L) {
b_mat <- t(apply(b_mat, 1, sort))
}
design <- irt_design(
model = "GRM",
n_items = n_items,
item_params = list(a = rep(1.2, n_items), b = b_mat)
)
irt_study(design,
sample_sizes = sample_sizes,
missing = missing,
missing_rate = missing_rate)
}
# ===========================================================================
# 1. Full pipeline — clean data (no missingness)
# ===========================================================================
test_that("GRM full pipeline runs without error (no missing)", {
study <- make_grm_integration_study()
expect_no_error(
res <- irt_simulate(study, iterations = 3, seed = 101)
)
expect_s3_class(res, "irt_results")
})
test_that("GRM item_results has correct structure", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 3, seed = 101)
ir <- res$item_results
expect_true(is.data.frame(ir))
expected_cols <- c("iteration", "sample_size", "item", "param",
"true_value", "estimate", "se",
"ci_lower", "ci_upper", "converged")
expect_true(all(expected_cols %in% names(ir)))
})
test_that("GRM item_results contains correct parameter names", {
study <- make_grm_integration_study(n_items = 5, n_categories = 4)
res <- irt_simulate(study, iterations = 3, seed = 101)
params <- unique(res$item_results$param)
expect_true("a" %in% params)
expect_true("b1" %in% params)
expect_true("b2" %in% params)
expect_true("b3" %in% params)
# 4 categories → 3 thresholds (b1, b2, b3) + a = 4 param types
expect_equal(length(params), 4)
})
test_that("GRM item_results has correct row count", {
n_items <- 5
n_categories <- 4
n_thresh <- n_categories - 1 # 3
n_params_per_item <- 1 + n_thresh # a + b1,b2,b3
sample_sizes <- c(200, 500)
iterations <- 3
study <- make_grm_integration_study(
n_items = n_items, n_categories = n_categories,
sample_sizes = sample_sizes
)
res <- irt_simulate(study, iterations = iterations, seed = 101)
# Expected rows (assuming all converge):
# n_items * n_params_per_item * length(sample_sizes) * iterations
expected_rows <- n_items * n_params_per_item * length(sample_sizes) * iterations
converged <- res$item_results[res$item_results$converged, ]
expect_equal(nrow(converged), expected_rows)
})
test_that("GRM theta_results has correct structure and row count", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 3, seed = 101)
tr <- res$theta_results
expect_true(is.data.frame(tr))
expect_true(all(c("iteration", "sample_size", "theta_cor",
"theta_rmse", "converged") %in% names(tr)))
# 2 sample sizes × 3 iterations = 6 rows
expect_equal(nrow(tr), 6)
})
test_that("GRM true_value matches design parameters", {
n_items <- 5
a_vals <- rep(1.2, n_items)
b_mat <- matrix(
seq(-2, 2, length.out = n_items * 3),
nrow = n_items, ncol = 3
)
b_mat <- t(apply(b_mat, 1, sort))
design <- irt_design(
model = "GRM", n_items = n_items,
item_params = list(a = a_vals, b = b_mat)
)
study <- irt_study(design, sample_sizes = c(300))
res <- irt_simulate(study, iterations = 2, seed = 202)
ir <- res$item_results
# Check a true values
a_rows <- ir[ir$param == "a" & ir$iteration == 1, ]
expect_equal(a_rows$true_value, a_vals)
# Check b1 true values
b1_rows <- ir[ir$param == "b1" & ir$iteration == 1, ]
expect_equal(b1_rows$true_value, b_mat[, 1])
# Check b3 true values
b3_rows <- ir[ir$param == "b3" & ir$iteration == 1, ]
expect_equal(b3_rows$true_value, b_mat[, 3])
})
test_that("GRM seed reproducibility", {
study <- make_grm_integration_study(sample_sizes = c(300))
res1 <- irt_simulate(study, iterations = 3, seed = 42)
res2 <- irt_simulate(study, iterations = 3, seed = 42)
expect_identical(res1$item_results, res2$item_results)
expect_identical(res1$theta_results, res2$theta_results)
})
# ===========================================================================
# 2. Summary method
# ===========================================================================
test_that("summary() on GRM results returns summary_irt_results", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res)
expect_s3_class(s, "summary_irt_results")
expect_true("item_summary" %in% names(s))
expect_true("theta_summary" %in% names(s))
})
test_that("summary() item_summary has rows for all GRM params", {
study <- make_grm_integration_study(n_items = 5, n_categories = 4,
sample_sizes = c(200, 500))
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res)
is_df <- s$item_summary
# 5 items × 4 params (a, b1, b2, b3) × 2 sample sizes = 40 rows
expect_equal(nrow(is_df), 40)
expect_true(all(c("a", "b1", "b2", "b3") %in% unique(is_df$param)))
})
test_that("summary() with param filter works for GRM thresholds", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res, param = c("b1", "b3"))
expect_true(all(s$item_summary$param %in% c("b1", "b3")))
})
test_that("summary() criterion values are finite for converged GRM sims", {
study <- make_grm_integration_study(sample_sizes = c(500))
res <- irt_simulate(study, iterations = 10, seed = 404)
s <- summary(res)
is_df <- s$item_summary
# All criterion columns should be numeric
for (col in c("bias", "empirical_se", "mse", "rmse")) {
if (col %in% names(is_df)) {
vals <- is_df[[col]]
# Should have finite values (some may be NA if SE was NA)
expect_true(any(is.finite(vals)),
info = paste("No finite values in", col))
}
}
})
test_that("summary() theta_summary has correct structure for GRM", {
study <- make_grm_integration_study(sample_sizes = c(200, 500))
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res)
ts_df <- s$theta_summary
expect_true(is.data.frame(ts_df))
expect_true("sample_size" %in% names(ts_df))
expect_equal(nrow(ts_df), 2) # 2 sample sizes
})
# ===========================================================================
# 3. Plot method
# ===========================================================================
test_that("plot() on GRM results returns a ggplot", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
p <- plot(res)
expect_s3_class(p, "ggplot")
})
test_that("plot() on GRM results with threshold line works", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
p <- plot(res, criterion = "rmse", threshold = 0.3)
expect_s3_class(p, "ggplot")
})
test_that("plot() on GRM summary works", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res)
p <- plot(s, criterion = "bias")
expect_s3_class(p, "ggplot")
})
test_that("plot() on GRM with param filter works", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
p <- plot(res, param = "a")
expect_s3_class(p, "ggplot")
})
# ===========================================================================
# 4. recommended_n()
# ===========================================================================
test_that("recommended_n() works on GRM summary", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res)
rec <- recommended_n(s, criterion = "rmse", threshold = 1.0)
expect_true(is.data.frame(rec))
expect_true(all(c("item", "param", "recommended_n") %in% names(rec)))
})
test_that("recommended_n() returns results for all GRM params", {
study <- make_grm_integration_study(n_items = 5, n_categories = 4)
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res)
rec <- recommended_n(s, criterion = "rmse", threshold = 1.0)
expect_true(all(c("a", "b1", "b2", "b3") %in% unique(rec$param)))
})
test_that("recommended_n() with param filter on GRM", {
study <- make_grm_integration_study()
res <- irt_simulate(study, iterations = 5, seed = 303)
s <- summary(res)
rec <- recommended_n(s, criterion = "rmse", threshold = 1.0, param = "b2")
expect_true(all(rec$param == "b2"))
})
# ===========================================================================
# 5. GRM with MCAR missingness
# ===========================================================================
test_that("GRM pipeline works with MCAR missingness", {
study <- make_grm_integration_study(
sample_sizes = c(300),
missing = "mcar",
missing_rate = 0.1
)
res <- irt_simulate(study, iterations = 3, seed = 505)
expect_s3_class(res, "irt_results")
expect_true(nrow(res$item_results) > 0)
# Summary should also work
s <- summary(res)
expect_s3_class(s, "summary_irt_results")
})
# ===========================================================================
# 6. GRM with 2 categories (binary — edge case)
# ===========================================================================
test_that("GRM pipeline works with 2 categories (binary edge case)", {
n_items <- 5
# 2 categories → 1 threshold
b_mat <- matrix(seq(-1.5, 1.5, length.out = n_items), nrow = n_items, ncol = 1)
design <- irt_design(
model = "GRM",
n_items = n_items,
item_params = list(a = rep(1.0, n_items), b = b_mat)
)
study <- irt_study(design, sample_sizes = c(300))
res <- irt_simulate(study, iterations = 3, seed = 606)
expect_s3_class(res, "irt_results")
params <- unique(res$item_results$param)
expect_true("a" %in% params)
expect_true("b1" %in% params)
# Only 1 threshold, so no b2 or higher
expect_false("b2" %in% params)
# Summary and plot should still work
s <- summary(res)
expect_s3_class(s, "summary_irt_results")
p <- plot(res)
expect_s3_class(p, "ggplot")
})
# ===========================================================================
# 7. GRM with irt_params_grm() helper
# ===========================================================================
test_that("GRM pipeline works with irt_params_grm() generated parameters", {
params <- irt_params_grm(n_items = 6, n_categories = 5, seed = 99)
design <- irt_design(
model = "GRM",
n_items = 6,
item_params = params
)
study <- irt_study(design, sample_sizes = c(300))
res <- irt_simulate(study, iterations = 3, seed = 707)
expect_s3_class(res, "irt_results")
# 5 categories → 4 thresholds
params_found <- unique(res$item_results$param)
expect_true(all(c("a", "b1", "b2", "b3", "b4") %in% params_found))
})
# ===========================================================================
# 8. Parameter recovery sanity (large N)
# ===========================================================================
test_that("GRM parameter estimates are in reasonable range at large N", {
study <- make_grm_integration_study(
n_items = 5, n_categories = 4, sample_sizes = c(1000)
)
res <- irt_simulate(study, iterations = 5, seed = 808)
s <- summary(res)
is_df <- s$item_summary
# At N=1000 with 5 iterations, bias should be modest (not wildly off)
bias_vals <- is_df$bias[is.finite(is_df$bias)]
expect_true(all(abs(bias_vals) < 1.0),
info = "Bias exceeds 1.0 — parameter recovery may be broken")
# RMSE should be bounded
rmse_vals <- is_df$rmse[is.finite(is_df$rmse)]
expect_true(all(rmse_vals < 2.0),
info = "RMSE exceeds 2.0 — parameter recovery may be broken")
})
# ===========================================================================
# 9. Print methods don't error
# ===========================================================================
test_that("print methods work for GRM pipeline objects", {
study <- make_grm_integration_study(sample_sizes = c(300))
res <- irt_simulate(study, iterations = 3, seed = 909)
s <- summary(res)
expect_output(print(res))
expect_output(print(s))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.