# test-compute_criterion.R
# TDD tests for internal compute_criterion() helper and exported irt_iterations()
#
# These tests use hand-calculated expected values so outcomes are
# deterministic and independent of any simulation run.
# Morris et al. (2019) definitions throughout.
#
# compute_criterion() signature (expected):
# compute_criterion(estimates, true_value, ci_lower = NULL, ci_upper = NULL)
# Returns a named list with: bias, empirical_se, mse, rmse, coverage,
# mcse_bias, mcse_mse
# =============================================================================
# Helper: access internal function
# =============================================================================
# Short-name accessors for the functions under test.
# compute_criterion() is an unexported irtsim internal, hence `:::`;
# irt_iterations() is exported and bound directly.
compute_criterion <- function(...) irtsim:::compute_criterion(...)
irt_iterations <- irtsim::irt_iterations
# =============================================================================
# Section 1: compute_criterion() — basic bias computation
# =============================================================================
test_that("bias is mean(estimate) - true_value", {
  # mean(c(1, 2, 3)) = 2.0; relative to true value 1.5 the bias is 0.5
  res <- compute_criterion(c(1.0, 2.0, 3.0), 1.5)
  expect_equal(res$bias, 0.5)
})
test_that("bias is zero when estimates are unbiased", {
  # Estimates symmetric around the true value 0 → bias exactly 0.
  res <- compute_criterion(c(-1, 0, 1), 0)
  expect_equal(res$bias, 0)
})
test_that("bias can be negative", {
  # mean(c(0.5, 0.6, 0.7)) = 0.6; 0.6 - 1.0 = -0.4
  res <- compute_criterion(c(0.5, 0.6, 0.7), 1.0)
  expect_equal(res$bias, -0.4)
})
# =============================================================================
# Section 2: compute_criterion() — empirical SE
# =============================================================================
test_that("empirical_se is sd(estimates)", {
  # sd(c(2, 4, 6)) = 2 (sample SD, n - 1 denominator)
  vals <- c(2, 4, 6)
  res <- compute_criterion(vals, 4)
  expect_equal(res$empirical_se, sd(vals))
})
test_that("empirical_se is zero when all estimates are identical", {
  # A constant vector has zero sample SD.
  res <- compute_criterion(rep(3.0, 4), 3.0)
  expect_equal(res$empirical_se, 0)
})
# =============================================================================
# Section 3: compute_criterion() — MSE and RMSE
# =============================================================================
test_that("mse equals mean((estimate - true_value)^2)", {
  # Deviations from true = 2 are -1, 0, 1 → squared mean is 2/3.
  est <- c(1, 2, 3)
  truth <- 2
  res <- compute_criterion(est, truth)
  expect_equal(res$mse, mean((est - truth)^2))
})
test_that("mse equals bias^2 + empirical_se^2 (decomposition identity)", {
  # Bias-variance decomposition of the MSE (Morris et al., 2019).
  # With K replicates, mean((est - true)^2) decomposes EXACTLY as
  #   MSE = bias^2 + ((K - 1) / K) * sd(est)^2
  # because empirical_se = sd() uses the n-1 denominator while the MSE
  # averages squared errors over all K replicates.
  estimates <- c(0.8, 1.3, 1.1, 0.9, 1.4, 1.0, 1.2, 0.7, 1.5, 1.1)
  true_value <- 1.0
  K <- length(estimates)
  result <- compute_criterion(estimates, true_value)
  # Direct definition of the MSE.
  expected_mse <- mean((estimates - true_value)^2)
  expect_equal(result$mse, expected_mse)
  # The decomposition identity this test's title promises — previously the
  # test only re-checked the direct definition and never asserted this.
  expect_equal(
    result$mse,
    result$bias^2 + ((K - 1) / K) * result$empirical_se^2
  )
})
test_that("rmse is sqrt(mse)", {
  # RMSE must be the square root of whatever MSE was computed.
  res <- compute_criterion(c(1, 2, 3, 4, 5), 3)
  expect_equal(res$rmse, sqrt(res$mse))
})
test_that("mse is zero when all estimates equal true value", {
  # Perfect recovery on every replicate → zero error.
  res <- compute_criterion(rep(2.0, 3), 2.0)
  expect_equal(res$mse, 0)
  expect_equal(res$rmse, 0)
})
# =============================================================================
# Section 4: compute_criterion() — coverage
# =============================================================================
test_that("coverage is proportion of CIs containing true value", {
  # Intervals 1, 2 and 4 contain true = 1.0; intervals 3 and 5 do not → 3/5.
  res <- compute_criterion(
    c(0.9, 1.1, 0.5, 1.0, 2.0), 1.0,
    ci_lower = c(0.5, 0.8, 0.3, 0.7, 1.5),
    ci_upper = c(1.3, 1.4, 0.7, 1.3, 2.5)
  )
  expect_equal(res$coverage, 0.6)
})
test_that("coverage is 1.0 when all CIs contain true value", {
  res <- compute_criterion(
    rep(1.0, 3), 1.0,
    ci_lower = rep(0.5, 3),
    ci_upper = rep(1.5, 3)
  )
  expect_equal(res$coverage, 1.0)
})
test_that("coverage is 0.0 when no CIs contain true value", {
  # Every interval lies entirely above the true value 1.0.
  res <- compute_criterion(
    c(3.0, 4.0, 5.0), 1.0,
    ci_lower = c(2.5, 3.5, 4.5),
    ci_upper = c(3.5, 4.5, 5.5)
  )
  expect_equal(res$coverage, 0.0)
})
test_that("coverage handles boundary case (true value equals CI endpoint)", {
  # Convention: lower <= true <= upper counts as covered, so a true value
  # sitting exactly on the lower endpoint is covered.
  res <- compute_criterion(1.0, 1.0, ci_lower = 1.0, ci_upper = 2.0)
  expect_equal(res$coverage, 1.0)
})
test_that("coverage is NULL when CIs are not provided", {
  res <- compute_criterion(c(1, 2, 3), 2)
  expect_null(res$coverage)
})
test_that("coverage excludes iterations with NA CIs", {
  # Iteration 3 has NA bounds and is dropped; of the remaining three
  # intervals the first two cover true = 1.0 and the last does not → 2/3.
  res <- compute_criterion(
    c(0.9, 1.1, 0.5, 2.0), 1.0,
    ci_lower = c(0.5, 0.8, NA, 1.5),
    ci_upper = c(1.3, 1.4, NA, 2.5)
  )
  expect_equal(res$coverage, 2/3)
})
# =============================================================================
# Section 5: compute_criterion() — Monte Carlo SE of bias
# =============================================================================
test_that("mcse_bias equals empirical_se / sqrt(K)", {
  # MCSE(bias) = sd(estimates) / sqrt(K), K = number of replicates.
  est <- c(1, 2, 3, 4, 5)
  res <- compute_criterion(est, 3)
  expect_equal(res$mcse_bias, sd(est) / sqrt(length(est)))
})
# =============================================================================
# Section 6: compute_criterion() — Monte Carlo SE of MSE
# =============================================================================
test_that("mcse_mse follows Morris et al. (2019) formula", {
  # MCSE(MSE) = sqrt((1/K) * var((est - true)^2))
  #           = sd((est - true)^2) / sqrt(K)
  est <- c(0.5, 1.0, 1.5, 2.0, 2.5)
  truth <- 1.5
  sq_err <- (est - truth)^2
  res <- compute_criterion(est, truth)
  expect_equal(res$mcse_mse, sd(sq_err) / sqrt(length(est)))
})
# =============================================================================
# Section 7: compute_criterion() — NA handling
# =============================================================================
test_that("NAs in estimates are excluded from all calculations", {
  # Two of five estimates are NA → effective replicate count K = 3.
  res <- compute_criterion(c(1.0, NA, 2.0, NA, 3.0), 2.0)
  kept <- c(1.0, 2.0, 3.0)
  expect_equal(res$bias, mean(kept) - 2.0)
  expect_equal(res$empirical_se, sd(kept))
  expect_equal(res$mse, mean((kept - 2.0)^2))
  expect_equal(res$mcse_bias, sd(kept) / sqrt(3))
})
test_that("all-NA estimates return all-NA results", {
  # With no usable replicates every criterion is NA.
  res <- compute_criterion(rep(NA_real_, 3), 1.0)
  expect_true(is.na(res$bias))
  expect_true(is.na(res$empirical_se))
  expect_true(is.na(res$mse))
  expect_true(is.na(res$rmse))
  expect_true(is.na(res$mcse_bias))
})
test_that("single valid estimate returns NA for empirical_se and mcse", {
  # sd() of a single value is NA, so SE-based quantities propagate NA
  # while bias and MSE are still computable from the one estimate.
  res <- compute_criterion(c(NA, NA, 2.0, NA), 2.0)
  expect_equal(res$bias, 0)
  expect_true(is.na(res$empirical_se))
  expect_equal(res$mse, 0)
})
# =============================================================================
# Section 8: compute_criterion() — return structure
# =============================================================================
test_that("compute_criterion returns a named list with all expected elements",
{
  res <- compute_criterion(
    c(1, 2, 3, 4, 5), 3,
    ci_lower = c(0.5, 1.5, 2.5, 3.5, 4.5),
    ci_upper = c(1.5, 2.5, 3.5, 4.5, 5.5)
  )
  expect_type(res, "list")
  # All seven documented criteria must be present, in any order.
  expect_named(
    res,
    c("bias", "empirical_se", "mse", "rmse", "coverage",
      "mcse_bias", "mcse_mse"),
    ignore.order = TRUE
  )
})
test_that("all numeric outputs are length-1 scalars", {
  res <- compute_criterion(
    c(1, 2, 3), 2,
    ci_lower = c(0.5, 1.5, 2.5),
    ci_upper = c(1.5, 2.5, 3.5)
  )
  # Every element of the result should be a single value.
  for (nm in names(res)) {
    expect_length(res[[nm]], 1)
  }
})
# =============================================================================
# Section 9: compute_criterion() — hand-calculated comprehensive example
# =============================================================================
test_that("full hand-calculated example matches expected values", {
  # Six replicates, true parameter value 1.0. Each CI is width 0.4,
  # centered on its estimate.
  est <- c(0.8, 1.2, 0.9, 1.1, 1.0, 1.3)
  truth <- 1.0
  lo <- est - 0.2
  hi <- est + 0.2
  K <- length(est)
  sq_err <- (est - truth)^2
  res <- compute_criterion(est, truth, ci_lower = lo, ci_upper = hi)
  # mean(est) = 6.3 / 6 = 1.05 → bias = 0.05
  expect_equal(res$bias, mean(est) - truth)
  # Sample SD (n - 1 denominator), per Morris et al. (2019).
  expect_equal(res$empirical_se, sd(est))
  # Squared deviations: 0.04, 0.04, 0.01, 0.01, 0, 0.09 → MSE = 0.19 / 6
  expect_equal(res$mse, mean(sq_err))
  expect_equal(res$rmse, sqrt(mean(sq_err)))
  # Coverage: five of six intervals contain 1.0; [1.1, 1.5] does not,
  # and [0.6, 1.0] covers because endpoints count → 5/6.
  expect_equal(res$coverage, 5/6)
  # MCSE(bias) = sd(est) / sqrt(K); MCSE(MSE) = sd(sq errors) / sqrt(K).
  expect_equal(res$mcse_bias, sd(est) / sqrt(K))
  expect_equal(res$mcse_mse, sd(sq_err) / sqrt(K))
})
# =============================================================================
# Section 10: irt_iterations() — Burton (2003) formula
# =============================================================================
# Burton formula: R = ceil((z_{alpha/2} * sigma / delta)^2), where sigma is
# the empirical SE of the estimand, delta the acceptable Monte Carlo error,
# and alpha the two-sided significance level (default 0.05).
test_that("irt_iterations returns correct R for known inputs", {
  # z = qnorm(0.975) ≈ 1.959964
  # R = ceil((1.959964 * 0.5 / 0.1)^2) = ceil(96.04) = 97
  want <- ceiling((qnorm(0.975) * 0.5 / 0.1)^2)
  expect_equal(irt_iterations(sigma = 0.5, delta = 0.1, alpha = 0.05), want)
})
test_that("irt_iterations uses alpha = 0.05 by default", {
  # Omitting alpha must match passing alpha = 0.05 explicitly.
  expect_equal(
    irt_iterations(sigma = 1.0, delta = 0.2),
    irt_iterations(sigma = 1.0, delta = 0.2, alpha = 0.05)
  )
})
test_that("irt_iterations with alpha = 0.01 uses z = qnorm(0.995)", {
  want <- ceiling((qnorm(0.995) * 0.3 / 0.05)^2)
  expect_equal(irt_iterations(sigma = 0.3, delta = 0.05, alpha = 0.01), want)
})
test_that("irt_iterations returns integer", {
  expect_type(irt_iterations(sigma = 1, delta = 0.1), "integer")
})
test_that("irt_iterations with large sigma / small delta gives large R", {
  # A noisy estimand with tight precision demands very many iterations.
  got <- irt_iterations(sigma = 2, delta = 0.01)
  expect_equal(got, ceiling((qnorm(0.975) * 2 / 0.01)^2))
  expect_true(got > 100000)
})
test_that("irt_iterations validates sigma > 0", {
  expect_error(irt_iterations(sigma = 0, delta = 0.1))
  expect_error(irt_iterations(sigma = -1, delta = 0.1))
})
test_that("irt_iterations validates delta > 0", {
  expect_error(irt_iterations(sigma = 1, delta = 0))
  expect_error(irt_iterations(sigma = 1, delta = -0.5))
})
test_that("irt_iterations validates alpha in (0, 1)", {
  # Boundary values and out-of-range values must all be rejected.
  for (bad_alpha in c(0, 1, -0.05, 1.5)) {
    expect_error(irt_iterations(sigma = 1, delta = 0.1, alpha = bad_alpha))
  }
})
test_that("irt_iterations returns at least 1", {
  # Even a tiny sigma with a huge tolerance yields a positive count.
  expect_true(irt_iterations(sigma = 0.001, delta = 100) >= 1L)
})