Nothing
# test-apply_missing.R
# TDD tests for apply_missing() — Objective 6 (Part 2)
# These tests define the expected behavior BEFORE implementation exists.
#
# apply_missing() takes a complete response matrix and an irt_study object,
# then introduces missingness according to the study's missing data mechanism.
# Supported mechanisms: none, mcar, mar, booklet, linking.
# --- Helper: create a complete response matrix --------------------------------
make_complete_data <- function(n = 200, n_items = 10, seed = 1) {
set.seed(seed)
matrix(sample(0:1, n * n_items, replace = TRUE), nrow = n, ncol = n_items)
}
make_study <- function(missing = "none", missing_rate = NULL,
test_design = NULL, n_items = 10) {
design <- irt_design(
model = "1PL",
n_items = n_items,
item_params = list(b = seq(-2, 2, length.out = n_items))
)
irt_study(
design,
sample_sizes = c(100, 200),
missing = missing,
missing_rate = missing_rate,
test_design = test_design
)
}
# =============================================================================
# 1. Missing = "none" — No Modification
# =============================================================================
test_that("apply_missing returns identical data when missing = 'none'", {
dat <- make_complete_data(n = 100, n_items = 10)
study <- make_study(missing = "none")
result <- irtsim:::apply_missing(dat, study)
expect_identical(result, dat)
})
test_that("apply_missing introduces no NAs when missing = 'none'", {
dat <- make_complete_data(n = 100, n_items = 10)
study <- make_study(missing = "none")
result <- irtsim:::apply_missing(dat, study)
expect_false(anyNA(result))
})
# =============================================================================
# 2. MCAR — Missing Completely At Random
# =============================================================================
test_that("apply_missing produces NAs under MCAR", {
dat <- make_complete_data(n = 500, n_items = 20)
study <- make_study(missing = "mcar", missing_rate = 0.3, n_items = 20)
result <- irtsim:::apply_missing(dat, study, seed = 42)
expect_true(anyNA(result))
})
test_that("apply_missing MCAR rate is approximately correct", {
dat <- make_complete_data(n = 2000, n_items = 20)
study <- make_study(missing = "mcar", missing_rate = 0.2, n_items = 20)
result <- irtsim:::apply_missing(dat, study, seed = 42)
observed_rate <- mean(is.na(result))
# With 40,000 cells and 20% missing, stochastic tolerance is tight
expect_gt(observed_rate, 0.18)
expect_lt(observed_rate, 0.22)
})
test_that("apply_missing MCAR preserves matrix dimensions", {
dat <- make_complete_data(n = 100, n_items = 15)
study <- make_study(missing = "mcar", missing_rate = 0.25, n_items = 15)
result <- irtsim:::apply_missing(dat, study, seed = 1)
expect_equal(dim(result), dim(dat))
})
test_that("apply_missing MCAR preserves non-missing values", {
dat <- make_complete_data(n = 200, n_items = 10)
study <- make_study(missing = "mcar", missing_rate = 0.3)
result <- irtsim:::apply_missing(dat, study, seed = 1)
# Where result is not NA, values should match original
non_na_mask <- !is.na(result)
expect_equal(result[non_na_mask], dat[non_na_mask])
})
test_that("apply_missing MCAR is reproducible with same seed", {
dat <- make_complete_data(n = 200, n_items = 10)
study <- make_study(missing = "mcar", missing_rate = 0.3)
result1 <- irtsim:::apply_missing(dat, study, seed = 42)
result2 <- irtsim:::apply_missing(dat, study, seed = 42)
# NA positions should be identical
expect_identical(is.na(result1), is.na(result2))
})
test_that("apply_missing MCAR with rate = 0 introduces no NAs", {
dat <- make_complete_data(n = 100, n_items = 10)
study <- make_study(missing = "mcar", missing_rate = 0)
result <- irtsim:::apply_missing(dat, study, seed = 1)
expect_false(anyNA(result))
})
test_that("apply_missing MCAR spreads NAs across rows and columns", {
dat <- make_complete_data(n = 500, n_items = 20)
study <- make_study(missing = "mcar", missing_rate = 0.3, n_items = 20)
result <- irtsim:::apply_missing(dat, study, seed = 42)
# With 30% missing across 500 rows and 20 columns, most rows/cols
# should have at least one NA
rows_with_na <- sum(apply(result, 1, anyNA))
cols_with_na <- sum(apply(result, 2, anyNA))
expect_gt(rows_with_na, 400)
expect_equal(cols_with_na, 20)
})
# =============================================================================
# 3. MAR — Missing At Random (dependent on theta)
# =============================================================================
test_that("apply_missing produces NAs under MAR", {
dat <- make_complete_data(n = 500, n_items = 10)
study <- make_study(missing = "mar", missing_rate = 0.2)
theta <- rnorm(500)
result <- irtsim:::apply_missing(dat, study, seed = 42, theta = theta)
expect_true(anyNA(result))
})
test_that("apply_missing MAR rate is approximately correct overall", {
dat <- make_complete_data(n = 2000, n_items = 20)
study <- make_study(missing = "mar", missing_rate = 0.2, n_items = 20)
theta <- rnorm(2000)
result <- irtsim:::apply_missing(dat, study, seed = 42, theta = theta)
observed_rate <- mean(is.na(result))
# MAR should target approximately the specified rate overall
expect_gt(observed_rate, 0.15)
expect_lt(observed_rate, 0.25)
})
test_that("apply_missing MAR missingness depends on theta", {
dat <- make_complete_data(n = 2000, n_items = 20)
study <- make_study(missing = "mar", missing_rate = 0.3, n_items = 20)
# Create theta with extreme groups
theta <- c(rep(-3, 1000), rep(3, 1000))
result <- irtsim:::apply_missing(dat, study, seed = 42, theta = theta)
# Missing rate should differ between low-theta and high-theta groups
low_miss_rate <- mean(is.na(result[1:1000, ]))
high_miss_rate <- mean(is.na(result[1001:2000, ]))
expect_false(abs(low_miss_rate - high_miss_rate) < 0.01)
})
test_that("apply_missing MAR preserves matrix dimensions", {
dat <- make_complete_data(n = 100, n_items = 10)
study <- make_study(missing = "mar", missing_rate = 0.2)
theta <- rnorm(100)
result <- irtsim:::apply_missing(dat, study, seed = 1, theta = theta)
expect_equal(dim(result), dim(dat))
})
test_that("apply_missing MAR errors without theta", {
dat <- make_complete_data(n = 100, n_items = 10)
study <- make_study(missing = "mar", missing_rate = 0.2)
expect_error(
irtsim:::apply_missing(dat, study, seed = 1),
"theta"
)
})
# =============================================================================
# 4. Booklet Design — Structured Missingness
# =============================================================================
test_that("apply_missing booklet produces structured missingness", {
n_items <- 10
# 2 booklets: booklet 1 administers items 1-6, booklet 2 administers items 5-10
booklet_matrix <- matrix(0, nrow = 2, ncol = n_items)
booklet_matrix[1, 1:6] <- 1
booklet_matrix[2, 5:10] <- 1
dat <- make_complete_data(n = 200, n_items = n_items)
study <- make_study(
missing = "booklet",
test_design = list(booklet_matrix = booklet_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
expect_true(anyNA(result))
})
test_that("apply_missing booklet assigns each respondent to exactly one booklet", {
n_items <- 10
booklet_matrix <- matrix(0, nrow = 3, ncol = n_items)
booklet_matrix[1, 1:4] <- 1
booklet_matrix[2, 3:7] <- 1
booklet_matrix[3, 6:10] <- 1
dat <- make_complete_data(n = 300, n_items = n_items)
study <- make_study(
missing = "booklet",
test_design = list(booklet_matrix = booklet_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
# Each row's NA pattern should match one of the booklet patterns
for (i in seq_len(nrow(result))) {
observed_pattern <- as.integer(!is.na(result[i, ]))
matches_booklet <- apply(booklet_matrix, 1, function(bk) {
identical(as.integer(bk), observed_pattern)
})
expect_true(any(matches_booklet),
info = paste("Row", i, "does not match any booklet pattern"))
}
})
test_that("apply_missing booklet distributes respondents roughly evenly", {
n_items <- 10
booklet_matrix <- matrix(0, nrow = 2, ncol = n_items)
booklet_matrix[1, 1:6] <- 1
booklet_matrix[2, 5:10] <- 1
dat <- make_complete_data(n = 1000, n_items = n_items)
study <- make_study(
missing = "booklet",
test_design = list(booklet_matrix = booklet_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
# Count respondents per booklet
pattern1 <- as.integer(booklet_matrix[1, ] == 1)
count_bk1 <- sum(apply(result, 1, function(row) {
identical(as.integer(!is.na(row)), pattern1)
}))
# With 2 booklets and 1000 respondents, each should get ~500
expect_gt(count_bk1, 400)
expect_lt(count_bk1, 600)
})
test_that("apply_missing booklet preserves observed values", {
n_items <- 10
booklet_matrix <- matrix(0, nrow = 2, ncol = n_items)
booklet_matrix[1, 1:5] <- 1
booklet_matrix[2, 6:10] <- 1
dat <- make_complete_data(n = 200, n_items = n_items)
study <- make_study(
missing = "booklet",
test_design = list(booklet_matrix = booklet_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
# Non-NA values should match original data
non_na_mask <- !is.na(result)
expect_equal(result[non_na_mask], dat[non_na_mask])
})
test_that("apply_missing booklet preserves matrix dimensions", {
n_items <- 10
booklet_matrix <- matrix(0, nrow = 2, ncol = n_items)
booklet_matrix[1, 1:6] <- 1
booklet_matrix[2, 5:10] <- 1
dat <- make_complete_data(n = 100, n_items = n_items)
study <- make_study(
missing = "booklet",
test_design = list(booklet_matrix = booklet_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
expect_equal(dim(result), dim(dat))
})
# =============================================================================
# 5. Linking Design — Structured Missingness for Test Equating
# =============================================================================
test_that("apply_missing linking produces structured missingness", {
n_items <- 12
# 2 forms: form 1 has items 1-8, form 2 has items 5-12 (items 5-8 are linking)
linking_matrix <- matrix(0, nrow = 2, ncol = n_items)
linking_matrix[1, 1:8] <- 1
linking_matrix[2, 5:12] <- 1
dat <- make_complete_data(n = 200, n_items = n_items)
study <- make_study(
missing = "linking",
test_design = list(linking_matrix = linking_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
expect_true(anyNA(result))
})
test_that("apply_missing linking assigns each respondent to exactly one form", {
n_items <- 12
linking_matrix <- matrix(0, nrow = 3, ncol = n_items)
linking_matrix[1, 1:6] <- 1
linking_matrix[2, 4:9] <- 1
linking_matrix[3, 7:12] <- 1
dat <- make_complete_data(n = 300, n_items = n_items)
study <- make_study(
missing = "linking",
test_design = list(linking_matrix = linking_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
for (i in seq_len(nrow(result))) {
observed_pattern <- as.integer(!is.na(result[i, ]))
matches_form <- apply(linking_matrix, 1, function(fm) {
identical(as.integer(fm), observed_pattern)
})
expect_true(any(matches_form),
info = paste("Row", i, "does not match any form pattern"))
}
})
test_that("apply_missing linking preserves observed values", {
n_items <- 12
linking_matrix <- matrix(0, nrow = 2, ncol = n_items)
linking_matrix[1, 1:8] <- 1
linking_matrix[2, 5:12] <- 1
dat <- make_complete_data(n = 200, n_items = n_items)
study <- make_study(
missing = "linking",
test_design = list(linking_matrix = linking_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
non_na_mask <- !is.na(result)
expect_equal(result[non_na_mask], dat[non_na_mask])
})
test_that("apply_missing linking distributes respondents roughly evenly", {
n_items <- 12
linking_matrix <- matrix(0, nrow = 2, ncol = n_items)
linking_matrix[1, 1:8] <- 1
linking_matrix[2, 5:12] <- 1
dat <- make_complete_data(n = 1000, n_items = n_items)
study <- make_study(
missing = "linking",
test_design = list(linking_matrix = linking_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
pattern1 <- as.integer(linking_matrix[1, ] == 1)
count_f1 <- sum(apply(result, 1, function(row) {
identical(as.integer(!is.na(row)), pattern1)
}))
expect_gt(count_f1, 400)
expect_lt(count_f1, 600)
})
# =============================================================================
# 6. Seed Reproducibility
# =============================================================================
test_that("apply_missing MCAR is reproducible with same seed", {
dat <- make_complete_data(n = 200, n_items = 10)
study <- make_study(missing = "mcar", missing_rate = 0.3)
r1 <- irtsim:::apply_missing(dat, study, seed = 99)
r2 <- irtsim:::apply_missing(dat, study, seed = 99)
expect_identical(is.na(r1), is.na(r2))
})
test_that("apply_missing booklet is reproducible with same seed", {
n_items <- 10
booklet_matrix <- matrix(0, nrow = 2, ncol = n_items)
booklet_matrix[1, 1:5] <- 1
booklet_matrix[2, 6:10] <- 1
dat <- make_complete_data(n = 200, n_items = n_items)
study <- make_study(
missing = "booklet",
test_design = list(booklet_matrix = booklet_matrix),
n_items = n_items
)
r1 <- irtsim:::apply_missing(dat, study, seed = 99)
r2 <- irtsim:::apply_missing(dat, study, seed = 99)
expect_identical(is.na(r1), is.na(r2))
})
# =============================================================================
# 7. Edge Cases
# =============================================================================
test_that("apply_missing handles single-row data", {
dat <- make_complete_data(n = 1, n_items = 10)
study <- make_study(missing = "mcar", missing_rate = 0.5)
result <- irtsim:::apply_missing(dat, study, seed = 1)
expect_equal(dim(result), c(1, 10))
})
test_that("apply_missing handles single-column data", {
dat <- make_complete_data(n = 100, n_items = 1)
study <- make_study(missing = "mcar", missing_rate = 0.3, n_items = 1)
result <- irtsim:::apply_missing(dat, study, seed = 1)
expect_equal(dim(result), c(100, 1))
})
test_that("apply_missing booklet with one booklet administers all items", {
n_items <- 10
# Single booklet that includes all items — should produce no NAs
booklet_matrix <- matrix(1, nrow = 1, ncol = n_items)
dat <- make_complete_data(n = 100, n_items = n_items)
study <- make_study(
missing = "booklet",
test_design = list(booklet_matrix = booklet_matrix),
n_items = n_items
)
result <- irtsim:::apply_missing(dat, study, seed = 42)
expect_false(anyNA(result))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.