Nothing
# Tests for DIF analysis module (Phase 2) and compare_mfrm enhancements (Phase 3)
# ---------- shared fixtures ----------
local_dif_fixtures <- function(env = parent.frame()) {
toy <- load_mfrmr_data("study1")
persons <- unique(toy$Person)
half <- ceiling(length(persons) / 2)
grp_map <- setNames(
c(rep("A", half), rep("B", length(persons) - half)),
persons
)
toy$Group <- grp_map[toy$Person]
fit <- fit_mfrm(toy, person = "Person", facets = c("Rater", "Criterion"),
score = "Score", method = "JML")
diag <- diagnose_mfrm(fit, residual_pca = "none")
assign("toy", toy, envir = env)
assign("fit", fit, envir = env)
assign("diag", diag, envir = env)
}
# ================================================================
# Phase 2: DIF diagnostic module
# ================================================================
test_that("analyze_dff residual method returns expected structure", {
local_dif_fixtures()
dif <- analyze_dff(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
expect_s3_class(dif, "mfrm_dff")
expect_s3_class(dif, "mfrm_dif")
expect_true(is.data.frame(dif$dif_table))
expect_true(is.data.frame(dif$cell_table))
expect_true(nrow(dif$dif_table) > 0)
expect_true(nrow(dif$cell_table) > 0)
# Required columns in dif_table
required_cols <- c("Level", "Group1", "Group2", "Contrast", "SE", "t",
"df", "p_value", "Classification", "ClassificationSystem",
"ETS", "Method", "SEBasis", "StatisticLabel",
"ProbabilityMetric", "DFBasis", "ContrastBasis",
"ReportingUse", "PrimaryReportingEligible")
expect_true(all(required_cols %in% names(dif$dif_table)))
# Method must be "residual"
expect_true(all(dif$dif_table$Method == "residual"))
expect_true(all(dif$dif_table$ClassificationSystem == "screening"))
expect_true(all(dif$dif_table$StatisticLabel == "Welch screening t"))
expect_true(all(dif$dif_table$DFBasis == "Welch-Satterthwaite approximation"))
expect_true(all(is.na(dif$dif_table$ETS)))
expect_true(all(dif$dif_table$ReportingUse == "screening_only"))
expect_true(all(!dif$dif_table$PrimaryReportingEligible))
expect_equal(dif$config$method, "residual")
})
test_that("analyze_dff alias is backward compatible with analyze_dif", {
local_dif_fixtures()
dff <- analyze_dff(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
expect_equal(dff$dif_table, dif$dif_table)
expect_equal(dff$summary, dif$summary)
expect_equal(dff$config$functioning_label, "DIF")
})
test_that("analyze_dif refit keeps JML contrasts descriptive even when linked", {
local_dif_fixtures()
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "refit")
expect_s3_class(dif, "mfrm_dif")
expect_true(is.data.frame(dif$dif_table))
expect_true(nrow(dif$dif_table) > 0)
expect_true(all(c("ContrastComparable", "ScaleLinkStatus", "LinkingFacets",
"FormalInferenceEligible", "InferenceTier",
"ReportingUse", "PrimaryReportingEligible",
"BaselineConverged", "SubgroupConverged1", "SubgroupConverged2",
"BaselineMethod", "BaselinePrecisionTier",
"BaselineSupportsFormalInference") %in% names(dif$dif_table)))
expect_true(all(c("LinkingStatus", "LinkingDetail", "ETS_Eligible",
"LinkComparable", "PrecisionTier", "SupportsFormalInference") %in% names(dif$group_fits[[1]])))
expect_true("Converged" %in% names(dif$group_fits[[1]]))
expect_true(all(dif$dif_table$ClassificationSystem == "descriptive"))
expect_true(all(dif$dif_table$StatisticLabel == "linked descriptive contrast"))
expect_true(all(dif$dif_table$SEBasis == "not reported without model-based subgroup precision"))
expect_true(all(is.na(dif$dif_table$ETS)))
expect_true(all(dif$dif_table$ContrastComparable))
expect_true(all(!dif$dif_table$FormalInferenceEligible))
expect_true(all(!dif$dif_table$PrimaryReportingEligible))
expect_true(all(dif$dif_table$InferenceTier == "exploratory"))
expect_true(all(dif$dif_table$ScaleLinkStatus == "linked"))
expect_true(all(dif$dif_table$ReportingUse == "screening_only"))
expect_equal(dif$config$method, "refit")
})
test_that("analyze_dif refit surfaces subgroup diagnostics failures", {
local_dif_fixtures()
testthat::local_mocked_bindings(
diagnose_mfrm = function(...) stop("forced subgroup diagnostics failure"),
.package = "mfrmr"
)
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "refit")
group_fit_tbl <- dif$group_fits[[1]]
expect_true(all(c("DiagnosticsStatus", "DiagnosticsDetail") %in% names(group_fit_tbl)))
expect_true(all(group_fit_tbl$DiagnosticsStatus == "failed"))
expect_true(all(group_fit_tbl$PrecisionTier == "diagnostics_unavailable"))
expect_true(all(grepl("forced subgroup diagnostics failure", group_fit_tbl$DiagnosticsDetail, fixed = TRUE)))
})
test_that("analyze_dif refit uses ETS only for linked model-based MML contrasts", {
dat <- mfrmr:::sample_mfrm_data(seed = 42)
persons <- unique(dat$Person)
half <- ceiling(length(persons) / 2)
grp_map <- setNames(
c(rep("A", half), rep("B", length(persons) - half)),
persons
)
dat$Group <- grp_map[dat$Person]
fit_mml <- suppressWarnings(fit_mfrm(
dat,
person = "Person",
facets = c("Rater", "Task", "Criterion"),
score = "Score",
method = "MML",
quad_points = 7,
maxit = 25
))
diag_mml <- diagnose_mfrm(fit_mml, residual_pca = "none")
dif_mml <- analyze_dif(
fit_mml,
diag_mml,
facet = "Criterion",
group = "Group",
data = dat,
method = "refit"
)
expect_true(any(dif_mml$dif_table$ClassificationSystem == "ETS"))
ets_rows <- dif_mml$dif_table$ClassificationSystem == "ETS"
expect_true(all(dif_mml$dif_table$ContrastComparable[ets_rows]))
expect_true(all(dif_mml$dif_table$FormalInferenceEligible[ets_rows]))
expect_true(all(dif_mml$dif_table$PrimaryReportingEligible[ets_rows]))
expect_true(all(dif_mml$dif_table$InferenceTier[ets_rows] == "model_based"))
expect_true(all(dif_mml$dif_table$ReportingUse[ets_rows] == "primary_reporting"))
expect_true(all(stats::na.omit(dif_mml$dif_table$ETS) %in% c("A", "B", "C")))
})
test_that("analyze_dif refit demotes ETS when subgroup refits lack linking facets", {
local_dif_fixtures()
fit_one <- fit_mfrm(toy, person = "Person", facets = "Criterion",
score = "Score", method = "JML", maxit = 20)
diag_one <- diagnose_mfrm(fit_one, residual_pca = "none")
dif_one <- analyze_dif(fit_one, diag_one, facet = "Criterion", group = "Group",
data = toy, method = "refit")
expect_true(all(dif_one$dif_table$ClassificationSystem == "descriptive"))
expect_true(all(is.na(dif_one$dif_table$ETS)))
expect_true(all(dif_one$dif_table$ScaleLinkStatus == "unlinked"))
expect_true(all(!dif_one$dif_table$ContrastComparable))
expect_match(dif_one$summary$Classification[nrow(dif_one$summary)], "insufficient linking", ignore.case = TRUE)
})
test_that("analyze_dif min_obs filter works", {
local_dif_fixtures()
# With a very high min_obs, all cells should be sparse
dif_high <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual", min_obs = 99999)
expect_true(all(dif_high$cell_table$sparse))
})
test_that("analyze_dif p_adjust works for all methods", {
local_dif_fixtures()
for (m in c("holm", "fdr", "bonferroni", "none")) {
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual", p_adjust = m)
expect_true("p_adjusted" %in% names(dif$dif_table))
}
})
test_that("residual method uses screening labels instead of ETS categories", {
local_dif_fixtures()
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
expect_true(all(dif$summary$Classification %in% c("Screen positive", "Screen negative", "Unclassified")))
expect_true(all(is.na(dif$dif_table$ETS)))
})
test_that("residual method uses Welch-Satterthwaite degrees of freedom", {
local_dif_fixtures()
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
first_row <- dif$dif_table[1, , drop = FALSE]
c1 <- subset(dif$cell_table, Level == first_row$Level & GroupValue == first_row$Group1)
c2 <- subset(dif$cell_table, Level == first_row$Level & GroupValue == first_row$Group2)
comp1 <- c1$Var_sum[1] / c1$N[1]^2
comp2 <- c2$Var_sum[1] / c2$N[1]^2
expected_df <- (comp1 + comp2)^2 / ((comp1^2) / (c1$N[1] - 1) + (comp2^2) / (c2$N[1] - 1))
expect_equal(first_row$df, expected_df, tolerance = 1e-8)
})
test_that("refit method ETS classification is valid", {
dat <- mfrmr:::sample_mfrm_data(seed = 99)
dat$Group <- ifelse(dat$Person %in% unique(dat$Person)[1:30], "A", "B")
fit_mml <- suppressWarnings(fit_mfrm(
dat,
person = "Person",
facets = c("Rater", "Task", "Criterion"),
score = "Score",
method = "MML",
quad_points = 7,
maxit = 25
))
diag_mml <- diagnose_mfrm(fit_mml, residual_pca = "none")
dif <- analyze_dif(fit_mml, diag_mml, facet = "Criterion", group = "Group",
data = dat, method = "refit")
expect_true(all(stats::na.omit(dif$dif_table$ETS) %in% c("A", "B", "C")))
})
test_that("dif_interaction_table returns expected structure", {
local_dif_fixtures()
int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group",
data = toy, min_obs = 2)
expect_s3_class(int, "mfrm_dif_interaction")
expect_true(is.data.frame(int$table))
expect_true(nrow(int$table) > 0)
required_cols <- c("Level", "GroupValue", "N", "ObsScore", "ExpScore",
"ObsExpAvg")
expect_true(all(required_cols %in% names(int$table)))
})
test_that("dif_interaction_table min_obs filter works", {
local_dif_fixtures()
int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group",
data = toy, min_obs = 99999)
expect_true(all(int$table$sparse))
})
test_that("plot_dif_heatmap returns matrix when draw = FALSE", {
local_dif_fixtures()
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
for (m in c("obs_exp", "t", "contrast")) {
mat <- plot_dif_heatmap(dif, metric = m, draw = FALSE)
expect_true(is.matrix(mat))
}
})
test_that("plot_dif_heatmap works with dif_interaction_table", {
local_dif_fixtures()
int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group",
data = toy, min_obs = 2)
mat <- plot_dif_heatmap(int, metric = "obs_exp", draw = FALSE)
expect_true(is.matrix(mat))
})
test_that("dif_report produces interpretable output", {
local_dif_fixtures()
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
rpt <- dif_report(dif)
expect_s3_class(rpt, "mfrm_dif_report")
expect_true(is.character(rpt$narrative))
expect_true(nchar(rpt$narrative) > 0)
expect_match(rpt$narrative, "screening", ignore.case = TRUE)
})
test_that("print and summary S3 methods work for DIF objects", {
local_dif_fixtures()
dif <- analyze_dif(fit, diag, facet = "Criterion", group = "Group",
data = toy, method = "residual")
expect_output(print(dif))
s <- summary(dif)
expect_output(print(s))
int <- dif_interaction_table(fit, diag, facet = "Criterion", group = "Group",
data = toy, min_obs = 2)
expect_output(print(int))
s2 <- summary(int)
expect_output(print(s2))
rpt <- dif_report(dif)
expect_output(print(rpt))
s3 <- summary(rpt)
expect_output(print(s3))
})
# ================================================================
# Phase 3: compare_mfrm enhancements
# ================================================================
test_that("compare_mfrm reports comparable IC quantities on a common basis", {
toy_small <- load_mfrmr_data("example_core")
fit_rsm <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "RSM",
quad_points = 5,
maxit = 15
))
fit_pcm <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "PCM",
step_facet = "Criterion",
quad_points = 5,
maxit = 15
))
fit_rsm$summary$Converged[1] <- TRUE
fit_pcm$summary$Converged[1] <- TRUE
comp <- compare_mfrm(RSM = fit_rsm, PCM = fit_pcm)
expect_s3_class(comp, "mfrm_comparison")
tbl <- comp$table
expect_true(all(tbl$ICComparable))
expect_true("Delta_AIC" %in% names(tbl))
expect_true("Delta_BIC" %in% names(tbl))
expect_true("AkaikeWeight" %in% names(tbl))
expect_true("BICWeight" %in% names(tbl))
# Delta should have at least one zero (best model)
expect_equal(min(tbl$Delta_AIC), 0)
expect_equal(min(tbl$Delta_BIC), 0)
# Weights should sum to 1
expect_equal(sum(tbl$AkaikeWeight), 1, tolerance = 1e-10)
expect_equal(sum(tbl$BICWeight), 1, tolerance = 1e-10)
expect_true(isTRUE(comp$comparison_basis$ic_comparable))
expect_true(isTRUE(comp$comparison_basis$same_data))
})
test_that("compare_mfrm evidence_ratios are reciprocal", {
toy_small <- load_mfrmr_data("example_core")
fit_rsm <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "RSM",
quad_points = 5,
maxit = 15
))
fit_pcm <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "PCM",
step_facet = "Criterion",
quad_points = 5,
maxit = 15
))
fit_rsm$summary$Converged[1] <- TRUE
fit_pcm$summary$Converged[1] <- TRUE
comp <- compare_mfrm(RSM = fit_rsm, PCM = fit_pcm)
er <- comp$evidence_ratios
expect_true(is.data.frame(er))
expect_true(nrow(er) > 0)
expect_true("EvidenceRatio" %in% names(er))
expect_true(all(er$EvidenceRatio > 0))
})
test_that("compare_mfrm print and summary work with new fields", {
toy_small <- load_mfrmr_data("example_core")
fit_rsm <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "RSM",
quad_points = 5,
maxit = 15
))
fit_pcm <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "PCM",
step_facet = "Criterion",
quad_points = 5,
maxit = 15
))
fit_rsm$summary$Converged[1] <- TRUE
fit_pcm$summary$Converged[1] <- TRUE
comp <- compare_mfrm(RSM = fit_rsm, PCM = fit_pcm)
expect_output(print(comp))
s <- summary(comp)
expect_output(print(s))
})
test_that("compare_mfrm suppresses IC ranking outside the formal MML path and requires nested = TRUE for LRT", {
toy_small <- load_mfrmr_data("example_core")
fit_jml <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "JML",
model = "RSM",
maxit = 15
))
fit_mml <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "RSM",
quad_points = 5,
maxit = 15
))
fit_jml$summary$Converged[1] <- TRUE
fit_mml$summary$Converged[1] <- TRUE
expect_warning(
comp <- compare_mfrm(JML = fit_jml, MML = fit_mml),
"different estimation methods"
)
expect_false(isTRUE(comp$comparison_basis$ic_comparable))
expect_true(all(is.na(comp$table$Delta_AIC)))
expect_true(all(is.na(comp$table$AkaikeWeight)))
expect_null(comp$lrt)
fit_pcm <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "JML",
model = "PCM",
step_facet = "Criterion",
maxit = 15
))
fit_pcm$summary$Converged[1] <- TRUE
expect_warning(
comp_no_lrt <- compare_mfrm(RSM = fit_jml, PCM = fit_pcm),
"limited to converged MML fits"
)
expect_null(comp_no_lrt$lrt)
expect_warning(
comp_lrt <- compare_mfrm(RSM = fit_jml, PCM = fit_pcm, nested = TRUE),
"formal MML likelihood basis"
)
expect_true(isTRUE(comp_lrt$comparison_basis$nested_requested))
expect_null(comp_lrt$lrt)
expect_true(isTRUE(comp_lrt$comparison_basis$nesting_audit$eligible))
expect_identical(as.character(comp_lrt$comparison_basis$nesting_audit$relation), "RSM_in_PCM")
fit_rsm_2 <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "JML",
model = "RSM",
maxit = 15
))
fit_rsm_2$summary$Converged[1] <- TRUE
expect_warning(
comp_same <- compare_mfrm(RSM1 = fit_jml, RSM2 = fit_rsm_2, nested = TRUE),
"formal MML likelihood basis"
)
expect_null(comp_same$lrt)
expect_false(isTRUE(comp_same$comparison_basis$ic_comparable))
expect_false(isTRUE(comp_same$comparison_basis$nesting_audit$eligible))
expect_identical(as.character(comp_same$comparison_basis$nesting_audit$relation), "same_model")
})
test_that("compare_mfrm suppresses IC ranking for JML-only comparisons", {
local_dif_fixtures()
fit2 <- fit_mfrm(toy, person = "Person", facets = c("Rater", "Criterion"),
score = "Score", method = "JML",
model = "PCM", step_facet = "Criterion")
fit$summary$Converged[1] <- TRUE
fit2$summary$Converged[1] <- TRUE
expect_warning(
comp <- compare_mfrm(RSM = fit, PCM = fit2),
"limited to converged MML fits"
)
expect_false(isTRUE(comp$comparison_basis$ic_comparable))
expect_false(isTRUE(comp$comparison_basis$all_mml))
expect_true(all(is.na(comp$table$Delta_AIC)))
expect_true(all(is.na(comp$table$AkaikeWeight)))
expect_null(comp$evidence_ratios)
})
test_that("compare_mfrm suppresses IC ranking when a fit is marked unconverged", {
local_dif_fixtures()
fit2 <- fit_mfrm(toy, person = "Person", facets = c("Rater", "Criterion"),
score = "Score", method = "JML",
model = "PCM", step_facet = "Criterion")
fit2$summary$Converged[1] <- FALSE
expect_warning(
comp <- compare_mfrm(RSM = fit, PCM = fit2),
"did not converge"
)
expect_false(isTRUE(comp$comparison_basis$ic_comparable))
expect_false(isTRUE(comp$comparison_basis$all_converged))
expect_true(all(is.na(comp$table$Delta_AIC)))
expect_true(all(is.na(comp$table$AkaikeWeight)))
})
test_that("compare_mfrm requires the same prepared response data for IC ranking", {
toy_small <- load_mfrmr_data("example_core")
fit_a <- suppressWarnings(fit_mfrm(
toy_small,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "RSM",
quad_points = 5,
maxit = 15
))
fit_a$summary$Converged[1] <- TRUE
toy_perm <- toy_small[sample.int(nrow(toy_small)), , drop = FALSE]
fit_perm <- suppressWarnings(fit_mfrm(
toy_perm,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "RSM",
quad_points = 5,
maxit = 15
))
fit_perm$summary$Converged[1] <- TRUE
comp_same <- compare_mfrm(RSM1 = fit_a, RSM2 = fit_perm)
expect_true(isTRUE(comp_same$comparison_basis$same_data))
expect_true(all(comp_same$table$ICComparable))
toy_shuf <- toy_small
toy_shuf$Score <- sample(toy_shuf$Score)
fit_shuf <- suppressWarnings(fit_mfrm(
toy_shuf,
person = "Person",
facets = c("Rater", "Criterion"),
score = "Score",
method = "MML",
model = "RSM",
quad_points = 5,
maxit = 15
))
fit_shuf$summary$Converged[1] <- TRUE
expect_warning(
comp_diff <- compare_mfrm(A = fit_a, B = fit_shuf),
"same prepared response data"
)
expect_false(isTRUE(comp_diff$comparison_basis$same_data))
expect_false(any(comp_diff$table$ICComparable))
expect_true(all(is.na(comp_diff$table$Delta_AIC)))
expect_true(all(is.na(comp_diff$table$AkaikeWeight)))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.