Nothing
# Tests for iterative FCS (chained equations) pattern
# Compares miceFast FCS loop with mice on the air_miss dataset
library(data.table)
# --- Helper: data.table FCS function -----------------------------------------
fcs_dt <- function(dt, n_iter = 5) {
d <- copy(dt)
na_ozone <- is.na(d$Ozone)
na_solar <- is.na(d[["Solar.R"]])
d <- naive_fill_NA(d)
for (iter in seq_len(n_iter)) {
set(d, which(na_ozone), "Ozone", NA_real_)
d[, Ozone := fill_NA(.SD, "lm_bayes", "Ozone", c("Solar.R", "Wind", "Temp"))]
set(d, which(na_solar), "Solar.R", NA_real_)
d[, Solar.R := fill_NA(.SD, "lm_bayes", "Solar.R", c("Ozone", "Wind", "Temp", "Intercept"))]
}
d
}
# --- Helper: data.frame FCS function -----------------------------------------
fcs_df <- function(df, n_iter = 5) {
na_ozone <- which(is.na(df$Ozone))
na_solar <- which(is.na(df$Solar.R))
d <- naive_fill_NA(df)
for (iter in seq_len(n_iter)) {
d$Ozone[na_ozone] <- NA
d$Ozone <- fill_NA(d, "lm_bayes", "Ozone", c("Solar.R", "Wind", "Temp"))
d$Solar.R[na_solar] <- NA
d$Solar.R <- fill_NA(d, "lm_bayes", "Solar.R", c("Ozone", "Wind", "Temp", "Intercept"))
}
d
}
# --- Helper: OOP FCS function ------------------------------------------------
fcs_oop <- function(mat, n_iter = 5) {
na_ozone <- is.na(mat[, 1])
na_solar <- is.na(mat[, 2])
mat_init <- mat + 0
for (j in seq_len(ncol(mat_init))) {
na_j <- is.na(mat_init[, j])
if (any(na_j)) mat_init[na_j, j] <- mean(mat_init[!na_j, j])
}
m <- new(miceFast)
m$set_data(mat_init)
for (iter in seq_len(n_iter)) {
col1 <- m$get_data()[, 1]; col1[na_ozone] <- NaN
m$update_var(1, col1)
m$update_var(1, m$impute("lm_bayes", 1, c(2, 3, 4))$imputations)
col2 <- m$get_data()[, 2]; col2[na_solar] <- NaN
m$update_var(2, col2)
m$update_var(2, m$impute("lm_bayes", 2, c(1, 3, 4, 5))$imputations)
}
as.data.frame(m$get_data())
}
# =============================================================================
# Tests: Basic FCS correctness
# =============================================================================
test_that("FCS data.table: all NAs are imputed", {
data(air_miss, package = "miceFast")
dt <- as.data.table(air_miss)
set.seed(101)
result <- fcs_dt(dt)
expect_equal(sum(is.na(result$Ozone)), 0)
expect_equal(sum(is.na(result$Solar.R)), 0)
})
test_that("FCS data.frame: all NAs are imputed", {
data(air_miss, package = "miceFast")
set.seed(102)
result <- fcs_df(air_miss)
expect_equal(sum(is.na(result$Ozone)), 0)
expect_equal(sum(is.na(result$Solar.R)), 0)
})
test_that("FCS OOP matrix: all NaN are imputed", {
data(air_miss, package = "miceFast")
mat <- cbind(
as.matrix(air_miss[, c("Ozone", "Solar.R", "Wind", "Temp")]),
Intercept = 1
)
set.seed(103)
result <- fcs_oop(mat)
expect_equal(sum(is.nan(result[[1]])), 0)
expect_equal(sum(is.nan(result[[2]])), 0)
})
test_that("FCS handles joint missingness (rows where both Ozone and Solar.R are NA)", {
data(air_miss, package = "miceFast")
dt <- as.data.table(air_miss)
joint_na <- which(is.na(dt$Ozone) & is.na(dt$Solar.R))
expect_true(length(joint_na) > 0, info = "air_miss should have jointly missing rows")
set.seed(104)
result <- fcs_dt(dt)
expect_false(anyNA(result$Ozone[joint_na]))
expect_false(anyNA(result$Solar.R[joint_na]))
})
test_that("FCS does not modify the original data", {
data(air_miss, package = "miceFast")
dt <- as.data.table(air_miss)
dt_orig <- copy(dt)
set.seed(105)
fcs_dt(dt)
expect_identical(dt, dt_orig)
})
test_that("FCS preserves non-missing values", {
data(air_miss, package = "miceFast")
dt <- as.data.table(air_miss)
obs_ozone <- which(!is.na(dt$Ozone))
obs_solar <- which(!is.na(dt$Solar.R))
orig_ozone <- dt$Ozone[obs_ozone]
orig_solar <- dt$Solar.R[obs_solar]
set.seed(106)
result <- fcs_dt(dt)
expect_equal(result$Ozone[obs_ozone], orig_ozone)
expect_equal(result$Solar.R[obs_solar], orig_solar)
})
test_that("FCS n_iter = 1 still produces complete data", {
data(air_miss, package = "miceFast")
dt <- as.data.table(air_miss)
set.seed(107)
result <- fcs_dt(dt, n_iter = 1)
expect_equal(sum(is.na(result$Ozone)), 0)
expect_equal(sum(is.na(result$Solar.R)), 0)
})
test_that("FCS produces different results across stochastic runs (lm_bayes)", {
data(air_miss, package = "miceFast")
dt <- as.data.table(air_miss)
set.seed(201)
r1 <- fcs_dt(dt)
set.seed(202)
r2 <- fcs_dt(dt)
# Imputed values should differ between seeds
na_idx <- which(is.na(dt$Ozone))
expect_false(identical(r1$Ozone[na_idx], r2$Ozone[na_idx]))
})
# =============================================================================
# Tests: MI + pool workflow
# =============================================================================
test_that("FCS MI workflow produces valid pooled results", {
data(air_miss, package = "miceFast")
dt <- as.data.table(air_miss)
set.seed(301)
completed <- lapply(1:10, function(i) fcs_dt(dt))
fits <- lapply(completed, function(d) lm(Ozone ~ Wind + Temp, data = d))
res <- pool(fits)
expect_s3_class(res, "miceFast_pool")
s <- summary(res)
# Coefficients should be reasonable for Ozone ~ Wind + Temp
# Wind should have a negative effect, Temp a positive effect
expect_true(s[s$term == "Wind", "estimate"] < 0)
expect_true(s[s$term == "Temp", "estimate"] > 0)
# Standard errors should be positive and finite
expect_true(all(s$std.error > 0))
expect_true(all(is.finite(s$std.error)))
# p-values should exist and be in [0, 1]
expect_true(all(s$p.value >= 0 & s$p.value <= 1))
})
# =============================================================================
# Tests: Comparison with mice
# =============================================================================
test_that("FCS pooled estimates are similar to mice", {
skip_if_not_installed("mice")
data(air_miss, package = "miceFast")
# --- mice ---
mice_data <- air_miss[, c("Ozone", "Solar.R", "Wind", "Temp")]
set.seed(42)
mice_imp <- mice::mice(
mice_data,
m = 20,
method = "norm",
maxit = 5,
printFlag = FALSE
)
mice_fits <- with(mice_imp, lm(Ozone ~ Wind + Temp))
mice_pool <- mice::pool(mice_fits)
mice_coefs <- summary(mice_pool)
# --- miceFast FCS ---
dt <- as.data.table(air_miss)
set.seed(42)
completed <- lapply(1:20, function(i) fcs_dt(dt))
fits <- lapply(completed, function(d) lm(Ozone ~ Wind + Temp, data = d))
mf_pool <- pool(fits)
mf_coefs <- summary(mf_pool)
# Point estimates should be in the same ballpark (within 50% or 15 units)
for (trm in c("(Intercept)", "Wind", "Temp")) {
mice_est <- mice_coefs[mice_coefs$term == trm, "estimate"]
mf_est <- mf_coefs[mf_coefs$term == trm, "estimate"]
expect_true(
abs(mf_est - mice_est) < max(abs(mice_est) * 0.5, 15),
info = sprintf(
"%s: miceFast=%.3f vs mice=%.3f (diff=%.3f)",
trm, mf_est, mice_est, mf_est - mice_est
)
)
}
# Sign of effects should match
expect_equal(
sign(mf_coefs[mf_coefs$term == "Wind", "estimate"]),
sign(mice_coefs[mice_coefs$term == "Wind", "estimate"])
)
expect_equal(
sign(mf_coefs[mf_coefs$term == "Temp", "estimate"]),
sign(mice_coefs[mice_coefs$term == "Temp", "estimate"])
)
# Standard errors should be in the same order of magnitude
for (trm in c("(Intercept)", "Wind", "Temp")) {
mice_se <- mice_coefs[mice_coefs$term == trm, "std.error"]
mf_se <- mf_coefs[mf_coefs$term == trm, "std.error"]
ratio <- mf_se / mice_se
expect_true(
ratio > 0.25 && ratio < 4,
info = sprintf(
"%s SE ratio: miceFast/mice = %.2f (mf=%.3f, mice=%.3f)",
trm, ratio, mf_se, mice_se
)
)
}
})
test_that("FCS imputed means are close to mice imputed means", {
skip_if_not_installed("mice")
data(air_miss, package = "miceFast")
# --- mice ---
mice_data <- air_miss[, c("Ozone", "Solar.R", "Wind", "Temp")]
set.seed(99)
mice_imp <- mice::mice(
mice_data,
m = 20,
method = "norm",
maxit = 5,
printFlag = FALSE
)
mice_completed <- mice::complete(mice_imp, action = "all")
mice_ozone_mean <- mean(sapply(mice_completed, function(d) mean(d$Ozone)))
mice_solar_mean <- mean(sapply(mice_completed, function(d) mean(d$Solar.R)))
# --- miceFast FCS ---
dt <- as.data.table(air_miss)
set.seed(99)
completed <- lapply(1:20, function(i) fcs_dt(dt))
mf_ozone_mean <- mean(sapply(completed, function(d) mean(d$Ozone)))
mf_solar_mean <- mean(sapply(completed, function(d) mean(d$Solar.R)))
# Means should be within 20% of each other
expect_true(
abs(mf_ozone_mean - mice_ozone_mean) / mice_ozone_mean < 0.2,
info = sprintf("Ozone mean: miceFast=%.2f vs mice=%.2f", mf_ozone_mean, mice_ozone_mean)
)
expect_true(
abs(mf_solar_mean - mice_solar_mean) / mice_solar_mean < 0.2,
info = sprintf("Solar.R mean: miceFast=%.2f vs mice=%.2f", mf_solar_mean, mice_solar_mean)
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.