tests/testthat/test-fcs.R

# Tests for iterative FCS (chained equations) pattern
# Compares miceFast FCS loop with mice on the air_miss dataset

library(data.table)

# --- Helper: data.table FCS function -----------------------------------------

fcs_dt <- function(dt, n_iter = 5) {
  d <- copy(dt)
  na_ozone <- is.na(d$Ozone)
  na_solar <- is.na(d[["Solar.R"]])
  d <- naive_fill_NA(d)
  for (iter in seq_len(n_iter)) {
    set(d, which(na_ozone), "Ozone", NA_real_)
    d[, Ozone := fill_NA(.SD, "lm_bayes", "Ozone", c("Solar.R", "Wind", "Temp"))]
    set(d, which(na_solar), "Solar.R", NA_real_)
    d[, Solar.R := fill_NA(.SD, "lm_bayes", "Solar.R", c("Ozone", "Wind", "Temp", "Intercept"))]
  }
  d
}

# --- Helper: data.frame FCS function -----------------------------------------

fcs_df <- function(df, n_iter = 5) {
  na_ozone <- which(is.na(df$Ozone))
  na_solar <- which(is.na(df$Solar.R))
  d <- naive_fill_NA(df)
  for (iter in seq_len(n_iter)) {
    d$Ozone[na_ozone] <- NA
    d$Ozone <- fill_NA(d, "lm_bayes", "Ozone", c("Solar.R", "Wind", "Temp"))
    d$Solar.R[na_solar] <- NA
    d$Solar.R <- fill_NA(d, "lm_bayes", "Solar.R", c("Ozone", "Wind", "Temp", "Intercept"))
  }
  d
}

# --- Helper: OOP FCS function ------------------------------------------------

fcs_oop <- function(mat, n_iter = 5) {
  na_ozone <- is.na(mat[, 1])
  na_solar <- is.na(mat[, 2])
  mat_init <- mat + 0
  for (j in seq_len(ncol(mat_init))) {
    na_j <- is.na(mat_init[, j])
    if (any(na_j)) mat_init[na_j, j] <- mean(mat_init[!na_j, j])
  }
  m <- new(miceFast)
  m$set_data(mat_init)
  for (iter in seq_len(n_iter)) {
    col1 <- m$get_data()[, 1]; col1[na_ozone] <- NaN
    m$update_var(1, col1)
    m$update_var(1, m$impute("lm_bayes", 1, c(2, 3, 4))$imputations)
    col2 <- m$get_data()[, 2]; col2[na_solar] <- NaN
    m$update_var(2, col2)
    m$update_var(2, m$impute("lm_bayes", 2, c(1, 3, 4, 5))$imputations)
  }
  as.data.frame(m$get_data())
}

# =============================================================================
# Tests: Basic FCS correctness
# =============================================================================

test_that("FCS data.table: all NAs are imputed", {
  data(air_miss, package = "miceFast")
  dt <- as.data.table(air_miss)
  set.seed(101)
  result <- fcs_dt(dt)
  expect_equal(sum(is.na(result$Ozone)), 0)
  expect_equal(sum(is.na(result$Solar.R)), 0)
})

test_that("FCS data.frame: all NAs are imputed", {
  data(air_miss, package = "miceFast")
  set.seed(102)
  result <- fcs_df(air_miss)
  expect_equal(sum(is.na(result$Ozone)), 0)
  expect_equal(sum(is.na(result$Solar.R)), 0)
})

test_that("FCS OOP matrix: all NaN are imputed", {
  data(air_miss, package = "miceFast")
  mat <- cbind(
    as.matrix(air_miss[, c("Ozone", "Solar.R", "Wind", "Temp")]),
    Intercept = 1
  )
  set.seed(103)
  result <- fcs_oop(mat)
  expect_equal(sum(is.nan(result[[1]])), 0)
  expect_equal(sum(is.nan(result[[2]])), 0)
})

test_that("FCS handles joint missingness (rows where both Ozone and Solar.R are NA)", {
  data(air_miss, package = "miceFast")
  dt <- as.data.table(air_miss)
  joint_na <- which(is.na(dt$Ozone) & is.na(dt$Solar.R))
  expect_true(length(joint_na) > 0, info = "air_miss should have jointly missing rows")

  set.seed(104)
  result <- fcs_dt(dt)
  expect_false(anyNA(result$Ozone[joint_na]))
  expect_false(anyNA(result$Solar.R[joint_na]))
})

test_that("FCS does not modify the original data", {
  data(air_miss, package = "miceFast")
  dt <- as.data.table(air_miss)
  dt_orig <- copy(dt)
  set.seed(105)
  fcs_dt(dt)
  expect_identical(dt, dt_orig)
})

test_that("FCS preserves non-missing values", {
  data(air_miss, package = "miceFast")
  dt <- as.data.table(air_miss)
  obs_ozone <- which(!is.na(dt$Ozone))
  obs_solar <- which(!is.na(dt$Solar.R))
  orig_ozone <- dt$Ozone[obs_ozone]
  orig_solar <- dt$Solar.R[obs_solar]

  set.seed(106)
  result <- fcs_dt(dt)
  expect_equal(result$Ozone[obs_ozone], orig_ozone)
  expect_equal(result$Solar.R[obs_solar], orig_solar)
})

test_that("FCS n_iter = 1 still produces complete data", {
  data(air_miss, package = "miceFast")
  dt <- as.data.table(air_miss)
  set.seed(107)
  result <- fcs_dt(dt, n_iter = 1)
  expect_equal(sum(is.na(result$Ozone)), 0)
  expect_equal(sum(is.na(result$Solar.R)), 0)
})

test_that("FCS produces different results across stochastic runs (lm_bayes)", {
  data(air_miss, package = "miceFast")
  dt <- as.data.table(air_miss)
  set.seed(201)
  r1 <- fcs_dt(dt)
  set.seed(202)
  r2 <- fcs_dt(dt)
  # Imputed values should differ between seeds
  na_idx <- which(is.na(dt$Ozone))
  expect_false(identical(r1$Ozone[na_idx], r2$Ozone[na_idx]))
})

# =============================================================================
# Tests: MI + pool workflow
# =============================================================================

test_that("FCS MI workflow produces valid pooled results", {
  data(air_miss, package = "miceFast")
  dt <- as.data.table(air_miss)

  set.seed(301)
  completed <- lapply(1:10, function(i) fcs_dt(dt))
  fits <- lapply(completed, function(d) lm(Ozone ~ Wind + Temp, data = d))
  res <- pool(fits)

  expect_s3_class(res, "miceFast_pool")
  s <- summary(res)

  # Coefficients should be reasonable for Ozone ~ Wind + Temp
  # Wind should have a negative effect, Temp a positive effect
  expect_true(s[s$term == "Wind", "estimate"] < 0)
  expect_true(s[s$term == "Temp", "estimate"] > 0)

  # Standard errors should be positive and finite
  expect_true(all(s$std.error > 0))
  expect_true(all(is.finite(s$std.error)))

  # p-values should exist and be in [0, 1]
  expect_true(all(s$p.value >= 0 & s$p.value <= 1))
})

# =============================================================================
# Tests: Comparison with mice
# =============================================================================

test_that("FCS pooled estimates are similar to mice", {
  skip_if_not_installed("mice")

  data(air_miss, package = "miceFast")

  # --- mice ---
  mice_data <- air_miss[, c("Ozone", "Solar.R", "Wind", "Temp")]
  set.seed(42)
  mice_imp <- mice::mice(
    mice_data,
    m = 20,
    method = "norm",
    maxit = 5,
    printFlag = FALSE
  )
  mice_fits <- with(mice_imp, lm(Ozone ~ Wind + Temp))
  mice_pool <- mice::pool(mice_fits)
  mice_coefs <- summary(mice_pool)

  # --- miceFast FCS ---
  dt <- as.data.table(air_miss)
  set.seed(42)
  completed <- lapply(1:20, function(i) fcs_dt(dt))
  fits <- lapply(completed, function(d) lm(Ozone ~ Wind + Temp, data = d))
  mf_pool <- pool(fits)
  mf_coefs <- summary(mf_pool)

  # Point estimates should be in the same ballpark (within 50% or 15 units)
  for (trm in c("(Intercept)", "Wind", "Temp")) {
    mice_est <- mice_coefs[mice_coefs$term == trm, "estimate"]
    mf_est <- mf_coefs[mf_coefs$term == trm, "estimate"]
    expect_true(
      abs(mf_est - mice_est) < max(abs(mice_est) * 0.5, 15),
      info = sprintf(
        "%s: miceFast=%.3f vs mice=%.3f (diff=%.3f)",
        trm, mf_est, mice_est, mf_est - mice_est
      )
    )
  }

  # Sign of effects should match
  expect_equal(
    sign(mf_coefs[mf_coefs$term == "Wind", "estimate"]),
    sign(mice_coefs[mice_coefs$term == "Wind", "estimate"])
  )
  expect_equal(
    sign(mf_coefs[mf_coefs$term == "Temp", "estimate"]),
    sign(mice_coefs[mice_coefs$term == "Temp", "estimate"])
  )

  # Standard errors should be in the same order of magnitude
  for (trm in c("(Intercept)", "Wind", "Temp")) {
    mice_se <- mice_coefs[mice_coefs$term == trm, "std.error"]
    mf_se <- mf_coefs[mf_coefs$term == trm, "std.error"]
    ratio <- mf_se / mice_se
    expect_true(
      ratio > 0.25 && ratio < 4,
      info = sprintf(
        "%s SE ratio: miceFast/mice = %.2f (mf=%.3f, mice=%.3f)",
        trm, ratio, mf_se, mice_se
      )
    )
  }
})

test_that("FCS imputed means are close to mice imputed means", {
  skip_if_not_installed("mice")

  data(air_miss, package = "miceFast")

  # --- mice ---
  mice_data <- air_miss[, c("Ozone", "Solar.R", "Wind", "Temp")]
  set.seed(99)
  mice_imp <- mice::mice(
    mice_data,
    m = 20,
    method = "norm",
    maxit = 5,
    printFlag = FALSE
  )
  mice_completed <- mice::complete(mice_imp, action = "all")
  mice_ozone_mean <- mean(sapply(mice_completed, function(d) mean(d$Ozone)))
  mice_solar_mean <- mean(sapply(mice_completed, function(d) mean(d$Solar.R)))

  # --- miceFast FCS ---
  dt <- as.data.table(air_miss)
  set.seed(99)
  completed <- lapply(1:20, function(i) fcs_dt(dt))
  mf_ozone_mean <- mean(sapply(completed, function(d) mean(d$Ozone)))
  mf_solar_mean <- mean(sapply(completed, function(d) mean(d$Solar.R)))

  # Means should be within 20% of each other
  expect_true(
    abs(mf_ozone_mean - mice_ozone_mean) / mice_ozone_mean < 0.2,
    info = sprintf("Ozone mean: miceFast=%.2f vs mice=%.2f", mf_ozone_mean, mice_ozone_mean)
  )
  expect_true(
    abs(mf_solar_mean - mice_solar_mean) / mice_solar_mean < 0.2,
    info = sprintf("Solar.R mean: miceFast=%.2f vs mice=%.2f", mf_solar_mean, mice_solar_mean)
  )
})

Try the miceFast package in your browser

Any scripts or data that you put into this service are public.

miceFast documentation built on Feb. 26, 2026, 5:06 p.m.