tests/testthat/test-pca_imp.R

test_that("same results as imputePCA", {
  skip_if_not_installed("missMDA")
  set.seed(1234)
  to_test <- sim_mat(20, 50, perc_total_na = 0.25, perc_col_na = 1, rho = 0.75)$input
  expect_true(anyNA(to_test))
  # expected orientation (wide)
  r1 <- missMDA::imputePCA(to_test, ncp = 2, nb.init = 10, seed = 1234)
  set.seed(1234)
  r2 <- pca_imp(to_test, ncp = 2, nb.init = 10, seed = 1234)
  expect_equal(r1$completeObs, r2[, ])

  row.w <- runif(nrow(to_test))
  row.w <- row.w / sum(row.w)
  set.seed(1234)
  r3 <- missMDA::imputePCA(to_test, ncp = 2, row.w = row.w, nb.init = 5, seed = 1234)
  set.seed(1234)
  r4 <- pca_imp(to_test, ncp = 2, nb.init = 5, row.w = row.w, seed = 1234)
  expect_equal(r3$completeObs, r4[, ])

  # transposed input also gives identical results
  set.seed(1234)
  to_test_t <- t(to_test)
  r1_t <- missMDA::imputePCA(to_test_t, ncp = 2, nb.init = 10, seed = 1234)
  set.seed(1234)
  r2_t <- pca_imp(to_test_t, ncp = 2, nb.init = 10, seed = 1234)
  expect_equal(r1_t$completeObs, r2_t[, ])

  row.w_t <- runif(nrow(to_test_t))
  row.w_t <- row.w_t / sum(row.w_t)
  set.seed(1234)
  r3_t <- missMDA::imputePCA(to_test_t, ncp = 2, row.w = row.w_t, nb.init = 5, seed = 1234)
  set.seed(1234)
  r4_t <- pca_imp(to_test_t, ncp = 2, nb.init = 5, row.w = row.w_t, seed = 1234)
  expect_equal(r3_t$completeObs, r4_t[, ])
})

test_that("same results as imputePCA, scale = FALSE", {
  skip_if_not_installed("missMDA")
  set.seed(1234)
  to_test <- sim_mat(20, 50, perc_total_na = 0.25, perc_col_na = 1, rho = 0.75)$input
  expect_true(anyNA(to_test))

  # expected orientation (wide)
  r1 <- missMDA::imputePCA(to_test, ncp = 2, nb.init = 10, seed = 1234, scale = FALSE)
  set.seed(1234)
  r2 <- pca_imp(to_test, ncp = 2, nb.init = 10, seed = 1234, scale = FALSE)
  expect_equal(r1$completeObs, r2[, ])

  row.w <- runif(nrow(to_test))
  row.w <- row.w / sum(row.w)
  set.seed(1234)
  r3 <- missMDA::imputePCA(to_test, ncp = 2, row.w = row.w, nb.init = 5, seed = 1234, scale = FALSE)
  set.seed(1234)
  r4 <- pca_imp(to_test, ncp = 2, nb.init = 5, row.w = row.w, seed = 1234, scale = FALSE)
  expect_equal(r3$completeObs, r4[, ])

  # transposed input also gives identical results
  set.seed(1234)
  to_test_t <- t(to_test)
  r1_t <- missMDA::imputePCA(to_test_t, ncp = 2, nb.init = 10, seed = 1234, scale = FALSE)
  set.seed(1234)
  r2_t <- pca_imp(to_test_t, ncp = 2, nb.init = 10, seed = 1234, scale = FALSE)
  expect_equal(r1_t$completeObs, r2_t[, ])

  row.w_t <- runif(nrow(to_test_t))
  row.w_t <- row.w_t / sum(row.w_t)
  set.seed(1234)
  r3_t <- missMDA::imputePCA(to_test_t, ncp = 2, row.w = row.w_t, nb.init = 5, seed = 1234, scale = FALSE)
  set.seed(1234)
  r4_t <- pca_imp(to_test_t, ncp = 2, nb.init = 5, row.w = row.w_t, seed = 1234, scale = FALSE)
  expect_equal(r3_t$completeObs, r4_t[, ])
})

test_that("Behavior with extreme missing columns and rows", {
  set.seed(1234)
  to_test <- sim_mat(20, 50, perc_total_na = 0.25, perc_col_na = 1, rho = 0.75)$input
  to_test[1, ] <- NA
  expect_no_error(pca_imp(to_test, ncp = 2, seed = 1234))
  to_test[, 1] <- NA
  expect_error(pca_imp(to_test, ncp = 2, seed = 1234))
  expect_true(all(is.na(to_test[, 1])))
})

test_that("row.w = 'n_miss' matches missMDA::imputePCA with equivalent weights", {
  skip_if_not_installed("missMDA")
  set.seed(1234)
  to_test <- sim_mat(20, 50, perc_total_na = 0.25, perc_col_na = 1, rho = 0.75)$input

  # compute expected weights manually
  miss <- is.na(to_test)
  n_miss_per_row <- rowSums(miss)
  expected_w <- 1 - (n_miss_per_row / ncol(to_test))
  expected_w[expected_w < 1e-8] <- 1e-8
  expected_w <- expected_w / sum(expected_w)

  # compare "n_miss" shortcut against missMDA with explicit weights
  set.seed(1234)
  r1 <- missMDA::imputePCA(to_test, ncp = 2, nb.init = 5, row.w = expected_w, seed = 1234)
  set.seed(1234)
  r2 <- pca_imp(to_test, ncp = 2, nb.init = 5, row.w = "n_miss", seed = 1234)
  expect_equal(r1$completeObs, r2[, ])
  r3 <- pca_imp(to_test, ncp = 2, nb.init = 5, row.w = expected_w, seed = 1234)
  expect_equal(r2, r3)
})

test_that("row.w = 'n_miss' floors near-zero weights", {
  set.seed(42)
  # create matrix where one row has almost all missing
  mat <- matrix(rnorm(100), nrow = 10, ncol = 10)
  rownames(mat) <- paste0("row", 1:10)
  colnames(mat) <- paste0("col", 1:10)
  mat[1, -1] <- NA # row 1 has 9/10 missing -> weight = 0.1
  mat[2, ] <- NA
  mat[2, 1] <- rnorm(1) # row 2 has 9/10 missing -> weight = 0.1
  mat[3, 1:5] <- NA # row 3 has 5/10 missing -> weight = 0.5

  expect_no_error(pca_imp(mat, ncp = 2, row.w = "n_miss", seed = 123))
})

test_that("row.w rejects invalid strings", {
  mat <- matrix(rnorm(100), nrow = 10, ncol = 10)
  rownames(mat) <- paste0("row", 1:10)
  colnames(mat) <- paste0("col", 1:10)
  mat[1, 1] <- NA

  expect_error(pca_imp(mat, ncp = 2, row.w = "invalid"), regexp = "row.w")
  expect_error(pca_imp(mat, ncp = 2, row.w = c(67, 69)), regexp = "row.w")
})

# eligibility resolution ----
test_that("pca_imp handles ineligible columns (high miss rate / zero variance) correctly", {
  set.seed(1234)
  to_test <- sim_mat(40, 12, perc_total_na = 0.25, perc_col_na = 0.6)$input

  # Force ineligible columns:
  # - Column 1: miss_rate > colmax (0.925 > 0.9)
  to_test[1:37, 1] <- NA
  mean_1 <- mean(to_test[, 1], na.rm = TRUE)
  # - Column 2: constant column -> variance = 0
  to_test[, 2] <- 69
  # - Column 3: near-zero variance with a few NAs
  to_test[, 3] <- 3.14 + rnorm(40, sd = 1e-10)
  to_test[1:3, 3] <- NA
  mean_3 <- mean(to_test[, 3], na.rm = TRUE)
  expect_true(anyNA(to_test))
  expect_true(col_vars(to_test[, 3, drop = F]) < .Machine$double.eps)

  # 1. post_imp = TRUE: ineligible columns are mean-imputed
  res <- pca_imp(
    to_test,
    ncp = 2,
    nb.init = 5,
    seed = 1234,
    colmax = 0.9,
    scale = FALSE
  )

  expect_false(anyNA(res))

  # ineligible high-miss column becomes constant (mean imputation)
  expect_true(all(res[1:37, 1] == mean_1))
  expect_identical(unname(res[1:37, 1]), rep(mean_1, times = 37))

  # constant column untouched
  expect_equal(length(unique(res[, 2])), 1L)

  # near-zero variance column: NAs filled with column mean
  expect_identical(unname(res[1:3, 3]), rep(mean_3, 3))

  # 2. post_imp = FALSE: only eligible columns are PCA-imputed;
  # ineligible columns keep their original NAs
  res_no_post <- pca_imp(
    to_test,
    ncp = 2,
    nb.init = 5,
    seed = 1234,
    colmax = 0.9,
    post_imp = FALSE,
    scale = FALSE
  )

  expect_true(anyNA(res_no_post))
  expect_gt(mean(is.na(res_no_post[, 1])), mean_1)
  expect_equal(unique(res_no_post[, 2]), 69)
  expect_equal(sum(is.na(res_no_post[, 3])), 3L)
  expect_false(anyNA(res_no_post[, 4:12]))
})

test_that("pca_imp falls back to mean imputation when ncp > usable eligible columns", {
  set.seed(1234)
  to_test <- sim_mat(30, 8, perc_total_na = 0.1, perc_col_na = 0.3)$input
  # This column will excceed colmax
  to_test[1:29, 1] <- NA
  expect_no_error(res <- pca_imp(
    to_test,
    ncp = 3,
    nb.init = 3,
    seed = 1234,
    colmax = 0.9,
    post_imp = FALSE
  ))
  # Make most columns ineligible (all-NA)
  to_test[, 1:6] <- NA
  for (i in 1:6) {
    to_test[sample.int(30, size = 1), i] <- rnorm(1)
  }

  # Only 2 eligible columns left -> ncp = 3 > min(28, 1) -> error (1 usable component)
  expect_error(
    pca_imp(
      to_test,
      ncp = 3,
      nb.init = 3,
      seed = 1234,
      colmax = 0.9,
      post_imp = TRUE
    ),
    "exceeds the maximum usable components"
  )
})

test_that("pca_imp doesn't mess up the original object", {
  set.seed(1234)
  to_test <- sim_mat(30, 30, perc_total_na = 0.1, perc_col_na = 1)$input
  expect_true(anyNA(to_test))
  passed_obj <- to_test
  res <- pca_imp(
    to_test,
    ncp = 3,
    nb.init = 3,
    seed = 1234,
    colmax = 0.9,
    post_imp = FALSE
  )
  expect_identical(passed_obj, to_test)
  expect_identical(is.na(to_test), is.na(passed_obj))
})

test_that("pca_imp restores object even on bad input", {
  set.seed(1234)
  to_test <- sim_mat(30, 30, perc_total_na = 0.1, perc_col_na = 1)$input
  passed_obj <- to_test
  expect_true(anyNA(to_test))
  expect_error(pca_imp(
    to_test,
    ncp = 9999,
    nb.init = 3,
    seed = 1234,
    colmax = 0.9,
    post_imp = FALSE
  ))

  expect_identical(to_test, passed_obj)
})

Try the slideimp package in your browser

Any scripts or data that you put into this service are public.

slideimp documentation built on April 17, 2026, 1:07 a.m.