Nothing
# group_imp ----
test_that("group column + feature column API works correctly", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1, rho = 0.75)
obj <- to_test$input
# prep_groups to match the expected API
group_long <- data.frame(
group = to_test$col_group$group,
feature = to_test$col_group$feature
)
# equivalent list-column form for comparison
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
group_list <- data.frame(
feature = I(list(group_1, group_2))
)
expect_identical(
group_imp(obj, group = group_long, k = 3),
group_imp(obj, group = group_list, k = 3),
ignore_attr = "fallback"
)
expect_identical(
group_imp(obj, group = group_long, ncp = 5, nb.init = 10, seed = 1234),
group_imp(obj, group = group_list, ncp = 5, nb.init = 10, seed = 1234),
ignore_attr = "fallback"
)
})
test_that("group column API collapses duplicate groups correctly", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
obj <- to_test$input
# duplicate some rows so the same group label appears in separate blocks
gf <- to_test$col_group
group_doubled <- data.frame(
group = c(gf$group, gf$group),
feature = c(gf$feature, gf$feature)
)
group_single <- data.frame(
group = gf$group,
feature = gf$feature
)
expect_identical(
group_imp(obj, group = group_doubled, k = 3),
group_imp(obj, group = group_single, k = 3)
)
})
test_that("group_imp() handles aux columns present in only some groups (padded)", {
set.seed(1234)
to_test <- sim_mat(n = 20, p = 50, n_col_groups = 2, perc_total_na = 0.3, perc_col_na = 1)
obj <- to_test$input
meta <- to_test$col_group
# move feature 1 into its own tiny group so padding is needed for that
# group only — the other groups will have zero aux columns.
meta[1, "group"] <- "group3"
prepped <- prep_groups(colnames(obj), group = meta, min_group_size = 10)
# sanity: exactly one group has aux, the others have none. This is the
# configuration that previously broke the rep(iter, aux_lengths) split.
aux_lens <- lengths(prepped$aux)
expect_true(sum(aux_lens > 0) == 1L)
expect_true(any(aux_lens == 0L))
# should run without "subscript out of bounds" and return a full matrix.
expect_no_error(
res <- group_imp(obj, group = prepped, k = 3)
)
expect_identical(dim(res), dim(obj))
expect_identical(colnames(res), colnames(obj))
expect_false(anyNA(res))
})
test_that("group column API accepts factor group column", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
obj <- to_test$input
group_df <- data.frame(
group = factor(to_test$col_group$group),
feature = to_test$col_group$feature
)
expect_no_error(group_imp(obj, group = group_df, k = 2))
})
# --- Preconditioning tests for the `group` parameter ---
test_that("group must be a data.frame", {
obj <- matrix(1:12, nrow = 3, dimnames = list(NULL, paste0("f", 1:4)))
expect_error(
group_imp(obj, group = list(feature = list("f1")), k = 2),
"group"
)
})
test_that("group must contain a 'feature' column", {
obj <- matrix(1:12, nrow = 3, dimnames = list(NULL, paste0("f", 1:4)))
group_df <- data.frame(stuff = I(list(c("f1", "f2"))))
expect_error(group_imp(obj, group = group_df, k = 2), "feature")
})
test_that("group errors when feature is character without group column", {
obj <- matrix(1:12, nrow = 3, dimnames = list(NULL, paste0("f", 1:4)))
group_df <- data.frame(feature = c("f1", "f2"))
expect_error(
group_imp(obj, group = group_df, k = 2),
"no group column"
)
})
test_that("group errors on NA in group column", {
obj <- matrix(1:12, nrow = 3, dimnames = list(NULL, paste0("f", 1:4)))
group_df <- data.frame(
group = c("a", NA_character_),
feature = c("f1", "f2")
)
expect_error(group_imp(obj, group = group_df, k = 2), "NA")
})
test_that("group feature column must not have NAs", {
obj <- matrix(1:12, nrow = 3, dimnames = list(NULL, paste0("f", 1:4)))
group_df <- data.frame(
group = c("a", "a"),
feature = c("f1", NA_character_)
)
expect_error(group_imp(obj, group = group_df, k = 2), "feature")
})
test_that("grouped result is correct with aux columns, knn", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
# impute only first 3 values of group 1, the rest are aux. Group 2 do 4 feature.
group_df <- data.frame(
feature = I(list(group_1[1:3], group_2[1:4])),
aux = I(list(group_1, group_2))
)
# run grouped imputation
obj <- to_test$input
grouped_results <- group_imp(obj, group = group_df, k = 3)
# manual imputation for comparison
sub1_cols <- unique(c(group_df$feature[[1]], group_df$aux[[1]]))
sub1 <- knn_imp(obj[, sub1_cols], k = 3, subset = group_df$feature[[1]])
sub2_cols <- unique(c(group_df$feature[[2]], group_df$aux[[2]]))
sub2 <- knn_imp(obj[, sub2_cols], k = 3, subset = group_df$feature[[2]])
expected_results <- cbind(sub1, sub2)[, colnames(obj)]
# Compare results
expect_identical(grouped_results[, ], expected_results)
})
test_that("grouped result is correct with aux columns, pca", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
# impute only first 3 values of group 1, the rest are aux. Group 2 do 4 feature.
group_df <- data.frame(
feature = I(list(group_1[1:3], group_2[1:4])),
aux = I(list(group_1, group_2))
)
# run grouped imputation
obj <- to_test$input
grouped_results <- group_imp(obj, group = group_df, ncp = 2, seed = 1234)
# manual imputation for comparison
sub1_cols <- unique(c(group_df$feature[[1]], group_df$aux[[1]]))
sub1 <- pca_imp(obj[, sub1_cols], ncp = 2, seed = 1234)[, group_df$feature[[1]]]
sub2_cols <- unique(c(group_df$feature[[2]], group_df$aux[[2]]))
sub2 <- pca_imp(obj[, sub2_cols], ncp = 2, seed = 1234)[, group_df$feature[[2]]]
expected_results <- cbind(sub1, sub2)
expect_equal(grouped_results[, colnames(expected_results)], expected_results)
})
test_that("group-specific parameters work correctly", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
# Different k values for each group
group_df <- data.frame(
feature = I(list(group_1[1:3], group_2[1:4])),
aux = I(list(group_1, group_2)),
parameters = I(list(
list(k = 3, dist_pow = 0),
list(k = 7, dist_pow = 1)
))
)
obj <- to_test$input
grouped_results <- group_imp(obj, group = group_df)
# Manual verification with different parameters
sub1 <- knn_imp(obj[, group_1], k = 3, subset = group_1[1:3], dist_pow = 0)
sub2 <- knn_imp(obj[, group_2], k = 7, subset = group_2[1:4], dist_pow = 1)
expected_results <- cbind(sub1, sub2)[, colnames(obj)]
expect_identical(grouped_results[, ], expected_results)
})
test_that("duplicate feature across groups throws error", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
group_df <- data.frame(
feature = I(list(group_1[1:5], c(group_1[5], group_2[1:3]))), # group_1[5] in both
aux = I(list(group_1, group_2))
)
obj <- to_test$input
expect_error(
group_imp(obj, group = group_df, k = 3),
"appear in more than one group"
)
})
test_that("grouped imputation works without aux columns, knn", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
# no aux columns, only feature
group_df <- data.frame(
feature = I(list(group_1[1:5], group_2[6:10]))
)
obj <- to_test$input
grouped_results <- group_imp(
obj,
group = group_df,
k = 3,
allow_unmapped = TRUE
)
# Build expected results: start with original and update only imputed columns
sub1 <- knn_imp(obj[, group_1[1:5]], k = 3)
sub2 <- knn_imp(obj[, group_2[6:10]], k = 3)
expected_results <- obj
expected_results[, group_1[1:5]] <- sub1
expected_results[, group_2[6:10]] <- sub2
expect_identical(grouped_results[, ], expected_results)
})
test_that("group-specific parameters work correctly, pca", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
# Different ncp and coeff.ridge values for each group
group_df <- data.frame(
feature = I(list(group_1[1:3], group_2[1:4])),
aux = I(list(group_1, group_2)),
parameters = I(
list(
list(ncp = 2, coeff.ridge = 1, seed = 1234),
list(ncp = 3, coeff.ridge = 2, seed = 1234)
)
)
)
obj <- to_test$input
grouped_results <- group_imp(obj, group = group_df)
# Manual verification with different parameters
sub1_full <- pca_imp(obj[, group_1], ncp = 2, coeff.ridge = 1, seed = 1234)
sub1 <- obj[, group_1]
sub1[, group_1[1:3]] <- sub1_full[, group_1[1:3]]
sub2_full <- pca_imp(obj[, group_2], ncp = 3, coeff.ridge = 2, seed = 1234)
sub2 <- obj[, group_2]
sub2[, group_2[1:4]] <- sub2_full[, group_2[1:4]]
expected_results <- cbind(sub1, sub2)[, colnames(obj)]
expect_equal(grouped_results[, ], expected_results)
})
test_that("grouped imputation works without aux columns, pca", {
set.seed(1234)
to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
group_1 <- subset(to_test$col_group, group == "group1")$feature
group_2 <- subset(to_test$col_group, group == "group2")$feature
# no aux columns, only feature
group_df <- data.frame(
feature = I(list(group_1[1:5], group_2[6:10]))
)
obj <- to_test$input
grouped_results <- group_imp(
obj,
group = group_df,
ncp = 2,
seed = 1234,
allow_unmapped = TRUE
)
# Build expected results: start with original and update only imputed columns
sub1 <- pca_imp(obj[, group_1[1:5]], ncp = 2, seed = 1234)
sub2 <- pca_imp(obj[, group_2[6:10]], ncp = 2, seed = 1234)
expected_results <- obj
expected_results[, group_1[1:5]] <- sub1
expected_results[, group_2[6:10]] <- sub2
expect_identical(grouped_results[, ], expected_results)
})
test_that("group-specific parameters work correctly in parallel, pca", {
# skip_on_cran()
# skip_on_ci()
# skip_if_not_installed("carrier")
# set.seed(1234)
# to_test <- sim_mat(50, 20, perc_total_na = 0.3, perc_col_na = 1)
# group_1 <- subset(to_test$col_group, group == "group1")$feature
# group_2 <- subset(to_test$col_group, group == "group2")$feature
# # Different ncp and coeff.ridge values for each group
# group_df <- data.frame(
# feature = I(list(group_1[1:3], group_2[1:4])),
# aux = I(list(group_1, group_2)),
# parameters = I(list(
# list(ncp = 2, coeff.ridge = 1),
# list(ncp = 3, coeff.ridge = 2)
# ))
# )
# obj <- to_test$input
# mirai::daemons(2, seed = 1234)
# grouped_results <- group_imp(obj, group = group_df, cores = 1, seed = 1234, nb.init = 10)
# mirai::daemons(0)
# # Manual verification with different parameters
# sub1_full <- pca_imp(obj[, group_1], ncp = 2, coeff.ridge = 1, seed = 1234, nb.init = 10)
# sub1 <- obj[, group_1]
# sub1[, group_1[1:3]] <- sub1_full[, group_1[1:3]]
# sub2_full <- pca_imp(obj[, group_2], ncp = 3, coeff.ridge = 2, seed = 1234, nb.init = 10)
# sub2 <- obj[, group_2]
# sub2[, group_2[1:4]] <- sub2_full[, group_2[1:4]]
# expected_results <- cbind(sub1, sub2)[, colnames(obj)]
#
# imputed_cols <- c(group_1[1:3], group_2[1:4])
# obj_orig <- obj[, imputed_cols]
# grouped_values <- grouped_results[, imputed_cols][is.na(obj_orig)]
# expected_values <- expected_results[, imputed_cols][is.na(obj_orig)]
# # seeding in parallel is hard to reproduce correctly
# expect_true(
# cor(grouped_values, expected_values) > 0.999
# )
})
# prep_groups ----
test_that("prep_groups returns correct structure without k/ncp", {
obj <- matrix(rnorm(2 * 5), nrow = 2, dimnames = list(c("r1", "r2"), c("a", "b", "c", "d", "e")))
features_df <- data.frame(
feature = c("a", "b", "c", "d"),
group = c("g1", "g1", "g2", "g2")
)
result <- prep_groups(colnames(obj), features_df, allow_unmapped = TRUE)
expect_true(inherits(result, "slideimp_tbl"))
expect_true("group" %in% names(result))
expect_true("feature" %in% names(result))
expect_equal(sort(result$group), c("g1", "g2"))
})
test_that("prep_groups handles subset correctly", {
obj <- matrix(rnorm(2 * 6), nrow = 2, dimnames = list(c("r1", "r2"), c("a", "b", "c", "d", "e", "f")))
features_df <- data.frame(
feature = c("a", "b", "c", "d", "e", "f"),
group = c("g1", "g1", "g1", "g2", "g2", "g2")
)
result <- prep_groups(
colnames(obj), features_df,
subset = c("a", "b", "d", "e")
)
expect_true("aux" %in% names(result))
g1_row <- result[result$group == "g1", ]
g2_row <- result[result$group == "g2", ]
# features in subset go to features column
expect_setequal(g1_row$feature[[1]], c("a", "b"))
expect_setequal(g2_row$feature[[1]], c("d", "e"))
# features not in subset go to aux column
expect_equal(g1_row$aux[[1]], "c")
expect_equal(g2_row$aux[[1]], "f")
})
test_that("prep_groups errors when no subset element found", {
obj <- matrix(rnorm(2 * 3), nrow = 2, dimnames = list(c("r1", "r2"), c("a", "b", "c")))
features_df <- data.frame(
feature = c("a", "b", "c"),
group = c("g1", "g1", "g2")
)
expect_error(
suppressWarnings(prep_groups(colnames(obj), features_df, subset = c("x", "y", "z"))),
"x, y, z"
)
})
test_that("prep_groups pads groups to min_group_size", {
n <- 2
p <- 6
obj <- matrix(rnorm(n * p), nrow = n)
rownames(obj) <- paste0("r", seq_len(n))
colnames(obj) <- letters[1:p]
features_df <- data.frame(
feature = c("a", "b", "c"),
group = c("g1", "g1", "g2")
)
result <- prep_groups(
colnames(obj),
features_df,
min_group_size = 4,
seed = 123,
allow_unmapped = TRUE
)
expect_true("aux" %in% names(result))
g1_row <- result[result$group == "g1", ]
g2_row <- result[result$group == "g2", ]
# g1 has 2 features, needs 2 more to reach min_group_size of 4
expect_equal(length(g1_row$feature[[1]]) + length(g1_row$aux[[1]]), 4)
# g2 has 1 feature, needs 3 more
expect_equal(length(g2_row$feature[[1]]) + length(g2_row$aux[[1]]), 4)
})
test_that("prep_groups errors when min_group_size too large", {
n <- 2
p <- 3
obj <- matrix(rnorm(n * p), nrow = n)
rownames(obj) <- paste0("r", seq_len(n))
colnames(obj) <- letters[1:p]
features_df <- data.frame(
feature = c("a", "b", "c"),
group = c("g1", "g1", "g2")
)
expect_error(
prep_groups(colnames(obj), features_df, min_group_size = 100),
"too large"
)
})
test_that("prep_groups errors when no colnames match features_df", {
n <- 2
p <- 3
obj <- matrix(rnorm(n * p), nrow = n)
rownames(obj) <- paste0("r", seq_len(n))
colnames(obj) <- letters[1:p]
features_df <- data.frame(
feature = c("x", "y", "z"),
group = c("g1", "g1", "g2")
)
expect_error(
prep_groups(colnames(obj), features_df),
"columns in `obj` have no matching entry"
)
})
# infeasible ----
test_that("group_imp: on_infeasible = 'error' rethrows slideimp_infeasible", {
set.seed(1234)
sim <- sim_mat(20, 20, perc_total_na = 0.2, perc_col_na = 1)
mat <- sim$input
grp <- sim$col_group
mat[1:19, "feature6"] <- NA
mat[20, ] <- rnorm(20)
bad_grp <- data.frame(
feature = colnames(mat)[1:5],
group = "bad_group",
stringsAsFactors = FALSE
)
mat_bad <- sim$input
mat_bad[1:19, 1:5] <- NA # all subset cols fully NA
expect_error(
group_imp(
mat_bad,
group = bad_grp,
k = 3,
colmax = 0.9,
on_infeasible = "error",
.progress = FALSE,
allow_unmapped = TRUE
),
class = "slideimp_infeasible"
)
})
test_that("group_imp: on_infeasible = 'skip' retains original NAs and flags group", {
set.seed(1234)
sim <- sim_mat(20, 20, perc_total_na = 0.2, perc_col_na = 1)
mat <- sim$input
mat[1:19, 1:5] <- NA # subset cols of the infeasible group all NA
mat[20, ] <- rnorm(20)
bad_grp <- data.frame(
feature = colnames(mat)[1:5],
group = "bad_group"
)
res <- suppressMessages(
suppressWarnings(
group_imp(
mat,
group = bad_grp,
k = 3,
colmax = 0.9,
on_infeasible = "skip",
.progress = FALSE,
allow_unmapped = TRUE
)
)
)
# columns should still be fully NA (skip = original retained)
expect_true(all(is.na(res[1:19, 1:5])))
# group should be recorded in fallback attribute
expect_true("bad_group" %in% attr(res, "fallback"))
expect_identical(attr(res, "fallback_action"), "skip")
expect_true(isTRUE(attr(res, "has_remaining_na")))
})
test_that("group_imp: on_infeasible = 'mean' fills with column means and flags group", {
set.seed(1234)
sim <- sim_mat(20, 20, perc_total_na = 0.2, perc_col_na = 1)
mat <- sim$input
mat[1:19, 1:5] <- NA # 95% missing, exceeds default colmax = 0.9
# leave row 20 with values so mean_imp_col produces something
mat[20, 1:5] <- c(0.1, 0.2, 0.3, 0.4, 0.5)
bad_grp <- data.frame(
feature = colnames(mat)[1:5],
group = "bad_group"
)
res <- suppressMessages(
suppressWarnings(
group_imp(
mat,
group = bad_grp,
k = 3,
colmax = 0.9,
on_infeasible = "mean",
allow_unmapped = TRUE,
.progress = FALSE
)
)
)
expect_true("bad_group" %in% attr(res, "fallback"))
expect_identical(attr(res, "fallback_action"), "mean")
# no remaining NA in the group's subset columns (means were computable)
expect_false(anyNA(res[, 1:5]))
})
test_that("group_imp: mixed feasible + infeasible groups — only bad group falls back", {
set.seed(1234)
sim <- sim_mat(20, 20, perc_total_na = 0.2, perc_col_na = 1)
mat <- sim$input
# force group2 (features 1:5) to be infeasible; leave group1 (feature6) alone
# But group1 has only 1 feature -> k would cap to 0. Expand group1 artificially.
good_grp <- data.frame(
feature = colnames(mat)[6:15],
group = "good_group"
)
bad_grp <- data.frame(
feature = colnames(mat)[1:5],
group = "bad_group"
)
grp <- rbind(good_grp, bad_grp)
mat[1:19, 1:5] <- NA
mat[20, ] <- rnorm(20)
res <- suppressMessages(
suppressWarnings(
group_imp(
mat,
group = grp,
k = 3,
colmax = 0.9,
on_infeasible = "skip",
.progress = FALSE,
allow_unmapped = TRUE
)
)
)
expect_setequal(attr(res, "fallback"), "bad_group")
# good_group columns should be fully imputed
expect_false(anyNA(res[, 6:15]))
# bad_group columns should remain NA
expect_true(all(is.na(res[1:19, 1:5])))
})
test_that("group_imp: on_infeasible = 'error' is the default and propagates", {
set.seed(1234)
sim <- sim_mat(20, 20, perc_total_na = 0.2, perc_col_na = 1)
mat <- sim$input
mat[1:19, 1:5] <- NA
mat[20, ] <- rnorm(20)
bad_grp <- data.frame(
feature = colnames(mat)[1:5],
group = "bad_group",
stringsAsFactors = FALSE
)
# no on_infeasible specified -> default "error"
expect_error(
suppressMessages(
group_imp(
mat,
group = bad_grp,
k = 3,
colmax = 0.9,
allow_unmapped = TRUE,
.progress = FALSE
)
),
class = "slideimp_infeasible"
)
})
test_that("knn_imp: slideimp_infeasible class is set on both abort sites", {
# Site 1: k > n_elig - 1
mat <- matrix(NA_real_, nrow = 20, ncol = 20)
diag(mat) <- rnorm(20)
expect_error(knn_imp(mat, k = 2), class = "slideimp_infeasible")
# Site 2: all subset cols exceed colmax
set.seed(1)
mat2 <- matrix(rnorm(400), 20, 20)
mat2[1:19, 1:5] <- NA
mat2[20, ] <- rnorm(20)
expect_error(
knn_imp(mat2, k = 3, colmax = 0.5, subset = 1:5),
class = "slideimp_infeasible"
)
})
# slideimp.extra ----
test_that("slideimp_extra_manifests works with prep_groups", {
skip("manual testing with {slideimp.extra} on local machines only")
# skip_if_not_installed("slideimp.extra")
# slideimp.extra::set_slideimp_path("dev")
# msa <- slideimp.extra::ilmn_manifest("MSA", deduped = TRUE, rawdir = "dev")
# n_feat <- length(msa$feature)
# sim_mat <- matrix(rnorm(1 * n_feat), nrow = 1, dimnames = list(NULL, msa$feature))
# expect_no_error(prep_groups(colnames(sim_mat), group = msa))
# expect_no_error(prep_groups(colnames(sim_mat), group = "MSA_deduped"))
# slideimp.extra::set_slideimp_path(NULL)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.