Nothing
test_that("`impute_knn_brute` and `impute_knn_mlpack` calculate the missing location correctly", {
set.seed(1234)
to_test <- sim_mat(20, 50, perc_total_na = 0.5, perc_col_na = 1)$input
miss <- is.na(to_test)
cmiss <- colSums(miss)
miss_rate <- cmiss / nrow(to_test)
# same preprocessing knn_imp() does before calling the low-level functions
colmax <- 0.9
eligible <- miss_rate < colmax
pre_imp_cols <- to_test[, eligible, drop = FALSE]
pre_imp_miss <- miss[, eligible, drop = FALSE]
pre_imp_cols[pre_imp_miss] <- 0.0
# local groups (0-based indices into pre_imp_cols)
local_has_miss <- which(cmiss[eligible] > 0L)
grp_impute <- as.integer(local_has_miss - 1L)
# Expected missing positions *local to the submatrix pre_imp_cols* (1-based)
expected_local <- unname(which(pre_imp_miss, arr.ind = TRUE))
# brute-force path
imputed_brute <- impute_knn_brute(
obj = pre_imp_cols,
nmiss = !pre_imp_miss,
k = 5,
grp_impute = grp_impute,
grp_miss_no_imp = integer(0L),
grp_complete = integer(0L),
method = 0L,
dist_pow = 1,
cache = FALSE,
cores = 1
)
# mlpack path
imputed_mlpack <- impute_knn_mlpack(
obj = mean_imp_col(pre_imp_cols),
nmiss = !pre_imp_miss,
k = 5,
grp_impute = grp_impute,
method = 0L,
dist_pow = 1,
cores = 1
)
# extract only the location columns (row, local_col_1based) that the C++ already returns
idx_brute <- imputed_brute[, 1:2, drop = FALSE]
idx_mlpack <- imputed_mlpack[, 1:2, drop = FALSE]
# sort so the comparison is order-independent
expected_sorted <- expected_local[order(expected_local[, 1], expected_local[, 2]), , drop = FALSE]
brute_sorted <- idx_brute[order(idx_brute[, 1], idx_brute[, 2]), , drop = FALSE]
mlpack_sorted <- idx_mlpack[order(idx_mlpack[, 1], idx_mlpack[, 2]), , drop = FALSE]
expect_equal(brute_sorted, expected_sorted)
expect_equal(mlpack_sorted, expected_sorted)
})
test_that("`knn_imp` works", {
obj <- sim_mat(50, 100)$input
expect_no_error(knn_imp(
obj,
k = 3,
method = "euclidean"
))
expect_no_error(knn_imp(
obj,
k = 3,
method = "manhattan",
tree = TRUE
))
})
test_that("`knn_imp` cache and non cache path is the same", {
obj <- sim_mat(50, 100)$input
expect_identical(
knn_imp(obj, k = 10, method = "euclidean"),
knn_imp(obj, k = 10, method = "euclidean", max_cache = 0)
)
})
test_that("`knn_imp` tree and brute is the same for few missing values", {
set.seed(1234)
to_test <- sim_mat(20, n = 1000, perc_total_na = 0, perc_col_na = 0)$input
to_test[1, 1] <- NA
to_test[2, 2] <- NA
expect_identical(
knn_imp(to_test, k = 3, method = "euclidean"),
knn_imp(to_test, k = 3, method = "euclidean", tree = TRUE)
)
})
test_that("Exactly replicate `impute.knn`", {
skip("Manual Testing Only")
# library(impute)
# set.seed(1234)
# # post_imp behavior can cause differences
# obj <- sim_mat(100, 100, perc_total_na = 0.05)$input
#
# # Check if the 'impute' package is installed
#
# # Perform imputation using knn_imp with method "impute.knn" on transposed data
# r1 <- knn_imp(obj, k = 3, method = "euclidean", post_imp = FALSE)
#
# # Perform imputation using the original impute.knn function
# # Transpose the result to match the orientation
# r2 <- t(
# impute.knn(
# t(obj),
# k = 3,
# maxp = ncol(obj)
# )$data
# )
#
# # Verify that the results from knn_imp match exactly with impute.knn
# expect_equal(r1[, ], r2[, ])
})
test_that("`subset` feature of `knn_imp` works with post_imp = FALSE/TRUE", {
set.seed(1234)
to_test <- sim_mat(20, 50, perc_total_na = 0.2, perc_col_na = 1)$input
# Impute just 3 columns
## Check subset using numeric index
r1 <- knn_imp(to_test, k = 3, post_imp = FALSE, subset = c(1, 3, 5))
expect_true(!anyNA(r1[, c(1, 3, 5)]))
expect_equal(is.na(r1[, -c(1, 3, 5)]), is.na(to_test[, -c(1, 3, 5)]))
## Check subset using character vector
r2 <- knn_imp(
to_test,
k = 3,
post_imp = FALSE,
subset = paste0("feature", c(1, 3, 5))
)
expect_equal(r1, r2)
# Test with post_imp = TRUE and a column requiring post imputation
to_test_post <- to_test
# Column 5 will be colMeans if post_imp is TRUE
to_test_post[2:nrow(to_test_post), 5] <- NA
r3 <- knn_imp(to_test_post, k = 3, post_imp = TRUE, subset = c(1, 3, 5))
expect_true(!anyNA(r3[, c(1, 3, 5)]))
# Expect that only the subset columns are imputed. The rests are untouched
expect_equal(is.na(r3[, -c(1, 3, 5)]), is.na(to_test_post[, -c(1, 3, 5)]))
# Verify post_imp on column 5
col5_mean <- mean(to_test_post[, 5], na.rm = TRUE)
expect_equal(unname(r3[, 5]), rep(col5_mean, nrow(to_test_post)))
r4 <- knn_imp(
to_test_post,
k = 3,
post_imp = TRUE,
subset = paste0("feature", c(1, 3, 5))
)
expect_equal(r3, r4)
})
test_that("Behavior with extreme missing columns and rows", {
set.seed(1234)
to_test <- sim_mat(20, 20, perc_total_na = 0.2, perc_col_na = 1)$input
# row 1 is all NA
to_test[1, ] <- NA
expect_no_error(knn_imp(to_test, k = 3, post_imp = FALSE))
expect_true(!anyNA(knn_imp(to_test, k = 3, post_imp = TRUE)))
to_test[, 1] <- NA
expect_error(knn_imp(to_test, k = 3, post_imp = FALSE), "All NA/Inf")
# not admissible
mat <- matrix(NA, nrow = 20, ncol = 20)
diag(mat) <- rnorm(20)
expect_error(knn_imp(mat, k = 2), "exceeds usable columns")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.