Nothing
iris2 <- generateNA(iris, p = 0.2, seed = 1L)
test_that("all missings are filled for multivariate and univariate imputation", {
imp <- missRanger(iris2, verbose = 0L, seed = 1L, num.trees = 20)
expect_true(!anyNA(imp))
imp <- missRanger(iris2, . ~ 1, verbose = 0L, seed = 1L)
expect_true(!anyNA(imp))
})
#===================================================
# TEST ARGUMENTS
#===================================================
imp1 <- missRanger(iris2, num.trees = 20L, verbose = 0L, seed = 1L)
test_that("pmm.k produces values present in the original data", {
imp2 <- missRanger(iris2, num.trees = 20L, verbose = 0L, pmm.k = 3L, seed = 1L)
expect_false(isTRUE(all.equal(imp1, imp2)))
expect_true(all(imp2$Sepal.Length %in% iris2$Sepal.Length))
})
test_that("num.trees has an effect", {
imp2 <- missRanger(iris2, num.trees = 10L, verbose = 0L, seed = 1L)
expect_false(isTRUE(all.equal(imp1, imp2)))
})
test_that("mtry has an effect", {
imp2 <- missRanger(iris2, num.trees = 20L, verbose = 0L, seed = 1L, mtry = 1)
imp3 <- missRanger(
iris2, num.trees = 20L, verbose = 0L, seed = 1L, mtry = function(m) min(m, 4)
)
expect_false(isTRUE(all.equal(imp1, imp2)))
expect_false(isTRUE(all.equal(imp1, imp3)))
expect_false(isTRUE(all.equal(imp2, imp3)))
})
test_that("min.node.size has an effect", {
imp2 <- missRanger(
iris2, num.trees = 20L, verbose = 0L, seed = 1L, min.node.size = 20
)
expect_false(isTRUE(all.equal(imp1, imp2)))
})
test_that("min.bucket has an effect", {
imp2 <- missRanger(iris2, num.trees = 20L, verbose = 0L, seed = 1L, min.bucket = 20)
expect_false(isTRUE(all.equal(imp1, imp2)))
})
test_that("max.depth has an effect", {
imp2 <- missRanger(iris2, num.trees = 20L, verbose = 0L, seed = 1L, max.depth = 1)
expect_false(isTRUE(all.equal(imp1, imp2)))
})
test_that("replace has an effect", {
imp2 <- missRanger(iris2, num.trees = 20L, verbose = 0L, seed = 1L, replace = FALSE)
expect_false(isTRUE(all.equal(imp1, imp2)))
})
test_that("sample.fraction has an effect", {
imp2 <- missRanger(
iris2, num.trees = 20L, verbose = 0L, seed = 1L, sample.fraction = 0.5
)
expect_false(isTRUE(all.equal(imp1, imp2)))
})
test_that("case.weights works", {
imp2 <- missRanger(
iris2, num.trees = 20L, verbose = 0L, seed = 1L, case.weights = rep(1, nrow(iris))
)
imp3 <- missRanger(
iris2, num.trees = 20L, verbose = 0L, seed = 1L, case.weights = 1:nrow(iris)
)
expect_equal(imp1, imp2)
expect_false(isTRUE(all.equal(imp1, imp3)))
expect_error(missRanger(iris2, num.trees = 3L, verbose = 0L, case.weights = 1:7))
})
test_that("seed works", {
imp2 <- missRanger(iris2, num.trees = 20L, verbose = 0L, seed = 1L)
imp3 <- missRanger(iris2, num.trees = 20L, verbose = 0L, seed = 2L)
expect_equal(imp1, imp2)
expect_false(identical(imp1, imp3))
})
test_that("verbose can be suppressed", {
expect_silent(missRanger(iris2, num.trees = 3L, verbose = 0L))
capture_output(expect_message(missRanger(iris2, num.trees = 3L, verbose = 1L)))
capture_output(expect_message(missRanger(iris2, num.trees = 3L, verbose = 2L)))
})
test_that("returnOOB works", {
imp2 <- missRanger(iris2, num.trees = 20L, verbose = 0L, returnOOB = TRUE)
expect_true(length(attributes(imp2)$oob) == ncol(iris))
expect_null(attributes(imp1)$oob)
})
#===================================================
# DATA TYPE CONSISTENCY
#===================================================
n <- 200L
X <- data.frame(
int = seq_len(n),
double = log(seq_len(n)),
char = rep(LETTERS[1:4], n %/% 4),
fact = factor(rep(LETTERS[1:2], n %/% 2)),
logi = seq_len(n) > n %/% 3,
date = seq.Date(as.Date("2001-01-01"), length.out = n, by = "1 d")
)
X_NA <- generateNA(X[1:5], p = 0.2, seed = 1L)
test_that("Imputing retains data type (with/without PMM and univariately)", {
imp1 <- missRanger(X_NA, num.trees = 20L, verbose = 0L, pmm.k = 0, seed = 1L)
imp2 <- missRanger(X_NA, num.trees = 20L, verbose = 0L, pmm.k = 3, seed = 1L)
imp3 <- missRanger(X_NA, . ~ 1, verbose = 0L, seed = 1L)
for (imp in list(imp1, imp2, imp3)) {
expect_true(is.numeric(imp$double))
expect_true(is.numeric(imp$int))
expect_true(is.logical(imp$logi))
expect_true(is.character(imp$char))
expect_true(is.factor(imp$fact))
expect_equal(levels(imp$fact), levels(X$fact))
}
})
# SELECTION OF "to_impute" and "impute_by"
# Are two vectors identical up to sorting?
.setequal <- function(x, y) {
(length(x) == length(y)) && all(sort(x) == sort(y))
}
test_that("Only features with missings, but not all missings, are being imputed", {
X2 <- generateNA(X, p = c(int = 1, double = 0.2), seed = 1L)
imp <- missRanger(X2, num.trees = 20L, verbose = 0L, seed = 1L, data_only = FALSE)
expect_equal(imp$to_impute, "double")
expect_equal(unname(colSums(is.na(imp$data))), c(n, 0, 0, 0, 0, 0))
})
test_that("Date variables (and other non-standard variable types) are not imputed", {
X2 <- generateNA(X, p = c(date = 0.3, int = 0.1), seed = 1L)
imp <- missRanger(X2, num.trees = 20L, verbose = 0L, seed = 1L, data_only = FALSE)
expect_equal(imp$to_impute, "int")
expect_true(anyNA(imp$data$date))
})
test_that("Features with missings that are not imputed are not used for imputation", {
X2 <- generateNA(X, p = c(int = 0.1, double = 0.2), seed = 1L)
imp <- missRanger(
X2, . - double ~ ., num.trees = 20L, verbose = 0L, seed = 1L, data_only = FALSE
)
expect_equal(imp$impute_by, setdiff(colnames(X2), "double"))
})
test_that("Constant features are not used for imputation", {
X2 <- generateNA(X, p = c(int = 0.1, double = 0.2), seed = 1L) |>
transform(const = 1)
imp <- missRanger(
X2, num.trees = 20L, verbose = 0L, seed = 1L, data_only = FALSE
)
expect_equal(imp$impute_by, setdiff(colnames(X2), "const"))
})
test_that("list variables (and other strange types) cannot be used for imputation", {
X2 <- generateNA(X, p = c(int = 0.1, double = 0.2), seed = 1L)
X2$list <- as.list(replicate(n, NULL))
imp <- missRanger(X2, num.trees = 20L, verbose = 0L, seed = 1L, data_only = FALSE)
expect_equal(imp$impute_by, setdiff(colnames(X), "list"))
})
test_that("Multiple problems together", {
X2 <- generateNA(X, p = 0.2, seed = 1L)
X2$list <- as.list(replicate(n, NULL))
X2$const <- 1
suppressMessages(
capture_output(
imp <- missRanger(X2, . - double ~ ., num.trees = 20L, seed = 1L, data_only = FALSE)
)
)
xpected <- setdiff(colnames(X), c("double", "date"))
expect_equal(imp$impute_by, xpected)
expect_true(.setequal(imp$to_impute, xpected))
})
# FORMULA PARSING
test_that("formula interface works with specified left and right side", {
imp <- missRanger(
X_NA,
int ~ int + double,
pmm = 3L,
num.trees = 20L,
verbose = 0L,
seed = 1L,
data_only = FALSE
)
na_per_col <- colSums(is.na(imp$data))
expect_equal(unname(na_per_col["int"]), 0L)
expect_true(all(na_per_col[-1L] >= 1L))
expect_equal(imp$to_impute, "int")
expect_equal(imp$impute_by, "int")
})
test_that("formula interface works with unspecified right side", {
imp <- missRanger(
X_NA,
int + double ~ .,
pmm = 3L,
num.trees = 20L,
verbose = 0L,
seed = 1L,
data_only = FALSE
)
na_per_col <- colSums(is.na(imp$data))
expect_equal(unname(na_per_col[c("int", "double")]), c(0, 0))
expect_true(all(na_per_col[-(1:2)] >= 1L))
expect_true(.setequal(imp$to_impute, c("int", "double")))
expect_equal(imp$impute_by, c("int", "double"))
})
test_that("formula interface works with unspecified left side", {
imp <- missRanger(
X_NA, . ~ int, pmm = 3L, num.trees = 20L, verbose = 0L, seed = 1L, data_only = FALSE
)
expect_true(!anyNA(imp$data))
expect_true(.setequal(imp$to_impute, colnames(X_NA)))
expect_equal(imp$impute_by, "int")
# Prediction error is on its default 1 for int (issue #77)
expect_equal(imp$pred_errors[, "int"], rep(1, length(imp$mean_pred_errors)))
})
test_that("dropping columns on left side leaves missing values", {
imp <- missRanger(
X_NA,
. - int ~ .,
pmm = 3L,
num.trees = 20L,
verbose = 0L,
seed = 1L,
data_only = FALSE
)
expect_equal(
unname(colSums(is.na(imp$data)) > 0),
c(TRUE, FALSE, FALSE, FALSE, FALSE)
)
xpected <- setdiff(colnames(X_NA), "int")
expect_true(.setequal(imp$to_impute, xpected))
expect_equal(imp$impute_by, xpected)
})
test_that("dropping columns on right side has an impact", {
imp1 <- missRanger(
X_NA, . ~ . - int, num.trees = 20L, verbose = 0L, seed = 1L, data_only = FALSE
)
imp2 <- missRanger(X_NA, num.trees = 20L, verbose = 0L, seed = 1L)
expect_false(identical(imp1$data, imp2))
expect_true(.setequal(imp1$to_impute, colnames(X_NA)))
expect_equal(imp1$impute_by, setdiff(colnames(X_NA), "int"))
})
test_that("empty rhs equals univariate imputation", {
imp1 <- missRanger(X_NA, . ~ 1, num.trees = 20L, verbose = 0L, seed = 1L)
imp2 <- imputeUnivariate(X_NA, seed = 1L)
imp3 <- missRanger(X_NA, int + char ~ 1, num.trees = 20L, verbose = 0L, seed = 1L)
imp4 <- imputeUnivariate(X_NA, seed = 1L, v = c("int", "char"))
expect_equal(imp1, imp2)
expect_equal(imp3, imp4)
})
test_that("non-syntactic column names work with or without formula", {
X_NA2 <- X_NA
colnames(X_NA2)[1:2] <- c("1bad name", "2 also bad")
imp1 <- missRanger(X_NA, num.trees = 20L, verbose = 0L, seed = 1L)
imp2 <- missRanger(X_NA2, num.trees = 20L, verbose = 0L, seed = 1L)
imp3 <- missRanger(
. - `1bad name` ~ - ., data = X_NA2, num.trees = 20L, verbose = 0L, seed = 1L
)
expect_equal(colnames(X_NA2), colnames(imp2))
expect_equal(imp1, setNames(imp2, colnames(imp1)))
expect_false(anyNA(imp2))
expect_equal(
unname(colSums(is.na(imp3)) == 0L),
c(FALSE, TRUE, TRUE, TRUE, TRUE)
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.