Nothing
# Out-of-sample (OOS) Application
iris1 <- generateNA(iris, p = c(Sepal.Width = 0.2, Species = 0.1), seed = 1L)
iris2 <- generateNA(iris, p = 0.2, seed = 1L)
test_that("OOS can work in some cases without forests", {
imp <- missRanger(iris1, verbose = 0, seed = 1L, num.trees = 10, data_only = FALSE)
# There are no missings
expect_equal(predict(imp, head(iris)), head(iris))
# We use univariate imputation at prediction stage
expect_message(
pred <- predict(imp, tail(iris1), iter = 0L),
"Only univariate imputations done"
)
expect_true(!anyNA(pred))
# We use univariate imputation at missRanger() stage
imp <- missRanger(
iris1, . ~ 1, verbose = 0, seed = 1L, num.trees = 10, data_only = FALSE
)
expect_message(
pred <- predict(imp, tail(iris1)),
"Only univariate imputations done"
)
expect_true(!anyNA(pred))
# But usually, it gives an error
imp <- missRanger(iris1, verbose = 0, seed = 1L, num.trees = 10, data_only = FALSE)
# There are no missings
expect_error(predict(imp, tail(iris1)), "No random forests")
})
test_that("OOS removes all missings with and without PMM", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
expect_true(!anyNA(predict(imp, iris2)))
expect_true(!anyNA(predict(imp, iris2, pmm.k = 1L)))
imp <- missRanger(iris1, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
expect_true(!anyNA(predict(imp, iris1)))
expect_true(!anyNA(predict(imp, iris1, pmm.k = 1L)))
expect_error(predict(imp, iris2), "not allowed.")
})
test_that("OOS is picky about wrong input", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
one_row <- iris2[1L, ]
one_row[, "Species"] <- "new species"
expect_error(predict(imp, one_row), "Inconsistency between.")
one_row <- iris2[1L, ]
one_row[, "Sepal.Length"] <- "new species"
expect_error(predict(imp, one_row), "Inconsistency between.")
})
test_that("OOS expects columns to be present, but extra columns are okay", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
expect_error(predict(imp, head(iris2[, -5])), "Variables not present.")
expect_no_error(predict(imp, iris2 |> transform(additional = 1)))
})
test_that("OOS can deal with single-iteration fits, when one forest is missing", {
imp <- missRanger(
iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE, maxiter = 1L
)
expect_message(pred <- predict(imp, iris2), "No random forest")
expect_no_message(pred <- predict(imp, iris2, verbose = FALSE))
})
test_that("OOS seed works", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
x1 <- predict(imp, iris2, seed = 1L)
x2 <- predict(imp, iris2, seed = 1L)
x3 <- predict(imp, iris2, seed = 2L)
expect_equal(x1, x2)
expect_false(identical(x1, x3))
})
test_that("OOS PMM works", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
x1 <- predict(imp, iris2, seed = 1L)
x2 <- predict(imp, iris2, seed = 1L, pmm.k = 5)
expect_false(identical(x1, x2))
# Fitted with PMM
imp <- missRanger(
iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE, pmm.k = 5L
)
x1 <- predict(imp, iris2, seed = 1L, pmm.k = 0)
x2 <- predict(imp, iris2, seed = 1L, pmm.k = 5)
expect_false(identical(x1, x2))
})
test_that("OOS iter works", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
x1 <- predict(imp, iris2, seed = 1L, iter = 1, pmm.k = 4L)
x2 <- predict(imp, iris2, seed = 1L, iter = 4, pmm.k = 4L)
expect_false(identical(x1, x2))
})
test_that("OOS is not working when there are missings in impute_by", {
imp <- missRanger(
iris1,
Species ~ Sepal.Length,
verbose = 0,
seed = 1L,
num.trees = 10,
keep_forests = TRUE
)
new_row <- iris[1L, ]
new_row[, c("Sepal.Length", "Species")] <- NA
expect_error(predict(imp, new_row), "not allowed")
})
test_that("OOS does 1 iter if only easy case", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
new_row <- iris[1L, ]
new_row[, "Sepal.Length"] <- NA
x1 <- predict(imp, new_row, seed = 1L, pmm.k = 5L, iter = 10L)
x2 <- predict(imp, new_row, seed = 1L, pmm.k = 5L, iter = 1L)
expect_equal(x1, x2)
})
test_that("OOS does not fail if there is all missings, even of wrong type", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
new_rows <- iris[1:2, ]
new_rows[] <- NA
expect_no_error(x1 <- predict(imp, new_rows, seed = 1L))
})
test_that("case works where easy case fills a complete column", {
imp <- missRanger(iris2, verbose = 0, seed = 1L, num.trees = 10, keep_forests = TRUE)
X <- data.frame(
Sepal.Length = c(5.1, NA),
Sepal.Width = c(3.5, 3.4),
Petal.Length = c(NA, 1.4),
Petal.Width = c(NA, 0.3),
Species = factor("setosa")
)
expect_no_error(predict(imp, X))
})
n <- 200L
X <- data.frame(
int = seq_len(n),
double = log(seq_len(n)),
char = rep(LETTERS[1:4], n %/% 4),
fact = factor(rep(LETTERS[1:2], n %/% 2)),
logi = seq_len(n) > n %/% 3
)
X_NA <- generateNA(X[1:5], p = 0.2, seed = 1L)
test_that("non-syntactic column names work", {
X_NA2 <- X_NA
colnames(X_NA2)[1:2] <- c("1bad name", "2 also bad")
imp1 <- missRanger(X_NA2, num.trees = 20L, verbose = 0L, seed = 1L, keep_forests = TRUE)
expect_equal(colnames(predict(imp1, head(X_NA2))), colnames(X_NA2))
})
test_that("OOS does not fail if there is all missings, even of wrong type (MORE TYPES)", {
imp1 <- missRanger(
X_NA, num.trees = 20L, verbose = 0L, seed = 1L, keep_forests = TRUE
)
new_rows <- X[1:2, ]
new_rows[] <- NA
expect_no_error(x1 <- predict(imp1, new_rows, seed = 1L, pmm.k = 1L))
expect_equal(lapply(x1, class), lapply(X, class))
# Without PMM, ints might turn into doubles
expect_no_error(x1 <- predict(imp1, new_rows, seed = 1L, pmm.k = 0L))
xp <- list(int = "numeric", double = "numeric", char = "character",
fact = "factor", logi = "logical")
expect_equal(lapply(x1, class), xp)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.