# test_missForestPredict.R
# In missForestPredict: Missing Value Imputation using Random Forest for Prediction Settings

context("missForestPredict")
library(missForestPredict)
library(testthat)

# A model fitted on a tibble must return tibbles, both for the imputed training
# data (`$ximp`) and for the imputation of new data.
test_that("tibble returns tibble", {

  # Skip cleanly when the optional tidyverse is unavailable; require() would
  # only return FALSE and the test would fail later on a missing as_tibble().
  skip_if_not_installed("tidyverse")
  library(tidyverse)

  data(iris)
  set.seed(2022)
  iris_train <- produce_NA(iris[1:100, ], proportion = 0.1)
  iris_test <- produce_NA(iris[101:150, ], proportion = 0.1)
  iris_train_tbl <- as_tibble(iris_train)
  iris_test_tbl <- as_tibble(iris_test, rownames = NA)

  missForest_object_tbl <- missForestPredict::missForest(
    iris_train_tbl,
    save_models = TRUE,
    verbose = FALSE
  )
  iris_test_tbl_imp <- missForestPredict::missForestPredict(
    missForest_object_tbl,
    newdata = iris_test_tbl
  )

  # expect_s3_class() replaces the deprecated expect_is().
  expect_s3_class(iris_test_tbl_imp, "tbl_df")
  expect_s3_class(missForest_object_tbl$ximp, "tbl_df")

})

# A model fitted on a plain data frame must return data frames, both for the
# imputed training data (`$ximp`) and for the imputation of new data.
test_that("dataframe returns dataframe", {

  # tidyverse is not used by this test itself; the attach is kept as-is so the
  # search path seen by the rest of the file does not change.
  require(tidyverse)

  data(iris)
  set.seed(2022)
  iris_train <- produce_NA(iris[1:100, ], proportion = 0.1)
  iris_test <- produce_NA(iris[101:150, ], proportion = 0.1)

  missForest_object_df <- missForestPredict::missForest(
    iris_train,
    save_models = TRUE,
    verbose = FALSE
  )
  iris_test_df_imp <- missForestPredict::missForestPredict(
    missForest_object_df,
    newdata = iris_test
  )

  # expect_s3_class() replaces the deprecated expect_is().
  expect_s3_class(iris_test_df_imp, "data.frame")
  expect_s3_class(missForest_object_df$ximp, "data.frame")

})

# Fitting on a data frame and on the equivalent tibble, with the same seed,
# must yield the same imputed training data.
test_that("tibble and dataframe results are the same for missForest", {

  # Skip cleanly when the optional tidyverse is unavailable; require() would
  # only return FALSE and the test would fail later on a missing as_tibble().
  skip_if_not_installed("tidyverse")
  library(tidyverse)

  data(iris)
  # Seed before drawing the missingness pattern so that a failure can be
  # reproduced with the exact same fixture.
  set.seed(2022)
  iris_mis <- produce_NA(iris, proportion = 0.1)
  iris_mis_tbl <- as_tibble(iris_mis)

  # The seed is reset before each fit so both fits start from the same RNG state.
  set.seed(2022)
  missForest_object <- missForestPredict::missForest(
    iris_mis,
    save_models = TRUE,
    verbose = FALSE
  )
  iris_imp_df <- missForest_object$ximp

  set.seed(2022)
  missForest_object_tbl <- missForestPredict::missForest(
    iris_mis_tbl,
    save_models = TRUE,
    verbose = FALSE
  )
  iris_imp_tbl <- missForest_object_tbl$ximp

  expect_equal(iris_imp_tbl, as_tibble(iris_imp_df))

})

# Imputing new data with a model fitted on a tibble must give the same values
# as imputing the equivalent data frame with a model fitted on a data frame.
test_that("tibble and dataframe results are the same for missForestPredict", {

  # Skip cleanly when the optional tidyverse is unavailable; require() would
  # only return FALSE and the test would fail later on a missing as_tibble().
  skip_if_not_installed("tidyverse")
  library(tidyverse)

  data(iris)
  set.seed(2022)
  iris_train <- produce_NA(iris[1:100, ], proportion = 0.1)
  iris_test <- produce_NA(iris[101:150, ], proportion = 0.1)
  iris_train_tbl <- as_tibble(iris_train)
  iris_test_tbl <- as_tibble(iris_test, rownames = NA)

  # The seed is reset before each fit/predict pair so the data frame path and
  # the tibble path consume the same random number stream.
  set.seed(2022)
  missForest_object <- missForestPredict::missForest(
    iris_train,
    save_models = TRUE,
    verbose = FALSE
  )
  iris_test_df_imp <- missForestPredict::missForestPredict(
    missForest_object,
    newdata = iris_test
  )

  set.seed(2022)
  missForest_object_tbl <- missForestPredict::missForest(
    iris_train_tbl,
    save_models = TRUE,
    verbose = FALSE
  )
  # Predict with the tibble-fitted model. Previously the data frame model was
  # reused here, so the tibble-fitted model was never exercised.
  iris_test_tbl_imp <- missForestPredict::missForestPredict(
    missForest_object_tbl,
    newdata = iris_test_tbl
  )

  expect_equal(iris_test_tbl_imp, as_tibble(iris_test_df_imp))

})

# Checks that running the saved models over the training data frame again
# gives the same result as the imputation stored on the fitted object.
test_that("prediction on training set is the same as imputation on dataframe", {

  data(iris)
  set.seed(2022)
  train_with_na <- produce_NA(iris, proportion = 0.1)

  fit <- missForestPredict::missForest(
    train_with_na,
    save_models = TRUE,
    verbose = FALSE
  )
  reimputed <- missForestPredict::missForestPredict(fit, newdata = train_with_na)

  expect_equal(fit$ximp, reimputed)

})

# Checks that running the saved models over the training tibble again gives
# the same result as the imputation stored on the fitted object.
test_that("prediction on training set is the same as imputation on tibble", {

  # as_tibble() is called with its namespace so this test does not depend on
  # an earlier test having attached tidyverse.
  skip_if_not_installed("tibble")

  data(iris)
  set.seed(2022)
  iris_mis <- produce_NA(iris, proportion = 0.1)
  iris_mis <- tibble::as_tibble(iris_mis)

  missForest_object <- missForestPredict::missForest(
    iris_mis,
    save_models = TRUE,
    verbose = FALSE
  )
  iris_imp_tbl <- missForest_object$ximp

  missForest_predictions <- missForestPredict::missForestPredict(
    missForest_object,
    newdata = iris_mis
  )

  expect_equal(iris_imp_tbl, missForest_predictions)

})

# A data frame without missing values must come back unchanged in `$ximp`.
test_that("imputing complete dataframe returns the same dataframe", {

  data(iris)

  set.seed(2022)
  fit <- missForestPredict::missForest(
    iris,
    save_models = TRUE,
    verbose = FALSE
  )

  expect_equal(fit$ximp, iris)

})

# A tibble without missing values must come back unchanged in `$ximp`.
test_that("imputing complete tibble returns the same tibble", {

  # as_tibble() is called with its namespace so this test does not depend on
  # an earlier test having attached tidyverse.
  skip_if_not_installed("tibble")

  data(iris)

  set.seed(2022)
  # Separate name so the built-in `iris` dataset is not shadowed.
  iris_tbl <- tibble::as_tibble(iris)
  missForest_object <- missForestPredict::missForest(
    iris_tbl,
    save_models = TRUE,
    verbose = FALSE
  )
  iris_imp_tbl <- missForest_object$ximp

  expect_equal(iris_imp_tbl, iris_tbl)

})

# Species supplied as a factor or as a character vector must lead to the same
# imputed data once the character result is converted back to a factor.
test_that("imputation is the same for factor and character", {

  data(iris)

  iris_na <- produce_NA(iris, proportion = 0.2)

  # Character twin of the same incomplete data.
  iris_na_chr <- iris_na
  iris_na_chr$Species <- as.character(iris_na_chr$Species)

  # Identical seed before each fit.
  set.seed(2022)
  fit_factor <- missForestPredict::missForest(
    iris_na,
    save_models = TRUE,
    verbose = FALSE
  )
  imputed_factor <- fit_factor$ximp

  set.seed(2022)
  fit_chr <- missForestPredict::missForest(
    iris_na_chr,
    save_models = TRUE,
    verbose = FALSE
  )
  imputed_chr <- fit_chr$ximp
  imputed_chr$Species <- as.factor(imputed_chr$Species)

  expect_equal(imputed_factor, imputed_chr)

})

# With return_integer_as_integer = FALSE an integer input column must come
# back as double, in `$ximp` and in the imputation of new data, for data frames
# and tibbles alike.
test_that("integer is returned as double when return_integer_as_integer is FALSE", {

  # Skip cleanly when the optional tidyverse is unavailable; require() would
  # only return FALSE and the test would fail later on a missing as_tibble().
  skip_if_not_installed("tidyverse")
  library(tidyverse)

  data(iris)
  set.seed(2022)

  # Truncate to integer so the column type under test is "integer".
  iris$Sepal.Length <- as.integer(iris$Sepal.Length)

  iris_train <- produce_NA(iris[1:100, ], proportion = 0.1)
  iris_test <- produce_NA(iris[101:150, ], proportion = 0.1)
  iris_train_tbl <- as_tibble(iris_train)
  iris_test_tbl <- as_tibble(iris_test, rownames = NA)

  # impute train and test df
  missForest_object_df <- missForestPredict::missForest(
    iris_train,
    save_models = TRUE,
    verbose = FALSE,
    return_integer_as_integer = FALSE
  )
  iris_test_df_imp <- missForestPredict::missForestPredict(
    missForest_object_df,
    newdata = iris_test
  )

  # impute train and test tibble
  missForest_object_tbl <- missForestPredict::missForest(
    iris_train_tbl,
    save_models = TRUE,
    verbose = FALSE,
    return_integer_as_integer = FALSE
  )
  iris_test_tbl_imp <- missForestPredict::missForestPredict(
    missForest_object_tbl,
    newdata = iris_test_tbl
  )

  expect_type(iris_test_df_imp$Sepal.Length, "double")
  expect_type(missForest_object_df$ximp$Sepal.Length, "double")

  expect_type(iris_test_tbl_imp$Sepal.Length, "double")
  expect_type(missForest_object_tbl$ximp$Sepal.Length, "double")

})

# With return_integer_as_integer = TRUE an integer input column must stay
# integer, in `$ximp` and in the imputation of new data, for data frames and
# tibbles alike.
test_that("integer is returned as integer when return_integer_as_integer is TRUE", {

  # Skip cleanly when the optional tidyverse is unavailable; require() would
  # only return FALSE and the test would fail later on a missing as_tibble().
  skip_if_not_installed("tidyverse")
  library(tidyverse)

  data(iris)
  set.seed(2022)

  # Truncate to integer so the column type under test is "integer".
  iris$Sepal.Length <- as.integer(iris$Sepal.Length)

  iris_train <- produce_NA(iris[1:100, ], proportion = 0.1)
  iris_test <- produce_NA(iris[101:150, ], proportion = 0.1)
  iris_train_tbl <- as_tibble(iris_train)
  iris_test_tbl <- as_tibble(iris_test, rownames = NA)

  # impute train and test df
  missForest_object_df <- missForestPredict::missForest(
    iris_train,
    save_models = TRUE,
    verbose = FALSE,
    return_integer_as_integer = TRUE
  )
  iris_test_df_imp <- missForestPredict::missForestPredict(
    missForest_object_df,
    newdata = iris_test
  )

  # impute train and test tibble
  missForest_object_tbl <- missForestPredict::missForest(
    iris_train_tbl,
    save_models = TRUE,
    verbose = FALSE,
    return_integer_as_integer = TRUE
  )
  iris_test_tbl_imp <- missForestPredict::missForestPredict(
    missForest_object_tbl,
    newdata = iris_test_tbl
  )

  expect_type(iris_test_df_imp$Sepal.Length, "integer")
  expect_type(missForest_object_df$ximp$Sepal.Length, "integer")

  expect_type(iris_test_tbl_imp$Sepal.Length, "integer")
  expect_type(missForest_object_tbl$ximp$Sepal.Length, "integer")

})

# Imputing the training data again through the saved models must reproduce
# `$ximp`, for a data frame fit and for a tibble fit.
test_that("re-imputing with missForestPredict gives the same results as missForest", {

  # Skip cleanly when the optional tidyverse is unavailable; require() would
  # only return FALSE and the test would fail later on a missing as_tibble().
  skip_if_not_installed("tidyverse")
  library(tidyverse)

  data(iris)
  # Seeded like the other tests in this file so that the missingness pattern
  # and the fits can be reproduced when the test fails.
  set.seed(2022)
  iris_miss <- produce_NA(iris, proportion = 0.1)
  iris_miss_tbl <- as_tibble(iris_miss)

  missForest_object <- missForestPredict::missForest(
    iris_miss,
    save_models = TRUE,
    verbose = FALSE
  )
  missForest_object_tbl <- missForestPredict::missForest(
    iris_miss_tbl,
    save_models = TRUE,
    verbose = FALSE
  )

  # re-impute
  iris_miss_imp <- missForestPredict::missForestPredict(
    missForest_object,
    newdata = iris_miss
  )
  iris_miss_imp_tbl <- missForestPredict::missForestPredict(
    missForest_object_tbl,
    newdata = iris_miss_tbl
  )

  expect_equal(missForest_object$ximp, iris_miss_imp)
  expect_equal(missForest_object_tbl$ximp, iris_miss_imp_tbl)

})

# Same check as the general re-imputation test, but with the fit limited to a
# single iteration (maxiter = 1).
test_that("re-imputing with missForestPredict gives the same results as missForest - 1 iteration", {

  # Skip cleanly when the optional tidyverse is unavailable; require() would
  # only return FALSE and the test would fail later on a missing as_tibble().
  skip_if_not_installed("tidyverse")
  library(tidyverse)

  data(iris)
  # Seeded like the other tests in this file so that the missingness pattern
  # and the fits can be reproduced when the test fails.
  set.seed(2022)
  iris_miss <- produce_NA(iris, proportion = 0.1)
  iris_miss_tbl <- as_tibble(iris_miss)

  missForest_object <- missForestPredict::missForest(
    iris_miss,
    save_models = TRUE,
    verbose = FALSE,
    maxiter = 1
  )
  missForest_object_tbl <- missForestPredict::missForest(
    iris_miss_tbl,
    save_models = TRUE,
    verbose = FALSE,
    maxiter = 1
  )

  # re-impute
  iris_miss_imp <- missForestPredict::missForestPredict(
    missForest_object,
    newdata = iris_miss
  )
  iris_miss_imp_tbl <- missForestPredict::missForestPredict(
    missForest_object_tbl,
    newdata = iris_miss_tbl
  )

  expect_equal(missForest_object$ximp, iris_miss_imp)
  expect_equal(missForest_object_tbl$ximp, iris_miss_imp_tbl)

})

# missForest() is expected to raise an error when the data contain Inf.
test_that("infinite values result in error", {

  data(iris)
  iris_with_inf <- produce_NA(iris, proportion = 0.1)
  # Plant a single infinite value in the first cell.
  iris_with_inf[1, 1] <- Inf

  expect_error(
    missForestPredict::missForest(iris_with_inf, verbose = FALSE)
  )

})

# When Species holds a single value in the training data, every missing
# Species in the new data is expected to be imputed with that value.
test_that("for variable with 1 class a single value model is learned", {

  data(iris)

  train <- produce_NA(iris[1:100, ], proportion = 0.1)
  train$Species <- "versicolor"
  test <- produce_NA(iris[101:150, ], proportion = 0.1)

  # Fit on the training part, then impute the held-out part.
  set.seed(2022)
  fit <- missForestPredict::missForest(
    train,
    save_models = TRUE,
    verbose = FALSE
  )
  test_imputed <- missForestPredict::missForestPredict(fit, newdata = test)

  species_was_missing <- is.na(test$Species)
  expect_true(all(test_imputed$Species[species_was_missing] == "versicolor"))

})

# NOTE(review): this test has the same description and the same steps as the
# test directly above it in this file; confirm whether a different scenario
# was intended here.
# Training Species is a single constant value; the imputed Species of the new
# data must equal that value wherever it was missing.
test_that("for variable with 1 class a single value model is learned", {

  data(iris)

  constant_species_train <- produce_NA(iris[1:100, ], proportion = 0.1)
  constant_species_train$Species <- "versicolor"
  new_rows <- produce_NA(iris[101:150, ], proportion = 0.1)

  # Train, then impute the new rows with the saved models.
  set.seed(2022)
  model <- missForestPredict::missForest(
    constant_species_train,
    save_models = TRUE,
    verbose = FALSE
  )
  new_rows_imputed <- missForestPredict::missForestPredict(
    model,
    newdata = new_rows
  )

  imputed_species <- new_rows_imputed$Species[is.na(new_rows$Species)]
  expect_true(all(imputed_species == "versicolor"))

})

# Zeroing the Sepal.Length row of the predictor matrix: no model is stored for
# Sepal.Length in the first iteration, the other four variables each use four
# predictors, and an initialization value for Sepal.Length is still present.
test_that("excluding a variable from imputation works (predictor matrix)", {

  data(iris)

  train <- produce_NA(iris[1:100, ], proportion = 0.1)
  train$Species <- "versicolor"
  test <- produce_NA(iris[101:150, ], proportion = 0.1)

  pred_mat <- create_predictor_matrix(train)
  pred_mat["Sepal.Length", ] <- 0

  # Fit on train, then impute test (the latter only has to run without error).
  set.seed(2022)
  fit <- missForestPredict::missForest(
    train,
    save_models = TRUE,
    predictor_matrix = pred_mat,
    verbose = FALSE
  )
  test_imputed <- missForestPredict::missForestPredict(fit, newdata = test)

  first_iter_models <- fit$models[[1]]

  expect_null(first_iter_models$Sepal.Length)
  for (target in c("Sepal.Width", "Petal.Length", "Petal.Width", "Species")) {
    expect_equal(first_iter_models[[target]]$num.independent.variables, 4)
  }
  expect_false(is.null(fit$init$Sepal.Length))

})

# Zeroing the Sepal.Length column of the predictor matrix: Sepal.Length still
# gets a model and an initialization value, the other variables use three
# predictors each, and every model's predictor names match the entries equal
# to 1 in that variable's row of the matrix.
test_that("excluding a variable as a predictor works (predictor matrix)", {

  data(iris)

  train <- produce_NA(iris[1:100, ], proportion = 0.1)
  train$Species <- "versicolor"
  test <- produce_NA(iris[101:150, ], proportion = 0.1)

  pred_mat <- create_predictor_matrix(train)
  pred_mat[, "Sepal.Length"] <- 0

  # Fit on train, then impute test (the latter only has to run without error).
  set.seed(2022)
  fit <- missForestPredict::missForest(
    train,
    save_models = TRUE,
    predictor_matrix = pred_mat,
    verbose = FALSE
  )
  test_imputed <- missForestPredict::missForestPredict(fit, newdata = test)

  first_iter_models <- fit$models[[1]]
  other_vars <- c("Sepal.Width", "Petal.Length", "Petal.Width", "Species")

  expect_false(is.null(first_iter_models$Sepal.Length))
  for (target in other_vars) {
    expect_equal(first_iter_models[[target]]$num.independent.variables, 3)
  }
  expect_false(is.null(fit$init$Sepal.Length))

  # Predictor names used by each model versus the matrix row for that variable.
  for (target in c("Sepal.Length", other_vars)) {
    matrix_row <- pred_mat[target, ]
    expect_equal(
      sort(first_iter_models[[target]]$forest$independent.variable.names),
      sort(names(matrix_row[matrix_row == 1]))
    )
  }
})

# Zeroing both the Sepal.Length row and column of the predictor matrix:
# Sepal.Length has neither a model nor an initialization value, the other
# variables use three predictors each, and their predictor names match the
# entries equal to 1 in the corresponding matrix rows.
test_that("excluding a variable from imputation and as a predictor works (predictor matrix)", {

  data(iris)

  train <- produce_NA(iris[1:100, ], proportion = 0.1)
  train$Species <- "versicolor"
  test <- produce_NA(iris[101:150, ], proportion = 0.1)

  pred_mat <- create_predictor_matrix(train)
  pred_mat[, "Sepal.Length"] <- 0
  pred_mat["Sepal.Length", ] <- 0

  # Fit on train, then impute test (the latter only has to run without error).
  set.seed(2022)
  fit <- missForestPredict::missForest(
    train,
    save_models = TRUE,
    predictor_matrix = pred_mat,
    verbose = FALSE
  )
  test_imputed <- missForestPredict::missForestPredict(fit, newdata = test)

  first_iter_models <- fit$models[[1]]
  other_vars <- c("Sepal.Width", "Petal.Length", "Petal.Width", "Species")

  expect_null(first_iter_models$Sepal.Length)
  for (target in other_vars) {
    expect_equal(first_iter_models[[target]]$num.independent.variables, 3)
  }
  expect_null(fit$init$Sepal.Length)

  # Predictor names used by each model versus the matrix row for that variable.
  for (target in other_vars) {
    matrix_row <- pred_mat[target, ]
    expect_equal(
      sort(first_iter_models[[target]]$forest$independent.variable.names),
      sort(names(matrix_row[matrix_row == 1]))
    )
  }
})

# Training Species is a factor with the single level "versicolor"; the new
# data carry a Species factor rebuilt from their own values. Missing Species
# in the new data must be imputed as "versicolor".
test_that("imputation works when train and test have different factor levels - one factor in train", {

  data(iris)

  train <- produce_NA(iris[1:100, ], proportion = 0.1)
  train$Species <- as.factor("versicolor")
  test <- produce_NA(iris[101:150, ], proportion = 0.1)
  # Round-trip through character so only levels present in `test` remain.
  test$Species <- as.factor(as.character(test$Species))

  # Fit on train, then impute test.
  set.seed(2022)
  fit <- missForestPredict::missForest(
    train,
    save_models = TRUE,
    verbose = FALSE
  )
  test_imputed <- missForestPredict::missForestPredict(fit, newdata = test)

  species_was_missing <- is.na(test$Species)
  expect_true(all(test_imputed$Species[species_was_missing] == "versicolor"))

})

# Training data keep the original Species factor of rows 1-100; the new data
# carry a Species factor rebuilt from their own values. The test asserts that
# missing Species in the new data are imputed as "versicolor".
test_that("imputation works when train and test have different factor levels - one factor in test", {

  data(iris)

  train <- produce_NA(iris[1:100, ], proportion = 0.1)
  test <- produce_NA(iris[101:150, ], proportion = 0.1)
  # Round-trip through character so only levels present in `test` remain.
  test$Species <- as.factor(as.character(test$Species))

  # Fit on train, then impute test.
  set.seed(2022)
  fit <- missForestPredict::missForest(
    train,
    save_models = TRUE,
    verbose = FALSE
  )
  test_imputed <- missForestPredict::missForestPredict(fit, newdata = test)

  species_was_missing <- is.na(test$Species)
  expect_true(all(test_imputed$Species[species_was_missing] == "versicolor"))

})


# With fixed_maxiter = TRUE and maxiter = 1, re-imputing the training data
# through the saved models must reproduce the stored imputation.
test_that("fixed number of iterations works", {

  data(iris)
  set.seed(2022)
  train_with_na <- produce_NA(iris, proportion = 0.1)

  fit <- missForestPredict::missForest(
    train_with_na,
    save_models = TRUE,
    fixed_maxiter = TRUE,
    maxiter = 1,
    verbose = FALSE
  )
  reimputed <- missForestPredict::missForestPredict(fit, newdata = train_with_na)

  expect_equal(fit$ximp, reimputed)

})
# Any scripts or data that you put into this service are public.
# missForestPredict documentation built on June 8, 2025, 10:20 a.m.
# rdrr.io home R language documentation Run R code online
# CRAN packages Bioconductor packages R-Forge packages GitHub packages
# Note that we can't provide technical support on individual packages. You should contact the package authors for that.
# missForestPredict
# Missing Value Imputation using Random Forest for Prediction Settings

# tests/testthat/test_missForestPredict.R
# In missForestPredict: Missing Value Imputation using Random Forest for Prediction Settings

# Try the missForestPredict package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# missForestPredict Missing Value Imputation using Random Forest for Prediction Settings

# tests/testthat/test_missForestPredict.R In missForestPredict: Missing Value Imputation using Random Forest for Prediction Settings

# Try the missForestPredict package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# missForestPredict
# Missing Value Imputation using Random Forest for Prediction Settings

# tests/testthat/test_missForestPredict.R
# In missForestPredict: Missing Value Imputation using Random Forest for Prediction Settings