# tests/testthat/test_missings.R

library(ranger)
library(survival)
# Reporter label for this file: all tests below cover missing-value handling.
context("ranger_missings")

# The first element of forest$child.nodeIDs has two entries when the training
# data is complete and gains a third entry once the data contains an NA.
test_that("Third child for missings only there if missings in data", {
  # Complete data: two entries expected.
  fit_complete <- ranger(formula = Species ~ ., data = iris, num.trees = 5)
  expect_length(fit_complete$forest$child.nodeIDs[[1]], n = 2)

  # One NA in Sepal.Length: three entries expected.
  iris_na <- iris
  iris_na[1, "Sepal.Length"] <- NA
  fit_na <- ranger(formula = Species ~ ., data = iris_na, num.trees = 5)
  expect_length(fit_na$forest$child.nodeIDs[[1]], n = 3)
})

# Missing values are accepted in predictors but rejected in the response,
# whether the response is numeric or a factor.
test_that("Training works with missing values in x but not in y", {
  # NA in Sepal.Length (column 1): silent when it is a predictor, an error
  # when it is the dependent variable.
  dat <- iris
  dat[25, 1] <- NA
  expect_silent(ranger(Species ~ ., dat, num.trees = 5))
  expect_silent(ranger(Petal.Width ~ ., dat, num.trees = 5))
  # The trailing period is escaped so the regex matches a literal "." only,
  # consistent with the other expect_error() patterns in this file.
  expect_error(ranger(Sepal.Length ~ ., dat, num.trees = 5), "Missing data in dependent variable\\.")

  # NA in Species (column 5), which is the dependent variable here.
  dat <- iris
  dat[4, 5] <- NA
  expect_error(ranger(Species ~ ., dat, num.trees = 5), "Missing data in dependent variable\\.")
})

# An NA in a column that the formula does not reference must not disturb
# training.
test_that("No error if missing value in irrelevant column, training", {
  iris_na <- iris
  iris_na[1, "Sepal.Width"] <- NA  # Sepal.Width is not part of the formula
  expect_silent(
    ranger(formula = Species ~ Sepal.Length, data = iris_na, num.trees = 5)
  )
})

# An NA in a column the model was not trained on must not disturb prediction.
test_that("No error if missing value in irrelevant column, prediction", {
  # New data with an NA in Sepal.Width, which the model below never uses.
  newdata_na <- iris
  newdata_na[1, "Sepal.Width"] <- NA

  fit <- ranger(formula = Species ~ Sepal.Length, data = iris, num.trees = 5)
  expect_silent(predict(fit, data = newdata_na))
})

# A classification forest trained on complete data predicts silently on new
# data that contains NAs in predictors.
test_that("Prediction works with missing values, classification", {
  fit <- ranger(
    formula = Species ~ .,
    data = iris,
    num.trees = 5,
    write.forest = TRUE
  )

  # NAs in two different predictor columns and rows.
  newdata_na <- iris
  newdata_na[4, "Petal.Width"] <- NA
  newdata_na[25, "Sepal.Length"] <- NA
  expect_silent(predict(fit, data = newdata_na))
})

# A regression forest trained on complete data predicts silently on new data
# that contains NAs in predictors.
test_that("Prediction works with missing values, regression", {
  fit <- ranger(
    formula = Sepal.Width ~ .,
    data = iris,
    num.trees = 5,
    write.forest = TRUE
  )

  # NAs in two different predictor columns and rows.
  newdata_na <- iris
  newdata_na[4, "Petal.Width"] <- NA
  newdata_na[25, "Sepal.Length"] <- NA
  expect_silent(predict(fit, data = newdata_na))
})

# Order-based handling of an unordered (character) predictor that may contain
# NA, with a two-class outcome.
test_that("Order splitting working with missing values for classification", {
  n_obs <- 20
  # Draw the predictor first and the outcome second, in that order.
  x_vals <- sample(c("A", "B", "C", "D", NA), size = n_obs, replace = TRUE)
  y_vals <- factor(rbinom(n_obs, size = 1, prob = 0.5))
  toy <- data.frame(x = x_vals, y = y_vals, stringsAsFactors = FALSE)

  fit <- ranger(
    formula = y ~ .,
    data = toy,
    num.trees = 5,
    min.node.size = n_obs / 2,
    respect.unordered.factors = "order"
  )
  expect_true(all(fit$forest$is.ordered))
})

# Order-based handling of an unordered (character) predictor that may contain
# NA, with a four-class outcome.
test_that("Order splitting working with missing values for multiclass classification", {
  n_obs <- 20
  # Draw the predictor first and the outcome second, in that order.
  x_vals <- sample(c("A", "B", "C", "D", NA), size = n_obs, replace = TRUE)
  y_vals <- factor(sample(c("A", "B", "C", "D"), size = n_obs, replace = TRUE))
  toy <- data.frame(x = x_vals, y = y_vals, stringsAsFactors = FALSE)

  fit <- ranger(
    formula = y ~ .,
    data = toy,
    num.trees = 5,
    min.node.size = n_obs / 2,
    respect.unordered.factors = "order"
  )
  expect_true(all(fit$forest$is.ordered))
})

# Survival forests are expected to refuse training data that contains NAs.
test_that("Missing values for survival not yet working", {
  veteran_na <- veteran
  veteran_na[1, 1] <- NA  # one NA in the first column

  expect_error(
    ranger(formula = Surv(time, status) ~ ., data = veteran_na, num.trees = 5),
    regexp = "Error: Missing value handling not yet implemented for survival forests\\."
  )
})

# na.action = "na.omit" must give the same predictions as dropping incomplete
# rows by hand before training (classification forest, fixed seed).
test_that("na.omit leads to same result as manual removal, classification", {
  iris_na <- iris
  iris_na[1, "Sepal.Length"] <- NA
  iris_dropped <- na.omit(iris_na)

  # Rows removed by ranger itself.
  fit_auto <- ranger(
    formula = Species ~ .,
    data = iris_na,
    num.trees = 5,
    seed = 10,
    na.action = "na.omit"
  )
  # Rows removed before calling ranger.
  fit_manual <- ranger(
    formula = Species ~ .,
    data = iris_dropped,
    num.trees = 5,
    seed = 10
  )

  expect_equal(fit_auto$predictions, fit_manual$predictions)
})

# na.action = "na.omit" must give the same predictions as dropping incomplete
# rows by hand before training (probability forest, fixed seed).
test_that("na.omit leads to same result as manual removal, probability", {
  iris_na <- iris
  iris_na[1, "Sepal.Length"] <- NA
  iris_dropped <- na.omit(iris_na)

  # Rows removed by ranger itself.
  fit_auto <- ranger(
    formula = Species ~ .,
    data = iris_na,
    num.trees = 5,
    probability = TRUE,
    seed = 10,
    na.action = "na.omit"
  )
  # Rows removed before calling ranger.
  fit_manual <- ranger(
    formula = Species ~ .,
    data = iris_dropped,
    num.trees = 5,
    probability = TRUE,
    seed = 10
  )

  expect_equal(fit_auto$predictions, fit_manual$predictions)
})

# na.action = "na.omit" must give the same predictions as dropping incomplete
# rows by hand before training (regression forest, fixed seed).
test_that("na.omit leads to same result as manual removal, regression", {
  iris_na <- iris
  iris_na[1, "Sepal.Length"] <- NA
  iris_dropped <- na.omit(iris_na)

  # Rows removed by ranger itself.
  fit_auto <- ranger(
    formula = Sepal.Width ~ .,
    data = iris_na,
    num.trees = 5,
    seed = 10,
    na.action = "na.omit"
  )
  # Rows removed before calling ranger.
  fit_manual <- ranger(
    formula = Sepal.Width ~ .,
    data = iris_dropped,
    num.trees = 5,
    seed = 10
  )

  expect_equal(fit_auto$predictions, fit_manual$predictions)
})

# na.action = "na.omit" must give the same cumulative hazard (chf) as dropping
# incomplete rows by hand before training (survival forest, fixed seed).
test_that("na.omit leads to same result as manual removal, survival", {
  veteran_na <- veteran
  veteran_na[1, 1] <- NA  # one NA in the first column
  veteran_dropped <- na.omit(veteran_na)

  # Rows removed by ranger itself.
  fit_auto <- ranger(
    formula = Surv(time, status) ~ .,
    data = veteran_na,
    num.trees = 5,
    seed = 10,
    na.action = "na.omit"
  )
  # Rows removed before calling ranger.
  fit_manual <- ranger(
    formula = Surv(time, status) ~ .,
    data = veteran_dropped,
    num.trees = 5,
    seed = 10
  )

  expect_equal(fit_auto$chf, fit_manual$chf)
})

# If every row has a missing value, na.omit leaves nothing to train on and
# ranger is expected to fail with a dedicated error.
test_that("na.omit not working if no observations left", {
  iris_all_na <- iris
  # Blank out the first column in every row (indexing by row keeps it numeric).
  iris_all_na[seq_len(nrow(iris_all_na)), 1] <- NA
  expect_error(
    ranger(
      formula = Species ~ .,
      data = iris_all_na,
      num.trees = 5,
      na.action = "na.omit"
    ),
    regexp = "Error: No observations left after removing missing values\\."
  )
})

# Try the ranger package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ranger documentation built on April 4, 2025, 6:12 a.m.