tests/testthat/test-missRanger.R

irisWithNA <- generateNA(iris, seed = 1, p = 0.3)

test_that("all missings are filled", {
  imp <- missRanger(irisWithNA, pmm.k = 3L, verbose = 0L, seed = 1L, num.trees = 50L)
  expect_true(!anyNA(imp))
})

test_that("messages are suppressed with verbose=0", {
  expect_silent(missRanger(irisWithNA, maxiter = 3L, num.trees = 50L, verbose = 0L))
})

test_that("seed works", {
  imp1 <- missRanger(irisWithNA, maxiter = 3L, num.trees = 50L, verbose = 0L, seed = 1L)
  imp2 <- missRanger(irisWithNA, maxiter = 3L, num.trees = 50L, verbose = 0L, seed = 1L)
  expect_equal(imp1, imp2)
})

test_that("returnOOB works", {
  imp1 <- missRanger(
    irisWithNA, maxiter = 3L, num.trees = 50L, verbose = 0L, returnOOB = TRUE
  )
  expect_true(length(attributes(imp1)$oob) == 5L)
})

n <- 20L
X <- data.frame(
  x1 = seq_len(n), 
  x2 = log(seq_len(n)), 
  x3 = rep(LETTERS[1:4], n %/% 4),
  x4 = factor(rep(LETTERS[1:2], n %/% 2)),
  x5 = seq_len(n) > n %/% 3
)
X_NA <- generateNA(X, p = seq(0.2, 0.8, length.out = ncol(X)), seed = 13L)
imp <- missRanger(X_NA, maxiter = 3L, num.trees = 20L, verbose = 0L, seed = 1L)

test_that("variable type is respected (integer might get double)", {
  expect_true(!anyNA(imp))
  expect_equal(sapply(imp[-1L], class), sapply(X_NA[-1L], class))
  expect_true(class(imp[, 1L]) %in% c("integer", "numeric"))
})

test_that("non-syntactic column names work", {
  X_NA2 <- X_NA
  colnames(X_NA2) <- paste(1:5, colnames(X_NA))
  imp2 <- missRanger(X_NA2, maxiter = 3L, num.trees = 20L, verbose = 0L, seed = 1L)
  imp3 <- missRanger(
    `1 x1` + `2 x2` + `3 x3` + `4 x4` + `5 x5` ~ `1 x1` + `2 x2` + `3 x3` + `4 x4` + `5 x5`,
    data = X_NA2, 
    maxiter = 3L, 
    num.trees = 20L, 
    verbose = 0L, 
    seed = 1L
  )
  
  imp4 <- missRanger(
    . ~ `1 x1` + `2 x2` + `3 x3` + `4 x4` + `5 x5`,
    data = X_NA2, 
    maxiter = 3L, 
    num.trees = 20L, 
    verbose = 0L, 
    seed = 1L
  )
  
  imp5 <- missRanger(
    `1 x1` + `2 x2` + `3 x3` + `4 x4` + `5 x5` ~ .,
    data = X_NA2, 
    maxiter = 3L, 
    num.trees = 20L, 
    verbose = 0L, 
    seed = 1L
  )
  expect_equal(colnames(X_NA2), colnames(imp2))
  expect_equal(imp, setNames(imp2, colnames(imp)))
  expect_equal(imp2, imp3)
  expect_equal(imp2, imp4)
  expect_equal(imp2, imp5)
  
  # https://github.com/mayer79/missRanger/issues/51
  ir3 <- iris
  colnames(ir3)[2L] <- "IGHV3-43D;IGHV3-9"
  expect_no_error(missRanger(ir3, verbose = 0L))
})

test_that("pmm.k works regarding value range in double columns", {
  imp <- missRanger(X_NA, maxiter = 3L, num.trees = 20L, verbose = 0L, pmm.k = 3L)
  expect_true(all(imp$x2 %in% X$x2))
  
  imp <- missRanger(X_NA, maxiter = 3L, num.trees = 20L, verbose = 0L, pmm.k = 0L)
  expect_false(all(imp$x2 %in% X$x2))
})

test_that("pmm.k works regarding value range in integer columns", {
  imp <- missRanger(X_NA, maxiter = 3L, num.trees = 20L, verbose = 0L, pmm.k = 3L)
  expect_true(all(imp$x1 %in% X$x1))
  
  imp <- missRanger(X_NA, maxiter = 3L, num.trees = 20L, verbose = 0L, pmm.k = 0L)
  expect_false(all(imp$x1 %in% X$x1))
})

test_that("formula interface works with specified left and right side", {
  imp <- missRanger(X_NA, x2 ~ x2 + x3, pmm = 3L, num.trees = 20L, verbose = 0L)
  na_per_col <- colSums(is.na(imp))
  
  expect_equal(na_per_col[[2L]], 0L)
  expect_true(all(na_per_col[-2L] >= 1L))
})

test_that("formula interface works with unspecified right side", {
  imp <- missRanger(X_NA, x2 ~ ., pmm = 3L, num.trees = 20L, verbose = 0L)
  na_per_col <- colSums(is.na(imp))
  
  expect_equal(na_per_col[[2L]], 0L)
  expect_true(all(na_per_col[-2L] >= 1L))
})

test_that("formula interface works with unspecified left side", {
  imp <- missRanger(X_NA, . ~ x1, pmm = 3L, num.trees = 20L, verbose = 0L)
  expect_true(!anyNA(imp))
})

test_that("date and datetime columns work", {
  n <- 20L
  Xd <- data.frame(
    time = seq(Sys.time(), by = "1 min", length.out = n),
    date = seq(Sys.Date(), by = "1 d", length.out = n)
  )
  Xd_NA <- generateNA(Xd, p = 0.3, seed = 13L)
  
  imp <- missRanger(Xd_NA, maxiter = 3L, num.trees = 20L, verbose = 0L, pmm.k = 0L)
  expect_true(expect_true(!anyNA(imp)))
  expect_equal(sapply(imp, class), sapply(Xd_NA, class))
})

test_that("mtry works", {
  imp1 <- missRanger(
    irisWithNA, 
    num.trees = 30L, 
    verbose = 0L, 
    seed = 1L, 
    maxiter = 3L,
    mtry = function(m) max(1, m %/% 3)
  )
  
  imp2 <- missRanger(
    irisWithNA, 
    num.trees = 30L, 
    verbose = 0L, 
    seed = 1L, 
    mtry = 1L, 
    maxiter = 3L
  )
  
  imp3 <- missRanger(
    irisWithNA, 
    num.trees = 30L, 
    verbose = 0L, 
    seed = 1L, 
    maxiter = 3L
  )
  
  expect_equal(imp1, imp2)
  expect_false(isTRUE(all.equal(imp1, imp3)))
})

test_that("verbose >= 1 does not produce an error", {
  capture_output(
    expect_no_error(
      missRanger(
        irisWithNA, 
        num.trees = 3L, 
        verbose = 1L, 
        seed = 1L, 
        maxiter = 1L,
      )
    )
  )

  capture_output(
    expect_no_error(
      missRanger(
        irisWithNA, 
        num.trees = 3L, 
        verbose = 2L, 
        seed = 1L, 
        maxiter = 1L,
      )
    )
  )
})

test_that("Too few case.weights give an error", {
  expect_error(
    missRanger(
      irisWithNA,
      num.trees = 3L,
      verbose = 0L,
      seed = 1L,
      maxiter = 1L,
      case.weights = 1:10
    )
  )
})

test_that("Extremely wide datasets are handled", {
  # https://github.com/mayer79/missRanger/issues/50
  set.seed(1L)
  data <- matrix(rnorm(385 * 20000), nrow = 385L, ncol = 20000L)
  data[5L, 5L] <- NA
  expect_no_error(missRanger(as.data.frame(data), num.trees = 3L, verbose = 0L))
})

test_that("Convert and revert do what they should", {
  n <- 20L
  X <- data.frame(
    x1 = seq_len(n), 
    x2 = rep(LETTERS[1:4], n %/% 4),
    x3 = factor(rep(LETTERS[1:2], n %/% 2)),
    x4 = seq_len(n) > n %/% 3,
    x5 = seq(as.Date("2008-11-01"), as.Date("2008-11-21"), length.out = n)
  )
  X <- generateNA(X, seed = 1L)
  conv <- convert(X)
  reve <- revert(conv)
  
  expect_equal(X, reve)
  expect_equal(
    unname(sapply(conv$X, class)), 
    c("integer", "factor", "factor", "factor", "numeric")
  )
  
  Ximp <- missRanger(X, seed = 1L, verbose = FALSE, pmm.k = 3L)
  expect_equal(sapply(Ximp, class), sapply(X, class))
})

Try the missRanger package in your browser

Any scripts or data that you put into this service are public.

missRanger documentation built on Nov. 19, 2023, 5:14 p.m.