to_do.R
In literanger: Random Forests for Multiple Imputation Based on 'ranger'

# classification...
test_that("predict.all for classification returns numeric matrix of size trees x n", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
  pred <- predict(rf, iris, predict.all = TRUE)
  expect_is(pred$predictions, "matrix")
  expect_equal(dim(pred$predictions),
              c(nrow(iris), rf$num.trees))
})
# TODO: need to adjust this to "check that it is actally bagging"
test_that("Majority vote of predict.all for classification is equal to forest prediction", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
  pred_forest <- predict(rf, iris, predict.all = FALSE)
  pred_trees <- predict(rf, iris, predict.all = TRUE)
  ## Majority vote, NA for ties
  pred_num <- apply(pred_trees$predictions, 1, function(x) {
    res <- which(tabulate(x) == max(tabulate(x)))
    if (length(res) == 1) {
      res
    } else {
      NA
    }
  })
  pred <- integer.to.factor(pred_num, rf$forest$levels)
  idx <- !is.na(pred)
  expect_equal(pred[idx], pred_forest$predictions[idx])
})
# TODO: confusion matrix tests...
test_that("confusion matrix is of right dimension", {
  expect_equal(dim(rg.class$confusion.matrix),
               rep(nlevels(iris$Species), 2))
})

test_that("confusion matrix has right dimnames", {
  expect_equal(dimnames(rg.class$confusion.matrix),
               list(true = levels(iris$Species), predicted = levels(iris$Species)))
})

test_that("confusion matrix rows are the true classes", {
  expect_equal(as.numeric(rowSums(rg.class$confusion.matrix)),
               as.numeric(table(iris$Species)))
})

test_that("confusion matrix rows are the true classes if using case weights", {
  rf <- ranger(Species ~ ., data = iris, num.trees = 5,
               case.weights = c(rep(100, 5), rep(5, 145)))
  expect_equal(as.numeric(rowSums(rf$confusion.matrix)),
               as.numeric(table(iris$Species)))
})

# TODO: inbag stuff?

test_that("Number of samples is right sample fraction, replace=FALSE, default", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE)
  num.inbag <- sapply(rf$inbag.counts, function(x) {
    sum(x > 0)
  })
  sample.fraction <- mean(num.inbag/nrow(iris))

  expect_gt(sample.fraction, 0.6)
  expect_lt(sample.fraction, 0.7)
})

test_that("Number of samples is right sample fraction, replace=FALSE, 0.3", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE, sample.fraction = 0.3)
  num.inbag <- sapply(rf$inbag.counts, function(x) {
    sum(x > 0)
  })
  sample.fraction <- mean(num.inbag/nrow(iris))

  expect_gt(sample.fraction, 0.25)
  expect_lt(sample.fraction, 0.35)
})
test_that("Number of samples is right sample fraction, replace=TRUE, default", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE)
  num.inbag <- sapply(rf$inbag.counts, function(x) {
    sum(x > 0)
  })

  sample.fraction <- mean(num.inbag/nrow(iris))
  expected.sample.fraction <- 1-exp(-1)

  expect_gt(sample.fraction, expected.sample.fraction-0.05)
  expect_lt(sample.fraction, expected.sample.fraction+0.05)
})

test_that("Number of samples is right sample fraction, replace=TRUE, 0.5", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE, sample.fraction = 0.5)
  num.inbag <- sapply(rf$inbag.counts, function(x) {
    sum(x > 0)
  })

  sample.fraction <- mean(num.inbag/nrow(iris))
  expected.sample.fraction <- 1-exp(-0.5)

  expect_gt(sample.fraction, expected.sample.fraction-0.05)
  expect_lt(sample.fraction, expected.sample.fraction+0.05)
})

test_that("Number of samples is right sample fraction, replace=FALSE, 0.3, weighted", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = FALSE, sample.fraction = 0.3, case.weights = runif(nrow(iris)))
  num.inbag <- sapply(rf$inbag.counts, function(x) {
    sum(x > 0)
  })
  sample.fraction <- mean(num.inbag/nrow(iris))

  expect_gt(sample.fraction, 0.25)
  expect_lt(sample.fraction, 0.35)
})

test_that("Number of samples is right sample fraction, replace=TRUE, 0.5, weighted", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, keep.inbag = TRUE, replace = TRUE, sample.fraction = 0.5, case.weights = runif(nrow(iris)))
  num.inbag <- sapply(rf$inbag.counts, function(x) {
    sum(x > 0)
  })

  sample.fraction <- mean(num.inbag/nrow(iris))
  expected.sample.fraction <- 1-exp(-0.5)

  expect_gt(sample.fraction, expected.sample.fraction-0.05)
  expect_lt(sample.fraction, expected.sample.fraction+0.05)
})


# TODO: classification tests (inbag?)
test_that("Inbag counts match sample fraction, classification", {
  ## With replacement
  rf <- ranger(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.2, 0.3, 0.4),
               replace = TRUE, keep.inbag = TRUE)
  inbag <- do.call(cbind, rf$inbag.counts)
  expect_equal(unique(colSums(inbag[iris$Species == "setosa", ])), 30)
  expect_equal(unique(colSums(inbag[iris$Species == "versicolor", ])), 45)
  expect_equal(unique(colSums(inbag[iris$Species == "virginica", ])), 60)

  ## Without replacement
  rf <- ranger(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.1, 0.2, 0.3),
               replace = FALSE, keep.inbag = TRUE)
  inbag <- do.call(cbind, rf$inbag.counts)
  expect_equal(unique(colSums(inbag[iris$Species == "setosa", ])), 15)
  expect_equal(unique(colSums(inbag[iris$Species == "versicolor", ])), 30)
  expect_equal(unique(colSums(inbag[iris$Species == "virginica", ])), 45)

  ## Different order, without replacement
  dat <- iris[c(51:100, 101:150, 1:50), ]
  rf <- ranger(Species ~ ., dat, num.trees = 5, sample.fraction = c(0.1, 0.2, 0.3),
               replace = FALSE, keep.inbag = TRUE)
  inbag <- do.call(cbind, rf$inbag.counts)
  expect_equal(unique(colSums(inbag[dat$Species == "setosa", ])), 15)
  expect_equal(unique(colSums(inbag[dat$Species == "versicolor", ])), 30)
  expect_equal(unique(colSums(inbag[dat$Species == "virginica", ])), 45)
})

test_that("OOB error is correct for 1 tree, classification", {
  n <- 50
  dat <- data.frame(y = factor(rbinom(n, 1, .5)), x = rnorm(n))
  rf <- ranger(y ~ ., dat, num.trees = 1)
  expect_equal(rf$prediction.error, mean(rf$predictions != dat$y, na.rm = TRUE))
})

test_that("OOB error is correct for 1 tree, regression", {
  n <- 50
  dat <- data.frame(y = rbinom(n, 1, .5), x = rnorm(n))
  rf <- ranger(y ~ ., dat, num.trees = 1)
  expect_equal(rf$prediction.error, mean((dat$y - rf$predictions)^2, na.rm = TRUE))
})

test_that("Split points are at (A+B)/2 for numeric features, regression variance splitting", {
  dat <- data.frame(y = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
  rf <- ranger(y ~ x, dat, num.trees = 10)
  split_points <- sapply(1:rf$num.trees, function(i) {
    res <- treeInfo(rf, i)$splitval
    res[!is.na(res)]
  })
  expect_equal(split_points, rep(0.5, rf$num.trees))
})

test_that("Split points are at (A+B)/2 for numeric features, regression maxstat splitting", {
  dat <- data.frame(y = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
  rf <- ranger(y ~ x, dat, num.trees = 10, splitrule = "maxstat", alpha = 1)
  split_points <- sapply(1:rf$num.trees, function(i) {
    res <- treeInfo(rf, i)$splitval
    res[!is.na(res)]
  })
  expect_equal(split_points, rep(0.5, rf$num.trees))
})

test_that("Split points are at (A+B)/2 for numeric features, classification", {
  dat <- data.frame(y = factor(rbinom(100, 1, .5)), x = rbinom(100, 1, .5))
  rf <- ranger(y ~ x, dat, num.trees = 10)
  split_points <- sapply(1:rf$num.trees, function(i) {
    res <- treeInfo(rf, i)$splitval
    res[!is.na(res)]
  })
  expect_equal(split_points, rep(0.5, rf$num.trees))
})

test_that("Tree depth creates trees of correct size", {
  # Recursive function to get tree depth
  depth <- function(rf, tree, i) {
    left <- rf$forest$child.nodeIDs[[tree]][[1]][i] + 1
    right <- rf$forest$child.nodeIDs[[tree]][[2]][i] + 1
    if (left <= 1) {
      0
    } else {
      1 + max(c(depth(rf, tree, left), depth(rf, tree, right)))
    }
  }
  forest_depth <- function(rf) {
    sapply(1:rf$num.trees, depth, rf = rf, i = 1)
  }

  # Depth 1
  rf <- ranger(Species ~ ., iris, num.trees = 5, max.depth = 1)
  expect_true(all(forest_depth(rf) <= 1))

  # Depth 4
  rf <- ranger(Species ~ ., iris, num.trees = 5, max.depth = 4)
  expect_true(all(forest_depth(rf) <= 4))

  # Random depth (deeper trees)
  max.depth <- round(runif(1, 1, 20))
  dat <- data.frame(y = runif(100, 0, 1), x = runif(100, 0, 1))
  rf <- ranger(y ~ ., dat, num.trees = 5, min.node.size = 1, max.depth = max.depth)
  expect_true(all(forest_depth(rf) <= max.depth))
})

test_that("Tree depth 0 equivalent to unlimited", {
  set.seed(200)
  rf1 <- ranger(Species ~ ., iris, num.trees = 5, max.depth = 0)

  set.seed(200)
  rf2 <- ranger(Species ~ ., iris, num.trees = 5)

  expect_equal(sapply(rf1$forest$split.varIDs, length),
               sapply(rf2$forest$split.varIDs, length))
})

# TODO: prediction num tress
test_that("If num.trees set, these number is used for predictions", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
  pred <- predict(rf, iris, predict.all = TRUE, num.trees = 3)
  expect_equal(pred$num.trees, 3)
  expect_equal(dim(pred$predictions), c(nrow(iris), 3))
})

test_that("If num.trees not set, all trees are used for prediction", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, write.forest = TRUE)
  pred <- predict(rf, iris, predict.all = TRUE)
  expect_equal(pred$num.trees, 5)
  expect_equal(dim(pred$predictions), c(nrow(iris), 5))
})

test_that("Warning if predicting with corrected impurity importance", {
  rf <- ranger(Species ~ ., iris, num.trees = 5, importance = "impurity_corrected")
  expect_warning(predict(rf, iris))
})





test_that("results using extratrees split-rule differ from variance", {
    set.seed(42)
    rf1 <- train(data=iris, response_name="Sepal.Length",
                 split_rule="extratrees")
    set.seed(42)
    rf2 <- train(data=iris, response_name="Sepal.Length",
                 split_rule="variance")

    expect_equal(rf1$split_rule, "extratrees")
    expect_equal(rf2$split_rule, "variance")
    expect_false(rf1$oob_error == rf2$oob_error)
})

test_that("results using maxstat split-rule differ from variance", {
    set.seed(42)
    rf1 <- train(data=iris, response_name="Sepal.Length", split_rule="maxstat")
    set.seed(42)
    rf2 <- train(data=iris, response_name="Sepal.Length",
                 split_rule="variance")

    expect_equal(rf1$split_rule, "maxstat")
    expect_equal(rf2$split_rule, "variance")
    expect_false(rf1$oob_error == rf2$oob_error)
})
test_that("results using extratrees split-rule differ from gini", {

    set.seed(42)
    rf1 <- train(data=iris, response_name="Species", split_rule="extratrees")

    set.seed(42)
    rf2 <- train(data=iris, response_name="Species", split_rule="gini")

    expect_equal(rf1$split_rule, "extratrees")
    expect_equal(rf2$split_rule, "gini")
    expect_false(rf1$oob_error == rf2$oob_error)
})







# TODO: variable importance (one day)

test_that("maxstat impurity importance is positive", {
  rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5,
               splitrule = "maxstat", importance = "impurity")
  expect_gt(mean(rf$variable.importance), 0)

  rf <- ranger(Sepal.Length ~ ., iris, num.trees = 5,
               splitrule = "maxstat", importance = "impurity")
  expect_gt(mean(rf$variable.importance), 0)
})

test_that("maxstat corrected impurity importance is positive (on average)", {
  rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 50,
               splitrule = "maxstat", importance = "impurity_corrected")
  expect_gt(mean(rf$variable.importance), 0)

  rf <- ranger(Sepal.Length ~ ., iris, num.trees = 5,
               splitrule = "maxstat", importance = "impurity_corrected")
  expect_gt(mean(rf$variable.importance), 0)
})


test_that("Corrected importance working for sparse data", {
  rf <- ranger(data = dat_sparse, dependent.variable.name = "y", classification = TRUE,
               num.trees = 5, importance = "impurity_corrected")
  expect_equal(names(rf$variable.importance), colnames(dat_sparse)[-1])
})





# TODO: some tests for regression output


test_that("Mean of predict.all for regression is equal to forest prediction", {
  rf <- ranger(Petal.Width ~ ., iris, num.trees = 5, write.forest = TRUE)
  pred_forest <- predict(rf, iris, predict.all = FALSE)
  pred_trees <- predict(rf, iris, predict.all = TRUE)
  expect_equal(rowMeans(pred_trees$predictions), pred_forest$predictions)
})








####### Split weights

test_that("draw predictor weights can be zero or one", {
    weights <- replicate(formals(ranger)$n_tree,
                         sample(c(0, 0, 1, 1)), simplify=F)
    rf <- ranger(data=iris, response_name="Species",
                 draw_predictor_weights=weights)
    # selected_correctly <- sapply(1:rf$num.trees, function(i) {
    #     all(treeInfo(rf, i)[,"splitvarID"] %in% c(which(weights[[i]] > 0) - 1, NA))
    # })
    # expect_true(all(selected_correctly))
})

test_that("Tree-wise split select weights work with 0s", {
  num.trees <- 5
  weights <- replicate(num.trees, sample(c(0, 0, 0.5, 0.5)), simplify = FALSE)
  rf <- ranger(Species ~ ., iris, mtry = 2, num.trees = num.trees,
               split.select.weights = weights)
  selected_correctly <- sapply(1:num.trees, function(i) {
    all(treeInfo(rf, i)[,"splitvarID"] %in% c(which(weights[[i]] > 0) - 1, NA))
  })
  expect_true(all(selected_correctly))
})
Any scripts or data that you put into this service are public.
literanger documentation built on Sept. 30, 2024, 9:15 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
literanger
Random Forests for Multiple Imputation Based on 'ranger'

tests/testthat/ranger/to_do.R
In literanger: Random Forests for Multiple Imputation Based on 'ranger'

Try the literanger package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

literanger Random Forests for Multiple Imputation Based on 'ranger'

tests/testthat/ranger/to_do.R In literanger: Random Forests for Multiple Imputation Based on 'ranger'

Try the literanger package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

literanger
Random Forests for Multiple Imputation Based on 'ranger'

tests/testthat/ranger/to_do.R
In literanger: Random Forests for Multiple Imputation Based on 'ranger'