Nothing
library(blockForest)
library(survival)
context("blockForest")
test_that("Matrix interface works for Probability estimation", {
rf <- blockForest(dependent.variable.name = "Species", data = data.matrix(iris), write.forest = TRUE, probability = TRUE)
expect_equal(rf$treetype, "Probability estimation")
expect_equal(rf$forest$independent.variable.names, colnames(iris)[1:4])
})
test_that("Matrix interface prediction works for Probability estimation", {
dat <- data.matrix(iris)
rf <- blockForest(dependent.variable.name = "Species", data = dat, write.forest = TRUE, probability = TRUE)
expect_silent(predict(rf, dat))
})
test_that("no warning if data.frame has two classes", {
dat <- iris
class(dat) <- c("data.frame", "data.table")
expect_silent(blockForest(Species ~ ., data = dat, verbose = FALSE))
})
test_that("Error if sample fraction is 0 or >1", {
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = 0))
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = 1.1))
})
test_that("Error if sample fraction is vector for regression", {
expect_error(blockForest(Sepal.Length ~ ., iris, num.trees = 5, sample.fraction = c(0.1, 0.2)),
"Error: Invalid value for sample\\.fraction\\. Vector values only valid for classification forests\\.")
})
test_that("Error if sample fraction is vector of wrong size", {
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.1, 0.2)),
"Error: Invalid value for sample\\.fraction\\. Expecting 3 values, provided 2\\.")
})
test_that("Error if element of sample fraction vector is <0 or >1", {
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.1, 1.1, 0.3)),
"Error: Invalid value for sample\\.fraction. Please give a value in \\(0,1\\] or a vector of values in \\[0,1\\]\\.")
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(-3, 0.5, 0.3)),
"Error: Invalid value for sample.fraction. Please give a value in \\(0,1] or a vector of values in \\[0,1\\]\\.")
})
test_that("Error if sum of sample fraction vector is 0", {
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0, 0, 0)),
"Error: Invalid value for sample\\.fraction. Sum of values must be >0\\.")
})
test_that("Error if replace=FALSE and not enough samples", {
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.2, 0.3, 0.4),
replace = FALSE, keep.inbag = TRUE),
"Error: Not enough samples in class virginica; available: 50, requested: 60.")
expect_silent(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.2, 0.3, 0.4),
replace = TRUE, keep.inbag = TRUE))
})
test_that("Error if sample.fraction and case.weights", {
expect_error(blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.2, 0.3, 0.4),
case.weights = rbinom(nrow(iris), 1, 0.5)),
"Error: Combination of case\\.weights and class-wise sampling not supported\\.")
})
test_that("Inbag counts match sample fraction, classification", {
## With replacement
rf <- blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.2, 0.3, 0.4),
replace = TRUE, keep.inbag = TRUE)
inbag <- do.call(cbind, rf$inbag.counts)
expect_equal(unique(colSums(inbag[1:50, ])), 30)
expect_equal(unique(colSums(inbag[51:100, ])), 45)
expect_equal(unique(colSums(inbag[101:150, ])), 60)
## Without replacement
rf <- blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.1, 0.2, 0.3),
replace = FALSE, keep.inbag = TRUE)
inbag <- do.call(cbind, rf$inbag.counts)
expect_equal(unique(colSums(inbag[1:50, ])), 15)
expect_equal(unique(colSums(inbag[51:100, ])), 30)
expect_equal(unique(colSums(inbag[101:150, ])), 45)
})
test_that("Inbag counts match sample fraction, probability", {
## With replacement
rf <- blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.2, 0.3, 0.4),
replace = TRUE, keep.inbag = TRUE, probability = TRUE)
inbag <- do.call(cbind, rf$inbag.counts)
expect_equal(unique(colSums(inbag[1:50, ])), 30)
expect_equal(unique(colSums(inbag[51:100, ])), 45)
expect_equal(unique(colSums(inbag[101:150, ])), 60)
## Without replacement
rf <- blockForest(Species ~ ., iris, num.trees = 5, sample.fraction = c(0.1, 0.2, 0.3),
replace = FALSE, keep.inbag = TRUE, probability = TRUE)
inbag <- do.call(cbind, rf$inbag.counts)
expect_equal(unique(colSums(inbag[1:50, ])), 15)
expect_equal(unique(colSums(inbag[51:100, ])), 30)
expect_equal(unique(colSums(inbag[101:150, ])), 45)
})
test_that("as.factor() in formula works", {
n <- 20
dt <- data.frame(x = runif(n), y = rbinom(n, 1, 0.5))
expect_silent(blockForest(as.factor(y) ~ ., data = dt, num.trees = 5, write.forest = TRUE))
})
test_that("holdout mode holding out data with 0 weight", {
weights <- rbinom(nrow(iris), 1, 0.5)
rf <- blockForest(Species ~ ., iris, num.trees = 5, importance = "permutation",
case.weights = weights, replace = FALSE, sample.fraction = 0.632*mean(weights),
holdout = TRUE, keep.inbag = TRUE)
inbag <- data.frame(rf$inbag.counts)
expect_true(all(inbag[weights == 0, ] == 0))
})
test_that("holdout mode uses holdout OOB data", {
weights <- rbinom(nrow(iris), 1, 0.5)
rf <- blockForest(Species ~ ., iris, num.trees = 5, importance = "permutation",
case.weights = weights, replace = FALSE, sample.fraction = 0.632*mean(weights),
holdout = TRUE, keep.inbag = TRUE)
expect_false(any(is.na(rf$predictions[weights == 0])))
expect_true(all(is.na(rf$predictions[weights == 1])))
})
test_that("holdout mode not working if no weights", {
expect_error(blockForest(Species ~ ., iris, num.trees = 5, importance = "permutation", holdout = TRUE))
})
test_that("holdout mode: no OOB prediction if no 0 weights", {
weights <- runif(nrow(iris))
rf <- blockForest(Species ~ ., iris, num.trees = 5, importance = "permutation",
case.weights = weights, replace = FALSE,
holdout = TRUE, keep.inbag = TRUE)
expect_true(all(is.na(rf$predictions)))
})
test_that("OOB error is correct for 1 tree, classification", {
n <- 50
dat <- data.frame(y = factor(rbinom(n, 1, .5)), x = rnorm(n))
rf <- blockForest(y ~ ., dat, num.trees = 1)
expect_equal(rf$prediction.error, mean(rf$predictions != dat$y, na.rm = TRUE))
})
test_that("OOB error is correct for 1 tree, probability prediction", {
n <- 50
dat <- data.frame(y = factor(rbinom(n, 1, .5)), x = rnorm(n))
rf <- blockForest(y ~ ., dat, num.trees = 1, probability = TRUE)
prob <- c(rf$predictions[dat$y == "0", 1], rf$predictions[dat$y == "1", 2])
expect_equal(rf$prediction.error, mean((1 - prob)^2, na.rm = TRUE))
})
test_that("OOB error is correct for 1 tree, regression", {
n <- 50
dat <- data.frame(y = rbinom(n, 1, .5), x = rnorm(n))
rf <- blockForest(y ~ ., dat, num.trees = 1)
expect_equal(rf$prediction.error, mean((dat$y - rf$predictions)^2, na.rm = TRUE))
})
test_that("Missing value columns detected in training", {
dat <- iris
dat[4, 5] <- NA
dat[25, 1] <- NA
expect_error(blockForest(Species ~ ., dat, num.trees = 5), "Missing data in columns: Species, Sepal.Length")
})
test_that("No error if missing value in irrelevant column, training", {
dat <- iris
dat[1, "Sepal.Width"] <- NA
expect_silent(blockForest(Species ~ Sepal.Length, dat, num.trees = 5))
})
test_that("No error if missing value in irrelevant column, prediction", {
rf <- blockForest(Species ~ Sepal.Length, iris, num.trees = 5)
dat <- iris
dat[1, "Sepal.Width"] <- NA
expect_silent(predict(rf, dat))
})
test_that("Split points are at (A+B)/2 for numeric features, regression variance splitting", {
dat <- data.frame(y = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
rf <- blockForest(y ~ x, dat, num.trees = 10)
split_points <- mapply(function(varID, value) {
value[varID > 0]
},
rf$forest$split.varIDs,
rf$forest$split.values
)
expect_equal(split_points, rep(0.5, rf$num.trees))
})
test_that("Split points are at (A+B)/2 for numeric features, regression maxstat splitting", {
dat <- data.frame(y = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
rf <- blockForest(y ~ x, dat, num.trees = 10, splitrule = "maxstat", alpha = 1)
split_points <- mapply(function(varID, value) {
value[varID > 0]
},
rf$forest$split.varIDs,
rf$forest$split.values
)
expect_equal(split_points, rep(0.5, rf$num.trees))
})
test_that("Split points are at (A+B)/2 for numeric features, classification", {
dat <- data.frame(y = factor(rbinom(100, 1, .5)), x = rbinom(100, 1, .5))
rf <- blockForest(y ~ x, dat, num.trees = 10)
split_points <- mapply(function(varID, value) {
value[varID > 0]
},
rf$forest$split.varIDs,
rf$forest$split.values
)
expect_equal(split_points, rep(0.5, rf$num.trees))
})
test_that("Split points are at (A+B)/2 for numeric features, probability", {
dat <- data.frame(y = factor(rbinom(100, 1, .5)), x = rbinom(100, 1, .5))
rf <- blockForest(y ~ x, dat, num.trees = 10, probability = TRUE)
split_points <- mapply(function(varID, value) {
value[varID > 0]
},
rf$forest$split.varIDs,
rf$forest$split.values
)
expect_equal(split_points, rep(0.5, rf$num.trees))
})
test_that("Split points are at (A+B)/2 for numeric features, survival logrank splitting", {
dat <- data.frame(time = runif(100, 1, 10), status = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
rf <- blockForest(Surv(time, status) ~ x, dat, num.trees = 10, splitrule = "logrank")
split_points <- mapply(function(varID, value) {
value[varID > 0]
},
rf$forest$split.varIDs,
rf$forest$split.values
)
expect_equal(split_points, rep(0.5, rf$num.trees))
})
test_that("Split points are at (A+B)/2 for numeric features, survival C-index splitting", {
dat <- data.frame(time = runif(100, 1, 10), status = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
rf <- blockForest(Surv(time, status) ~ x, dat, num.trees = 10, splitrule = "C")
split_points <- mapply(function(varID, value) {
value[varID > 0]
},
rf$forest$split.varIDs,
rf$forest$split.values
)
expect_equal(split_points, rep(0.5, rf$num.trees))
})
test_that("Split points are at (A+B)/2 for numeric features, survival maxstat splitting", {
dat <- data.frame(time = runif(100, 1, 10), status = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
rf <- blockForest(Surv(time, status) ~ x, dat, num.trees = 10, splitrule = "maxstat", alpha = 1)
split_points <- mapply(function(varID, value) {
value[varID > 0]
},
rf$forest$split.varIDs,
rf$forest$split.values
)
expect_equal(split_points, rep(0.5, rf$num.trees))
})
test_that("No error if variable named forest", {
dat <- iris
dat$forest <- rnorm(150)
rf <- blockForest(Species ~ ., dat, num.trees = 5)
expect_silent(predict(rf, dat))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.