tests/testthat/test-regression.R

## Tests for random forests for regression

## Initialize the random forest for regression
iris_mat <- data.matrix(iris)
set.seed(42)
rf_df <- train(data=iris, response_name="Sepal.Length")
set.seed(42)
rf_mat <- train(data=iris_mat, response_name="Sepal.Length")

## Basic tests (for all random forests equal)
test_that("training result has correct class and size", {
    expect_is(rf_df, "literanger")
    expect_equal(length(rf_df), 14)
})

test_that("prediction is a numeric vector", {
    pred <- predict(rf_df, newdata=iris)
    expect_is(pred$values, "numeric")
    expect_null(dim(pred$values))
})

test_that("error if sample fraction is vector for regression", {
    expect_error(
        train(data=iris, response_name="Sepal.Length",
              sample_fraction=c(0.1, 0.2)),
        paste("Invalid value for 'sample_fraction'. Vector values only valid",
              "for classification forests."),
        fixed=TRUE
    )
})


test_that("trained forest has default number of trees", {
    expect_equal(rf_df$n_tree, formals(literanger::train)$n_tree)
})

test_that("trained forest has correct predictor names", {
    expect_equal(setdiff(names(iris), rf_df$predictor_names), "Sepal.Length")
})

test_that("can use matrix interface for training", {
    expect_equal(rf_mat$tree_type, "regression")
    expect_equal(setdiff(names(iris), rf_mat$predictor_names), "Sepal.Length")
})

test_that("can use matrix interface for prediction", {
    expect_silent(predict(rf_mat, newdata=iris_mat, prediction_type="bagged"))
})

test_that("can use save_memory option when training", {
    rf <- train(data=iris, response_name="Sepal.Length", save_memory=TRUE)
    expect_equal(rf$tree_type, "regression")
})

test_that("can omit response variable during prediction", {
    n <- 50
    dat1 <- data.frame(x1=runif(n), x2=runif(n), y=rbinom(n, 1, 0.5))
    rf1 <- train(data=dat1, response_name="y")

    expect_silent(predict(rf1, newdata=dat1))
    expect_silent(predict(rf1, newdata=dat1[, 1:2]))

    dat2 <- data.frame(y=rbinom(n, 1, 0.5), x1=runif(n), x2=runif(n))
    rf2 <- train(data=dat2, response_name="y")

    expect_silent(predict(rf2, newdata=dat2))
    expect_silent(predict(rf2, newdata=dat2[, 2:3]))
})

test_that("predicted values not all the same", {
    n <- 50
    dat <- data.frame(x=runif(n), y=rbinom(n, 1, 0.5))
    rf <- train(data=dat, response_name="y")

    expect_gt(diff(range(predict(rf, newdata=dat)$values)), 0)
    expect_gt(diff(range(predict(rf, newdata=dat[, 1, drop=FALSE])$values)), 0)
})


## Splitrule
test_that("default split metric is 'variance'", {
    set.seed(42)
    rf_var <- train(data=iris, response_name="Sepal.Length",
                    split_rule="variance")

    expect_equal(rf_df$split_rule, "variance")
    expect_equal(rf_var$split_rule, "variance")
    expect_equal(rf_df$oob_error, rf_var$oob_error)
})

Try the literanger package in your browser

Any scripts or data that you put into this service are public.

literanger documentation built on Sept. 30, 2024, 9:15 a.m.