Nothing
context("Tests the gpe function, gpe base learner functions and gpe sampling function")
test_that("gpe works with default settings and gives previous results", {
#####
# Continous outcome
set.seed(8782650)
fit <- gpe(
Ozone ~ .,
data = airquality,
base_learners = list(
gpe_trees(ntrees = 10)))
fit <- fit$glmnet.fit$glmnet.fit
fit <- fit[c("a0", "beta")]
fit$beta <- fit$beta[1:10, 1:10]
# save_to_test(fit, "gpe_fit1")
expect_equal(fit, read_to_test("gpe_fit1"), tolerance = 1.49e-08)
#####
# Binary outcome
fit <- gpe(diabetes ~ ., PimaIndiansDiabetes,
base_learners = list(gpe_linear(), gpe_trees(ntrees = 10)))
fit <- fit$glmnet.fit$glmnet.fit
fit <- fit[c("a0", "beta")]
fit$beta <- fit$beta[, 1:10]
# save_to_test(fit, "gpe_fit1_binary")
expect_equal(fit, read_to_test("gpe_fit1_binary"))
})
test_that("gpe works with factor predictor and gpe_linear", {
dat <- data.frame(
y = 1:30,
x = factor(rep(1:3, 10)),
z = factor(rep(1:2, 15)))
expect_silent(fit <- gpe(y ~ ., dat, base_learners = list(gpe_linear())))
expect_equal(
row.names(fit$glmnet.fit$glmnet.fit$beta),
c("(Intercept)", "lTerm(x == \"2\", scale = 0.48)",
"lTerm(x == \"3\", scale = 0.48)", "lTerm(z == \"2\", scale = 0.51)"))
})
test_that("Sampling and subsampling works and is used in gpe", {
#####
# Bootstrap sample
set.seed(seed <- 9495688)
func <- gpe_sample(1)
out <- func(100, rep(1, 100))
expect_true(any(!1:100 %in% out))
expect_length(out, 100)
#####
# Subsampling with lower fraction
func <- gpe_sample(.5)
out <- func(100, rep(1, 100))
expect_true(any(!1:100 %in% out))
expect_true(!any(table(out) > 1))
expect_length(out, 50)
#####
# Bootstrap with weights
func <- gpe_sample(1)
ws <- rep(1, 100)
ws[1] <- 100
out <- func(100, ws)
expect_true(names(sort(table(out),decreasing=TRUE))[1] == "1")
expect_length(out, 100)
#####
# Bootstrap is used with a lower fraction and unequal weights
func <- gpe_sample(.5)
expect_message(
out <- func(100, ws),
"Some weights do not match. Bootstrap will be used instead of subsampling to reflect weights")
# should only show the message once
expect_silent(out <- func(100, ws))
expect_true(names(sort(table(out),decreasing=TRUE))[1] == "1")
expect_length(out, 50)
})
test_that("gpe_trees gives previous results for continous outcomes", {
# partykit-1.2.0 cannot handle transformation in formulas in the development
# version. Thus, we "cut" Wind prior to get a factor
airquality$Wind_cut <- cut(airquality$Wind, breaks = 3)
on.exit(airquality$Wind_cut <- NULL)
#####
# Default settings with fewer trees
func <- gpe_trees(ntrees = 10)
args <- list(
formula = Ozone ~ Solar.R + Wind + Wind_cut,
data = airquality,
weights = rep(1, nrow(airquality)),
sample_func = gpe_sample(.5),
family = "gaussian")
set.seed(seed <- 6772769)
out <- do.call(func, c(args))
expect_known_value(out, "previous_results/gpe_tree_1.RDS", update = FALSE)
#####
# Only use stumps
func <- gpe_trees(ntrees = 10, maxdepth = 1)
set.seed(seed)
out2 <- do.call(func, args)
expect_lt(length(out2), length(out))
expect_true(!any(grepl("\\$", out2)))
# save_to_test(out2, "gpe_tree_2")
expect_equal(out2, read_to_test("gpe_tree_2"))
#####
# Without learning rate
func <- gpe_trees(ntrees = 10, learnrate = 0)
set.seed(seed)
out3 <- do.call(func, args)
expect_known_value(out3, "previous_results/gpe_tree_3.RDS", update = FALSE)
#####
# Without removal of duplicates and complements
func <- gpe_trees(ntrees = 10, remove_duplicates_complements = FALSE)
set.seed(seed)
out4 <- do.call(func, args)
expect_gt(length(out4), length(out))
expect_true(all(out %in% out4))
# save_to_test(out4, "gpe_tree_4")
expect_equal(out4, read_to_test("gpe_tree_4"))
})
test_that("gpe_trees gives previous results for binary outcomes", {
#####
# Works with binomial outcome
func <- gpe_trees(ntrees = 10)
args <- list(
formula = diabetes ~ .,
data = PimaIndiansDiabetes,
weights = rep(1, nrow(PimaIndiansDiabetes)),
sample_func = gpe_sample(),
family = "binomial")
set.seed(seed <- 8779606)
out <- do.call(func, args)
# save_to_test(out, "gpe_tree_binary_1")
expect_equal(out, read_to_test("gpe_tree_binary_1"))
func <- gpe_trees(ntrees = 10, use_grad = FALSE)
set.seed(seed)
out2 <- do.call(func, args)
expect_true(any(!out2 %in% out))
# save_to_test(out2, "gpe_tree_binary_1_w_glm")
expect_equal(out2, read_to_test("gpe_tree_binary_1_w_glm"))
#####
# Binary without learning rate
func <- gpe_trees(ntrees = 10, learnrate = 0)
set.seed(seed)
out <- do.call(func, args)
# save_to_test(out, "gpe_tree_binary_2")
expect_equal(out, read_to_test("gpe_tree_binary_2"))
})
test_that("gpe_earth gives expected_result for continous outcomes", {
#####
# With defaults settings
func <- gpe_earth()
args <- list(
formula = Ozone ~ Solar.R + Wind +
cut(Wind, breaks = 5), # Notice that we include a factor. There where
# some issues with factor at one point
data = airquality,
weights = rep(1, sum(complete.cases(airquality))),
sample_func = gpe_sample(.5),
family = "gaussian")
set.seed(seed <- 6817505)
out <- do.call(func, args)
# save_to_test(out, "gpe_earth_1")
expect_equal(out, read_to_test("gpe_earth_1"))
#####
# Additive model
func <- gpe_earth(degree = 1)
set.seed(seed)
out2 <- do.call(func, args)
# Match any string not containing *
expect_match(out2, "^[^\\*]+$", perl = TRUE)
# save_to_test(out2, "gpe_earth_2")
expect_equal(out2, read_to_test("gpe_earth_2"))
#####
# Without high learning rate
func <- gpe_earth(learnrate = 1)
set.seed(seed)
out3 <- do.call(func, args)
expect_true(length(setdiff(out3, out)) > 0)
# save_to_test(out, "gpe_earth_3")
expect_equal(out, read_to_test("gpe_earth_3"))
#####
# With different number of end nodes
func <- gpe_earth(nk = 5, ntrain = 1)
set.seed(seed)
out4 <- do.call(func, args)
expect_length(out4, 4) # 5 - 1 for intercept
func <- gpe_earth(nk = 50, ntrain = 1)
set.seed(seed)
out5 <- do.call(func, args)
expect_gt(length(out5), length(out4))
#####
# Without standardizing
func <- gpe_earth(normalize = FALSE)
set.seed(seed)
out6 <- do.call(func, args)
expect_true(!any(grepl("scale\\ ?=", out6, perl = TRUE)))
# save_to_test(out6, "gpe_earth_6")
expect_equal(out6, read_to_test("gpe_earth_6"))
#####
# No using threshold gives different results
func <- gpe_earth(cor_thresh = 1.01)
set.seed(seed)
out7 <- do.call(func, args)
expect_gt(length(out7), length(out))
# Should give the same
func <- gpe_earth(cor_thresh = NULL)
set.seed(seed)
out8 <- do.call(func, args)
expect_equal(out8, out7)
# save_to_test(out8[1:25], "gpe_earth_6.1")
expect_equal(out8[1:25], read_to_test("gpe_earth_6.1"))
######
# Continous outcome with defaults with two factors
func <- gpe_earth(ntrain = 10)
args <- list(
formula = Ozone ~ Solar.R + Wind +
cut(Wind, breaks = 5) + # Notice that we include a factor. There where
cut(Temp, breaks = 5), # some issues with factor at one point
data = airquality,
weights = rep(1, sum(complete.cases(airquality))),
sample_func = gpe_sample(.5),
family = "gaussian")
set.seed(seed)
out <- do.call(func, args)
# save_to_test(out, "gpe_earth_7")
expect_equal(out, read_to_test("gpe_earth_7"))
})
test_that("gpe_earth gives previous results for binary outcomes", {
#####
# With learning rate
func <- gpe_earth(ntrain = 20)
args <- list(
formula = diabetes ~ .,
data = PimaIndiansDiabetes,
weights = rep(1, nrow(PimaIndiansDiabetes)),
sample_func = gpe_sample(),
family = "binomial")
set.seed(seed <- 1067229)
expect_message(out <- do.call(func, args),
"Beware that gpe_earth will use gradient boosting")
# save_to_test(out, "gpe_earth_binary")
expect_equal(out, read_to_test("gpe_earth_binary"))
#####
# Without learning rate
func <- gpe_earth(ntrain = 20, learnrate = 0)
set.seed(seed)
expect_message(out1 <- do.call(func, args),
"Beware that gpe_earth will use L2 loss to train")
expect_true(any(!out1 %in% out))
# save_to_test(out1, "gpe_earth_binary_no_learn")
expect_equal(out1, read_to_test("gpe_earth_binary_no_learn"))
})
test_that("gpe_earth works with ordered factors and does not alter contrast options", {
set.seed(1660180)
n <- 100
dat_frame <- data.frame(
y = rnorm(n),
# Ordered factor
x1 = as.ordered(sample.int(15, n, replace = TRUE)))
old <- c('contr.treatment', 'contr.poly')
options (contrasts = old)
tmp <- gpe_earth(ntrain = 5)
fit <- tmp(
formula = y ~ .,
data = dat_frame,
weights = rep(1, n),
sample_func = gpe_sample(.5),
family = "gaussian")
expect_equal(getOption("contrasts"), old)
})
test_that("gpe_linear gives expected_result", {
func <- gpe_linear()
args <- list(
formula = Ozone ~ Solar.R + Wind + cut(Wind, breaks = 3),
data = airquality,
weights = rep(1, sum(complete.cases(airquality))))
out <- do.call(func, args)
# save_to_test(out, "gpe_linear_1")
expect_equal(out, read_to_test("gpe_linear_1"), tolerance = 1.490116e-08)
# No winsorization
func <- gpe_linear(winsfrac = 0)
out <- do.call(func, args)
# save_to_test(out, "gpe_linear_2")
expect_equal(out, read_to_test("gpe_linear_2"), tolerance = 1.490116e-08)
# No scaling
func <- gpe_linear(normalize = F)
out <- do.call(func, args)
# save_to_test(out, "gpe_linear_3")
expect_equal(out, read_to_test("gpe_linear_3"), tolerance = 1.490116e-08)
# do not do either
func <- gpe_linear(normalize = F, winsfrac = 0)
out <- do.call(func, args)
expect_equal(out[1:2], c("lTerm(Solar.R)", "lTerm(Wind)"))
})
test_that("gpe_linear returns terms for factor levels", {
set.seed(1660180)
n <- 20
dat_frame <- data.frame(
y = rnorm(n),
x1 = factor(sample.int(4, n, replace = TRUE)))
tmp <- gpe_linear()
out <- tmp(y ~ x1, dat_frame)
expect_length(out, 3)
expect_equal(out, c(
"lTerm(x1 == '2', scale = 0.41)", "lTerm(x1 == '3', scale = 0.5)",
"lTerm(x1 == '4', scale = 0.41)"))
# Also works w/ ordered factors
dat_frame$x1 <- ordered(dat_frame$x1)
out <- tmp(y ~ x1, dat_frame)
expect_length(out, 3)
})
test_that("eTerm works for logicals", expect_length(eTerm(c(F, T, T, T)), 4))
test_that("get_cv.glmnet_args works", {
#####
# Not changing defaults works
get_cv.glmnet_args <- with(environment(gpe), get_cv.glmnet_args)
no_args <- get_cv.glmnet_args(list(), c(), c(), c(), "boh")
w_args <- get_cv.glmnet_args(list(xyz = 1), c(), c(), c(), "boh")
expect_equal(no_args, w_args[names(w_args) != "xyz"])
#####
# Changing defaults works
change_def <- get_cv.glmnet_args(list(parallel = T), c(), c(), c(), "boh")
expect_equal(no_args[names(no_args) != "parallel"],
change_def[names(change_def) != "parallel"])
expect_false(no_args$parallel == change_def$parallel)
})
test_that("gpe_cv.glmnet gives same results as cv.glmnet", {
# Create dummy data
set.seed(3782347)
X <- rnorm(90)
dim(X) <- c(30, 3)
y <- rpois(30, 1)
ws <- runif(30)
get_cv.glmnet_args <- with(environment(gpe), get_cv.glmnet_args)
#####
# fit with defaults
def <- get_cv.glmnet_args(
args = list(), x = X, y = y, weights = ws, family = "poisson")
set.seed(seed <- 9629006)
f1 <- do.call(glmnet::cv.glmnet, def)
set.seed(seed)
f2 <- gpe_cv.glmnet()(x = X, y = y, weights = ws, family = "poisson")
expect_equal(f1, f2)
######
# change lambda argument
def <- get_cv.glmnet_args(
args = list(lambda = c(.1, .01)),
x = X, y = y, weights = ws, family = "poisson")
set.seed(seed)
f1 <- do.call(glmnet::cv.glmnet, def)
set.seed(seed)
f2 <- gpe_cv.glmnet(lambda = c(.1, .01))(
x = X, y = y, weights = ws, family = "poisson")
expect_equal(f1, f2)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.