Nothing
context("rbart")
source(system.file("common", "friedmanData.R", package = "dbarts"), local = TRUE)
n.g <- 5L
if (getRversion() >= "3.6.0") {
oldSampleKind <- RNGkind()[3L]
suppressWarnings(RNGkind(sample.kind = "Rounding"))
}
g <- sample(n.g, length(testData$y), replace = TRUE)
if (getRversion() >= "3.6.0") {
suppressWarnings(RNGkind(sample.kind = oldSampleKind))
rm(oldSampleKind)
}
sigma.b <- 1.5
b <- rnorm(n.g, 0, sigma.b)
testData$y <- testData$y + b[g]
testData$g <- g
testData$b <- b
rm(g, b)
test_that("rbart fails with invalid group.by", {
expect_error(rbart_vi(y ~ x, testData, group.by = NA, n.threads = 1L))
expect_error(rbart_vi(y ~ x, testData, group.by = not_a_symbol, n.threads = 1L))
expect_error(rbart_vi(y ~ x, testData, group.by = testData$g[-1L], n.threads = 1L))
expect_error(rbart_vi(y ~ x, testData, group.by = "not a factor", n.threads = 1L))
})
test_that("rbart finds group.by", {
df <- as.data.frame(testData$x)
colnames(df) <- paste0("x_", seq_len(ncol(testData$x)))
df$y <- testData$y
df$g <- testData$g
expect_is(rbart_vi(y ~ . - g, df, group.by = g,
n.samples = 1L, n.burn = 0L, n.thin = 1L, n.chains = 1L,
n.trees = 25L, n.threads = 1L, verbose = FALSE),
"rbart")
g <- df$g
df$g <- NULL
expect_is(rbart_vi(y ~ . , df, group.by = g,
n.samples = 1L, n.burn = 0L, n.thin = 1L, n.chains = 1L,
n.trees = 25L, n.threads = 1L, verbose = FALSE),
"rbart")
y <- testData$y
x <- testData$x
expect_is(rbart_vi(y ~ x, group.by = g,
n.samples = 1L, n.burn = 0L, n.thin = 1L, n.chains = 1L,
n.trees = 25L, n.threads = 1L, verbose = FALSE),
"rbart")
})
test_that("works with multiple threads", {
x <- testData$x
y <- testData$y
g <- factor(testData$g)
set.seed(0)
expect_is(rbart_vi(y ~ x, group.by = g,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 2L, verbose = FALSE),
"rbart")
})
test_that("is reproducible", {
x <- testData$x
y <- testData$y
g <- factor(testData$g)
fit1 <- rbart_vi(y ~ x, group.by = g,
n.samples = 5L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 3L, n.threads = 2L, verbose = FALSE,
seed = 0L)
fit2 <- rbart_vi(y ~ x, group.by = g,
n.samples = 5L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 3L, n.threads = 2L, verbose = FALSE,
seed = 0L)
set.seed(0)
seeds <- sample.int(.Machine$integer.max, 2L)
set.seed(seeds[1L])
fit3 <- rbart_vi(y ~ x, group.by = g,
n.samples = 5L, n.burn = 0L, n.thin = 1L, n.chains = 1L,
n.trees = 3L, n.threads = 1L, verbose = FALSE)
set.seed(seeds[2L])
fit4 <- rbart_vi(y ~ x, group.by = g,
n.samples = 5L, n.burn = 0L, n.thin = 1L, n.chains = 1L,
n.trees = 3L, n.threads = 1L, verbose = FALSE)
expect_equal(fit1$yhat.train, fit2$yhat.train)
expect_equal(fit1$ranef, fit2$ranef)
yhat <- aperm(array(c(fit3$yhat.train, fit4$yhat.train),
c(dim(fit3$yhat.train), 2L)),
c(3L, 1L, 2L))
expect_equal(yhat, fit1$yhat.train)
ranef <- aperm(array(c(fit3$ranef, fit4$ranef),
c(dim(fit3$ranef), 2L)),
c(3L, 1L:2L))
expect_equal(as.vector(ranef), as.vector(fit1$ranef))
})
test_that("extract works at baseline", {
x <- testData$x
y <- testData$y
g <- factor(testData$g)
set.seed(0)
rbartFit <- rbart_vi(y ~ x, group.by = g,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L, verbose = FALSE)
expect_equal(extract(rbartFit, type = "bart", combineChains = FALSE), rbartFit$yhat.train)
expect_equal(extract(rbartFit, type = "ranef", combineChains = FALSE), rbartFit$ranef)
expect_equal(extract(rbartFit, type = "ev", combineChains = FALSE), rbartFit$yhat.train + unname(rbartFit$ranef[,,as.character(g)]))
ppd <- extract(rbartFit, type = "ppd", combineChains = FALSE)
sigma.hat <- apply(ppd - extract(rbartFit, type = "ev", combineChains = FALSE), c(1L, 2L), sd)
expect_true(cor(as.vector(sigma.hat), as.vector(rbartFit$sigma)) >= 0.85) # silly test with 7 samples
set.seed(0)
rbartFit.2 <- rbart_vi(y ~ x, group.by = g,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L, combineChains = TRUE,
verbose = FALSE)
expect_equal(extract(rbartFit, type = "ev", combineChains = TRUE),
extract(rbartFit.2, type = "ev", combineChains = TRUE))
expect_equal(extract(rbartFit, type = "ev", combineChains = FALSE),
extract(rbartFit.2, type = "ev", combineChains = FALSE))
})
test_that("fitted works correctly", {
x <- testData$x
y <- testData$y
g <- factor(testData$g)
rbartFit <- rbart_vi(y ~ x, group.by = g, test = x, group.by.test = g, offset.test = 5,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L, verbose = FALSE)
expect_equal(as.vector(rbartFit$yhat.train), as.vector(rbartFit$yhat.test) - 5)
expect_equal(apply(rbartFit$yhat.train + unname(rbartFit$ranef[,,as.character(g)]), 3L, mean), fitted(rbartFit))
expect_equal(apply(rbartFit$yhat.test + unname(rbartFit$ranef[,,as.character(g)]), 3L, mean), fitted(rbartFit, sample = "test"))
expect_equal(fitted(rbartFit, type = "ranef"), rbartFit$ranef.mean)
})
test_that("predict matches fitted", {
x <- testData$x
y <- testData$y
g <- factor(testData$g)
set.seed(0)
rbartFit.0 <- rbart_vi(y ~ x, group.by = g,
n.samples = 14L, n.burn = 0L, n.thin = 1L, n.chains = 1L,
n.trees = 25L, n.threads = 1L, keepTrees = TRUE,
verbose = FALSE)
expect_equal(fitted(rbartFit.0), apply(predict(rbartFit.0, x, g), 2L, mean))
set.seed(0)
rbartFit.0 <- rbart_vi(y ~ x, group.by = g,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L, keepTrees = TRUE, combineChains = FALSE,
verbose = FALSE)
expect_equal(fitted(rbartFit.0), apply(predict(rbartFit.0, x, g, combineChains = FALSE), 3L, mean))
set.seed(0)
rbartFit.1 <- rbart_vi(y ~ x, group.by = g,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L, keepTrees = TRUE, combineChains = TRUE,
verbose = FALSE)
expect_equal(fitted(rbartFit.1), apply(predict(rbartFit.1, x, g), 2L, mean))
expect_equal(predict(rbartFit.0, x, g), predict(rbartFit.1, x, g))
expect_equal(predict(rbartFit.0, x, g, combineChains = FALSE), predict(rbartFit.1, x, g, combineChains = FALSE))
})
test_that("works with missing levels", {
n.train <- 80L
x <- testData$x[seq_len(n.train),]
y <- testData$y[seq_len(n.train)]
g <- factor(testData$g[seq_len(n.train)])
x.test <- testData$x[seq.int(n.train + 1L, nrow(testData$x)),]
g.test <- factor(testData$g[seq.int(n.train + 1L, nrow(testData$x))], levels(g))
levels(g.test)[5L] <- "6"
# check that predict works when we've fit with missing levels
rbartFit <- suppressWarnings(rbart_vi(y ~ x, group.by = g, test = x.test, group.by.test = g.test,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L, keepTrees = TRUE,
verbose = FALSE))
expect_equal(apply(predict(rbartFit, x.test, g.test), 2L, mean), fitted(rbartFit, sample = "test"))
expect_equal(apply(predict(rbartFit, x.test, g.test, combineChains = FALSE), 3L, mean), fitted(rbartFit, sample = "test"))
# check that predicts works for completely new levels
levels(g.test) <- c(levels(g.test)[-5L], as.character(seq.int(7L, 28L)))
set.seed(0)
ranef.pred <- suppressWarnings(predict(rbartFit, x.test, g.test, type = "ranef", combineChains = FALSE))
expect_equal(ranef.pred[,,as.character(1L:4L)], rbartFit$ranef[,,as.character(1L:4L)])
expect_true(cor(as.numeric(rbartFit$tau), as.numeric(apply(ranef.pred[,,5L:26L], c(1L, 2L), sd))) > 0.90)
# check again with combineChains as TRUE at the top level
g.test <- droplevels(g.test)
levels(g.test) <- c(levels(g)[-5L], "6")
rbartFit <- suppressWarnings(rbart_vi(y ~ x, group.by = g, test = x.test, group.by.test = g.test,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L, keepTrees = TRUE, combineChains = TRUE,
verbose = FALSE))
expect_equal(apply(predict(rbartFit, x.test, g.test), 2L, mean), fitted(rbartFit, sample = "test"))
expect_equal(apply(predict(rbartFit, x.test, g.test, combineChains = FALSE), 3L, mean), fitted(rbartFit, sample = "test"))
levels(g.test) <- c(levels(g.test)[-5L], as.character(seq.int(7L, 28L)))
set.seed(0)
ranef.pred <- suppressWarnings(predict(rbartFit, x.test, g.test, type = "ranef"))
expect_equal(as.numeric(ranef.pred[,as.character(1L:4L)]),
as.numeric(rbartFit$ranef[,as.character(1L:4L)]))
expect_true(cor(as.numeric(rbartFit$tau), as.numeric(apply(ranef.pred[,5L:26L], 1L, sd))) > 0.90)
# check one last time with one chain
g.test <- droplevels(g.test)
levels(g.test) <- c(levels(g)[-5L], "6")
rbartFit <- suppressWarnings(rbart_vi(y ~ x, group.by = g, test = x.test, group.by.test = g.test,
n.samples = 14L, n.burn = 0L, n.thin = 1L, n.chains = 1L,
n.trees = 25L, n.threads = 1L, keepTrees = TRUE,
verbose = FALSE))
expect_equal(apply(predict(rbartFit, x.test, g.test), 2L, mean), fitted(rbartFit, sample = "test"))
levels(g.test) <- c(levels(g.test)[-5L], as.character(seq.int(7L, 28L)))
set.seed(0)
ranef.pred <- suppressWarnings(predict(rbartFit, x.test, g.test, type = "ranef"))
expect_equal(as.numeric(ranef.pred[,as.character(1L:4L)]),
as.numeric(rbartFit$ranef[,as.character(1L:4L)]))
expect_true(cor(as.numeric(rbartFit$tau), as.numeric(apply(ranef.pred[,5L:26L], 1L, sd))) > 0.90)
})
test_that("rbart runs example", {
rbartFit <- rbart_vi(y ~ x, testData, group.by = g,
n.samples = 40L, n.burn = 10L, n.thin = 2L, n.chains = 2L,
n.trees = 25L, n.threads = 1L,
verbose = FALSE)
expect_equal(dim(rbartFit$yhat.train), c(2L, 40L %/% 2L, length(testData$y)))
expect_equal(length(rbartFit$yhat.train.mean), length(testData$y))
expect_equal(dim(rbartFit$ranef), c(2L, 40L %/% 2L, length(unique(testData$g))))
expect_equal(dim(rbartFit$first.tau), c(2L, 10L %/% 2L))
expect_equal(dim(rbartFit$first.sigma), c(2L, 10L %/% 2L))
expect_equal(dim(rbartFit$tau), c(2L, 40L %/% 2L))
expect_equal(dim(rbartFit$sigma), c(2L, 40L %/% 2L))
expect_true(length(unique(rbartFit$ranef)) > 1L)
# check for one chain
rbartFit <- rbart_vi(y ~ x, testData, group.by = g,
n.samples = 80L, n.burn = 20L, n.thin = 2L, n.chains = 1L,
n.trees = 25L, n.threads = 1L,
verbose = FALSE)
expect_equal(dim(rbartFit$yhat.train), c(80L %/% 2L, length(testData$y)))
expect_equal(length(rbartFit$yhat.train.mean), length(testData$y))
expect_equal(dim(rbartFit$ranef), c(80L %/% 2L, length(unique(testData$g))))
expect_equal(length(rbartFit$first.tau), 20L %/% 2L)
expect_equal(length(rbartFit$first.sigma), 20L %/% 2L)
expect_equal(length(rbartFit$tau), 80L %/% 2L)
expect_equal(length(rbartFit$sigma), 80L %/% 2L)
expect_true(length(unique(rbartFit$ranef)) > 1L)
})
test_that("rbart works with keepTrainingFits = FALSE", {
rbartFit <- rbart_vi(y ~ x, testData, group.by = g,
n.samples = 2L, n.burn = 0L, n.thin = 2L, n.chains = 2L,
n.trees = 3L, n.threads = 1L,
keepTrainingFits = FALSE,
verbose = FALSE)
expect_is(rbartFit, "rbart")
expect_true(is.null(rbartFit$yhat.train))
expect_true(is.null(rbartFit$yhat.train.mean))
})
test_that("rbart passes regression test", {
df <- as.data.frame(testData$x)
colnames(df) <- paste0("x_", seq_len(ncol(testData$x)))
df$y <- testData$y
df$g <- testData$g
set.seed(99)
rbartFit <- rbart_vi(y ~ . - g, df, group.by = g,
n.samples = 1L, n.burn = 5L, n.thin = 1L, n.chains = 1L,
n.trees = 25L, n.threads = 1L,
verbose = FALSE)
expect_equal(as.numeric(rbartFit$ranef),
c(1.92008577165928, 0.750130559201404, -0.837624979286864, 0.461299843412371, 3.2085237309599))
})
test_that("rbart compares favorably to lmer for nonlinear models", {
skip_if_not_installed("lme4")
lme4 <- asNamespace("lme4")
f <- function(x) {
10 * sin(pi * x[,1] * x[,2]) + 20 * (x[,3] - 0.5)^2 +
10 * x[,4] + 5 * x[,5]
}
set.seed(99)
sigma <- 1.0
n <- 500
x <- matrix(runif(n * 10), n, 10)
Ey <- f(x)
y <- rnorm(n, Ey, sigma)
n.g <- 15
g <- sample(n.g, length(y), replace = TRUE)
sigma.b <- 1.5
b <- rnorm(n.g, 0, sigma.b)
y <- y + b[g]
df <- as.data.frame(x)
colnames(df) <- paste0("x_", seq_len(ncol(x)))
df$y <- y
df$g <- g
rbartFit <- rbart_vi(y ~ . - g, df, group.by = g,
n.samples = 200L, n.burn = 100L, n.thin = 2L, n.chains = 2L,
n.trees = 50L, n.threads = 1L,
verbose = FALSE)
ranef.rbart <- rbartFit$ranef.mean
lmerFit <- suppressWarnings(lme4$lmer(y ~ . - g + (1 | g), df))
ranef.lmer <- lme4$ranef.merMod(lmerFit)[[1L]][[1L]]
b_rmse.rbart <- sqrt(mean((b - ranef.rbart)^2))
b_rmse.lmer <- sqrt(mean((b - ranef.lmer)^2))
expect_true(b_rmse.rbart < b_rmse.lmer)
rho <- 0.4
p.y <- pnorm((Ey - mean(Ey)) / sd(Ey) + rho * .75 * b[g])
set.seed(99)
y <- rbinom(n, 1L, p.y)
df <- as.data.frame(x)
colnames(df) <- paste0("x_", seq_len(ncol(x)))
df$y <- y
df$g <- g
rbartFit <- rbart_vi(y ~ . - g, df, group.by = g,
n.samples = 240L, n.burn = 120L, n.thin = 3L, n.chains = 2L,
n.trees = 50L, n.threads = 1L,
verbose = FALSE)
ranef.rbart <- rbartFit$ranef.mean
glmerFit <- lme4$glmer(y ~ . - g + (1 | g), df, family = binomial(link = "probit"))
rbart.mu.hat <- fitted(rbartFit)
expect_equal(apply(extract(rbartFit), 2L, mean), rbart.mu.hat)
glmer.mu.hat <- fitted(glmerFit, type = "response")
dev.rbart <- -2 * mean(log(ifelse(df$y == 1, rbart.mu.hat, 1 - rbart.mu.hat)))
dev.glmer <- -2 * mean(log(ifelse(df$y == 1, glmer.mu.hat, 1 - glmer.mu.hat)))
expect_true(dev.rbart < dev.glmer)
})
test_that("rbart issues a warning for weights only in training data", {
n.train <- 80L
data <- as.data.frame(testData)
trainData <- data[seq_len(n.train),]
testData <- data[seq.int(n.train + 1L, nrow(data)),]
trainData$w <- 1 / nrow(trainData)
# check that predict works when we've fit with missing levels
expect_warning(rbartFit <- rbart_vi(
y ~ x.1 + x.2 + x.3 + x.4 + x.5 + x.6 + x.7 + x.8 + x.9 + x.10,
data = trainData,
test = testData,
group.by = g,
group.by.test = g,
weights = w,
n.samples = 7L, n.burn = 0L, n.thin = 1L, n.chains = 2L,
n.trees = 25L, n.threads = 1L,
verbose = FALSE))
expect_is(rbartFit, "rbart")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.