context("test spline functionality")
if (isTRUE(as.logical(Sys.getenv("CI")))){
# If on CI
NITER <- 2
env_test <- "CI"
}else if (!identical(Sys.getenv("NOT_CRAN"), "true")){
# If on CRAN
NITER <- 2
env_test <- "CRAN"
set.seed(41414)
}else{
# If on local machine
NITER <- 2000
env_test <- 'local'
}
custom_expect_vector <- function(x, p){
expect_true(is.vector(x) && is.atomic(x))
if (!missing(p)){
expect_true(length(x) == p)
}
}
print(paste0(NITER, ' for tests because ', env_test))
test_that("fit with non-default options", {
skip_on_cran()
dat <- data.frame(x = rnorm(100), x2 = rexp(100),
g = sample(state.abb[1:5], 100, replace = T),
f = sample(letters[1:5], 100, replace = T))
dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))
# Custom knots as argument
custom_knots <- quantile(dat$x, c(0, 0.25, 0.75, 0.6, 1))
fit_knots <- vglmer(y ~ v_s(x, knots = custom_knots),
data = dat, family = 'binomial',
control = vglmer_control(iterations = 15))
fit_tpf <- vglmer(y ~ v_s(x, type = 'tpf'),
data = dat, family = 'binomial', control = vglmer_control(iterations = NITER))
fit_o <- vglmer(y ~ v_s(x, type = 'o'),
data = dat, family = 'binomial',
control = vglmer_control(iterations = NITER))
expect_identical(fit_knots$internal_parameters$spline$attr[[1]]$knots, quantile(dat$x, c(0, 0.25, 0.6, 0.75, 1)))
expect_gt(min(diff(ELBO(fit_tpf, 'trajectory'))), -sqrt(.Machine$double.eps))
expect_gt(min(diff(ELBO(fit_o, 'trajectory'))), -sqrt(.Machine$double.eps))
expect_gt(min(diff(ELBO(fit_knots, 'trajectory'))), -sqrt(.Machine$double.eps))
fit_tpf <- vglmer(y ~ v_s(x, knots = 2, type = 'tpf'),
data = dat, family = 'binomial',
control = vglmer_control(iterations = NITER))
expect_length(fit_tpf$internal_parameters$spline$attr[[1]]$knots, 2)
expect_equivalent(fit_tpf$internal_parameters$spline$attr[[1]]$knots,
quantile(dat$x, seq(0,1,length.out=4)[-c(1,4)]))
})
test_that("fit splines works with one knot", {
dat <- data.frame(x = rnorm(100))
dat$y <- rbinom(100, 1, plogis(exp(dat$x)))
for (loop_type in c('tpf', 'o')){
m1 <- vglmer(y ~ v_s(x, type = loop_type, knots = 1), dat = dat,
control = vglmer_control(iterations = NITER),
family = 'linear')
pred_m1 <- predict(m1, newdata = data.frame(x = seq(-5, 5, length.out=100)))
expect_equal(length(m1$internal_parameters$spline$attr[[1]]$knots), 1)
if (loop_type == "tpf"){
# Expect only one "bend"
expect_equal(sum( abs(diff(diff(pred_m1))) > sqrt(.Machine$double.eps)), 2)
}
}
expect_error(vglmer(y ~ v_s(x, knots = 0.5), data = dat, family = 'linear'), regexp = 'If an integer')
expect_warning(est_vglmer <- vglmer(y ~ v_s(x, knots = 0.5, force_vector = T),
data = dat, control = vglmer_control(iterations = 2), family = 'linear'),
regexp = 'observed data')
expect_equivalent(0.5, est_vglmer$internal_parameters$spline$attr[[1]]$knots)
est_vglmer <- expect_warning(vglmer(y ~ v_s(x, knots = 1.2, force_vector = T),
control = vglmer_control(iterations = 2),
data = dat, family = 'linear'), regexp = 'observed data')
expect_equivalent(est_vglmer$internal_parameters$spline$attr[[1]]$knots, 1.2)
custom_expect_vector(predict(est_vglmer, newdata = data.frame(x = seq(-3,3,length.out=5))), 5)
expect_warning(vglmer(y ~ v_s(x, knots = 1.2),
control = vglmer_control(iterations = 2),
data = dat, family = 'linear'), regexp = 'as.integer')
est_vglmer <- vglmer(y ~ v_s(x, knots = 3), control = vglmer_control(iterations = 2),
data = dat, family = 'linear')
expect_length(est_vglmer$internal_parameters$spline$attr[[1]]$knots, 3)
est_vglmer <- vglmer(y ~ v_s(x), control = vglmer_control(iterations = 2),
data = dat, family = 'linear')
expect_length(est_vglmer$internal_parameters$spline$attr[[1]]$knots, floor(length(unique(dat$x))/4))
})
test_that("fit and predict with splines and missing data", {
dat <- data.frame(x = rnorm(100), x2 = rexp(100),
g = sample(state.abb[1:5], 100, replace = T),
f = sample(letters[1:5], 100, replace = T),
stringsAsFactors = F)
dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))
dat$y[sample(100, 5)] <- NA
dat$g[sample(100, 5)] <- NA
dat$x[sample(100, 5)] <- NA
dat$f[sample(100, 5)] <- NA
fit_spline <- vglmer(y ~ v_s(x, by = f) + (1 | g),
data = dat, family = 'binomial', control = vglmer_control(iterations = NITER))
expect_s3_class(fit_spline, 'vglmer')
expect_gt(min(diff(fit_spline$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
dat$f[6] <- 'h'
pred_spline <- predict(fit_spline, newdata = dat, allow_missing_levels = TRUE)
pred_spline_terms <- predict(fit_spline,
newdata = dat, type = 'terms', allow_missing_levels = TRUE)
expect_equivalent(pred_spline, rowSums(pred_spline_terms))
raw_X <- vglmer_build_spline(x = dat$x,
knots = fit_spline$internal_parameters$spline$attr[[1]]$knots,
type = fit_spline$internal_parameters$spline$attr[[1]]$type,
Boundary.knots = fit_spline$internal_parameters$spline$attr[[1]]$Boundary.knots,
by = NULL)[[1]]$x
expect_equivalent(
pred_spline_terms[, 'spline-x-1-base'],
ifelse(is.na(pred_spline), NA, as.vector(raw_X %*% ranef(fit_spline)[['spline-x-1-base']][,2]))
)
reshape_spline <- matrix(ranef(fit_spline)[['spline-x-1-int']][,2], ncol = 5)
pred_spline_inter <- rowSums(raw_X * t(reshape_spline)[match(dat$f, letters[1:5]),])
pred_spline_inter[!is.na(dat$f) & is.na(pred_spline_inter)] <- 0
expect_equivalent(
pred_spline_terms[, 'spline-x-1-int'],
ifelse(is.na(pred_spline), NA, pred_spline_inter)
)
})
test_that("Check failures of spline fitting", {
suppressWarnings(rm(f))
dat <- data.frame(x = rnorm(100), x2 = rexp(100),
g = sample(state.abb[1:5], 100, replace = T),
f = sample(letters[1:5], 100, replace = T))
dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))
nox <- dat[, !(colnames(dat) %in% 'x')]
x <- rnorm(nrow(dat))
nof <- dat[, !(colnames(dat) %in% 'f')]
expect_error(vglmer(y ~ v_s(x, by = f) + (1 | g),
data = nox,
control = vglmer_control(verify_columns = TRUE),
family = 'binomial'))
expect_error(vglmer(y ~ v_s(x, by = f) + (1 | g),
data = nof, family = 'binomial'))
})
test_that("test spline 'by' construction", {
x <- splines::bs(x = rnorm(10))[,]
by_values <- sample(letters, 10, replace = T)
u_by <- sort(unique(by_values))
x_by <- sparseMatrix(i = 1:length(by_values), j = match(by_values, u_by), x = 1)
test_m <- t(Matrix::KhatriRao(t(x_by), t(x)))
manual_m <- do.call('cbind', lapply(u_by, FUN=function(u){
drop0(Diagonal(x = (by_values == u)) %*% x)
}))
colnames(manual_m) <- NULL
expect_equal(test_m, manual_m)
})
test_that("CRAN basic spline tests", {
dat <- data.frame(x = rnorm(100), x2 = rexp(100),
g = sample(state.abb[1:5], 100, replace = T),
f = sample(letters[1:5], 100, replace = T))
dat$y <- rbinom(100, 1, plogis(sin(sqrt(dat$x2 * 4)) + cos(dat$x * 3) * runif(5)[match(dat$f, letters)]))
m1 <- vglmer(y ~ x + x2 + v_s(x),
control = vglmer_control(iteration = NITER),
data = dat, family = 'binomial')
expect_gt(min(diff(ELBO(m1, 'traj'))), - sqrt(.Machine$double.eps))
# Check runs with 2
m2 <- vglmer(y ~ v_s(x2) + v_s(x),
control = vglmer_control(
iterations = NITER, print_prog = 20),
data = dat, family = 'binomial')
# Check runs with "by"
m3 <- vglmer(y ~ v_s(x2) + v_s(x, by = f), data = dat,
family = 'binomial',
control = vglmer_control(iterations = NITER, print_prog = 20, prior_variance = 'mean_exists'))
# Check runs with RE
m4 <- vglmer(y ~ v_s(x, by = f) + (1 | g),
data = dat, family = 'binomial',
control = vglmer_control(iterations = NITER, print_prog = 20))
# Check runs with double "by"
m5 <- vglmer(y ~ v_s(x, by = f) + v_s(x, by = g),
data = dat, family = 'binomial',
control = vglmer_control(iterations = NITER, print_prog = 20))
# Check runs with 2 "by" for single grouping
m6 <- vglmer(y ~ v_s(x, by = f) + v_s(x2, by = f),
data = dat, family = 'binomial',
control = vglmer_control(iterations = NITER, print_prog = 20,
factorization_method = 'strong'))
expect_equal(ncol(m6$sigma$cov[[1]]), 3)
m7 <- vglmer(y ~ v_s(x, by = f) + v_s(x2, by = f) ,
data = dat, family = 'binomial',
control = vglmer_control(iterations = NITER, print_prog = 20,
factorization_method = 'strong'))
m8 <- vglmer(y ~ v_s(x2, by = g) + v_s(x, by = f, by_re = FALSE),
control = vglmer_control(
iterations = NITER, print_prog = 20),
data = dat, family = 'binomial')
# Check that they are all accepted
expect_equal(m1$ELBO$accepted_PX, 1)
expect_equal(m2$ELBO$accepted_PX, 1)
# Check ELBO increases
expect_gt(min(diff(m1$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
expect_gt(min(diff(m2$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
expect_gt(min(diff(m3$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
expect_gt(min(diff(m4$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
expect_gt(min(diff(m5$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
expect_gt(min(diff(m6$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
expect_gt(min(diff(m7$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
expect_gt(min(diff(m8$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
for (v in c(TRUE, FALSE)){
dat_custom <- data.frame(x = mean(dat$x), x2 = mean(dat$x2),
g = c('CA', 'AL'), f = c('e', 'a'),
stringsAsFactors = v)
custom_expect_vector(predict(m1, newdata = dat_custom))
custom_expect_vector(predict(m2, newdata = dat_custom))
custom_expect_vector(predict(m3, newdata = dat_custom))
custom_expect_vector(predict(m4, newdata = dat_custom))
custom_expect_vector(predict(m5, newdata = dat_custom))
custom_expect_vector(predict(m6, newdata = dat_custom))
custom_expect_vector(predict(m7, newdata = dat_custom))
custom_expect_vector(predict(m8, newdata = dat_custom))
}
})
test_that("Test order of splines", {
skip_on_cran()
dat <- data.frame(x = rnorm(100), x2 = rexp(100),
g = sample(state.abb[1:5], 100, replace = T),
f = sample(letters[1:5], 100, replace = T))
dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))
m1 <- vglmer(y ~ x + x2 + v_s(x),
data = dat, family = 'binomial',
control = vglmer_control(iterations = NITER))
expect_equal(length(coef(m1)), 3)
m1a <- vglmer(y ~ x2 + x + v_s(x), data = dat,
family = 'binomial', control = vglmer_control(iterations = NITER))
expect_equal(ELBO(m1a), ELBO(m1))
expect_equal(ranef(m1), ranef(m1a), tol = 1e-4, scale = 1)
expect_equal(coef(m1),
coef(m1a)[match(names(coef(m1)), names(coef(m1a)))],
tol = 1e-4, scale = 1
)
})
test_that("Prediction spline test", {
dat <- data.frame(x = rnorm(100), x2 = rexp(100),
g = sample(state.abb[1:5], 100, replace = T),
f = sample(letters[1:5], 100, replace = T))
dat$y <- rbinom(100, 1, plogis(sin(3 * dat$x) * rnorm(5)[match(dat$f, letters)]))
if (env_test != 'CRAN'){
loop_vector <- c('tpf', 'o')
}else{
loop_vector <- 'tpf'
}
for (loop_type in loop_vector){
message(loop_type)
m1 <- vglmer(y ~ x2 + v_s(x, type = loop_type),
data = dat, family = 'linear',
control = vglmer_control(iterations = NITER))
# Check that prediction works for simple spline case
predict_dat <- expand.grid(x = seq(min(dat$x), max(dat$x), length.out=100), x2 = 0)
predict_vglmer <- predict(m1, newdata = predict_dat)
# Check that *fitted* aligns with *predict*
expect_equivalent(fitted(m1), predict(m1, newdata = dat))
# Check that there is some slope in the function
diff_in_diff <- range(diff(diff(predict_vglmer)))
expect_gte(max(abs(diff_in_diff)), sqrt(.Machine$double.eps))
raw_X <- vglmer_build_spline(x = predict_dat$x, knots = m1$internal_parameters$spline$attr[[1]]$knots,
type = m1$internal_parameters$spline$attr[[1]]$type,
Boundary.knots = m1$internal_parameters$spline$attr[[1]]$Boundary.knots, by = NULL)[[1]]$x
term_1 <- raw_X %*% m1$alpha$mean
term_2 <- cbind(1, 0, predict_dat$x) %*% m1$beta$mean
direct_predict <- as.vector(term_1 + term_2)
expect_equivalent(direct_predict, predict_vglmer)
terms_predict <- predict(m1, newdata = predict_dat, type = 'terms')
expect_equivalent(term_1[,1], terms_predict[, 'spline-x-1-base'])
expect_equivalent(term_2[,1], terms_predict[, 'FE'])
}
if (TRUE){
for (loop_type in loop_vector){
message(loop_type)
m1 <- vglmer(y ~ x2 + v_s(x, by = f, type = loop_type),
data = dat, family = 'linear',
control = vglmer_control(iterations = NITER))
# Check that prediction works for simple spline case
predict_dat <- expand.grid(x = seq(min(dat$x), max(dat$x), length.out=100), x2 = 0, f = c('a', 'd', 'e', 'c'))
predict_vglmer <- predict(m1, newdata = predict_dat)
# Check that *fitted* aligns with *predict*
expect_equivalent(fitted(m1), predict(m1, newdata = dat))
# Check that there is some slope in the function
diff_in_diff <- sapply(split(predict_vglmer, predict_dat$f), FUN=function(i){
diff_in_diff <- diff(diff(i))/diff(range(i))
return(max(abs(diff_in_diff)))
})
expect_true(all(diff_in_diff > sqrt(.Machine$double.eps)))
raw_X <- vglmer_build_spline(x = predict_dat$x, knots = m1$internal_parameters$spline$attr[[1]]$knots,
type = m1$internal_parameters$spline$attr[[1]]$type,
by = predict_dat$f,
Boundary.knots = m1$internal_parameters$spline$attr[[1]]$Boundary.knots)[[1]]$x
expand_x <- do.call('cbind', lapply(c('a', 'c', 'd', 'e'), FUN=function(i){
drop0(Diagonal(x = predict_dat$f == i) %*% raw_X)
}))
all_X <- cbind(raw_X, expand_x)
direct_predict_spline <- as.vector(all_X %*% m1$alpha$mean[grep(rownames(m1$alpha$mean), pattern='spline @ x @ (base|[adec])'),] +
cbind(1, 0, predict_dat$x) %*% m1$beta$mean)
wide_alpha <- matrix(m1$alpha$mean[grepl(rownames(m1$alpha$mean), pattern='^f'),,drop=F], byrow = TRUE, ncol = 2)
direct_predict <- rowSums(wide_alpha[match(predict_dat$f, c('a', 'b', 'c', 'd', 'e')),] * cbind(1, predict_dat$x))
expect_equivalent(direct_predict + direct_predict_spline, predict_vglmer)
}
}
})
test_that("Compare against mgcv", {
if (env_test == 'local'){
N <- 1000
dat <- data.frame(x = c(-3.1, rnorm(N-1)), x2 = rexp(N),
g = sample(state.abb[1:5], N, replace = T),
f = sample(letters[1:5], N, replace = T))
dat$y <- rbinom(N, 1, plogis(2 * sin(3 * dat$x) + rnorm(5)[match(dat$f, letters)]))
dat$f <- factor(dat$f)
est_gam <- mgcv::gam(y ~ s(x, bs = 'bs') + s(f, bs = 're'), method = 'REML', data = dat, family = binomial())
est_vglmer <- vglmer(y ~ v_s(x, type = 'o') + (1 | f), data = dat, family = 'binomial')
gx <- seq(quantile(dat$x, 0.2), quantile(dat$x, 0.8), length.out=100)
pred_estgam <- predict(est_gam, newdata = data.frame(x = gx, f = 'b'))
pred_estvglmer <- predict(est_vglmer, newdata = data.frame(x = gx, f = 'b'))
expect_gte(cor(plogis(pred_estvglmer), plogis(pred_estgam)), 0.95)
est_gam <- mgcv::gam(y ~ s(x), data = dat, method = 'REML', family = gaussian())
est_vglmer <- vglmer(y ~ v_s(x, type = 'o'), data = dat, family = 'linear')
knots_custom <- seq(-3, 3, length.out=35)
est_vglmer_2 <- expect_warning(
vglmer(y ~ v_s(x, knots = knots_custom, type = 'tpf'), data = dat, family = 'linear'),
regexp = 'observed data is outside of the self-provided knots'
)
pred_estgam <- predict(est_gam, newdata = data.frame(x = gx, f = 'b'))
pred_estvglmer <- predict(est_vglmer, newdata = data.frame(x = gx, f = 'b'))
pred_estvglmer_2 <- predict(est_vglmer_2, newdata = data.frame(x = gx, f = 'b'))
expect_gte(cor(pred_estvglmer, pred_estgam), 0.95)
expect_gte(cor(pred_estgam, pred_estvglmer_2), 0.95)
}
})
test_that("Predict tests for tpf; predict outside of boundary", {
N <- 100
dat <- data.frame(x = rnorm(N), x2 = rexp(N),
g = sample(state.abb[1:5], N, replace = T),
f = sample(letters[1:5], N, replace = T))
dat$y <- rbinom(N, 1, plogis(2 * sin(3 * dat$x) + rnorm(5)[match(dat$f, letters)]))
est_vglmer_2 <- vglmer(y ~ v_s(x, type = 'tpf'),
data = dat, family = 'linear', control = vglmer_control(iterations = NITER))
knots_custom <- est_vglmer_2$internal_parameters$spline$attr[[1]]$knots
predict_at_knots <- predict(est_vglmer_2, newdata = data.frame(x = knots_custom))
sum(est_vglmer_2$alpha$mean)
diff_rhs <- diff(predict(est_vglmer_2, newdata = data.frame(x = 5:10)))
expect_lte(max(abs(diff(diff_rhs))), sqrt(.Machine$double.eps))
diff_lhs <- diff(predict(est_vglmer_2, newdata = data.frame(x = -15:-10)))
expect_lte(max(abs(diff(diff_lhs))), sqrt(.Machine$double.eps))
# to the "left", the extrapolated value should be the slope of the linear part
expect_equivalent(mean(diff_lhs), coef(est_vglmer_2)[2])
# to the right, extrapolated value should be the sum of all
# the RE slopes + the baseline slope
expect_equivalent(
coef(est_vglmer_2)[2] + sum(ranef(est_vglmer_2)[[1]][,2]),
mean(diff_rhs)
)
# The slope between any two knots should be the cumulative sum
# of the intermediate slopes from the RE
diff_predict_at_knots <- diff(predict_at_knots)
slope_predict_at_knots <- diff_predict_at_knots/diff(knots_custom)
expect_equivalent(slope_predict_at_knots, coef(est_vglmer_2)[2] + cumsum(est_vglmer_2$alpha$mean[-nrow(est_vglmer_2$alpha$mean),1]))
})
# TO-DO tests
# check decomposition of D and then re-transformation when
# implemented
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.