context("Test Predict")
if (isTRUE(as.logical(Sys.getenv("CI")))){
# If on CI
NITER <- 2
env_test <- "CI"
}else if (!identical(Sys.getenv("NOT_CRAN"), "true")){
# If on CRAN
NITER <- 2
env_test <- "CRAN"
set.seed(171)
}else{
# If on local machine
NITER <- 2000
env_test <- 'local'
}
test_that("Prediction Matches Manual and (nearly) glmer", {
if (env_test == 'local'){
N <- 1000
}else{
N <- 50
}
G <- 3
x <- rnorm(N)
g <- sample(1:G, N, replace = T)
g2 <- sample(1:G, N, replace = T)
alpha <- rnorm(G)
alpha2 <- rnorm(G)
y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g]))
est_glmer <- suppressMessages(suppressWarnings(lme4::glmer(y ~ x + (1 + x | g) + (1 | g2), family = binomial)))
example_vglmer <- vglmer(
formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial",
control = vglmer_control(factorization_method = "weak")
)
draw_samples <- posterior_samples.vglmer(example_vglmer, samples = 10)
draw_MAVB <- MAVB(example_vglmer, samples = 10, var_px = 1)
expect_true(is.matrix(draw_MAVB))
expect_true(is.matrix(draw_samples))
def_predict <- predict(example_vglmer, newdata = data.frame(y = y, x = x, g = g, g2 = g2))
if (env_test == 'local'){
glmer_predict <- predict(est_glmer)
expect_gt(
cor(def_predict, glmer_predict), 0.95
)
}
alpha_names <- rownames(example_vglmer$alpha$mean)
manual_predict <- as.vector(
example_vglmer$alpha$mean[match(paste0("g2 @ (Intercept) @ ", g2), alpha_names)] +
example_vglmer$alpha$mean[match(paste0("g @ x @ ", g), alpha_names)] * x +
example_vglmer$alpha$mean[match(paste0("g @ (Intercept) @ ", g), alpha_names)] +
cbind(1, x) %*% example_vglmer$beta$mean
)
expect_equal(def_predict, manual_predict)
})
test_that("Prediction Matches for New Levels in newdata", {
N <- 50
G <- 10
x <- rnorm(N)
g <- sample(1:G, N, replace = T)
alpha <- rnorm(G)
y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g]))
example_vglmer <- vglmer(
formula = y ~ x + (1 | g), data = NULL,
control = vglmer_control(iterations = 2, print_prog = 10, init = "zero"),
family = "binomial"
)
# No old level in new level
new_data <- data.frame(x = rep(0, 5), g = 11)
expect_error(predict(example_vglmer, newdata = new_data)) # Error on default
expect_equal( # zero when allow_missing_levels = TRUE
predict(example_vglmer, newdata = new_data, allow_missing_levels = TRUE),
rep(example_vglmer$beta$mean[1], nrow(new_data))
)
mixed_data <- data.frame(x = rnorm(10), g = sample(1:25, 10, replace = T))
man_beta <- as.vector(cbind(1, mixed_data$x) %*% example_vglmer$beta$mean)
man_alpha <- as.vector(example_vglmer$alpha$mean)[match(paste0("g @ (Intercept) @ ", mixed_data$g), rownames(example_vglmer$alpha$mean))]
man_alpha[is.na(man_alpha)] <- 0
expect_equivalent(man_beta + man_alpha, predict(example_vglmer, newdata = mixed_data, allow_missing_levels = T))
})
test_that("Prediction Matches for Missing in new.data", {
N <- 50
G <- 10
x <- rnorm(N + G)
g <- c(sample(1:G, N, replace = T), 1:G)
alpha <- rnorm(G)
y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g]))
example_vglmer <- vglmer(
formula = y ~ x + (1 | g), data = NULL,
control = vglmer_control(iterations = 2), family = "binomial"
)
mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2))
rownames(mixed_data) <- letters[1:20]
mixed_data$x[8] <- NA
mixed_data$g[7] <- NA
mixed_data$x[2] <- mixed_data$g[2] <- NA
man_beta <- as.vector(cbind(1, mixed_data$x) %*% example_vglmer$beta$mean)
man_alpha <- example_vglmer$alpha$mean[match(paste0("g @ (Intercept) @ ", mixed_data$g), rownames(example_vglmer$alpha$mean))]
expect_equivalent(
man_beta + man_alpha,
predict(example_vglmer, newdata = mixed_data)
)
})
test_that("Prediction Matches for Simulation", {
if (env_test == "local"){
N <- 1000
G <- 10
}else{
N <- 50
G <- 3
}
x <- rnorm(N)
g <- sample(1:G, N, replace = T)
g2 <- sample(1:G, N, replace = T)
alpha <- rnorm(G)
alpha2 <- rnorm(G)
y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g]))
est_glmer <- suppressMessages(suppressWarnings(lme4::glmer(y ~ x + (1 + x | g) + (1 | g2), family = binomial)))
example_vglmer <- vglmer(
formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial",
control = vglmer_control(factorization_method = "weak", iterations = NITER)
)
mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2), g2 = sample(1:25, 20, replace = T))
rownames(mixed_data) <- letters[1:20]
mixed_data$x[8] <- NA
mixed_data$g[7] <- NA
mixed_data$x[2] <- mixed_data$g[2] <- NA
test_data <- rbind(mixed_data, data.frame(x = x, g = g, g2 = g2)[sample(1:length(y), 100, replace = T), ])
point_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = T)
terms_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = TRUE, type = 'terms')
manual_fe <- test_data$x * coef(example_vglmer)['x'] + coef(example_vglmer)[1]
expect_equal(
terms_predict[,'FE'],
ifelse(is.na(point_predict), NA, manual_fe)
)
manual_g <- rowSums(ranef(example_vglmer)$g[test_data$g,2:3] * cbind(1, test_data$x))
manual_g[is.na(manual_g) & !is.na(test_data$g)] <- 0
expect_equal(
terms_predict[,'g'],
ifelse(is.na(point_predict), NA, manual_g)
)
manual_g2 <- ranef(example_vglmer)$g2[test_data$g2,2]
manual_g2[is.na(manual_g2) & !is.na(test_data$g2)] <- 0
expect_equal(
terms_predict[,'g2'],
ifelse(is.na(point_predict), NA, manual_g2)
)
expect_equivalent(
manual_fe + manual_g + manual_g2, point_predict
)
if (env_test == "local"){
n_samples <- 2 * 10^4
}else{
n_samples <- 2
}
mean_predict <- predict(example_vglmer,
newdata = test_data,
samples = n_samples, allow_missing_levels = T
)
# should have "clean" rownames
expect_equal(rownames(mean_predict), as.character(1:nrow(mean_predict)))
if (env_test == "local"){
# Should be very close
expect_equal(mean_predict$mean, point_predict, 0.01)
}
if (env_test == "local"){
matrix_predict <- predict(example_vglmer,
newdata = test_data,
samples = 2 * 10^4, allow_missing_levels = T, samples_only = TRUE
)
matrix_predict <- colMeans(matrix_predict)
expect_equivalent(
c(coef(example_vglmer), as.vector(example_vglmer$alpha$mean)),
matrix_predict, 0.01
)
}
})
test_that("Prediction Matches for vglmer after MAVB", {
skip_on_cran()
N <- 50
G <- 4
x <- rnorm(N)
g <- sample(1:G, N, replace = T)
g2 <- sample(1:G, N, replace = T)
alpha <- rnorm(G)
alpha2 <- rnorm(G)
y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g]))
example_vglmer <- vglmer(
formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial",
control = vglmer_control(factorization_method = "weak")
)
mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2), g2 = sample(1:25, 20, replace = T))
rownames(mixed_data) <- letters[1:20]
mixed_data$x[8] <- NA
mixed_data$g[7] <- NA
mixed_data$x[2] <- mixed_data$g[2] <- NA
test_data <- rbind(mixed_data, data.frame(x = x, g = g, g2 = g2)[sample(1:length(y), 100, replace = T), ])
pred.MAVB <- predict_MAVB(example_vglmer,
newdata = test_data,
samples = 1000, allow_missing_levels = T
)
base_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = T)
expect_equal(pred.MAVB$mean, base_predict, tolerance = 0.1)
})
test_that("Prediction with Samples", {
skip_on_cran()
if (env_test == "local"){
n_samples <- 2 * 10^4
}else{
n_samples <- 5
}
N <- 50
G <- 10
x <- rnorm(N + G)
g <- c(sample(1:G, N, replace = T), 1:G)
alpha <- rnorm(G)
y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g]))
example_vglmer <- vglmer(
formula = y ~ x + (1 | g), data = NULL,
control = vglmer_control(iterations = 2, factorization_method = 'strong'),
family = "binomial"
)
pred_samples <- predict(example_vglmer, newdata = data.frame(x = x, g = g), samples = 10, summary = F)
expect_equivalent(dim(pred_samples), c(10, N + G))
draw_coef <- predict(example_vglmer,
newdata = data.frame(x = x, g = g),
samples = n_samples, samples_only = T
)
expect_equivalent(dim(draw_coef), c(n_samples, G + 2))
if (env_test == "local"){
expect_equivalent(
colMeans(draw_coef), format_vglmer(example_vglmer)$mean,
tolerance = 0.02
)
}
#Confirms that it works with "weak"
example_vglmer <- vglmer(
formula = y ~ x + (1 | g), data = NULL,
control = vglmer_control(iterations = 2, factorization_method = 'weak'),
family = "binomial"
)
pred_samples <- predict(example_vglmer, newdata = data.frame(x = x, g = g), samples = 10, summary = F)
expect_equivalent(dim(pred_samples), c(10, N + G))
draw_coef <- predict(example_vglmer,
newdata = data.frame(x = x, g = g),
samples = n_samples, samples_only = T
)
expect_equivalent(dim(draw_coef), c(n_samples, G + 2))
if (env_test == "local"){
expect_equivalent(
colMeans(draw_coef), format_vglmer(example_vglmer)$mean,
tolerance = 0.02
)
}
})
test_that("Prediction with factor/categorical", {
N <- 50
G <- 10
x <- rnorm(N + G)
g <- c(sample(1:G, N, replace = T), 1:G)
alpha <- rnorm(G)
y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g]))
l <- sample(letters[1:3], length(x), replace = T)
example_vglmer <- vglmer(
formula = y ~ x + l + (1 | g), data = NULL,
control = vglmer_control(iterations = 2, factorization_method = 'strong'),
family = "binomial"
)
pred1a <- predict(example_vglmer, newdata = data.frame(x = 3, g = 2, l = 'a', stringsAsFactors = TRUE))
expect_equivalent(pred1a, sum(coef(example_vglmer) * c(1, 3, 0, 0)) + ranef(example_vglmer)$g[2,2])
pred1b <- predict(example_vglmer, newdata = data.frame(x = 3, g = 2, l = 'a', stringsAsFactors = FALSE))
expect_equivalent(pred1b, sum(coef(example_vglmer) * c(1, 3, 0, 0)) + ranef(example_vglmer)$g[2,2])
pred2a <- predict(example_vglmer, newdata = data.frame(x = -1, g = 100, l = 'c', stringsAsFactors = TRUE), allow_missing_levels = TRUE)
expect_equivalent(pred2a, sum(coef(example_vglmer) * c(1, -1, 0, 1)) )
pred2b <- predict(example_vglmer, newdata = data.frame(x = -1, g = 100, l = 'c', stringsAsFactors = FALSE), allow_missing_levels = TRUE)
expect_equivalent(pred2b, sum(coef(example_vglmer) * c(1, -1, 0, 1)) )
expect_error(predict(example_vglmer,
newdata = data.frame(x = -1, g = 100, l = 'd'),
allow_missing_levels = TRUE), regexp = 'has new level d')
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.