tests/testthat/test-predict.R

context("Test Predict")

if (isTRUE(as.logical(Sys.getenv("CI")))){
  # If on CI
  NITER <- 2
  env_test <- "CI"
}else if (!identical(Sys.getenv("NOT_CRAN"), "true")){
  # If on CRAN
  NITER <- 2
  env_test <- "CRAN"
  set.seed(171)
}else{
  # If on local machine
  NITER <- 2000
  env_test <- 'local'
}

test_that("Prediction Matches Manual and (nearly) glmer", {
  
  if (env_test == 'local'){
    N <- 1000
  }else{
    N <- 50
  }
  G <- 3
  x <- rnorm(N)
  g <- sample(1:G, N, replace = T)
  g2 <- sample(1:G, N, replace = T)
  alpha <- rnorm(G)
  alpha2 <- rnorm(G)

  y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g]))

  est_glmer <- suppressMessages(suppressWarnings(lme4::glmer(y ~ x + (1 + x | g) + (1 | g2), family = binomial)))

  example_vglmer <- vglmer(
    formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial",
    control = vglmer_control(factorization_method = "weak")
  )
  draw_samples <- posterior_samples.vglmer(example_vglmer, samples = 10)
  draw_MAVB <- MAVB(example_vglmer, samples = 10, var_px = 1)
  expect_true(is.matrix(draw_MAVB))
  expect_true(is.matrix(draw_samples))
  
  def_predict <- predict(example_vglmer, newdata = data.frame(y = y, x = x, g = g, g2 = g2))
  
  if (env_test == 'local'){
    glmer_predict <- predict(est_glmer)
    
    expect_gt(
      cor(def_predict, glmer_predict), 0.95
    )
  }

  alpha_names <- rownames(example_vglmer$alpha$mean)
  manual_predict <- as.vector(
    example_vglmer$alpha$mean[match(paste0("g2 @ (Intercept) @ ", g2), alpha_names)] +
      example_vglmer$alpha$mean[match(paste0("g @ x @ ", g), alpha_names)] * x +
      example_vglmer$alpha$mean[match(paste0("g @ (Intercept) @ ", g), alpha_names)] +
      cbind(1, x) %*% example_vglmer$beta$mean
  )
  expect_equal(def_predict, manual_predict)
})

test_that("Prediction Matches for New Levels in newdata", {
  N <- 50
  G <- 10
  x <- rnorm(N)
  g <- sample(1:G, N, replace = T)
  alpha <- rnorm(G)

  y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g]))

  example_vglmer <- vglmer(
    formula = y ~ x + (1 | g), data = NULL,
    control = vglmer_control(iterations = 2, print_prog = 10, init = "zero"),
    family = "binomial"
  )
  # No old level in new level
  new_data <- data.frame(x = rep(0, 5), g = 11)
  expect_error(predict(example_vglmer, newdata = new_data)) # Error on default
  expect_equal( # zero when allow_missing_levels = TRUE
    predict(example_vglmer, newdata = new_data, allow_missing_levels = TRUE),
    rep(example_vglmer$beta$mean[1], nrow(new_data))
  )
  mixed_data <- data.frame(x = rnorm(10), g = sample(1:25, 10, replace = T))

  man_beta <- as.vector(cbind(1, mixed_data$x) %*% example_vglmer$beta$mean)
  man_alpha <- as.vector(example_vglmer$alpha$mean)[match(paste0("g @ (Intercept) @ ", mixed_data$g), rownames(example_vglmer$alpha$mean))]
  man_alpha[is.na(man_alpha)] <- 0
  expect_equivalent(man_beta + man_alpha, predict(example_vglmer, newdata = mixed_data, allow_missing_levels = T))
})

test_that("Prediction Matches for Missing in new.data", {
  N <- 50
  G <- 10
  x <- rnorm(N + G)
  g <- c(sample(1:G, N, replace = T), 1:G)
  alpha <- rnorm(G)

  y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g]))

  example_vglmer <- vglmer(
    formula = y ~ x + (1 | g), data = NULL,
    control = vglmer_control(iterations = 2), family = "binomial"
  )

  mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2))
  rownames(mixed_data) <- letters[1:20]

  mixed_data$x[8] <- NA
  mixed_data$g[7] <- NA
  mixed_data$x[2] <- mixed_data$g[2] <- NA

  man_beta <- as.vector(cbind(1, mixed_data$x) %*% example_vglmer$beta$mean)
  man_alpha <- example_vglmer$alpha$mean[match(paste0("g @ (Intercept) @ ", mixed_data$g), rownames(example_vglmer$alpha$mean))]
  expect_equivalent(
    man_beta + man_alpha,
    predict(example_vglmer, newdata = mixed_data)
  )
})

test_that("Prediction Matches for Simulation", {
  
  if (env_test == "local"){
    N <- 1000
    G <- 10
  }else{
    N <- 50
    G <- 3
  }
  x <- rnorm(N)
  g <- sample(1:G, N, replace = T)
  g2 <- sample(1:G, N, replace = T)
  alpha <- rnorm(G)
  alpha2 <- rnorm(G)

  y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g]))

  est_glmer <- suppressMessages(suppressWarnings(lme4::glmer(y ~ x + (1 + x | g) + (1 | g2), family = binomial)))

  example_vglmer <- vglmer(
    formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial",
    control = vglmer_control(factorization_method = "weak", iterations = NITER)
  )

  mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2), g2 = sample(1:25, 20, replace = T))
  rownames(mixed_data) <- letters[1:20]

  mixed_data$x[8] <- NA
  mixed_data$g[7] <- NA
  mixed_data$x[2] <- mixed_data$g[2] <- NA

  test_data <- rbind(mixed_data, data.frame(x = x, g = g, g2 = g2)[sample(1:length(y), 100, replace = T), ])

  point_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = T)
  terms_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = TRUE, type = 'terms')
  
  manual_fe <- test_data$x * coef(example_vglmer)['x'] + coef(example_vglmer)[1]
  expect_equal(
    terms_predict[,'FE'], 
    ifelse(is.na(point_predict), NA, manual_fe)
  )
  
  manual_g <- rowSums(ranef(example_vglmer)$g[test_data$g,2:3] * cbind(1, test_data$x))
  manual_g[is.na(manual_g) & !is.na(test_data$g)] <- 0
  expect_equal(
    terms_predict[,'g'], 
    ifelse(is.na(point_predict), NA, manual_g)
  )

  manual_g2 <- ranef(example_vglmer)$g2[test_data$g2,2]
  manual_g2[is.na(manual_g2) & !is.na(test_data$g2)] <- 0
  expect_equal(
    terms_predict[,'g2'], 
    ifelse(is.na(point_predict), NA, manual_g2)
  )
  
  expect_equivalent(
    manual_fe + manual_g + manual_g2, point_predict
  )
  
  if (env_test == "local"){
    n_samples <- 2 * 10^4
  }else{
    n_samples <- 2
  }
  mean_predict <- predict(example_vglmer,
    newdata = test_data,
    samples = n_samples, allow_missing_levels = T
  )
  # should have "clean" rownames
  expect_equal(rownames(mean_predict), as.character(1:nrow(mean_predict)))
  
  if (env_test == "local"){
    # Should be very close
    expect_equal(mean_predict$mean, point_predict, 0.01)
  }

  if (env_test == "local"){
    matrix_predict <- predict(example_vglmer,
                              newdata = test_data,
                              samples = 2 * 10^4, allow_missing_levels = T, samples_only = TRUE
    )
    matrix_predict <- colMeans(matrix_predict)
    expect_equivalent(
      c(coef(example_vglmer), as.vector(example_vglmer$alpha$mean)),
      matrix_predict, 0.01
    )
  }
})


test_that("Prediction Matches for vglmer after MAVB", {
  
  skip_on_cran()
  
  N <- 50
  G <- 4
  x <- rnorm(N)
  g <- sample(1:G, N, replace = T)
  g2 <- sample(1:G, N, replace = T)
  alpha <- rnorm(G)
  alpha2 <- rnorm(G)

  y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g]))

  example_vglmer <- vglmer(
    formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial",
    control = vglmer_control(factorization_method = "weak")
  )


  mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2), g2 = sample(1:25, 20, replace = T))
  rownames(mixed_data) <- letters[1:20]

  mixed_data$x[8] <- NA
  mixed_data$g[7] <- NA
  mixed_data$x[2] <- mixed_data$g[2] <- NA

  test_data <- rbind(mixed_data, data.frame(x = x, g = g, g2 = g2)[sample(1:length(y), 100, replace = T), ])

  pred.MAVB <- predict_MAVB(example_vglmer,
    newdata = test_data,
    samples = 1000, allow_missing_levels = T
  )
  base_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = T)

  expect_equal(pred.MAVB$mean, base_predict, tolerance = 0.1)
})

test_that("Prediction with Samples", {
  
  skip_on_cran()
  
  if (env_test == "local"){
    n_samples <- 2 * 10^4
  }else{
    n_samples <- 5
  }
  N <- 50
  G <- 10
  x <- rnorm(N + G)
  g <- c(sample(1:G, N, replace = T), 1:G)
  alpha <- rnorm(G)

  y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g]))

  example_vglmer <- vglmer(
    formula = y ~ x + (1 | g), data = NULL,
    control = vglmer_control(iterations = 2, factorization_method = 'strong'), 
    family = "binomial"
  )

  pred_samples <- predict(example_vglmer, newdata = data.frame(x = x, g = g), samples = 10, summary = F)
  expect_equivalent(dim(pred_samples), c(10, N + G))

  draw_coef <- predict(example_vglmer,
    newdata = data.frame(x = x, g = g),
    samples = n_samples, samples_only = T
  )
  expect_equivalent(dim(draw_coef), c(n_samples, G + 2))

  if (env_test == "local"){
    expect_equivalent(
      colMeans(draw_coef), format_vglmer(example_vglmer)$mean,
      tolerance = 0.02
    )
  }
  #Confirms that it works with "weak"
  example_vglmer <- vglmer(
    formula = y ~ x + (1 | g), data = NULL,
    control = vglmer_control(iterations = 2, factorization_method = 'weak'), 
    family = "binomial"
  )
  
  pred_samples <- predict(example_vglmer, newdata = data.frame(x = x, g = g), samples = 10, summary = F)
  expect_equivalent(dim(pred_samples), c(10, N + G))
  
  draw_coef <- predict(example_vglmer,
                       newdata = data.frame(x = x, g = g),
                       samples = n_samples, samples_only = T
  )
  expect_equivalent(dim(draw_coef), c(n_samples, G + 2))
  
  if (env_test == "local"){
    expect_equivalent(
      colMeans(draw_coef), format_vglmer(example_vglmer)$mean,
      tolerance = 0.02
    )
  }
})

test_that("Prediction with factor/categorical", {
  
  N <- 50
  G <- 10
  x <- rnorm(N + G)
  g <- c(sample(1:G, N, replace = T), 1:G)
  alpha <- rnorm(G)
  
  y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g]))
  l <- sample(letters[1:3], length(x), replace = T)
  
  example_vglmer <- vglmer(
    formula = y ~ x + l + (1  | g), data = NULL,
    control = vglmer_control(iterations = 2, factorization_method = 'strong'), 
    family = "binomial"
  )
  
  pred1a <- predict(example_vglmer, newdata = data.frame(x = 3, g = 2, l = 'a', stringsAsFactors = TRUE))
  expect_equivalent(pred1a, sum(coef(example_vglmer)  * c(1, 3, 0, 0)) + ranef(example_vglmer)$g[2,2])
  
  pred1b <- predict(example_vglmer, newdata = data.frame(x = 3, g = 2, l = 'a', stringsAsFactors = FALSE))
  expect_equivalent(pred1b, sum(coef(example_vglmer)  * c(1, 3, 0, 0)) + ranef(example_vglmer)$g[2,2])
  
  pred2a <- predict(example_vglmer, newdata = data.frame(x = -1, g = 100, l = 'c', stringsAsFactors = TRUE), allow_missing_levels = TRUE)
  expect_equivalent(pred2a, sum(coef(example_vglmer)  * c(1, -1, 0, 1)) )
  pred2b <- predict(example_vglmer, newdata = data.frame(x = -1, g = 100, l = 'c', stringsAsFactors = FALSE), allow_missing_levels = TRUE)
  expect_equivalent(pred2b, sum(coef(example_vglmer)  * c(1, -1, 0, 1)) )
  
  expect_error(predict(example_vglmer, 
    newdata = data.frame(x = -1, g = 100, l = 'd'), 
    allow_missing_levels = TRUE), regexp = 'has new level d')
})
mgoplerud/vglmer documentation built on Jan. 22, 2025, 6:43 p.m.