tests/testthat/test-splines.R

context("test spline functionality")

if (isTRUE(as.logical(Sys.getenv("CI")))){
  # If on CI
  NITER <- 2
  env_test <- "CI"
}else if (!identical(Sys.getenv("NOT_CRAN"), "true")){
  # If on CRAN
  NITER <- 2
  env_test <- "CRAN"
  set.seed(41414)
}else{
  # If on local machine
  NITER <- 2000
  env_test <- 'local'
}

custom_expect_vector <- function(x, p){
  expect_true(is.vector(x) && is.atomic(x))
  if (!missing(p)){
    expect_true(length(x) == p)
  }
}

print(paste0(NITER, ' for tests because ', env_test))

test_that("fit with non-default options", {
  
  skip_on_cran()
  
  dat <- data.frame(x = rnorm(100), x2 = rexp(100),
    g = sample(state.abb[1:5], 100, replace = T),
    f = sample(letters[1:5], 100, replace = T))
  
  dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))
  
  # Custom knots as argument
  custom_knots <- quantile(dat$x, c(0, 0.25, 0.75, 0.6, 1))
  fit_knots <- vglmer(y ~ v_s(x, knots = custom_knots),
                      data = dat, family = 'binomial',
                      control = vglmer_control(iterations = 15))
  
  fit_tpf <- vglmer(y ~ v_s(x, type = 'tpf'),
    data = dat, family = 'binomial', control = vglmer_control(iterations = NITER))
  fit_o <- vglmer(y ~ v_s(x, type = 'o'),
    data = dat, family = 'binomial',
    control = vglmer_control(iterations = NITER))
  
  
  expect_identical(fit_knots$internal_parameters$spline$attr[[1]]$knots, quantile(dat$x, c(0, 0.25, 0.6, 0.75, 1)))

  expect_gt(min(diff(ELBO(fit_tpf, 'trajectory'))), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(ELBO(fit_o, 'trajectory'))), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(ELBO(fit_knots, 'trajectory'))), -sqrt(.Machine$double.eps))
  
  fit_tpf <- vglmer(y ~ v_s(x, knots = 2, type = 'tpf'),
      data = dat, family = 'binomial', 
      control = vglmer_control(iterations = NITER))
  expect_length(fit_tpf$internal_parameters$spline$attr[[1]]$knots, 2)
  expect_equivalent(fit_tpf$internal_parameters$spline$attr[[1]]$knots, 
    quantile(dat$x, seq(0,1,length.out=4)[-c(1,4)]))
})

test_that("fit splines works with one knot", {
  
  
  dat <- data.frame(x = rnorm(100))
  dat$y <- rbinom(100, 1, plogis(exp(dat$x)))

  for (loop_type in c('tpf', 'o')){
    m1 <- vglmer(y ~ v_s(x, type = loop_type, knots = 1), dat = dat, 
                 control = vglmer_control(iterations = NITER),
                 family = 'linear')
    pred_m1 <- predict(m1, newdata = data.frame(x = seq(-5, 5, length.out=100)))  
    expect_equal(length(m1$internal_parameters$spline$attr[[1]]$knots), 1)
    if (loop_type == "tpf"){
      # Expect only one "bend"
      expect_equal(sum( abs(diff(diff(pred_m1))) > sqrt(.Machine$double.eps)), 2)
    }
  }
  
  expect_error(vglmer(y ~ v_s(x, knots = 0.5), data = dat, family = 'linear'), regexp = 'If an integer')
  expect_warning(est_vglmer <- vglmer(y ~ v_s(x, knots = 0.5, force_vector = T), 
    data = dat, control = vglmer_control(iterations = 2), family = 'linear'),
    regexp = 'observed data')
  expect_equivalent(0.5, est_vglmer$internal_parameters$spline$attr[[1]]$knots)
  
  est_vglmer <- expect_warning(vglmer(y ~ v_s(x, knots = 1.2, force_vector = T),
         control = vglmer_control(iterations = 2),
         data = dat, family = 'linear'), regexp = 'observed data')
  expect_equivalent(est_vglmer$internal_parameters$spline$attr[[1]]$knots, 1.2)
  custom_expect_vector(predict(est_vglmer, newdata = data.frame(x = seq(-3,3,length.out=5))), 5)
  expect_warning(vglmer(y ~ v_s(x, knots = 1.2),
         control = vglmer_control(iterations = 2),
         data = dat, family = 'linear'), regexp = 'as.integer')
  
  est_vglmer <- vglmer(y ~ v_s(x, knots = 3), control = vglmer_control(iterations = 2), 
         data = dat, family = 'linear')
  expect_length(est_vglmer$internal_parameters$spline$attr[[1]]$knots, 3)
  est_vglmer <- vglmer(y ~ v_s(x), control = vglmer_control(iterations = 2), 
                       data = dat, family = 'linear')
  expect_length(est_vglmer$internal_parameters$spline$attr[[1]]$knots, floor(length(unique(dat$x))/4))
})

test_that("fit and predict with splines and missing data", {
  
  dat <- data.frame(x = rnorm(100), x2 = rexp(100),
    g = sample(state.abb[1:5], 100, replace = T),
    f = sample(letters[1:5], 100, replace = T), 
    stringsAsFactors = F)
  
  dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))
  dat$y[sample(100, 5)] <- NA
  dat$g[sample(100, 5)] <- NA  
  dat$x[sample(100, 5)] <- NA
  dat$f[sample(100, 5)] <- NA
  
  fit_spline <- vglmer(y ~ v_s(x, by = f) + (1 | g), 
               data = dat, family = 'binomial', control = vglmer_control(iterations = NITER))
  expect_s3_class(fit_spline, 'vglmer')
  expect_gt(min(diff(fit_spline$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  
  dat$f[6] <- 'h'
  pred_spline <- predict(fit_spline, newdata = dat, allow_missing_levels = TRUE)
  pred_spline_terms <- predict(fit_spline,
    newdata = dat, type = 'terms', allow_missing_levels = TRUE)  
  
  expect_equivalent(pred_spline, rowSums(pred_spline_terms))
  
  raw_X <- vglmer_build_spline(x = dat$x, 
    knots = fit_spline$internal_parameters$spline$attr[[1]]$knots,
    type = fit_spline$internal_parameters$spline$attr[[1]]$type,
    Boundary.knots = fit_spline$internal_parameters$spline$attr[[1]]$Boundary.knots, 
    by = NULL)[[1]]$x

  expect_equivalent(
    pred_spline_terms[, 'spline-x-1-base'],
    ifelse(is.na(pred_spline), NA, as.vector(raw_X %*% ranef(fit_spline)[['spline-x-1-base']][,2]))
  )  

  reshape_spline <- matrix(ranef(fit_spline)[['spline-x-1-int']][,2], ncol = 5)
  pred_spline_inter <- rowSums(raw_X * t(reshape_spline)[match(dat$f, letters[1:5]),])
  pred_spline_inter[!is.na(dat$f) & is.na(pred_spline_inter)] <- 0
  expect_equivalent(
    pred_spline_terms[, 'spline-x-1-int'],
    ifelse(is.na(pred_spline), NA, pred_spline_inter)
  ) 
  
})

test_that("Check failures of spline fitting", {
  
  suppressWarnings(rm(f))
  dat <- data.frame(x = rnorm(100), x2 = rexp(100),
                    g = sample(state.abb[1:5], 100, replace = T),
                    f = sample(letters[1:5], 100, replace = T))
  
  dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))

  nox <- dat[, !(colnames(dat) %in% 'x')]
  x <- rnorm(nrow(dat))
  nof <- dat[, !(colnames(dat) %in% 'f')]
  
  expect_error(vglmer(y ~ v_s(x, by = f) + (1 | g), 
                       data = nox, 
                      control = vglmer_control(verify_columns = TRUE),
                      family = 'binomial'))
  
  expect_error(vglmer(y ~ v_s(x, by = f) + (1 | g), 
                      data = nof, family = 'binomial'))
  
})


test_that("test spline 'by' construction", {

  x <- splines::bs(x = rnorm(10))[,]
  by_values <- sample(letters, 10, replace = T)
  u_by <- sort(unique(by_values))
  x_by <- sparseMatrix(i = 1:length(by_values), j = match(by_values, u_by), x = 1)
  
  test_m <- t(Matrix::KhatriRao(t(x_by), t(x)))
  manual_m <- do.call('cbind', lapply(u_by, FUN=function(u){
    drop0(Diagonal(x = (by_values == u)) %*% x)
  }))
  colnames(manual_m) <- NULL
  expect_equal(test_m, manual_m)  
  
})

test_that("CRAN basic spline tests", {
  
  dat <- data.frame(x = rnorm(100), x2 = rexp(100),
                    g = sample(state.abb[1:5], 100, replace = T),
                    f = sample(letters[1:5], 100, replace = T))
  
  dat$y <- rbinom(100, 1, plogis(sin(sqrt(dat$x2 * 4)) + cos(dat$x * 3) * runif(5)[match(dat$f, letters)]))
  
  
  m1 <- vglmer(y ~ x + x2 + v_s(x), 
               control = vglmer_control(iteration = NITER),
               data = dat, family = 'binomial')
  expect_gt(min(diff(ELBO(m1, 'traj'))), - sqrt(.Machine$double.eps))  
  
  
  # Check runs with 2
  m2 <- vglmer(y ~ v_s(x2) + v_s(x), 
               control = vglmer_control(
                 iterations = NITER, print_prog = 20),
               data = dat, family = 'binomial')
  # Check runs with "by"
  m3 <- vglmer(y ~ v_s(x2) + v_s(x, by = f), data = dat, 
               family = 'binomial',
               control = vglmer_control(iterations = NITER, print_prog = 20, prior_variance = 'mean_exists'))
  # Check runs with RE 
  m4 <- vglmer(y ~ v_s(x, by = f) + (1 | g), 
               data = dat, family = 'binomial',
               control = vglmer_control(iterations = NITER, print_prog = 20))
  
  # Check runs with double "by"
  m5 <- vglmer(y ~ v_s(x, by = f) + v_s(x, by = g), 
               data = dat, family = 'binomial',
               control = vglmer_control(iterations = NITER, print_prog = 20))
  # Check runs with 2 "by" for single grouping
  m6 <- vglmer(y ~ v_s(x, by = f) + v_s(x2, by = f), 
     data = dat, family = 'binomial',
     control = vglmer_control(iterations = NITER, print_prog = 20,
      factorization_method = 'strong'))
  expect_equal(ncol(m6$sigma$cov[[1]]), 3)

  m7 <- vglmer(y ~ v_s(x, by = f) + v_s(x2, by = f) , 
     data = dat, family = 'binomial',
     control = vglmer_control(iterations = NITER, print_prog = 20,
      factorization_method = 'strong'))

  m8 <- vglmer(y ~ v_s(x2, by = g) + v_s(x, by = f, by_re = FALSE), 
               control = vglmer_control(
                 iterations = NITER, print_prog = 20),
               data = dat, family = 'binomial')
  
  # Check that they are all accepted
  expect_equal(m1$ELBO$accepted_PX, 1)
  expect_equal(m2$ELBO$accepted_PX, 1)
  
  # Check ELBO increases
  expect_gt(min(diff(m1$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(m2$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(m3$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(m4$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(m5$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(m6$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(m7$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  expect_gt(min(diff(m8$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps))
  
  for (v in c(TRUE, FALSE)){
    dat_custom <- data.frame(x = mean(dat$x), x2 = mean(dat$x2), 
       g = c('CA', 'AL'), f = c('e', 'a'),
       stringsAsFactors = v)
    custom_expect_vector(predict(m1, newdata = dat_custom))
    custom_expect_vector(predict(m2, newdata = dat_custom))
    custom_expect_vector(predict(m3, newdata = dat_custom))
    custom_expect_vector(predict(m4, newdata = dat_custom))
    custom_expect_vector(predict(m5, newdata = dat_custom))
    custom_expect_vector(predict(m6, newdata = dat_custom))
    custom_expect_vector(predict(m7, newdata = dat_custom))
    custom_expect_vector(predict(m8, newdata = dat_custom))
  }
})

test_that("Test order of splines", {
  
  skip_on_cran()
  
  dat <- data.frame(x = rnorm(100), x2 = rexp(100),
                    g = sample(state.abb[1:5], 100, replace = T),
                    f = sample(letters[1:5], 100, replace = T))
  
  dat$y <- rbinom(100, 1, plogis(dat$x * runif(5)[match(dat$f, letters)]))
  
  m1 <- vglmer(y ~ x + x2 + v_s(x), 
     data = dat, family = 'binomial',
     control = vglmer_control(iterations = NITER))
  
  expect_equal(length(coef(m1)), 3)
  
  m1a <- vglmer(y ~ x2 + x + v_s(x), data = dat, 
    family = 'binomial', control = vglmer_control(iterations = NITER))
  
  expect_equal(ELBO(m1a), ELBO(m1))
  expect_equal(ranef(m1), ranef(m1a), tol = 1e-4, scale = 1)
  expect_equal(coef(m1), 
    coef(m1a)[match(names(coef(m1)), names(coef(m1a)))],
    tol = 1e-4, scale = 1
  )
  
})

test_that("Prediction spline test", {
  
  
  dat <- data.frame(x = rnorm(100), x2 = rexp(100),
                    g = sample(state.abb[1:5], 100, replace = T),
                    f = sample(letters[1:5], 100, replace = T))
  
  dat$y <- rbinom(100, 1, plogis(sin(3 * dat$x) * rnorm(5)[match(dat$f, letters)]))
  
  if (env_test != 'CRAN'){
    loop_vector <- c('tpf', 'o')
  }else{
    loop_vector <- 'tpf'
  }
  
  for (loop_type in loop_vector){
    message(loop_type)
    m1 <- vglmer(y ~ x2 + v_s(x, type = loop_type), 
                 data = dat, family = 'linear',
                 control = vglmer_control(iterations = NITER))
    
    # Check that prediction works for simple spline case
    predict_dat <- expand.grid(x = seq(min(dat$x), max(dat$x), length.out=100), x2 = 0)
    predict_vglmer <- predict(m1, newdata = predict_dat)
    # Check that *fitted* aligns with *predict*  
    expect_equivalent(fitted(m1), predict(m1, newdata = dat))
    # Check that there is some slope in the function
    diff_in_diff <- range(diff(diff(predict_vglmer)))
    expect_gte(max(abs(diff_in_diff)), sqrt(.Machine$double.eps))
    
    raw_X <- vglmer_build_spline(x = predict_dat$x, knots = m1$internal_parameters$spline$attr[[1]]$knots,
                                 type = m1$internal_parameters$spline$attr[[1]]$type,
                                 Boundary.knots = m1$internal_parameters$spline$attr[[1]]$Boundary.knots, by = NULL)[[1]]$x
    term_1 <- raw_X %*% m1$alpha$mean
    term_2 <- cbind(1, 0, predict_dat$x) %*% m1$beta$mean
    direct_predict <- as.vector(term_1 + term_2)
    expect_equivalent(direct_predict, predict_vglmer)   
    terms_predict <- predict(m1, newdata = predict_dat, type = 'terms')
    expect_equivalent(term_1[,1], terms_predict[, 'spline-x-1-base'])
    expect_equivalent(term_2[,1], terms_predict[, 'FE'])
  }

  if (TRUE){
    
    for (loop_type in loop_vector){
      message(loop_type)
      m1 <- vglmer(y ~ x2 + v_s(x, by = f, type = loop_type), 
                   data = dat, family = 'linear',
                   control = vglmer_control(iterations = NITER))
      
      # Check that prediction works for simple spline case
      predict_dat <- expand.grid(x = seq(min(dat$x), max(dat$x), length.out=100), x2 = 0, f = c('a', 'd', 'e', 'c'))
      predict_vglmer <- predict(m1, newdata = predict_dat)
      # Check that *fitted* aligns with *predict*  
      expect_equivalent(fitted(m1), predict(m1, newdata = dat))
      # Check that there is some slope in the function
      diff_in_diff <- sapply(split(predict_vglmer, predict_dat$f), FUN=function(i){
        diff_in_diff <- diff(diff(i))/diff(range(i))
        return(max(abs(diff_in_diff)))        
      })
      expect_true(all(diff_in_diff > sqrt(.Machine$double.eps)))
      
      raw_X <- vglmer_build_spline(x = predict_dat$x, knots = m1$internal_parameters$spline$attr[[1]]$knots,
       type = m1$internal_parameters$spline$attr[[1]]$type,
       by = predict_dat$f,
       Boundary.knots = m1$internal_parameters$spline$attr[[1]]$Boundary.knots)[[1]]$x
      
      expand_x <- do.call('cbind', lapply(c('a', 'c', 'd', 'e'), FUN=function(i){
        drop0(Diagonal(x = predict_dat$f == i) %*% raw_X)
      }))
      all_X <- cbind(raw_X, expand_x)
      
      direct_predict_spline <- as.vector(all_X %*% m1$alpha$mean[grep(rownames(m1$alpha$mean), pattern='spline @ x @ (base|[adec])'),] + 
                cbind(1, 0, predict_dat$x) %*% m1$beta$mean)
      
      wide_alpha <- matrix(m1$alpha$mean[grepl(rownames(m1$alpha$mean), pattern='^f'),,drop=F], byrow = TRUE, ncol = 2)
      
      direct_predict <- rowSums(wide_alpha[match(predict_dat$f, c('a', 'b', 'c', 'd', 'e')),] * cbind(1, predict_dat$x))
      expect_equivalent(direct_predict + direct_predict_spline, predict_vglmer)    
    }
  }

})

test_that("Compare against mgcv", {
  
  if (env_test == 'local'){
    N <- 1000
    dat <- data.frame(x = c(-3.1, rnorm(N-1)), x2 = rexp(N),
                      g = sample(state.abb[1:5], N, replace = T),
                      f = sample(letters[1:5], N, replace = T))
    
    dat$y <- rbinom(N, 1, plogis(2 * sin(3 * dat$x) + rnorm(5)[match(dat$f, letters)]))
    
    dat$f <- factor(dat$f)
    est_gam <- mgcv::gam(y ~ s(x, bs = 'bs') + s(f, bs = 're'), method = 'REML', data = dat, family = binomial())  
    est_vglmer <- vglmer(y ~ v_s(x, type = 'o') + (1 | f), data = dat, family = 'binomial')
    
    gx <- seq(quantile(dat$x, 0.2), quantile(dat$x, 0.8), length.out=100)
    pred_estgam <- predict(est_gam, newdata = data.frame(x = gx, f = 'b'))
    pred_estvglmer <- predict(est_vglmer, newdata = data.frame(x = gx, f = 'b'))
    
    expect_gte(cor(plogis(pred_estvglmer), plogis(pred_estgam)), 0.95)
    
    est_gam <- mgcv::gam(y ~ s(x), data = dat, method = 'REML', family = gaussian())  
    est_vglmer <- vglmer(y ~ v_s(x, type = 'o'), data = dat, family = 'linear')
    
    knots_custom <- seq(-3, 3, length.out=35)
    est_vglmer_2 <- expect_warning(
      vglmer(y ~ v_s(x, knots = knots_custom, type = 'tpf'), data = dat, family = 'linear'),
      regexp = 'observed data is outside of the self-provided knots'
    )
    pred_estgam <- predict(est_gam, newdata = data.frame(x = gx, f = 'b'))
    pred_estvglmer <- predict(est_vglmer, newdata = data.frame(x = gx, f = 'b'))
    pred_estvglmer_2 <- predict(est_vglmer_2, newdata = data.frame(x = gx, f = 'b'))
    
    expect_gte(cor(pred_estvglmer, pred_estgam), 0.95)
    expect_gte(cor(pred_estgam, pred_estvglmer_2), 0.95)
    
  }
})

test_that("Predict tests for tpf; predict outside of boundary", {
  
  N <- 100
  dat <- data.frame(x = rnorm(N), x2 = rexp(N),
                    g = sample(state.abb[1:5], N, replace = T),
                    f = sample(letters[1:5], N, replace = T))
  
  dat$y <- rbinom(N, 1, plogis(2 * sin(3 * dat$x) + rnorm(5)[match(dat$f, letters)]))
  
  est_vglmer_2 <- vglmer(y ~ v_s(x, type = 'tpf'), 
                         data = dat, family = 'linear', control = vglmer_control(iterations = NITER))
  knots_custom <- est_vglmer_2$internal_parameters$spline$attr[[1]]$knots
  predict_at_knots <- predict(est_vglmer_2, newdata = data.frame(x = knots_custom))
  
  sum(est_vglmer_2$alpha$mean)
  
  diff_rhs <- diff(predict(est_vglmer_2, newdata = data.frame(x = 5:10)))
  expect_lte(max(abs(diff(diff_rhs))), sqrt(.Machine$double.eps))
  
  diff_lhs <- diff(predict(est_vglmer_2, newdata = data.frame(x = -15:-10)))
  expect_lte(max(abs(diff(diff_lhs))), sqrt(.Machine$double.eps))
  # to the "left", the extrapolated value should be the slope of the linear part
  expect_equivalent(mean(diff_lhs), coef(est_vglmer_2)[2])
  # to the right, extrapolated value should be the sum of all 
  # the RE slopes + the baseline slope
  expect_equivalent(
    coef(est_vglmer_2)[2] + sum(ranef(est_vglmer_2)[[1]][,2]),
    mean(diff_rhs)
  )
  # The slope between any two knots should be the cumulative sum
  # of the intermediate slopes from the RE
  diff_predict_at_knots <- diff(predict_at_knots)
  slope_predict_at_knots <- diff_predict_at_knots/diff(knots_custom)
  expect_equivalent(slope_predict_at_knots, coef(est_vglmer_2)[2] + cumsum(est_vglmer_2$alpha$mean[-nrow(est_vglmer_2$alpha$mean),1]))
  
  
})

# TO-DO tests
# check decomposition of D and then re-transformation when
# implemented
mgoplerud/vglmer documentation built on Jan. 22, 2025, 6:43 p.m.