test-cv-grpsel.R
In grpsel: Group Subset Selection

test_that('nfold must be valid', {
  set.seed(123)
  y <- rnorm(100)
  x <- rnorm(100)
  expect_error(cv.grpsel(x, y, nfold = 1))
  expect_error(cv.grpsel(x, y, nfold = 101))
})

test_that('length of folds must match sample size', {
  set.seed(123)
  y <- rnorm(100)
  x <- rnorm(100)
  folds <- sample(2, 101, T)
  expect_error(cv.grpsel(x, y, folds = folds))
})

test_that('cross-validation leads to correct subset under square loss', {
  set.seed(123)
  group <- rep(1:5, each = 2)
  x <- matrix(rnorm(100 * 10), 100, 10)
  y <- rnorm(100, rowSums(x[, which(group %in% 1:2)]))
  fit <- cv.grpsel(x, y, group, eps = 1e-15)
  beta <- as.numeric(coef(fit))
  beta.target <- rep(0, 11)
  beta.target[c(T, group %in% 1:2)] <- as.numeric(glm(y ~ x[, group %in% 1:2])$coef)
  expect_equal(beta, beta.target)
})

test_that('cross-validation leads to correct subset under logistic loss', {
  set.seed(123)
  group <- rep(1:5, each = 2)
  x <- matrix(rnorm(100 * 10), 100, 10)
  y <- rbinom(100, 1, 1 / (1 + exp(- 2 * rowSums(x[, which(group %in% 1:2)]))))
  fit <- cv.grpsel(x, y, group, loss = 'logistic', eps = 1e-15)
  beta <- as.numeric(coef(fit))
  beta.target <- rep(0, 11)
  beta.target[c(T, group %in% 1:2)] <- as.numeric(glm(y ~ x[, group %in% 1:2], 'binomial')$coef)
  expect_equal(beta, beta.target)
})

test_that('cross-validation works when folds are manually supplied', {
  set.seed(123)
  group <- rep(1:5, each = 2)
  x <- matrix(rnorm(100 * 10), 100, 10)
  y <- rnorm(100, rowSums(x[, which(group %in% 1:2)]))
  folds <- sample(5, 100, T)
  fit <- cv.grpsel(x, y, group, eps = 1e-15, folds = folds)
  beta <- as.numeric(coef(fit))
  beta.target <- rep(0, 11)
  beta.target[c(T, group %in% 1:2)] <- as.numeric(glm(y ~ x[, group %in% 1:2])$coef)
  expect_equal(beta, beta.target)
})

test_that('coefficients are extracted correctly', {
  set.seed(123)
  x <- matrix(rnorm(100 * 10), 100, 10)
  y <- rnorm(100, rowSums(x))
  fit <- cv.grpsel(x, y, eps = 1e-15)
  fit.target <- glm(y ~ x)
  beta <- coef(fit)
  beta.target <- as.matrix(as.numeric(coef(fit.target)))
  expect_equal(beta, beta.target)
})

test_that('predictions are computed correctly', {
  set.seed(123)
  x <- matrix(rnorm(100 * 10), 100, 10)
  y <- rnorm(100, rowSums(x))
  fit <- cv.grpsel(x, y, eps = 1e-15)
  fit.target <- glm(y ~ x)
  yhat <- predict(fit, x)
  yhat.target <- as.matrix(as.numeric(predict(fit.target, as.data.frame(x))))
  expect_equal(yhat, yhat.target)
})

test_that('plot function returns a plot', {
  set.seed(123)
  x <- matrix(rnorm(100 * 10), 100, 10)
  y <- rnorm(100)
  fit <- cv.grpsel(x, y)
  p <- plot(fit)
  expect_s3_class(p, 'ggplot')
})

test_that('sequential and parallel cross-validation produce same output', {
  set.seed(123)
  x <- matrix(rnorm(100 * 10), 100, 10)
  y <- rnorm(100)
  folds <- rep(1:10, each = 10)
  fit.seq <- cv.grpsel(x, y, folds = folds)
  cl <- parallel::makeCluster(2)
  fit.par <- cv.grpsel(x, y, folds = folds, cluster = cl)
  parallel::stopCluster(cl)
  expect_equal(fit.seq, fit.par)
})