# tests/testthat/test-big_pls_cox_gd.R

skip_on_os("windows")

library(bigmemory)

test_that("gradient descent returns latent components and consistent fits", {
  # Fixed seed so both fits see identical data; draws happen in the same
  # order as elsewhere in this file (matrix, then time, then status).
  set.seed(42)
  n_obs <- 40
  n_vars <- 6
  raw_mat <- matrix(rnorm(n_obs * n_vars), nrow = n_obs)
  X <- bigmemory::as.big.matrix(raw_mat)
  surv_time <- rexp(n_obs, rate = 0.2)
  surv_status <- rbinom(n_obs, 1, 0.75)

  fit1 <- big_pls_cox_gd(X, surv_time, surv_status, ncomp = 3, max_iter = 100, tol = 1e-6)
  fit2 <- big_pls_cox_gd(X, surv_time, surv_status, ncomp = 3, max_iter = 100, tol = 1e-6)

  # Shapes of the returned decomposition pieces.
  expect_equal(length(fit1$coefficients), 3L)
  expect_equal(dim(fit1$scores), c(n_obs, 3))
  expect_equal(dim(fit1$loadings), c(n_vars, 3))
  expect_equal(dim(fit1$weights), c(n_vars, 3))
  expect_equal(length(fit1$center), n_vars)
  expect_equal(length(fit1$scale), n_vars)

  # Optimizer bookkeeping is well-formed.
  expect_true(is.numeric(fit1$loglik))
  expect_true(fit1$iterations >= 1)
  expect_true(is.logical(fit1$converged))

  # Two fits on identical inputs must agree exactly (determinism).
  expect_equal(fit1$coefficients, fit2$coefficients)
  expect_equal(fit1$loglik, fit2$loglik)
  expect_equal(fit1$scores, fit2$scores)
})

test_that("reordering columns leaves the gradient-descent fit unchanged", {
  set.seed(101)
  n_obs <- 35
  n_vars <- 5
  raw_mat <- matrix(rnorm(n_obs * n_vars), nrow = n_obs)
  surv_time <- rexp(n_obs, rate = 0.3)
  surv_status <- rbinom(n_obs, 1, 0.6)

  # Fit on the original column order.
  X <- bigmemory::as.big.matrix(raw_mat)
  fit_orig <- big_pls_cox_gd(X, surv_time, surv_status, ncomp = 3, max_iter = 150, tol = 1e-6)

  # Fit on a random permutation of the columns.
  perm <- sample.int(n_vars)
  X_perm <- bigmemory::as.big.matrix(raw_mat[, perm])
  fit_perm <- big_pls_cox_gd(X_perm, surv_time, surv_status, ncomp = 3, max_iter = 150, tol = 1e-6)

  # Sample-level quantities are permutation-invariant.
  expect_equal(fit_orig$coefficients, fit_perm$coefficients, tolerance = 1e-6)
  expect_equal(fit_orig$loglik, fit_perm$loglik, tolerance = 1e-6)
  expect_equal(fit_orig$scores, fit_perm$scores, tolerance = 1e-6)

  # Variable-level quantities match after undoing the permutation.
  inv_perm <- match(seq_len(n_vars), perm)
  expect_equal(fit_orig$loadings, fit_perm$loadings[inv_perm, , drop = FALSE],
               tolerance = 1e-6)
  expect_equal(fit_orig$weights, fit_perm$weights[inv_perm, , drop = FALSE],
               tolerance = 1e-6)
  expect_equal(fit_orig$center, fit_perm$center[inv_perm], tolerance = 1e-6)
  expect_equal(fit_orig$scale, fit_perm$scale[inv_perm], tolerance = 1e-6)
})

test_that("big_pls_cox approximates plsRcox", {
  skip_if_not_installed("survival")
  skip_if_not_installed("plsRcox")
  skip_if_not_installed("bigmemory")

  set.seed(123)
  n_obs <- 30
  n_vars <- 5
  X <- matrix(rnorm(n_obs * n_vars), n_obs, n_vars)
  surv_time <- rexp(n_obs)
  surv_status <- rbinom(n_obs, 1, 0.6)

  fit_big <- big_pls_cox(X, surv_time, surv_status, ncomp = 2)
  fit_ref <- plsRcox::plsRcox(X, surv_time, surv_status, nt = 3, verbose = FALSE)

  expect_equal(ncol(fit_big$scores), 2)
  expect_equal(nrow(fit_big$loadings), n_vars)

  # The two implementations should span nearly the same component space;
  # signs may flip, so compare absolute correlations.
  comp_cor <- cor(fit_big$scores, fit_ref$tt[, 1:2])
  expect_true(all(abs(comp_cor) <= 1 + 1e-8))
  expect_true(mean(abs(diag(comp_cor))) > 0.95)
})


test_that("prediction helpers work for gradient descent fits", {
  skip_if_not_installed("survival")
  skip_if_not_installed("bigmemory")
  set.seed(404)
  n_obs <- 30
  n_vars <- 5
  X <- bigmemory::as.big.matrix(matrix(rnorm(n_obs * n_vars), nrow = n_obs))
  surv_time <- rexp(n_obs)
  surv_status <- rbinom(n_obs, 1, 0.5)

  fit <- big_pls_cox_gd(X, surv_time, surv_status, ncomp = 2, max_iter = 50)

  # Linear predictor: one value per observation.
  linear_pred <- predict(fit, newdata = X, type = "link")
  expect_length(linear_pred, n_obs)

  # Component scores: one row per observation, one column per component.
  score_mat <- predict(fit, newdata = X, type = "components")
  expect_equal(dim(score_mat), c(n_obs, 2))
})

test_that("GD matches PLS up to rotation", {
  # Guard optional dependencies, consistent with the other tests in this file.
  skip_if_not_installed("survival")
  skip_if_not_installed("bigmemory")

  # Fix the RNG: without a seed this test was non-reproducible and could
  # fail sporadically on an unlucky draw. Every other test here sets a seed.
  set.seed(2718)
  n <- 30
  p <- 5
  X <- matrix(rnorm(n * p), n, p)
  time <- rexp(n)
  status <- rbinom(n, 1, 0.6)

  X_big <- bigmemory::as.big.matrix(X)

  fit_pls <- big_pls_cox(X, time, status, ncomp = 3)
  fit_gd  <- big_pls_cox_gd(X_big, time, status, ncomp = 3,
                            max_iter = 2000, learning_rate = 0.05, tol = 1e-8)

  # Linear predictors should be nearly identical up to rotation of the
  # component basis, so compare them directly via correlation.
  eta_pls <- drop(fit_pls$scores %*% fit_pls$cox_fit$coefficients)
  eta_gd  <- drop(fit_gd$scores %*% fit_gd$cox_fit$coefficients)
  expect_gt(cor(eta_pls, eta_gd), 0.99)

  # Discrimination (concordance) should agree closely as well.
  c_pls <- survival::concordance(survival::Surv(time, status) ~ eta_pls)$concordance
  c_gd  <- survival::concordance(survival::Surv(time, status) ~ eta_gd)$concordance
  expect_lt(abs(c_pls - c_gd), 1e-2)
})

test_that("GD predict reproduces training lp", {
  # Guard the direct bigmemory dependency, consistent with sibling tests.
  skip_if_not_installed("bigmemory")

  data(micro.censure)
  data(Xmicro.censure_compl_imp)

  Y_all <- micro.censure$survyear[1:80]
  status_all <- micro.censure$DC[1:80]
  # Coerce every column to numeric before subsetting the first 80 rows.
  X_all <- apply(
    as.matrix(Xmicro.censure_compl_imp),
    MARGIN = 2,
    FUN = as.numeric
  )[1:80, ]

  set.seed(2024)
  # Deterministic split; only the training half is used below.
  train_id <- 1:60

  X_train <- X_all[train_id, ]
  Y_train <- Y_all[train_id]
  status_train <- status_all[train_id]

  X_big_train <- bigmemory::as.big.matrix(X_train)

  gd <- big_pls_cox_gd(X_big_train, Y_train, status_train, ncomp = 4, max_iter = 2000)

  # predict(type = "link") on the training data must reproduce the linear
  # predictor implied by the stored scores and Cox coefficients.
  lp_train   <- drop(gd$scores %*% gd$cox_fit$coefficients)
  lp_predict <- drop(predict(gd, newdata = X_big_train, type = "link"))

  expect_gt(cor(lp_train, lp_predict), 0.999)
})

# Try the bigPLScox package in your browser
#
# Any scripts or data that you put into this service are public.
#
# bigPLScox documentation built on Nov. 18, 2025, 5:06 p.m.