tests/testthat/test-gesearch.R

context("test-gesearch")
library(foreach)
library(foreach)
library(RhpcBLASctl)
registerDoSEQ()
# =============================================================================
# Setup helper
# =============================================================================

.setup_gesearch_data <- function() {
  skip_if_not_installed("prospectr")
  
  data("NIRsoil", package = "prospectr")
  
  # Preprocess spectra
  sg_det <- prospectr::savitzkyGolay(
    prospectr::detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
    m = 1, p = 1, w = 7
  )
  NIRsoil$spc_pr <- sg_det
  
  # Split data
  train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
  test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
  
  list(
    train_x = NIRsoil$spc_pr[train_idx, ],
    train_y = NIRsoil$Ciso[train_idx],
    test_x = NIRsoil$spc_pr[test_idx, ],
    test_y = NIRsoil$Ciso[test_idx]
  )
}


# =============================================================================
# gesearch input validation tests
# =============================================================================

test_that("gesearch validates input dimensions", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  # nrow(Xr) must equal length(Yr)
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y[1:10],
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "nrow.*Xr.*must equal"
  )
  
  # ncol(Xr) must equal ncol(Xu)
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x[, 1:10],
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "ncol.*Xr.*must equal.*ncol.*Xu"
  )
})


test_that("gesearch validates k parameter", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = nrow(d$train_x) + 10,
      b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "k.*must be less than"
  )
})


test_that("gesearch validates target_size parameter", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  # target_size must be >= k
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 50, b = 10, target_size = 30,
      fit_method = fit_pls(ncomp = 5)
    ),
    "target_size.*must be.*>= k"
  )
  
  # target_size cannot exceed nrow(Xr)
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = nrow(d$train_x) + 100,
      fit_method = fit_pls(ncomp = 5)
    ),
    "target_size.*cannot be greater"
  )
})


test_that("gesearch validates retain parameter", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, retain = 0, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "retain.*must be in"
  )
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, retain = 1.5, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "retain.*must be in"
  )
})


test_that("gesearch validates optimization parameter", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      optimization = "invalid_option"
    ),
    "Invalid.*optimization"
  )
})


test_that("gesearch requires Yu for response optimization", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      Yu = NULL,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      optimization = "response"
    ),
    "Yu.*required.*response"
  )
})


test_that("gesearch validates fit_method", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = "pls"
    ),
    "fit_method.*must be"
  )
})


test_that("gesearch rejects missing values in Xu", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  test_x_na <- d$test_x
  test_x_na[1, 1] <- NA
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = test_x_na,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "Xu.*missing values"
  )
})


test_that("gesearch rejects infinite values", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  test_x_inf <- d$test_x
  test_x_inf[1, 1] <- Inf
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = test_x_inf,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "Infinite values"
  )
})


test_that("gesearch validates Xu has at least 3 rows", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x[1:2, ],
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "Xu must have at least 3 rows"
  )
})


test_that("gesearch validates verbose parameter", {
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = "yes"
    ),
    "verbose.*must be logical"
  )
})


# =============================================================================
# Basic functionality tests (skip on CRAN)
# =============================================================================

test_that("gesearch runs with reconstruction optimization", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_s3_class(gs, "gesearch")
  expect_true("x_local" %in% names(gs))
  expect_true("y_local" %in% names(gs))
  expect_true("indices" %in% names(gs))
  expect_true("final_models" %in% names(gs))
  expect_true("validation_results" %in% names(gs))
  expect_gte(length(gs$indices), 50)
})


test_that("gesearch runs with different PLS methods", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  # mpls
  gs_mpls <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10, method = "mpls"),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  expect_s3_class(gs_mpls, "gesearch")
  
  # simpls
  gs_simpls <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10, method = "simpls"),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  expect_s3_class(gs_simpls, "gesearch")
})


test_that("gesearch runs with response optimization", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "response",
    verbose = FALSE,
    seed = 42
  )
  
  expect_s3_class(gs, "gesearch")
  expect_gte(length(gs$indices), 50)
})


test_that("gesearch runs with similarity optimization", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "similarity",
    verbose = FALSE,
    seed = 42
  )
  expect_s3_class(gs, "gesearch")
})


test_that("gesearch runs with combined optimization criteria", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = c("reconstruction", "similarity"),
    verbose = FALSE,
    seed = 42
  )
  
  expect_s3_class(gs, "gesearch")
})


test_that("gesearch with scale = TRUE works and predict works too", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10, scale = TRUE),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_s3_class(gs, "gesearch")
})


# =============================================================================
# predict.gesearch tests
# =============================================================================



test_that("predict.gesearch validations", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_error(
    predict(gs, newdata = d$test_x, type = "invalid"),
    "type.*must be"
  )
  
  preds <- predict(gs, newdata = d$test_x)
  
  expect_type(preds, "list")
  expect_equal(nrow(preds[[1]]), nrow(d$test_x))
  expect_equal(ncol(preds[[1]]), 10)  # ncomp columns
  expect_error(predict(gs), "newdata.*required")
  
  
})


test_that("predict.gesearch with what = 'all_generations' works", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    intermediate_models = TRUE,
    verbose = FALSE,
    seed = 42
  )
  
  preds <- predict(gs, newdata = d$test_x, what = "all_generations")
  
  expect_type(preds, "list")
  expect_true(length(preds) == gs$complete_iter)
  expect_true(all(grepl("^generation_", names(preds))))
})


test_that("predict.gesearch warns without intermediate_models", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    intermediate_models = FALSE,
    verbose = FALSE,
    seed = 42
  )
  
  expect_warning(
    predict(gs, newdata = d$test_x, what = "all_generations"),
    "intermediate models"
  )
})


# =============================================================================
# plot.gesearch tests
# =============================================================================

test_that("plot.gesearch works", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_silent(plot(gs, which = "weakness"))
  expect_silent(plot(gs, which = "removed"))
})


# =============================================================================
# Reproducibility tests
# =============================================================================

test_that("gesearch is reproducible with seed", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs1 <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  gs2 <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_equal(gs1$indices, gs2$indices)
  expect_equal(gs1$complete_iter, gs2$complete_iter)
})


# =============================================================================
# Output structure tests
# =============================================================================

test_that("gesearch output contains expected elements", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expected_elements <- c(
    "x_local", "y_local", "indices", "complete_iter",
    "iter_weakness", "samples", "n_removed", "control",
    "fit_method", "validation_results", "final_models", "seed"
  )
  
  for (elem in expected_elements) {
    expect_true(elem %in% names(gs), info = paste("Missing element:", elem))
  }
  
  # Check dimensions
  expect_equal(nrow(gs$x_local), length(gs$indices))
  expect_equal(nrow(gs$y_local), length(gs$indices))
  
  # Check validation results structure
  expect_true("results" %in% names(gs$validation_results[[1]]))
  expect_true("train" %in% names(gs$validation_results[[1]]$results))
  expect_true("test" %in% names(gs$validation_results[[1]]$results))
})


test_that("gesearch n_removed is a data.frame with correct structure", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_s3_class(gs$n_removed, "data.frame")
  expect_true(all(c("iteration", "removed", "cumulative") %in% names(gs$n_removed)))
  expect_equal(nrow(gs$n_removed), gs$complete_iter)
  
  # Cumulative should be non-decreasing
  expect_true(all(diff(gs$n_removed$cumulative) >= 0))
})


test_that("gesearch with intermediate_models stores generations", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    intermediate_models = TRUE,
    verbose = FALSE,
    seed = 42
  )
  
  expect_true("intermediate_models" %in% names(gs))
  expect_type(gs$intermediate_models, "list")
  expect_gt(length(gs$intermediate_models), 0)
  
  # Check structure of intermediate models
  for (gen in gs$intermediate_models) {
    expect_true("subset_size" %in% names(gen))
    expect_true("pls_models" %in% names(gen))
    expect_true("validation" %in% names(gen))
  }
})















test_that("gesearch produces similar results with simpls and pls for optimization = 'response'", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs_simpls <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10, method = "simpls"),
    optimization = "response",
    intermediate_models = TRUE,
    verbose = FALSE,
    seed = 42
  )
  
  gs_pls <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10, method = "pls"),
    optimization = "response",
    intermediate_models = TRUE,
    verbose = FALSE,
    seed = 42
  )
  
  expect_true(identical(gs_simpls$indices, gs_pls$indices))
})



test_that("gesearch produces different results with pls and mpls for optimization = 'response'", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs_pls <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10, method = "pls"),
    optimization = "response",
    intermediate_models = TRUE,
    verbose = FALSE,
    seed = 42
  )
  
  gs_mpls <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    Yu = d$test_y,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10, method = "mpls"),
    optimization = "response",
    intermediate_models = TRUE,
    verbose = FALSE,
    seed = 42
  )
  
  expect_false(identical(gs_mpls$indices, gs_pls$indices))
})















































































































# =============================================================================
# gesearch_control validation tests
# =============================================================================

test_that("gesearch validates control class", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      control = list(tune = FALSE)
    ),
    "'control' must be created by gesearch_control\\(\\)"
  )
})

# =============================================================================
# Multi-column Yr validation tests
# =============================================================================

test_that("gesearch validates multi-column Yr has column names", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  # Multi-column Yr without column names
  Yr_multi <- cbind(d$train_y, d$train_y * 2)
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = Yr_multi,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "Yr must have column names when it has multiple columns"
  )
})

test_that("gesearch validates multi-column Yr row count", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  # Multi-column Yr with wrong row count
  Yr_multi <- cbind(y1 = d$train_y[1:10], y2 = d$train_y[1:10])
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = Yr_multi,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "nrow\\(Xr\\) must equal nrow\\(Yr\\)"
  )
})

# =============================================================================
# Multi-column Yu validation tests
# =============================================================================

test_that("gesearch validates Yu length matches Xu rows", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      Yu = d$test_y[1:5],
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "nrow\\(Xu\\) must equal length\\(Yu\\)"
  )
})

test_that("gesearch rejects Yu_lims with multi-column Yu", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  Yu_multi <- cbind(y1 = d$test_y, y2 = d$test_y * 2)
  Yr_multi <- cbind(y1 = d$train_y, y2 = d$train_y * 2)
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = Yr_multi,
      Xu = d$test_x,
      Yu = Yu_multi,
      Yu_lims = c(0, 10),
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "'Yu_lims' only supported for single-column Yu"
  )
})

test_that("gesearch validates multi-column Yu ncol matches Yr", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  Yr_multi <- cbind(y1 = d$train_y, y2 = d$train_y * 2)
  Yu_wrong <- cbind(y1 = d$test_y, y2 = d$test_y * 2, y3 = d$test_y * 3)
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = Yr_multi,
      Xu = d$test_x,
      Yu = Yu_wrong,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "ncol\\(Yr\\) must equal ncol\\(Yu\\)"
  )
})

test_that("gesearch validates multi-column Yu nrow matches Xu", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  Yr_multi <- cbind(y1 = d$train_y, y2 = d$train_y * 2)
  Yu_wrong <- cbind(y1 = d$test_y[1:5], y2 = d$test_y[1:5] * 2)
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = Yr_multi,
      Xu = d$test_x,
      Yu = Yu_wrong,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "nrow\\(Xu\\) must equal nrow\\(Yu\\)"
  )
})

test_that("gesearch validates multi-column Yu has column names", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  Yr_multi <- cbind(y1 = d$train_y, y2 = d$train_y * 2)
  Yu_no_names <- cbind(d$test_y, d$test_y * 2)
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = Yr_multi,
      Xu = d$test_x,
      Yu = Yu_no_names,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "Yu must have column names when it has multiple columns"
  )
})

test_that("gesearch validates Yu and Yr column names match", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  Yr_multi <- cbind(y1 = d$train_y, y2 = d$train_y * 2)
  Yu_wrong_names <- cbind(a = d$test_y, b = d$test_y * 2)
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = Yr_multi,
      Xu = d$test_x,
      Yu = Yu_wrong_names,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "Column names of Yr and Yu must match"
  )
})

# =============================================================================
# fit_method validation tests
# =============================================================================

test_that("gesearch rejects non-fit_pls fit_method", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_gpr(),
      verbose = FALSE
    ),
    "Only fit_pls\\(\\) is supported"
  )
})

test_that("gesearch warns when tune is used with reconstruction", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_warning(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 20, retain = 0.90,
      target_size = 350,
      fit_method = fit_pls(ncomp = 10),
      optimization = "reconstruction",
      control = gesearch_control(tune = TRUE),
      verbose = FALSE,
      seed = 42
    ),
    "PLS components are not tuned when optimization includes 'reconstruction'"
  )
})

# =============================================================================
# Local CV validation tests
# =============================================================================

test_that("gesearch errors when local CV leaves fewer than 3 observations", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      Yu = d$test_y,
      k = 10,  # small k
      b = 10,
      target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      optimization = "response",
      control = gesearch_control(tune = TRUE, p = 0.95),  # leaves ~5% for hold-out
      verbose = FALSE
    ),
    "Local cross-validation requires at least 3 observations"
  )
})

test_that("gesearch errors when ncomp exceeds available samples", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 10,  # small k
      b = 10,
      target_size = 350,
      fit_method = fit_pls(ncomp = 15),  # more components than k
      control = gesearch_control(tune = FALSE),
      verbose = FALSE
    ),
    "More PLS components than available observations"
  )
})

# =============================================================================
# Range optimization validation tests
# =============================================================================

test_that("gesearch requires Yu_lims for range optimization", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      optimization = "range",
      verbose = FALSE
    ),
    "'Yu_lims' is required for 'range' optimization"
  )
})

test_that("gesearch validates Yu_lims length", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      optimization = "range",
      Yu_lims = c(0, 5, 10),
      verbose = FALSE
    ),
    "'Yu_lims' must be a numeric vector of length 2"
  )
})

test_that("gesearch with range optimization works", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = c("reconstruction", "range"),
    Yu_lims = range(d$train_y),
    verbose = FALSE,
    seed = 42
  )
  
  expect_s3_class(gs, "gesearch")
})

# =============================================================================
# gesearch.formula tests
# =============================================================================

test_that("gesearch.formula validates formula class", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(y = d$test_y, spc = I(d$test_x))
  
  expect_error(
    gesearch(
      formula = "y ~ spc",  # character, not formula
      train = train_df,
      test = test_df,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5)
    ),
    "'formula' argument must be a formula object"
  )
})

test_that("gesearch.formula requires fit_method", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(y = d$test_y, spc = I(d$test_x))
  
  expect_error(
    gesearch(
      formula = y ~ spc,
      train = train_df,
      test = test_df,
      k = 30, b = 10, target_size = 350
    ),
    "'fit_method' is missing"
  )
})

test_that("gesearch.formula warns when response missing in test", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(spc = I(d$test_x))  # No y column
  
  expect_warning(
    gs <- resemble::gesearch(
      formula = y ~ spc,
      train = train_df,
      test = test_df,
      k = 30, b = 20, retain = 0.90,
      target_size = 350,
      fit_method = fit_pls(ncomp = 10),
      optimization = "reconstruction",
      verbose = FALSE,
      seed = 42
    ),
    "y not found in test; assigned NA."
  )
  
  expect_s3_class(gs, "gesearch")
})

test_that("gesearch.formula errors when response optimization without response in test", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(spc = I(d$test_x))  # No y column
  
  expect_error(
    gesearch(
      formula = y ~ spc,
      train = train_df,
      test = test_df,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      optimization = "response"
    ),
    "'optimization = \"response\"' requires response values in test"
  )
})

test_that("gesearch.formula works with data.frame input", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(y = d$test_y, spc = I(d$test_x))
  
  gs <- gesearch(
    formula = y ~ spc,
    train = train_df,
    test = test_df,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_s3_class(gs, "gesearch")
  expect_s3_class(gs, "gesearch.formula")
  expect_true("formula" %in% names(gs))
  expect_true("dataclasses" %in% names(gs))
})

# =============================================================================
# predict.gesearch formula model tests
# =============================================================================

test_that("predict.gesearch validates newdata for formula models", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(y = d$test_y, spc = I(d$test_x))
  
  gs <- gesearch(
    formula = y ~ spc,
    train = train_df,
    test = test_df,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  # Wrong type for newdata
  expect_error(
    predict(gs, newdata = as.list(d$test_x)),
    "'newdata' must be a data.frame or matrix"
  )
})

test_that("predict.gesearch validates missing variables for formula models", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(y = d$test_y, spc = I(d$test_x))
  
  gs <- gesearch(
    formula = y ~ spc,
    train = train_df,
    test = test_df,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  # Missing predictor variables
  wrong_df <- data.frame(wrong_name = I(d$test_x))
  
  expect_error(
    predict(gs, newdata = wrong_df),
    "Missing predictor variables"
  )
})

test_that("predict.gesearch works with formula model and data.frame", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(y = d$test_y, spc = I(d$test_x))
  
  gs <- gesearch(
    formula = y ~ spc,
    train = train_df,
    test = test_df,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  preds <- predict(gs, newdata = test_df)
  
  expect_type(preds, "list")
  expect_equal(nrow(preds[[1]]), nrow(d$test_x))
})

test_that("predict.gesearch works with formula model and matrix", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_df <- data.frame(y = d$train_y, spc = I(d$train_x))
  test_df <- data.frame(y = d$test_y, spc = I(d$test_x))
  
  gs <- gesearch(
    formula = y ~ spc,
    train = train_df,
    test = test_df,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  # Predict with matrix
  preds <- predict(gs, newdata = d$test_x)
  
  expect_type(preds, "list")
  expect_equal(nrow(preds[[1]]), nrow(d$test_x))
})

test_that("predict.gesearch rejects non-matrix newdata for non-formula models", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  gs <- gesearch(
    Xr = d$train_x,
    Yr = d$train_y,
    Xu = d$test_x,
    k = 30, b = 20, retain = 0.90,
    target_size = 350,
    fit_method = fit_pls(ncomp = 10),
    optimization = "reconstruction",
    verbose = FALSE,
    seed = 42
  )
  
  expect_error(
    predict(gs, newdata = as.data.frame(d$test_x)),
    "'newdata' must be a matrix"
  )
})

# =============================================================================
# Verbose output tests
# =============================================================================

test_that("gesearch verbose output works", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  expect_output(
    gs <- gesearch(
      Xr = d$train_x,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 20, retain = 0.90,
      target_size = 350,
      fit_method = fit_pls(ncomp = 10),
      optimization = "reconstruction",
      verbose = TRUE,
      seed = 42
    ),
    "Generation"
  )
})

# =============================================================================
# Missing Xr/Yr validation tests
# =============================================================================

test_that("gesearch rejects missing values in Xr", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_x_na <- d$train_x
  train_x_na[1, 1] <- NA
  
  expect_error(
    gesearch(
      Xr = train_x_na,
      Yr = d$train_y,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "'Xr' contains missing values"
  )
})

test_that("gesearch rejects missing values in Yr", {
  skip_on_cran()
  skip_if_not_installed("prospectr")
  
  d <- .setup_gesearch_data()
  
  train_y_na <- d$train_y
  train_y_na[1] <- NA
  
  expect_error(
    gesearch(
      Xr = d$train_x,
      Yr = train_y_na,
      Xu = d$test_x,
      k = 30, b = 10, target_size = 350,
      fit_method = fit_pls(ncomp = 5),
      verbose = FALSE
    ),
    "'Yr' contains missing values"
  )
})

Try the resemble package in your browser

Any scripts or data that you put into this service are public.

resemble documentation built on April 21, 2026, 1:07 a.m.