tests/testthat/test-permute-labels.R

test_that("quantile break cache and majority level helpers behave", {
  vals <- c(1, 2, 3, 4, NA)
  br1 <- bioLeak:::.get_cached_quantile_breaks(vals, probs = c(0, 0.5, 1))
  br2 <- bioLeak:::.get_cached_quantile_breaks(vals, probs = c(0, 0.5, 1))
  expect_equal(br1, br2)

  expect_equal(bioLeak:::.majority_level(c("a", "b", "a", NA)), "a")
  expect_true(is.na(bioLeak:::.majority_level(c(NA, NA))))
})

test_that("grouped and within-group permutation functions preserve values", {
  set.seed(1)
  y <- c(1, 2, 3, 4, 5, 6)
  subj <- c("s1", "s1", "s2", "s2", "s3", "s3")
  perm <- bioLeak:::.permute_subject_grouped(y, subj)
  expect_equal(sort(perm), sort(y))

  group <- c("g1", "g1", "g2", "g2", "g2", "g1")
  perm2 <- bioLeak:::.permute_within_group(y, group)
  expect_equal(sort(perm2), sort(y))

  perm3 <- bioLeak:::.permute_within_batch(y, group)
  perm4 <- bioLeak:::.permute_within_study(y, group)
  expect_equal(sort(perm3), sort(y))
  expect_equal(sort(perm4), sort(y))
})

test_that("permute_labels_factory returns per-fold permutations", {
  set.seed(1)
  df <- make_class_df(20)
  splits <- make_split_plan_quiet(df, outcome = "outcome",
                              mode = "subject_grouped", group = "subject",
                              v = 4, stratify = FALSE, seed = 1)
  perm_fun <- bioLeak:::.permute_labels_factory(
    cd = df, outcome = "outcome", mode = "subject_grouped",
    folds = splits@indices, perm_stratify = TRUE,
    time_block = "circular", block_len = 2, seed = 1,
    group_col = "subject", batch_col = "batch", study_col = "study"
  )
  out <- perm_fun(1)
  expect_equal(length(out), length(splits@indices))
  expect_equal(length(out[[1]]), length(splits@indices[[1]]$test))
})

test_that("permute_labels_factory warns for small numeric stratification", {
  df <- make_class_df(12)
  df$outcome <- rnorm(12)
  splits <- make_split_plan_quiet(df, outcome = "outcome",
                              mode = "batch_blocked", batch = "batch",
                              v = 3, stratify = FALSE, seed = 1)
  perm_fun <- expect_warning_match(
    bioLeak:::.permute_labels_factory(
      cd = df, outcome = "outcome", mode = "batch_blocked",
      folds = splits@indices, perm_stratify = TRUE,
      time_block = "circular", block_len = 2, seed = 1,
      batch_col = "batch"
    ),
    "requires at least 20"
  )
  out <- perm_fun(1)
  expect_equal(length(out), length(splits@indices))
})

test_that("permute_labels_factory handles time-series permutations", {
  df <- make_class_df(30)
  splits <- make_split_plan_quiet(df, outcome = "outcome",
                              mode = "time_series", time = "time",
                              v = 4, seed = 1)
  perm_fun <- bioLeak:::.permute_labels_factory(
    cd = df, outcome = "outcome", mode = "time_series",
    folds = splits@indices, perm_stratify = FALSE,
    time_block = "stationary", block_len = 3, seed = 1,
    time_col = "time"
  )
  out <- perm_fun(1)
  expect_equal(length(out), length(splits@indices))
  expect_equal(length(out[[1]]), length(splits@indices[[1]]$test))
})

test_that("time-series permutations respect time order", {
  df <- make_class_df(20)
  df$time <- rev(seq_len(nrow(df)))
  splits <- make_split_plan_quiet(df, outcome = "outcome",
                              mode = "time_series", time = "time",
                              v = 3, seed = 1, compact = TRUE)
  folds <- splits@indices
  attr(folds, "fold_assignments") <- splits@info$fold_assignments
  fold <- folds[[1]]
  test_idx <- which(splits@info$fold_assignments[[1]] == fold$fold)
  block_len <- length(test_idx)

  perm_fun <- bioLeak:::.permute_labels_factory(
    cd = df, outcome = "outcome", mode = "time_series",
    folds = folds, perm_stratify = FALSE,
    time_block = "circular", block_len = block_len, seed = 1,
    time_col = "time"
  )
  out <- perm_fun(1)

  time_order <- test_idx[order(df$time[test_idx], test_idx)]
  set.seed(2)
  perm_idx <- bioLeak:::.circular_block_permute(time_order, block_len = block_len)
  perm_time <- df$outcome[perm_idx]
  expected <- perm_time[match(test_idx, time_order)]
  expect_equal(out[[1]], expected)
})

test_that("permute_labels_factory errors on invalid metadata", {
  df <- data.frame(outcome = c(NA, NA), x = 1:2)
  splits <- list(list(test = 1:2, train = integer(0)))
  expect_error(bioLeak:::.permute_labels_factory(
    cd = df, outcome = "outcome", mode = "subject_grouped",
    folds = splits, perm_stratify = FALSE,
    time_block = "circular", block_len = 2, seed = 1
  ), "only NA")
})

Try the bioLeak package in your browser

Any scripts or data that you put into this service are public.

bioLeak documentation built on March 6, 2026, 1:06 a.m.