Nothing
test_that("audit_leakage reports batch association and duplicates", {
set.seed(2)
X <- matrix(rnorm(24), nrow = 12, ncol = 2)
X[7, ] <- X[1, ]
X[8, ] <- X[2, ]
df <- data.frame(
outcome = rep(c(0, 1), 6),
batch = rep(c("A", "B"), each = 6),
x1 = X[, 1],
x2 = X[, 2]
)
fold1 <- c(1L, 2L, 3L, 7L, 8L, 9L)
fold2 <- c(4L, 5L, 6L, 10L, 11L, 12L)
indices <- list(
list(train = setdiff(seq_len(12), fold1), test = fold1, fold = 1, repeat_id = 1),
list(train = setdiff(seq_len(12), fold2), test = fold2, fold = 2, repeat_id = 1)
)
splits <- bioLeak:::LeakSplits(mode = "custom", indices = indices,
info = list(outcome = "outcome", coldata = df))
custom <- list(
glm = list(
fit = function(x, y, task, weights, ...) {
suppressWarnings(stats::glm(y ~ ., data = as.data.frame(x),
family = stats::binomial(), weights = weights))
},
predict = function(object, newdata, task, ...) {
as.numeric(suppressWarnings(stats::predict(object,
newdata = as.data.frame(newdata),
type = "response")))
}
)
)
fit <- fit_resample(
df,
outcome = "outcome",
splits = splits,
learner = "glm",
custom_learners = custom,
metrics = "auc",
refit = FALSE,
seed = 1
)
audit <- audit_leakage(
fit,
metric = "auc",
B = 10,
perm_stratify = FALSE,
batch_cols = "batch",
X_ref = df[, c("x1", "x2")],
sim_threshold = 0.999,
duplicate_scope = "all",
# ADD THIS LINE TO FIX WARNINGS:
target_scan_multivariate = FALSE
)
expect_true(nrow(audit@batch_assoc) >= 1)
expect_true(is.finite(audit@batch_assoc$pval[1]))
expect_true(nrow(audit@duplicates) >= 1)
})
test_that("audit_leakage batch association respects repeated CV folds", {
set.seed(3)
df <- make_class_df(8)
df$batch <- rep(c("A", "B"), each = 4)
df <- df[, c("batch", "outcome", "x1", "x2")]
fold1 <- c(1L, 2L, 5L, 6L)
fold2 <- c(3L, 4L, 7L, 8L)
fold3 <- c(1L, 2L, 7L, 8L)
fold4 <- c(3L, 4L, 5L, 6L)
indices <- list(
list(train = setdiff(seq_len(8), fold1), test = fold1, fold = 1, repeat_id = 1),
list(train = setdiff(seq_len(8), fold2), test = fold2, fold = 2, repeat_id = 1),
list(train = setdiff(seq_len(8), fold3), test = fold3, fold = 1, repeat_id = 2),
list(train = setdiff(seq_len(8), fold4), test = fold4, fold = 2, repeat_id = 2)
)
splits <- bioLeak:::LeakSplits(mode = "custom", indices = indices,
info = list(outcome = "outcome", coldata = df, batch = "batch"))
custom <- make_custom_learners()
fit <- fit_resample_quiet(
df,
outcome = "outcome",
splits = splits,
learner = "glm",
custom_learners = custom,
metrics = "auc",
refit = FALSE,
seed = 1
)
audit <- audit_leakage(
fit,
metric = "auc",
B = 2,
perm_stratify = FALSE,
batch_cols = "batch",
return_perm = FALSE,
target_scan = FALSE,
target_scan_multivariate = FALSE
)
expect_equal(sort(unique(audit@batch_assoc$repeat_id)), c(1, 2))
expect_true(all(audit@batch_assoc$df == 1))
})
test_that("audit_leakage supports refit-based permutations", {
df <- make_class_df(20)
splits <- make_split_plan_quiet(df, outcome = "outcome",
mode = "subject_grouped", group = "subject",
v = 3, seed = 1)
custom <- make_custom_learners()
fit <- fit_resample_quiet(df, outcome = "outcome", splits = splits,
learner = "glm", custom_learners = custom,
metrics = "auc", refit = FALSE, seed = 1)
audit <- audit_leakage(
fit,
metric = "auc",
B = 2,
perm_refit = TRUE,
perm_refit_spec = list(
x = df,
outcome = "outcome",
learner = "glm",
custom_learners = custom
),
perm_stratify = FALSE
)
expect_true(identical(audit@info$perm_method, "refit"))
expect_equal(audit@permutation_gap$n_perm[1], 2)
})
test_that("audit_leakage supports perm_refit auto mode", {
df <- make_class_df(20)
splits <- make_split_plan_quiet(df, outcome = "outcome",
mode = "subject_grouped", group = "subject",
v = 3, seed = 1)
custom <- make_custom_learners()
fit <- fit_resample_quiet(df, outcome = "outcome", splits = splits,
learner = "glm", custom_learners = custom,
metrics = "auc", refit = FALSE, seed = 1)
audit_fixed <- audit_leakage(
fit,
metric = "auc",
B = 5,
perm_refit = "auto",
perm_refit_auto_max = 2,
perm_stratify = FALSE
)
expect_true(identical(audit_fixed@info$perm_method, "fixed"))
expect_true(grepl("^auto", audit_fixed@info$perm_refit_mode))
audit_refit <- audit_leakage(
fit,
metric = "auc",
B = 2,
perm_refit = "auto",
perm_refit_auto_max = 5,
perm_refit_spec = list(
x = df,
outcome = "outcome",
learner = "glm",
custom_learners = custom
),
perm_stratify = FALSE
)
expect_true(identical(audit_refit@info$perm_method, "refit"))
expect_true(grepl("^auto", audit_refit@info$perm_refit_mode))
})
test_that("audit_leakage multivariate target scan runs", {
df <- make_class_df(24)
splits <- make_split_plan_quiet(df, outcome = "outcome",
mode = "subject_grouped", group = "subject",
v = 3, seed = 1)
custom <- make_custom_learners()
fit <- fit_resample_quiet(df, outcome = "outcome", splits = splits,
learner = "glm", custom_learners = custom,
metrics = "auc", refit = FALSE, seed = 1)
aud <- audit_leakage(
fit,
metric = "auc",
B = 3,
perm_stratify = FALSE,
X_ref = df[, c("x1", "x2")],
target_scan_multivariate = TRUE,
target_scan_multivariate_B = 2,
target_scan_multivariate_components = 2
)
mv <- aud@info$target_multivariate
expect_true(is.data.frame(mv))
expect_true(nrow(mv) > 0)
expect_true(all(c("metric", "score", "p_value") %in% names(mv)))
})
test_that("audit_leakage adds mechanism taxonomy and FDR-adjusted target flags", {
df <- make_class_df(24)
splits <- make_split_plan_quiet(
df,
outcome = "outcome",
mode = "subject_grouped",
group = "subject",
v = 3,
seed = 7
)
custom <- make_custom_learners()
fit <- fit_resample_quiet(
df,
outcome = "outcome",
splits = splits,
learner = "glm",
custom_learners = custom,
metrics = "auc",
refit = FALSE,
seed = 7
)
aud <- audit_leakage(
fit,
metric = "auc",
B = 3,
perm_stratify = FALSE,
X_ref = df[, c("x1", "x2")],
target_scan_multivariate = FALSE,
target_p_adjust = "BH",
target_alpha = 0.2
)
expect_true("mechanism_class" %in% names(aud@permutation_gap))
if (nrow(aud@batch_assoc)) {
expect_true("mechanism_class" %in% names(aud@batch_assoc))
}
if (nrow(aud@target_assoc)) {
expect_true("mechanism_class" %in% names(aud@target_assoc))
expect_true("p_value_adj" %in% names(aud@target_assoc))
expect_true("flag_fdr" %in% names(aud@target_assoc))
}
if (nrow(aud@duplicates)) {
expect_true("mechanism_class" %in% names(aud@duplicates))
}
expect_true(is.data.frame(aud@info$taxonomy))
expect_true(all(c("mechanism_class", "evidence_slot") %in% names(aud@info$taxonomy)))
expect_true(is.data.frame(aud@info$mechanism_summary))
expect_true(all(c("mechanism_class", "flagged", "evidence") %in% names(aud@info$mechanism_summary)))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.