#' Summarize a leakage audit
#'
#' Prints a concise, human-readable report for a `LeakAudit` object produced by
#' [audit_leakage()]. The summary surfaces four core diagnostics when available:
#' label-permutation gap (prediction-label association by default), batch/study
#' association tests (metadata aligned with fold splits), target leakage scan
#' (features strongly associated with the outcome), and near-duplicate detection
#' (high similarity in `X_ref`), along with a multivariate target scan and a
#' mechanism risk assessment when those components were computed. The output
#' reflects the stored audit results only; it does not recompute any tests.
#'
#' @details
#' The permutation test quantifies prediction-label association when using fixed
#' predictions; refit-based permutations require `perm_refit = TRUE` (or `"auto"`
#' with refit data). It does not by itself prove or rule out leakage.
#' Batch association flags metadata columns that align with fold assignment;
#' this may reflect study design rather than leakage.
#' The target leakage scan uses univariate feature-outcome associations and can
#' miss multivariate proxies, interaction leakage, or features absent from `X_ref`.
#' The multivariate scan (enabled by default for supported tasks) reports an
#' additional model-based score.
#' Duplicate detection only considers the provided `X_ref` features and the
#' similarity threshold used during [audit_leakage()]. By default,
#' `duplicate_scope = "train_test"` filters to pairs that cross train/test;
#' set `duplicate_scope = "all"` to include within-fold duplicates.
#' Sections are reported as "not available" when the corresponding audit
#' component was not computed.
#'
#' @seealso [plot_perm_distribution()], [plot_fold_balance()], [plot_overlap_checks()]
#'
#' @param object A `LeakAudit` object from [audit_leakage()]. The summary reads
#' stored results from `object` and prints them to the console.
#' @param digits Integer number of digits to show when formatting numeric
#' statistics in the console output. Defaults to `3`. Increasing `digits`
#' shows more precision; decreasing it shortens the printout without changing
#' the underlying values.
#' @param ... Unused. Included for S3 method compatibility; additional
#' arguments are ignored.
#' @return Invisibly returns `object` after printing the summary.
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   subject = rep(1:6, each = 2),
#'   outcome = rbinom(12, 1, 0.5),
#'   x1 = rnorm(12),
#'   x2 = rnorm(12)
#' )
#' splits <- make_split_plan(df, outcome = "outcome",
#'                           mode = "subject_grouped", group = "subject", v = 3)
#' custom <- list(
#'   glm = list(
#'     fit = function(x, y, task, weights, ...) {
#'       stats::glm(y ~ ., data = as.data.frame(x),
#'                  family = stats::binomial(), weights = weights)
#'     },
#'     predict = function(object, newdata, task, ...) {
#'       as.numeric(stats::predict(object, newdata = as.data.frame(newdata),
#'                                 type = "response"))
#'     }
#'   )
#' )
#' fit <- fit_resample(df, outcome = "outcome", splits = splits,
#'                     learner = "glm", custom_learners = custom,
#'                     metrics = "auc", refit = FALSE, seed = 1)
#' audit <- audit_leakage(fit, metric = "auc", B = 5,
#'                        X_ref = df[, c("x1", "x2")], seed = 1)
#' summary(audit)  # prints the audit report and returns `audit` invisibly
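#'
#' # Variation sketch: also report within-fold duplicate pairs. The default
#' # `duplicate_scope = "train_test"` (see Details) keeps only pairs that
#' # cross train/test boundaries:
#' audit_all <- audit_leakage(fit, metric = "auc", B = 5,
#'                            X_ref = df[, c("x1", "x2")],
#'                            duplicate_scope = "all", seed = 1)
#' summary(audit_all)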
#'
#' @export
summary.LeakAudit <- function(object, digits = 3, ...) {
  if (!inherits(object, "LeakAudit"))
    stop("Object must be a 'LeakAudit'.", call. = FALSE)
  cat("\n==============================\n")
  cat(" bioLeak Leakage Audit Summary\n")
  cat("==============================\n\n")
  fit <- object@fit
  info <- fit@info
  warn_sym <- .bio_symbol("warn")
  ok_sym <- .bio_symbol("check")
  sym_pm <- .bio_symbol("pm")
  sym_chi <- .bio_symbol("chi_sq")
  sym_ge <- .bio_symbol("ge")
  task <- fit@task %||% NA_character_
  outcome <- fit@outcome %||% NA_character_
  splits <- fit@splits
  mode <- if (!is.null(splits)) splits@mode %||% NA_character_ else NA_character_
  indices <- if (!is.null(splits)) splits@indices else NULL
  hash <- info$hash %||% "NA"
  pos_class <- info$positive_class %||% NA_character_
  pos_label <- ""
  if (!is.null(pos_class) && isTRUE(task == "binomial") &&
      !is.na(pos_class) && nzchar(as.character(pos_class))) {
    pos_label <- paste0(" | Positive class: ", as.character(pos_class))
  }
  cat(sprintf("Task: %s | Outcome: %s | Splitting mode: %s%s\n",
              task, outcome, mode, pos_label))
  cat(sprintf("Hash: %s | Folds: %d | Repeats: %d\n\n",
              substr(hash, 1, 12),
              length(indices),
              info$repeats %||% 1))
  # --- Label-permutation association test ---
  if (!is.null(object@permutation_gap) && nrow(object@permutation_gap) > 0) {
    pg <- object@permutation_gap
    perm_method <- object@info$perm_method %||% "fixed"
    perm_mode <- object@info$perm_refit_mode %||% perm_method
    perm_label <- if (identical(perm_method, "refit")) {
      "refit per permutation"
    } else {
      "fixed predictions"
    }
    if (grepl("^auto", perm_mode)) {
      perm_label <- paste0(perm_label, " (auto)")
    }
    cat("Label-Permutation Association Test:\n")
    cat(sprintf(" Method: %s\n", perm_label))
    if (!is.null(object@info$perm_refit_reason) &&
        nzchar(object@info$perm_refit_reason)) {
      cat(sprintf(" Auto mode: %s\n", object@info$perm_refit_reason))
    }
    cat(sprintf(" Observed metric: %s\n",
                formatC(pg$metric_obs, digits = digits, format = "f")))
    cat(sprintf(" Permuted mean %s SD: %s %s %s\n",
                sym_pm,
                formatC(pg$perm_mean, digits = digits, format = "f"),
                sym_pm,
                formatC(pg$perm_sd, digits = digits, format = "f")))
    cat(sprintf(" Gap: %s (larger gap = stronger non-random signal)\n",
                formatC(pg$gap, digits = digits, format = "f")))
    if (!identical(perm_method, "refit")) {
      cat(" Note: Fixed-prediction label permutations quantify prediction-label association.\n")
      cat(" They do NOT refit models and are not a full null test of no signal.\n")
    }
    cat(" This test does NOT diagnose information leakage. Use the Batch Association,\n")
    cat(" Target Leakage Scan, and Duplicate Detection sections to check for leakage.\n\n")
  } else {
    cat("Label-Permutation Association Test: not available.\n\n")
  }
  # --- Batch association ---
  if (!is.null(object@batch_assoc) && nrow(object@batch_assoc) > 0) {
    ba <- object@batch_assoc
    cat("Batch / Study Association:\n")
    if ("batch_col" %in% names(ba)) {
      has_repeat <- "repeat_id" %in% names(ba)
      for (i in seq_len(nrow(ba))) {
        label <- if (has_repeat && is.finite(ba$repeat_id[i])) {
          sprintf("%s (repeat %s)", ba$batch_col[i], ba$repeat_id[i])
        } else {
          ba$batch_col[i]
        }
        cat(sprintf(" %s: %s = %s (df = %s), p = %s\n",
                    label,
                    sym_chi,
                    formatC(ba$stat[i], digits = digits, format = "f"),
                    formatC(ba$df[i], digits = digits, format = "f"),
                    formatC(ba$pval[i], digits = digits, format = "f")))
      }
      cat("\n")
    } else {
      cat(sprintf(" %s = %s (df = %s), p = %s\n\n",
                  sym_chi,
                  formatC(ba$stat, digits = digits, format = "f"),
                  formatC(ba$df, digits = digits, format = "f"),
                  formatC(ba$pval, digits = digits, format = "f")))
    }
  } else {
    cat("Batch / Study Association: none detected.\n\n")
  }
  # --- Target leakage scan ---
  if (!is.null(object@target_assoc) && nrow(object@target_assoc) > 0) {
    ta <- object@target_assoc
    threshold <- object@info$target_threshold %||% 0.9
    flagged <- ta[!is.na(ta$score) & ta$score >= threshold, , drop = FALSE]
    cat("Target Leakage Scan:\n")
    cat(sprintf(" Features checked: %d | Flagged (score %s %s): %d\n",
                nrow(ta),
                sym_ge,
                formatC(threshold, digits = digits, format = "f"),
                nrow(flagged)))
    if (nrow(flagged) > 0) {
      flagged <- flagged[order(flagged$score, decreasing = TRUE, na.last = TRUE), , drop = FALSE]
      top <- utils::head(flagged, 5)
      cols <- intersect(c("feature", "metric", "value", "score", "p_value"), names(top))
      print(top[, cols, drop = FALSE], row.names = FALSE)
    } else {
      cat(" No strong proxy features detected.\n")
    }
    cat("\n")
  } else {
    cat("Target Leakage Scan: not available.\n\n")
  }
  # --- Multivariate target scan ---
  mv <- object@info$target_multivariate %||% data.frame()
  if (is.data.frame(mv) && nrow(mv) > 0) {
    cat("Multivariate Target Scan:\n")
    mv_row <- mv[1, , drop = FALSE]
    cat(sprintf(" Metric: %s | Score: %s | p = %s\n",
                mv_row$metric,
                formatC(mv_row$score, digits = digits, format = "f"),
                formatC(mv_row$p_value, digits = digits, format = "f")))
    if (all(c("n_features", "n_components", "n_interactions", "n_perm") %in% names(mv_row))) {
      cat(sprintf(" Features: %s | Components: %s | Interactions: %s | Permutations: %s\n",
                  mv_row$n_features, mv_row$n_components, mv_row$n_interactions, mv_row$n_perm))
    }
    cat("\n")
  } else if (isTRUE(object@info$target_scan_multivariate)) {
    cat("Multivariate Target Scan: not available.\n\n")
  }
  # --- Duplicate detection ---
  if (!is.null(object@duplicates) && nrow(object@duplicates) > 0) {
    dd <- object@duplicates
    cat("Near-Duplicate Samples:\n")
    sim_label <- object@info$sim_method %||% "cosine"
    dup_scope <- object@info$duplicate_scope %||% "all"
    scope_label <- if (identical(dup_scope, "train_test")) "train/test only" else "all pairs"
    cat(sprintf(" Scope: %s\n", scope_label))
    cat(sprintf(" %d pairs detected above %s %s %s\n",
                nrow(dd),
                sim_label,
                sym_ge,
                formatC(object@info$duplicate_threshold, digits = digits, format = "f")))
    head_pairs <- utils::head(dd, 5)
    cat(" Example pairs:\n")
    print(head_pairs, row.names = FALSE)
    cat("\n")
  } else {
    cat("No near-duplicates detected.\n\n")
  }
  # --- Mechanism risk assessment ---
  mech <- object@info$mechanism_summary %||% data.frame()
  if (is.data.frame(mech) && nrow(mech) > 0) {
    cat("Mechanism Risk Assessment:\n")
    cols <- intersect(c("mechanism_class", "flagged", "evidence", "statistic", "p_value"),
                      names(mech))
    print(mech[, cols, drop = FALSE], row.names = FALSE)
    cat("\n")
  } else {
    cat("Mechanism Risk Assessment: not available.\n\n")
  }
  # --- Overall interpretation ---
  cat("Interpretation:\n")
  pg_available <- !is.null(object@permutation_gap) && nrow(object@permutation_gap) > 0
  if (!pg_available) {
    cat(" No permutation test results.\n")
  } else if (object@permutation_gap$gap < 0.01) {
    cat(sprintf(" %s Little non-random signal (gap near zero).\n", warn_sym))
  } else if (object@permutation_gap$gap < 0.05) {
    cat(sprintf(" %s Modest non-random signal.\n", ok_sym))
  } else {
    cat(sprintf(" %s Strong non-random signal.\n", ok_sym))
  }
  invisible(object)
}
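## The summary methods in this file lean on the null-default operator `%||%`
## ("use the right-hand side when the left is NULL"). A minimal sketch,
## assuming the package does not already import it from rlang or require
## R >= 4.4.0, where it ships in base:
if (!exists("%||%", mode = "function")) {
  `%||%` <- function(x, y) if (is.null(x)) y else x
}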
#' Summarize a LeakFit object
#'
#' Prints a compact console report for a [LeakFit] object created by
#' [fit_resample()]. The report lists task/outcome metadata, learners,
#' total folds, and cross-validated metrics summarized as mean and standard
#' deviation across completed folds, plus a small audit table with per-fold
#' train/test sizes and retained feature counts.
#'
#' This summary is meant for quick sanity checks of the resampling setup and
#' performance. It does not run leakage diagnostics and will not detect target
#' leakage, duplicate samples, or batch/study confounding; use [audit_leakage()]
#' or `summary()` on a [LeakAudit] object for those checks.
#'
#' @param object A [LeakFit] object returned by [fit_resample()]. It should
#' contain `metric_summary` and `audit` slots; missing entries result in empty
#' sections in the printed report.
#' @param digits Integer scalar. Number of decimal places to print in numeric
#' summary tables. Defaults to 3; affects printed output only, not the
#' returned data.
#' @param ... Unused. Included for S3 method compatibility; additional
#' arguments are ignored.
#' @return Invisibly returns `object@metric_summary`, a data frame of per-learner
#' metric means and standard deviations computed across folds. This function
#' does not recompute metrics.
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   subject = rep(1:6, each = 2),
#'   outcome = factor(rep(c(0, 1), each = 6)),
#'   x1 = rnorm(12),
#'   x2 = rnorm(12)
#' )
#' splits <- make_split_plan(
#'   df,
#'   outcome = "outcome",
#'   mode = "subject_grouped",
#'   group = "subject",
#'   v = 3,
#'   stratify = TRUE,
#'   progress = FALSE
#' )
#' custom <- list(
#'   glm = list(
#'     fit = function(x, y, task, weights, ...) {
#'       stats::glm(y ~ ., data = data.frame(y = y, x),
#'                  family = stats::binomial(), weights = weights)
#'     },
#'     predict = function(object, newdata, task, ...) {
#'       as.numeric(stats::predict(object,
#'                                 newdata = as.data.frame(newdata),
#'                                 type = "response"))
#'     }
#'   )
#' )
#' fit <- fit_resample(df, outcome = "outcome", splits = splits,
#'                     learner = "glm", custom_learners = custom,
#'                     metrics = "auc", seed = 1)
#' summary_df <- summary(fit)
#' summary_df
#'
#' @export
summary.LeakFit <- function(object, digits = 3, ...) {
  if (!inherits(object, "LeakFit"))
    stop("Object must be of class 'LeakFit'.", call. = FALSE)
  cat("\n===========================\n")
  cat(" bioLeak Model Fit Summary\n")
  cat("===========================\n\n")
  sym_pm <- .bio_symbol("pm")
  # Basic info
  info <- object@info
  cat(sprintf("Task: %s\n", object@task))
  cat(sprintf("Outcome: %s\n", object@outcome))
  if (identical(object@task, "binomial") && !is.null(info$positive_class) &&
      !is.na(info$positive_class) && nzchar(as.character(info$positive_class))) {
    cat(sprintf("Positive class: %s\n", as.character(info$positive_class)))
  }
  cat(sprintf("Learners: %s\n", paste(unique(object@metrics$learner), collapse = ", ")))
  cat(sprintf("Total folds: %d\n", length(object@splits@indices)))
  if (is.data.frame(info$fold_status) && nrow(info$fold_status) > 0) {
    ok <- sum(info$fold_status$status == "success", na.rm = TRUE)
    sk <- sum(info$fold_status$status == "skipped", na.rm = TRUE)
    fl <- sum(info$fold_status$status == "failed", na.rm = TRUE)
    cat(sprintf("Fold status: %d success, %d skipped, %d failed\n", ok, sk, fl))
  }
  cat(sprintf("Refit performed: %s\n", if (isTRUE(info$refit)) "Yes" else "No"))
  cat(sprintf("Hash: %s\n\n", substr(info$hash, 1, 12)))
  # Metric summary
  if (nrow(object@metric_summary) > 0) {
    cat(sprintf("Cross-validated metrics (mean %s SD):\n", sym_pm))
    ms <- object@metric_summary
    metrics_fmt <- as.data.frame(ms)
    num_cols <- vapply(metrics_fmt, is.numeric, logical(1))
    if (any(num_cols)) {
      metrics_fmt[num_cols] <- lapply(metrics_fmt[num_cols], round, digits = digits)
    }
    print(metrics_fmt)
    cat("\n")
  } else {
    cat("No metric summary available.\n\n")
  }
  # Audit information
  if (nrow(object@audit) > 0) {
    cat("Audit overview:\n")
    audit_df <- utils::head(object@audit, 5)
    print(audit_df, row.names = FALSE)
    cat("\n")
  } else {
    cat("No audit information stored.\n\n")
  }
  invisible(object@metric_summary)
}
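## `.bio_symbol()` is an internal helper that returns the display glyphs used
## in these reports ("warn", "check", "pm", "chi_sq", "ge"). A minimal
## illustrative fallback, assuming the real implementation (defined elsewhere
## in the package) may also degrade gracefully on non-UTF-8 consoles:
.bio_symbol_fallback <- function(name) {
  switch(name,
    warn   = "!",          # warning marker
    check  = "OK",         # success marker
    pm     = "\u00b1",     # plus-minus sign
    chi_sq = "X-squared",  # chi-squared statistic label
    ge     = ">=",         # greater-than-or-equal sign
    name                   # unknown names fall through unchanged
  )
}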
#' Summarize a nested tuning result
#'
#' Prints a concise report for a `LeakTune` object produced by [tune_resample()].
#' The report highlights the tuning strategy, selection metric, and
#' cross-validated performance across outer folds, plus a glimpse of the selected
#' hyperparameters.
#'
#' @param object A [LeakTune] object returned by [tune_resample()].
#' @param digits Integer scalar. Number of decimal places to print in numeric
#' summary tables. Defaults to 3.
#' @param ... Unused. Included for S3 method compatibility.
#' @return Invisibly returns `object$metric_summary`, the data frame of per-learner
#' metric means and standard deviations computed across outer folds.
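#' @examples
#' \dontrun{
#' ## Illustrative sketch only: reuses `df` and `splits` from the
#' ## summary.LeakFit() example above, and assumes tune_resample() accepts
#' ## fit_resample()-style arguments; the real signature may differ.
#' tuned <- tune_resample(df, outcome = "outcome", splits = splits,
#'                        learner = "glm", metrics = "auc", seed = 1)
#' summary(tuned)
#' }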
#' @export
summary.LeakTune <- function(object, digits = 3, ...) {
  if (!inherits(object, "LeakTune"))
    stop("Object must be of class 'LeakTune'.", call. = FALSE)
  cat("\n============================\n")
  cat(" bioLeak Tuning Summary\n")
  cat("============================\n\n")
  sym_pm <- .bio_symbol("pm")
  info <- object$info
  outer_fit_ok <- vapply(object$outer_fits, function(of) {
    !is.null(of) && methods::is(of, "LeakFit")
  }, logical(1))
  successful_outer <- sum(outer_fit_ok)
  total_outer <- length(object$outer_fits)
  # Retrieve outcome from the first successful outer fit if available
  outcome_label <- "Unknown"
  if (successful_outer > 0) {
    first_outer <- object$outer_fits[[which(outer_fit_ok)[1]]]
    outcome_label <- first_outer@outcome
  }
  cat(sprintf("Task: %s\n", info$task))
  cat(sprintf("Outcome: %s\n", outcome_label))
  if (identical(info$task, "binomial") && !is.null(info$positive_class)) {
    cat(sprintf("Positive class: %s\n", as.character(info$positive_class)))
  }
  # Tuning info
  grid_info <- if (is.data.frame(info$grid)) paste(nrow(info$grid), "combinations") else info$grid
  cat(sprintf("Tuning Grid: %s\n", grid_info))
  cat(sprintf("Selection Rule: %s (Metric: %s)\n", info$selection, info$selection_metric))
  if (is.data.frame(info$fold_status) && nrow(info$fold_status) > 0) {
    ok <- sum(info$fold_status$status == "success", na.rm = TRUE)
    sk <- sum(info$fold_status$status == "skipped", na.rm = TRUE)
    fl <- sum(info$fold_status$status == "failed", na.rm = TRUE)
    cat(sprintf("Fold status: %d success, %d skipped, %d failed\n", ok, sk, fl))
  }
  cat(sprintf("Refit performed: %s\n",
              if (isTRUE(info$refit) && !is.null(object$final_model)) "Yes" else "No"))
  cat(sprintf("Outer Folds: %d successful / %d total\n\n", successful_outer, total_outer))
  # Metric summary
  if (nrow(object$metric_summary) > 0) {
    cat(sprintf("Outer Loop Metrics (mean %s SD):\n", sym_pm))
    ms <- object$metric_summary
    metrics_fmt <- as.data.frame(ms)
    num_cols <- vapply(metrics_fmt, is.numeric, logical(1))
    if (any(num_cols)) {
      metrics_fmt[num_cols] <- lapply(metrics_fmt[num_cols], round, digits = digits)
    }
    print(metrics_fmt)
    cat("\n")
  } else {
    cat("No metric summary available.\n\n")
  }
  # Best parameters snapshot (cleaned)
  if (nrow(object$best_params) > 0) {
    cat("Best Parameters (First 5 Folds):\n")
    bp <- utils::head(object$best_params, 5)
    # Remove internal bookkeeping columns
    bp$id <- NULL
    bp$.config <- NULL
    # Check if any actual parameters remain
    param_cols <- setdiff(names(bp), c("fold", "learner"))
    if (length(param_cols) == 0) {
      cat(" (No tunable parameters detected in model spec)\n")
    } else {
      # Rounding for display
      num_cols_bp <- vapply(bp, is.numeric, logical(1))
      if (any(num_cols_bp)) {
        bp[num_cols_bp] <- lapply(bp[num_cols_bp], round, digits = digits)
      }
      print(bp, row.names = FALSE)
    }
    cat("\n")
  }
  invisible(object$metric_summary)
}
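## Sketch (not exported): the gap cutoffs used in the Interpretation section
## of summary.LeakAudit(), factored out for reuse. The 0.01 / 0.05 values
## mirror the hard-coded thresholds above; they are display heuristics, not
## calibrated significance levels.
.classify_perm_gap <- function(gap) {
  if (!length(gap) || !is.finite(gap[1])) return("no permutation test results")
  if (gap[1] < 0.01) {
    "little non-random signal (gap near zero)"
  } else if (gap[1] < 0.05) {
    "modest non-random signal"
  } else {
    "strong non-random signal"
  }
}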