R/skipcor.R

Defines functions print.summary.skipped_corr summary.skipped_corr .mc_skipcor_pairwise_summary plot.skipped_corr print.skipped_corr skipped_corr_masks `[[.skipped_corr` `$.skipped_corr` names.skipped_corr .mc_skipcor_component_names .mc_skipcor_inference_attr .mc_skipcor_ci_attr skipped_corr

Documented in plot.skipped_corr print.skipped_corr print.summary.skipped_corr skipped_corr skipped_corr_masks summary.skipped_corr

#' @title Pairwise skipped correlation
#'
#' @description
#' Computes all pairwise skipped correlation coefficients for the numeric
#' columns of a matrix or data frame using a high-performance 'C++' backend.
#'
#' Skipped correlation detects bivariate outliers using a projection rule and
#' then computes Pearson or Spearman correlation on the retained observations.
#' It is designed for situations where marginally robust methods can still be
#' distorted by unusual points in the joint data cloud.
#'
#' @param data A numeric matrix or a data frame with at least two numeric
#' columns. All non-numeric columns will be excluded.
#' @param method Correlation computed after removing projected outliers. One of
#'   \code{"pearson"} (default) or \code{"spearman"}.
#' @param na_method One of \code{"error"} (default) or \code{"pairwise"}.
#'   With \code{"error"}, the function requires all retained numeric columns to
#'   be free of missing or non-finite values and aborts otherwise. This is the
#'   recommended setting when you want a single common sample size across all
#'   pairs, reproducible skipped-row diagnostics on the same rows, or bootstrap
#'   inference via \code{ci = TRUE} / \code{p_value = TRUE}. With
#'   \code{"pairwise"}, each variable pair is computed on its own overlap of
#'   finite rows. This is more permissive for incomplete data, but different
#'   pairs can be based on different effective samples and different skipped-row
#'   sets, so the resulting matrix is less directly comparable across entries.
#' @param stand Logical; if \code{TRUE} (default), each variable in the pair is
#'   centred by its median and divided by a robust scale estimate before the
#'   projection outlier search. The scale estimate is the MAD when positive,
#'   with fallback to \eqn{\mathrm{IQR}/1.34898} and then the usual sample
#'   standard deviation if needed. This standardisation affects only outlier
#'   detection, not the final correlation computed on the retained
#'   observations.
#' @param outlier_rule One of \code{"idealf"} (default) or \code{"mad"}.
#'   The default uses the ideal-fourths interquartile width of projected
#'   distances; \code{"mad"} uses the median absolute deviation of projected
#'   distances.
#' @param cutoff Positive numeric constant multiplying the projected spread in
#'   the outlier rule
#'   \eqn{\mathrm{med}(d_{i\cdot}) + cutoff \times s(d_{i\cdot})}. Larger
#'   values flag fewer observations as outliers; smaller values flag more.
#'   Default \code{sqrt(qchisq(0.975, df = 2))}.
#' @param n_threads Integer \eqn{\geq 1}. Number of OpenMP threads. Defaults to
#'   \code{getOption("matrixCorr.threads", 1L)}.
#' @param return_masks Logical; if \code{TRUE}, attach compact pairwise skipped-row
#'   indices as an attribute. Default \code{FALSE}.
#' @param ci Logical; if \code{TRUE}, attach percentile-bootstrap confidence
#'   intervals for each skipped correlation using the Wilcox (2015) B2
#'   resampling scheme. Default \code{FALSE}.
#' @param p_value Logical; if \code{TRUE}, attach bootstrap p-values for testing
#'   whether each skipped correlation is zero. Default \code{FALSE}.
#' @param conf_level Confidence level used when \code{ci = TRUE}. Default
#'   \code{0.95}.
#' @param n_boot Integer \eqn{\geq 2}. Number of bootstrap resamples used when
#'   \code{ci = TRUE} or \code{p_value = TRUE}. Default \code{2000}.
#' @param p_adjust One of \code{"none"} (default), \code{"hochberg"}, or
#'   \code{"ecp"}. Optional familywise-error procedure applied to the matrix of
#'   bootstrap p-values. \code{"hochberg"} corresponds to method H in Wilcox,
#'   Rousselet, and Pernet (2018); \code{"ecp"} corresponds to their simulated
#'   critical-p-value method ECP.
#' @param fwe_level Familywise-error level used when
#'   \code{p_adjust = "hochberg"} or \code{"ecp"}. Default \code{0.05}.
#' @param n_mc Integer \eqn{\geq 10}. Number of null Monte Carlo data sets used
#'   when \code{p_adjust = "ecp"} to estimate the critical p-value. Default
#'   \code{1000}.
#' @param seed Optional positive integer used to seed the bootstrap resampling
#'   when \code{ci = TRUE} or \code{p_value = TRUE}. If \code{NULL}, a fresh
#'   internal seed is generated.
#' @param output Output representation for the computed estimates.
#'   \itemize{
#'   \item \code{"matrix"} (default): full dense matrix; best when you need
#'   matrix algebra, dense heatmaps, or full compatibility with existing code.
#'   \item \code{"sparse"}: sparse matrix from \pkg{Matrix} containing only
#'   retained entries; best when many values are dropped by thresholding.
#'   \item \code{"edge_list"}: long-form data frame with columns
#'   \code{row}, \code{col}, \code{value}; convenient for filtering, joins,
#'   and network-style workflows.
#'   }
#' @param threshold Non-negative absolute-value filter for non-matrix outputs:
#'   keep entries with \code{abs(value) >= threshold}. Use
#'   \code{threshold > 0} when you want only stronger associations (typically
#'   with \code{output = "sparse"} or \code{"edge_list"}). Keep
#'   \code{threshold = 0} to retain all values. Must be \code{0} when
#'   \code{output = "matrix"}.
#' @param diag Logical; whether to include diagonal entries in
#'   \code{"sparse"} and \code{"edge_list"} outputs.
#' @param x An object of class \code{skipped_corr}.
#' @param var1,var2 Optional column names or 1-based column indices used by
#'   [skipped_corr_masks()] to extract the skipped-row indices for one pair.
#' @param digits Integer; number of digits to print.
#' @param n Optional row threshold for compact preview output.
#' @param topn Optional number of leading/trailing rows to show when truncated.
#' @param max_vars Optional maximum number of visible columns; `NULL` derives this
#'   from console width.
#' @param width Optional display width; defaults to \code{getOption("width")}.
#' @param ... Additional arguments passed to the underlying print or plot helper.
#' @param title Character; plot title.
#' @param low_color,high_color,mid_color Colors used in the heatmap.
#' @param value_text_size Numeric text size for overlaid cell values.
#' @param show_value Logical; if \code{TRUE} (default), overlay numeric values
#'   on the heatmap tiles.
#'
#' @return A symmetric correlation matrix with class \code{skipped_corr} and
#'   attributes \code{method = "skipped_correlation"}, \code{description}, and
#'   \code{package = "matrixCorr"}. When \code{return_masks = TRUE}, the matrix
#'   also carries a \code{skipped_masks} attribute containing compact pairwise
#'   skipped-row indices. The \code{diagnostics} attribute stores per-pair
#'   complete-case counts and skipped-row counts/proportions. When
#'   \code{ci = TRUE} or \code{p_value = TRUE}, bootstrap inference matrices are
#'   attached via attributes.
#'
#' @details
#' Let \eqn{X \in \mathbb{R}^{n \times p}} be a numeric matrix with rows as
#' observations and columns as variables. For a given pair of columns
#' \eqn{(x, y)}, write the observed bivariate points as
#' \eqn{u_i = (x_i, y_i)^\top}, \eqn{i=1,\ldots,n}. If \code{stand = TRUE},
#' each margin is first centred by its median and divided by a robust scale
#' estimate before outlier detection; otherwise the original pair is used. The
#' robust scale is the MAD when positive, with fallback to
#' \eqn{\mathrm{IQR}/1.34898} and then the usual sample standard deviation if
#' needed. Let \eqn{\tilde u_i} denote the resulting points and let \eqn{c} be
#' the componentwise median center of the detection cloud.
#'
#' For each observation \eqn{i}, define the direction vector
#' \eqn{b_i = \tilde u_i - c}. When \eqn{\|b_i\| > 0}, all observations are
#' projected onto the line through \eqn{c} in direction \eqn{b_i}. The
#' projected distances are
#' \deqn{
#' d_{ij} \;=\; \frac{|(\tilde u_j - c)^\top b_i|}{\|b_i\|},
#' \qquad j=1,\ldots,n.
#' }
#' For each direction \eqn{i}, observation \eqn{j} is flagged as an outlier if
#' 
#' \deqn{
#' d_{ij} \;>\; \mathrm{med}(d_{i\cdot}) + g\, s(d_{i\cdot}),
#' \qquad g = \code{cutoff},
#' }
#' where \eqn{s(\cdot)} is either the ideal-fourths interquartile width
#' (\code{outlier_rule = "idealf"}) or the median absolute deviation
#' (\code{outlier_rule = "mad"}). An observation is removed if it is flagged
#' for at least one projection direction. The skipped correlation is then the
#' ordinary Pearson or Spearman correlation computed from the retained
#' observations:
#' \deqn{
#' r_{\mathrm{skip}}(x,y) \;=\;
#' \mathrm{cor}\!\left(x_{\mathcal{K}}, y_{\mathcal{K}}\right),
#' }
#' where \eqn{\mathcal{K}} is the index set of observations not flagged as
#' outliers.
#'
#' Unlike marginally robust methods such as \code{pbcor()}, \code{wincor()},
#' or \code{bicor()}, skipped correlation is explicitly pairwise because
#' outlier detection depends on the joint geometry of each variable pair. As a
#' result, the reported matrix need not be positive semidefinite, even with
#' complete data.
#'
#' \strong{Computational notes.} In the complete-data path, each column pair
#' requires a full bivariate projection search, so the dominant cost is higher
#' than for marginal robust methods. The implementation evaluates pairs in
#' 'C++'; where available, pairs are processed with 'OpenMP' parallelism. With
#' \code{na_method = "pairwise"}, each pair is recomputed on its overlap of
#' non-missing rows.
#'
#' \strong{Bootstrap inference.} When \code{ci = TRUE} or \code{p_value = TRUE},
#' the implementation uses the percentile-bootstrap strategy studied by Wilcox
#' (2015). Each bootstrap replicate resamples whole observation pairs with
#' replacement, reruns the skipped-correlation outlier detection on the
#' resampled data, and recomputes the skipped correlation on the retained
#' observations. This corresponds to Wilcox's B2 method and avoids the
#' statistically unsatisfactory shortcut of removing outliers only once before
#' bootstrapping. Bootstrap inference currently requires complete data
#' (\code{na_method = "error"}). When \code{p_adjust = "hochberg"}, the
#' bootstrap p-values are processed with Hochberg's step-up procedure (method H
#' in Wilcox, Rousselet, and Pernet, 2018). When \code{p_adjust = "ecp"}, the
#' package follows their ECP method and simulates \code{n_mc} null data sets
#' from a \eqn{p}-variate normal distribution with identity covariance,
#' recomputes the pairwise bootstrap p-values for each null data set, stores the
#' minimum p-value from each run, and estimates the \code{fwe_level} quantile of
#' that null distribution using the Harrell-Davis estimator. Hypotheses are then
#' rejected when their observed bootstrap p-values are less than or equal to the
#' estimated critical p-value. The calibrated H1 procedure from Wilcox,
#' Rousselet, and Pernet (2018) is not currently implemented.
#'
#' @references
#' Wilcox, R. R. (2004). Inferences based on a skipped correlation coefficient.
#' Journal of Applied Statistics, 31(2), 131-143.
#' \doi{10.1080/0266476032000148821}
#'
#' Wilcox, R. R. (2015). Inferences about the skipped correlation coefficient:
#' Dealing with heteroscedasticity and non-normality. Journal of Modern Applied
#' Statistical Methods, 14(1), 172-188.
#' \doi{10.22237/jmasm/1430453580}
#'
#' Wilcox, R. R., Rousselet, G. A., & Pernet, C. R. (2018). Improved methods
#' for making inferences about multiple skipped correlations. Journal of
#' Statistical Computation and Simulation, 88(16), 3116-3131.
#' \doi{10.1080/00949655.2018.1501051}
#'
#' @seealso [pbcor()], [wincor()], [bicor()]
#'
#' @examples
#' set.seed(12)
#' X <- matrix(rnorm(160 * 4), ncol = 4)
#' X[1, 1] <- 9
#' X[1, 2] <- -8
#'
#' R <- skipped_corr(X, method = "pearson")
#' print(R, digits = 2)
#' summary(R)
#' plot(R)
#'
#' Rm <- skipped_corr(X, method = "pearson", return_masks = TRUE)
#' skipped_corr_masks(Rm, 1, 2)
#'
#' # Example 1:
#' Xm <- as.matrix(datasets::mtcars[, c("mpg", "disp", "hp", "wt")])
#' Rm2 <- skipped_corr(Xm, method = "spearman")
#' print(Rm2, digits = 2)
#'
#' # Example 2:
#' Ri <- skipped_corr(Xm, method = "pearson", ci = TRUE, n_boot = 40, seed = 1)
#' Ri$ci
#'
#' # Interactive viewing (requires shiny)
#' if (interactive() && requireNamespace("shiny", quietly = TRUE)) {
#'   view_corr_shiny(R)
#' }
#'
#' @author Thiago de Paula Oliveira
#' @export
skipped_corr <- function(data,
                    method = c("pearson", "spearman"),
                    na_method = c("error", "pairwise"),
                    ci = FALSE,
                    p_value = FALSE,
                    conf_level = 0.95,
                    n_threads = getOption("matrixCorr.threads", 1L),
                    return_masks = FALSE,
                    stand = TRUE,
                    outlier_rule = c("idealf", "mad"),
                    cutoff = sqrt(stats::qchisq(0.975, df = 2)),
                    n_boot = 2000L,
                    p_adjust = c("none", "hochberg", "ecp"),
                    fwe_level = 0.05,
                    n_mc = 1000L,
                    seed = NULL,
                    output = c("matrix", "sparse", "edge_list"),
                    threshold = 0,
                    diag = TRUE) {
  output_cfg <- .mc_validate_output_args(
    output = output,
    threshold = threshold,
    diag = diag
  )
  method <- match.arg(method)
  na_method <- match.arg(na_method)
  outlier_rule <- match.arg(outlier_rule)
  p_adjust <- match.arg(p_adjust)
  check_bool(stand, arg = "stand")
  check_bool(return_masks, arg = "return_masks")
  check_bool(ci, arg = "ci")
  check_bool(p_value, arg = "p_value")
  check_scalar_numeric(cutoff, arg = "cutoff", lower = 0, closed_lower = FALSE)
  check_prob_scalar(conf_level, arg = "conf_level", open_ends = TRUE)
  check_prob_scalar(fwe_level, arg = "fwe_level", open_ends = TRUE)
  n_threads <- check_scalar_int_pos(n_threads, arg = "n_threads")
  n_boot <- check_scalar_int_pos(n_boot, arg = "n_boot")
  n_mc <- check_scalar_int_pos(n_mc, arg = "n_mc")
  if (n_mc < 10L) {
    abort_bad_arg("n_mc", message = "must be >= 10.")
  }
  if (!is.null(seed)) {
    seed <- check_scalar_int_pos(seed, arg = "seed")
  }
  if ((isTRUE(ci) || isTRUE(p_value)) && na_method != "error") {
    abort_bad_arg(
      "na_method",
      message = "{.arg ci} and {.arg p_value} currently require {.code na_method = \"error\"}."
    )
  }
  if (!isTRUE(p_value) && !identical(p_adjust, "none")) {
    abort_bad_arg(
      "p_adjust",
      message = "{.arg p_adjust} can only be used when {.arg p_value} = TRUE."
    )
  }

  numeric_data <- if (na_method == "error") {
    validate_corr_input(data)
  } else {
    validate_corr_input(data, check_na = FALSE)
  }
  colnames_data <- colnames(numeric_data)

  method_int <- switch(method, pearson = 0L, spearman = 1L)
  use_mad <- identical(outlier_rule, "mad")
  prev_threads <- get_omp_threads()
  on.exit(set_omp_threads(as.integer(prev_threads)), add = TRUE)
  res <- skipcor_matrix_cpp(
    numeric_data,
    method_int = method_int,
    stand = stand,
    use_mad = use_mad,
    gval = cutoff,
    min_n = 5L,
    n_threads = n_threads,
    return_masks = return_masks,
    return_inference = isTRUE(ci) || isTRUE(p_value),
    conf_level = conf_level,
    n_boot = n_boot,
    seed = if (is.null(seed)) sample.int(.Machine$integer.max, 1L) else seed,
    multiple_method_int = switch(p_adjust, none = 0L, hochberg = 1L, ecp = 2L),
    fwe_level = fwe_level,
    n_mc = n_mc
  )

  mask_payload <- NULL
  if (isTRUE(return_masks)) {
    mask_payload <- structure(
      list(
        pair_i = as.integer(res$pair_i),
        pair_j = as.integer(res$pair_j),
        skipped_rows = unname(lapply(res$skipped_rows, as.integer)),
        n_rows = nrow(numeric_data),
        n_cols = ncol(numeric_data),
        colnames = colnames_data
      ),
      class = "skipped_corr_masks"
    )
  }

  diag_payload <- list(
    n_complete = unclass(res$n_complete),
    skipped_n = unclass(res$skipped_n),
    skipped_prop = unclass(res$skipped_prop)
  )
  res_cor <- .mc_set_matrix_dimnames(res$cor, colnames_data)
  for (nm in names(diag_payload)) {
    diag_payload[[nm]] <- .mc_set_matrix_dimnames(diag_payload[[nm]], colnames_data)
  }

  infer_payload <- NULL
  if (isTRUE(ci) || isTRUE(p_value)) {
    infer_payload <- list(
      conf_level = conf_level,
      n_boot = n_boot,
      method = switch(
        p_adjust,
        none = "bootstrap_b2",
        hochberg = "method_h",
        ecp = "ecp"
      )
    )
    if (isTRUE(ci)) {
      lwr <- unclass(res$lwr)
      upr <- unclass(res$upr)
      lwr <- .mc_set_matrix_dimnames(lwr, colnames_data)
      upr <- .mc_set_matrix_dimnames(upr, colnames_data)
      infer_payload$lwr <- lwr
      infer_payload$upr <- upr
    }
    if (isTRUE(p_value)) {
      p_mat <- unclass(res$p_value)
      p_mat <- .mc_set_matrix_dimnames(p_mat, colnames_data)
      infer_payload$p_value <- p_mat
      if (identical(p_adjust, "hochberg")) {
        padj <- unclass(res$p_value_adjusted)
        reject <- unclass(res$reject) > 0.5
        padj <- .mc_set_matrix_dimnames(padj, colnames_data)
        reject <- .mc_set_matrix_dimnames(reject, colnames_data)
        infer_payload$p_value_adjusted <- padj
        infer_payload$reject <- reject
        infer_payload$p_adjust <- "hochberg"
      } else if (identical(p_adjust, "ecp")) {
        reject <- unclass(res$reject) > 0.5
        reject <- .mc_set_matrix_dimnames(reject, colnames_data)
        infer_payload$reject <- reject
        infer_payload$critical_p_value <- unname(res$critical_p_value)
        infer_payload$p_adjust <- "ecp"
      } else {
        infer_payload$p_adjust <- "none"
      }
    }
  }

  ci_attr <- NULL
  inference_attr <- NULL
  if (!is.null(infer_payload)) {
    if (isTRUE(ci)) {
      ci_attr <- list(
        est = unclass(res_cor),
        lwr.ci = infer_payload$lwr,
        upr.ci = infer_payload$upr,
        conf.level = infer_payload$conf_level
      )
    }

    inference_attr <- list(
      method = infer_payload$method,
      n_boot = infer_payload$n_boot,
      conf.level = infer_payload$conf_level,
      fwe_level = fwe_level,
      p_adjust = infer_payload$p_adjust %||% "none"
    )
    if (isTRUE(ci)) {
      inference_attr$ci <- ci_attr
    }
    if (isTRUE(p_value)) {
      inference_attr$p_value <- infer_payload$p_value
      if (!is.null(infer_payload$p_value_adjusted)) {
        inference_attr$p_value_adjusted <- infer_payload$p_value_adjusted
      }
      if (!is.null(infer_payload$reject)) {
        inference_attr$reject <- infer_payload$reject
      }
      if (!is.null(infer_payload$critical_p_value)) {
        inference_attr$critical_p_value <- infer_payload$critical_p_value
      }
    }
    if (identical(p_adjust, "ecp")) inference_attr$n_mc <- n_mc
  }

  out <- .mc_structure_corr_matrix(
    res_cor,
    class_name = "skipped_corr",
    method = "skipped_correlation",
    description = paste0(
      "Skipped correlation; base = ", method,
      "; rule = ", outlier_rule,
      "; standardise = ", stand,
      "; NA mode = ", na_method, "."
    ),
    diagnostics = diag_payload,
    extra_attrs = c(
      if (isTRUE(return_masks)) list(skipped_masks = mask_payload),
      if (!is.null(ci_attr)) list(ci = ci_attr),
      if (!is.null(inference_attr)) list(inference = inference_attr)
    )
  )
  .mc_finalize_corr_output(
    out,
    output = output_cfg$output,
    threshold = output_cfg$threshold,
    diag = output_cfg$diag
  )
}

.mc_skipcor_ci_attr <- function(x) {
  ci <- attr(x, "ci", exact = TRUE)
  if (!is.null(ci)) return(ci)

  lwr <- attr(x, "lwr.ci", exact = TRUE)
  upr <- attr(x, "upr.ci", exact = TRUE)
  if (is.null(lwr) && is.null(upr)) return(NULL)

  list(
    est = unclass(as.matrix(x)),
    lwr.ci = lwr,
    upr.ci = upr,
    conf.level = attr(x, "conf.level", exact = TRUE)
  )
}

.mc_skipcor_inference_attr <- function(x) {
  inf <- attr(x, "inference", exact = TRUE)
  if (!is.null(inf)) return(inf)

  legacy <- list(
    method = attr(x, "inference_method", exact = TRUE),
    n_boot = attr(x, "n_boot", exact = TRUE),
    conf.level = attr(x, "conf.level", exact = TRUE),
    fwe_level = attr(x, "fwe_level", exact = TRUE),
    p_adjust = attr(x, "p_adjust", exact = TRUE),
    p_value = attr(x, "p_value", exact = TRUE),
    p_value_adjusted = attr(x, "p_value_adjusted", exact = TRUE),
    reject = attr(x, "reject", exact = TRUE),
    critical_p_value = attr(x, "critical_p_value", exact = TRUE)
  )
  if (all(vapply(legacy, is.null, logical(1)))) return(NULL)
  legacy$ci <- .mc_skipcor_ci_attr(x)
  legacy
}

.mc_skipcor_component_names <- function(x) {
  out <- c("estimate")
  if (!is.null(.mc_skipcor_ci_attr(x))) out <- c(out, "ci")
  inf <- .mc_skipcor_inference_attr(x)
  if (!is.null(inf)) {
    if (!is.null(inf$p_value)) out <- c(out, "p_value")
    if (!is.null(inf$p_value_adjusted)) out <- c(out, "p_value_adjusted")
    if (!is.null(inf$reject)) out <- c(out, "reject")
    if (!is.null(inf$critical_p_value)) out <- c(out, "critical_p_value")
    out <- c(out, "inference")
  }
  if (!is.null(attr(x, "diagnostics", exact = TRUE))) out <- c(out, "diagnostics")
  if (!is.null(attr(x, "skipped_masks", exact = TRUE))) out <- c(out, "skipped_masks")
  unique(out)
}

#' @export
names.skipped_corr <- function(x) {
  .mc_skipcor_component_names(x)
}

#' @export
`$.skipped_corr` <- function(x, name) {
  ci <- .mc_skipcor_ci_attr(x)
  inf <- .mc_skipcor_inference_attr(x)
  switch(name,
    estimate = unclass(as.matrix(x)),
    est = unclass(as.matrix(x)),
    cor = unclass(as.matrix(x)),
    ci = ci,
    p_value = if (is.null(inf)) NULL else inf$p_value,
    p_value_adjusted = if (is.null(inf)) NULL else inf$p_value_adjusted,
    reject = if (is.null(inf)) NULL else inf$reject,
    critical_p_value = if (is.null(inf)) NULL else inf$critical_p_value,
    inference = inf,
    diagnostics = attr(x, "diagnostics", exact = TRUE),
    skipped_masks = attr(x, "skipped_masks", exact = TRUE),
    NULL
  )
}

#' @export
`[[.skipped_corr` <- function(x, i, ...) {
  if (is.numeric(i)) {
    nms <- names(x)
    if (length(i) != 1L || is.na(i) || i < 1L || i > length(nms)) {
      abort_bad_arg("i", message = "must be a valid component index.")
    }
    i <- nms[[i]]
  }
  `$.skipped_corr`(x, i)
}

#' @rdname skipped_corr
#' @export
skipped_corr_masks <- function(x, var1 = NULL, var2 = NULL) {
  check_inherits(x, "skipped_corr")
  masks <- attr(x, "skipped_masks", exact = TRUE)
  if (is.null(masks)) return(NULL)
  if (is.null(var1) && is.null(var2)) return(masks)
  if (is.null(var1) || is.null(var2)) {
    abort_bad_arg("var1", message = "and {.arg var2} must both be supplied when extracting a specific pair mask.")
  }

  resolve_one <- function(var, arg) {
    if (is.character(var)) {
      if (length(var) != 1L || is.na(var)) {
        abort_bad_arg(arg, message = "must be a single non-missing column name or index.")
      }
      idx <- match(var, colnames(x))
      if (is.na(idx)) {
        abort_bad_arg(arg, message = "must match a column name in {.arg x}.")
      }
      idx
    } else {
      idx <- check_scalar_int_pos(var, arg = arg)
      if (idx > ncol(x)) {
        abort_bad_arg(arg, message = "must be <= ncol(x).")
      }
      idx
    }
  }

  i <- resolve_one(var1, "var1")
  j <- resolve_one(var2, "var2")
  if (i == j) return(integer())
  lo <- min(i, j)
  hi <- max(i, j)
  hit <- which(masks$pair_i == lo & masks$pair_j == hi)
  if (!length(hit)) integer() else masks$skipped_rows[[hit[[1L]]]]
}

#' @rdname skipped_corr
#' @method print skipped_corr
#' @param ci_digits Integer; digits for skipped-correlation confidence limits.
#' @param show_ci One of \code{"yes"} or \code{"no"}.
#' @param show_p One of \code{"auto"}, \code{"yes"}, \code{"no"}. For
#'   \code{print()}, \code{"auto"} keeps the compact matrix-only display;
#'   use \code{"yes"} to also print pairwise p-values.
#' @export
print.skipped_corr <- function(x,
                               digits = 4,
                               n = NULL,
                               topn = NULL,
                               max_vars = NULL,
                               width = NULL,
                               ci_digits = 4,
                               show_ci = NULL,
                               show_p = c("auto", "yes", "no"),
                               ...) {
  show_ci <- .mc_validate_yes_no(
    show_ci,
    arg = "show_ci",
    default = .mc_display_option("print_show_ci", "yes")
  )
  show_p <- match.arg(show_p)
  .mc_print_corr_matrix(
    x,
    header = "Skipped correlation matrix",
    digits = digits,
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ...
  )
  ci <- .mc_skipcor_ci_attr(x)
  inf <- .mc_skipcor_inference_attr(x)

  include_ci <- identical(show_ci, "yes") && !is.null(ci)
  include_p <- identical(show_p, "yes") && !is.null(inf) && !is.null(inf$p_value)
  if (include_p && !include_ci) {
    cli::cli_inform("Use {.code summary(x, show_ci = \"no\")} for bounded pairwise p-value output.")
  }
  invisible(x)
}

#' @rdname skipped_corr
#' @method plot skipped_corr
#' @param ci_text_size Text size for confidence intervals in the heatmap.
#' @param show_value Logical; if \code{TRUE} (default), overlay numeric values
#'   on the heatmap tiles.
#' @export
plot.skipped_corr <- function(x,
                         title = "Skipped correlation heatmap",
                         low_color = "indianred1",
                         high_color = "steelblue1",
                         mid_color = "white",
                         value_text_size = 4,
                         ci_text_size = 3,
                         show_value = TRUE,
                         ...) {
  check_inherits(x, "skipped_corr")
  check_bool(show_value, arg = "show_value")

  est_mat <- as.matrix(x)
  df_est <- as.data.frame(as.table(est_mat))
  names(df_est) <- c("Var1", "Var2", "skipped_corr")

  ci <- .mc_skipcor_ci_attr(x)
  if (!is.null(ci) && !is.null(ci$lwr.ci) && !is.null(ci$upr.ci)) {
    df_lwr <- as.data.frame(as.table(ci$lwr.ci))
    names(df_lwr)[3] <- "lwr"
    df_upr <- as.data.frame(as.table(ci$upr.ci))
    names(df_upr)[3] <- "upr"
    df <- Reduce(
      function(a, b) merge(a, b, by = c("Var1", "Var2"), all = TRUE),
      list(df_est, df_lwr, df_upr)
    )

    diag_idx <- df$Var1 == df$Var2
    df$lwr[diag_idx] <- NA_real_
    df$upr[diag_idx] <- NA_real_
    df$ci_label <- ifelse(
      is.na(df$lwr) | is.na(df$upr),
      NA_character_,
      sprintf("(%.2f, %.2f)", df$lwr, df$upr)
    )
  } else {
    df <- df_est
    df$ci_label <- NA_character_
  }

  lev_row <- unique(df_est$Var1)
  lev_col <- unique(df_est$Var2)
  df$Var1 <- factor(df$Var1, levels = rev(lev_row))
  df$Var2 <- factor(df$Var2, levels = lev_col)
  df$label <- sprintf("%.2f", df$skipped_corr)

  p <- ggplot2::ggplot(df, ggplot2::aes(x = Var2, y = Var1, fill = skipped_corr)) +
    ggplot2::geom_tile(color = "white") +
    ggplot2::scale_fill_gradient2(
      low = low_color,
      high = high_color,
      mid = mid_color,
      midpoint = 0,
      limits = c(-1, 1),
      name = "skipped_corr"
    ) +
    ggplot2::theme_minimal(base_size = 12) +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      panel.grid = ggplot2::element_blank(),
      ...
    ) +
    ggplot2::coord_fixed() +
    ggplot2::labs(title = title, x = NULL, y = NULL)

  if (isTRUE(show_value)) {
    p <- p + ggplot2::geom_text(ggplot2::aes(label = label), size = value_text_size, color = "black")
  }

  if (isTRUE(show_value) && any(!is.na(df$ci_label))) {
    p <- p + ggplot2::geom_text(
      ggplot2::aes(label = ci_label, y = as.numeric(Var1) - 0.25),
      size = ci_text_size,
      color = "gray30",
      na.rm = TRUE
    )
  }

  p
}

.mc_skipcor_pairwise_summary <- function(object,
                                         digits = 4,
                                         ci_digits = 2,
                                         p_digits = 4,
                                         show_ci = NULL,
                                         show_p = c("auto", "yes", "no")) {
  show_ci <- .mc_validate_yes_no(
    show_ci,
    arg = "show_ci",
    default = .mc_display_option("summary_show_ci", "yes")
  )
  show_p <- match.arg(show_p)
  check_inherits(object, "skipped_corr")

  est <- as.matrix(object)
  rn <- rownames(est); cn <- colnames(est)
  if (is.null(rn)) rn <- as.character(seq_len(nrow(est)))
  if (is.null(cn)) cn <- as.character(seq_len(ncol(est)))

  ci <- .mc_skipcor_ci_attr(object)
  inf <- .mc_skipcor_inference_attr(object)
  diag_attr <- attr(object, "diagnostics", exact = TRUE)

  include_ci <- identical(show_ci, "yes") && !is.null(ci)
  include_p <- switch(show_p, auto = !is.null(inf) && !is.null(inf$p_value), yes = TRUE, no = FALSE)

  rows <- vector("list", nrow(est) * (ncol(est) - 1L) / 2L)
  k <- 0L
  for (i in seq_len(nrow(est) - 1L)) {
    for (j in (i + 1L):ncol(est)) {
      k <- k + 1L
      rec <- list(
        var1 = rn[i],
        var2 = cn[j],
        estimate = round(est[i, j], digits)
      )
      if (include_ci) {
        rec$lwr <- if (!is.null(ci$lwr.ci) && is.finite(ci$lwr.ci[i, j])) round(ci$lwr.ci[i, j], ci_digits) else NA_real_
        rec$upr <- if (!is.null(ci$upr.ci) && is.finite(ci$upr.ci[i, j])) round(ci$upr.ci[i, j], ci_digits) else NA_real_
      }
      if (include_p) {
        rec$p_value <- if (!is.null(inf$p_value) && is.finite(inf$p_value[i, j])) round(inf$p_value[i, j], p_digits) else NA_real_
        if (!is.null(inf$p_value_adjusted)) {
          rec$p_value_adjusted <- if (is.finite(inf$p_value_adjusted[i, j])) round(inf$p_value_adjusted[i, j], p_digits) else NA_real_
        }
        if (!is.null(inf$reject)) rec$reject <- isTRUE(inf$reject[i, j])
      }
      if (is.list(diag_attr)) {
        if (is.matrix(diag_attr$skipped_n)) rec$skipped_n <- as.integer(diag_attr$skipped_n[i, j])
        if (is.matrix(diag_attr$skipped_prop)) rec$skipped_prop <- round(diag_attr$skipped_prop[i, j], p_digits)
        if (is.matrix(diag_attr$n_complete)) rec$n_complete <- as.integer(diag_attr$n_complete[i, j])
      }
      rows[[k]] <- rec
    }
  }

  df <- do.call(rbind.data.frame, rows)
  rownames(df) <- NULL
  num_cols <- intersect(c("estimate", "lwr", "upr", "p_value", "p_value_adjusted", "skipped_prop"), names(df))
  int_cols <- intersect(c("skipped_n", "n_complete"), names(df))
  for (nm in num_cols) df[[nm]] <- as.numeric(df[[nm]])
  for (nm in int_cols) df[[nm]] <- as.integer(df[[nm]])

  base <- .mc_summary_corr_matrix(object)
  out <- .mc_finalize_summary_df(df, class_name = "summary.skipped_corr")
  attr(out, "overview") <- base
  attr(out, "has_ci") <- include_ci
  attr(out, "has_p") <- include_p
  attr(out, "conf.level") <- if (is.null(ci)) NA_real_ else ci$conf.level
  attr(out, "digits") <- digits
  attr(out, "ci_digits") <- ci_digits
  attr(out, "p_digits") <- p_digits
  attr(out, "inference_method") <- if (is.null(inf)) NA_character_ else inf$method
  attr(out, "p_adjust") <- if (is.null(inf) || is.null(inf$p_adjust)) "none" else inf$p_adjust
  attr(out, "critical_p_value") <- if (is.null(inf)) NA_real_ else inf$critical_p_value %||% NA_real_
  out
}

#' @rdname skipped_corr
#' @method summary skipped_corr
#' @param object An object of class \code{skipped_corr}.
#' @export
summary.skipped_corr <- function(object, n = NULL, topn = NULL,
                                 max_vars = NULL, width = NULL,
                                 show_ci = NULL, ...) {
  check_inherits(object, "skipped_corr")
  ci <- .mc_skipcor_ci_attr(object)
  inf <- .mc_skipcor_inference_attr(object)

  if (is.null(ci) && (is.null(inf) || is.null(inf$p_value))) {
    return(.mc_summary_corr_matrix(object, topn = topn))
  }

  .mc_skipcor_pairwise_summary(
    object,
    show_ci = .mc_validate_yes_no(
      show_ci,
      arg = "show_ci",
      default = .mc_display_option("summary_show_ci", "yes")
    ),
    show_p = "auto"
  )
}

#' @rdname skipped_corr
#' @method print summary.skipped_corr
#' @param x An object of class \code{summary.skipped_corr}.
#' @export
print.summary.skipped_corr <- function(x, digits = NULL, n = NULL,
                                       topn = NULL, max_vars = NULL,
                                       width = NULL, show_ci = NULL, ...) {
  has_ci <- isTRUE(attr(x, "has_ci"))
  has_p <- isTRUE(attr(x, "has_p"))
  p_digits <- attr(x, "p_digits"); if (!is.numeric(p_digits)) p_digits <- 4
  extra_items <- c(
    if (has_p) c(inference = attr(x, "inference_method")),
    if (has_p) c(multiplicity = attr(x, "p_adjust"))
  )
  crit <- suppressWarnings(as.numeric(attr(x, "critical_p_value")))
  if (is.finite(crit)) {
    extra_items <- c(extra_items, critical_p = format(signif(crit, digits = p_digits)))
  }
  .mc_print_pairwise_summary_digest(
    x,
    title = "Skipped correlation summary",
    digits = .mc_coalesce(digits, 4),
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ci_method = if (has_ci) "bootstrap_b2" else NULL,
    extra_items = extra_items,
    ...
  )
  invisible(x)
}

Try the matrixCorr package in your browser

Any scripts or data that you put into this service are public.

matrixCorr documentation built on April 18, 2026, 5:06 p.m.