R/latent_corr.R

Defines functions print.summary.latent_corr print.summary.corr_matrix print.summary.matrixCorr print.summary.biserial_corr summary.biserial_corr .mc_biserial_pairwise_summary .mc_print_latent_pairwise_summary .mc_latent_pairwise_summary plot.biserial_corr print.biserial_corr biserial print.summary.polyserial_corr summary.polyserial_corr plot.polyserial_corr print.polyserial_corr polyserial print.summary.polychoric_corr summary.polychoric_corr plot.polychoric_corr print.polychoric_corr polychoric print.summary.tetrachoric_corr summary.tetrachoric_corr plot.tetrachoric_corr print.tetrachoric_corr tetrachoric .mc_structure_corr_matrix .mc_fill_pair_diag .mc_matrix_diagnostics_template .mc_latent_pair_metadata .mc_tau_matrix .mc_encode_list_to_matrix .mc_biserial_inference_attr .mc_biserial_ci_attr .mc_biserial_inference_one .mc_pair_biserial .mc_pair_polyserial .mc_pair_polychoric .mc_pair_tetrachoric .mc_polychoric_estimate_internal .mc_tetrachoric_estimate_internal .mc_polychoric_table_estimate .mc_tetrachoric_table_estimate .mc_poly_table_thresholds .mc_tetra_table_thresholds .mc_poly_opt_exact_binary .mc_tetra_opt_exact .mc_binary_prob_exact_poly .mc_binary_prob_exact .mc_phi2_exact .mc_global_cutpoints .mc_binary_tau .mc_fast_ordinal_table .mc_fast_binary_table01 .mc_pair_complete .mc_diag_discrete .mc_summary_corr_matrix .mc_plot_corr_matrix .mc_print_corr_matrix .mc_polyserial_inference_one .mc_polychoric_inference_one .mc_tetrachoric_inference_one .mc_recenter_wald_fit .mc_latent_matrix_payload .mc_new_metric_matrix .mc_latent_scalar_payload .mc_latent_inference_attr .mc_latent_ci_attr .mc_attach_latent_inference .mc_attach_scalar_latent .mc_set_matrix_dimnames .mc_square_dimnames .mc_scalar_or_matrix .mc_check_latent_missing .mc_extract_continuous_matrix .mc_extract_discrete_columns .mc_encode_ordinal_vector .mc_as_data_frame .mc_is_integerish_numeric .mc_has_missing_or_bad

Documented in biserial plot.biserial_corr plot.polychoric_corr plot.polyserial_corr plot.tetrachoric_corr polychoric polyserial print.biserial_corr print.polychoric_corr print.polyserial_corr print.summary.biserial_corr print.summary.latent_corr print.summary.matrixCorr print.summary.polychoric_corr print.summary.polyserial_corr print.summary.tetrachoric_corr print.tetrachoric_corr summary.biserial_corr summary.polychoric_corr summary.polyserial_corr summary.tetrachoric_corr tetrachoric

# Latent categorical / ordinal correlation wrappers

.mc_has_missing_or_bad <- function(x) {
  if (is.matrix(x)) {
    if (is.logical(x)) {
      return(anyNA(x))
    }
    return(anyNA(x) || !all(is.finite(x)))
  }
  if (is.factor(x) || is.logical(x)) {
    return(anyNA(x))
  }
  anyNA(x) || any(!is.finite(x))
}

.mc_is_integerish_numeric <- function(x, tol = 1e-8) {
  is.numeric(x) && all(is.na(x) | abs(x - round(x)) <= tol)
}

.mc_as_data_frame <- function(data, arg = as.character(substitute(data))) {
  if (is.data.frame(data)) {
    return(data)
  }
  if (is.matrix(data)) {
    return(as.data.frame(data, stringsAsFactors = FALSE))
  }
  abort_bad_arg(
    arg,
    message = "must be a matrix or data frame."
  )
}

.mc_encode_ordinal_vector <- function(x, binary = FALSE) {
  if (is.logical(x)) {
    levels_x <- c("FALSE", "TRUE")
    code <- ifelse(is.na(x), NA_integer_, ifelse(x, 2L, 1L))
  } else if (is.factor(x)) {
    levels_x <- levels(x)
    code <- as.integer(x)
  } else if (.mc_is_integerish_numeric(x)) {
    vals <- sort(unique(as.numeric(x[!is.na(x)])))
    if (length(vals) == 0L) {
      return(NULL)
    }
    levels_x <- as.character(vals)
    code <- match(as.numeric(x), vals)
  } else {
    return(NULL)
  }

  if (length(levels_x) < 2L) {
    return(NULL)
  }
  if (binary && length(levels_x) != 2L) {
    return(NULL)
  }

  list(code = as.integer(code), levels = levels_x)
}

.mc_extract_discrete_columns <- function(data, kind = c("ordinal", "binary"),
                                         arg = as.character(substitute(data)),
                                         min_cols = 1L) {
  kind <- match.arg(kind)
  want_binary <- identical(kind, "binary")

  if (is.null(dim(data)) && is.atomic(data)) {
    enc <- .mc_encode_ordinal_vector(data, binary = want_binary)
    if (is.null(enc)) {
      msg <- if (want_binary) {
        "must be a binary (two-level) variable."
      } else {
        "must be an ordinal variable."
      }
      abort_bad_arg(arg, message = msg)
    }
    out <- list(enc)
    names(out) <- arg
    return(out)
  }

  df <- .mc_as_data_frame(data, arg = arg)

  keep <- vapply(df, function(col) {
    !is.null(.mc_encode_ordinal_vector(col, binary = want_binary))
  }, logical(1))

  df <- df[keep]
  if (ncol(df) < min_cols) {
    msg <- if (want_binary) {
      "must contain at least {min_cols} binary column{?s}."
    } else {
      paste0(
        "must contain at least {min_cols} ordinal column{?s} ",
        "(factor, ordered factor, logical, or integer-like numeric)."
      )
    }
    abort_bad_arg(arg, message = msg, min_cols = min_cols)
  }

  encoded <- lapply(df, .mc_encode_ordinal_vector, binary = want_binary)
  names(encoded) <- names(df)
  encoded
}

.mc_extract_continuous_matrix <- function(x,
                                          arg = as.character(substitute(x)),
                                          min_cols = 1L) {
  if (is.null(dim(x)) && is.atomic(x) && !is.factor(x)) {
    if (!is.numeric(x)) {
      abort_bad_arg(arg, message = "must be numeric.")
    }
    mat <- matrix(as.numeric(x), ncol = 1L)
    colnames(mat) <- arg
    return(mat)
  }

  df <- .mc_as_data_frame(x, arg = arg)
  keep <- vapply(df, is.numeric, logical(1))
  df <- df[keep]
  if (ncol(df) < min_cols) {
    abort_bad_arg(
      arg,
      message = "must contain at least {min_cols} numeric column{?s}.",
      min_cols = min_cols
    )
  }
  data.matrix(df)
}

.mc_check_latent_missing <- function(x, check_na, arg) {
  check_bool(check_na)
  if (!check_na) {
    return(invisible(NULL))
  }
  if (is.matrix(x)) {
    if (.mc_has_missing_or_bad(x)) {
      abort_bad_arg(
        arg,
        message = "contains missing, NaN, or infinite values.",
        .hint = "Use `na_method = \"pairwise\"` to use pairwise complete cases."
      )
    }
    return(invisible(NULL))
  }
  if (!is.list(x)) {
    x <- list(x)
  }
  bad <- vapply(x, .mc_has_missing_or_bad, logical(1))
  if (any(bad)) {
    abort_bad_arg(
      arg,
      message = "contains missing, NaN, or infinite values.",
      .hint = "Use `na_method = \"pairwise\"` to use pairwise complete cases."
    )
  }
  invisible(NULL)
}

.mc_scalar_or_matrix <- function(mat, scalar) {
  if (scalar) {
    return(as.numeric(mat[1L, 1L]))
  }
  mat
}

.mc_square_dimnames <- function(names) {
  list(names, names)
}

.mc_set_matrix_dimnames <- function(x, row_names = NULL, col_names = row_names) {
  if (is.null(row_names) && is.null(col_names)) {
    return(x)
  }
  dimnames(x) <- list(row_names, col_names)
  x
}

.mc_attach_scalar_latent <- function(x, method, description,
                                     diagnostics = NULL, thresholds = NULL) {
  extra_attrs <- list(estimate = as.numeric(x))
  if (is.list(diagnostics) && length(diagnostics$n_complete) == 1L) {
    extra_attrs$n_obs <- as.integer(diagnostics$n_complete)
  }
  do.call(
    structure,
    c(
      list(
        .Data = as.numeric(x),
        method = method,
        description = description,
        package = "matrixCorr",
        diagnostics = diagnostics,
        thresholds = thresholds
      ),
      extra_attrs
    )
  )
}

.mc_attach_latent_inference <- function(x, payload, conf_level = NULL) {
  attrs <- attributes(x)
  if (is.list(payload$inference)) {
    attrs$inference <- payload$inference
    if (length(payload$inference$estimate) == 1L) {
      attrs$estimate <- as.numeric(payload$inference$estimate)
    }
    if (length(payload$inference$n_obs) == 1L) {
      attrs$n_obs <- as.integer(payload$inference$n_obs)
    }
  }
  if (is.list(payload$ci)) {
    attrs$ci <- payload$ci
    attrs$estimate <- as.numeric(payload$ci$est)
    attrs$lwr <- as.numeric(payload$ci$lwr.ci)
    attrs$upr <- as.numeric(payload$ci$upr.ci)
    attrs$conf.level <- conf_level %||% payload$ci$conf.level
    attrs$conf_level <- conf_level %||% payload$ci$conf.level
  }
  attributes(x) <- attrs
  x
}

.mc_latent_ci_attr <- function(x) {
  attr(x, "ci", exact = TRUE)
}

.mc_latent_inference_attr <- function(x) {
  attr(x, "inference", exact = TRUE)
}

.mc_latent_scalar_payload <- function(fit,
                                      return_ci = FALSE,
                                      return_p = FALSE) {
  list(
    inference = if (isTRUE(return_p)) {
      list(
        method = as.character(fit$inference.method),
        estimate = as.numeric(fit$estimate),
        statistic = as.numeric(fit$statistic),
        parameter = as.numeric(fit$parameter),
        p_value = as.numeric(fit$p_value),
        n_obs = as.integer(fit$n_obs),
        alternative = "two.sided"
      )
    } else {
      NULL
    },
    ci = if (isTRUE(return_ci)) {
      list(
        est = as.numeric(fit$estimate),
        lwr.ci = as.numeric(fit$lwr),
        upr.ci = as.numeric(fit$upr),
        conf.level = as.numeric(fit$conf.level),
        ci.method = as.character(fit$ci.method)
      )
    } else {
      NULL
    }
  )
}

.mc_new_metric_matrix <- function(template, mode = c("double", "integer")) {
  mode <- match.arg(mode)
  matrix(
    if (identical(mode, "integer")) NA_integer_ else NA_real_,
    nrow = nrow(template),
    ncol = ncol(template),
    dimnames = dimnames(template)
  )
}

.mc_latent_matrix_payload <- function(estimate,
                                      statistic = NULL,
                                      parameter = NULL,
                                      p_value = NULL,
                                      n_obs = NULL,
                                      lwr = NULL,
                                      upr = NULL,
                                      conf_level = NULL,
                                      ci_method = NULL,
                                      inference_method = NULL,
                                      return_ci = FALSE,
                                      return_p = FALSE) {
  list(
    inference = if (isTRUE(return_p)) {
      list(
        method = inference_method,
        estimate = estimate,
        statistic = statistic,
        parameter = parameter,
        p_value = p_value,
        n_obs = n_obs,
        alternative = "two.sided"
      )
    } else {
      NULL
    },
    ci = if (isTRUE(return_ci)) {
      list(
        est = estimate,
        lwr.ci = lwr,
        upr.ci = upr,
        conf.level = conf_level,
        ci.method = ci_method
      )
    } else {
      NULL
    }
  )
}

.mc_recenter_wald_fit <- function(fit, estimate, conf_level) {
  fit$estimate <- as.numeric(estimate)
  se <- as.numeric(fit$se)
  est <- as.numeric(fit$estimate)

  if (is.finite(est) && is.finite(se) && se > 0) {
    z_crit <- stats::qnorm(0.5 * (1 + conf_level))
    fit$statistic <- est / se
    fit$p_value <- 2 * stats::pnorm(abs(fit$statistic), lower.tail = FALSE)
    fit$lwr <- max(-1, est - z_crit * se)
    fit$upr <- min(1, est + z_crit * se)
  } else {
    fit$statistic <- NA_real_
    fit$p_value <- NA_real_
    fit$lwr <- NA_real_
    fit$upr <- NA_real_
  }

  fit
}

.mc_tetrachoric_inference_one <- function(tab, correct, conf_level) {
  fit <- matrixCorr_tetrachoric_inference_cpp(
    tab = unclass(tab),
    correct = correct,
    conf_level = conf_level
  )
  .mc_recenter_wald_fit(
    fit,
    estimate = .mc_tetrachoric_table_estimate(tab, correct = correct),
    conf_level = conf_level
  )
}

.mc_polychoric_inference_one <- function(tab, correct, conf_level) {
  fit <- matrixCorr_polychoric_inference_cpp(
    tab = unclass(tab),
    correct = correct,
    conf_level = conf_level
  )
  .mc_recenter_wald_fit(
    fit,
    estimate = .mc_polychoric_table_estimate(tab, correct = correct),
    conf_level = conf_level
  )
}

.mc_polyserial_inference_one <- function(x, y, check_na, conf_level) {
  pair <- .mc_pair_complete(x, y, check_na)
  out <- list(
    estimate = NA_real_,
    statistic = NA_real_,
    parameter = NA_real_,
    p_value = NA_real_,
    n_obs = as.integer(length(pair$x)),
    lwr = NA_real_,
    upr = NA_real_,
    ci.method = "wald_information_polyserial",
    inference.method = "wald_z_polyserial"
  )

  if (length(pair$x) < 2L) {
    return(out)
  }
  yy <- pair$y[!is.na(pair$y)]
  if (length(unique(yy)) < 2L) {
    return(out)
  }

  fit <- matrixCorr_polyserial_inference_cpp(
    x = as.numeric(pair$x),
    y = as.integer(as.factor(pair$y)),
    conf_level = conf_level
  )
  fit$n_obs <- as.integer(fit$n_obs)
  fit
}

.mc_print_corr_matrix <- function(x,
                                  header,
                                  digits = 4,
                                  n = NULL,
                                  topn = NULL,
                                  max_vars = NULL,
                                  width = NULL,
                                  show_ci = NULL,
                                  mat = NULL,
                                  ...) {
  legacy <- .mc_extract_legacy_display_args(
    list(...),
    n = n,
    topn = topn,
    max_vars = max_vars
  )
  cfg <- .mc_resolve_display_args(
    context = "print",
    n = legacy$n,
    topn = legacy$topn,
    max_vars = legacy$max_vars,
    width = width,
    show_ci = show_ci
  )

  m <- as.matrix(.mc_coalesce(mat, x))
  attributes(m) <- attributes(m)[c("dim", "dimnames")]
  ci_present <- .mc_has_ci_payload(x)
  p_present <- .mc_has_p_payload(x)

  cat(header, "\n", sep = "")
  digest <- c(
      method = .mc_coalesce(attr(x, "method", exact = TRUE), NA_character_),
      dimensions = sprintf("%d x %d", nrow(m), ncol(m)),
      if (ci_present) c(ci = "yes"),
      if (p_present) c(p_values = "yes")
    )
  digest <- digest[!is.na(digest) & nzchar(digest)]
  .mc_print_named_digest(digest)
  cat("\n")
  do.call(
    .mc_print_preview_matrix,
    c(
      list(
        m = m,
        digits = digits,
        n = cfg$n,
        topn = cfg$topn,
        max_vars = cfg$max_vars,
        width = cfg$width,
        context = "print",
        full_hint = TRUE,
        summary_hint = TRUE
      ),
      legacy$dots
    )
  )

  invisible(x)
}

.mc_plot_corr_matrix <- function(x, class_name, fill_name, title,
                                 low_color = "indianred1",
                                 high_color = "steelblue1",
                                 mid_color = "white",
                                 value_text_size = 4,
                                 show_value = TRUE, ...) {
  check_inherits(x, class_name)
  check_bool(show_value, arg = "show_value")

  mat <- as.matrix(x)
  df <- as.data.frame(as.table(mat))
  colnames(df) <- c("Var1", "Var2", fill_name)
  df$Var1 <- factor(df$Var1, levels = rev(unique(df$Var1)))

  p <- ggplot2::ggplot(df, ggplot2::aes(Var2, Var1, fill = .data[[fill_name]])) +
    ggplot2::geom_tile(color = "white") +
    ggplot2::scale_fill_gradient2(
      low = low_color,
      high = high_color,
      mid = mid_color,
      midpoint = 0,
      limits = c(-1, 1),
      name = fill_name
    ) +
    ggplot2::theme_minimal(base_size = 12) +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      panel.grid = ggplot2::element_blank(),
      ...
    ) +
    ggplot2::coord_fixed() +
    ggplot2::labs(title = title, x = NULL, y = NULL)

  if (isTRUE(show_value) && !is.null(value_text_size) && is.finite(value_text_size)) {
    p <- p + ggplot2::geom_text(
      ggplot2::aes(label = sprintf("%.2f", .data[[fill_name]])),
      size = value_text_size,
      color = "black"
    )
  }

  p
}

.mc_summary_corr_matrix <- function(object,
                                    header = "Correlation summary",
                                    topn = NULL) {
  m <- as.matrix(object)
  symmetric <- isTRUE(nrow(m) == ncol(m)) && isTRUE(isSymmetric(unclass(m)))
  vals <- if (symmetric && nrow(m) > 1L) {
    m[upper.tri(m, diag = FALSE)]
  } else {
    as.vector(m)
  }
  vals <- vals[is.finite(vals)]
  n_pairs <- if (symmetric) {
    stats::setNames(nrow(m) * (nrow(m) - 1L) / 2L, NULL)
  } else {
    stats::setNames(nrow(m) * ncol(m), NULL)
  }

  out <- list(
    class = class(object)[1L],
    method = attr(object, "method"),
    description = attr(object, "description"),
    correct = attr(object, "correct"),
    n_variables = if (symmetric) nrow(m) else NA_integer_,
    n_pairs = as.integer(n_pairs),
    n_rows = nrow(m),
    n_cols = ncol(m),
    symmetric = symmetric,
    n_missing = sum(is.na(m)),
    estimate_min = if (length(vals)) min(vals) else NA_real_,
    estimate_max = if (length(vals)) max(vals) else NA_real_,
    threshold_sets = {
      thr <- attr(object, "thresholds")
      if (is.list(thr)) length(thr) else NA_integer_
    },
    has_ci = .mc_has_ci_payload(object),
    has_p = .mc_has_p_payload(object)
  )
  extremes <- .mc_pair_extremes(object, digits = 4)
  out$most_negative <- extremes$most_negative
  out$most_positive <- extremes$most_positive

  diag_attr <- attr(object, "diagnostics")
  if (is.list(diag_attr)) {
    selector <- if (symmetric && nrow(m) > 1L) upper.tri(m, diag = FALSE) else matrix(TRUE, nrow(m), ncol(m))
    pair_pick <- function(x, transform = identity, default = NA_integer_) {
      if (!is.matrix(x) || !identical(dim(x), dim(m))) return(default)
      vals <- transform(x[selector])
      vals <- vals[!is.na(vals)]
      vals <- vals[is.finite(vals)]
      if (!length(vals)) return(default)
      vals
    }

    zero_vals <- pair_pick(diag_attr$zero_cells)
    ncomp_vals <- pair_pick(diag_attr$n_complete)
    skipped_n_vals <- pair_pick(diag_attr$skipped_n, default = integer())
    skipped_prop_vals <- pair_pick(diag_attr$skipped_prop, default = numeric())
    boundary_vals <- pair_pick(diag_attr$boundary, as.logical, NA)
    near_vals <- pair_pick(diag_attr$near_boundary, as.logical, NA)
    corrected_vals <- pair_pick(diag_attr$corrected, as.logical, NA)
    conv_vals <- pair_pick(diag_attr$converged, as.logical, NA)

    out$zero_cell_pairs <- if (is.atomic(zero_vals) && length(zero_vals)) sum(zero_vals > 0, na.rm = TRUE) else NA_integer_
    out$boundary_pairs <- if (is.atomic(boundary_vals) && length(boundary_vals)) sum(boundary_vals, na.rm = TRUE) else NA_integer_
    out$near_boundary_pairs <- if (is.atomic(near_vals) && length(near_vals)) sum(near_vals, na.rm = TRUE) else NA_integer_
    out$corrected_pairs <- if (is.atomic(corrected_vals) && length(corrected_vals)) sum(corrected_vals, na.rm = TRUE) else NA_integer_
    out$converged_pairs <- if (is.atomic(conv_vals) && length(conv_vals)) sum(conv_vals, na.rm = TRUE) else NA_integer_
    out$n_complete_min <- if (is.atomic(ncomp_vals) && length(ncomp_vals)) min(ncomp_vals, na.rm = TRUE) else NA_integer_
    out$n_complete_max <- if (is.atomic(ncomp_vals) && length(ncomp_vals)) max(ncomp_vals, na.rm = TRUE) else NA_integer_
    out$skipped_n_min <- if (is.atomic(skipped_n_vals) && length(skipped_n_vals)) min(skipped_n_vals, na.rm = TRUE) else NA_integer_
    out$skipped_n_max <- if (is.atomic(skipped_n_vals) && length(skipped_n_vals)) max(skipped_n_vals, na.rm = TRUE) else NA_integer_
    out$skipped_prop_min <- if (is.atomic(skipped_prop_vals) && length(skipped_prop_vals)) min(skipped_prop_vals, na.rm = TRUE) else NA_real_
    out$skipped_prop_max <- if (is.atomic(skipped_prop_vals) && length(skipped_prop_vals)) max(skipped_prop_vals, na.rm = TRUE) else NA_real_
      out$optimizer_tol <- if (is.null(diag_attr$optimizer_tol)) NA_real_ else diag_attr$optimizer_tol
    } else {
    out$zero_cell_pairs <- NA_integer_
    out$boundary_pairs <- NA_integer_
    out$near_boundary_pairs <- NA_integer_
    out$corrected_pairs <- NA_integer_
    out$converged_pairs <- NA_integer_
    out$n_complete_min <- NA_integer_
    out$n_complete_max <- NA_integer_
    out$skipped_n_min <- NA_integer_
    out$skipped_n_max <- NA_integer_
    out$skipped_prop_min <- NA_real_
    out$skipped_prop_max <- NA_real_
      out$optimizer_tol <- NA_real_
    }

  ci_attr <- attr(object, "ci", exact = TRUE)
  out$ci_conf_level <- NA_real_
  out$ci_method <- NA_character_
  out$ci_width_min <- NA_real_
  out$ci_width_max <- NA_real_
  out$ci_cross_zero <- NA_integer_
  if (is.list(ci_attr) &&
      is.matrix(ci_attr$lwr.ci) &&
      is.matrix(ci_attr$upr.ci) &&
      identical(dim(ci_attr$lwr.ci), dim(m)) &&
      identical(dim(ci_attr$upr.ci), dim(m))) {
    selector <- if (symmetric && nrow(m) > 1L) upper.tri(m, diag = FALSE) else matrix(TRUE, nrow(m), ncol(m))
    lwr_vals <- ci_attr$lwr.ci[selector]
    upr_vals <- ci_attr$upr.ci[selector]
    keep_ci <- is.finite(lwr_vals) & is.finite(upr_vals)
    out$ci_conf_level <- suppressWarnings(as.numeric(ci_attr$conf.level %||% attr(object, "conf.level", exact = TRUE)))
    out$ci_method <- as.character(ci_attr$ci.method %||% attr(object, "ci.method", exact = TRUE) %||% NA_character_)
    if (any(keep_ci)) {
      widths <- upr_vals[keep_ci] - lwr_vals[keep_ci]
      out$ci_width_min <- min(widths)
      out$ci_width_max <- max(widths)
      out$ci_cross_zero <- sum(lwr_vals[keep_ci] <= 0 & upr_vals[keep_ci] >= 0, na.rm = TRUE)
    }
  }
  
  out$header <- header
  out$top_results <- .mc_summary_top_pairs(
    object,
    digits = 4,
    topn = .mc_coalesce(topn, .mc_display_option("summary_topn", 5L))
  )
  out <- .mc_finalize_summary_list(
    out,
    class_name = if (identical(header, "Latent correlation summary")) {
      "summary.latent_corr"
    } else {
      "summary.matrixCorr"
    }
  )
  if (identical(class(out)[1L], "summary.matrixCorr")) {
    class(out) <- c("summary.matrixCorr", "summary.corr_matrix")
  } else {
    class(out) <- c("summary.latent_corr", "summary.matrixCorr", "summary.corr_matrix")
  }
  out
}

.mc_diag_discrete <- function(code) {
  keep <- !is.na(code)
  if (sum(keep) < 2L || length(unique(code[keep])) < 2L) {
    return(NA_real_)
  }
  1
}

.mc_pair_complete <- function(x, y, check_na) {
  if (check_na) {
    return(list(x = x, y = y))
  }
  keep <- stats::complete.cases(x, y)
  list(x = x[keep], y = y[keep])
}

.mc_fast_binary_table01 <- function(x, y) {
  matrix(
    tabulate(as.integer(x) + 2L * as.integer(y) + 1L, nbins = 4L),
    nrow = 2L, ncol = 2L
  )
}

.mc_fast_ordinal_table <- function(x, y, n_x, n_y) {
  matrix(
    tabulate(as.integer(x) + (as.integer(y) - 1L) * as.integer(n_x),
             nbins = as.integer(n_x) * as.integer(n_y)),
    nrow = as.integer(n_x),
    ncol = as.integer(n_y)
  )
}

.mc_binary_tau <- function(x01) {
  x01 <- x01[!is.na(x01)]
  -stats::qnorm(mean(x01))
}

.mc_global_cutpoints <- function(code, n_levels) {
  if (n_levels < 2L) {
    return(numeric())
  }
  code <- as.integer(code)
  code <- code[!is.na(code)]
  freq <- tabulate(code, nbins = as.integer(n_levels))
  probs <- freq / sum(freq)
  stats::qnorm(cumsum(probs)[-n_levels])
}

.mc_phi2_exact <- function(a, b, rho) {
  stats::integrate(
    function(z) stats::dnorm(z) * stats::pnorm((b - rho * z) / sqrt(1 - rho^2)),
    lower = -Inf,
    upper = a,
    rel.tol = 1e-12,
    abs.tol = 0
  )$value
}

.mc_binary_prob_exact <- function(rc, cc, rho) {
  p11 <- .mc_phi2_exact(rc, cc, rho)
  p21 <- stats::pnorm(rc) - p11
  p12 <- stats::pnorm(cc) - p11
  p22 <- 1 - stats::pnorm(rc) - p12
  matrix(c(p11, p21, p12, p22), 2L, 2L)
}

.mc_binary_prob_exact_poly <- function(rc, cc, rho) {
  p11 <- .mc_phi2_exact(rc, cc, rho)
  p12 <- stats::pnorm(rc) - p11
  p21 <- stats::pnorm(cc) - p11
  p22 <- 1 - stats::pnorm(rc) - p21
  matrix(c(p11, p21, p12, p22), 2L, 2L)
}

.mc_tetra_opt_exact <- function(tab, rc, cc) {
  fn <- function(rho) {
    P <- .mc_binary_prob_exact(rc, cc, rho)
    if (any(tab > 0 & (!is.finite(P) | P <= 0))) {
      return(Inf)
    }
    -sum(tab * log(P))
  }
  suppressWarnings(stats::optimize(fn, c(-1, 1))$minimum)
}

.mc_poly_opt_exact_binary <- function(tab, rc, cc) {
  fn <- function(rho) {
    P <- .mc_binary_prob_exact_poly(rc, cc, rho)
    P[P <= 0] <- NA_real_
    lP <- log(P)
    lP[!is.finite(lP)] <- NA_real_
    -sum(tab * lP, na.rm = TRUE)
  }
  suppressWarnings(stats::optimize(fn, c(-1, 1))$minimum)
}

.mc_tetra_table_thresholds <- function(tab, correct) {
  tab <- as.matrix(tab)
  if (correct > 0) {
    tab[tab == 0] <- correct
  }
  tab <- tab / sum(tab)
  list(
    row = if (nrow(tab) == 2L) stats::qnorm(rowSums(tab))[1L] else stats::qnorm(cumsum(rowSums(tab))[-nrow(tab)]),
    col = if (ncol(tab) == 2L) stats::qnorm(colSums(tab))[1L] else stats::qnorm(cumsum(colSums(tab))[-ncol(tab)])
  )
}

.mc_poly_table_thresholds <- function(tab, correct) {
  tab <- as.matrix(tab)
  tot <- sum(tab)
  tab <- tab / tot
  if (correct > 0) {
    tab[tab == 0] <- correct / tot
  }
  list(
    row = stats::qnorm(cumsum(rowSums(tab))[-nrow(tab)]),
    col = stats::qnorm(cumsum(colSums(tab))[-ncol(tab)])
  )
}

.mc_tetrachoric_table_estimate <- function(tab, correct) {
  tab <- as.matrix(tab)
  if (all(dim(tab) == c(2L, 2L))) {
    if (correct > 0) {
      tab[tab == 0] <- correct
    }
    tab <- tab / sum(tab)
    rc <- stats::qnorm(colSums(tab))[1L]
    cc <- stats::qnorm(rowSums(tab))[1L]
    return(.mc_tetra_opt_exact(tab, rc = rc, cc = cc))
  }
  matrixCorr_tetrachoric_mle_cpp(unclass(tab), correct = correct)
}

.mc_polychoric_table_estimate <- function(tab, correct) {
  tab <- as.matrix(tab)
  if (all(dim(tab) == c(2L, 2L))) {
    tot <- sum(tab)
    tab <- tab / tot
    if (correct > 0) {
      tab[tab == 0] <- correct / tot
    }
    cc <- stats::qnorm(cumsum(colSums(tab))[-ncol(tab)])
    rc <- stats::qnorm(cumsum(rowSums(tab))[-nrow(tab)])
    return(.mc_poly_opt_exact_binary(tab, rc = rc, cc = cc))
  }
  matrixCorr_polychoric_mle_cpp(unclass(tab), correct = correct)
}

.mc_tetrachoric_estimate_internal <- function(x, y, correct,
                                              tau_x = NULL, tau_y = NULL,
                                              global = FALSE) {
  x01 <- as.integer(x) - min(as.integer(x), na.rm = TRUE)
  y01 <- as.integer(y) - min(as.integer(y), na.rm = TRUE)
  tab <- .mc_fast_binary_table01(x01, y01)

  if ((sum(tab) > 1L) && (min(tab) == 0L) && (correct > 0)) {
    tab[tab == 0L] <- correct
  }

  if (any(tab == correct) || any(tab == 0L)) {
    if (global) {
      cc <- if (is.null(tau_x)) .mc_binary_tau(x01) else tau_x
      rc <- if (is.null(tau_y)) .mc_binary_tau(y01) else tau_y
      return(.mc_tetra_opt_exact(tab, rc = rc, cc = cc))
    }
    tab_local <- tab / sum(tab)
    rc <- stats::qnorm(colSums(tab_local))[1L]
    cc <- stats::qnorm(rowSums(tab_local))[1L]
    return(.mc_tetra_opt_exact(tab_local, rc = rc, cc = cc))
  }

  if (global) {
    cc <- if (is.null(tau_x)) .mc_binary_tau(x01) else tau_x
    rc <- if (is.null(tau_y)) .mc_binary_tau(y01) else tau_y
    return(matrixCorr_tetrachoric_fixed_cpp(unclass(tab), rc = rc, cc = cc, correct = correct))
  } else {
    return(matrixCorr_tetrachoric_mle_cpp(unclass(tab), correct = correct))
  }
}

.mc_polychoric_estimate_internal <- function(x, y, n_x, n_y, correct,
                                             tau_x = NULL, tau_y = NULL,
                                             global = FALSE) {
  tab <- .mc_fast_ordinal_table(x, y, n_x = n_x, n_y = n_y)
  tot <- sum(tab)
  if (tot <= 0) {
    return(NA_real_)
  }
  tab <- tab / tot

  if (n_x == 2L && n_y == 2L && any(tab == 0)) {
    if (global) {
      return(.mc_poly_opt_exact_binary(tab, rc = tau_x, cc = tau_y))
    }
    tab_local <- tab
    if (correct > 0 && any(tab_local == 0)) {
      tab_local[tab_local == 0] <- correct / tot
    }
    cc <- stats::qnorm(cumsum(colSums(tab_local))[-ncol(tab_local)])
    rc <- stats::qnorm(cumsum(rowSums(tab_local))[-nrow(tab_local)])
    return(.mc_poly_opt_exact_binary(tab_local, rc = rc, cc = cc))
  }

  if (global) {
    return(matrixCorr_polychoric_fixed_cpp(unclass(tab), rc_in = tau_x, cc_in = tau_y))
  } else {
    return(matrixCorr_polychoric_mle_cpp(unclass(tab), correct = correct))
  }
}

.mc_pair_tetrachoric <- function(x, y, correct, check_na,
                                 tau_x = NULL, tau_y = NULL,
                                 global = TRUE) {
  pair <- .mc_pair_complete(x, y, check_na)
  if (length(pair$x) < 2L) {
    return(NA_real_)
  }
  ux <- unique(pair$x)
  uy <- unique(pair$y)
  ux <- ux[!is.na(ux)]
  uy <- uy[!is.na(uy)]
  if (length(ux) < 2L || length(uy) < 2L) {
    return(NA_real_)
  }
  .mc_tetrachoric_estimate_internal(
    pair$x, pair$y, correct = correct,
    tau_x = tau_x, tau_y = tau_y, global = global
  )
}

.mc_pair_polychoric <- function(x, y, n_x, n_y, correct, check_na,
                                tau_x = NULL, tau_y = NULL,
                                global = identical(n_x, n_y)) {
  pair <- .mc_pair_complete(x, y, check_na)
  if (length(pair$x) < 2L) {
    return(NA_real_)
  }
  ux <- unique(pair$x)
  uy <- unique(pair$y)
  ux <- ux[!is.na(ux)]
  uy <- uy[!is.na(uy)]
  if (length(ux) < 2L || length(uy) < 2L) {
    return(NA_real_)
  }
  .mc_polychoric_estimate_internal(
    pair$x, pair$y,
    n_x = n_x, n_y = n_y,
    correct = correct,
    tau_x = tau_x, tau_y = tau_y,
    global = global
  )
}

.mc_pair_polyserial <- function(x, y, check_na) {
  pair <- .mc_pair_complete(x, y, check_na)
  if (length(pair$x) < 2L) {
    return(NA_real_)
  }
  if (length(unique(pair$y[!is.na(pair$y)])) < 2L) {
    return(NA_real_)
  }
  matrixCorr_polyserial_mle_cpp(
    x = as.numeric(pair$x),
    y = as.integer(as.factor(pair$y))
  )
}

.mc_pair_biserial <- function(x, y, check_na) {
  pair <- .mc_pair_complete(x, y, check_na)
  if (length(pair$x) < 2L) {
    return(NA_real_)
  }
  yy <- pair$y[!is.na(pair$y)]
  if (length(unique(yy)) < 2L) {
    return(NA_real_)
  }
  matrixCorr_biserial_latent_cpp(as.numeric(pair$x), as.logical(pair$y == 2L))
}

.mc_biserial_inference_one <- function(x, y, check_na,
                                       return_ci = FALSE,
                                       conf_level = 0.95) {
  pair <- .mc_pair_complete(x, y, check_na)
  n_complete <- length(pair$x)
  out <- list(
    estimate = NA_real_,
    statistic = NA_real_,
    parameter = NA_real_,
    p_value = NA_real_,
    n_obs = as.integer(n_complete),
    lwr = NA_real_,
    upr = NA_real_
  )

  if (n_complete < 2L) {
    return(out)
  }

  yy <- pair$y[!is.na(pair$y)]
  if (length(unique(yy)) < 2L) {
    return(out)
  }

  est <- matrixCorr_biserial_latent_cpp(
    as.numeric(pair$x),
    as.logical(pair$y == 2L)
  )
  out$estimate <- est

  df <- n_complete - 2L
  out$parameter <- if (df > 0L) as.numeric(df) else NA_real_
  if (is.finite(est) && df > 0L) {
    if (abs(est) >= 1) {
      statistic <- sign(est) * Inf
    } else {
      statistic <- est * sqrt(df / (1 - est^2))
    }
    out$statistic <- statistic
    out$p_value <- 2 * stats::pt(abs(statistic), df = df, lower.tail = FALSE)
  }

  if (isTRUE(return_ci) && is.finite(est) && n_complete > 3L) {
    if (abs(est) >= 1) {
      out$lwr <- est
      out$upr <- est
    } else {
      z_est <- atanh(est)
      z_crit <- stats::qnorm(0.5 * (1 + conf_level))
      z_half_width <- z_crit / sqrt(n_complete - 3)
      out$lwr <- tanh(z_est - z_half_width)
      out$upr <- tanh(z_est + z_half_width)
    }
  }

  out
}

.mc_biserial_ci_attr <- function(x) {
  attr(x, "ci", exact = TRUE)
}

.mc_biserial_inference_attr <- function(x) {
  attr(x, "inference", exact = TRUE)
}

.mc_encode_list_to_matrix <- function(enc) {
  out <- matrix(NA_integer_, nrow = length(enc[[1L]]$code), ncol = length(enc))
  for (j in seq_along(enc)) {
    out[, j] <- as.integer(enc[[j]]$code)
  }
  colnames(out) <- names(enc)
  out
}

.mc_tau_matrix <- function(enc) {
  max_cuts <- max(vapply(enc, function(z) max(length(z$levels) - 1L, 0L), integer(1)))
  out <- matrix(NA_real_, nrow = max_cuts, ncol = length(enc))
  for (j in seq_along(enc)) {
    tau_j <- .mc_global_cutpoints(enc[[j]]$code, length(enc[[j]]$levels))
    if (length(tau_j) > 0L) {
      out[seq_along(tau_j), j] <- tau_j
    }
  }
  out
}

.mc_latent_pair_metadata <- function(est, tab, n_complete, correct, thresholds,
                                     optimizer_tol = 1e-8) {
  zero_cells <- sum(tab == 0)
  boundary <- isTRUE(correct == 0) && zero_cells > 0L
  near_boundary <- isTRUE(boundary) || (is.finite(est) && abs(est) >= 0.99)
  list(
    zero_cells = as.integer(zero_cells),
    correct = correct,
    corrected = isTRUE(correct > 0) && zero_cells > 0L,
    boundary = boundary,
    near_boundary = near_boundary,
    n_complete = as.integer(n_complete),
    converged = is.finite(est),
    optimizer_tol = optimizer_tol,
    thresholds = thresholds
  )
}

.mc_matrix_diagnostics_template <- function(p, dimnames) {
  mat_int <- matrix(NA_integer_, nrow = p, ncol = p, dimnames = dimnames)
  mat_lgl <- matrix(NA, nrow = p, ncol = p, dimnames = dimnames)
  list(
    zero_cells = mat_int,
    n_complete = mat_int,
    corrected = mat_lgl,
    boundary = mat_lgl,
    near_boundary = mat_lgl,
    converged = mat_lgl,
    optimizer_tol = 1e-8
  )
}

.mc_fill_pair_diag <- function(diag, j, k, est, tab, n_complete, correct) {
  zero_cells <- as.integer(sum(tab == 0))
  boundary <- isTRUE(correct == 0) && zero_cells > 0L
  near_boundary <- isTRUE(boundary) || (is.finite(est) && abs(est) >= 0.99)
  corrected <- isTRUE(correct > 0) && zero_cells > 0L
  converged <- is.finite(est)

  diag$zero_cells[j, k] <- zero_cells
  diag$zero_cells[k, j] <- zero_cells
  diag$n_complete[j, k] <- as.integer(n_complete)
  diag$n_complete[k, j] <- as.integer(n_complete)
  diag$corrected[j, k] <- corrected
  diag$corrected[k, j] <- corrected
  diag$boundary[j, k] <- boundary
  diag$boundary[k, j] <- boundary
  diag$near_boundary[j, k] <- near_boundary
  diag$near_boundary[k, j] <- near_boundary
  diag$converged[j, k] <- converged
  diag$converged[k, j] <- converged
  diag
}

.mc_structure_corr_matrix <- function(mat, class_name, method, description,
                                      diagnostics = NULL, thresholds = NULL,
                                      correct = NULL, dimnames = NULL,
                                      extra_attrs = NULL,
                                      classes = c(class_name, "matrix")) {
  if (!is.null(dimnames)) {
    dimnames(mat) <- dimnames
  }
  keep_classes <- setdiff(
    classes,
    c(class_name, "matrix", "corr_matrix", "corr_result")
  )
  out <- .mc_new_corr_matrix(
    mat = mat,
    estimator_class = class_name,
    method = method,
    description = description,
    output = "matrix",
    threshold = 0,
    diag = TRUE,
    diagnostics = diagnostics,
    ci = attr(mat, "ci", exact = TRUE),
    conf.level = attr(mat, "conf.level", exact = TRUE),
    symmetric = isTRUE(nrow(mat) == ncol(mat)) &&
      isTRUE(isSymmetric(mat, check.attributes = FALSE)),
    extra_attrs = c(
      list(
        thresholds = thresholds,
        correct = correct
      ),
      extra_attrs %||% list()
    ),
    extra_classes = keep_classes
  )
  out
}

#' @title Pairwise Tetrachoric Correlation
#'
#' @description
#' Computes the tetrachoric correlation for either a pair of binary variables
#' or all pairwise combinations of binary columns in a matrix/data frame.
#'
#' @param data A binary vector, matrix, or data frame. In matrix/data-frame mode,
#' only binary columns are retained.
#' @param y Optional second binary vector. When supplied, the function returns a
#' single tetrachoric correlation estimate.
#' @param correct Non-negative continuity correction added to zero-count cells.
#' Default is \code{0.5}.
#' @param na_method Character scalar controlling missing-data handling.
#'   \code{"error"} rejects missing values. \code{"pairwise"} uses pairwise
#'   complete cases.
#' @param ci Logical (default \code{FALSE}). If \code{TRUE}, attach
#' model-based large-sample Wald confidence intervals derived from the observed
#' information matrix of the latent-variable likelihood.
#' @param p_value Logical (default \code{FALSE}). If \code{TRUE}, attach
#' model-based large-sample Wald p-values and test statistics for each
#' estimated latent correlation.
#' @param conf_level Confidence level used when \code{ci = TRUE}. Default is
#' \code{0.95}.
#' @param output Output representation for the computed estimates.
#'   \itemize{
#'   \item \code{"matrix"} (default): full dense matrix; best when you need
#'   matrix algebra, dense heatmaps, or full compatibility with existing code.
#'   \item \code{"sparse"}: sparse matrix from \pkg{Matrix} containing only
#'   retained entries; best when many values are dropped by thresholding.
#'   \item \code{"edge_list"}: long-form data frame with columns
#'   \code{row}, \code{col}, \code{value}; convenient for filtering, joins,
#'   and network-style workflows.
#'   }
#' @param threshold Non-negative absolute-value filter for non-matrix outputs:
#'   keep entries with \code{abs(value) >= threshold}. Use
#'   \code{threshold > 0} when you want only stronger associations (typically
#'   with \code{output = "sparse"} or \code{"edge_list"}). Keep
#'   \code{threshold = 0} to retain all values. Must be \code{0} when
#'   \code{output = "matrix"}.
#' @param diag Logical; whether to include diagonal entries in
#'   \code{"sparse"} and \code{"edge_list"} outputs.
#'
#' @return
#' If \code{y} is supplied, a numeric scalar with attributes
#' \code{diagnostics} and \code{thresholds}. Otherwise a symmetric matrix of
#' class \code{tetrachoric_corr} with attributes \code{method},
#' \code{description}, \code{package = "matrixCorr"}, \code{diagnostics},
#' \code{thresholds}, and \code{correct}. When \code{p_value = TRUE}, the
#' returned object also carries an \code{inference} attribute with elements
#' \code{estimate}, \code{statistic}, \code{parameter}, \code{p_value}, and
#' \code{n_obs}. When \code{ci = TRUE}, it also carries a \code{ci} attribute
#' with elements \code{est}, \code{lwr.ci}, \code{upr.ci}, \code{conf.level},
#' and \code{ci.method}, plus \code{attr(x, "conf.level")}. Scalar outputs keep
#' the same point estimate and gain the same metadata only when inference is
#' requested. In matrix mode, \code{output = "edge_list"} returns a data frame with columns
#' \code{row}, \code{col}, \code{value}; \code{output = "sparse"} returns a
#' symmetric sparse matrix.
#'
#' @details
#' The tetrachoric correlation assumes that the observed binary variables arise
#' by dichotomising latent standard-normal variables. Let
#' \eqn{Z_1, Z_2 \sim N(0, 1)} with latent correlation \eqn{\rho}, and define
#' observed binary variables by thresholds \eqn{\tau_1, \tau_2}:
#' \deqn{
#' X = \mathbf{1}\{Z_1 > \tau_1\},
#' \qquad
#' Y = \mathbf{1}\{Z_2 > \tau_2\}.
#' }
#' If the observed \eqn{2 \times 2} table has counts
#' \eqn{n_{ij}} for \eqn{i,j \in \{0,1\}}, the marginal proportions determine
#' the thresholds:
#' \deqn{
#' \tau_1 = \Phi^{-1}\!\big(P(X = 0)\big),
#' \qquad
#' \tau_2 = \Phi^{-1}\!\big(P(Y = 0)\big).
#' }
#' The estimator returned here is the maximum-likelihood estimate of the latent
#' correlation \eqn{\rho}, obtained by maximizing the multinomial log-likelihood
#' built from the rectangle probabilities of the bivariate normal distribution:
#' \deqn{
#' \ell(\rho) = \sum_{i=0}^1 \sum_{j=0}^1 n_{ij}\log \pi_{ij}(\rho;\tau_1,\tau_2),
#' }
#' where \eqn{\pi_{ij}} are the four bivariate-normal cell probabilities implied
#' by \eqn{\rho} and the fixed thresholds. The implementation evaluates the
#' likelihood over \eqn{\rho \in (-1,1)} by a coarse search followed by Brent
#' refinement in C++.
#'
#' The argument \code{correct} adds a continuity correction only to zero-count
#' cells before threshold estimation and likelihood evaluation. This stabilises
#' the estimator for sparse tables and mirrors the conventional
#' \code{correct = 0.5} continuity-correction behaviour used in several
#' latent-correlation implementations.
#' When \code{correct = 0} and the observed contingency table contains zero
#' cells, the fit is non-regular and may be boundary-driven. In those cases the
#' returned object stores sparse-fit diagnostics, including whether the fit was
#' classified as \code{boundary} or \code{near_boundary}.
#'
#' \strong{Assumptions.} The coefficient is appropriate when both observed
#' binary variables are viewed as thresholded versions of jointly normal latent
#' variables. The optional p-values and confidence intervals adopt this
#' latent-normal interpretation and use the same likelihood that defines the
#' tetrachoric estimate. These inferential quantities are therefore model-based
#' and should not be interpreted as distribution-free summaries.
#'
#' \strong{Inference.} When \code{ci = TRUE} or \code{p_value = TRUE}, the
#' function refits the pairwise tetrachoric model by maximum likelihood and
#' obtains the observed information matrix numerically in C++. The reported
#' confidence interval is a Wald interval
#' \eqn{\hat\rho \pm z_{1-\alpha/2}\operatorname{SE}(\hat\rho)}, and the
#' reported p-value is from the large-sample Wald \eqn{z}-test for
#' \eqn{H_0:\rho = 0}. These inferential quantities are only computed when
#' explicitly requested.
#'
#' In matrix/data-frame mode, all pairwise tetrachoric correlations are computed
#' between binary columns. Diagonal entries are \code{1} for non-degenerate
#' columns and \code{NA} for columns with fewer than two observed levels.
#' Variable-specific latent thresholds are stored in the \code{thresholds}
#' attribute, and pairwise sparse-fit diagnostics are stored in
#' \code{diagnostics}.
#'
#' \strong{Computational complexity.} For \eqn{p} binary variables, the matrix
#' path evaluates \eqn{p(p-1)/2} pairwise likelihoods. Each pair uses a
#' one-dimensional optimisation with negligible memory overhead beyond the
#' output matrix.
#'
#' @references
#' Pearson, K. (1900). Mathematical contributions to the theory of evolution.
#' VII. On the correlation of characters not quantitatively measurable.
#' \emph{Philosophical Transactions of the Royal Society A}, 195, 1-47.
#'
#' Olsson, U. (1979). Maximum likelihood estimation of the polychoric
#' correlation coefficient. \emph{Psychometrika}, 44(4), 443-460.
#'
#' @examplesIf requireNamespace("mnormt", quietly = TRUE)
#' set.seed(123)
#' n <- 1000
#' Sigma <- matrix(c(
#'   1.00, 0.55, 0.35,
#'   0.55, 1.00, 0.45,
#'   0.35, 0.45, 1.00
#' ), 3, 3, byrow = TRUE)
#'
#' Z <- mnormt::rmnorm(n = n, mean = rep(0, 3), varcov = Sigma)
#' X <- data.frame(
#'   item1 = Z[, 1] > stats::qnorm(0.70),
#'   item2 = Z[, 2] > stats::qnorm(0.60),
#'   item3 = Z[, 3] > stats::qnorm(0.50)
#' )
#'
#' tc <- tetrachoric(X)
#' print(tc, digits = 3)
#' summary(tc)
#' plot(tc)
#' tetrachoric(X, output = "edge_list", diag = FALSE)
#' tetrachoric(X, output = "sparse", threshold = 0.4, diag = FALSE)
#'
#' # Interactive viewing (requires shiny)
#' if (interactive() && requireNamespace("shiny", quietly = TRUE)) {
#'   view_corr_shiny(tc)
#' }
#'
#' # latent Pearson correlations used to generate the binary items
#' round(stats::cor(Z), 2)
#'
#' @author Thiago de Paula Oliveira
#' @export
tetrachoric <- function(data,
                        y = NULL,
                        na_method = c("error", "pairwise"),
                        ci = FALSE,
                        p_value = FALSE,
                        conf_level = 0.95,
                        correct = 0.5,
                        output = c("matrix", "sparse", "edge_list"),
                        threshold = 0,
                        diag = TRUE,
                        ...) {
  output_cfg <- .mc_validate_output_args(
    output = output,
    threshold = threshold,
    diag = diag
  )
  if (missing(na_method) && isFALSE(ci) && isFALSE(p_value) && ...length() == 0L) {
    check_scalar_nonneg(correct, arg = "correct")

    if (is.null(y) && inherits(data, "table")) {
      if (!identical(output_cfg$output, "matrix")) {
        abort_bad_arg(
          "output",
          message = "must be {.val matrix} when {.arg data} is a contingency table."
        )
      }
      tab <- as.matrix(data)
      est <- .mc_tetrachoric_table_estimate(tab, correct = correct)
      thresholds <- .mc_tetra_table_thresholds(tab, correct = correct)
      diagnostics <- .mc_latent_pair_metadata(
        est = est,
        tab = tab,
        n_complete = sum(tab),
        correct = correct,
        thresholds = thresholds
      )
      return(.mc_attach_scalar_latent(
        est,
        method = "tetrachoric",
        description = "Pairwise tetrachoric correlation",
        diagnostics = diagnostics,
        thresholds = thresholds
      ))
    }

    if (!is.null(y)) {
      if (!identical(output_cfg$output, "matrix")) {
        abort_bad_arg(
          "output",
          message = "must be {.val matrix} when {.arg y} is supplied."
        )
      }
      check_same_length(data, y, arg_x = "data", arg_y = "y")
      .mc_check_latent_missing(list(data = data, y = y), check_na = TRUE, arg = "data")

      enc_x <- .mc_encode_ordinal_vector(data, binary = TRUE)
      enc_y <- .mc_encode_ordinal_vector(y, binary = TRUE)
      if (is.null(enc_x) || is.null(enc_y)) {
        abort_bad_arg(
          "data",
          message = "and {.arg y} must both be binary (two-level) variables."
        )
      }

      pair <- .mc_pair_complete(enc_x$code, enc_y$code, TRUE)
      est <- .mc_pair_tetrachoric(pair$x, pair$y, correct, TRUE)
      x01 <- as.integer(pair$x) - min(as.integer(pair$x), na.rm = TRUE)
      y01 <- as.integer(pair$y) - min(as.integer(pair$y), na.rm = TRUE)
      tab <- .mc_fast_binary_table01(x01, y01)
      thresholds <- list(data = .mc_binary_tau(x01), y = .mc_binary_tau(y01))
      diagnostics <- .mc_latent_pair_metadata(
        est = est,
        tab = tab,
        n_complete = length(pair$x),
        correct = correct,
        thresholds = thresholds
      )
      return(.mc_attach_scalar_latent(
        est,
        method = "tetrachoric",
        description = "Pairwise tetrachoric correlation",
        diagnostics = diagnostics,
        thresholds = thresholds
      ))
    }

    enc <- .mc_extract_discrete_columns(data, kind = "binary", arg = "data", min_cols = 2L)
    .mc_check_latent_missing(lapply(enc, `[[`, "code"), check_na = TRUE, arg = "data")

    x_mat <- .mc_encode_list_to_matrix(enc)
    tau <- vapply(enc, function(z) {
      code <- as.integer(z$code)
      .mc_binary_tau(code - min(code, na.rm = TRUE))
    }, numeric(1))
    out <- matrixCorr_tetrachoric_matrix_cpp(
      x = x_mat,
      tau = tau,
      correct = correct,
      pairwise_complete = FALSE
    )
    dimnames(out) <- list(names(enc), names(enc))

    p <- length(enc)
    diag_info <- .mc_matrix_diagnostics_template(p, dimnames(out))
    diag(diag_info$zero_cells) <- 0L
    diag(diag_info$n_complete) <- colSums(!is.na(x_mat))
    diag(diag_info$corrected) <- FALSE
    diag(diag_info$boundary) <- FALSE
    diag(diag_info$near_boundary) <- FALSE
    diag(diag_info$converged) <- is.finite(diag(out))
    for (j in seq_len(p - 1L)) {
      for (k in (j + 1L):p) {
        pair <- .mc_pair_complete(enc[[j]]$code, enc[[k]]$code, TRUE)
        x01 <- as.integer(pair$x) - min(as.integer(pair$x), na.rm = TRUE)
        y01 <- as.integer(pair$y) - min(as.integer(pair$y), na.rm = TRUE)
        tab <- .mc_fast_binary_table01(x01, y01)
        diag_info <- .mc_fill_pair_diag(diag_info, j, k, out[j, k], tab, length(pair$x), correct)
      }
    }
    thresholds <- as.list(tau)
    names(thresholds) <- names(enc)
    out <- .mc_structure_corr_matrix(
      out,
      class_name = "tetrachoric_corr",
      method = "tetrachoric",
      description = "Pairwise tetrachoric correlation matrix",
      diagnostics = diag_info,
      thresholds = thresholds,
      correct = correct
    )
    return(.mc_finalize_corr_output(
      out,
      output = output_cfg$output,
      threshold = output_cfg$threshold,
      diag = output_cfg$diag
    ))
  }

  legacy_args <- .mc_extract_legacy_aliases(list(...), allowed = "check_na")
  na_cfg <- resolve_na_args(
    na_method = na_method,
    check_na = legacy_args$check_na %||% NULL,
    na_method_missing = missing(na_method)
  )
  check_na <- na_cfg$check_na
  check_bool(ci, arg = "ci")
  check_bool(p_value, arg = "p_value")
  check_scalar_nonneg(correct, arg = "correct")
  if (isTRUE(ci)) {
    check_prob_scalar(conf_level, arg = "conf_level", open_ends = TRUE)
  }

  if (is.null(y) && inherits(data, "table")) {
    if (!identical(output_cfg$output, "matrix")) {
      abort_bad_arg(
        "output",
        message = "must be {.val matrix} when {.arg data} is a contingency table."
      )
    }
    tab <- as.matrix(data)
    fit <- if (isTRUE(ci) || isTRUE(p_value)) {
      .mc_tetrachoric_inference_one(tab, correct = correct, conf_level = conf_level)
    } else {
      NULL
    }
    est <- if (is.null(fit)) .mc_tetrachoric_table_estimate(tab, correct = correct) else as.numeric(fit$estimate)
    thresholds <- .mc_tetra_table_thresholds(tab, correct = correct)
    diagnostics <- .mc_latent_pair_metadata(
      est = est,
      tab = tab,
      n_complete = sum(tab),
      correct = correct,
      thresholds = thresholds
    )
    out <- .mc_attach_scalar_latent(
      est,
      method = "tetrachoric",
      description = "Pairwise tetrachoric correlation",
      diagnostics = diagnostics,
      thresholds = thresholds
    )
    if (isTRUE(ci) || isTRUE(p_value)) {
      out <- .mc_attach_latent_inference(
        out,
        .mc_latent_scalar_payload(fit, return_ci = ci, return_p = p_value),
        conf_level = conf_level
      )
    }
    return(out)
  }

  if (!is.null(y)) {
    if (!identical(output_cfg$output, "matrix")) {
      abort_bad_arg(
        "output",
        message = "must be {.val matrix} when {.arg y} is supplied."
      )
    }
    check_same_length(data, y, arg_x = "data", arg_y = "y")
    .mc_check_latent_missing(list(data = data, y = y), check_na = check_na, arg = "data")

    enc_x <- .mc_encode_ordinal_vector(data, binary = TRUE)
    enc_y <- .mc_encode_ordinal_vector(y, binary = TRUE)
    if (is.null(enc_x) || is.null(enc_y)) {
      abort_bad_arg(
        "data",
        message = "and {.arg y} must both be binary (two-level) variables."
      )
    }
    pair <- .mc_pair_complete(enc_x$code, enc_y$code, check_na)
    x01 <- as.integer(pair$x) - min(as.integer(pair$x), na.rm = TRUE)
    y01 <- as.integer(pair$y) - min(as.integer(pair$y), na.rm = TRUE)
    tab <- .mc_fast_binary_table01(x01, y01)
    fit <- if (isTRUE(ci) || isTRUE(p_value)) {
      .mc_tetrachoric_inference_one(tab, correct = correct, conf_level = conf_level)
    } else {
      NULL
    }
    est <- .mc_pair_tetrachoric(pair$x, pair$y, correct, TRUE)
    if (!is.null(fit)) {
      fit <- .mc_recenter_wald_fit(fit, estimate = est, conf_level = conf_level)
    }
    thresholds <- list(
      data = .mc_binary_tau(x01),
      y = .mc_binary_tau(y01)
    )
    diagnostics <- .mc_latent_pair_metadata(
      est = est,
      tab = tab,
      n_complete = length(pair$x),
      correct = correct,
      thresholds = thresholds
    )
    out <- .mc_attach_scalar_latent(
      est,
      method = "tetrachoric",
      description = "Pairwise tetrachoric correlation",
      diagnostics = diagnostics,
      thresholds = thresholds
    )
    if (isTRUE(ci) || isTRUE(p_value)) {
      out <- .mc_attach_latent_inference(
        out,
        .mc_latent_scalar_payload(fit, return_ci = ci, return_p = p_value),
        conf_level = conf_level
      )
    }
    return(out)
  }

  enc <- .mc_extract_discrete_columns(data, kind = "binary", arg = "data", min_cols = 2L)
  .mc_check_latent_missing(
    lapply(enc, `[[`, "code"),
    check_na = check_na,
    arg = "data"
  )

  p <- length(enc)
  x_mat <- .mc_encode_list_to_matrix(enc)
  tau <- vapply(enc, function(z) {
    code <- as.integer(z$code)
    .mc_binary_tau(code - min(code, na.rm = TRUE))
  }, numeric(1))
  out <- if (!isTRUE(ci) && !isTRUE(p_value)) {
    matrixCorr_tetrachoric_matrix_cpp(
      x = x_mat,
      tau = tau,
      correct = correct,
      pairwise_complete = !check_na
    )
  } else {
    matrix(NA_real_, nrow = p, ncol = p)
  }
  dimnames(out) <- list(names(enc), names(enc))
  diag_info <- .mc_matrix_diagnostics_template(p, dimnames(out))
  diag(diag_info$zero_cells) <- 0L
  diag(diag_info$n_complete) <- colSums(!is.na(x_mat))
  diag(diag_info$corrected) <- FALSE
  diag(diag_info$boundary) <- FALSE
  diag(diag_info$near_boundary) <- FALSE
  diag(diag_info$converged) <- TRUE
  diag(out) <- ifelse(vapply(enc, function(z) length(unique(z$code[!is.na(z$code)])) >= 2L, logical(1)), 1, NA_real_)
  statistic <- parameter <- p_mat <- lwr <- upr <- NULL
  if (isTRUE(ci) || isTRUE(p_value)) {
    statistic <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    parameter <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    p_mat <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    lwr <- if (isTRUE(ci)) .mc_new_metric_matrix(out, "double") else NULL
    upr <- if (isTRUE(ci)) .mc_new_metric_matrix(out, "double") else NULL
  }
  for (j in seq_len(p - 1L)) {
    for (k in (j + 1L):p) {
      pair <- .mc_pair_complete(enc[[j]]$code, enc[[k]]$code, check_na)
      x01 <- as.integer(pair$x) - min(as.integer(pair$x), na.rm = TRUE)
      y01 <- as.integer(pair$y) - min(as.integer(pair$y), na.rm = TRUE)
      tab <- .mc_fast_binary_table01(x01, y01)
      est_jk <- .mc_pair_tetrachoric(
        enc[[j]]$code, enc[[k]]$code,
        correct = correct,
        check_na = check_na,
        tau_x = tau[j],
        tau_y = tau[k],
        global = TRUE
      )
      if (isTRUE(ci) || isTRUE(p_value)) {
        fit <- .mc_tetrachoric_inference_one(tab, correct = correct, conf_level = conf_level)
        fit <- .mc_recenter_wald_fit(fit, estimate = est_jk, conf_level = conf_level)
        out[j, k] <- est_jk
        out[k, j] <- out[j, k]
        if (isTRUE(p_value)) {
          statistic[j, k] <- as.numeric(fit$statistic)
          statistic[k, j] <- statistic[j, k]
          parameter[j, k] <- as.numeric(fit$parameter)
          parameter[k, j] <- parameter[j, k]
          p_mat[j, k] <- as.numeric(fit$p_value)
          p_mat[k, j] <- p_mat[j, k]
        }
        if (isTRUE(ci)) {
          lwr[j, k] <- as.numeric(fit$lwr)
          lwr[k, j] <- lwr[j, k]
          upr[j, k] <- as.numeric(fit$upr)
          upr[k, j] <- upr[j, k]
        }
      }
      diag_info <- .mc_fill_pair_diag(diag_info, j, k, out[j, k], tab, length(pair$x), correct)
    }
  }
  thresholds <- as.list(tau)
  names(thresholds) <- names(enc)

  out <- .mc_structure_corr_matrix(
    out,
    class_name = "tetrachoric_corr",
    method = "tetrachoric",
    description = "Pairwise tetrachoric correlation matrix",
    diagnostics = diag_info,
    thresholds = thresholds,
    correct = correct
  )
  if (isTRUE(ci) || isTRUE(p_value)) {
    out <- .mc_attach_latent_inference(
      out,
      .mc_latent_matrix_payload(
        estimate = unclass(out),
        statistic = statistic,
        parameter = parameter,
        p_value = p_mat,
        n_obs = diag_info$n_complete,
        lwr = lwr,
        upr = upr,
        conf_level = conf_level,
        ci_method = "wald_information_tetrachoric",
        inference_method = "wald_z_tetrachoric",
        return_ci = ci,
        return_p = p_value
      ),
      conf_level = conf_level
    )
  }
  .mc_finalize_corr_output(
    out,
    output = output_cfg$output,
    threshold = output_cfg$threshold,
    diag = output_cfg$diag
  )
}

#' @rdname tetrachoric
#' @method print tetrachoric_corr
#' @param x An object of class \code{tetrachoric_corr}.
#' @param digits Integer; number of decimal places to print.
#' @param n Optional row threshold for compact preview output.
#' @param topn Optional number of leading/trailing rows to show when truncated.
#' @param max_vars Optional maximum number of visible columns; `NULL` derives this
#'   from console width.
#' @param width Optional display width; defaults to \code{getOption("width")}.
#' @param show_ci One of \code{"yes"} or \code{"no"}.
#' @param ... Additional arguments passed to \code{print()}.
#' @export
print.tetrachoric_corr <- function(x, digits = 4, n = NULL, topn = NULL,
                                   max_vars = NULL, width = NULL,
                                   show_ci = NULL, ...) {
  .mc_print_corr_matrix(
    x, header = "Tetrachoric correlation matrix",
    digits = digits, n = n, topn = topn,
    max_vars = max_vars, width = width, show_ci = show_ci, ...
  )
}

#' @rdname tetrachoric
#' @method plot tetrachoric_corr
#' @param title Plot title. Default is \code{"Tetrachoric correlation heatmap"}.
#' @param low_color Color for the minimum correlation.
#' @param high_color Color for the maximum correlation.
#' @param mid_color Color for zero correlation.
#' @param value_text_size Font size used in tile labels.
#' @param show_value Logical; if \code{TRUE} (default), overlay numeric values
#'   on the heatmap tiles.
#' @export
plot.tetrachoric_corr <- function(x, title = "Tetrachoric correlation heatmap",
                                  low_color = "indianred1",
                                  high_color = "steelblue1",
                                  mid_color = "white",
                                  value_text_size = 4,
                                  show_value = TRUE, ...) {
  .mc_plot_corr_matrix(
    x, class_name = "tetrachoric_corr", fill_name = "Tetrachoric",
    title = title, low_color = low_color, high_color = high_color,
    mid_color = mid_color, value_text_size = value_text_size,
    show_value = show_value, ...
  )
}

#' @rdname tetrachoric
#' @method summary tetrachoric_corr
#' @param object An object of class \code{tetrachoric_corr}.
#' @param ci_digits Integer; digits for confidence limits in the pairwise
#'   summary.
#' @param p_digits Integer; digits for p-values in the pairwise summary.
#' @export
summary.tetrachoric_corr <- function(object, n = NULL, topn = NULL,
                                     max_vars = NULL, width = NULL,
                                     ci_digits = 3,
                                     p_digits = 4,
                                     show_ci = NULL, ...) {
  check_inherits(object, "tetrachoric_corr")
  show_ci <- .mc_validate_yes_no(
    show_ci,
    arg = "show_ci",
    default = .mc_display_option("summary_show_ci", "yes")
  )

  ci <- .mc_latent_ci_attr(object)
  inf <- .mc_latent_inference_attr(object)
  if (is.null(ci) && (is.null(inf) || is.null(inf$p_value))) {
    return(.mc_summary_corr_matrix(object, header = "Latent correlation summary", topn = topn))
  }

  .mc_latent_pairwise_summary(
    object,
    summary_class = "summary.tetrachoric_corr",
    ci_digits = ci_digits,
    p_digits = p_digits,
    show_ci = show_ci
  )
}

#' @rdname tetrachoric
#' @method print summary.tetrachoric_corr
#' @param x An object of class \code{summary.tetrachoric_corr}.
#' @export
print.summary.tetrachoric_corr <- function(x, digits = NULL, n = NULL,
                                           topn = NULL, max_vars = NULL,
                                           width = NULL, show_ci = NULL, ...) {
  .mc_print_latent_pairwise_summary(
    x,
    title = "Tetrachoric correlation summary",
    digits = digits,
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ...
  )
}

#' @title Pairwise Polychoric Correlation
#'
#' @description
#' Computes the polychoric correlation for either a pair of ordinal variables
#' or all pairwise combinations of ordinal columns in a matrix/data frame.
#'
#' @param data An ordinal vector, matrix, or data frame. Supported columns are
#' factors, ordered factors, logical values, or integer-like numerics.
#' In matrix/data-frame mode, only supported ordinal columns are retained.
#' @param y Optional second ordinal vector. When supplied, the function returns
#' a single polychoric correlation estimate.
#' @param correct Non-negative continuity correction added to zero-count cells.
#' Default is \code{0.5}.
#' @param na_method Character scalar controlling missing-data handling.
#'   \code{"error"} rejects missing values. \code{"pairwise"} uses pairwise
#'   complete cases.
#' @param ci Logical (default \code{FALSE}). If \code{TRUE}, attach
#' model-based large-sample Wald confidence intervals derived from the observed
#' information matrix of the latent-variable likelihood.
#' @param p_value Logical (default \code{FALSE}). If \code{TRUE}, attach
#' model-based large-sample Wald p-values and test statistics for each
#' estimated latent correlation.
#' @param conf_level Confidence level used when \code{ci = TRUE}. Default is
#' \code{0.95}.
#' @param output Output representation for the computed estimates.
#'   \itemize{
#'   \item \code{"matrix"} (default): full dense matrix; best when you need
#'   matrix algebra, dense heatmaps, or full compatibility with existing code.
#'   \item \code{"sparse"}: sparse matrix from \pkg{Matrix} containing only
#'   retained entries; best when many values are dropped by thresholding.
#'   \item \code{"edge_list"}: long-form data frame with columns
#'   \code{row}, \code{col}, \code{value}; convenient for filtering, joins,
#'   and network-style workflows.
#'   }
#' @param threshold Non-negative absolute-value filter for non-matrix outputs:
#'   keep entries with \code{abs(value) >= threshold}. Use
#'   \code{threshold > 0} when you want only stronger associations (typically
#'   with \code{output = "sparse"} or \code{"edge_list"}). Keep
#'   \code{threshold = 0} to retain all values. Must be \code{0} when
#'   \code{output = "matrix"}.
#' @param diag Logical; whether to include diagonal entries in
#'   \code{"sparse"} and \code{"edge_list"} outputs.
#'
#' @return
#' If \code{y} is supplied, a numeric scalar with attributes
#' \code{diagnostics} and \code{thresholds}. Otherwise a symmetric matrix of
#' class \code{polychoric_corr} with attributes \code{method},
#' \code{description}, \code{package = "matrixCorr"}, \code{diagnostics},
#' \code{thresholds}, and \code{correct}. When \code{p_value = TRUE}, the
#' returned object also carries an \code{inference} attribute with elements
#' \code{estimate}, \code{statistic}, \code{parameter}, \code{p_value}, and
#' \code{n_obs}. When \code{ci = TRUE}, it also carries a \code{ci} attribute
#' with elements \code{est}, \code{lwr.ci}, \code{upr.ci}, \code{conf.level},
#' and \code{ci.method}, plus \code{attr(x, "conf.level")}. Scalar outputs keep
#' the same point estimate and gain the same metadata only when inference is
#' requested. In matrix mode, \code{output = "edge_list"} returns a data frame with columns
#' \code{row}, \code{col}, \code{value}; \code{output = "sparse"} returns a
#' symmetric sparse matrix.
#'
#' @details
#' The polychoric correlation generalises the tetrachoric model to ordered
#' categorical variables with more than two levels. It assumes latent
#' standard-normal variables \eqn{Z_1, Z_2} with correlation \eqn{\rho}, and
#' cut-points
#' \eqn{-\infty = \alpha_0 < \alpha_1 < \cdots < \alpha_R = \infty} and
#' \eqn{-\infty = \beta_0 < \beta_1 < \cdots < \beta_C = \infty} such that
#' \deqn{
#' X = r \iff \alpha_{r-1} < Z_1 \le \alpha_r,
#' \qquad
#' Y = c \iff \beta_{c-1} < Z_2 \le \beta_c.
#' }
#' For an observed \eqn{R \times C} contingency table with counts \eqn{n_{rc}},
#' the thresholds are estimated from the marginal cumulative proportions:
#' \deqn{
#' \alpha_r = \Phi^{-1}\!\Big(\sum_{k \le r} P(X = k)\Big),
#' \qquad
#' \beta_c = \Phi^{-1}\!\Big(\sum_{k \le c} P(Y = k)\Big).
#' }
#' Holding those thresholds fixed, the log-likelihood for the latent
#' correlation is
#' \deqn{
#' \ell(\rho) = \sum_{r=1}^{R}\sum_{c=1}^{C}
#' n_{rc} \log \Pr\!\big(
#' \alpha_{r-1} < Z_1 \le \alpha_r,\;
#' \beta_{c-1} < Z_2 \le \beta_c
#' \mid \rho \big),
#' }
#' and the estimator returned is the maximiser over \eqn{\rho \in (-1,1)}.
#' The C++ implementation performs a dense one-dimensional search followed by
#' Brent refinement.
#'
#' The argument \code{correct} adds a non-negative continuity correction to
#' empty cells before marginal threshold estimation and likelihood evaluation.
#' This avoids numerical failures for sparse tables with structurally zero cells.
#' When \code{correct = 0} and zero cells are present, the corresponding fit can
#' be boundary-driven rather than a regular interior maximum-likelihood problem.
#' The returned object stores sparse-fit diagnostics and the thresholds used for
#' estimation so those cases can be inspected explicitly.
#'
#' \strong{Assumptions.} The coefficient is appropriate when both observed
#' ordinal variables are viewed as discretisations of jointly normal latent
#' variables. The optional p-values and confidence intervals adopt this
#' latent-normal interpretation and use the same likelihood that defines the
#' polychoric estimate. These inferential quantities are therefore model-based
#' and should not be interpreted as distribution-free summaries.
#'
#' \strong{Inference.} When \code{ci = TRUE} or \code{p_value = TRUE}, the
#' function refits the pairwise polychoric model by maximum likelihood and
#' obtains the observed information matrix numerically in C++. The reported
#' confidence interval is a Wald interval
#' \eqn{\hat\rho \pm z_{1-\alpha/2}\operatorname{SE}(\hat\rho)}, and the
#' reported p-value is from the large-sample Wald \eqn{z}-test for
#' \eqn{H_0:\rho = 0}. These inferential quantities are only computed when
#' explicitly requested.
#'
#' In matrix/data-frame mode, all pairwise polychoric correlations are computed
#' between supported ordinal columns. Diagonal entries are \code{1} for
#' non-degenerate columns and \code{NA} when a column has fewer than two
#' observed levels.
#'
#' \strong{Computational complexity.} For \eqn{p} ordinal variables, the matrix
#' path evaluates \eqn{p(p-1)/2} bivariate likelihoods. Each pair optimises a
#' single scalar parameter \eqn{\rho}, so the main cost is repeated evaluation
#' of bivariate normal rectangle probabilities.
#'
#' @references
#' Olsson, U. (1979). Maximum likelihood estimation of the polychoric
#' correlation coefficient. \emph{Psychometrika}, 44(4), 443-460.
#'
#' @examplesIf requireNamespace("mnormt", quietly = TRUE)
#' set.seed(124)
#' n <- 1200
#' Sigma <- matrix(c(
#'   1.00, 0.60, 0.40,
#'   0.60, 1.00, 0.50,
#'   0.40, 0.50, 1.00
#' ), 3, 3, byrow = TRUE)
#'
#' Z <- mnormt::rmnorm(n = n, mean = rep(0, 3), varcov = Sigma)
#' Y <- data.frame(
#'   y1 = ordered(cut(
#'     Z[, 1],
#'     breaks = c(-Inf, -0.7, 0.4, Inf),
#'     labels = c("low", "mid", "high")
#'   )),
#'   y2 = ordered(cut(
#'     Z[, 2],
#'     breaks = c(-Inf, -1.0, -0.1, 0.8, Inf),
#'     labels = c("1", "2", "3", "4")
#'   )),
#'   y3 = ordered(cut(
#'     Z[, 3],
#'     breaks = c(-Inf, -0.4, 0.2, 1.1, Inf),
#'     labels = c("A", "B", "C", "D")
#'   ))
#' )
#'
#' pc <- polychoric(Y)
#' print(pc, digits = 3)
#' summary(pc)
#' plot(pc)
#' polychoric(Y, output = "edge_list", threshold = 0.3, diag = FALSE)
#'
#' # Interactive viewing (requires shiny)
#' if (interactive() && requireNamespace("shiny", quietly = TRUE)) {
#'   view_corr_shiny(pc)
#' }
#'
#' # latent Pearson correlations used to generate the ordinal variables
#' round(stats::cor(Z), 2)
#' @author Thiago de Paula Oliveira
#' @export
polychoric <- function(data,
                       y = NULL,
                       na_method = c("error", "pairwise"),
                       ci = FALSE,
                       p_value = FALSE,
                       conf_level = 0.95,
                       correct = 0.5,
                       output = c("matrix", "sparse", "edge_list"),
                       threshold = 0,
                       diag = TRUE,
                       ...) {
  output_cfg <- .mc_validate_output_args(
    output = output,
    threshold = threshold,
    diag = diag
  )
  if (missing(na_method) && isFALSE(ci) && isFALSE(p_value) && ...length() == 0L) {
    check_scalar_nonneg(correct, arg = "correct")

    if (is.null(y) && inherits(data, "table")) {
      if (!identical(output_cfg$output, "matrix")) {
        abort_bad_arg(
          "output",
          message = "must be {.val matrix} when {.arg data} is a contingency table."
        )
      }
      tab <- as.matrix(data)
      est <- .mc_polychoric_table_estimate(tab, correct = correct)
      thresholds <- .mc_poly_table_thresholds(tab, correct = correct)
      diagnostics <- .mc_latent_pair_metadata(
        est = est,
        tab = tab,
        n_complete = sum(tab),
        correct = correct,
        thresholds = thresholds
      )
      return(.mc_attach_scalar_latent(
        est,
        method = "polychoric",
        description = "Pairwise polychoric correlation",
        diagnostics = diagnostics,
        thresholds = thresholds
      ))
    }

    if (!is.null(y)) {
      if (!identical(output_cfg$output, "matrix")) {
        abort_bad_arg(
          "output",
          message = "must be {.val matrix} when {.arg y} is supplied."
        )
      }
      check_same_length(data, y, arg_x = "data", arg_y = "y")
      .mc_check_latent_missing(list(data = data, y = y), check_na = TRUE, arg = "data")

      enc_x <- .mc_encode_ordinal_vector(data, binary = FALSE)
      enc_y <- .mc_encode_ordinal_vector(y, binary = FALSE)
      if (is.null(enc_x) || is.null(enc_y)) {
        abort_bad_arg("data", message = "and {.arg y} must both be ordinal variables.")
      }
      n_x <- length(enc_x$levels)
      n_y <- length(enc_y$levels)
      global <- identical(n_x, n_y)
      pair <- .mc_pair_complete(enc_x$code, enc_y$code, TRUE)
      est <- .mc_pair_polychoric(
        pair$x, pair$y,
        n_x = n_x,
        n_y = n_y,
        correct = correct,
        check_na = TRUE,
        tau_x = if (global) .mc_global_cutpoints(pair$x, n_x) else NULL,
        tau_y = if (global) .mc_global_cutpoints(pair$y, n_y) else NULL,
        global = global
      )
      tab <- .mc_fast_ordinal_table(pair$x, pair$y, n_x = n_x, n_y = n_y)
      thresholds <- list(
        data = .mc_global_cutpoints(pair$x, n_x),
        y = .mc_global_cutpoints(pair$y, n_y)
      )
      diagnostics <- .mc_latent_pair_metadata(
        est = est,
        tab = tab,
        n_complete = length(pair$x),
        correct = correct,
        thresholds = thresholds
      )
      return(.mc_attach_scalar_latent(
        est,
        method = "polychoric",
        description = "Pairwise polychoric correlation",
        diagnostics = diagnostics,
        thresholds = thresholds
      ))
    }

    enc <- .mc_extract_discrete_columns(data, kind = "ordinal", arg = "data", min_cols = 2L)
    .mc_check_latent_missing(lapply(enc, `[[`, "code"), check_na = TRUE, arg = "data")

    p <- length(enc)
    n_levels <- vapply(enc, function(z) length(z$levels), integer(1))
    global_all <- length(unique(n_levels)) == 1L
    tau_mat <- if (global_all) .mc_tau_matrix(enc) else matrix(NA_real_, nrow = 0L, ncol = 0L)
    x_mat <- .mc_encode_list_to_matrix(enc)
    out <- matrixCorr_polychoric_matrix_cpp(
      x = x_mat,
      n_levels = n_levels,
      tau_mat = tau_mat,
      global_all = global_all,
      correct = correct,
      pairwise_complete = FALSE
    )
    dimnames(out) <- list(names(enc), names(enc))

    diag_info <- .mc_matrix_diagnostics_template(p, dimnames(out))
    diag(diag_info$zero_cells) <- 0L
    diag(diag_info$n_complete) <- vapply(enc, function(z) sum(!is.na(z$code)), integer(1))
    diag(diag_info$corrected) <- FALSE
    diag(diag_info$boundary) <- FALSE
    diag(diag_info$near_boundary) <- FALSE
    diag(diag_info$converged) <- is.finite(diag(out))
    for (j in seq_len(p - 1L)) {
      for (k in (j + 1L):p) {
        pair <- .mc_pair_complete(enc[[j]]$code, enc[[k]]$code, TRUE)
        tab <- .mc_fast_ordinal_table(
          pair$x, pair$y,
          n_x = length(enc[[j]]$levels),
          n_y = length(enc[[k]]$levels)
        )
        diag_info <- .mc_fill_pair_diag(diag_info, j, k, out[j, k], tab, length(pair$x), correct)
      }
    }
    thresholds <- lapply(enc, function(z) .mc_global_cutpoints(z$code, length(z$levels)))
    names(thresholds) <- names(enc)
    out <- .mc_structure_corr_matrix(
      out,
      class_name = "polychoric_corr",
      method = "polychoric",
      description = "Pairwise polychoric correlation matrix",
      diagnostics = diag_info,
      thresholds = thresholds,
      correct = correct
    )
    return(.mc_finalize_corr_output(
      out,
      output = output_cfg$output,
      threshold = output_cfg$threshold,
      diag = output_cfg$diag
    ))
  }

  legacy_args <- .mc_extract_legacy_aliases(list(...), allowed = "check_na")
  na_cfg <- resolve_na_args(
    na_method = na_method,
    check_na = legacy_args$check_na %||% NULL,
    na_method_missing = missing(na_method)
  )
  check_na <- na_cfg$check_na
  check_bool(ci, arg = "ci")
  check_bool(p_value, arg = "p_value")
  check_scalar_nonneg(correct, arg = "correct")
  if (isTRUE(ci)) {
    check_prob_scalar(conf_level, arg = "conf_level", open_ends = TRUE)
  }

  if (is.null(y) && inherits(data, "table")) {
    if (!identical(output_cfg$output, "matrix")) {
      abort_bad_arg(
        "output",
        message = "must be {.val matrix} when {.arg data} is a contingency table."
      )
    }
    tab <- as.matrix(data)
    fit <- if (isTRUE(ci) || isTRUE(p_value)) {
      .mc_polychoric_inference_one(tab, correct = correct, conf_level = conf_level)
    } else {
      NULL
    }
    est <- if (is.null(fit)) .mc_polychoric_table_estimate(tab, correct = correct) else as.numeric(fit$estimate)
    thresholds <- .mc_poly_table_thresholds(tab, correct = correct)
    diagnostics <- .mc_latent_pair_metadata(
      est = est,
      tab = tab,
      n_complete = sum(tab),
      correct = correct,
      thresholds = thresholds
    )
    out <- .mc_attach_scalar_latent(
      est,
      method = "polychoric",
      description = "Pairwise polychoric correlation",
      diagnostics = diagnostics,
      thresholds = thresholds
    )
    if (isTRUE(ci) || isTRUE(p_value)) {
      out <- .mc_attach_latent_inference(
        out,
        .mc_latent_scalar_payload(fit, return_ci = ci, return_p = p_value),
        conf_level = conf_level
      )
    }
    return(out)
  }

  if (!is.null(y)) {
    if (!identical(output_cfg$output, "matrix")) {
      abort_bad_arg(
        "output",
        message = "must be {.val matrix} when {.arg y} is supplied."
      )
    }
    check_same_length(data, y, arg_x = "data", arg_y = "y")
    .mc_check_latent_missing(list(data = data, y = y), check_na = check_na, arg = "data")

    enc_x <- .mc_encode_ordinal_vector(data, binary = FALSE)
    enc_y <- .mc_encode_ordinal_vector(y, binary = FALSE)
    if (is.null(enc_x) || is.null(enc_y)) {
      abort_bad_arg(
        "data",
        message = "and {.arg y} must both be ordinal variables."
      )
    }
    n_x <- length(enc_x$levels)
    n_y <- length(enc_y$levels)
    global <- identical(n_x, n_y)
    pair <- .mc_pair_complete(enc_x$code, enc_y$code, check_na)
    tab <- .mc_fast_ordinal_table(pair$x, pair$y, n_x = n_x, n_y = n_y)
    fit <- if (isTRUE(ci) || isTRUE(p_value)) {
      .mc_polychoric_inference_one(tab, correct = correct, conf_level = conf_level)
    } else {
      NULL
    }
    est <- .mc_pair_polychoric(
      pair$x, pair$y,
      n_x = n_x, n_y = n_y,
      correct = correct, check_na = TRUE,
      tau_x = if (global) .mc_global_cutpoints(pair$x, n_x) else NULL,
      tau_y = if (global) .mc_global_cutpoints(pair$y, n_y) else NULL,
      global = global
    )
    if (!is.null(fit)) {
      fit <- .mc_recenter_wald_fit(fit, estimate = est, conf_level = conf_level)
    }
    thresholds <- list(
      data = .mc_global_cutpoints(pair$x, n_x),
      y = .mc_global_cutpoints(pair$y, n_y)
    )
    diagnostics <- .mc_latent_pair_metadata(
      est = est,
      tab = tab,
      n_complete = length(pair$x),
      correct = correct,
      thresholds = thresholds
    )
    out <- .mc_attach_scalar_latent(
      est,
      method = "polychoric",
      description = "Pairwise polychoric correlation",
      diagnostics = diagnostics,
      thresholds = thresholds
    )
    if (isTRUE(ci) || isTRUE(p_value)) {
      out <- .mc_attach_latent_inference(
        out,
        .mc_latent_scalar_payload(fit, return_ci = ci, return_p = p_value),
        conf_level = conf_level
      )
    }
    return(out)
  }

  enc <- .mc_extract_discrete_columns(data, kind = "ordinal", arg = "data", min_cols = 2L)
  .mc_check_latent_missing(
    lapply(enc, `[[`, "code"),
    check_na = check_na,
    arg = "data"
  )

  p <- length(enc)
  n_levels <- vapply(enc, function(z) length(z$levels), integer(1))
  global_all <- length(unique(n_levels)) == 1L
  tau_mat <- if (global_all) {
    .mc_tau_matrix(enc)
  } else {
    matrix(NA_real_, nrow = 0L, ncol = 0L)
  }
  global_thresholds <- lapply(enc, function(z) .mc_global_cutpoints(z$code, length(z$levels)))
  names(global_thresholds) <- names(enc)
  x_mat <- .mc_encode_list_to_matrix(enc)
  out <- if (!isTRUE(ci) && !isTRUE(p_value)) {
    matrixCorr_polychoric_matrix_cpp(
      x = x_mat,
      n_levels = n_levels,
      tau_mat = tau_mat,
      global_all = global_all,
      correct = correct,
      pairwise_complete = !check_na
    )
  } else {
    matrix(NA_real_, nrow = p, ncol = p)
  }
  dimnames(out) <- list(names(enc), names(enc))
  diag_info <- .mc_matrix_diagnostics_template(p, dimnames(out))
  diag(diag_info$zero_cells) <- 0L
  diag(diag_info$n_complete) <- vapply(enc, function(z) sum(!is.na(z$code)), integer(1))
  diag(diag_info$corrected) <- FALSE
  diag(diag_info$boundary) <- FALSE
  diag(diag_info$near_boundary) <- FALSE
  diag(diag_info$converged) <- TRUE
  diag(out) <- ifelse(vapply(enc, function(z) length(unique(z$code[!is.na(z$code)])) >= 2L, logical(1)), 1, NA_real_)
  statistic <- parameter <- p_mat <- lwr <- upr <- NULL
  if (isTRUE(ci) || isTRUE(p_value)) {
    statistic <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    parameter <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    p_mat <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    lwr <- if (isTRUE(ci)) .mc_new_metric_matrix(out, "double") else NULL
    upr <- if (isTRUE(ci)) .mc_new_metric_matrix(out, "double") else NULL
  }
  for (j in seq_len(p - 1L)) {
    for (k in (j + 1L):p) {
      pair <- .mc_pair_complete(enc[[j]]$code, enc[[k]]$code, check_na)
      tab <- .mc_fast_ordinal_table(
        pair$x, pair$y,
        n_x = length(enc[[j]]$levels),
        n_y = length(enc[[k]]$levels)
      )
      global <- if (global_all) TRUE else identical(n_levels[j], n_levels[k])
      est_jk <- .mc_pair_polychoric(
        enc[[j]]$code, enc[[k]]$code,
        n_x = n_levels[j],
        n_y = n_levels[k],
        correct = correct,
        check_na = check_na,
        tau_x = if (global) global_thresholds[[j]] else NULL,
        tau_y = if (global) global_thresholds[[k]] else NULL,
        global = global
      )
      if (isTRUE(ci) || isTRUE(p_value)) {
        fit <- .mc_polychoric_inference_one(tab, correct = correct, conf_level = conf_level)
        fit <- .mc_recenter_wald_fit(fit, estimate = est_jk, conf_level = conf_level)
        out[j, k] <- est_jk
        out[k, j] <- out[j, k]
        if (isTRUE(p_value)) {
          statistic[j, k] <- as.numeric(fit$statistic)
          statistic[k, j] <- statistic[j, k]
          parameter[j, k] <- as.numeric(fit$parameter)
          parameter[k, j] <- parameter[j, k]
          p_mat[j, k] <- as.numeric(fit$p_value)
          p_mat[k, j] <- p_mat[j, k]
        }
        if (isTRUE(ci)) {
          lwr[j, k] <- as.numeric(fit$lwr)
          lwr[k, j] <- lwr[j, k]
          upr[j, k] <- as.numeric(fit$upr)
          upr[k, j] <- upr[j, k]
        }
      }
      diag_info <- .mc_fill_pair_diag(diag_info, j, k, out[j, k], tab, length(pair$x), correct)
    }
  }
  thresholds <- global_thresholds

  out <- .mc_structure_corr_matrix(
    out,
    class_name = "polychoric_corr",
    method = "polychoric",
    description = "Pairwise polychoric correlation matrix",
    diagnostics = diag_info,
    thresholds = thresholds,
    correct = correct
  )
  if (isTRUE(ci) || isTRUE(p_value)) {
    out <- .mc_attach_latent_inference(
      out,
      .mc_latent_matrix_payload(
        estimate = unclass(out),
        statistic = statistic,
        parameter = parameter,
        p_value = p_mat,
        n_obs = diag_info$n_complete,
        lwr = lwr,
        upr = upr,
        conf_level = conf_level,
        ci_method = "wald_information_polychoric",
        inference_method = "wald_z_polychoric",
        return_ci = ci,
        return_p = p_value
      ),
      conf_level = conf_level
    )
  }
  .mc_finalize_corr_output(
    out,
    output = output_cfg$output,
    threshold = output_cfg$threshold,
    diag = output_cfg$diag
  )
}

#' @rdname polychoric
#' @method print polychoric_corr
#' @param x An object of class \code{polychoric_corr}.
#' @param digits Integer; number of decimal places to print.
#' @param n Optional row threshold for compact preview output.
#' @param topn Optional number of leading/trailing rows to show when truncated.
#' @param max_vars Optional maximum number of visible columns; `NULL` derives this
#'   from console width.
#' @param width Optional display width; defaults to \code{getOption("width")}.
#' @param show_ci One of \code{"yes"} or \code{"no"}.
#' @param ... Additional arguments passed to \code{print()}.
#' @export
print.polychoric_corr <- function(x, digits = 4, n = NULL, topn = NULL,
                                  max_vars = NULL, width = NULL,
                                  show_ci = NULL, ...) {
  .mc_print_corr_matrix(
    x, header = "Polychoric correlation matrix",
    digits = digits, n = n, topn = topn,
    max_vars = max_vars, width = width, show_ci = show_ci, ...
  )
}

#' @rdname polychoric
#' @method plot polychoric_corr
#' @param title Plot title. Default is \code{"Polychoric correlation heatmap"}.
#' @param low_color Color for the minimum correlation.
#' @param high_color Color for the maximum correlation.
#' @param mid_color Color for zero correlation.
#' @param value_text_size Font size used in tile labels.
#' @param show_value Logical; if \code{TRUE} (default), overlay numeric values
#'   on the heatmap tiles.
#' @export
plot.polychoric_corr <- function(x, title = "Polychoric correlation heatmap",
                                 low_color = "indianred1",
                                 high_color = "steelblue1",
                                 mid_color = "white",
                                 value_text_size = 4,
                                 show_value = TRUE, ...) {
  .mc_plot_corr_matrix(
    x, class_name = "polychoric_corr", fill_name = "Polychoric",
    title = title, low_color = low_color, high_color = high_color,
    mid_color = mid_color, value_text_size = value_text_size,
    show_value = show_value, ...
  )
}

#' @rdname polychoric
#' @method summary polychoric_corr
#' @param object An object of class \code{polychoric_corr}.
#' @param ci_digits Integer; digits for confidence limits in the pairwise
#'   summary.
#' @param p_digits Integer; digits for p-values in the pairwise summary.
#' @export
summary.polychoric_corr <- function(object, n = NULL, topn = NULL,
                                    max_vars = NULL, width = NULL,
                                    ci_digits = 3,
                                    p_digits = 4,
                                    show_ci = NULL, ...) {
  check_inherits(object, "polychoric_corr")
  show_ci <- .mc_validate_yes_no(
    show_ci,
    arg = "show_ci",
    default = .mc_display_option("summary_show_ci", "yes")
  )

  ci <- .mc_latent_ci_attr(object)
  inf <- .mc_latent_inference_attr(object)
  if (is.null(ci) && (is.null(inf) || is.null(inf$p_value))) {
    return(.mc_summary_corr_matrix(object, header = "Latent correlation summary", topn = topn))
  }

  .mc_latent_pairwise_summary(
    object,
    summary_class = "summary.polychoric_corr",
    ci_digits = ci_digits,
    p_digits = p_digits,
    show_ci = show_ci
  )
}

#' @rdname polychoric
#' @method print summary.polychoric_corr
#' @param x An object of class \code{summary.polychoric_corr}.
#' @export
print.summary.polychoric_corr <- function(x, digits = NULL, n = NULL,
                                          topn = NULL, max_vars = NULL,
                                          width = NULL, show_ci = NULL, ...) {
  .mc_print_latent_pairwise_summary(
    x,
    title = "Polychoric correlation summary",
    digits = digits,
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ...
  )
}

#' @title Polyserial Correlation Between Continuous and Ordinal Variables
#'
#' @description
#' Computes polyserial correlations between continuous variables in \code{data}
#' and ordinal variables in \code{y}. Both pairwise vector mode and rectangular
#' matrix/data-frame mode are supported.
#'
#' @usage polyserial(data, y, na_method = c("error", "pairwise"), ci = FALSE, p_value = FALSE,
#'   conf_level = 0.95, ...)
#'
#' @param data A numeric vector, matrix, or data frame containing continuous
#' variables.
#' @param y An ordinal vector, matrix, or data frame containing ordinal
#' variables. Supported columns are factors, ordered factors, logical values,
#' or integer-like numerics.
#' @param na_method Character scalar controlling missing-data handling.
#'   \code{"error"} rejects missing values. \code{"pairwise"} uses pairwise
#'   complete cases.
#' @param ci Logical (default \code{FALSE}). If \code{TRUE}, attach
#' model-based large-sample Wald confidence intervals derived from the observed
#' information matrix of the latent-variable likelihood.
#' @param p_value Logical (default \code{FALSE}). If \code{TRUE}, attach
#' model-based large-sample Wald p-values and test statistics for each
#' estimated latent correlation.
#' @param conf_level Confidence level used when \code{ci = TRUE}. Default is
#' \code{0.95}.
#'
#' @return
#' If both \code{data} and \code{y} are vectors, a numeric scalar. Otherwise a
#' numeric matrix of class \code{polyserial_corr} with rows corresponding to
#' the continuous variables in \code{data} and columns to the ordinal variables
#' in \code{y}. Matrix outputs carry attributes \code{method},
#' \code{description}, and \code{package = "matrixCorr"}. When
#' \code{p_value = TRUE}, the returned object also carries an \code{inference}
#' attribute with elements \code{estimate}, \code{statistic}, \code{parameter},
#' \code{p_value}, and \code{n_obs}. When \code{ci = TRUE}, it also carries a
#' \code{ci} attribute with elements \code{est}, \code{lwr.ci},
#' \code{upr.ci}, \code{conf.level}, and \code{ci.method}, plus
#' \code{attr(x, "conf.level")}. Scalar outputs keep the same point estimate
#' and gain the same metadata only when inference is requested.
#'
#' @details
#' The polyserial correlation assumes a latent bivariate normal model between a
#' continuous variable and an unobserved continuous propensity underlying an
#' ordinal variable. Let
#' \eqn{(X, Z)^\top \sim N_2(0, \Sigma)} with
#' \eqn{\mathrm{corr}(X,Z)=\rho}, and suppose the observed ordinal response
#' \eqn{Y} is formed by cut-points
#' \eqn{-\infty = \beta_0 < \beta_1 < \cdots < \beta_K = \infty}:
#' \deqn{
#' Y = k \iff \beta_{k-1} < Z \le \beta_k.
#' }
#' After standardising the observed continuous variable \eqn{X}, the thresholds
#' are estimated from the marginal proportions of \eqn{Y}. Conditional on an
#' observed \eqn{x_i}, the category probability is
#' \deqn{
#' \Pr(Y_i = k \mid X_i = x_i, \rho)
#' =
#' \Phi\!\left(\frac{\beta_k - \rho x_i}{\sqrt{1-\rho^2}}\right)
#' -
#' \Phi\!\left(\frac{\beta_{k-1} - \rho x_i}{\sqrt{1-\rho^2}}\right).
#' }
#' The returned estimate maximises the log-likelihood
#' \deqn{
#' \ell(\rho) = \sum_{i=1}^{n}\log \Pr(Y_i = y_i \mid X_i = x_i, \rho)
#' }
#' over \eqn{\rho \in (-1,1)} via a one-dimensional Brent search in C++.
#'
#' \strong{Assumptions.} The coefficient is appropriate when the ordinal
#' variable is viewed as the discretised version of a latent normal variable
#' that is jointly normal with the observed continuous variable. The optional
#' p-values and confidence intervals adopt this latent-normal interpretation
#' and use the same likelihood that defines the polyserial estimate. These
#' inferential quantities are therefore model-based and should not be
#' interpreted as distribution-free summaries.
#'
#' \strong{Inference.} When \code{ci = TRUE} or \code{p_value = TRUE}, the
#' function refits the pairwise polyserial model by maximum likelihood and
#' obtains the observed information matrix numerically in C++. The reported
#' confidence interval is a Wald interval
#' \eqn{\hat\rho \pm z_{1-\alpha/2}\operatorname{SE}(\hat\rho)}, and the
#' reported p-value is from the large-sample Wald \eqn{z}-test for
#' \eqn{H_0:\rho = 0}. These inferential quantities are only computed when
#' explicitly requested.
#'
#' In vector mode a single estimate is returned. In matrix/data-frame mode,
#' every numeric column of \code{data} is paired with every ordinal column of
#' \code{y}, producing a rectangular matrix of continuous-by-ordinal
#' polyserial correlations.
#'
#' \strong{Computational complexity.} If \code{data} has \eqn{p_x} continuous
#' columns and \code{y} has \eqn{p_y} ordinal columns, the matrix path computes
#' \eqn{p_x p_y} separate one-parameter likelihood optimisations.
#'
#' @references
#' Olsson, U., Drasgow, F., & Dorans, N. J. (1982). The polyserial
#' correlation coefficient. \emph{Psychometrika}, 47(3), 337-347.
#' @param ... Deprecated compatibility aliases. The legacy \code{check_na}
#'   argument is still accepted temporarily.
#'
#' @examplesIf requireNamespace("mnormt", quietly = TRUE)
#' set.seed(125)
#' n <- 1000
#' Sigma <- matrix(c(
#'   1.00, 0.30, 0.55, 0.20,
#'   0.30, 1.00, 0.25, 0.50,
#'   0.55, 0.25, 1.00, 0.40,
#'   0.20, 0.50, 0.40, 1.00
#' ), 4, 4, byrow = TRUE)
#'
#' Z <- mnormt::rmnorm(n = n, mean = rep(0, 4), varcov = Sigma)
#' X <- data.frame(x1 = Z[, 1], x2 = Z[, 2])
#' Y <- data.frame(
#'   y1 = ordered(cut(
#'     Z[, 3],
#'     breaks = c(-Inf, -0.5, 0.7, Inf),
#'     labels = c("low", "mid", "high")
#'   )),
#'   y2 = ordered(cut(
#'     Z[, 4],
#'     breaks = c(-Inf, -1.0, 0.0, 1.0, Inf),
#'     labels = c("1", "2", "3", "4")
#'   ))
#' )
#'
#' ps <- polyserial(X, Y)
#' print(ps, digits = 3)
#' summary(ps)
#' plot(ps)
#' @author Thiago de Paula Oliveira
#' @export
polyserial <- function(data,
                       y,
                       na_method = c("error", "pairwise"),
                       ci = FALSE,
                       p_value = FALSE,
                       conf_level = 0.95,
                       ...) {
  if (missing(na_method) && isFALSE(ci) && isFALSE(p_value) && ...length() == 0L) {
    scalar <- is.null(dim(data)) && is.atomic(data) && !is.factor(data) &&
      is.null(dim(y)) && is.atomic(y)

    x_mat <- .mc_extract_continuous_matrix(data, arg = "data", min_cols = 1L)
    y_enc <- .mc_extract_discrete_columns(y, kind = "ordinal", arg = "y", min_cols = 1L)
    if (nrow(x_mat) != length(y_enc[[1L]]$code)) {
      abort_bad_arg("data", message = "and {.arg y} must have the same number of observations.")
    }
    .mc_check_latent_missing(
      c(list(.x_mat = x_mat), lapply(y_enc, `[[`, "code")),
      check_na = TRUE,
      arg = "data"
    )

    if (scalar) {
      return(.mc_pair_polyserial(x_mat[, 1L], y_enc[[1L]]$code, TRUE))
    }

    out <- matrix(
      NA_real_,
      nrow = ncol(x_mat),
      ncol = length(y_enc),
      dimnames = list(colnames(x_mat), names(y_enc))
    )
    for (j in seq_len(ncol(x_mat))) {
      xj <- x_mat[, j]
      for (k in seq_along(y_enc)) {
        out[j, k] <- .mc_pair_polyserial(xj, y_enc[[k]]$code, TRUE)
      }
    }
    return(.mc_structure_corr_matrix(
      out,
      class_name = "polyserial_corr",
      method = "polyserial",
      description = "Polyserial correlation matrix (continuous x ordinal)"
    ))
  }

  legacy_args <- .mc_extract_legacy_aliases(list(...), allowed = "check_na")
  na_cfg <- resolve_na_args(
    na_method = na_method,
    check_na = legacy_args$check_na %||% NULL,
    na_method_missing = missing(na_method)
  )
  check_na <- na_cfg$check_na
  check_bool(ci, arg = "ci")
  check_bool(p_value, arg = "p_value")
  if (isTRUE(ci)) {
    check_prob_scalar(conf_level, arg = "conf_level", open_ends = TRUE)
  }

  scalar <- is.null(dim(data)) && is.atomic(data) && !is.factor(data) &&
    is.null(dim(y)) && is.atomic(y)

  x_mat <- .mc_extract_continuous_matrix(data, arg = "data", min_cols = 1L)
  y_enc <- .mc_extract_discrete_columns(y, kind = "ordinal", arg = "y", min_cols = 1L)

  if (nrow(x_mat) != length(y_enc[[1L]]$code)) {
    abort_bad_arg(
      "data",
      message = "and {.arg y} must have the same number of observations."
    )
  }

  .mc_check_latent_missing(
    c(list(.x_mat = x_mat), lapply(y_enc, `[[`, "code")),
    check_na = check_na,
    arg = "data"
  )

  if (scalar) {
    if (!isTRUE(ci) && !isTRUE(p_value)) {
      return(.mc_pair_polyserial(x_mat[, 1L], y_enc[[1L]]$code, check_na))
    }
    fit <- .mc_polyserial_inference_one(
      x_mat[, 1L],
      y_enc[[1L]]$code,
      check_na = check_na,
      conf_level = conf_level
    )
    out <- .mc_attach_scalar_latent(
      fit$estimate,
      method = "polyserial",
      description = "Polyserial correlation",
      diagnostics = list(n_complete = as.integer(fit$n_obs))
    )
    out <- .mc_attach_latent_inference(
      out,
      .mc_latent_scalar_payload(fit, return_ci = ci, return_p = p_value),
      conf_level = conf_level
    )
    return(out)
  }

  out <- matrix(NA_real_, nrow = ncol(x_mat), ncol = length(y_enc),
                dimnames = list(colnames(x_mat), names(y_enc)))
  n_complete <- NULL
  if (isTRUE(ci) || isTRUE(p_value)) {
    n_complete <- matrix(
      NA_integer_,
      nrow = nrow(out),
      ncol = ncol(out),
      dimnames = dimnames(out)
    )
  }
  statistic <- parameter <- p_mat <- lwr <- upr <- NULL
  if (isTRUE(ci) || isTRUE(p_value)) {
    statistic <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    parameter <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    p_mat <- if (isTRUE(p_value)) .mc_new_metric_matrix(out, "double") else NULL
    lwr <- if (isTRUE(ci)) .mc_new_metric_matrix(out, "double") else NULL
    upr <- if (isTRUE(ci)) .mc_new_metric_matrix(out, "double") else NULL
  }

  for (j in seq_len(ncol(x_mat))) {
    xj <- x_mat[, j]
    for (k in seq_along(y_enc)) {
      if (isTRUE(ci) || isTRUE(p_value)) {
        fit <- .mc_polyserial_inference_one(
          xj,
          y_enc[[k]]$code,
          check_na = check_na,
          conf_level = conf_level
        )
        out[j, k] <- as.numeric(fit$estimate)
        n_complete[j, k] <- as.integer(fit$n_obs)
        if (isTRUE(p_value)) {
          statistic[j, k] <- as.numeric(fit$statistic)
          parameter[j, k] <- as.numeric(fit$parameter)
          p_mat[j, k] <- as.numeric(fit$p_value)
        }
        if (isTRUE(ci)) {
          lwr[j, k] <- as.numeric(fit$lwr)
          upr[j, k] <- as.numeric(fit$upr)
        }
      } else {
        out[j, k] <- .mc_pair_polyserial(xj, y_enc[[k]]$code, check_na)
      }
    }
  }

  out <- .mc_structure_corr_matrix(
    out,
    class_name = "polyserial_corr",
    method = "polyserial",
    description = "Polyserial correlation matrix (continuous x ordinal)",
    diagnostics = if (isTRUE(ci) || isTRUE(p_value)) list(n_complete = n_complete) else NULL
  )
  if (isTRUE(ci) || isTRUE(p_value)) {
    out <- .mc_attach_latent_inference(
      out,
      .mc_latent_matrix_payload(
        estimate = unclass(out),
        statistic = statistic,
        parameter = parameter,
        p_value = p_mat,
        n_obs = n_complete,
        lwr = lwr,
        upr = upr,
        conf_level = conf_level,
        ci_method = "wald_information_polyserial",
        inference_method = "wald_z_polyserial",
        return_ci = ci,
        return_p = p_value
      ),
      conf_level = conf_level
    )
  }
  out
}

#' @rdname polyserial
#' @method print polyserial_corr
#' @param x An object of class \code{polyserial_corr}.
#' @param digits Integer; number of decimal places to print.
#' @param n Optional row threshold for compact preview output.
#' @param topn Optional number of leading/trailing rows to show when truncated.
#' @param max_vars Optional maximum number of visible columns; `NULL` derives this
#'   from console width.
#' @param width Optional display width; defaults to \code{getOption("width")}.
#' @param show_ci One of \code{"yes"} or \code{"no"}.
#' @param ... Additional arguments passed to \code{print()}.
#' @export
print.polyserial_corr <- function(x, digits = 4, n = NULL, topn = NULL,
                                  max_vars = NULL, width = NULL,
                                  show_ci = NULL, ...) {
  .mc_print_corr_matrix(
    x, header = "Polyserial correlation matrix",
    digits = digits, n = n, topn = topn,
    max_vars = max_vars, width = width, show_ci = show_ci, ...
  )
}

#' @rdname polyserial
#' @method plot polyserial_corr
#' @param title Plot title. Default is \code{"Polyserial correlation heatmap"}.
#' @param low_color Color for the minimum correlation.
#' @param high_color Color for the maximum correlation.
#' @param mid_color Color for zero correlation.
#' @param value_text_size Font size used in tile labels.
#' @param show_value Logical; if \code{TRUE} (default), overlay numeric values
#'   on the heatmap tiles.
#' @export
plot.polyserial_corr <- function(x, title = "Polyserial correlation heatmap",
                                 low_color = "indianred1",
                                 high_color = "steelblue1",
                                 mid_color = "white",
                                 value_text_size = 4,
                                 show_value = TRUE, ...) {
  .mc_plot_corr_matrix(
    x, class_name = "polyserial_corr", fill_name = "Polyserial",
    title = title, low_color = low_color, high_color = high_color,
    mid_color = mid_color, value_text_size = value_text_size,
    show_value = show_value, ...
  )
}

#' @rdname polyserial
#' @method summary polyserial_corr
#' @param object An object of class \code{polyserial_corr}.
#' @param ci_digits Integer; digits for confidence limits in the pairwise
#'   summary.
#' @param p_digits Integer; digits for p-values in the pairwise summary.
#' @export
summary.polyserial_corr <- function(object, n = NULL, topn = NULL,
                                    max_vars = NULL, width = NULL,
                                    ci_digits = 3,
                                    p_digits = 4,
                                    show_ci = NULL, ...) {
  check_inherits(object, "polyserial_corr")
  show_ci <- .mc_validate_yes_no(
    show_ci,
    arg = "show_ci",
    default = .mc_display_option("summary_show_ci", "yes")
  )

  ci <- .mc_latent_ci_attr(object)
  inf <- .mc_latent_inference_attr(object)
  if (is.null(ci) && (is.null(inf) || is.null(inf$p_value))) {
    return(.mc_summary_corr_matrix(object, header = "Latent correlation summary", topn = topn))
  }

  .mc_latent_pairwise_summary(
    object,
    summary_class = "summary.polyserial_corr",
    ci_digits = ci_digits,
    p_digits = p_digits,
    show_ci = show_ci
  )
}

#' @rdname polyserial
#' @method print summary.polyserial_corr
#' @param x An object of class \code{summary.polyserial_corr}.
#' @export
print.summary.polyserial_corr <- function(x, digits = NULL, n = NULL,
                                          topn = NULL, max_vars = NULL,
                                          width = NULL, show_ci = NULL, ...) {
  .mc_print_latent_pairwise_summary(
    x,
    title = "Polyserial correlation summary",
    digits = digits,
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ...
  )
}

#' @title Biserial Correlation Between Continuous and Binary Variables
#'
#' @description
#' Computes biserial correlations between continuous variables in \code{data}
#' and binary variables in \code{y}. Both pairwise vector mode and rectangular
#' matrix/data-frame mode are supported.
#'
#' @usage biserial(data, y, na_method = c("error", "pairwise"), ci = FALSE, p_value = FALSE,
#'   conf_level = 0.95, ...)
#'
#' @param data A numeric vector, matrix, or data frame containing continuous
#' variables.
#' @param y A binary vector, matrix, or data frame. In data-frame mode, only
#' two-level columns are retained.
#' @param na_method Character scalar controlling missing-data handling.
#'   \code{"error"} rejects missing values. \code{"pairwise"} uses pairwise
#'   complete cases.
#' @param ci Logical (default \code{FALSE}). If \code{TRUE}, attach
#' approximate large-sample confidence intervals derived from a Fisher
#' \eqn{z}-transformation of the biserial estimate.
#' @param p_value Logical (default \code{FALSE}). If \code{TRUE}, attach
#' model-based large-sample p-values, test statistics, and degrees of freedom
#' for each biserial estimate.
#' @param conf_level Confidence level used when \code{ci = TRUE}. Default is
#' \code{0.95}.
#'
#' @return
#' If both \code{data} and \code{y} are vectors, a numeric scalar. Otherwise a
#' numeric matrix of class \code{biserial_corr} with rows corresponding to
#' the continuous variables in \code{data} and columns to the binary variables
#' in \code{y}. Matrix outputs carry attributes \code{method},
#' \code{description}, and \code{package = "matrixCorr"}. When
#' \code{p_value = TRUE}, the object also carries an \code{inference}
#' attribute with matrices \code{estimate}, \code{statistic},
#' \code{parameter}, \code{p_value}, and \code{n_obs}. When \code{ci = TRUE},
#' it additionally carries a \code{ci} attribute with matrices
#' \code{lwr.ci} and \code{upr.ci}, plus \code{attr(x, "conf.level")}. Scalar
#' outputs keep the same point estimate and gain the same metadata only when
#' inference is requested.
#'
#' @details
#' The biserial correlation is the special two-category case of the polyserial
#' model. It assumes that a binary variable \eqn{Y} arises by thresholding an
#' unobserved standard-normal variable \eqn{Z} that is jointly normal with a
#' continuous variable \eqn{X}. Writing \eqn{p = P(Y = 1)} and
#' \eqn{q = 1-p}, let \eqn{z_p = \Phi^{-1}(p)} and \eqn{\phi(z_p)} be the
#' standard-normal density evaluated at \eqn{z_p}. If \eqn{\bar x_1} and
#' \eqn{\bar x_0} denote the sample means of \eqn{X} in the two observed groups
#' and \eqn{s_x} is the sample standard deviation of \eqn{X}, the usual
#' biserial estimator is
#' \deqn{
#' r_b =
#' \frac{\bar x_1 - \bar x_0}{s_x}
#' \frac{pq}{\phi(z_p)}.
#' }
#' This is exactly the estimator implemented in the underlying C++ kernel.
#'
#' \strong{Assumptions.} The biserial coefficient is appropriate when the
#' observed binary variable is viewed as a thresholded version of an
#' unobserved continuous latent variable that is jointly normal with the
#' observed continuous variable. The optional p-values and confidence
#' intervals adopt this latent-normal interpretation together with the usual
#' large-sample approximations used for correlation coefficients. These
#' inferential quantities are therefore model-based and should not be
#' interpreted as distribution-free summaries.
#'
#' \strong{Inference.} When \code{p_value = TRUE}, the package reports the
#' large-sample \eqn{t}-statistic
#' \deqn{
#' t = r_b \sqrt{\frac{n - 2}{1 - r_b^2}},
#' }
#' referenced to a Student \eqn{t}-distribution with \eqn{n - 2} degrees of
#' freedom. When \code{ci = TRUE}, the package forms an approximate Fisher
#' \eqn{z}-interval by transforming \eqn{r_b} with
#' \eqn{z = \operatorname{atanh}(r_b)}, using standard error
#' \eqn{1 / \sqrt{n - 3}}, and mapping the limits back with
#' \eqn{\tanh(\cdot)}. The CI is therefore an internal large-sample
#' extension and is only computed when explicitly requested.
#'
#' In vector mode a single biserial correlation is returned. In
#' matrix/data-frame mode, every numeric column of \code{data} is paired with every
#' binary column of \code{y}, producing a rectangular matrix of
#' continuous-by-binary biserial correlations.
#'
#' Unlike the point-biserial correlation, which is just Pearson correlation on a
#' 0/1 coding of the binary variable, the biserial coefficient explicitly
#' assumes an underlying latent normal threshold model and rescales the mean
#' difference accordingly.
#'
#' \strong{Computational complexity.} If \code{data} has \eqn{p_x} continuous
#' columns and \code{y} has \eqn{p_y} binary columns, the matrix path computes
#' \eqn{p_x p_y} closed-form estimates with negligible extra memory beyond the
#' output matrix.
#'
#' @references
#' Olsson, U., Drasgow, F., & Dorans, N. J. (1982). The polyserial
#' correlation coefficient. \emph{Psychometrika}, 47(3), 337-347.
#'
#' Fisher, R. A. (1921). On the probable error of a coefficient of
#' correlation deduced from a small sample. \emph{Metron}, 1, 3-32.
#' @param ... Deprecated compatibility aliases. The legacy \code{check_na}
#'   argument is still accepted temporarily.
#'
#' @examplesIf requireNamespace("mnormt", quietly = TRUE)
#' set.seed(126)
#' n <- 1000
#' Sigma <- matrix(c(
#'   1.00, 0.35, 0.50, 0.25,
#'   0.35, 1.00, 0.30, 0.55,
#'   0.50, 0.30, 1.00, 0.40,
#'   0.25, 0.55, 0.40, 1.00
#' ), 4, 4, byrow = TRUE)
#'
#' Z <- mnormt::rmnorm(n = n, mean = rep(0, 4), varcov = Sigma)
#' X <- data.frame(x1 = Z[, 1], x2 = Z[, 2])
#' Y <- data.frame(
#'   g1 = Z[, 3] > stats::qnorm(0.65),
#'   g2 = Z[, 4] > stats::qnorm(0.55)
#' )
#'
#' bs <- biserial(X, Y, ci = TRUE, p_value = TRUE)
#' print(bs, digits = 3)
#' summary(bs)
#' plot(bs)
#' @author Thiago de Paula Oliveira
#' @export
biserial <- function(data,
                     y,
                     na_method = c("error", "pairwise"),
                     ci = FALSE,
                     p_value = FALSE,
                     conf_level = 0.95,
                     ...) {
  if (missing(na_method) && isFALSE(ci) && isFALSE(p_value) && ...length() == 0L) {
    scalar <- is.null(dim(data)) && is.atomic(data) && !is.factor(data) &&
      is.null(dim(y)) && is.atomic(y)

    x_mat <- .mc_extract_continuous_matrix(data, arg = "data", min_cols = 1L)
    y_enc <- .mc_extract_discrete_columns(y, kind = "binary", arg = "y", min_cols = 1L)
    if (nrow(x_mat) != length(y_enc[[1L]]$code)) {
      abort_bad_arg("data", message = "and {.arg y} must have the same number of observations.")
    }
    .mc_check_latent_missing(
      c(list(.x_mat = x_mat), lapply(y_enc, `[[`, "code")),
      check_na = TRUE,
      arg = "data"
    )

    out <- matrix(
      NA_real_,
      nrow = ncol(x_mat),
      ncol = length(y_enc),
      dimnames = list(colnames(x_mat), names(y_enc))
    )
    for (j in seq_len(ncol(x_mat))) {
      xj <- x_mat[, j]
      for (k in seq_along(y_enc)) {
        out[j, k] <- .mc_pair_biserial(xj, y_enc[[k]]$code, TRUE)
      }
    }
    out <- .mc_scalar_or_matrix(out, scalar = scalar)
    if (scalar) {
      return(out)
    }
    return(.mc_structure_corr_matrix(
      out,
      class_name = "biserial_corr",
      method = "biserial",
      description = "Biserial correlation matrix (continuous x binary)"
    ))
  }

  legacy_args <- .mc_extract_legacy_aliases(list(...), allowed = "check_na")
  na_cfg <- resolve_na_args(
    na_method = na_method,
    check_na = legacy_args$check_na %||% NULL,
    na_method_missing = missing(na_method)
  )
  check_na <- na_cfg$check_na
  check_bool(ci, arg = "ci")
  check_bool(p_value, arg = "p_value")
  if (isTRUE(ci)) {
    check_prob_scalar(conf_level, arg = "conf_level", open_ends = TRUE)
  }

  scalar <- is.null(dim(data)) && is.atomic(data) && !is.factor(data) &&
    is.null(dim(y)) && is.atomic(y)

  x_mat <- .mc_extract_continuous_matrix(data, arg = "data", min_cols = 1L)
  y_enc <- .mc_extract_discrete_columns(y, kind = "binary", arg = "y", min_cols = 1L)

  if (nrow(x_mat) != length(y_enc[[1L]]$code)) {
    abort_bad_arg(
      "data",
      message = "and {.arg y} must have the same number of observations."
    )
  }

  .mc_check_latent_missing(
    c(list(.x_mat = x_mat), lapply(y_enc, `[[`, "code")),
    check_na = check_na,
    arg = "data"
  )

  out <- matrix(NA_real_, nrow = ncol(x_mat), ncol = length(y_enc),
                dimnames = list(colnames(x_mat), names(y_enc)))
  diagnostics <- NULL
  inference_attr <- NULL
  ci_attr <- NULL

  if (isTRUE(ci) || isTRUE(p_value)) {
    n_complete <- matrix(
      NA_integer_,
      nrow = nrow(out),
      ncol = ncol(out),
      dimnames = dimnames(out)
    )
    statistic <- if (isTRUE(p_value)) matrix(
      NA_real_,
      nrow = nrow(out),
      ncol = ncol(out),
      dimnames = dimnames(out)
    ) else NULL
    parameter <- if (isTRUE(p_value)) matrix(
      NA_real_,
      nrow = nrow(out),
      ncol = ncol(out),
      dimnames = dimnames(out)
    ) else NULL
    p_mat <- if (isTRUE(p_value)) matrix(
      NA_real_,
      nrow = nrow(out),
      ncol = ncol(out),
      dimnames = dimnames(out)
    ) else NULL
    lwr <- if (isTRUE(ci)) matrix(
      NA_real_,
      nrow = nrow(out),
      ncol = ncol(out),
      dimnames = dimnames(out)
    ) else NULL
    upr <- if (isTRUE(ci)) matrix(
      NA_real_,
      nrow = nrow(out),
      ncol = ncol(out),
      dimnames = dimnames(out)
    ) else NULL
  }

  for (j in seq_len(ncol(x_mat))) {
    xj <- x_mat[, j]
    for (k in seq_along(y_enc)) {
      if (isTRUE(ci) || isTRUE(p_value)) {
        fit <- .mc_biserial_inference_one(
          xj,
          y_enc[[k]]$code,
          check_na = check_na,
          return_ci = ci,
          conf_level = conf_level
        )
        out[j, k] <- fit$estimate
        n_complete[j, k] <- fit$n_obs
        if (isTRUE(p_value)) {
          statistic[j, k] <- fit$statistic
          parameter[j, k] <- fit$parameter
          p_mat[j, k] <- fit$p_value
        }
        if (isTRUE(ci)) {
          lwr[j, k] <- fit$lwr
          upr[j, k] <- fit$upr
        }
      } else {
        out[j, k] <- .mc_pair_biserial(xj, y_enc[[k]]$code, check_na)
      }
    }
  }

  estimate_mat <- out
  attributes(estimate_mat) <- attributes(estimate_mat)[c("dim", "dimnames")]
  out <- .mc_scalar_or_matrix(out, scalar = scalar)
  if (scalar) {
    if (!isTRUE(ci) && !isTRUE(p_value)) {
      return(out)
    }
    diagnostics <- list(n_complete = as.integer(n_complete[1L, 1L]))
    scalar_out <- .mc_attach_scalar_latent(
      out,
      method = "biserial",
      description = "Biserial correlation",
      diagnostics = diagnostics
    )
    if (isTRUE(p_value)) {
      attr(scalar_out, "inference") <- list(
        method = "biserial_t_test",
        estimate = as.numeric(out),
        statistic = as.numeric(statistic[1L, 1L]),
        parameter = as.numeric(parameter[1L, 1L]),
        p_value = as.numeric(p_mat[1L, 1L]),
        n_obs = as.integer(n_complete[1L, 1L]),
        alternative = "two.sided"
      )
    }
    if (isTRUE(ci)) {
      attr(scalar_out, "ci") <- list(
        est = as.numeric(out),
        lwr.ci = as.numeric(lwr[1L, 1L]),
        upr.ci = as.numeric(upr[1L, 1L]),
        conf.level = conf_level,
        ci.method = "fisher_z_biserial"
      )
      attr(scalar_out, "conf.level") <- conf_level
    }
    return(scalar_out)
  }

  diagnostics <- if (isTRUE(ci) || isTRUE(p_value)) {
    list(n_complete = n_complete)
  } else {
    NULL
  }

  out <- .mc_structure_corr_matrix(
    out,
    class_name = "biserial_corr",
    method = "biserial",
    description = "Biserial correlation matrix (continuous x binary)",
    diagnostics = diagnostics
  )
    if (isTRUE(p_value)) {
      inference_attr <- list(
        method = "biserial_t_test",
        estimate = estimate_mat,
        statistic = statistic,
        parameter = parameter,
        p_value = p_mat,
      n_obs = n_complete,
      alternative = "two.sided"
    )
    attr(out, "inference") <- inference_attr
  }
  if (isTRUE(ci)) {
    ci_attr <- list(
      est = estimate_mat,
      lwr.ci = lwr,
      upr.ci = upr,
      conf.level = conf_level,
      ci.method = "fisher_z_biserial"
    )
    attr(out, "ci") <- ci_attr
    attr(out, "conf.level") <- conf_level
  }
  out
}

#' @rdname biserial
#' @method print biserial_corr
#' @param x An object of class \code{biserial_corr}.
#' @param digits Integer; number of decimal places to print.
#' @param n Optional row threshold for compact preview output.
#' @param topn Optional number of leading/trailing rows to show when truncated.
#' @param max_vars Optional maximum number of visible columns; `NULL` derives this
#'   from console width.
#' @param width Optional display width; defaults to \code{getOption("width")}.
#' @param show_ci One of \code{"yes"} or \code{"no"}.
#' @param ... Additional arguments passed to \code{print()}.
#' @export
print.biserial_corr <- function(x, digits = 4, n = NULL, topn = NULL,
                                max_vars = NULL, width = NULL,
                                show_ci = NULL, ...) {
  .mc_print_corr_matrix(
    x, header = "Biserial correlation matrix",
    digits = digits, n = n, topn = topn,
    max_vars = max_vars, width = width, show_ci = show_ci, ...
  )
}

#' @rdname biserial
#' @method plot biserial_corr
#' @param title Plot title. Default is \code{"Biserial correlation heatmap"}.
#' @param low_color Color for the minimum correlation.
#' @param high_color Color for the maximum correlation.
#' @param mid_color Color for zero correlation.
#' @param value_text_size Font size used in tile labels.
#' @param ci_text_size Text size for confidence intervals in the heatmap.
#' @param show_value Logical; if \code{TRUE} (default), overlay numeric values
#'   on the heatmap tiles.
#' @export
plot.biserial_corr <- function(x, title = "Biserial correlation heatmap",
                               low_color = "indianred1",
                               high_color = "steelblue1",
                               mid_color = "white",
                               value_text_size = 4,
                               ci_text_size = 3,
                               show_value = TRUE, ...) {
  check_bool(show_value, arg = "show_value")
  ci <- .mc_biserial_ci_attr(x)
  if (is.null(ci) || is.null(ci$lwr.ci) || is.null(ci$upr.ci)) {
    return(.mc_plot_corr_matrix(
      x, class_name = "biserial_corr", fill_name = "Biserial",
      title = title, low_color = low_color, high_color = high_color,
      mid_color = mid_color, value_text_size = value_text_size,
      show_value = show_value, ...
    ))
  }

  est_mat <- as.matrix(x)
  df_est <- as.data.frame(as.table(est_mat))
  names(df_est) <- c("Var1", "Var2", "biserial")

  df_lwr <- as.data.frame(as.table(ci$lwr.ci))
  names(df_lwr)[3L] <- "lwr"
  df_upr <- as.data.frame(as.table(ci$upr.ci))
  names(df_upr)[3L] <- "upr"
  df <- Reduce(
    function(a, b) merge(a, b, by = c("Var1", "Var2"), all = TRUE),
    list(df_est, df_lwr, df_upr)
  )

  lev_row <- unique(df_est$Var1)
  lev_col <- unique(df_est$Var2)
  df$Var1 <- factor(df$Var1, levels = rev(lev_row))
  df$Var2 <- factor(df$Var2, levels = lev_col)
  df$ci_label <- ifelse(
    is.na(df$lwr) | is.na(df$upr),
    NA_character_,
    sprintf("[%.3f, %.3f]", df$lwr, df$upr)
  )

  p <- ggplot2::ggplot(df, ggplot2::aes(x = Var2, y = Var1, fill = .data$biserial)) +
    ggplot2::geom_tile(color = "white") +
    ggplot2::scale_fill_gradient2(
      low = low_color,
      high = high_color,
      mid = mid_color,
      midpoint = 0,
      limits = c(-1, 1),
      name = "Biserial"
    ) +
    ggplot2::theme_minimal(base_size = 12) +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      panel.grid = ggplot2::element_blank(),
      ...
    ) +
    ggplot2::coord_fixed() +
    ggplot2::labs(title = title, x = NULL, y = NULL)

  if (isTRUE(show_value) && !is.null(value_text_size) && is.finite(value_text_size)) {
    p <- p + ggplot2::geom_text(
      ggplot2::aes(label = ifelse(is.na(biserial), "NA", sprintf("%.2f", biserial))),
      size = value_text_size,
      color = "black"
    )
  }
  if (isTRUE(show_value) && any(!is.na(df$ci_label))) {
    p <- p + ggplot2::geom_text(
      ggplot2::aes(label = ci_label, y = as.numeric(Var1) - 0.22),
      size = ci_text_size,
      color = "gray30",
      na.rm = TRUE
    )
  }

  p
}

.mc_latent_pairwise_summary <- function(object,
                                        summary_class,
                                        digits = 4,
                                        ci_digits = 3,
                                        p_digits = 4,
                                        show_ci = "yes") {
  est <- as.matrix(object)
  rn <- rownames(est)
  cn <- colnames(est)
  if (is.null(rn)) rn <- as.character(seq_len(nrow(est)))
  if (is.null(cn)) cn <- as.character(seq_len(ncol(est)))

  ci <- .mc_latent_ci_attr(object)
  inf <- .mc_latent_inference_attr(object)
  diag_attr <- attr(object, "diagnostics", exact = TRUE)
  include_ci <- identical(show_ci, "yes") && is.list(ci)
  include_p <- is.list(inf) && !is.null(inf$p_value)
  symmetric <- isTRUE(nrow(est) == ncol(est)) && isTRUE(isSymmetric(unclass(est)))

  n_pairs <- if (symmetric) {
    nrow(est) * (ncol(est) - 1L) / 2L
  } else {
    nrow(est) * ncol(est)
  }
  rows <- vector("list", n_pairs)
  k <- 0L

  for (i in seq_len(nrow(est))) {
    j_start <- if (symmetric) i + 1L else 1L
    if (j_start > ncol(est)) {
      next
    }
    for (j in j_start:ncol(est)) {
      k <- k + 1L
      rec <- list(
        var1 = rn[i],
        var2 = cn[j],
        estimate = round(est[i, j], digits)
      )
      if (is.list(diag_attr) && is.matrix(diag_attr$n_complete)) {
        rec$n_complete <- as.integer(diag_attr$n_complete[i, j])
      }
      if (include_ci) {
        rec$lwr <- if (is.finite(ci$lwr.ci[i, j])) round(ci$lwr.ci[i, j], ci_digits) else NA_real_
        rec$upr <- if (is.finite(ci$upr.ci[i, j])) round(ci$upr.ci[i, j], ci_digits) else NA_real_
      }
      if (include_p) {
        rec$statistic <- if (is.finite(inf$statistic[i, j])) round(inf$statistic[i, j], digits) else NA_real_
        rec$df <- if (is.finite(inf$parameter[i, j])) round(inf$parameter[i, j], digits) else NA_real_
        rec$p_value <- if (is.finite(inf$p_value[i, j])) round(inf$p_value[i, j], p_digits) else NA_real_
      }
      rows[[k]] <- rec
    }
  }

  if (!k) {
    df <- data.frame(var1 = character(), var2 = character(), estimate = numeric())
  } else {
    rows <- rows[seq_len(k)]
    df <- do.call(rbind.data.frame, rows)
  }
  rownames(df) <- NULL
  num_cols <- intersect(c("estimate", "lwr", "upr", "statistic", "df", "p_value"), names(df))
  int_cols <- intersect(c("n_complete"), names(df))
  for (nm in num_cols) df[[nm]] <- as.numeric(df[[nm]])
  for (nm in int_cols) df[[nm]] <- as.integer(df[[nm]])

  out <- .mc_finalize_summary_df(df, class_name = summary_class)
  attr(out, "overview") <- .mc_summary_corr_matrix(object, header = "Latent correlation summary")
  attr(out, "has_ci") <- include_ci
  attr(out, "has_p") <- include_p
  attr(out, "conf.level") <- if (is.null(ci)) NA_real_ else ci$conf.level
  attr(out, "ci_method") <- if (is.null(ci)) NA_character_ else ci$ci.method
  attr(out, "digits") <- digits
  attr(out, "ci_digits") <- ci_digits
  attr(out, "p_digits") <- p_digits
  attr(out, "inference_method") <- if (is.null(inf)) NA_character_ else inf$method
  out
}

.mc_print_latent_pairwise_summary <- function(x,
                                              title,
                                              digits = NULL,
                                              n = NULL,
                                              topn = NULL,
                                              max_vars = NULL,
                                              width = NULL,
                                              show_ci = NULL,
                                              ...) {
  .mc_print_pairwise_summary_digest(
    x,
    title = title,
    digits = .mc_coalesce(digits, 4),
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ci_method = if (isTRUE(attr(x, "has_ci"))) attr(x, "ci_method", exact = TRUE) else NULL,
    extra_items = c(inference = attr(x, "inference_method", exact = TRUE)),
    ...
  )
  invisible(x)
}

.mc_biserial_pairwise_summary <- function(object,
                                          digits = 4,
                                          ci_digits = 3,
                                          p_digits = 4,
                                          show_ci = "yes") {
  check_inherits(object, "biserial_corr")

  est <- as.matrix(object)
  rn <- rownames(est)
  cn <- colnames(est)
  if (is.null(rn)) rn <- as.character(seq_len(nrow(est)))
  if (is.null(cn)) cn <- as.character(seq_len(ncol(est)))

  ci <- .mc_biserial_ci_attr(object)
  inf <- .mc_biserial_inference_attr(object)
  diag_attr <- attr(object, "diagnostics", exact = TRUE)
  include_ci <- identical(show_ci, "yes") && is.list(ci)
  include_p <- is.list(inf) && !is.null(inf$p_value)

  rows <- vector("list", nrow(est) * ncol(est))
  k <- 0L
  for (i in seq_len(nrow(est))) {
    for (j in seq_len(ncol(est))) {
      k <- k + 1L
      rec <- list(
        var1 = rn[i],
        var2 = cn[j],
        estimate = round(est[i, j], digits)
      )
      if (is.list(diag_attr) && is.matrix(diag_attr$n_complete)) {
        rec$n_complete <- as.integer(diag_attr$n_complete[i, j])
      }
      if (include_ci) {
        rec$lwr <- if (is.finite(ci$lwr.ci[i, j])) round(ci$lwr.ci[i, j], ci_digits) else NA_real_
        rec$upr <- if (is.finite(ci$upr.ci[i, j])) round(ci$upr.ci[i, j], ci_digits) else NA_real_
      }
      if (include_p) {
        rec$statistic <- if (is.finite(inf$statistic[i, j])) round(inf$statistic[i, j], digits) else NA_real_
        rec$df <- if (is.finite(inf$parameter[i, j])) round(inf$parameter[i, j], digits) else NA_real_
        rec$p_value <- if (is.finite(inf$p_value[i, j])) round(inf$p_value[i, j], p_digits) else NA_real_
      }
      rows[[k]] <- rec
    }
  }

  df <- do.call(rbind.data.frame, rows)
  rownames(df) <- NULL
  num_cols <- intersect(c("estimate", "lwr", "upr", "statistic", "df", "p_value"), names(df))
  int_cols <- intersect(c("n_complete"), names(df))
  for (nm in num_cols) df[[nm]] <- as.numeric(df[[nm]])
  for (nm in int_cols) df[[nm]] <- as.integer(df[[nm]])

  out <- .mc_finalize_summary_df(df, class_name = "summary.biserial_corr")
  attr(out, "overview") <- .mc_summary_corr_matrix(object)
  attr(out, "has_ci") <- include_ci
  attr(out, "has_p") <- include_p
  attr(out, "conf.level") <- if (is.null(ci)) NA_real_ else ci$conf.level
  attr(out, "digits") <- digits
  attr(out, "ci_digits") <- ci_digits
  attr(out, "p_digits") <- p_digits
  attr(out, "inference_method") <- if (is.null(inf)) NA_character_ else inf$method
  out
}

#' @rdname biserial
#' @method summary biserial_corr
#' @param object An object of class \code{biserial_corr}.
#' @param ci_digits Integer; digits for biserial confidence limits in the
#'   pairwise summary.
#' @param p_digits Integer; digits for biserial p-values in the pairwise
#'   summary.
#' @export
summary.biserial_corr <- function(object, n = NULL, topn = NULL,
                                  max_vars = NULL, width = NULL,
                                  ci_digits = 3,
                                  p_digits = 4,
                                  show_ci = NULL, ...) {
  check_inherits(object, "biserial_corr")
  show_ci <- .mc_validate_yes_no(
    show_ci,
    arg = "show_ci",
    default = .mc_display_option("summary_show_ci", "yes")
  )

  ci <- .mc_biserial_ci_attr(object)
  inf <- .mc_biserial_inference_attr(object)
  if (is.null(ci) && (is.null(inf) || is.null(inf$p_value))) {
    return(.mc_summary_corr_matrix(object, header = "Latent correlation summary", topn = topn))
  }

  .mc_biserial_pairwise_summary(
    object,
    ci_digits = ci_digits,
    p_digits = p_digits,
    show_ci = show_ci
  )
}

#' @rdname biserial
#' @method print summary.biserial_corr
#' @param x An object of class \code{summary.biserial_corr}.
#' @export
print.summary.biserial_corr <- function(x, digits = NULL, n = NULL,
                                        topn = NULL, max_vars = NULL,
                                        width = NULL, show_ci = NULL, ...) {
  .mc_print_pairwise_summary_digest(
    x,
    title = "Biserial correlation summary",
    digits = .mc_coalesce(digits, 4),
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ci_method = if (isTRUE(attr(x, "has_ci"))) "fisher_z_biserial" else NULL,
    extra_items = c(inference = attr(x, "inference_method", exact = TRUE)),
    ...
  )
  invisible(x)
}

#' @title Summary Method for Correlation Matrices
#'
#' @description
#' Prints compact summary statistics returned by matrix-style
#' \code{summary()} methods in \pkg{matrixCorr}.
#'
#' @param x An object of class \code{summary.matrixCorr}.
#' @param digits Integer; number of decimal places to print.
#' @param n Optional row threshold for compact preview output.
#' @param topn Optional number of leading/trailing rows to show when truncated.
#' @param max_vars Optional maximum number of visible columns; `NULL` derives this
#'   from console width.
#' @param width Optional display width; defaults to \code{getOption("width")}.
#' @param show_ci One of \code{"yes"} or \code{"no"}.
#' @param ... Unused.
#'
#' @return Invisibly returns \code{x}.
#' @method print summary.matrixCorr
#' @export
print.summary.matrixCorr <- function(x,
                                     digits = 4,
                                     n = NULL,
                                     topn = NULL,
                                     max_vars = NULL,
                                     width = NULL,
                                     show_ci = NULL,
                                     ...) {
  if (inherits(x, "summary.corr_result")) {
    x2 <- x
    attr(x2, "summary_title") <- "Correlation summary"
    return(print.summary.corr_result(
      x2,
      digits = digits,
      n = n,
      topn = topn,
      max_vars = max_vars,
      width = width,
      ...
    ))
  }
  cfg <- .mc_resolve_display_args(
    context = "summary",
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci
  )

  digest <- .mc_corr_summary_digest_items(x, digits = digits, show_ci = cfg$show_ci)
  if (isTRUE(x$has_ci) && identical(cfg$show_ci, "yes")) {
    if (is.finite(x$ci_conf_level)) {
      digest <- c(digest, ci_level = sprintf("%g%%", 100 * x$ci_conf_level))
    }
    if (!is.null(x$ci_method) && length(x$ci_method) == 1L &&
        !is.na(x$ci_method) && nzchar(x$ci_method)) {
      digest <- c(digest, ci_method = x$ci_method)
    }
    ci_width_txt <- .mc_format_scalar_or_range(
      x$ci_width_min,
      x$ci_width_max,
      digits = 3,
      integer = FALSE
    )
    if (!is.null(ci_width_txt)) {
      digest <- c(digest, ci_width = ci_width_txt)
    }
    if (is.finite(x$ci_cross_zero) && x$ci_cross_zero > 0L) {
      digest <- c(digest, cross_zero = sprintf("%s pair(s)", .mc_count_fmt(x$ci_cross_zero)))
    }
  }
  if (!is.null(x$lambda) && is.finite(x$lambda)) {
    digest <- c(digest, lambda = format(signif(x$lambda, digits = digits)))
  }
  if (!is.null(x$rho) && is.finite(x$rho)) {
    digest <- c(digest, rho = format(signif(x$rho, digits = digits)))
  }
  if (!is.null(x$jitter) && is.finite(x$jitter)) {
    digest <- c(digest, jitter = format(signif(x$jitter, digits = digits)))
  }

  .mc_print_named_digest(
    digest,
    header = .mc_coalesce(x$header, "Correlation summary")
  )

  if (is.data.frame(x$top_results) && nrow(x$top_results)) {
    .mc_print_ranked_pairs_preview(
      x$top_results,
      header = "Strongest pairs by |estimate|",
      topn = cfg$topn,
      max_vars = cfg$max_vars,
      width = cfg$width,
      show_ci = cfg$show_ci,
      ...
    )
  }
  invisible(x)
}

#' @method print summary.corr_matrix
#' @export
print.summary.corr_matrix <- function(x,
                                      digits = 4,
                                      n = NULL,
                                      topn = NULL,
                                      max_vars = NULL,
                                      width = NULL,
                                      show_ci = NULL,
                                      ...) {
  print.summary.matrixCorr(
    x,
    digits = digits,
    n = n,
    topn = topn,
    max_vars = max_vars,
    width = width,
    show_ci = show_ci,
    ...
  )
}

print.summary_corr_matrix <- print.summary.corr_matrix

#' @title Summary Method for Latent Correlation Matrices
#'
#' @description
#' Prints compact summary statistics returned by
#' \code{summary.tetrachoric_corr()}, \code{summary.polychoric_corr()},
#' \code{summary.polyserial_corr()}, and \code{summary.biserial_corr()}.
#'
#' @param x An object of class \code{summary.latent_corr}.
#' @param digits Integer; number of decimal places to print.
#' @param ... Unused.
#'
#' @return Invisibly returns \code{x}.
#' @method print summary.latent_corr
#' @export
print.summary.latent_corr <- function(x, digits = 4, ...) {
  if (inherits(x, "summary.corr_result")) {
    x2 <- x
    attr(x2, "summary_title") <- "Latent correlation summary"
    return(print.summary.corr_result(x2, digits = digits, ...))
  }
  print.summary.matrixCorr(x, digits = digits, ...)
}

print.summary_latent_corr <- print.summary.latent_corr

Try the matrixCorr package in your browser

Any scripts or data that you put into this service are public.

matrixCorr documentation built on April 18, 2026, 5:06 p.m.