R/alsi.R
In alsi: Aggregated Latent Space Index for Multiple Correspondence Analysis

Documented in alsi alsi_workflow make_disjunctive mca_align mca_bootstrap mca_indicator mca_pa plot_category_projections plot_subspace_stability .read_xlsx summarise_matrix to01

## =======================================================================
## ALSI: Aggregated Latent Space Index for MCA
## Core functions for stability-validated aggregation
## =======================================================================


## ---- Utility Functions ----

#' Read Excel file with fallback options
#' @keywords internal
.read_xlsx <- function(path) {
  if (requireNamespace("readxl", quietly = TRUE)) {
    return(as.data.frame(readxl::read_xlsx(path)))
  }
  if (requireNamespace("openxlsx", quietly = TRUE)) {
    return(as.data.frame(openxlsx::read.xlsx(path)))
  }
  stop("Please install 'readxl' or 'openxlsx' to read .xlsx files.")
}

#' Convert various formats to binary 0/1
#' @keywords internal
to01 <- function(x) {
  if (is.logical(x)) return(as.integer(x))
  if (is.factor(x)) x <- as.character(x)

  
  if (is.character(x)) {
    x <- tolower(trimws(x))
    x <- ifelse(x %in% c("true", "t", "yes", "y", "1"), 1L,
                ifelse(x %in% c("false", "f", "no", "n", "0"), 0L, NA_integer_))
  } else {
    x <- as.integer(x)
  }
  
  if (any(!is.na(x) & !x %in% c(0L, 1L))) {
    stop("Non-binary values detected; recode to 0/1.")
  }
  x
}

#' Create disjunctive (indicator) matrix from binary data
#' @keywords internal
make_disjunctive <- function(X01) {
  X01 <- as.data.frame(X01)
  
  out <- lapply(names(X01), function(v) {
    f <- factor(X01[[v]], levels = c(0, 1))
    mm <- model.matrix(~ f - 1, data = data.frame(f = f))
    colnames(mm) <- paste0(v, "_", c("0", "1"))
    mm
  })
  
  Xd <- do.call(cbind, out)
  storage.mode(Xd) <- "numeric"
  Xd
}

#' Summarize matrix columns (median and quantiles)
#' @keywords internal
summarise_matrix <- function(X, probs = c(0.05, 0.95)) {
  t(apply(X, 2, function(x) {
    c(median = median(x, na.rm = TRUE),
      setNames(quantile(x, probs = probs, na.rm = TRUE), 
               paste0("p", probs * 100)))
  }))
}

## ---- Core MCA Engine ----

#' Perform Multiple Correspondence Analysis on binary indicator matrix
#' 
#' @param Xbin01 Data frame or matrix with binary (0/1) variables
#' @return List containing MCA results with eigenvalues, coordinates, and masses
#' @keywords internal
mca_indicator <- function(Xbin01) {
  # Validate and convert to binary
  Xbin01 <- as.data.frame(Xbin01)
  Xbin01[] <- lapply(Xbin01, to01)
  Xbin01 <- Xbin01[complete.cases(Xbin01), , drop = FALSE]
  
  N <- nrow(Xbin01)
  J <- ncol(Xbin01)
  
  if (N < 2) stop("Need at least 2 complete rows for MCA.")
  if (J < 2) stop("Need at least 2 variables for MCA.")
  
  # Create disjunctive coding
  Xd <- make_disjunctive(Xbin01)
  M <- ncol(Xd)
  
  # Correspondence matrix
  P <- Xd / (N * J)
  r <- rep(1 / N, N)  # row masses (constant for indicator matrix)
  c <- colSums(P)      # column masses
  
  if (any(c <= 0)) stop("Zero column mass; check input coding.")
  
  # Standardized residual matrix: S = Dr^{-1/2} (P - rc') Dc^{-1/2}
  # Since r_i = 1/N constant, Dr^{-1/2} = sqrt(N) * I
  Pc <- P - tcrossprod(r, c)
  Dc_inv_sqrt <- diag(1 / sqrt(c), nrow = M)
  S <- sqrt(N) * Pc %*% Dc_inv_sqrt
  
  # SVD
  sv <- svd(S)
  eig <- sv$d^2
  
  # Principal coordinates
  # Row: Fmat = Dr^{-1/2} U Sigma = sqrt(N) U Sigma
  if (length(sv$d) == 1) {
    Fmat <- sqrt(N) * sv$u * sv$d
  } else {
    Fmat <- sqrt(N) * sv$u %*% diag(sv$d)
  }
  
  # Column: G = Dc^{-1/2} V Sigma
  if (length(sv$d) == 1) {
    G <- Dc_inv_sqrt %*% sv$v * sv$d
  } else {
    G <- Dc_inv_sqrt %*% sv$v %*% diag(sv$d)
  }
  rownames(G) <- colnames(Xd)
  
  structure(
    list(
      Xbin = Xbin01,
      Xd = Xd,
      N = N,
      J = J,
      M = M,
      eig = eig,
      svd = sv,
      masses = list(r = r, c = c),
      F = Fmat,
      G = G
    ),
    class = "mca_fit"
  )
}

## ---- S3 Methods for mca_fit ----

#' @export
print.mca_fit <- function(x, ...) {
  cat("MCA fit:\n")
  cat("  N =", x$N, "individuals\n")
  cat("  J =", x$J, "binary variables\n")
  cat("  M =", x$M, "categories (disjunctive coding)\n")
  cat("  Total inertia:", round(sum(x$eig), 4), "\n")
  cat("  Top 5 eigenvalues:", round(x$eig[1:min(5, length(x$eig))], 4), "\n")
  invisible(x)
}

## =====================================================================
## Main Functions (Exported API)
## =====================================================================

#' Parallel Analysis for MCA Dimensionality Assessment
#'
#' Compares observed MCA eigenvalues against reference distributions from
#' permuted data to identify statistically meaningful dimensions.
#'
#' @param data Data frame or path to .xlsx file
#' @param vars Character vector of binary variable names
#' @param B Integer, number of permutations (default: 2000)
#' @param q Numeric, reference quantile for retention (default: 0.95)
#' @param seed Integer, random seed for reproducibility
#' @param max_dims Integer, maximum dimensions to display in plot
#' @param verbose Logical, print progress messages
#'
#' @return S3 object of class \code{mca_pa} containing:
#'   \item{eig_obs}{Observed eigenvalues from the MCA of the original data}
#'   \item{eig_q}{Reference quantiles from permutation distribution}
#'   \item{eig_perm}{Matrix of permutation eigenvalues (B x dimensions)}
#'   \item{K_star}{Suggested number of dimensions to retain (where observed > reference)}
#'   \item{fit}{MCA fit object (class \code{mca_fit}) from original data}
#'   \item{q}{Quantile threshold used for comparison}
#'   \item{B}{Number of permutations performed}
#'
#' @export
#' @examples
#' \donttest{
#' # Using included ANR2 dataset
#' data(ANR2)
#' pa <- mca_pa(ANR2, vars = names(ANR2), B = 100)
#' print(pa$K_star)
#' }
mca_pa <- function(data, 
                   vars, 
                   B = 2000, 
                   q = 0.95, 
                   seed = 20260123,
                   max_dims = 20, 
                   verbose = TRUE) {
  
  # Load and prepare data
  dat <- if (is.character(data)) .read_xlsx(data) else as.data.frame(data)
  
  if (!all(vars %in% names(dat))) {
    missing <- setdiff(vars, names(dat))
    stop("Missing variables: ", paste(missing, collapse = ", "))
  }
  
  # Clean data once
  Xbin <- dat[, vars, drop = FALSE]
  Xbin[] <- lapply(Xbin, to01)
  Xbin <- Xbin[complete.cases(Xbin), , drop = FALSE]
  
  # Reference MCA
  fit0 <- mca_indicator(Xbin)
  eig_obs <- fit0$eig
  Kmax <- length(eig_obs)
  
  # Permutation loop
  eig_perm <- matrix(NA_real_, nrow = B, ncol = Kmax)
  set.seed(seed)
  
  if (verbose) {
    cat("Running parallel analysis with B =", B, "permutations...\n")
    pb <- txtProgressBar(0, B, style = 3)
  }
  
  for (b in seq_len(B)) {
    # Permute within each variable
    Xp <- as.data.frame(lapply(Xbin, sample, replace = FALSE))
    eig_perm[b, ] <- mca_indicator(Xp)$eig
    
    if (verbose) setTxtProgressBar(pb, b)
  }
  
  if (verbose) close(pb)
  
  # Determine retention
  eig_q <- apply(eig_perm, 2, quantile, probs = q, na.rm = TRUE)
  K_star <- sum(eig_obs > eig_q)
  
  # Create output object
  out <- structure(
    list(
      eig_obs = eig_obs,
      eig_q = eig_q,
      eig_perm = eig_perm,
      q = q,
      B = B,
      K_star = K_star,
      fit = fit0
    ),
    class = "mca_pa"
  )
  
  if (verbose) {
    cat("\nParallel Analysis complete.\n")
    cat("Suggested K* (dims where eig_obs > q =", q, ") =", K_star, "\n\n")
  }
  
  # Generate diagnostic plot
  Kplot <- min(max_dims, Kmax)
  plot(1:Kplot, eig_obs[1:Kplot], 
       type = "b", pch = 19, col = "black",
       xlab = "Dimension k", 
       ylab = "Eigenvalue (inertia)",
       main = sprintf("MCA Parallel Analysis (B=%d, q=%.2f)", B, q),
       ylim = range(c(eig_obs[1:Kplot], eig_q[1:Kplot])))
  lines(1:Kplot, eig_q[1:Kplot], type = "b", pch = 1, col = "blue")
  legend("topright", 
         legend = c("Observed", sprintf("PA q=%.2f", q)),
         lty = 1, pch = c(19, 1), col = c("black", "blue"),
         bty = "n")
  
  invisible(out)
}

#' @export
print.mca_pa <- function(x, ...) {
  cat("MCA Parallel Analysis\n")
  cat("  B =", x$B, "permutations\n")
  cat("  Quantile threshold: q =", x$q, "\n")
  cat("  Suggested K* =", x$K_star, "\n")
  cat("  Top eigenvalues:\n")
  top_k <- min(10, length(x$eig_obs))
  comp <- data.frame(
    k = 1:top_k,
    Observed = round(x$eig_obs[1:top_k], 4),
    Reference = round(x$eig_q[1:top_k], 4),
    Retain = ifelse(1:top_k <= x$K_star, "Yes", "No")
  )
  print(comp, row.names = FALSE)
  invisible(x)
}

## ========================================================================
## Procrustes Alignment
## ========================================================================

#' Align MCA solution via Procrustes rotation with sign anchoring
#'
#' Performs orthogonal Procrustes rotation to align a set of category 
#' coordinates to a reference solution, then applies sign correction to
#' maximize agreement with the reference on each dimension.
#'
#' @param G Matrix of category coordinates to align (M x K)
#' @param Gref Reference matrix of category coordinates (M x K)
#'
#' @return List containing:
#'   \item{G_aligned}{Matrix of aligned category coordinates (M x K), 
#'     rotated and sign-corrected to match the reference}
#'   \item{R}{Orthogonal rotation matrix (K x K) used for alignment}
#'
#' @export
#' @examples
#' # Create example matrices
#' set.seed(123)
#' Gref <- matrix(rnorm(20), nrow = 10, ncol = 2)
#' G <- Gref %*% matrix(c(0.8, 0.6, -0.6, 0.8), 2, 2)
#' 
#' # Align G to Gref
#' aligned <- mca_align(G, Gref)
#' print(aligned$G_aligned)
mca_align <- function(G, Gref) {
  G <- as.matrix(G)
  Gref <- as.matrix(Gref)
  
  if (nrow(G) != nrow(Gref) || ncol(G) != ncol(Gref)) {
    stop("G and Gref must have identical dimensions.")
  }
  
  # Orthogonal Procrustes: minimize ||G R - Gref||_F
  # Solution: R = U V' from SVD(G' Gref) = U Sigma V'
  svp <- svd(crossprod(G, Gref))
  R <- tcrossprod(svp$u, svp$v)
  Gal <- G %*% R
  
  # Sign anchoring: maximize agreement with reference
  for (k in seq_len(ncol(Gal))) {
    if (sum(Gal[, k] * Gref[, k]) < 0) {
      Gal[, k] <- -Gal[, k]
    }
  }
  
  list(G_aligned = Gal, R = R)
}

## =====================================================================
## Bootstrap Stability Assessment
## =====================================================================

#' Bootstrap-Based Subspace Stability Assessment
#'
#' Evaluates reproducibility of retained MCA dimensions via bootstrap resampling.
#' Quantifies stability using Procrustes principal angles (subspace-level) and
#' Tucker's congruence coefficients (dimension-level).
#'
#' @param data Data frame or path to .xlsx file
#' @param vars Character vector of binary variable names
#' @param K Integer, number of dimensions to retain and assess
#' @param B Integer, number of bootstrap resamples (default: 2000)
#' @param seed Integer, random seed for reproducibility
#' @param verbose Logical, print progress messages
#'
#' @return S3 object of class \code{mca_bootstrap} containing:
#'   \item{ref}{Reference MCA fit object (class \code{mca_fit})}
#'   \item{K}{Number of dimensions assessed}
#'   \item{B}{Number of bootstrap resamples performed}
#'   \item{angles}{Matrix of principal angles in degrees (B x K), measuring
#'     subspace similarity between bootstrap and reference solutions}
#'   \item{tucker}{Matrix of Tucker congruence coefficients (B x K), measuring
#'     dimension-level similarity after Procrustes alignment}
#'   \item{angles_summary}{Summary statistics (median, 5th, 95th percentiles) 
#'     for principal angles}
#'   \item{tucker_summary}{Summary statistics (median, 5th, 95th percentiles) 
#'     for Tucker congruence coefficients}
#'
#' @export
#' @examples
#' \donttest{
#' # Using included ANR2 dataset
#' data(ANR2)
#' boot <- mca_bootstrap(ANR2, vars = names(ANR2), K = 3, B = 100)
#' print(boot)
#' }
mca_bootstrap <- function(data, 
                          vars, 
                          K, 
                          B = 2000, 
                          seed = 20260123,
                          verbose = TRUE) {
  
  # Load and prepare data
  dat <- if (is.character(data)) .read_xlsx(data) else as.data.frame(data)
  
  if (!all(vars %in% names(dat))) {
    missing <- setdiff(vars, names(dat))
    stop("Missing variables: ", paste(missing, collapse = ", "))
  }
  
  # Clean data once
  Xbin <- dat[, vars, drop = FALSE]
  Xbin[] <- lapply(Xbin, to01)
  Xbin <- Xbin[complete.cases(Xbin), , drop = FALSE]
  
  # Reference fit
  fit_ref <- mca_indicator(Xbin)
  
  if (K < 1 || K > ncol(fit_ref$svd$v)) {
    stop("K must be between 1 and ", ncol(fit_ref$svd$v))
  }
  
  N <- fit_ref$N
  Vref <- fit_ref$svd$v[, 1:K, drop = FALSE]
  Gref <- fit_ref$G[, 1:K, drop = FALSE]
  
  # Pre-allocate results
  angles <- matrix(NA_real_, nrow = B, ncol = K)
  tucker <- matrix(NA_real_, nrow = B, ncol = K)
  
  set.seed(seed)
  
  if (verbose) {
    cat("Running bootstrap stability assessment with B =", B, "resamples...\n")
    pb <- txtProgressBar(0, B, style = 3)
  }
  
  for (b in seq_len(B)) {
    # Bootstrap resample
    idx <- sample.int(N, size = N, replace = TRUE)
    Xb <- fit_ref$Xbin[idx, , drop = FALSE]
    fit_b <- mca_indicator(Xb)
    
    # Extract K-dimensional solutions
    Vb <- fit_b$svd$v[, 1:K, drop = FALSE]
    Gb <- fit_b$G[, 1:K, drop = FALSE]
    
    # Principal angles: measure subspace similarity
    # Computed from singular values of V_ref' V_b
    s <- svd(crossprod(Vref, Vb), nu = 0, nv = 0)$d
    s <- pmax(pmin(s, 1), -1)  # numerical stability
    angles[b, ] <- sort(acos(s) * 180 / pi)
    
    # Tucker congruence: measure dimension-level similarity (after alignment)
    Gal <- mca_align(Gb, Gref)$G_aligned
    tucker[b, ] <- colSums(Gal * Gref) / sqrt(colSums(Gal^2) * colSums(Gref^2))
    
    if (verbose) setTxtProgressBar(pb, b)
  }
  
  if (verbose) close(pb)
  
  # Label columns
  colnames(angles) <- paste0("theta", 1:K)
  colnames(tucker) <- paste0("phi", 1:K)
  
  # Summary statistics
  angles_summary <- summarise_matrix(angles, probs = c(0.05, 0.95))
  tucker_summary <- summarise_matrix(tucker, probs = c(0.05, 0.95))
  
  out <- structure(
    list(
      ref = fit_ref,
      K = K,
      B = B,
      angles = angles,
      tucker = tucker,
      angles_summary = angles_summary,
      tucker_summary = tucker_summary
    ),
    class = "mca_bootstrap"
  )
  
  if (verbose) {
    cat("\n\n--- Principal Angles (degrees) ---\n")
    print(round(angles_summary, 2))
    cat("\n--- Tucker Congruence Coefficients ---\n")
    print(round(tucker_summary, 3))
    cat("\n")
  }
  
  invisible(out)
}

#' @export
print.mca_bootstrap <- function(x, ...) {
  cat("MCA Bootstrap Stability Assessment\n")
  cat("  K =", x$K, "dimensions\n")
  cat("  B =", x$B, "bootstrap resamples\n\n")
  
  cat("Principal Angles (degrees):\n")
  print(round(x$angles_summary, 2))
  
  cat("\nTucker Congruence:\n")
  print(round(x$tucker_summary, 3))
  
  invisible(x)
}

#' @export
plot.mca_bootstrap <- function(x, ...) {
  plot_subspace_stability(x)
}

## ===============================================================
## ALSI Computation
## ===============================================================

#' Compute Aggregated Latent Space Index (ALSI)
#'
#' Calculates ALSI as a variance-weighted Euclidean norm of row principal
#' coordinates within a retained K-dimensional MCA subspace.
#'
#' @param Fmat Matrix of row principal coordinates (N x K or larger)
#' @param eig Vector of eigenvalues (inertias)
#' @param K Integer, number of dimensions to aggregate
#'
#' @return S3 object of class \code{alsi} containing:
#'   \item{alpha}{Numeric vector of ALSI values (length N), representing
#'     each individual's variance-weighted distance from the centroid 
#'     in the retained MCA subspace}
#'   \item{w}{Variance weights (length K), computed as the proportion of 
#'     retained inertia for each dimension}
#'   \item{alpha_vec}{Aggregated direction vector (length K), equal to 
#'     sqrt(w), used for projecting category coordinates}
#'   \item{K}{Number of dimensions used in aggregation}
#'
#' @export
#' @examples
#' # Create example data
#' set.seed(123)
#' Fmat <- matrix(rnorm(100 * 4), nrow = 100, ncol = 4)
#' eig <- c(0.5, 0.3, 0.15, 0.05)
#' 
#' # Compute ALSI
#' a <- alsi(Fmat, eig, K = 3)
#' print(a)
#' hist(a$alpha, main = "Distribution of ALSI")
alsi <- function(Fmat, eig, K) {
  Fmat <- as.matrix(Fmat)
  
  if (K < 1) stop("K must be >= 1")
  if (ncol(Fmat) < K) stop("Fmat has fewer than K columns")
  if (length(eig) < K) stop("eig has fewer than K elements")
  
  # Variance weights: proportion of retained inertia
  w <- eig[1:K] / sum(eig[1:K])
  alpha_vec <- sqrt(w)
  
  # ALSI: weighted L2 norm
  # alpha_i = sqrt(sum_k w_k * f_ik^2)
  Fk <- Fmat[, 1:K, drop = FALSE]
  alpha_vals <- sqrt(rowSums(sweep(Fk^2, 2, w, "*")))
  
  structure(
    list(
      alpha = alpha_vals,
      w = w,
      alpha_vec = alpha_vec,
      K = K
    ),
    class = "alsi"
  )
}

#' @export
print.alsi <- function(x, ...) {
  cat("ALSI (Aggregated Latent Space Index)\n")
  cat("  K =", x$K, "dimensions\n")
  cat("  N =", length(x$alpha), "individuals\n")
  cat("  Variance weights:", round(x$w, 4), "\n")
  cat("  ALSI range: [", 
      round(min(x$alpha), 4), ", ", 
      round(max(x$alpha), 4), "]\n", sep = "")
  cat("  ALSI mean:", round(mean(x$alpha), 4), "\n")
  cat("  ALSI median:", round(median(x$alpha), 4), "\n")
  invisible(x)
}

#' @export
summary.alsi <- function(object, ...) {
  cat("ALSI Summary Statistics\n")
  print(summary(object$alpha))
  invisible(object)
}

## ==========================================================
## Visualization Functions
## ==========================================================

#' Plot Subspace Stability Diagnostics
#'
#' Creates diagnostic plots showing distributions of principal angles
#' and Tucker congruence coefficients across bootstrap resamples.
#'
#' @param boot_obj Object of class \code{mca_bootstrap}
#'
#' @return No return value, called for side effects. The function creates 
#'   a two-panel figure with: (1) boxplots of principal angles (left panel), 
#'   showing the distribution of subspace similarity across bootstrap resamples 
#'   for each dimension; and (2) boxplots of Tucker congruence coefficients 
#'   (right panel), showing dimension-level replicability with reference lines 
#'   at phi = 0.85 (good) and phi = 0.95 (excellent).
#'
#' @export
#' @examples
#' \donttest{
#' data(ANR2)
#' boot <- mca_bootstrap(ANR2, vars = names(ANR2), K = 3, B = 100)
#' plot_subspace_stability(boot)
#' }
plot_subspace_stability <- function(boot_obj) {
  if (!inherits(boot_obj, "mca_bootstrap")) {
    stop("boot_obj must be of class 'mca_bootstrap'")
  }
  
  angles <- boot_obj$angles
  tucker <- boot_obj$tucker
  
  # Save original par settings
  op <- par(mfrow = c(1, 2), mar = c(4.5, 4.5, 3, 1))
  on.exit(par(op), add = TRUE)
  
  # Principal angles boxplot
  boxplot(angles, 
          main = "Principal Angles (degrees)",
          xlab = "Dimension k", 
          ylab = expression(theta[k]),
          col = "lightblue",
          border = "darkblue")
  abline(h = 0, lty = 2, col = "gray50")
  
  # Tucker congruence boxplot
  boxplot(tucker, 
          main = "Tucker Congruence",
          xlab = "Dimension k", 
          ylab = expression(phi[k]),
          ylim = c(max(0.5, min(tucker, na.rm = TRUE) - 0.05), 1),
          col = "lightgreen",
          border = "darkgreen")
  abline(h = c(0.85, 0.95), lty = 2, col = c("orange", "red"))
  legend("bottomright", 
         legend = c("phi = 0.85 (fair)", "phi = 0.95 (excellent)"),
         lty = 2, col = c("orange", "red"), bty = "n", cex = 0.8)
  
  invisible(NULL)
}

#' Plot Category Projections in MCA Space
#'
#' Visualizes category coordinates in a 2D MCA subspace and optionally
#' displays projections onto the aggregated ALSI direction.
#'
#' @param fit MCA fit object (class \code{mca_fit})
#' @param K Number of dimensions in retained subspace
#' @param alpha_vec Optional aggregated direction vector (from \code{alsi()})
#' @param dim_pair Integer vector of length 2, dimensions to plot (default: c(1,2))
#' @param cex Character expansion for labels
#' @param top_n Number of top categories to display by projection (default: 15)
#'
#' @return No return value, called for side effects. The function creates 
#'   a scatter plot of category coordinates in the specified 2D subspace, 
#'   with category labels displayed. If \code{alpha_vec} is provided, it also 
#'   prints the top categories ranked by their absolute projection onto the 
#'   ALSI direction to the console.
#'
#' @export
#' @examples
#' \donttest{
#' data(ANR2)
#' pa <- mca_pa(ANR2, vars = names(ANR2), B = 100, verbose = FALSE)
#' fit <- pa$fit
#' plot_category_projections(fit, K = pa$K_star)
#' }
plot_category_projections <- function(fit, 
                                      K, 
                                      alpha_vec = NULL, 
                                      dim_pair = c(1, 2),
                                      cex = 0.8,
                                      top_n = 15) {
  
  if (!inherits(fit, "mca_fit")) {
    fit <- structure(fit, class = "mca_fit")  # try to coerce
  }
  
  Gk <- fit$G[, 1:K, drop = FALSE]
  
  # Validate dim_pair
  d1 <- dim_pair[1]
  d2 <- dim_pair[2]
  if (d1 > K || d2 > K || d1 < 1 || d2 < 1) {
    stop("dim_pair must be within 1..K")
  }
  
  # Category biplot
  plot(Gk[, d1], Gk[, d2], 
       pch = 19, col = "steelblue",
       xlab = paste0("Dimension ", d1),
       ylab = paste0("Dimension ", d2),
       main = sprintf("MCA Category Map (Dims %d-%d)", d1, d2))
  text(Gk[, d1], Gk[, d2], 
       labels = rownames(Gk), 
       pos = 3, cex = cex, col = "black")
  abline(h = 0, v = 0, lty = 2, col = "gray60")
  
  # Aggregated direction projections
  if (!is.null(alpha_vec)) {
    if (length(alpha_vec) != K) {
      stop("alpha_vec must have length K")
    }
    
    proj <- drop(Gk %*% alpha_vec)
    ord <- order(abs(proj), decreasing = TRUE)
    
    cat("\nTop", min(top_n, length(proj)), "categories by |projection onto ALSI direction|:\n")
    top_idx <- ord[1:min(top_n, length(proj))]
    top_proj <- data.frame(
      Category = rownames(Gk)[top_idx],
      Projection = round(proj[top_idx], 4)
    )
    print(top_proj, row.names = FALSE)
  }
  
  invisible(NULL)
}

## ===================
## Example Workflow
## ===================

#' Complete ALSI Analysis Workflow
#'
#' Runs a complete ALSI analysis including parallel analysis for dimensionality
#' assessment, bootstrap stability evaluation, ALSI computation, and visualization.
#'
#' @param path Path to data file (.xlsx) or data frame
#' @param vars Character vector of binary variable names
#' @param B_pa Number of permutations for parallel analysis (default: 2000)
#' @param B_boot Number of bootstrap resamples (default: 2000)
#' @param q Quantile for parallel analysis (default: 0.95)
#' @param seed Random seed for reproducibility
#'
#' @return List (returned invisibly) containing all analysis objects:
#'   \item{pa}{Parallel analysis results (class \code{mca_pa})}
#'   \item{boot}{Bootstrap stability results (class \code{mca_bootstrap})}
#'   \item{fit}{MCA fit object (class \code{mca_fit})}
#'   \item{alsi}{ALSI values (class \code{alsi})}
#'   \item{K}{Number of dimensions retained based on parallel analysis}
#'
#' @export
#' @examples
#' \donttest{
#' # Complete workflow using included ANR2 dataset
#' data(ANR2)
#' results <- alsi_workflow(
#'   path = ANR2,
#'   vars = names(ANR2),
#'   B_pa = 100,
#'   B_boot = 100
#' )
#' 
#' # Access components
#' results$pa       # Parallel analysis
#' results$boot     # Bootstrap stability
#' results$alsi     # ALSI values
#' }
alsi_workflow <- function(path,
                          vars,
                          B_pa = 2000,
                          B_boot = 2000,
                          q = 0.95,
                          seed = 20260123) {
  
  cat("========================================\n")
  cat("ALSI Workflow\n")
  cat("========================================\n\n")
  
  # Step 1: Parallel analysis
  cat("Step 1: Parallel Analysis\n")
  cat("----------------------------------------\n")
  pa <- mca_pa(path, vars, B = B_pa, q = q, seed = seed, verbose = TRUE)
  K <- pa$K_star
  
  cat("\n\n")
  
  # Step 2: Bootstrap stability
  cat("Step 2: Bootstrap Stability Assessment\n")
  cat("----------------------------------------\n")
  boot <- mca_bootstrap(path, vars, K = K, B = B_boot, seed = seed, verbose = TRUE)
  plot_subspace_stability(boot)
  
  cat("\n\n")
  
  # Step 3: ALSI computation
  cat("Step 3: ALSI Computation\n")
  cat("----------------------------------------\n")
  fit <- boot$ref
  alsi_obj <- alsi(fit$F, fit$eig, K = K)
  print(alsi_obj)
  
  cat("\n\n")
  
  # Step 4: Category projections
  cat("Step 4: Category Projections\n")
  cat("----------------------------------------\n")
  plot_category_projections(fit, K = K, alpha_vec = alsi_obj$alpha_vec)
  
  cat("\n========================================\n")
  cat("Workflow complete!\n")
  cat("========================================\n")
  
  invisible(list(
    pa = pa,
    boot = boot,
    fit = fit,
    alsi = alsi_obj,
    K = K
  ))
}
Any scripts or data that you put into this service are public.
alsi documentation built on Feb. 17, 2026, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
alsi
Aggregated Latent Space Index for Multiple Correspondence Analysis

R/alsi.R
In alsi: Aggregated Latent Space Index for Multiple Correspondence Analysis

Defines functions alsi_workflow plot_category_projections plot_subspace_stability summary.alsi print.alsi alsi plot.mca_bootstrap print.mca_bootstrap mca_bootstrap mca_align print.mca_pa mca_pa print.mca_fit mca_indicator summarise_matrix make_disjunctive to01 .read_xlsx

Documented in alsi alsi_workflow make_disjunctive mca_align mca_bootstrap mca_indicator mca_pa plot_category_projections plot_subspace_stability .read_xlsx summarise_matrix to01

Try the alsi package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

alsi Aggregated Latent Space Index for Multiple Correspondence Analysis

R/alsi.R In alsi: Aggregated Latent Space Index for Multiple Correspondence Analysis

Defines functions alsi_workflow plot_category_projections plot_subspace_stability summary.alsi print.alsi alsi plot.mca_bootstrap print.mca_bootstrap mca_bootstrap mca_align print.mca_pa mca_pa print.mca_fit mca_indicator summarise_matrix make_disjunctive to01 .read_xlsx

Documented in alsi alsi_workflow make_disjunctive mca_align mca_bootstrap mca_indicator mca_pa plot_category_projections plot_subspace_stability .read_xlsx summarise_matrix to01

Try the alsi package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

alsi
Aggregated Latent Space Index for Multiple Correspondence Analysis

R/alsi.R
In alsi: Aggregated Latent Space Index for Multiple Correspondence Analysis