#' Compute Weighted Group Means
#'
#' Given a data matrix \code{X} (size \code{n x d}) and a class-weight matrix \code{F} (size \code{c x n}),
#' this function computes the weighted means of \code{X} for each class.
#' Unlike hard (0/1) assignments, each sample contributes partially to each class's mean
#' according to its weight.
#'
#' @param X A numeric matrix of shape \code{n x d}, where \code{n} is the number of samples (rows)
#' and \code{d} is the number of features (columns).
#' @param F A numeric matrix of shape \code{c x n}, where \code{c} is the number of classes
#' and \code{n} is the number of samples. \code{F[i, j]} is the weight of sample \code{j}
#' for class \code{i}, and each row must have a positive sum of weights.
#'
#' @return A numeric matrix of shape \code{c x d}, where each row is the weighted mean of \code{X}
#' for one class.
#'
#' @examples
#' # Suppose we have 5 samples (rows), 2 features (cols),
#' # and 3 classes. F has shape (3 x 5).
#' X <- matrix(1:10, nrow=5, ncol=2)
#' F <- matrix(runif(3*5, min=0, max=1), nrow=3)
#' # Ensure each row of F sums to something > 0
#' res_means <- weighted_group_means(X, F)
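#' # A quick sanity check (illustrative): row 1 of the result should equal the
#' # manually normalized weighted column means for class 1
#' all.equal(res_means[1, ],
#'           colSums(X * (F[1, ] / sum(F[1, ]))),
#'           check.attributes = FALSE)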
#'
#' @export
weighted_group_means <- function(X, F) {
  row_sums_F <- rowSums(F)
  if (any(row_sums_F <= 0)) {
    stop("Each row of F must have a positive sum of weights (at least one sample with positive weight).")
  }
  ret <- do.call(rbind, lapply(seq_len(nrow(F)), function(i) {
    w <- F[i, ]
    # weighted column means; weights are normalized to sum to 1
    matrixStats::colWeightedMeans(X, w / sum(w))
  }))
  rownames(ret) <- rownames(F)
  ret
}

#' Soft-Label Linear Discriminant Analysis (SL-LDA)
#'
#' This function implements a \emph{soft-label} variant of Linear Discriminant Analysis (LDA),
#' following the approach described in:
#'
#' Zhao, M., Zhang, Z., Chow, T.W.S., & Li, B. (2014).
#' "A general soft label based Linear Discriminant Analysis for semi-supervised dimensionality reduction."
#' \emph{Neurocomputing, 135}, 250-264.
#'
#' Instead of hard (0/1) labels, each sample can have fractional memberships (soft labels) across
#' multiple classes. These memberships are encoded in a matrix \code{C}, typically obtained via
#' a label-propagation or fuzzy labeling step. SL-LDA uses these soft memberships to form
#' generalized scatter matrices \(\widetilde{S}_w\) and \(\widetilde{S}_b\), then solves
#' an LDA-like dimension-reduction problem in a PCA subspace via a \emph{two-step} approach:
#'
#' \enumerate{
#' \item \strong{Preprocessing}:
#' Apply a \code{preproc} function (e.g. \code{center()}) to the data \code{X}.
#' \item \strong{PCA}:
#' Project the data onto the top \code{dp} principal components (to handle rank deficiency).
#' \item \strong{Compute Soft-Label Scatter} in the PCA space:
#' \itemize{
#' \item Let \(\mathbf{F} = \mathbf{C}^\top\) be size \(\mathrm{c \times n}\).
#' \item Let \(\mathbf{E} = \mathrm{diag}(\text{rowSums}(\mathbf{C}))\) (size \(\mathrm{n \times n}\)).
#' \item Let \(\mathbf{G} = \mathrm{diag}(\text{colSums}(\mathbf{C}))\) (size \(\mathrm{c \times c}\)).
#' \item Form \(\widetilde{S}_w = X_p^\top ( E - F^\top G^{-1} F ) X_p + \alpha I\) (within-class),
#' and \(\widetilde{S}_b = X_p^\top \bigl(F^\top G^{-1}F - \tfrac{E \mathbf{e} \mathbf{e}^\top E}{\mathbf{e}^\top E \mathbf{e}}\bigr) X_p\) (between-class),
#' where \(\mathbf{e}\) is the length-\(n\) column vector of ones.
#' }
#' \item \strong{Within-class projection} (\code{di}):
#' Partially diagonalize \(\widetilde{S}_w\).
#' In code, we extract \code{di} eigenvectors.
#' (\emph{Note}: Some references keep the \emph{largest} eigenvalues, others the \emph{smallest}.)
#' \item \strong{Between-class projection} (\code{dl}):
#' Project the (soft) class means into the \code{di}-dim subspace, then run a small PCA
#' for dimension \code{dl}.
#' \item \strong{Combine}:
#' Multiply \(\mathrm{(d \times dp)} \cdot (\mathrm{dp \times di}) \cdot (\mathrm{di \times dl})\)
#' to get the final \(\mathrm{(d \times dl)}\) projection matrix.
#' }
#'
#' @param X A numeric matrix \(\mathrm{n \times d}\), rows = samples, columns = features.
#' @param C A numeric matrix \(\mathrm{n \times c}\) of soft memberships.
#' \code{C[i, j]} = weight of sample \code{i} for class \code{j}.
#' Must be \(\ge 0\); row sums can be any positive value
#' (if \(\sum_j C[i,j] = 1\), each row is a probability distribution).
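#' For example, with three classes the row \code{c(0.7, 0.2, 0.1)} is a valid soft label,
#' whereas a hard label would be \code{c(1, 0, 0)}.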
#' @param preproc A \code{pre_processor} from \pkg{multivarious}, e.g. \code{center()} or \code{pass()}.
#' Defaults to \code{pass()} (no centering).
#' @param dp Integer. Number of principal components to keep in the first PCA step.
#' Defaults to \code{min(dim(X))}.
#' @param di Integer. Dimension of the \emph{within-class} subspace. Default \code{dp - 1}.
#' @param dl Integer. Dimension of the final subspace for \emph{between-class} separation.
#' Default \code{ncol(C) - 1}.
#' @param alpha A numeric ridge parameter (\(\ge 0\)). If \code{alpha > 0}, we add \(\alpha I\)
#' to \(\widetilde{S}_w\) to ensure invertibility. Default \code{0}.
#'
#' @return A \code{\link[multivarious]{discriminant_projector}} object with subclass
#' \code{"soft_lda"} containing:
#' \itemize{
#' \item \code{v} ~ The \(\mathrm{(d \times dl)}\) final projection matrix.
#' \item \code{s} ~ The \(\mathrm{(n \times dl)}\) projected scores of the training set.
#' \item \code{sdev} ~ The std dev of each dimension in \code{s}.
#' \item \code{labels} ~ Currently set to \code{colnames(C)} (or \code{NULL}).
#' \item \code{preproc} ~ The preprocessing object used.
#' \item \code{classes} ~ A string \code{"soft_lda"}.
#' }
#'
#' @details
#' In typical references, one might pick the \emph{largest} eigenvalues of \(\widetilde{S}_w\)
#' for stable inversion, but certain versions (like Null-LDA) use the \emph{smallest} eigenvalues.
#' Adjust the code in \code{RSpectra::eigs_sym()} accordingly if you prefer a different variant.
#'
#' If you want to confirm \(\widetilde{S}_t = \widetilde{S}_w + \widetilde{S}_b\) numerically,
#' you can define a helper function for \(\widetilde{S}_t\) and compare it to \(\widetilde{S}_w + \widetilde{S}_b\).
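#' For example, a self-contained helper along the following lines (a sketch; the name
#' \code{st_scatter} and the reconstruction of the weighting terms from \code{C} are
#' illustrative) can be compared against the sum of the within- and between-class scatter
#' matrices, before the ridge term \(\alpha I\) is added:
#' \preformatted{
#' st_scatter <- function(X_in, C) {
#'   E     <- diag(rowSums(C))      # (n x n) diagonal of row masses
#'   e     <- rep(1, nrow(C))
#'   denom <- sum(rowSums(C))       # e^T E e, the total membership mass
#'   M     <- E - (E \%*\% tcrossprod(e) \%*\% E) / denom
#'   crossprod(X_in, M \%*\% X_in)  # total scatter of X_in
#' }
#' }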
#'
#' @references
#' Zhao, M., Zhang, Z., Chow, T.W.S., & Li, B. (2014).
#' "A general soft label based Linear Discriminant Analysis for semi-supervised dimensionality reduction."
#' \emph{Neurocomputing, 135}, 250-264.
#'
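#' @examples
#' # A minimal illustrative call on random data (not taken from the reference paper):
#' set.seed(1)
#' X <- matrix(rnorm(30 * 4), nrow = 30, ncol = 4)  # 30 samples, 4 features
#' C <- matrix(runif(30 * 3), nrow = 30, ncol = 3)  # soft memberships for 3 classes
#' C <- C / rowSums(C)                              # each row becomes a probability vector
#' fit <- soft_lda(X, C, dp = 4, di = 3, dl = 2)
#' # New samples can typically be projected with, e.g., multivarious::project(fit, newdata)
#'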
#' @export
soft_lda <- function(X,
                     C,
                     preproc = pass(),
                     dp = min(dim(X)),
                     di = dp - 1,
                     dl = ncol(C) - 1,
                     alpha = 0.0) {
  ## 1) Basic checks
  if (!is.matrix(X)) {
    stop("X must be a matrix.")
  }
  if (!is.matrix(C)) {
    stop("C must be a matrix of soft membership weights.")
  }
  if (nrow(C) != nrow(X)) {
    stop("C must have the same number of rows as X (each row of C corresponds to one sample).")
  }
  if (any(C < 0)) {
    stop("All entries of 'C' must be non-negative (soft memberships).")
  }

  n <- nrow(X)
  d <- ncol(X)
  c <- ncol(C)  # number of classes
  ## 2) Preprocessing => typically center or pass
  procres <- multivarious::prep(preproc)
  Xp <- init_transform(procres, X)        # shape (n x d)

  ## 3) PCA => reduce to dp
  pca_red <- multivarious::pca(Xp, ncomp = dp)
  proj_dp <- pca_red$v                    # (d x dp) loadings
  Xpca <- scores(pca_red)                 # (n x dp)

  ## 4) Build the weighting matrices
  F <- t(C)                               # shape (c x n)
  E <- Matrix::Diagonal(x = rowSums(C))   # (n x n)
  G <- diag(colSums(C))                   # (c x c)
  e_vec <- rep(1, n)
  denom <- as.numeric(t(e_vec) %*% E %*% e_vec)  # e^T E e => total membership mass (a scalar)
  ## 5) Weighted scatter in PCA space
  #    within-class  => Sw = Xpca^T [ E - F^T G^-1 F ] Xpca + alpha I
  #    between-class => Sb = Xpca^T [ F^T G^-1 F - (E e e^T E)/denom ] Xpca
  sw_scatter <- function(X_in) {
    Ft <- t(F)
    invG <- diag(1 / diag(G))
    M <- E - Ft %*% invG %*% F
    crossprod(X_in, M %*% X_in)
  }
  sb_scatter <- function(X_in) {
    Ft <- t(F)
    invG <- diag(1 / diag(G))
    M1 <- Ft %*% invG %*% F
    M2 <- (E %*% tcrossprod(e_vec) %*% E) / denom
    crossprod(X_in, (M1 - M2) %*% X_in)
  }

  Sw_raw <- sw_scatter(Xpca)
  Sb_raw <- sb_scatter(Xpca)
  # add alpha => ridge for invertibility
  Sw <- Sw_raw + alpha * diag(dp)
  ## 6) Two-step LDA approach
  # Step A: partial diagonalization of Sw => dimension di
  if (di > dp) {
    warning("di > dp is invalid; setting di = dp.")
    di <- dp
  }
  if (di < 1) stop("di must be >= 1.")
  # Typically for stable "whitening", many references pick the largest eigenvalues => which = "LM",
  # but some use "SM". Adjust as needed.
  E_i <- RSpectra::eigs_sym(Sw, k = di, which = "LM")
  proj_di <- E_i$vectors %*% diag(1 / sqrt(E_i$values), nrow = di)  # whiten

  # Weighted group means in PCA space
  group_means_pca <- weighted_group_means(Xpca, F)  # (c x dp)
  gm_proj <- group_means_pca %*% proj_di            # (c x di)

  # Step B: small PCA for dl
  if (dl > di) {
    warning("dl > di is invalid; setting dl = di.")
    dl <- di
  }
  if (dl < 1) stop("dl must be >= 1.")
  E_l <- multivarious::pca(gm_proj, ncomp = dl)
  proj_dl <- E_l$v                                  # (di x dl)

  proj_final <- proj_dp %*% proj_di %*% proj_dl     # (d x dl)
  # build final training scores => (n x dl)
  s_mat <- Xpca %*% proj_di %*% proj_dl
  # return
  dp_obj <- multivarious::discriminant_projector(
    v       = proj_final,
    s       = s_mat,
    sdev    = apply(s_mat, 2, sd),
    preproc = procres,
    labels  = colnames(C),
    classes = "soft_lda"
  )
  return(dp_obj)
}