#' Generalised Partial‑Least‑Squares **Correlation** (GPLS‑C)
#'
#' `genplsc()` extracts latent **correlation directions** between two data
#' sets \eqn{X\in\mathbb{R}^{n\times p}} and \eqn{Y\in\mathbb{R}^{n\times q}}
#' while respecting *row* and *column* metric constraints
#' \eqn{M_x,\,A_x,\,M_y,\,A_y}.
#' It unifies the “direct cross‑product” formulation and the
#' “row/column‑likeness operator” trick in a single function:
#'
#' \itemize{
#' \item **`dual = FALSE`** – explicitly forms the embedded cross‑product
#' \eqn{C = (\sqrt{M_x}XA_x^{1/2})^\top\,(\sqrt{M_y}Y A_y^{1/2})}
#' and does (partial) SVD on the \eqn{p\times q} matrix.
#' \item **`dual = TRUE`** – avoids the large \eqn{p\times q} object by
#' projecting onto row‑ or column‑space operators (à la *Allen
#' et al., 2014*), chosen automatically or via `force_row_likeness`
#' / `force_col_likeness`.
#' }
#'
#' Unlike `genpls()` from \code{genplsr.R}, which targets a regression setting and
#' extracts latent components that best predict \eqn{Y} from \eqn{X}, `genplsc()`
#' focuses purely on the correlation structure shared between \eqn{X} and \eqn{Y}.
#'
#' For every constraint matrix you may either
#'
#' * give a **fixed rank** (`rank_Mx = 50`) or
#' * leave it `NULL` / `NA` / `0` to let an **adaptive** scheme keep the
#' smallest eigen‑subspace that captures `var_threshold` of its total
#' variance (capped by `max_k`).
#'
#' A truncated SVD is used when `svd_method = "RSpectra"` or `"irlba"` and the
#' package is installed; otherwise base `svd()` is used.
#'
#' @section Returned object:
#' A [`cross_projector`][multivarious::cross_projector] with slots
#'
#' \describe{
#' \item{`vx`, `vy`}{original‑space loadings \eqn{p\times k}, \eqn{q\times k}}
#' \item{`Tx`, `Ty`}{embedded factor scores \eqn{n\times k}}
#' \item{`d_full`}{all singular values (length \eqn{\le k})}
#' }
#' and a class vector `c("genplsc", "cross_projector", "projector")`.
#'
#' @inheritParams build_sqrt_mult
#' @inheritParams build_invsqrt_mult
#'
#' @param X,Y Numeric matrices with identical numbers of rows.
#' @param Mx,Ax,My,Ay Row/column constraint matrices (NULL ⇒ identity).
#' @param ncomp Number of latent factors. If \eqn{\le 0},
#' uses \eqn{\min(p,q)}.
#' @param preproc_x,preproc_y [`multivarious`] preprocessing objects
#' (e.g. `center()`, `standardize()`, `pass()`).
#' @param rank_Mx,rank_Ax,rank_My,rank_Ay
#' Fixed ranks; `NULL`/`NA`/`0` ⇒ adaptive.
#' @param var_threshold Fraction of variance to keep in adaptive mode.
#' @param max_k Hard cap for adaptive sub‑space dimension.
#' @param dual `FALSE` for direct cross‑product; `TRUE` for
#' operator trick.
#' @param force_row_likeness,force_col_likeness
#' Override automatic choice when `dual = TRUE`.
#' @param svd_method `"base"`, `"RSpectra"`, or `"irlba"`.
#' @param tol Numerical tolerance (e.g. tiny eigenvalues).
#' @param verbose Logical; print progress if `TRUE`.
#' @param ... Ignored; added for forward compatibility.
#'
#' @return A [`cross_projector`][multivarious::cross_projector] object of class
#' `c("genplscorr", "cross_projector", "projector")` containing the original
#' space loadings `vx`, `vy`, embedded factor scores `Tx`, `Ty`, and the full
#' set of singular values `d_full`.
#'
#' @references
#' Allen, G. I., Grosenick, L., & Taylor, J. (2014).
#' *A Generalized Least‑Squares Matrix Decomposition*. **JASA**, 109(505), 145–159.
#'
#' @examples
#' set.seed(1)
#' n <- 50; p <- 40; q <- 30
#' X <- matrix(rnorm(n * p), n, p)
#' Y <- matrix(rnorm(n * q), n, q)
#'
#' # unconstrained, direct
#' fit1 <- genplsc(X, Y, ncomp = 5)
#'
#' # unconstrained, operator trick (row likeness here)
#' fit2 <- genplsc(X, Y, ncomp = 5, dual = TRUE)
#'
#' # diagonal row constraints and adaptive rank
#' Mx <- diag(abs(rnorm(n)))
#' My <- diag(abs(rnorm(n)))
#' if (requireNamespace("RSpectra", quietly = TRUE)) {
#' fit3 <- genplsc(
#' X, Y, Mx = Mx, My = My, dual = TRUE,
#' var_threshold = 0.95, ncomp = 4,
#' svd_method = "RSpectra"
#' )
#' }
#'
#' @aliases genplscorr genplscorr2
#' @export
genplsc <- function(X, Y,
Mx=NULL, Ax=NULL, My=NULL, Ay=NULL,
ncomp = 2,
preproc_x = pass(), preproc_y = pass(),
rank_Mx=NULL, rank_Ax=NULL,
rank_My=NULL, rank_Ay=NULL,
var_threshold = .99, max_k = 200,
dual = FALSE,
force_row_likeness = NULL,
force_col_likeness = NULL,
svd_method = c("base","RSpectra","irlba"),
tol = 1e-9,
verbose = FALSE)
{
## ---- 0. sanity ---------------------------------------------------
if(!is.numeric(X) || !is.matrix(X)) stop("X must be a numeric matrix")
if(!is.numeric(Y) || !is.matrix(Y)) stop("Y must be a numeric matrix")
if(nrow(X) != nrow(Y)) stop("X and Y must have the same number of rows")
if(ncomp <= 0) ncomp <- min(ncol(X), ncol(Y))
svd_method <- match.arg(svd_method)
n <- nrow(X); p <- ncol(X); q <- ncol(Y)
if(!is.null(force_row_likeness) && !is.null(force_col_likeness))
stop("Specify only one of force_row_likeness or force_col_likeness")
## ---- 1. defaults for constraints --------------------------------
if(is.null(Mx)) Mx <- Matrix::Diagonal(n)
if(is.null(My)) My <- Matrix::Diagonal(n)
if(is.null(Ax)) Ax <- Matrix::Diagonal(p)
if(is.null(Ay)) Ay <- Matrix::Diagonal(q)
## ---- 2. preprocess ----------------------------------------------
px <- prep(preproc_x); py <- prep(preproc_y)
Xp <- init_transform(px, X)
Yp <- init_transform(py, Y)
## ---- 3. helper closures & local SVD function ---------------------
Mx_sqrt <- build_sqrt_mult( Mx, rank_Mx, var_threshold, max_k, tol=tol)
My_sqrt <- build_sqrt_mult( My, rank_My, var_threshold, max_k, tol=tol)
Ax_sqrt <- build_sqrt_mult( Ax, rank_Ax, var_threshold, max_k, tol=tol)
Ay_sqrt <- build_sqrt_mult( Ay, rank_Ay, var_threshold, max_k, tol=tol)
Ax_isqrt<- build_invsqrt_mult(Ax, rank_Ax, var_threshold, max_k, tol=tol)
Ay_isqrt<- build_invsqrt_mult(Ay, rank_Ay, var_threshold, max_k, tol=tol)
partial_svd <- function(M, k, method = c("base", "RSpectra", "irlba")) {
method <- match.arg(method)
k_eff <- min(k, nrow(M), ncol(M))
sv <- switch(method,
RSpectra = if (requireNamespace("RSpectra", quietly = TRUE)) {
RSpectra::svds(M, k = k_eff, nu = k_eff, nv = k_eff)
} else {
base::svd(M, nu = k_eff, nv = k_eff)
},
irlba = if (requireNamespace("irlba", quietly = TRUE)) {
irlba::irlba(M, nv = k_eff, nu = k_eff)
} else {
base::svd(M, nu = k_eff, nv = k_eff)
},
base::svd(M, nu = k_eff, nv = k_eff))
list(u = sv$u[, seq_len(k_eff), drop = FALSE],
d = sv$d[seq_len(k_eff)],
v = sv$v[, seq_len(k_eff), drop = FALSE])
}
## ---- 4. decide row/col likeness (for dual path) ------------------
want_row_like <- if(!dual) NA else
if(!is.null(force_row_likeness)) TRUE else
if(!is.null(force_col_likeness)) FALSE else n < (p+q)/2
## ---- 5. DIRECT path (dual = FALSE) -------------------------------
if(!dual){
Xtil <- col_transform(row_transform(Xp,Mx_sqrt), Ax_sqrt) # n × p
Ytil <- col_transform(row_transform(Yp,My_sqrt), Ay_sqrt) # n × q
C <- crossprod(Xtil, Ytil) # p × q
sv <- partial_svd(C, ncomp, method = svd_method)
Tt <- Xtil %*% sv$u
Ut <- Ytil %*% sv$v
vx <- Ax_isqrt(sv$u) # original-space loadings
vy <- Ay_isqrt(sv$v)
return(cross_projector(
vx=vx, vy=vy, Tx=Tt, Ty=Ut,
preproc_x=px, preproc_y=py,
d_full=sv$d, ncomp=ncol(vx),
classes=c("genplscorr"),
dual=FALSE, rank_Mx=rank_Mx, rank_Ax=rank_Ax,
rank_My=rank_My, rank_Ay=rank_Ay,
var_threshold=var_threshold, max_k=max_k,
tol=tol, verbose=verbose))
}
## ---- 6. OPERATOR path (dual = TRUE) ------------------------------
if(want_row_like){ # ------ 6A. row likeness -----
Xr <- row_transform(Xp, Mx_sqrt) # n × p
Yr <- row_transform(Yp, My_sqrt) # n × q
Gx <- crossprod( Ax_sqrt(t(Xr)) ) # n × n
Gy <- crossprod( Ay_sqrt(t(Yr)) ) # n × n
Qx <- partial_eig_approx(Gx, rank_Mx,
var_threshold,max_k,tol=tol)$Q
Qy <- partial_eig_approx(Gy, rank_My,
var_threshold,max_k,tol=tol)$Q
## Make sure Xr and Yr have matching column counts for tcrossprod
if(ncol(Xr) != ncol(Yr)) {
if(ncol(Xr) < ncol(Yr)) {
Xpad <- cbind(Xr, matrix(0, nrow(Xr), ncol(Yr) - ncol(Xr)))
Ypad <- Yr
} else {
Xpad <- Xr
Ypad <- cbind(Yr, matrix(0, nrow(Yr), ncol(Xr) - ncol(Yr)))
}
XYR <- tcrossprod(Xpad, Ypad) # n × n
} else {
XYR <- tcrossprod(Xr, Yr) # n × n
}
sv <- partial_svd( crossprod(Qx, XYR %*% Qy), ncomp,
method = svd_method)
Tt <- Qx %*% sv$u
Ut <- Qy %*% sv$v
Xstar<- col_transform(Xr, Ax_sqrt) # n × p
Ystar<- col_transform(Yr, Ay_sqrt) # n × q
vx <- Ax_isqrt( crossprod(Xstar, Tt) )
vy <- Ay_isqrt( crossprod(Ystar, Ut) )
}else{ # ------ 6B. column likeness ---
Xc <- col_transform(Xp, Ax_sqrt) # n × p
Yc <- col_transform(Yp, Ay_sqrt) # n × q
Hx <- crossprod( Mx_sqrt(Xc) ) # p × p
Hy <- crossprod( My_sqrt(Yc) ) # q × q
Qx <- partial_eig_approx(Hx, rank_Ax,
var_threshold,max_k,tol=tol)$Q
Qy <- partial_eig_approx(Hy, rank_Ay,
var_threshold,max_k,tol=tol)$Q
XYc <- crossprod( row_transform(Xc,Mx_sqrt),
row_transform(Yc,My_sqrt) ) # p × q
sv <- partial_svd( crossprod(Qx, XYc %*% Qy), ncomp,
method = svd_method)
vx_emb <- Qx %*% sv$u
vy_emb <- Qy %*% sv$v
Tt <- row_transform(col_transform(Xp,Ax_sqrt),Mx_sqrt) %*% vx_emb
Ut <- row_transform(col_transform(Yp,Ay_sqrt),My_sqrt) %*% vy_emb
vx <- Ax_isqrt(vx_emb)
vy <- Ay_isqrt(vy_emb)
}
## ---- 6C. sign convention ---------------------------------------
signs <- sign(vx[1,])
signs[signs == 0] <- 1
vx <- sweep(vx, 2, signs, `*`)
vy <- sweep(vy, 2, signs, `*`)
Tt <- sweep(Tt, 2, signs, `*`)
Ut <- sweep(Ut, 2, signs, `*`)
## ---- 7. return ---------------------------------------------------
cross_projector(
vx=vx, vy=vy, Tx=Tt, Ty=Ut,
preproc_x=px, preproc_y=py,
d_full=sv$d, ncomp=ncol(vx),
classes=c("genplscorr"),
dual=TRUE, want_row_like=want_row_like,
rank_Mx=rank_Mx, rank_Ax=rank_Ax,
rank_My=rank_My, rank_Ay=rank_Ay,
var_threshold=var_threshold, max_k=max_k,
tol=tol, verbose=verbose)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.