#' Nyström approximation for kernel-based decomposition (Unified Version)
#'
#' Approximate the eigen-decomposition of a large kernel matrix K using either
#' the standard Nyström method (Williams & Seeger, 2001) or the Double Nyström method
#' (Lim et al., 2015, Algorithm 3).
#'
#' The Double Nyström method introduces an intermediate step that reduces the
#' size of the decomposition problem, potentially improving efficiency and scalability.
#'
#' @param X A numeric matrix or data frame of size (N x D), where N is number of samples.
#' @param kernel_func A kernel function with signature `kernel_func(X, Y, ...)`.
#' If NULL, defaults to a linear kernel: `X %*% t(Y)`.
#' @param ncomp Number of components (eigenvectors/eigenvalues) to return.
#' Cannot exceed the number of landmarks. If `NULL`, defaults to the smaller of the number of landmarks and `ncol(X)`.
#' @param landmarks A vector of row indices (1-based, from X) specifying the landmark points.
#' If NULL, `nlandmarks` points are sampled uniformly at random.
#' @param nlandmarks The number of landmark points to sample if `landmarks` is NULL. Default is 10.
#' @param preproc A pre-processing pipeline object (e.g., from `prep()`) or a pre-processing function
#' (default `pass()`) to apply before computing the kernel.
#' @param method Either "standard" (the classic single-stage Nyström) or "double" (the two-stage Double Nyström method).
#' @param center Logical. If TRUE, attempts kernel centering. Default FALSE.
#' **Note:** True kernel centering (required for equivalence to Kernel PCA) is
#' computationally expensive and **not fully implemented**. Setting `center=TRUE` currently only
#' issues a warning. For results equivalent to standard PCA, use a linear kernel
#' and center the input data `X` (e.g., via `preproc`). See Details.
#' @param l Intermediate rank for the double Nyström method. Ignored if `method="standard"`.
#' Typically, `l < length(landmarks)` to reduce complexity.
#' @param use_RSpectra Logical. If TRUE, use `RSpectra::svds` for partial SVD. Recommended for large problems.
#' @param ... Additional arguments passed to `kernel_func`.
#'
#' @return A `bi_projector` object with class "nystrom_approx" and additional fields:
#' \describe{
#' \item{\code{v}}{The eigenvectors (N x ncomp) approximating the kernel eigenbasis.}
#' \item{\code{s}}{The scores (N x ncomp) = v * diag(sdev), analogous to principal component scores.}
#' \item{\code{sdev}}{The square roots of the eigenvalues.}
#' \item{\code{preproc}}{The pre-processing pipeline used.}
#' \item{\code{meta}}{A list containing parameters and intermediate results used (method, landmarks, kernel_func, etc.).}
#' }
#'
#' @details
#' **Kernel Centering:** Standard Kernel PCA requires the kernel matrix K to be centered
#' in the feature space (Schölkopf et al., 1998). This implementation currently
#' **does not perform kernel centering** by default (`center=FALSE`) due to computational complexity.
#' Consequently, with non-linear kernels, the results approximate the eigen-decomposition
#' of the *uncentered* kernel matrix, and are not strictly equivalent to Kernel PCA.
#' If using a linear kernel, centering the input data `X` (e.g., using `preproc=prep(center())`)
#' yields results equivalent to standard PCA, which is often sufficient.
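#'
#' For instance, a minimal sketch of the linear-kernel case (centering the inputs
#' directly here; using every row as a landmark makes the decomposition exact, so
#' the scores should match PCA scores up to sign):
#'
#' ```r
#' X0  <- scale(matrix(rnorm(100 * 8), 100, 8), center = TRUE, scale = FALSE)
#' fit <- nystrom_approx(X0, ncomp = 3, landmarks = seq_len(nrow(X0)))
#' pca <- prcomp(X0, center = FALSE)
#' # max(abs(abs(fit$s) - abs(pca$x[, 1:3])))   # should be ~ 0
#' ```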
#'
#' **Standard Nyström:** Uses the method from Williams & Seeger (2001), including the
#' `sqrt(m/N)` scaling for eigenvectors and `N/m` for eigenvalues (`m` landmarks, `N` samples).
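#'
#' Concretely, if \eqn{(\lambda_i, u_i)} are the eigenpairs of the \eqn{m \times m}
#' landmark kernel \eqn{K_{mm}}, the eigenvalues and eigenvectors of the full
#' \eqn{N \times N} kernel are approximated as
#' \deqn{\hat{\lambda}_i = \frac{N}{m}\,\lambda_i, \qquad
#'       \hat{u}_i = \sqrt{\frac{m}{N}}\,\frac{1}{\lambda_i}\,K_{\cdot m}\,u_i,}
#' where \eqn{K_{\cdot m}} is the \eqn{N \times m} kernel block between all samples
#' and the landmarks.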
#'
#' **Double Nyström:** Implements Algorithm 3 from Lim et al. (2015).
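#'
#' The intermediate rank-`l` step shrinks the final decomposition from an `m x m`
#' problem to an `l x l` one. A self-contained sketch with a linear kernel,
#' mirroring the implementation below (variable names here are for illustration only):
#'
#' ```r
#' set.seed(1)
#' Xs  <- matrix(rnorm(200 * 50), 200, 50)
#' idx <- sample(200, 40); l <- 15; k <- 5
#' K_mm <- Xs[idx, ] %*% t(Xs[idx, ])              # m x m landmark kernel
#' K_Nm <- Xs %*% t(Xs[idx, ])                     # N x m kernel block
#' e1 <- eigen(K_mm, symmetric = TRUE)             # stage 1: rank-l basis for K_mm
#' W  <- K_Nm %*% e1$vectors[, 1:l] %*% diag(1 / sqrt(e1$values[1:l]))
#' e2 <- eigen(crossprod(W), symmetric = TRUE)     # stage 2: small l x l problem
#' U  <- W %*% e2$vectors[, 1:k] %*% diag(1 / sqrt(e2$values[1:k]))  # approx. eigenvectors of K
#' ```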
#'
#' @references
#' Schölkopf, B., Smola, A., & Müller, K. R. (1998). Nonlinear component analysis as a kernel eigenvalue problem.
#' *Neural computation*, 10(5), 1299-1319.
#'
#' Williams, C. K. I., & Seeger, M. (2001). Using the Nyström Method to Speed Up Kernel Machines.
#' In *Advances in Neural Information Processing Systems 13* (pp. 682-688).
#'
#' Lim, D., Jin, R., & Zhang, L. (2015). An Efficient and Accurate Nystrom Scheme for Large-Scale Data Sets.
#' *Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence* (pp. 2765-2771).
#'
#' @importFrom RSpectra svds
#' @importFrom stats rnorm
#' @export
#'
#' @examples
#' set.seed(123)
#' # Example data matrix (kept modest in size so the example runs quickly)
#' X <- matrix(rnorm(1000*300), 1000, 300)
#'
#' # Standard Nyström
#' res_std <- nystrom_approx(X, ncomp=5, nlandmarks=50, method="standard")
#' print(res_std)
#'
#' # Double Nyström
#' res_db <- nystrom_approx(X, ncomp=5, nlandmarks=50, method="double", l=20)
#' print(res_db)
#'
#' # Projection (using standard result as example)
#' scores_new <- project(res_std, X[1:10,])
#' head(scores_new)
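#'
#' # A nonlinear kernel can be supplied via `kernel_func`; a Gaussian (RBF) sketch
#' # (the helper below is illustrative, not part of the package; the bandwidth is
#' # roughly the typical pairwise distance for this simulated data):
#' rbf_kernel <- function(X, Y, sigma = 25) {
#'   d2 <- outer(rowSums(X^2), rowSums(Y^2), "+") - 2 * X %*% t(Y)
#'   exp(-pmax(d2, 0) / (2 * sigma^2))
#' }
#' res_rbf <- nystrom_approx(X, kernel_func = rbf_kernel, ncomp = 5, nlandmarks = 50)
#' print(res_rbf)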
nystrom_approx <- function(X, kernel_func=NULL, ncomp=NULL,
landmarks=NULL, nlandmarks=10, preproc=pass(),
method=c("standard","double"),
center=FALSE,
l=NULL, use_RSpectra=TRUE, ...) {
method <- match.arg(method)
# Basic checks
chk::chkor(chk::vld_matrix(X), chk::vld_s4_class(X, "Matrix"))
N <- nrow(X)
# If no landmarks given, sample them
if (is.null(landmarks)) {
if (nlandmarks > N) {
warning("Number of landmarks requested exceeds number of samples. Using N landmarks.")
nlandmarks <- N
}
if (nlandmarks <= 0) {
stop("'nlandmarks' must be positive.")
}
landmarks <- sort(sample(N, nlandmarks))
}
m <- length(landmarks)
# Validate ncomp
if (is.null(ncomp)) {
ncomp <- min(m, ncol(X)) # default: capped by the number of landmarks and by ncol(X)
} else {
chk::chk_whole_number(ncomp)
chk::chk_gt(ncomp, 0)
if (ncomp > m) {
warning(sprintf("Requested ncomp (%d) exceeds number of landmarks (%d). Setting ncomp = %d.", ncomp, m, m))
ncomp <- m
}
}
# Ensure kernel_func is valid
if (!is.null(kernel_func) && !is.function(kernel_func)) {
stop("kernel_func must be a function or NULL.")
}
# Default to linear kernel if none provided
if (is.null(kernel_func)) {
kernel_func <- function(X, Y, ...) X %*% t(Y)
}
# Handle preproc argument (accept function or prep object)
if (!inherits(preproc, "pre_processor")) {
if (is.function(preproc)) {
proc <- prep(preproc)
} else {
stop("'preproc' must be a 'prep' object or a pre-processing function.")
}
} else {
proc <- preproc
}
# Placeholder check for centering - not implemented yet
if (center) {
warning("Kernel centering (center=TRUE) is requested but not yet implemented. Proceeding with uncentered kernel.")
# Future: implement kernel centering logic here or within kernel calls
}
X_preprocessed <- init_transform(proc, X)
# Determine sets
non_landmarks <- setdiff(seq_len(N), landmarks)
X_l <- X_preprocessed[landmarks, , drop=FALSE]
X_nl <- if (length(non_landmarks) > 0) X_preprocessed[non_landmarks, , drop=FALSE] else matrix(0, 0, ncol(X_preprocessed))
# Compute kernel submatrices
K_mm <- kernel_func(X_l, X_l, ...)
K_nm <- if (length(non_landmarks) > 0) kernel_func(X_nl, X_l, ...) else matrix(0, 0, m)
# Function for partial SVD or full eigen if needed, with re-orthogonalization for eigen
low_rank_decomp <- function(M, k) {
# Check if k exceeds dimensions or if RSpectra should not be used
if (k >= nrow(M) || !use_RSpectra) {
# full eigen decomposition
eig <- eigen(M, symmetric=TRUE)
# Select top k and re-orthogonalize
vecs <- eig$vectors[, 1:k, drop=FALSE]
# Re-orthogonalize using QR
vecs_ortho <- qr.Q(qr(vecs))
return(list(d = eig$values[1:k], v = vecs_ortho))
} else {
# partial SVD using RSpectra
sv <- tryCatch(RSpectra::svds(M, k=k, nu=0, nv=k), # only need V
error = function(e) {
warning(sprintf("RSpectra::svds failed: %s. Falling back to eigen().", e$message))
NULL
})
if (is.null(sv)) {
# Fallback to eigen if svds fails
eig <- eigen(M, symmetric=TRUE)
vecs <- eig$vectors[, 1:k, drop=FALSE]
vecs_ortho <- qr.Q(qr(vecs))
return(list(d = eig$values[1:k], v = vecs_ortho))
} else {
# svds returns U, D, V with M = U D V^T.
# For a symmetric positive semi-definite M (as the kernel matrices here),
# the singular values equal the eigenvalues and the right singular vectors V
# are the corresponding (orthonormal) eigenvectors.
return(list(d = sv$d, v = sv$v))
}
}
}
meta_info <- list(method = method,
landmarks = landmarks,
X_landmarks = X_l, # preprocessed landmark rows, needed by project()
kernel_func = kernel_func,
ncomp = ncomp,
center = center,
extra_args = list(...)) # Store extra args passed to kernel
if (method == "standard") {
# Standard Nyström with proper scaling
eig_mm <- eigen(K_mm, symmetric=TRUE)
lambda_mm_raw <- eig_mm$values
U_mm_raw <- eig_mm$vectors
# Filter out near-zero eigenvalues using scaled epsilon
eps <- max(lambda_mm_raw) * .Machine$double.eps * 100
keep <- which(lambda_mm_raw > eps)
if (length(keep) == 0) stop("No significant eigenvalues found in K_mm.")
# Keep only up to ncomp components
keep <- keep[seq_len(min(ncomp, length(keep)))]
lambda_mm <- lambda_mm_raw[keep]
U_mm <- U_mm_raw[, keep, drop=FALSE]
k_eff <- length(lambda_mm) # Effective number of components
# Approximate eigenvalues of full K: lambda_hat = (N/m) * lambda_mm
lambda_hat <- (N / m) * lambda_mm
sdev <- sqrt(lambda_hat)
# Approximate eigenvectors U_hat = sqrt(m/N) * [U_mm ; K_nm %*% U_mm %*% diag(1/lambda_mm)]
scaling_factor_u <- sqrt(m / N)
inv_lambda_mm_mat <- diag(1 / lambda_mm, nrow = k_eff, ncol = k_eff)
U_mm_scaled <- U_mm * scaling_factor_u
U_nm_scaled <- (K_nm %*% (U_mm %*% inv_lambda_mm_mat)) * scaling_factor_u
U_full <- matrix(0, N, k_eff)
U_full[landmarks, ] <- U_mm_scaled
if (length(non_landmarks) > 0) {
U_full[non_landmarks, ] <- U_nm_scaled
}
# Scores s = U_hat * diag(sdev_hat)
s <- sweep(U_full, 2, sdev, "*") # column-wise scaling, avoids forming diag(sdev)
# Store results
out <- bi_projector(
v = U_full,
s = s,
sdev = sdev,
preproc = proc,
classes = c("nystrom_approx", "standard"),
meta = c(meta_info, list(
lambda_mm = lambda_mm, # Eigenvalues of K_mm used
U_mm = U_mm # Eigenvectors of K_mm used
)),
...
)
return(out)
} else { # method == "double"
# Double Nyström Method
if (is.null(l)) stop("For method='double', you must specify intermediate rank 'l'.")
if (l <= 0 || l > m) stop("Intermediate rank 'l' must be > 0 and <= number of landmarks.")
if (l < ncomp) {
warning(sprintf("Intermediate rank l (%d) is less than final ncomp (%d). Setting l = ncomp.", l, ncomp))
l <- ncomp
}
# 1. Approximate principal subspace of K_mm to rank l
approx_l <- low_rank_decomp(K_mm, l)
lambda_l_raw <- approx_l$d
V_S_l_raw <- approx_l$v
# Filter near-zero eigenvalues
eps_l <- max(lambda_l_raw) * .Machine$double.eps * 100
keep_l <- which(lambda_l_raw > eps_l)
if (length(keep_l) == 0) stop("No significant eigenvalues found in K_mm first stage (rank l). Reduce l?")
l_eff <- length(keep_l)
if (l_eff < ncomp) {
warning(sprintf("Effective rank after first stage (%d) is less than requested ncomp (%d). Proceeding with %d components.", l_eff, ncomp, l_eff))
ncomp <- l_eff # Adjust final ncomp down
}
lambda_l <- lambda_l_raw[keep_l]
V_S_l <- V_S_l_raw[, keep_l, drop=FALSE]
# Compute K_(X,L) for all X once - potentially large N x m matrix
K_all_landmarks <- kernel_func(X_preprocessed, X_l, ...)
# Construct W = K_all_landmarks * (V_S_l * Lambda_l^{-1/2})
# Using robust diag for the inverse sqrt lambda matrix
inv_sqrt_lambda_l_mat <- diag(1 / sqrt(lambda_l), nrow = l_eff, ncol = l_eff)
W <- K_all_landmarks %*% (V_S_l %*% inv_sqrt_lambda_l_mat)
# 2. Compute K_W = W^T W (l x l) and do final low-rank approximation to ncomp
K_W <- crossprod(W)
approx_k <- low_rank_decomp(K_W, ncomp) # ncomp might have been adjusted down
lambda_k_raw <- approx_k$d
V_k_raw <- approx_k$v
# Filter near-zero eigenvalues for final step
eps_k <- max(lambda_k_raw) * .Machine$double.eps * 100
keep_k <- which(lambda_k_raw > eps_k)
if (length(keep_k) == 0) stop("No significant eigenvalues found in second stage (K_W).")
k_eff <- length(keep_k)
if (k_eff < ncomp) {
warning(sprintf("Effective rank after second stage (%d) is less than requested ncomp (%d). Final components: %d.", k_eff, ncomp, k_eff))
}
lambda_k <- lambda_k_raw[keep_k]
V_k <- V_k_raw[, keep_k, drop=FALSE]
# Final eigenvalues = lambda_k
sdev <- sqrt(lambda_k)
# Final eigenvectors U_k = W * V_k * Lambda_k^{-1/2}
inv_sqrt_lambda_k_mat <- diag(1 / sqrt(lambda_k), nrow = k_eff, ncol = k_eff)
U_k <- W %*% (V_k %*% inv_sqrt_lambda_k_mat)
# Scores = U_k * diag(sdev)
s <- sweep(U_k, 2, sdev, "*") # column-wise scaling, avoids forming diag(sdev)
# Store intermediate results needed for projection
out <- bi_projector(
v = U_k,
s = s,
sdev = sdev,
preproc = proc,
classes = c("nystrom_approx", "double"),
meta = c(meta_info, list(
# Intermediate results needed for projection:
V_S_l = V_S_l, # U_mm in standard Nystrom context (eigenvectors of K_mm subset)
inv_sqrt_lambda_l = inv_sqrt_lambda_l_mat, # Diagonal matrix
V_k = V_k, # Eigenvectors of K_W
inv_sqrt_lambda_k = inv_sqrt_lambda_k_mat, # Diagonal matrix
# Unfiltered eigenvalues from both stages, kept for reference
lambda_l_raw = lambda_l_raw,
lambda_k_raw = lambda_k_raw
)
),
...
)
return(out)
}
}
#' Project new data using a Nyström approximation model
#'
#' @param x A `nystrom_approx` object (inheriting from `bi_projector`).
#' @param new_data New data matrix to project.
#' @param ... Additional arguments (currently ignored).
#' @return A matrix of projected scores.
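#' @examples
#' # Minimal sketch: fit on random data, then map a few rows into the component space
#' set.seed(1)
#' Xe <- matrix(rnorm(200 * 20), 200, 20)
#' fit <- nystrom_approx(Xe, ncomp = 3, nlandmarks = 30)
#' head(project(fit, Xe[1:5, ]))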
#' @export
project.nystrom_approx <- function(x, new_data, ...) {
# Extract necessary info from the meta slot
meta <- x$meta
if (is.null(meta) || is.null(meta$method)) {
stop("Nystrom model object 'x' appears incomplete or corrupted (missing meta information).")
}
kernel_func <- meta$kernel_func
landmarks <- meta$landmarks
# The (preprocessed) landmark rows are stored in meta by the constructor;
# they are needed to form K(new_data, landmarks).
if (is.null(meta$X_landmarks)) {
stop("Cannot project: landmark data (X_landmarks) not found in the model object.")
}
X_l <- meta$X_landmarks
# Apply the same pre-processing that was used at fit time
new_data_p <- reprocess(x, new_data)
K_new_landmark <- do.call(kernel_func, c(list(new_data_p, X_l), meta$extra_args))
if (meta$method == "double") {
# Double Nyström projection:
# approx_scores(new) = K_new_landmark * V_S_l * inv_sqrt_lambda_l * V_k * inv_sqrt_lambda_k * diag(sdev)
# where sdev = sqrt(lambda_k)
if (is.null(meta$V_S_l) || is.null(meta$inv_sqrt_lambda_l) || is.null(meta$V_k) || is.null(meta$inv_sqrt_lambda_k)){
stop("Double Nystrom model object 'x' is missing necessary components for projection.")
}
# Combine the stored projection matrices, then scale columns by sdev:
# scores(new) = K(new, L) %*% V_S_l %*% Lambda_l^{-1/2} %*% V_k %*% Lambda_k^{-1/2} %*% diag(sdev)
proj_matrix <- meta$V_S_l %*% meta$inv_sqrt_lambda_l %*% meta$V_k %*% meta$inv_sqrt_lambda_k
scores <- K_new_landmark %*% sweep(proj_matrix, 2, x$sdev, "*")
} else { # method == "standard"
# Standard Nyström projection. The fitted scores for row i are
# K(i, L) %*% U_mm %*% Lambda_mm^{-1/2}, so new rows are mapped with the same
# matrix; projecting training rows then reproduces the stored scores.
if (is.null(meta$U_mm) || is.null(meta$lambda_mm)) {
stop("Standard Nystrom model object 'x' is missing necessary components for projection.")
}
lambda_mm <- meta$lambda_mm
U_mm <- meta$U_mm
k_eff <- length(lambda_mm)
# Guard against non-positive eigenvalues before taking the inverse square root
lambda_mm[lambda_mm <= 0] <- .Machine$double.eps
proj_vector <- 1 / sqrt(lambda_mm)
proj_vector[!is.finite(proj_vector)] <- 0
proj_matrix <- U_mm %*% diag(proj_vector, nrow = k_eff, ncol = k_eff)
scores <- K_new_landmark %*% proj_matrix
}
scores
}