Nothing
#' @title Orthogonal projections
#' @name ortho_projection
#' @aliases ortho_projection predict.ortho_projection plot.ortho_projection
#' @description
#'
#' \loadmathjax
#' Performs orthogonal projections of high-dimensional data matrices using
#' principal component analysis (PCA) or partial least squares (PLS).
#'
#' @usage
#' ortho_projection(
#' Xr, Xu = NULL, Yr = NULL,
#' ncomp = ncomp_by_var(0.01),
#' method = c("pca", "pca_nipals", "pls", "mpls", "simpls"),
#' center = TRUE,
#' scale = FALSE,
#' tol = 1e-6,
#' max_iter = 1000L,
#' pc_selection = deprecated(),
#' ...
#' )
#'
#' \method{predict}{ortho_projection}(object, newdata, ...)
#'
#' \method{plot}{ortho_projection}(x, col = "#3B82F6", ...)
#'
#' @param Xr A numeric matrix of reference observations (rows) and variables
#' (columns).
#' @param Xu An optional matrix of additional observations to project.
#' @param Yr An optional response matrix. Required for PLS methods
#' (\code{"pls"}, \code{"mpls"}, \code{"simpls"}) and when using
#' \code{\link{ncomp_by_opc}()}.
#' @param ncomp Component selection method. Either:
#' \itemize{
#' \item A positive integer (equivalent to \code{ncomp_fixed(n)})
#' \item An \code{ncomp_selection} object: \code{\link{ncomp_by_var}()},
#' \code{\link{ncomp_by_cumvar}()}, \code{\link{ncomp_by_opc}()}, or
#' \code{\link{ncomp_fixed}()}
#' }
#' Default is \code{ncomp_by_var(0.01)}.
#' @param method A character string specifying the projection method:
#' \itemize{
#' \item \code{"pca"}: PCA via singular value decomposition (default)
#' \item \code{"pca_nipals"}: PCA via NIPALS algorithm
#' \item \code{"pls"}: PLS via NIPALS algorithm
#' \item \code{"mpls"}: Modified PLS via NIPALS (Shenk and Westerhaus, 1991)
#' \item \code{"simpls"}: PLS via SIMPLS algorithm (de Jong, 1993)
#' }
#' @param center A logical indicating whether to center the data. Default is
#' \code{TRUE}. PLS methods always center internally regardless of this
#' setting.
#' @param scale A logical indicating whether to scale the data to unit
#' variance. Default is \code{FALSE}.
#' @param tol Convergence tolerance for the NIPALS algorithm. Default is
#' \code{1e-6}. Ignored when \code{method = "simpls"}.
#' @param max_iter Maximum number of iterations for NIPALS. Default is
#' \code{1000}. Ignored when \code{method = "simpls"}.
#' @param pc_selection `r lifecycle::badge("deprecated")` Use \code{ncomp} instead.
#' @param object Object of class \code{"ortho_projection"}.
#' @param newdata Matrix of new observations to project.
#' @param x An object of class \code{ortho_projection} (as returned by
#' \code{\link{ortho_projection}}).
#' @param col Color for the plot elements. Default is \code{"#3B82F6"}.
#' @param ... Additional arguments (currently unused).
#'
#' @details
#' ## PCA methods
#'
#' When \code{method = "pca"}, singular value decomposition factorizes the
#' data matrix \mjeqn{X}{X} as:
#'
#' \mjdeqn{X = UDV^{T}}{X = UDV^T}
#'
#' where \mjeqn{U}{U} and \mjeqn{V}{V} are orthogonal matrices (left and right
#' singular vectors), and \mjeqn{D}{D} is a diagonal matrix of singular values.
#' The score matrix is \mjeqn{UD}{UD} and the loadings are \mjeqn{V}{V}.
#'
#' When \code{method = "pca_nipals"}, the non-linear iterative partial least
#' squares (NIPALS) algorithm is used instead.
#'
#' ## PLS methods
#'
#' Three PLS variants are available:
#'
#' \itemize{
#' \item \code{"pls"}: Standard PLS using the NIPALS algorithm with
#' covariance-based weights.
#' \item \code{"mpls"}: Modified PLS using the NIPALS algorithm with
#' correlation-based weights, giving equal influence to all predictors
#' regardless of variance (Shenk and Westerhaus, 1991).
#' \item \code{"simpls"}: SIMPLS algorithm (de Jong, 1993), which deflates
#' the cross-product matrix rather than X itself. Computationally faster
#' than NIPALS, especially for wide matrices.
#' }
#'
#' ## Component selection
#'
#' When \code{\link{ncomp_by_opc}()} is used, component selection minimizes
#' RMSD (for continuous \code{Yr}) or maximizes kappa (for categorical
#' \code{Yr}) between observations and their nearest neighbors. See
#' \code{\link{diss_evaluate}}.
#'
#' @return
#' An object of class \code{"ortho_projection"} containing:
#' \itemize{
#' \item \code{scores}: Matrix of projected scores for \code{Xr} (and \code{Xu}).
#' \item \code{X_loadings}: Matrix of X loadings.
#' \item \code{Y_loadings}: Matrix of Y loadings (PLS only).
#' \item \code{weights}: Matrix of PLS weights (PLS only).
#' \item \code{projection_mat}: Projection matrix for new data (PLS only).
#' \item \code{variance}: List with original and explained variance.
#' \item \code{scores_sd}: Standard deviation of scores.
#' \item \code{ncomp}: Number of components retained.
#' \item \code{center}: Centering vector used.
#' \item \code{scale}: Scaling vector used.
#' \item \code{method}: Projection method used.
#' \item \code{ncomp_method}: The value passed to the `ncomp` argument.
#' \item \code{opc_evaluation}: opc optimization results (if applicable).
#' }
#'
#' @author
#' \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez}
#'
#' @references
#' de Jong, S. 1993. SIMPLS: An alternative approach to partial least squares
#' regression. Chemometrics and Intelligent Laboratory Systems 18:251-263.
#'
#' Martens, H. 1991. Multivariate calibration. John Wiley & Sons.
#'
#' Ramirez-Lopez, L., Behrens, T., Schmidt, K., Stevens, A., Dematte, J.A.M.,
#' Scholten, T. 2013a. The spectrum-based learner: A new local approach for
#' modeling soil vis-NIR spectra of complex data sets. Geoderma 195-196:268-279.
#'
#' Ramirez-Lopez, L., Behrens, T., Schmidt, K., Viscarra Rossel, R., Dematte,
#' J.A.M., Scholten, T. 2013b. Distance and similarity-search metrics for use
#' with soil vis-NIR spectra. Geoderma 199:43-53.
#'
#' Shenk, J.S., Westerhaus, M.O. 1991. Populations structuring of near
#' infrared spectra and modified partial least squares regression. Crop
#' Science 31:1548-1555.
#'
#' @seealso
#' \code{\link{ncomp_by_var}}, \code{\link{ncomp_by_opc}},
#' \code{\link{diss_evaluate}}, \code{\link{mbl}}
#'
#' @examples
#' \donttest{
#' library(prospectr)
#' data(NIRsoil)
#'
#' # Preprocess
#' sg_det <- savitzkyGolay(
#' detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
#' m = 1, p = 1, w = 7
#' )
#'
#' # Split data
#' train_x <- sg_det[NIRsoil$train == 1 & !is.na(NIRsoil$CEC), ]
#' train_y <- NIRsoil$CEC[NIRsoil$train == 1 & !is.na(NIRsoil$CEC)]
#' test_x <- sg_det[NIRsoil$train == 0 & !is.na(NIRsoil$CEC), ]
#'
#' # PCA with fixed components
#' proj <- ortho_projection(train_x, ncomp = 5)
#'
#' plot(proj)
#'
#' # PCA with variance-based selection
#' proj <- ortho_projection(train_x, ncomp = ncomp_by_var(0.01))
#'
#' # PCA with OPC optimization
#' proj <- ortho_projection(train_x, Xu = test_x, Yr = train_y,
#' ncomp = ncomp_by_opc(40))
#'
#' #' plot(proj)
#'
#' # PLS projection (NIPALS)
#' proj <- ortho_projection(train_x, Xu = test_x, Yr = train_y,
#' method = "pls", ncomp = ncomp_by_opc(40))
#'
#' # Modified PLS
#' proj <- ortho_projection(train_x, Yr = train_y,
#' method = "mpls", ncomp = 10)
#'
#' # SIMPLS (faster for wide matrices)
#' proj <- ortho_projection(train_x, Yr = train_y,
#' method = "simpls", ncomp = 10)
#' }
#' @rdname ortho_projection
#' @export
ortho_projection <- function(
Xr, Xu = NULL, Yr = NULL,
ncomp = ncomp_by_var(0.01),
method = c("pca", "pca_nipals", "pls", "mpls", "simpls"),
center = TRUE,
scale = FALSE,
tol = 1e-6,
max_iter = 1000L,
pc_selection = deprecated(),
...
) {
# ---------------------------------------------------------------------------
# Handle deprecated pc_selection
# ---------------------------------------------------------------------------
if (is_present(pc_selection)) {
deprecate_warn(
when = "3.0.0",
what = "ortho_projection(pc_selection)",
with = "ortho_projection(ncomp)",
details = "Use ncomp_fixed(), ncomp_by_var(), ncomp_by_cumvar(), or ncomp_by_opc()."
)
ncomp <- .convert_pc_selection_to_ncomp(pc_selection)
}
# ---------------------------------------------------------------------------
# Handle legacy method names
# ---------------------------------------------------------------------------
if (identical(method, "pca.nipals")) {
deprecate_warn(
when = "3.0.0",
what = I('ortho_projection(method = "pca.nipals")'),
with = I('ortho_projection(method = "pca_nipals")')
)
method <- "pca_nipals"
}
method <- match.arg(method)
is_pca <- method %in% c("pca", "pca_nipals")
is_pls <- method %in% c("pls", "mpls", "simpls")
# ---------------------------------------------------------------------------
# Coerce ncomp
# ---------------------------------------------------------------------------
ncomp <- .coerce_ncomp(ncomp)
# ---------------------------------------------------------------------------
# Validate inputs
# ---------------------------------------------------------------------------
if (!is.logical(center) || length(center) != 1L || is.na(center)) {
stop("'center' must be TRUE or FALSE.")
}
if (!is.logical(scale) || length(scale) != 1L || is.na(scale)) {
stop("'scale' must be TRUE or FALSE.")
}
if (is_pls && is.null(Yr)) {
stop("'Yr' is required for PLS methods ('pls', 'mpls').")
}
if (inherits(ncomp, "ncomp_by_opc") && is.null(Yr)) {
stop("'Yr' is required when using ncomp_by_opc().")
}
# ---------------------------------------------------------------------------
# Dispatch to implementation
# ---------------------------------------------------------------------------
if (is_pls) {
proj <- .pls_projection_impl(
Xr = Xr, Xu = Xu, Yr = Yr,
ncomp = ncomp,
method = method,
scale = scale,
tol = tol,
max_iter = max_iter
)
} else {
proj <- .pca_projection_impl(
Xr = Xr, Xu = Xu, Yr = Yr,
ncomp = ncomp,
method = method,
center = center,
scale = scale,
tol = tol,
max_iter = max_iter
)
}
proj$method <- method
proj$ncomp_method <- ncomp
class(proj) <- c("ortho_projection", "list")
proj
}
# =============================================================================
# Deprecated wrappers
# =============================================================================
#' @keywords internal
#' @noRd
pc_projection <- function(
Xr, Xu = NULL, Yr = NULL,
pc_selection = list(method = "var", value = 0.01),
center = TRUE, scale = FALSE,
method = "pca",
tol = 1e-6, max_iter = 1000,
...
) {
deprecate_warn(
when = "3.0.0",
what = "pc_projection()",
with = "ortho_projection()"
)
ncomp <- .convert_pc_selection_to_ncomp(pc_selection)
new_method <- if (method == "pca.nipals") "pca_nipals" else "pca"
ortho_projection(
Xr = Xr, Xu = Xu, Yr = Yr,
ncomp = ncomp,
method = new_method,
center = center,
scale = scale,
tol = tol,
max_iter = max_iter
)
}
#' @keywords internal
#' @noRd
pls_projection <- function(
Xr, Xu = NULL, Yr,
pc_selection = list(method = "opc", value = min(dim(Xr), 40)),
scale = FALSE,
method = "pls",
tol = 1e-6, max_iter = 1000,
...
) {
deprecate_warn(
when = "3.0.0",
what = "pls_projection()",
with = "ortho_projection()"
)
ncomp <- .convert_pc_selection_to_ncomp(pc_selection)
ortho_projection(
Xr = Xr, Xu = Xu, Yr = Yr,
ncomp = ncomp,
method = method,
scale = scale,
tol = tol,
max_iter = max_iter
)
}
# =============================================================================
# predict method
# =============================================================================
#' @rdname ortho_projection
#' @export
predict.ortho_projection <- function(object, newdata, ...) {
if (missing(newdata)) {
return(object$scores)
}
nms_orig <- colnames(object$X_loadings)
nms_new <- colnames(newdata)
if (any(!nms_orig %in% nms_new)) {
stop("Variables missing in 'newdata' that are required for projection.")
}
is_pca <- grepl("pca", object$method)
if (is_pca) {
newdata <- sweep(newdata, 2L, object$center, FUN = "-")
newdata <- sweep(newdata, 2L, object$scale, FUN = "/")
scores <- newdata %*% t(object$X_loadings)
colnames(scores) <- rownames(object$X_loadings)
rownames(scores) <- rownames(newdata)
return(scores)
} else {
scores <- project_opls(
projection_mat = object$projection_mat,
ncomp = ncol(object$projection_mat),
newdata = newdata,
scale = TRUE,
Xcenter = object$center,
Xscale = object$scale
)
colnames(scores) <- paste0("pls_", seq_len(ncol(scores)))
rownames(scores) <- rownames(newdata)
return(scores)
}
}
# =============================================================================
# Helper: Convert legacy pc_selection to ncomp_selection
# =============================================================================
.convert_pc_selection_to_ncomp <- function(pc_selection) {
# Handle character shorthand
if (is.character(pc_selection) && length(pc_selection) == 1L) {
pc_selection <- switch(pc_selection,
opc = list(method = "opc", value = 40L),
cumvar = list(method = "cumvar", value = 0.99),
var = list(method = "var", value = 0.01),
manual = list(method = "manual", value = 40L),
stop("Unknown pc_selection method: ", pc_selection)
)
}
sel_method <- if (!is.null(pc_selection$method)) {
pc_selection$method
} else {
pc_selection[[1]]
}
value <- if (!is.null(pc_selection$value)) {
pc_selection$value
} else {
pc_selection[[2]]
}
switch(sel_method,
manual = ncomp_fixed(as.integer(value)),
var = ncomp_by_var(min_var = value, max_ncomp = 40L),
cumvar = ncomp_by_cumvar(min_cumvar = value, max_ncomp = 40L),
opc = ncomp_by_opc(max_ncomp = as.integer(value)),
stop("Unknown pc_selection method: ", sel_method)
)
}
# =============================================================================
# Helper: Get max_ncomp from ncomp_selection object
# =============================================================================
.get_max_ncomp <- function(ncomp) {
if (inherits(ncomp, "ncomp_fixed")) {
return(ncomp$ncomp)
}
ncomp$max_ncomp
}
# =============================================================================
# Helper: Bridge ncomp_selection to legacy pc_selection list
# =============================================================================
.ncomp_to_pc_selection <- function(ncomp_obj) {
stopifnot(inherits(ncomp_obj, "ncomp_selection"))
switch(
class(ncomp_obj)[[1]],
ncomp_fixed = list(method = "manual", value = ncomp_obj$ncomp),
ncomp_by_var = list(method = "var", value = ncomp_obj$min_var),
ncomp_by_cumvar = list(method = "cumvar", value = ncomp_obj$min_cumvar),
ncomp_by_opc = list(method = "opc", value = ncomp_obj$max_ncomp),
stop("Unknown ncomp_selection class: ", class(ncomp_obj)[[1]])
)
}
# =============================================================================
# Internal PCA implementation
# =============================================================================
.pca_projection_impl <- function(
Xr, Xu, Yr, ncomp, method, center, scale, tol, max_iter
) {
# Bridge to legacy format
pc_selection <- .ncomp_to_pc_selection(ncomp)
pc_selection_method <- pc_selection$method
max_comp <- .get_max_ncomp(ncomp)
# Validation
if (!is.null(Yr)) {
Yr <- as.matrix(Yr)
if (nrow(Yr) != nrow(Xr)) {
stop("Number of rows in 'Yr' must match 'Xr'.")
}
}
ny <- if (!is.null(Yr)) ncol(Yr) else 0L
if (!is.null(Xu) && ncol(Xr) != ncol(Xu)) {
stop("Number of columns in 'Xr' and 'Xu' must match.")
}
effective_rows_xr <- nrow(Xr)
effective_rows_xu <- if (is.null(Xu)) 0L else nrow(Xu)
n_cols <- ncol(Xr)
# Combine Xr and Xu
X <- rbind(Xr, Xu)
# Cap max_comp at data dimensions
max_comp <- min(max_comp, nrow(X), ncol(X))
# Center
if (center) {
mean_vector <- colMeans(X)
X0 <- sweep(X, 2L, mean_vector, FUN = "-")
} else {
mean_vector <- rep(0, n_cols)
X0 <- X
}
# Scale
if (scale) {
sd_vector <- get_column_sds(X0)
X0 <- sweep(X0, 2L, sd_vector, FUN = "/")
} else {
sd_vector <- rep(1, n_cols)
}
# Compute PCA
if (method == "pca") {
sv <- svd(X0, nu = max_comp, nv = max_comp)
sv$d <- sv$d[1:max_comp]
if (length(sv$d) == 1L) {
pc_scores <- sv$u %*% as.matrix(sv$d)
} else {
pc_scores <- sv$u %*% diag(sv$d)
}
pc_loadings <- t(sv$v)
xvariance <- overall_var(X0)
explained_v <- (sv$d)^2 / xvariance
cumulative_v <- cumsum(explained_v)
variance <- rbind(
var = (sv$d)^2,
explained_var = explained_v,
cumulative_explained_var = cumulative_v
)
} else {
# pca_nipals
nipals_result <- pca_nipals(
X = X0,
ncomp = max_comp,
center = FALSE,
scale = FALSE,
maxiter = max_iter,
tol = tol,
pcSelmethod = pc_selection_method,
pcSelvalue = pc_selection$value
)
pc_scores <- nipals_result$pc_scores
pc_loadings <- nipals_result$pc_loadings
xvariance <- nipals_result$original_x_variance
variance <- rbind(
var = nipals_result$pc_variance[1, ],
explained_var = nipals_result$pc_variance[2, ],
cumulative_explained_var = nipals_result$pc_variance[3, ]
)
}
# Assign names
colnames(pc_scores) <- paste0("pc_", seq_len(ncol(pc_scores)))
rownames(pc_scores) <- c(
paste0("Xr_", seq_len(effective_rows_xr)),
if (effective_rows_xu > 0) paste0("Xu_", seq_len(effective_rows_xu))
)
colnames(pc_loadings) <- colnames(X0)
rownames(pc_loadings) <- paste0("pc_", seq_len(nrow(pc_loadings)))
colnames(variance) <- paste0("pc_", seq_len(ncol(variance)))
# Select components
opc_evaluation <- NULL
if (pc_selection_method == "opc") {
if (is.null(Yr) || !is.matrix(Yr)) {
stop("'Yr' must be a matrix when using ncomp_by_opc().")
}
if (nrow(Yr) != effective_rows_xr) {
stop("Number of rows in 'Yr' must match 'Xr'.")
}
if (!is.null(colnames(Yr)) && any(duplicated(colnames(Yr)))) {
stop("Column names in 'Yr' must be unique.")
}
if (is.null(colnames(Yr))) {
colnames(Yr) <- paste0("Yr_", seq_len(ny))
}
results <- eval_multi_pc_diss(
pc_scores[seq_len(effective_rows_xr), 1:max_comp, drop = FALSE],
side_info = Yr,
method = "pc",
check_dims = FALSE
)
selected_pcs <- results$best_pc
opc_evaluation <- results$result
} else if (pc_selection_method == "cumvar") {
selected_pcs <- sum(variance["cumulative_explained_var", ] < pc_selection$value) + 1L
selected_pcs <- min(selected_pcs, ncol(variance))
} else if (pc_selection_method == "var") {
selected_pcs <- sum(variance["explained_var", ] >= pc_selection$value)
selected_pcs <- max(1L, selected_pcs)
} else {
# manual
selected_pcs <- min(pc_selection$value, ncol(pc_scores))
}
# Subset to selected components
scores_sd <- get_column_sds(pc_scores[, 1:selected_pcs, drop = FALSE])
colnames(scores_sd) <- colnames(pc_scores)[1:selected_pcs]
rownames(scores_sd) <- "sd"
result <- list(
scores = pc_scores[, 1:selected_pcs, drop = FALSE],
X_loadings = pc_loadings[1:selected_pcs, , drop = FALSE],
variance = list(
original_x_var = xvariance,
x_var = variance[, 1:selected_pcs, drop = FALSE]
),
scores_sd = scores_sd,
ncomp = selected_pcs,
center = mean_vector,
scale = sd_vector
)
if (!is.null(opc_evaluation)) {
result$opc_evaluation <- opc_evaluation
}
result
}
# =============================================================================
# Internal PLS implementation
# =============================================================================
.pls_projection_impl <- function(
Xr, Xu, Yr, ncomp, method, scale, tol, max_iter
) {
# Bridge to legacy format
pc_selection <- .ncomp_to_pc_selection(ncomp)
pc_selection_method <- pc_selection$method
max_comp <- .get_max_ncomp(ncomp)
# Validation
Yr <- as.matrix(Yr)
if (!is.numeric(Yr)) {
stop("'Yr' must be numeric.")
}
if (nrow(Yr) != nrow(Xr)) {
stop("Number of rows in 'Yr' must match 'Xr'.")
}
if (!is.null(Xu) && ncol(Xr) != ncol(Xu)) {
stop("Number of columns in 'Xr' and 'Xu' must match.")
}
ny <- ncol(Yr)
# Handle missing values in Yr
nas <- rowSums(is.na(Yr)) > 0
X0 <- Xr
Y0 <- Yr
non_nas_yr <- seq_len(nrow(Xr))
nas_yr <- integer(0)
Xout <- NULL
if (any(nas)) {
nas_yr <- which(nas)
non_nas_yr <- which(!nas)
Xout <- Xr[nas_yr, , drop = FALSE]
X0 <- Xr[non_nas_yr, , drop = FALSE]
Y0 <- Yr[non_nas_yr, , drop = FALSE]
if (pc_selection_method %in% c("opc", "manual")) {
if (min(dim(X0)) < max_comp) {
stop(
"Missing values in 'Yr' reduce available observations. ",
"The number of components exceeds available observations."
)
}
}
}
effective_rows_xr <- nrow(X0)
max_comp <- min(max_comp, effective_rows_xr, ncol(X0))
# Set up for C++ backend
cpp_method <- if (pc_selection_method %in% c("opc", "manual")) {
"manual"
} else {
pc_selection_method
}
cpp_value <- if (pc_selection_method %in% c("opc", "manual")) {
max_comp - 1L
} else {
pc_selection$value
}
# Run PLS
plsp <- opls_for_projection(
X = X0,
Y = Y0,
ncomp = max_comp,
scale = scale,
maxiter = max_iter,
tol = tol,
pcSelmethod = cpp_method,
pcSelvalue = cpp_value,
algorithm = method
)
max_comp <- plsp$ncomp
# OPC selection if requested
opc_evaluation <- NULL
if (pc_selection_method == "opc") {
if (is.null(colnames(Yr))) {
colnames(Y0) <- colnames(Yr) <- paste0("Yr_", seq_len(ny))
}
if (any(duplicated(colnames(Yr)))) {
stop("Column names in 'Yr' must be unique.")
}
results <- eval_multi_pc_diss(
plsp$scores[, 1:max_comp, drop = FALSE],
side_info = Y0,
method = "pls",
check_dims = FALSE
)
max_comp <- results$best_pc
opc_evaluation <- results$result
}
# Extract selected components
pls_variance <- plsp$variance$x_var[, 1:max_comp, drop = FALSE]
weights <- plsp$weights[1:max_comp, , drop = FALSE]
scores <- plsp$scores[, 1:max_comp, drop = FALSE]
X_loadings <- plsp$X_loadings[1:max_comp, , drop = FALSE]
Y_loadings <- plsp$Y_loadings[1:max_comp, , drop = FALSE]
projection_mat <- plsp$projection_mat[, 1:max_comp, drop = FALSE]
# Assign names
comp_names <- paste0("pls_", seq_len(max_comp))
rownames(projection_mat) <- colnames(X_loadings) <- colnames(X0)
colnames(projection_mat) <- rownames(X_loadings) <- comp_names
rownames(Y_loadings) <- comp_names
colnames(Y_loadings) <- paste0("Y_pls_", seq_len(ny))
rownames(weights) <- comp_names
colnames(weights) <- colnames(X0)
colnames(pls_variance) <- comp_names
rownames(pls_variance) <- c("var", "explained_var_X", "cumulative_explained_var_X")
yex <- plsp$variance$y_var[, 1:max_comp, drop = FALSE]
colnames(yex) <- comp_names
if (ny > 1) {
rownames(yex) <- paste0("cumulative_explained_var_", colnames(Yr))
} else {
rownames(yex) <- "cumulative_explained_var_Yr"
}
# Handle observations with missing Yr
if (length(nas_yr) > 0) {
scores_full <- matrix(NA_real_, nrow(Xr), ncol(scores))
scores_full[nas_yr, ] <- project_opls(
projection_mat = projection_mat,
ncomp = max_comp,
newdata = Xout,
scale = scale,
Xcenter = plsp$transf$Xcenter,
Xscale = plsp$transf$Xscale
)
scores_full[non_nas_yr, ] <- scores
scores <- scores_full
}
rownames(scores) <- paste0("Xr_", seq_len(nrow(scores)))
colnames(scores) <- comp_names
# Project Xu if provided
if (!is.null(Xu)) {
if (is.vector(Xu)) Xu <- t(Xu)
scores_Xu <- project_opls(
projection_mat = projection_mat,
ncomp = max_comp,
newdata = Xu,
scale = scale,
Xcenter = plsp$transf$Xcenter,
Xscale = plsp$transf$Xscale
)
colnames(scores_Xu) <- comp_names
rownames(scores_Xu) <- paste0("Xu_", seq_len(nrow(Xu)))
scores <- rbind(scores, scores_Xu)
}
# Ensure scale is a matrix
if (!nrow(plsp$transf$Xscale)) {
plsp$transf$Xscale <- matrix(1, 1L, length(plsp$transf$Xcenter))
}
scores_sd <- get_column_sds(scores)
colnames(scores_sd) <- colnames(scores)
rownames(scores_sd) <- "sd"
result <- list(
scores = scores,
X_loadings = X_loadings,
Y_loadings = Y_loadings,
weights = weights,
projection_mat = projection_mat,
variance = list(
original_x_var = plsp$variance$original_x_var,
x_var = pls_variance,
y_var = yex
),
scores_sd = scores_sd,
ncomp = max_comp,
center = plsp$transf$Xcenter,
scale = plsp$transf$Xscale
)
if (!is.null(opc_evaluation)) {
result$opc_evaluation <- opc_evaluation
}
result
}
#' @aliases ortho_projection
#' @importFrom graphics barplot grid par plot segments
#' @export
plot.ortho_projection <- function(x, col = "#3B82F6", ...) {
if (inherits(x$ncomp_method, "ncomp_by_opc")) {
.plot_opc_evaluation(x, col = col, ...)
} else {
.plot_variance(x, col = col, ...)
}
invisible(NULL)
}
# --- internal: OPC evaluation plot --------------------------------------------
.plot_opc_evaluation <- function(x, col, ...) {
opc_eval <- x$opc_evaluation
tpl <- opc_eval[, c(1, ncol(opc_eval))]
metric <- colnames(tpl)[2]
ylab <- switch(
metric,
mean_standardized_rmsd_Yr = "Mean standardized RMSD",
rmsd_Yr = "RMSD",
rmsd = "RMSD",
kappa = "Kappa",
metric
)
plot(
tpl,
type = "p",
pch = 16,
col = col,
ylab = ylab,
xlab = "Number of components",
las = 1,
...
)
grid(col = "#33415580", lty = 1)
segments(tpl[, 1], 0, tpl[, 1], tpl[, 2], col = col)
# Mark optimal
opt_idx <- which.min(tpl[, 2])
if (metric == "kappa") {
opt_idx <- which.max(tpl[, 2])
}
points(tpl[opt_idx, 1], tpl[opt_idx, 2], pch = 16, col = "#F59E0B", cex = 1.5)
}
# --- internal: variance plot --------------------------------------------------
.plot_variance <- function(x, col, ...) {
x_var <- x$variance$x_var
opar <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
on.exit(par(opar))
# Individual variance
ind_var <- x_var[grep("^explained_var", rownames(x_var)), ]
barplot(
ind_var,
horiz = FALSE,
names.arg = colnames(x_var),
ylim = c(0, 1),
ylab = "Explained variance",
xlab = "Component",
col = col,
border = NA,
las = 1,
...
)
# Cumulative variance
cum_var <- x_var[grep("cumulative", rownames(x_var)), ]
barplot(
cum_var,
horiz = FALSE,
names.arg = colnames(x_var),
ylim = c(0, 1),
ylab = "Cumulative explained variance",
xlab = "Component",
col = col,
border = NA,
las = 1,
...
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.