# R/ComDim_PLS.R
#
# Defines functions: PLS2_DNR_2018, RowsPartition, ColumnsPartition,
# PCA_Tall_PCT_DNR, Compress_Data_2020, ComDim_PLS
#
# Documented in: ColumnsPartition, ComDim_PLS, Compress_Data_2020,
# PCA_Tall_PCT_DNR, RowsPartition

# For the DA situation I should include the formula of the VIPs.
# It requires pracma::pinv, which is invoked in the package.
# Need to change all matrix divisions to use pracma::pinv %*% ...
# Check how the saliences are computed.
# See if the B values make sense (in June DNR was skeptical about how to deal with this situation in the multi-block scenario due to the different scaling...)


#' ComDim_PLS
#'
#' Finding common dimensions in multi-block datasets using PLS.
#' @param MB A MultiBlock object.
#' @param y The Y-block to use in the PLS model as dependent data. A class vector or a dummy matrix.
#' @param method The PLS method to use: either 'PLS-DA' or 'PLS-R'.
#' @param decisionRule Only used if method is set to PLS-DA. If 'fixed', samples are assigned to the class with Y-hat above 1/nclasses. If 'max' (the default), samples are assigned to the class with the highest Y-hat.
#' @param ndim Number of Common Dimensions.
#' @param normalise To apply block normalisation. FALSE == no (default), TRUE == yes.
#'   When TRUE each block is mean-centred and then divided by its Frobenius norm so
#'   that all blocks have unit total inertia entering the ComDim loop.
#'   Has no effect on the Y-block; use \code{scale.y} for that.
#' @param scale.y Logical (default FALSE). When TRUE and \code{method = 'PLS-R'},
#'   each column of Y is mean-centred and scaled to unit variance before the PLS
#'   fit. The stored B, B0, and Ypred are back-transformed to the original Y scale,
#'   so downstream outputs (R2Y, Q2, predictions) are always in the original units.
#'   Ignored when \code{method = 'PLS-DA'} (dummy Y should not be scaled).
#' @param threshold The threshold limit to stop the iterations. If the "difference of fit" < threshold (1e-10 as default).
#' @param loquace To display the calculation times. TRUE == yes, FALSE == no (default).
#' @param CompMethod To speed up the analysis for really big multi-blocks. 'Normal' (default), 'Kernel', 'PCT', 'Tall' or 'Wide'.
#' @param Partitions To speed up the analysis for really big multi-blocks. This parameter is used if CompMethod is 'Tall' or 'Wide'.
#' @param cv.k Number of folds for k-fold cross-validation (default 7). Set to 0 to skip CV. When cv.k >= 2, Q2 and DQ2 in the output reflect cross-validated predictive ability; otherwise they reflect training-set fit (R2).
#' @return A \code{ComDim} object. All slots are populated. Key slots:
#' \describe{
#'   \item{\code{Method}}{\code{"PLS-DA"} or \code{"PLS-R"}.}
#'   \item{\code{ndim}}{Number of Common Dimensions extracted.}
#'   \item{\code{Q.scores}}{Global consensus PLS scores (\eqn{n \times ndim}).
#'     Each column \eqn{\mathbf{q}_a} (unit-norm) is the dominant left
#'     singular vector from the NIPALS PLS applied to the salience-weighted
#'     concatenated blocks.}
#'   \item{\code{T.scores}}{Named list of block-specific local scores
#'     (\eqn{n \times ndim} each).  Local loading
#'     \eqn{\mathbf{p}_{ba} = \mathbf{X}_b'\mathbf{q}_a}; local score
#'     \eqn{\mathbf{t}_{ba} =
#'     \mathbf{X}_b\,\mathbf{p}_{ba}(\mathbf{p}_{ba}'\mathbf{p}_{ba})^{-1}}.}
#'   \item{\code{P.loadings}}{Global loadings (\eqn{p_{tot} \times ndim}):
#'     \eqn{\mathbf{P} = \mathbf{X}'\mathbf{Q}}, where \eqn{\mathbf{X}} is
#'     the mean-centred (and optionally normalised) concatenated blocks.}
#'   \item{\code{Saliences}}{Block salience matrix (\eqn{ntable \times ndim}):
#'     \eqn{\lambda_{ba} =
#'     \mathbf{q}_a'\mathbf{X}_b\mathbf{X}_b'\mathbf{q}_a}.}
#'   \item{\code{R2X}}{Proportion of X variance captured by each component
#'     (named vector, length \eqn{ndim}).  Let \eqn{\mathbf{t}_a} be the
#'     NIPALS PLS X-score vector for component \eqn{a} on the salience-
#'     weighted blocks; then
#'     \deqn{R2X_a = \|\mathbf{t}_a\|^4 \big/ \sum_k \|\mathbf{t}_k\|^4.}}
#'   \item{\code{R2Y}}{Cumulative Y-variance explained (named vector, length
#'     \eqn{ndim}).  \eqn{R2Y_a} is the \eqn{R^2} from an OLS regression of
#'     \eqn{\mathbf{Y}} on the first \eqn{a} global scores with an intercept:
#'     \deqn{R2Y_a = 1 - RSS_a / TSS_Y,}
#'     where \eqn{RSS_a} is the residual SS when predicting \eqn{\mathbf{Y}}
#'     from \eqn{[1, \mathbf{q}_1, \ldots, \mathbf{q}_a]}.
#'     \strong{Note:} \eqn{R2Y_a} is cumulative — it reflects the total
#'     Y-variance explained by the first \eqn{a} components together,
#'     not the marginal contribution of component \eqn{a} alone.}
#'   \item{\code{Q2}}{Predictive Q2 per response column (PLS-R) or per class
#'     (PLS-DA), named accordingly:
#'     \deqn{Q2 = 1 - PRESS / TSS_Y,}
#'     where \eqn{PRESS = \sum_i (\hat{y}_i - y_i)^2} and
#'     \eqn{TSS_Y = \sum_i (y_i - \bar{y})^2}.
#'     When \code{cv.k >= 2}, \eqn{\hat{y}_i} are out-of-sample
#'     cross-validated predictions; otherwise training-set predictions are
#'     used (i.e. Q2 = R2Y for the full model).}
#'   \item{\code{DQ2}}{(PLS-DA only) Discriminant Q2 per class.  Only
#'     penalising residuals contribute to the sum:
#'     \deqn{DQ2 = 1 - PRESSD / TSS_Y,}
#'     where \eqn{PRESSD} sums \eqn{\hat{y}_i^2} for class-0 samples with
#'     \eqn{\hat{y}_i > 0}, and \eqn{(\hat{y}_i - 1)^2} for class-1 samples
#'     with \eqn{\hat{y}_i < 1}.  Same cross-validation logic as \code{Q2}.}
#'   \item{\code{Singular}}{Squared L2 norm of the NIPALS PLS score vector
#'     per component (\eqn{\|\mathbf{t}_a\|^2}), used to derive
#'     \code{R2X}.}
#'   \item{\code{VIP}}{Global VIP scores (named numeric vector, length
#'     \eqn{p_{tot}}) using the Wold formula:
#'     \deqn{VIP_j = \sqrt{p_{tot}
#'       \cdot \frac{\sum_a s_a \tilde{w}_{j,a}^2}{\sum_a s_a}},}
#'     where \eqn{s_a = \|\mathbf{t}_a\|^2 \|\mathbf{q}_a\|^2},
#'     \eqn{\tilde{w}_{j,a} = w_{j,a} / \|\mathbf{w}_a\|} is the
#'     L2-normalised \eqn{j}-th element of the \eqn{a}-th NIPALS weight
#'     vector, and \eqn{p_{tot}} is the total number of variables.}
#'   \item{\code{VIP.block}}{Named list (one \code{data.frame} per block)
#'     with columns \code{p} (per-block predictive VIP computed with block
#'     size \eqn{p_b} instead of \eqn{p_{tot}}) and \code{tot} (= \code{p}
#'     for PLS; included for consistency with OPLS output).  Row names are
#'     variable names.}
#'   \item{\code{PLS.model}}{List with: \code{W} (NIPALS X weight matrix,
#'     \eqn{p_{tot} \times ndim}); \code{B} (regression coefficients,
#'     \eqn{p_{tot} \times ncol(Y)},
#'     \eqn{\mathbf{B} =
#'     \mathbf{W}(\mathbf{P}'\mathbf{W})^{-1}\mathbf{Q}'},
#'     in original Y units); \code{B0} (intercept vector, length
#'     \eqn{ncol(Y)},
#'     \eqn{\mathbf{B}_0 = \bar{\mathbf{y}} - \bar{\mathbf{x}}\mathbf{B}});
#'     \code{Y} (original response matrix as supplied).
#'     Training-set Y predictions:
#'     \eqn{\hat{\mathbf{Y}} = \mathbf{X}\mathbf{B} + \mathbf{B}_0}.}
#'   \item{\code{cv}}{Cross-validation results when \code{cv.k >= 2} (empty
#'     list otherwise): \code{k} (number of folds), \code{fold}
#'     (sample-to-fold vector), \code{Ypred} (\eqn{n \times ncol(Y)} matrix
#'     of out-of-sample predictions), \code{Q2} (CV Q2 per class/response),
#'     \code{DQ2} (mean CV DQ2 across classes, PLS-DA only),
#'     \code{DQ2.perclass} (CV DQ2 per class, PLS-DA only).}
#'   \item{\code{Prediction}}{Training-set predictions: \code{Y.pred}
#'     (\eqn{n \times ncol(Y)}); for PLS-DA also \code{decisionRule},
#'     \code{trueClass} (character vector), \code{predClass} (data.frame),
#'     \code{Sensitivity} and \code{Specificity} (named per class),
#'     \code{confusionMatrix} (named list of 2x2 matrices, one per class).}
#'   \item{\code{Mean}}{List with \code{MeanMB} (column means per block),
#'     \code{MeanY} (column means of Y before any scaling), and
#'     \code{ScaleY} (column SDs of Y; all ones when \code{scale.y = FALSE}).}
#'   \item{\code{Norm}}{List with \code{NormMB}: Frobenius norms for block
#'     normalisation.}
#'   \item{\code{variable.block}}{Character vector (length \eqn{p_{tot}})
#'     mapping each row of \code{P.loadings} and each element of \code{VIP}
#'     to its block.}
#'   \item{\code{runtime}}{Total computation time in seconds.}
#' }
#' @examples
#' b1 <- matrix(rnorm(500), 10, 50)
#' batch_b1 <- rep(1, 10)
#' b2 <- matrix(rnorm(2400), 30, 80)
#' batch_b2 <- c(rep(1, 10), rep(2, 10), rep(3, 10))
#' mb <- MultiBlock(
#'   Samples = list(
#'     b1 = paste0("samples_", 1:10),
#'     b2 = rep(paste0("samples_", 1:10), 3)
#'   ),
#'   Data = list(b1 = b1, b2 = b2),
#'   Batch = list(b1 = batch_b1, b2 = batch_b2),
#'   ignore.size = TRUE
#' )
#' rw <- SplitRW(mb)
#' y <- scale(1:10, center = TRUE)
#' results.plsr <- ComDim_PLS(rw, y, 2, method = "PLS-R")
#' groups <- c(rep("A", 5), rep("B", 5))
#' results.plsda <- ComDim_PLS(rw, y = groups, 2, method = "PLS-DA")
#' @references Jouan-Rimbaud Bouveresse D, Rutledge DN (2024).
#'   A synthetic review of some recent extensions of ComDim.
#'   \emph{Journal of Chemometrics}, 38(5), e3454.
#'   \doi{10.1002/cem.3454}
#' @export
ComDim_PLS <- function(MB = MB, y = y, ndim = NULL,
                       method = c("PLS-DA", "PLS-R"),
                       decisionRule = c("fixed", "max")[2],
                       normalise = FALSE, scale.y = FALSE,
                       threshold = 1e-10,
                       loquace = FALSE,
                       CompMethod = "Normal", Partitions = 1,
                       cv.k = 7) {
  # INITIALISATION

  if (!(method == "PLS-DA" || method == "PLS-R")) {
    stop("'method' must be 'PLS-DA' or 'PLS-R'.")
  }

  progress_bar <- utils::txtProgressBar(min = 0, max = 80, style = 3, char = "=")

  start_ini_time <- Sys.time() # To start counting calculation time for the initialization

  if (!inherits(MB, "MultiBlock")) {
    stop("'MB' is  not a MultiBlock.")
  }

  ntable <- length(blockNames(MB)) # number of blocks
  nrowMB <- length(sampleNames(MB)) # number of samples
  variable_number <- ncol(MB) # Number of variables per block

  give_error <- 0

  # Remove all variables with 0 variance. If there are no remaining variables, give_error
  numvar <- 0
  numblock0 <- vector()
  for (i in 1:ntable) {
    var0 <- apply(MB@Data[[i]], 2, function(x) {
      var(x)
    })
    var0 <- which(var0 == 0)

    if (length(var0) != 0) {
      numvar <- numvar + length(var0)
      if (length(var0) == length(MB@Variables[[i]])) {
        numblock0 <- append(numblock0, i)
      }
      MB@Data[[i]] <- MB@Data[[i]][, setdiff(1:variable_number[i], var0)]
      MB@Variables[[i]] <- MB@Variables[[i]][setdiff(1:variable_number[i], var0)]
      variable_number[i] <- length(MB@Variables[[i]])
    }
  }
  if (numvar != 0) {
    warning(sprintf("Number of variables excluded from the analysis because of their zero variance: %d", numvar))
  }
  if (length(numblock0) != 0) {
    numblock0 <- rev(numblock0) # To sort in decreasing order.
    for (i in seq_along(numblock0)) {
      MB@Data[[numblock0[i]]] <- NULL
      MB@Variables[[numblock0[i]]] <- NULL
      if (numblock0[[i]] %in% MB@Batch) {
        MB@Batch[[numblock0[i]]] <- NULL
      }
      if (numblock0[[i]] %in% MB@Metadata) {
        MB@Metadata[[numblock0[i]]] <- NULL
      }
    }
    warning(sprintf("Number of blocks excluded from the analysis because of their zero variance: %d", length(numblock0)))
  }

  ntable <- length(blockNames(MB)) # number of blocks
  variable_number <- ncol(MB) # In case the number of blocks has changed.

  if (length(MB@Data) == 0) { # In case all blocks had 0 variance...
    warning("All blocks had 0 variance")
    give_error <- 1
  }

  # Check for infinite or missing values (they cannot be handled with svd)
  for (i in 1:ntable) {
    if (any(is.na(MB@Data[[i]]))) {
      stop("The MB contains NA values. They must be removed first for ComDim analysis.")
    }
    if (any(is.infinite(MB@Data[[i]]))) {
      stop("The MB contains infinite values. They must be removed first for ComDim analysis.")
    }
  }

  if (give_error) {
    stop("The data is not ready for ComDim.")
  } else {
    message("The data can be used for ComDim.")
  }


  if (is.null(ndim)) {
    ndim <- ntable # If the number of components is not defined,
    # the number of components to extract is equal to the number of blocks.
  }

  ## Check y matrix (convert to dummy matrix if it is not already.)
  if (method == "PLS-DA") {
    tmp <- unique(as.vector(y))
    if (all(tmp %in% c(0, 1))) { # Is it dummy?

      if (!is.matrix(y)) { # If it's a vector and the method is PLS-R
        y <- as.matrix(y)
      }

      classVect <- rep(NA, nrow(y))

      if (ncol(y) == 1) {
        classVect <- as.vector(y)
      } else {
        if (any(rowSums(y) != 1)) {
          if (any(rowSums(y) == 0)) {
            stop("At least one sample was not assigned to a class.")
          } else if (any(colSums(y) > 1)) {
            stop("At least one sample was assigned to more than one class.")
          }
        }
      }
      if (is.null(colnames(y))) {
        colnames(y) <- paste0("Class", seq_len(ncol(y)))
      }
      for (i in seq_len(ncol(y))) {
        classVect[which(y[, i] == 1)] <- colnames(y)[i]
      }
    } else { # If not dummy
      if ((is.matrix(y) || is.data.frame(y)) && ncol(y) > 1) {
        stop("ComDim-PLS can only be applied if Y is a dummy matrix or a class vector")
      }
      classVect <- as.vector(y)
      classVect_sorted <- sort(unique(classVect))

      # Now generate the dummy matrix from y
      y <- matrix(rep(0, length(classVect_sorted) * length(classVect)),
        ncol = length(classVect_sorted), nrow = length(classVect)
      )
      for (i in seq_along(classVect_sorted)) {
        y[which(classVect == classVect_sorted[i]), i] <- 1
      }
      colnames(y) <- classVect_sorted
      rm(classVect_sorted)
    }
  }

  if (!is.matrix(y)) { # If it's a vector and the method is PLS-R
    y <- as.matrix(y)
  }

  # Store original y (after validation / dummy conversion) for use in R2Y, Q2,
  # and as the response saved in PLS.model$Y.
  y_raw <- y

  # Scale Y for PLS-R if requested.
  # For PLS-DA the dummy matrix must not be scaled; emit a warning and skip.
  if (scale.y && method == "PLS-DA") {
    warning("'scale.y = TRUE' is ignored when method = 'PLS-DA': dummy Y matrices should not be centred or scaled.")
    scale.y <- FALSE
  }
  if (scale.y) {
    y_center <- colMeans(y)
    y_sd <- apply(y, 2, sd)
    y_sd[y_sd < .Machine$double.eps] <- 1 # guard against constant columns
    y <- sweep(sweep(y, 2, y_center, "-"), 2, y_sd, "/")
  } else {
    y_center <- colMeans(y)
    y_sd <- rep(1, ncol(y))
  }

  pieceBar <- 4 + 2 * ntable + ndim # Number of updates in the progress bar.
  pieceBar <- 80 / pieceBar
  total_progress <- pieceBar

  DimLabels <- paste0("CC", 1:ndim) # One label per component.
  TableLabels <- blockNames(MB) # One label per block.

  # isT <- grepl(output, 'T')
  # isL <- grepl(output, 'L')
  # isP <- grepl(output, 'P')

  if (CompMethod == "Tall" && any(variable_number > 1000)) {
    message("The method 'Tall' is not recommeded for large blocks (> 1000 columns).")
    message("Do you still want to continue with the analysis?")
    message("You can change now the CompMethod if you prefer: (yes/no)")
    ans1 <- tolower(readline("Type your choice: (y/n)"))
    if (ans1 == "y" || ans1 == "yes") {
      message("Which method do you want to use instead (Normal/Kernel/PCT/Wide)?")
      ans2 <- tolower(readline("Type your choice: (n/k/p/w)"))
      if (ans2 == "n" || ans2 == "normal") {
        CompMethod <- "Normal"
      } else if (ans2 == "k" || ans2 == "kernel") {
        CompMethod <- "Kernel"
      } else if (ans2 == "p" || ans2 == "pct") {
        CompMethod <- "PCT"
      } else if (ans2 == "w" || ans2 == "wide") {
        CompMethod <- "Wide"
      } else {
        stop("Wrong answer typed.")
      }
    } else if (ans1 == "n" || ans1 == "no") {
      message("The method 'Tall' will be used")
    } else {
      stop("Wrong answer typed.")
    }
  }

  if (CompMethod == "Kernel" && nrowMB >= min(variable_number)) {
    stop(sprintf(
      paste0("CompMethod = 'Kernel' requires n < p for every block ",
             "(n = %d, smallest block p = %d). ",
             "Use 'Normal', 'PCT', or 'Wide' instead."),
      nrowMB, min(variable_number)
    ))
  }

  end_ini_time <- Sys.time() # To end the count of the calculation time.

  if (loquace) {
    message(sprintf("Initialisation finished after : %s millisecs",
                    (end_ini_time - start_ini_time) * 1000))
  }

  utils::setTxtProgressBar(progress_bar, value = total_progress)

  # NORMALISATION

  # start_norm_time <- Sys.time()  # To start counting calculation time for the initialization

  X_mat <- matrix(, nrow = nrowMB, ncol = sum(variable_number))
  Xnorm_mat <- matrix(, nrow = nrowMB, ncol = sum(variable_number))

  res_calib <- list()
  temp_tabCalib <- list()
  s_n <- list()
  res_calib$SingVal <- as.vector(NULL)
  res_calib$NormMB <- as.vector(NULL)
  res_calib$MeanMB <- list()

  for (i in 1:ntable) {
    res_calib$MeanMB[[TableLabels[i]]] <- colMeans(MB@Data[[i]])
    names(res_calib$MeanMB[[TableLabels[i]]]) <- MB@Variables[[i]]
    # res_calib$MeanMB$i <- TableLabels

    if (normalise) {
      # Normalise original blocks

      X_mean <- MB@Data[[i]] - matrix(data = rep(1, nrowMB), ncol = 1, nrow = nrowMB) %*% res_calib$MeanMB[[i]]
      # In a previous version (that worked), I had used as.matrix instead of matrix.
      XX <- X_mean * X_mean

      Norm_X <- sqrt(sum(XX))
      X_Normed <- X_mean / Norm_X

      res_calib$NormMB[i] <- Norm_X

      temp_tabCalib[[i]] <- X_Normed
      s_n[[i]] <- X_Normed
    } else {
      res_calib$NormMB[[TableLabels[i]]] <- rep(1, length(MB@Variables[[i]]))
      names(res_calib$NormMB[[TableLabels[i]]]) <- MB@Variables[[i]]
      # res_calib$MeanMB$Variables <- MB@Variables[[i]]
      # res_calib$MeanMB$i <- TableLabels

      temp_tabCalib[[i]] <- MB@Data[[i]]
      s_n[[i]] <- MB@Data[[i]]
    }

    if (i == 1) {
      X_mat[, 1:variable_number[1]] <- MB@Data[[i]]
      Xnorm_mat[, 1:variable_number[1]] <- temp_tabCalib[[i]]
    } else {
      beg <- sum(variable_number[1:(i - 1)]) + 1
      ending <- sum(variable_number[1:i])
      X_mat[, beg:ending] <- MB@Data[[i]]
      Xnorm_mat[, beg:ending] <- temp_tabCalib[[i]]
    }

    # PLS addition
    meanX <- colMeans(Xnorm_mat)
    meanY <- colMeans(y)

    # End PLS addition

    norm_comdim <- Sys.time()
    if (loquace) {
      message(sprintf("Normalization of block %s finished after : %s millisecs", i, (norm_comdim - start_ini_time) * 1000))
    }
    total_progress <- total_progress + pieceBar
    utils::setTxtProgressBar(progress_bar, value = total_progress)
  }

  # res_calib$NormMB$i <- 'Norm'
  # names(res_calib$NormMB$Data) <- TableLabels

  nR <- nrow(Xnorm_mat)
  nC <- ncol(Xnorm_mat)

  # COMPRESSION

  s_r <- Compress_Data_2020(s_n, CompMethod, Partitions)

  end_comp_time <- Sys.time()

  if (loquace) {
    message(sprintf("Compression finished after : %s millisecs", (end_comp_time - start_ini_time) * 1000))
  }

  total_progress <- total_progress + pieceBar * ntable
  utils::setTxtProgressBar(progress_bar, value = total_progress)

  # PLS addition
  ICs <- ndim
  # End of PLS addition

  ## DO ComDim
  # ini_comdim <- Sys.time()

  # saliences <- list()
  saliences <- matrix(, ncol = ndim, nrow = ntable)
  # Q <- list()
  Q <- matrix(, ncol = ndim, nrow = nrowMB)
  unexplained <- ntable # Warning!: true only if the tables were set to norm=1
  varexp <- as.vector(NULL)
  varexp.y <- as.vector(NULL)
  PLS2_Scores <- matrix(, ncol = ndim, nrow = nrowMB)
  PLS2_P <- matrix(, ncol = ndim, nrow = sum(variable_number))
  PLS2_W <- matrix(, ncol = ndim, nrow = sum(variable_number))
  PLS2_Q <- matrix(, ncol = ndim, nrow = ncol(as.matrix(y)))
  PLS2_U <- matrix(, ncol = ndim, nrow = nrowMB)

  for (dims in 1:ndim) {
    previousfit <- 10000
    deltafit <- 1000000
    lambda <- matrix(rep(1, ntable), nrow = ntable, ncol = 1)

    qini <- as.vector(s_r[[1]][, 1])
    qini <- qini / sqrt(as.vector(qini %*% qini)) # random initialization of t + set to unit length
    qold <- 100

    iters <- 0
    while (norm(qold - qini, "2") > threshold && iters < 100) {
      iters <- iters + 1
      qold <- qini
      q <- 0

      W <- matrix(, nrow = nrowMB, ncol = 0)
      for (j in 1:ntable) {
        W <- cbind(W, sqrt(lambda[j]) * s_r[[j]])
      }

      # usv <-svd(W) This was for ComDim_PCA

      # Addition for PLS
      PLS2_Res <- PLS2_DNR_2018(X = W, Y = y, lvs = ICs)

      PLS2_Scores[, dims] <- PLS2_Res$Scores[, 1]
      PLS2_P[, dims] <- PLS2_Res$P[, 1]
      PLS2_W[, dims] <- PLS2_Res$W[, 1]
      PLS2_Q[, dims] <- PLS2_Res$Q[, 1]
      PLS2_U[, dims] <- PLS2_Res$U[, 1]

      sv <- sqrt(t(PLS2_Scores[, dims]) %*% PLS2_Scores[, dims]) # For R2X
      varexp.y[dims] <- (t(PLS2_U[, dims]) %*% PLS2_U[, dims])^2 # For R2Y (see varexp)
      qtmp <- PLS2_Scores[, dims] / as.vector(sv)
      # End of addition for PLS

      # qtmp <- usv$u[,1]

      for (j in 1:ntable) {
        # takes each table into account for lambda after PCA
        lambda[j] <- t(qtmp) %*% (s_r[[j]] %*% t(s_r[[j]])) %*% qtmp
        q <- q + lambda[j] * qtmp
      }

      q <- q / sqrt(as.vector(t(q) %*% q)) # standardizes t
      if (abs(min(q)) > abs(max(q))) {
        q <- -q
      }
      qini <- q
    } # deltafit>threshold

    ICs <- ICs - 1

    saliences[, dims] <- lambda
    Q[, dims] <- q


    # SingVal is square of the SVD singular value
    # because here it is based on X, not X.X'
    # Does NOT work for Kernel & Wide & Tall
    if (CompMethod == "Normal" || CompMethod == "PCT") {
      res_calib$SingVal[dims] <- sv^2 # Calculated from the scores (new for PLS)
      varexp[dims] <- res_calib$SingVal[dims]^2
    } else {
      varexp[dims] <- 0 # This will be overwritten at the end
      res_calib$SingVal[dims] <- 0 # This will be overwritten at the end
    }


    # Deflate blocks
    aux <- diag(nrowMB) - as.matrix(q) %*% t(as.matrix(q))
    for (j in 1:ntable) {
      s_r[[j]] <- aux %*% s_r[[j]]
    }

    iter_comdim <- Sys.time()
    if (loquace) {
      message(sprintf("Component %s determined after : %s millisecs", dims, (iter_comdim - start_ini_time) * 1000))
    }
    total_progress <- total_progress + pieceBar
    utils::setTxtProgressBar(progress_bar, value = total_progress)
  }

  # res_calib$SingVal$i <- 'Singular value'
  names(res_calib$SingVal) <- DimLabels # Dimensions

  # Q$i <- data[[1]]$Samples
  # Q$v <- DimLabels
  colnames(Q) <- DimLabels
  rownames(Q) <- MB@Samples
  res_calib$Q <- Q

  ## Adding metadata. Metadata extracted from the first block
  res_calib$metadata <- list()
  if (length(MB@Metadata) != 0) {
    res_calib$metadata[[1]] <- MB@Metadata
  }

  end_comdim <- Sys.time()
  if (loquace) {
    message(sprintf("Scores finished after : %s millisecs", (end_comdim - start_ini_time) * 1000))
  }
  total_progress <- total_progress + pieceBar
  utils::setTxtProgressBar(progress_bar, value = total_progress)

  # res_calib$s_n <- s_n
  # res_calib$s_r <- s_r
  rm(s_n)
  rm(s_r)

  ##
  # explained <- list()
  explained <- varexp / sum(varexp)
  names(explained) <- DimLabels
  # explained$i <-'% explained'
  # explained$v <- DimLabels #Dimensions
  res_calib$explained <- explained
  rm(varexp)
  rm(explained)

  ## Calculate Sums of saliences for each Dim
  # Sum_saliences_Dim <- list()
  Sum_saliences_Dim <- colSums(saliences)
  names(Sum_saliences_Dim) <- DimLabels
  # Sum_saliences_Dim$i <-'Sum Dim Saliences'
  # Sum_saliences_Dim$v <- DimLabels #Dimensions

  res_calib$Sum_saliences_Dim <- Sum_saliences_Dim
  rm(Sum_saliences_Dim)

  # Calculate Sums of saliences for each Table
  # Sum_saliences_Tab <- list()
  Sum_saliences_Tab <- rowSums(saliences)
  # Sum_saliences_Tab$i <- 'Sum Tab Saliences'
  names(Sum_saliences_Tab) <- TableLabels
  # Sum_saliences_Tab$v <- TableLabels #tables

  res_calib$Sum_saliences_Tab <- Sum_saliences_Tab
  rm(Sum_saliences_Tab)

  # saliences$i <- TableLabels #tables
  # saliences$v <- DimLabels #Dimensions
  res_calib$saliences <- saliences
  colnames(res_calib$saliences) <- DimLabels
  rownames(res_calib$saliences) <- TableLabels

  ## Calculate Normalised concatenated Xs ('Calib') from col
  # Calculate concatenated CD loadings
  # Reorganise Loadings - 1 matrix / LV

  ## If Output = NULL, nothing else is calculated
  # if(!(is.null(output))){
  L_CD_Vec <- NULL
  L_X_Vec <- NULL
  Calib <- NULL

  nCalib <- nrowMB
  # }

  ## Already defined in during ComDim initiallization
  # isT <- grepl(output, 'T')
  # isL <- grepl(output, 'L')
  # isP <- grepl(output, 'P')
  # isLP <- c(isP,isL)
  # isTLP <- c(isT,isP,isL)

  # tic
  # Calculate concatenated CD loadings
  # Reorganise Loadings - 1 matrix / LV
  # L_CD <- list()
  L_X <- list()
  T_Loc <- list()
  for (i in 1:ntable) { # Prepare lists
    # L_CD[[TableLabels[i]]] <- matrix(,ncol = ndim, nrow = ncol(temp_tabCalib[[i]]))
    # L_X[[TableLabels[i]]] <- matrix(,ncol = ndim, nrow = ncol(temp_tabCalib[[i]]))
    T_Loc[[TableLabels[i]]] <- matrix(, ncol = ndim, nrow = nrowMB)
  }
  b <- matrix(, ncol = ndim, nrow = ntable) # Used to calculate R2X in Kernel and Tall

  if (!requireNamespace("pracma", quietly = TRUE)) {
    stop("The package pracma is needed.")
  }

  for (j in 1:ndim) {
    T_mat <- matrix(, nrow = nrowMB, ncol = 0)

    for (i in 1:ntable) {
      temp <- t(temp_tabCalib[[i]]) %*% res_calib$Q[, j] # Scaled CD 'local' Loadings

      # if  (!(is.null(isP))){
      # L_CD[[TableLabels[i]]][,j] <- temp
      # }

      # if  (!(is.null(isL))){
      #  L_X[[TableLabels[i]]][,j] <- t(data[[i]]$Data)%*%res_calib$Q[,j] #Unscaled X 'local' Loadings;
      # }

      # T_mat <- cbind(T_mat,temp_tabCalib[[i]]%*%(temp/as.vector(t(temp)%*%temp))) #local Scores
      T_mat <- cbind(T_mat, temp_tabCalib[[i]] %*% (temp %*% pracma::pinv(t(temp) %*% temp))) # local Scores

      # if(!(is.null(isT))){
      # T_Loc[[TableLabels[i]]][,j] <- temp_tabCalib[[i]]%*%(temp/as.vector(t(temp)%*%temp)) # local Scores
      T_Loc[[TableLabels[i]]][, j] <- temp_tabCalib[[i]] %*% (temp %*% pracma::pinv(t(temp) %*% temp)) # local Scores
      # }


      # Deflate each temp_tabCalib
      temp_tabCalib[[i]] <- temp_tabCalib[[i]] - res_calib$Q[, j] %*% t(temp)
    }

    # For each CC
    # MLR b-coefficients between Local and Global Scores
    # [b0,b[,j]] <- mlr_DB(T_mat,Q$d[,j],0)
    # b=pinv(X'*X)*X'*Y;
    b[, j] <- pracma::pinv(t(T_mat) %*% T_mat) %*% t(T_mat) %*% res_calib$Q[, j]

    # b0 <- 0
  }

  # If Output==[], nothing else is calculated
  # if(!(is.null(output))){
  # Calculate Global Loadings
  # if(!(is.null(isP))){
  L_CD_Vec <- t(Xnorm_mat) %*% res_calib$Q # Scaled CD 'global' Loadings
  # }

  # if(!(is.null(isL))){
  #  L_X_Vec <- t(X_mat)%*%res_calib$Q # Unscaled X 'global' Loadings
  # }
  # }

  #### If Output==[], nothing else is calculated
  load_comdim <- Sys.time()
  if (loquace) {
    message(sprintf("Loadings finished after : %s millisecs", (load_comdim - start_ini_time) * 1000))
  }

  total_progress <- total_progress + pieceBar
  utils::setTxtProgressBar(progress_bar, value = total_progress)

  rm(X_mat)
  rm(temp_tabCalib)
  rm(temp)

  ## Output
  end_function <- Sys.time()

  ## Addition for PLS
  ComDimPLS2_B <- PLS2_W %*% pracma::pinv(t(PLS2_P) %*% PLS2_W) %*% t(PLS2_Q)
  PLS2_Res$ComDimPLS2_B <- ComDimPLS2_B
  ComDimPLS2_B0 <- meanY - colMeans(Xnorm_mat) %*% ComDimPLS2_B
  PLS2_Res$ComDimPLS2_B0 <- ComDimPLS2_B0
  res_calib$PLS2_Res <- PLS2_Res

  # Back-transform B and B0 to the original Y scale when scale.y = TRUE.
  # B_raw[,j]  = B_scaled[,j]  * y_sd[j]
  # B0_raw[j]  = B0_scaled[j]  * y_sd[j] + y_center[j]
  # After this the existing Ypred = Xnorm_mat %*% B + B0 produces predictions
  # in the original (unscaled) Y units.
  if (scale.y) {
    PLS2_Res$ComDimPLS2_B <- sweep(PLS2_Res$ComDimPLS2_B, 2, y_sd, "*")
    PLS2_Res$ComDimPLS2_B0 <- PLS2_Res$ComDimPLS2_B0 * y_sd + y_center
    res_calib$PLS2_Res <- PLS2_Res
    y <- y_raw # restore original y for all downstream computations
  }
  # End of addition for PLS

  res_calib$b <- b
  colnames(res_calib$b) <- DimLabels
  rownames(res_calib$b) <- TableLabels
  # res_calib$b$i <- TableLabels # tables
  # res_calib$b$v <- DimLabels  # Dimensions

  # if(!(is.null(isT))){
  # res_calib$T_Loc$i <- res_calib$Q$i # samples
  # res_calib$T_Loc$v <- DimLabels # Dimensions
  res_calib$T_Loc <- T_Loc

  # }

  # T_Loc is no longer needed
  rm(T_Loc)

  # if(!(is.null(isP))){

  varnames <- vector()
  for (i in 1:ntable) {
    varnames <- append(varnames, MB@Variables[[i]])
  }
  # if(length(which(duplicated(varnames))) != 0){
  #  warning('There are at least two variables with the same name. Their name in the rownames of P has been altered.\n')
  # }

  res_calib$P <- L_CD_Vec
  # res_calib$P$i <- varnames # numbers of all variables;
  # res_calib$P$v <- DimLabels # Dimensions
  colnames(res_calib$P) <- DimLabels
  rownames(res_calib$P) <- varnames
  rownames(PLS2_Res$ComDimPLS2_B) <- varnames
  if (!is.null(colnames(y))) colnames(PLS2_Res$ComDimPLS2_B) <- colnames(y)

  # res_calib$P_Loc <- L_CD
  # res_calib$P_Loc$i <- TableLabels # Tables
  # res_calib$P_Loc$v <- DimLabels # Dimensions
  # for(i in 1:ntable){
  #  rownames(res_calib$P_Loc[[TableLabels[i]]]) <- MB@Variables[[i]]
  #  colnames(res_calib$P_Loc[[TableLabels[i]]]) <- DimLabels
  # }
  # }

  rm(L_CD_Vec)
  # rm(L_CD)

  ## -------------------------------------------------------------------------
  ## VIP (Variable Importance in Projection) — Wold formula.
  ##
  ##   W_j  = L2-normalised column of the PLS weight matrix W (per block)
  ##   s_a  = colSums(T^2) * colSums(Q^2)   [length ndim]
  ##   VIP_j = sqrt( p * sum_a(W_ja^2 * s_a) / sum_a(s_a) )
  ##
  ## where p = p_b (block size) for per-block VIP, p_total for global VIP.
  ## Per-block result: data.frame with columns "p" (pred VIP) and "tot" (= "p").
  ## -------------------------------------------------------------------------

  s_vip     <- colSums(PLS2_Scores^2) * colSums(PLS2_Q^2)
  sum_s_vip <- sum(s_vip)

  if (sum_s_vip < .Machine$double.eps) {
    warning("Sum of explained SS is zero; VIP scores cannot be computed.")
    VIP_global <- setNames(rep(NA_real_, length(varnames)), varnames)
    VIP_blocks <- setNames(
      lapply(seq_len(ntable), function(i) {
        v <- rep(NA_real_, variable_number[i])
        data.frame(p = v, tot = v, row.names = MB@Variables[[i]])
      }),
      TableLabels
    )
  } else {
    # Global VIP: L2-normalise W columns across all variables
    W_norm_all <- apply(PLS2_W, 2, function(w) {
      nrm <- sqrt(sum(w^2)); if (nrm > 1e-14) w / nrm else w
    })
    p_total    <- nrow(PLS2_W)
    VIP_global <- setNames(
      as.vector(sqrt(p_total * (W_norm_all^2 %*% s_vip) / sum_s_vip)),
      varnames
    )

    # Per-block VIP: L2-normalise W columns using only the block's rows
    VIP_blocks <- setNames(vector("list", ntable), TableLabels)
    k_vip <- 1L
    for (i in seq_len(ntable)) {
      p_b  <- variable_number[i]
      idx  <- k_vip:(k_vip + p_b - 1L)
      W_b_norm <- apply(PLS2_W[idx, , drop = FALSE], 2, function(w) {
        nrm <- sqrt(sum(w^2)); if (nrm > 1e-14) w / nrm else w
      })
      vip_i <- as.vector(sqrt(p_b * (W_b_norm^2 %*% s_vip) / sum_s_vip))
      VIP_blocks[[TableLabels[i]]] <- data.frame(
        p = vip_i, tot = vip_i, row.names = MB@Variables[[i]]
      )
      k_vip <- k_vip + p_b
    }
  }
  ## -------------------------------------------------------------------------

  # if(!(is.null(isL))){
  #   res_calib$Lx$i <- t(c(1:nC)) # numbers of all variables;
  #   res_calib$Lx$v <- DimLabels # Dimensions
  #   res_calib$Lx$Data <- L_X_Vec
  #   colnames(res_calib$Lx$Data) <- DimLabels
  #   rownames(res_calib$Lx$Data) <- t(c(1:nC))
  #   #res_calib$Lx_Loc$i <- TableLabels # Tables
  #   res_calib$Lx_Loc$v <- DimLabels # Dimensions
  #   res_calib$Lx_Loc$Data <- L_X
  #   for (i in 1:ntable){
  #     colnames(res_calib$Lx_Loc$Data[[TableLabels[i]]]) <- DimLabels
  #     rownames(res_calib$Lx_Loc$Data[[TableLabels[i]]]) <- data[[i]]$Variables
  #   }
  # }
  # rm(L_X_Vec)
  # rm(L_X)

  ##
  # Singular value calculated from b and saliences
  # Since :
  # b_T_Q(:,j)=saliences.d(:,j).*saliences.d(:,j)/SingVal.d(1,j);
  if (CompMethod == "Kernel" || CompMethod == "Tall" || CompMethod == "Wide") {
    for (dims in 1:ndim) {
      res_calib$SingVal[dims] <- t(b[, dims]) %*% ((saliences[, dims]^2) / as.vector(t(b[, dims]) %*% b[, dims]))
    }
    names(res_calib$SingVal) <- DimLabels
    varexp <- res_calib$SingVal^2
    res_calib$explained <- varexp / sum(varexp)
    names(res_calib$explained) <- DimLabels
  }

  rm(saliences)

  # Define block length
  belong_block <- rep(0, nrow(res_calib$P))
  k <- 1
  for (i in 1:ntable) {
    belong_block[k:(k + variable_number[i] - 1)] <- TableLabels[i]
    k <- k + variable_number[i]
  }

  #### If Output==[], nothing else is calculated
  # if(normalise == 1){
  #  res_calib$Xnorm_mat$i <- res_calib$Q$i # samples
  #  res_calib$Xnorm_mat$v <- t(c(1:nC)) # numbers of all variables;
  #  res_calib$Xnorm_mat$Data <- Xnorm_mat
  # }
  # rm(Xnorm_mat)
  #### If Output==[], nothing else is calculated

  rm(Q)

  ## 07/08/2022 Calculate Y predictive and rescale.
  Ypred <- Xnorm_mat %*% PLS2_Res$ComDimPLS2_B

  for (i in seq_len(ncol(y))) {
    Ypred[, i] <- unlist(Ypred[, i]) + PLS2_Res$ComDimPLS2_B0[i]
  }
  rownames(Ypred) <- MB@Samples
  if (!is.null(colnames(y))) colnames(Ypred) <- colnames(y)

  ## R2Y — cumulative OLS of Y on [1 | Q_1..j] for j = 1..ndim.
  ## r2y[j] reports the fraction of Y variance explained by the first j global
  ## ComDim scores. An explicit intercept is included so that Y with non-zero
  ## mean is handled correctly.
  Y_c_r2   <- sweep(y, 2, colMeans(y))
  sstot_r2 <- sum(Y_c_r2^2)
  if (sstot_r2 < .Machine$double.eps) {
    r2y <- setNames(rep(0, ndim), paste0("CC", seq_len(ndim)))
  } else {
    r2y <- setNames(numeric(ndim), paste0("CC", seq_len(ndim)))
    for (.j in seq_len(ndim)) {
      T_r2_j     <- cbind(1, res_calib$Q[, seq_len(.j), drop = FALSE])
      B_r2_j     <- pracma::pinv(crossprod(T_r2_j)) %*% crossprod(T_r2_j, y)
      Ypred_r2_j <- T_r2_j %*% B_r2_j
      r2y[.j]    <- 1 - sum((Ypred_r2_j - y)^2) / sstot_r2
    }
  }
  rm(Y_c_r2)

  ## 07/08/2022 Predict classes
  if (method == "PLS-DA") {
    predClass <- rep(NA, nrow(y))

    if (decisionRule == "max") {
      for (i in seq_len(nrow(Ypred))) {
        xx <- which(Ypred[i, ] == max(Ypred[i, ], na.rm = TRUE))
        if (length(xx) == 1) {
          predClass[i] <- colnames(y)[xx] # Y stores the Class names
        } else {
          predClass[i] <- NaN # There is a tie.
          warning(sprintf("Class for sample %s could not be predicted.", i))
        }
      }
    } else if (decisionRule == "fixed") {
      for (i in seq_len(nrow(Ypred))) {
        xx <- which(Ypred[, i] > 1 / ncol(y))
        if (length(xx) == 1) {
          predClass[i] <- colnames(y)[xx] # Y stores the Class names
        } else {
          predClass[i] <- NaN # There is a tie.
          warning(sprintf("Class for sample %s could not be predicted.", i))
        }
      }
    }

    if (is.matrix(y)) {
      meanY <- colMeans(y)
    } else {
      meanY <- mean(y)
    }

    classy <- colnames(y)
    Q2 <- DQ2 <- specvec <- sensvec <- rep(NA, length(classy))
    confusionMatrix <- list()

    for (i in seq_along(classy)) {
      pos <- which(classVect == classy[i])
      neg <- which(classVect != classy[i])
      tp <- length(which(classVect == classy[i] & predClass == classy[i]))
      tn <- length(which(classVect != classy[i] & predClass != classy[i]))
      fp <- length(which(classVect != classy[i] & predClass == classy[i]))
      fn <- length(which(classVect == classy[i] & predClass != classy[i]))

      if (length(tp) == 0) {
        tp <- 0
      }

      if (length(tn) == 0) {
        tn <- 0
      }

      if (length(fp) == 0) {
        fp <- 0
      }

      if (length(fn) == 0) {
        fn <- 0
      }

      sensvec[i] <- tp / length(pos)
      specvec[i] <- tn / length(neg)

      cm <- matrix(c(tp, fp, fn, tn), ncol = 2, nrow = 2)
      rownames(cm) <- c("predClass1", "predClass0")
      colnames(cm) <- c("trueClass1", "trueClass0")
      confusionMatrix[[classy[i]]] <- cm

      ## DQ2
      E0 <- Ypred[neg, i] - y[neg, i] # Calculate Residuals of Class0 samples
      E1 <- Ypred[pos, i] - y[pos, i] # Calculate Residuals of Class1 samples

      E0count <- which(E0 > 0) # Find predictions for Class0 samples larger than 0
      E1count <- which(E1 < 0) # Find predictions for Class1 samples larger than 1

      SSE0_all <- t(E0) %*% E0 # Calculate SSE for those samples of Class0
      SSE1_all <- t(E1) %*% E1 # Calculate SSE for those samples of Class1

      SSE0 <- t(E0[E0count]) %*% E0[E0count] # Calculate SSE for those samples of Class0
      SSE1 <- t(E1[E1count]) %*% E1[E1count] # Calculate SSE for those samples of Class1

      PRESSD <- SSE0 + SSE1 # Calculate total SSE for all samples = PRESSD
      PRESS <- SSE0_all + SSE1_all

      Ym <- y[, i] - mean(y[, i]) # Calculate Total sum of squares (over all samples)
      TSS <- t(Ym) %*% Ym

      DQ2[i] <- 1 - (PRESSD / TSS) # Calculate DQ2
      Q2[i] <- 1 - PRESS / TSS
    }
    names(Q2)      <- classy
    names(DQ2)     <- classy
    names(sensvec) <- classy
    names(specvec) <- classy
  } else if (method == "PLS-R") {
    PRESS_r <- colSums((Ypred - y)^2)
    TSS_r <- colSums(sweep(y, 2, colMeans(y))^2)
    Q2 <- setNames(1 - PRESS_r / TSS_r, colnames(y))
  }

  # Ypred <-Ypred*meanY # Re-scale Ypred. I think this is not needed.

  ## -------------------------------------------------------------------------
  ## k-fold cross-validation: Q2 and DQ2
  ## Fold assignment is sequential:
  ##   test indices for fold k = seq(k, n, by = cv.k)
  ## -------------------------------------------------------------------------
  if (cv.k >= 2) {
    # Build the fold-id vector from sequential assignment (reproducible,
    fold_ids <- integer(nrowMB)
    for (.f in seq_len(cv.k)) fold_ids[seq.int(.f, nrowMB, by = cv.k)] <- .f

    Ypred_cv <- matrix(NA_real_, nrow = nrowMB, ncol = ncol(y))
    colnames(Ypred_cv) <- colnames(y)

    for (fold in seq_len(cv.k)) {
      test_idx  <- seq.int(fold, nrowMB, by = cv.k)
      train_idx <- setdiff(seq_len(nrowMB), test_idx)

      # Subset MB to training samples
      MB_cv <- MB
      MB_cv@Data <- lapply(MB@Data, function(x) x[train_idx, , drop = FALSE])
      MB_cv@Samples <- MB@Samples[train_idx]

      # Fit inner model (cv.k = 0 prevents nested CV; output suppressed).
      # scale.y is forwarded so each fold independently centres/scales its
      # training Y and stores back-transformed B / B0 in original Y units.
      invisible(utils::capture.output(
        res_cv_fold <- ComDim_PLS(
          MB = MB_cv,
          y = y[train_idx, , drop = FALSE],
          ndim = ndim, method = method,
          normalise = normalise,
          scale.y = scale.y,
          threshold = threshold,
          loquace = FALSE,
          CompMethod = CompMethod,
          Partitions = Partitions,
          cv.k = 0
        )
      ))

      # Build raw test X (concatenated blocks, same order as Xnorm_mat)
      X_test <- matrix(0, nrow = length(test_idx), ncol = sum(variable_number))
      k_idx <- 1
      for (i in seq_len(ntable)) {
        vi <- variable_number[i]
        X_test[, k_idx:(k_idx + vi - 1)] <- MB@Data[[i]][test_idx, , drop = FALSE]
        k_idx <- k_idx + vi
      }

      # Apply training normalisation to test set (only when normalise = TRUE)
      if (normalise) {
        k_idx <- 1
        for (i in seq_len(ntable)) {
          vi <- variable_number[i]
          mn <- res_cv_fold@Mean$MeanMB[[i]] # training col means
          nrm <- res_cv_fold@Norm$NormMB[i] # training Frobenius norm
          X_test[, k_idx:(k_idx + vi - 1)] <-
            (X_test[, k_idx:(k_idx + vi - 1)] -
              matrix(mn, nrow = length(test_idx), ncol = vi, byrow = TRUE)) / nrm
          k_idx <- k_idx + vi
        }
      }

      # Predict Y for test fold
      B_cv <- res_cv_fold@PLS.model$B
      B0_cv <- res_cv_fold@PLS.model$B0
      Ypred_cv[test_idx, ] <-
        X_test %*% B_cv +
        matrix(B0_cv, nrow = length(test_idx), ncol = ncol(y), byrow = TRUE)
    }

    # Q2 per response column (or class)
    TSS_cv <- colSums(sweep(y, 2, colMeans(y))^2)
    PRESS_cv <- colSums((Ypred_cv - y)^2)
    Q2_cv <- setNames(1 - PRESS_cv / TSS_cv, colnames(y))

    # DQ2 per class then averaged across classes (PLS-DA only).
    # Per-class values are stored in cv$DQ2.perclass;
    if (method == "PLS-DA") {
      DQ2_per_class <- setNames(rep(NA_real_, ncol(y)), colnames(y))
      for (i in seq_len(ncol(y))) {
        pos <- which(classVect == colnames(y)[i])
        neg <- which(classVect != colnames(y)[i])
        E0 <- Ypred_cv[neg, i] - y[neg, i]
        E1 <- Ypred_cv[pos, i] - y[pos, i]
        PRESSD <- sum(E0[E0 > 0]^2) + sum(E1[E1 < 0]^2)
        DQ2_per_class[i] <- 1 - PRESSD / TSS_cv[i]
      }
      DQ2_cv <- mean(DQ2_per_class)   # single averaged value (matches ComDim_OPLS)
    } else {
      DQ2_per_class <- vector()
      DQ2_cv <- vector()
    }

    # Override training R2 with CV-based Q2/DQ2
    Q2 <- Q2_cv
    if (method == "PLS-DA") DQ2 <- DQ2_cv

    cv_result <- list(
      k              = cv.k,
      fold           = fold_ids,
      Ypred          = Ypred_cv,
      Q2             = Q2_cv,
      DQ2            = if (method == "PLS-DA") DQ2_cv        else NULL,
      DQ2.perclass   = if (method == "PLS-DA") DQ2_per_class else NULL
    )
  } else {
    if (cv.k != 0) {
      warning("'cv.k' must be >= 2 or 0 (skip CV). No cross-validation performed.")
    }
    cv_result <- list()
  }
  ## -------------------------------------------------------------------------

  # The way R2X and R2Y are calculated must be verified.
  # TSS <- sum(diag(Xnorm_mat))
  # R2X <- matrix(rep(NA, ndim),
  #              ncol = 1,
  #              nrow = ndim)
  # R2Y <- vector()
  #
  # #for (i in 1:ndim){
  #   #rss <- sum(diag(Xnorm_mat - (T_mat[,i] %*% t(T_mat[,i]))))
  #   #R2X[i,1] <- 1 - rss/TSS
  #   for(j in 1:ncol(y)){
  #     PRESS <- sum((y[,j] - Ypred[,j])^2)
  #     TSSY <- sum((y[,j] - mean(y[,j]))^2)
  #     R2Y[j] <- 1 - PRESS/TSSY
  #   }
  #
  # #}
  # #rownames(R2X) <- paste0("CC",1:ndim)
  # names(R2Y) <- paste0("Y",1:ncol(y))


  # ssqX = sum(sum(Xnorm_mat*Xnorm_mat))
  # ssqY = sum(sum(y*y))
  #
  #
  # R2X <- R2Y <- vector()
  # for (i in 1:ndim){
  #
  #   for(j in 1:ncol(y)){
  #      # PRESS for Q2
  #     R2X[i] = ((t(T_mat[,i]) %*% T_mat[,i] %*% t(PLS2_P[,i]) %*% PLS2_P[,i])/ssqX)*100
  #     R2Y[i] = ((t(T_mat[,i]) %*% T_mat[,i] %*% t(PLS2_Q[i,1])%*% PLS2_Q[i,1])/ssqY)*100
  #
  #     if(ncol(y) == 2){
  #
  #     }
  #   }
  #
  # }
  #
  # PLS2_Res$R2X <- R2X
  # PLS2_Res$R2Y <- R2Y
  rm(T_mat)

  end_output <- Sys.time()
  running_time <- (end_output - start_ini_time) # Total time of analysis
  if (loquace) {
    message(sprintf("Analysis finished after : %s millisecs", running_time * 1000))
  }

  progress_bar <- utils::txtProgressBar(min = 0, max = 80, style = 3, char = "=")
  utils::setTxtProgressBar(progress_bar, value = 80)

  close(progress_bar)

  # Save data in a ComDim structure

  res_calib <- new("ComDim",
    Method = method, # PLS-DA or PLS-R
    ndim = ndim,
    Q.scores = res_calib$Q,
    T.scores = res_calib$T_Loc,
    P.loadings = res_calib$P,
    Saliences = res_calib$saliences,
    Orthogonal = list(),
    VIP = VIP_global,
    VIP.block = VIP_blocks,
    R2X = res_calib$explained,
    R2Y = r2y,
    Q2 = Q2,
    DQ2 = if (method == "PLS-DA") {
      DQ2
    } else {
      vector()
    },
    Singular = res_calib$SingVal,
    Mean = list(
      MeanMB = res_calib$MeanMB,
      MeanY = y_center, # original Y column means
      ScaleY = y_sd
    ), # original Y column SDs (1 if scale.y=FALSE)
    Norm = list(NormMB = res_calib$NormMB),
    PLS.model = list(
      W = PLS2_W,
      B = PLS2_Res$ComDimPLS2_B, # in original Y units
      B0 = PLS2_Res$ComDimPLS2_B0, # in original Y units
      Y = y_raw
    ), # original (unscaled) Y
    cv = cv_result,
    Prediction = if (method == "PLS-DA") { # To do?
      list(
        Y.pred = Ypred,
        decisionRule = decisionRule,
        trueClass = classVect,
        predClass = data.frame(predClass = predClass, row.names = MB@Samples),
        Sensitivity = sensvec,
        Specificity = specvec,
        confusionMatrix = confusionMatrix
      )
    } else {
      list(Y.pred = Ypred)
    },
    Metadata = res_calib$metadata,
    variable.block = belong_block,
    runtime = as.numeric(running_time)
  )

  return(res_calib)
}


#' Compress_Data_2020
#'
#' Compress large multi-block objects. Internal function of ComDim_PLS().
#' @param s_n The multi-block object: a list of numeric matrices sharing the
#'   same rows (samples).
#' @param CompMethod It can be 'Normal' (default), 'Kernel', 'PCT', 'Tall' or 'Wide'.
#' @param Partitions The number of partitions (used by 'Tall' and 'Wide').
#' @return The compressed multi-block: a list with one matrix per input block.
#'   An unrecognised \code{CompMethod} yields an empty list (legacy behaviour).
#' @references Original MATLAB implementation:
#'   \url{https://github.com/DNRutledge/ComDim/blob/main/Compress_Data_2020.m}
#' @keywords internal
Compress_Data_2020 <- function(s_n = s_n, CompMethod = CompMethod, Partitions = Partitions) {
  s_r <- list()
  ntable <- length(s_n)
  nR <- nrow(s_n[[1]])

  if (!is.null(CompMethod)) {
    if (CompMethod == "Tall") {
      # Tall segmented PCT: scores computed through a row-partitioned
      # eigendecomposition (memory-friendly for blocks with many rows).
      for (i in seq_len(ntable)) {
        MaxRank <- min(nrow(s_n[[i]]), ncol(s_n[[i]]))
        s_r[[i]] <- PCA_Tall_PCT_DNR(Xn = s_n[[i]], PCs = MaxRank, Partitions = Partitions)
      }
    } else if (CompMethod == "Wide") {
      # Wide: split columns into partitions, replace each partition by its
      # SVD score matrix U * diag(d), then concatenate the scores side by side.
      for (i in seq_len(ntable)) {
        Xi <- ColumnsPartition(Xn = s_n[[i]], Partitions = Partitions)
        T_list <- vector("list", Partitions)
        for (p in seq_len(Partitions)) {
          usv <- svd(Xi[[p]])
          # diag(d, nrow = length(d)) keeps a 1x1 matrix when the partition
          # yields a single singular value; plain diag(d) would build an
          # identity matrix of size round(d) in that case and break the
          # matrix product.
          T_list[[p]] <- usv$u %*% diag(usv$d, nrow = length(usv$d))
        }
        s_r[[i]] <- do.call(cbind, T_list)
      }
    } else if (CompMethod == "Kernel") {
      # Kernel: decompose the sample-space kernel X X'/nR and rebuild scores;
      # equivalent inertia to PCT but cheaper when variables >> samples.
      for (i in seq_len(ntable)) {
        Kern <- (s_n[[i]] %*% t(s_n[[i]])) / nR
        usv <- svd(Kern)
        d_sqrt <- sqrt(usv$d)
        # Same diag() guard as above for rank-1 / single-value kernels.
        s_r[[i]] <- sqrt(nR) * usv$u %*% diag(d_sqrt, nrow = length(d_sqrt))
      }
    } else if (CompMethod == "PCT") {
      # PCT: replace each block by its full SVD score matrix U * diag(d).
      for (i in seq_len(ntable)) {
        usv <- svd(s_n[[i]])
        s_r[[i]] <- usv$u %*% diag(usv$d, nrow = length(usv$d))
      }
    } else if (CompMethod == "Normal") {
      # Normal: no compression, blocks are passed through untouched.
      for (i in seq_len(ntable)) {
        s_r[[i]] <- s_n[[i]]
      }
    }
  }
  return(s_r)
}


#' PCA_Tall_PCT_DNR
#'
#' Compress a block using the PCT method, computed through row partitions so
#' that the full cross-product matrix is accumulated without ever holding all
#' rows of the projection at once. Internal function of ComDim_PLS().
#' @param Xn The block to compress (samples x variables).
#' @param PCs Number of PCs to keep.
#' @param Partitions The number of partitions.
#' @return The compressed block: a nrow(Xn) x PCs score matrix.
#' @keywords internal
PCA_Tall_PCT_DNR <- function(Xn = Xn, PCs = PCs, Partitions = Partitions) {
  W <- RowsPartition(Xn, Partitions)

  # Accumulate D = t(Xn) %*% Xn partition by partition.
  D <- t(W[[1]]) %*% W[[1]])
  if (Partitions > 1) {
    for (par in 2:Partitions) {
      D <- D + (t(W[[par]]) %*% W[[par]])
    }
  }

  # D is symmetric by construction; symmetric = TRUE forces the real
  # symmetric solver (guaranteed real eigenvectors, eigenvalues returned in
  # decreasing order), instead of the general solver which may return
  # complex output under tiny numerical asymmetries.
  vs <- eigen(D, symmetric = TRUE)
  V <- vs$vectors

  # Project each row partition onto the loadings and stack the scores.
  Taux <- matrix(numeric(0), nrow = 0, ncol = PCs)
  for (par in seq_len(Partitions)) {
    to <- W[[par]] %*% V
    # drop = FALSE keeps matrix shape when PCs == 1 (otherwise the column
    # collapses to a vector and rbind() would stack it as a *row*).
    Taux <- rbind(Taux, to[, 1:PCs, drop = FALSE])
  }
  return(Taux)
}

#' ColumnsPartition
#'
#' Calculate vertical partitions in a block. The first (Partitions - 1)
#' partitions get floor(ncol/Partitions) columns each; the last partition
#' absorbs the remainder. Internal function of ComDim_PLS().
#' @param Xn A block (matrix).
#' @param Partitions The number of partitions.
#' @return A list of column-partitioned sub-matrices (always matrices, even
#'   for single-column partitions).
#' @keywords internal
ColumnsPartition <- function(Xn = Xn, Partitions = Partitions) {
  cols <- ncol(Xn)
  stride <- floor(cols / Partitions)
  remaining <- cols %% Partitions

  W <- vector("list", Partitions)
  count <- 1

  for (i in seq_len(Partitions)) {
    step <- count + stride - 1
    if (i == Partitions) {
      # Last partition takes the leftover columns when cols is not an exact
      # multiple of Partitions.
      step <- step + remaining
    }
    # drop = FALSE: a single-column partition must stay a matrix, since
    # downstream code (e.g. svd() in the 'Wide' compression) expects one.
    W[[i]] <- Xn[, count:step, drop = FALSE]
    count <- count + stride
  }
  return(W)
}

#' RowsPartition
#'
#' Calculate horizontal partitions in a block. The first (Partitions - 1)
#' partitions get floor(nrow/Partitions) rows each; the last partition
#' absorbs the remainder. Internal function of ComDim_PLS().
#' @param Xn A block (matrix).
#' @param Partitions The number of partitions.
#' @return A list of row-partitioned sub-matrices (always matrices, even for
#'   single-row partitions).
#' @keywords internal
RowsPartition <- function(Xn = Xn, Partitions = Partitions) {
  rows <- nrow(Xn)
  stride <- floor(rows / Partitions)
  remaining <- rows %% Partitions

  W <- vector("list", Partitions)
  count <- 1

  for (i in seq_len(Partitions)) {
    step <- count + stride - 1
    if (i == Partitions) {
      # Last partition takes the leftover rows when rows is not an exact
      # multiple of Partitions.
      step <- step + remaining
    }
    # drop = FALSE: a single-row partition must stay a matrix, since
    # downstream code (e.g. t(W[[par]]) %*% W[[par]] in PCA_Tall_PCT_DNR)
    # relies on matrix dimensions.
    W[[i]] <- Xn[count:step, , drop = FALSE]
    count <- count + stride
  }
  return(W)
}

#' PLS2_DNR_2018
#'
#' NIPALS PLS2 regression of Y on X. Internal function of ComDim_PLS().
#' Extracts \code{lvs} latent variables sequentially; after each component
#' both X and Y are deflated by that component's contribution, and cumulative
#' regression coefficients for models with 1..i components are stored.
#' X and Y are used as supplied (no centring/scaling is done here; the
#' intercept B0 compensates via the column means).
#' @param X Predictor matrix (samples x variables).
#' @param Y Response matrix (samples x responses). A plain-vector Y branch
#'   exists but is commented out, so callers are expected to pass a matrix.
#' @param lvs Number of latent variables to extract.
#' @return A list with: B, B0 (coefficients/intercept of the full lvs-component
#'   model), Scores (X-scores Te), P (X-loadings), W (X-weights), U (Y-scores),
#'   Q (Y-loadings), B_mat/B0_mat (per-component cumulative coefficients), and
#'   Yhat (fitted values on the original X).
#' @keywords internal
PLS2_DNR_2018 <- function(X = X, Y = Y, lvs = lvs) {
  # Store original X (needed at the end for Yhat; X itself gets deflated)
  X_old <- X
  px <- ncol(X)
  rows <- nrow(X)
  # if(is.vector(Y)){
  #   u_old <- as.matrix(Y)
  #   py <- 1
  # } else {
  u_old <- Y[, 1] # Any column of Y can be used to initialize u.
  py <- ncol(Y)
  # }

  # Initialize matrices:
  #   W  - X-weights   (px x lvs)
  #   P  - X-loadings  (px x lvs)
  #   Q  - Y-loadings  (py x lvs)
  #   Te - X-scores    (rows x lvs)
  #   U  - Y-scores    (rows x lvs)
  W <- matrix(data = rep(0, px * lvs), nrow = px, ncol = lvs)
  P <- W
  Q <- matrix(data = rep(0, py * lvs), nrow = py, ncol = lvs)
  Te <- matrix(data = rep(0, rows * lvs), nrow = rows, ncol = lvs)
  U <- Te

  # Column means of the ORIGINAL (undeflated) X and Y, used for the
  # intercepts B0 below.
  if (is.matrix(X)) {
    meanX <- colMeans(X)
  } else {
    meanX <- mean(X)
  }

  if (is.matrix(Y)) {
    meanY <- colMeans(Y)
  } else {
    meanY <- mean(Y)
  }

  # Necessary for PLS_ICA where nLVs decreases to 1
  # NOTE(review): each B_mat[[i]] is px x py, but B0_mat[[i]] is preallocated
  # here as lvs x py and then overwritten below with a 1 x py row — the
  # preallocated shape appears to be a leftover; confirm before relying on it.
  B0_mat <- B_mat <- list(NULL) # Each item in the list is a matrix with px nrow and py ncol.

  for (i in 1:lvs) {
    B_mat[[i]] <- matrix(rep(0, px * py), nrow = px, ncol = py)
    B0_mat[[i]] <- matrix(rep(0, lvs * py), nrow = lvs, ncol = py)
  }

  for (i in 1:lvs) {
    limite <- 0
    # NIPALS inner loop for component i: alternate between X-weights w,
    # X-scores ti, Y-loadings q and Y-scores u until u stabilises
    # (2-norm change < 1e-6) or 1000 iterations have been performed.
    while (TRUE) { # Repeat until breaking the loop
      w <- t(u_old) %*% X
      w <- w / norm(w, type = "2") # unit-length weight vector
      ti <- X %*% t(w)
      ti <- ti / as.vector(w %*% t(w)) # w is unit-norm, so this divides by 1
      q <- t(ti) %*% Y
      q <- as.vector(q) / as.vector(t(ti) %*% ti)
      u <- (Y %*% q) / as.vector(t(q) %*% q)
      if (norm(u - u_old, type = "2") < 1e-6) {
        break
      }
      limite <- limite + 1
      if (limite > 1000) {
        break
      }
      # u_old is deliberately updated only AFTER the convergence check; on
      # exit it also seeds the inner loop of the next component.
      u_old <- u
    }
    # X-loadings for the converged component.
    p <- t(ti) %*% X
    p <- p / as.vector(t(ti) %*% ti)

    W[, i] <- w
    P[, i] <- p
    Q[, i] <- q
    Te[, i] <- ti
    U[, i] <- u
    # Deflate: remove component i's contribution from both blocks before
    # extracting the next component.
    X <- X - ti %*% p
    Y <- Y - ti %*% q

    # Cumulative regression coefficients for a model using components 1..i:
    # B = W (P'W)^-1 Q'. pinv() guards against a singular/ill-conditioned P'W.
    i_nums <- c(1:i)
    # print(dim(pracma::pinv(t(P[,i_nums]) %*% W[,i_nums])))
    if (ncol(Y) == 1) {
      # Single-response case: Q[, i_nums] drops to a vector, so as.matrix()
      # restores the column shape expected by %*%.
      B_mat[[i]] <- (W[, i_nums] %*% pracma::pinv(t(P[, i_nums]) %*% W[, i_nums])) %*% as.matrix(Q[, i_nums])
    } else {
      B_mat[[i]] <- (W[, i_nums] %*% pracma::pinv(t(P[, i_nums]) %*% W[, i_nums])) %*% t(Q[, i_nums])
    }

    # Intercept so that predictions reproduce the original Y means
    # (meanX is treated as a 1 x px row vector by %*%).
    B0_mat[[i]] <- meanY - meanX %*% B_mat[[i]]
  }

  # Full-model coefficients using all lvs components.
  B <- (W %*% pracma::pinv(t(P) %*% W)) %*% t(Q)
  B0 <- meanY - meanX %*% B

  PLS2_Res <- list()
  PLS2_Res$B <- B
  PLS2_Res$B0 <- B0
  PLS2_Res$Scores <- Te
  PLS2_Res$P <- P
  PLS2_Res$W <- W
  PLS2_Res$U <- U
  PLS2_Res$Q <- Q

  PLS2_Res$B_mat <- B_mat
  PLS2_Res$B0_mat <- B0_mat
  # Fitted values on the original (undeflated) X, intercept added row-wise.
  PLS2_Res$Yhat <- X_old %*% B + rep(1, rows) %*% B0

  return(PLS2_Res)
}

Try the R.ComDim package in your browser

Any scripts or data that you put into this service are public.

R.ComDim documentation built on May 13, 2026, 9:07 a.m.