#' ComDim_Exploratory - Extending matrix decomposition methods to multi-block data.
#'
#' Extends any matrix decomposition method used for exploratory purposes to the multi-block
#' field. The user provides a function (\code{FUN}) to compute the scores from the
#' salience-weighted concatenated blocks; these scores are then used to derive the global
#' scores, local scores, and loadings following the traditional ComDim-PCA framework.
#' @param MB A MultiBlock object.
#' @param ndim Number of Common Dimensions.
#' @param FUN The function used as the core of the ComDim analysis. It must accept a matrix
#' \code{W} (salience-weighted concatenated blocks) and \code{ndim} as its first two
#' arguments, and return a matrix of scores with one column per component.
#' @param normalise To apply block normalisation. FALSE == no (default), TRUE == yes.
#' @param threshold The threshold limit to stop the iterations. Iterations stop when the
#' change in the global score vector is below this value (1e-10 as default).
#' @param loquace To display the calculation times. TRUE == yes, FALSE == no (default).
#' @param method A string label identifying the decomposition method used (default: 'FUN').
#' @param ... Additional arguments passed to \code{FUN}.
#' @return A \code{ComDim} object. Slots for supervised analysis
#' (\code{R2Y}, \code{Q2}, \code{DQ2}, \code{VIP}, \code{VIP.block},
#' \code{PLS.model}, \code{cv}, \code{Prediction}) are empty. The
#' populated slots are:
#' \describe{
#' \item{\code{Method}}{The label supplied via the \code{method} argument.}
#' \item{\code{ndim}}{Number of Common Dimensions extracted.}
#' \item{\code{Q.scores}}{Global scores matrix (\eqn{n \times ndim}).
#' Column names are \code{CC1}, \code{CC2}, etc.; row names are sample
#' names. Each column \eqn{\mathbf{q}_a} is a unit-norm consensus score
#' derived from the dominant left direction of FUN applied to the
#' salience-weighted concatenated blocks
#' \eqn{\mathbf{W} = [\sqrt{\lambda_1}\mathbf{X}_1 \mid \cdots \mid
#' \sqrt{\lambda_B}\mathbf{X}_B]}.}
#' \item{\code{T.scores}}{Named list of block-specific local scores matrices
#' (\eqn{n \times ndim} each). For block \eqn{b} and component \eqn{a}:
#' local loading \eqn{\mathbf{p}_{ba} = \mathbf{X}_b'\mathbf{q}_a} and
#' local score
#' \eqn{\mathbf{t}_{ba} =
#' \mathbf{X}_b\,\mathbf{p}_{ba}(\mathbf{p}_{ba}'\mathbf{p}_{ba})^{-1}}.}
#' \item{\code{P.loadings}}{Global loadings matrix (\eqn{p_{tot} \times
#' ndim}). Column \eqn{a} is
#' \eqn{\mathbf{P}_a = \mathbf{X}'\mathbf{q}_a},
#' where \eqn{\mathbf{X}} is the mean-centred (and optionally normalised)
#' concatenated blocks.}
#' \item{\code{Saliences}}{Block salience (weight) matrix (\eqn{ntable
#' \times ndim}, row names = block names). Entry \eqn{(b,a)} is
#' \eqn{\lambda_{ba} =
#' \mathbf{q}_a'\mathbf{X}_b\mathbf{X}_b'\mathbf{q}_a},
#' the variance of block \eqn{b} captured by global score \eqn{a}.}
#' \item{\code{R2X}}{Proportion of multi-block variance captured by each
#' component (named vector, length \eqn{ndim}). Let \eqn{\mathbf{s}_a}
#' be the score vector returned by FUN for component \eqn{a} (before
#' unit-normalisation to obtain \eqn{\mathbf{q}_a}), so that
#' \eqn{sv_a = \|\mathbf{s}_a\|}; then
#' \deqn{R2X_a = sv_a^4 \big/ \sum_k sv_k^4 =
#' Singular_a^2 \big/ \sum_k Singular_k^2.}}
#' \item{\code{Singular}}{Squared L2 norms of the FUN score vectors:
#' \eqn{Singular_a = sv_a^2 = \|\mathbf{s}_a\|^2}, used to derive
#' \code{R2X}.}
#' \item{\code{Mean}}{List with \code{MeanMB}: named list of column-mean
#' vectors per block, used for mean-centring.}
#' \item{\code{Norm}}{List with \code{NormMB}: Frobenius norms used for
#' block normalisation (all ones when \code{normalise = FALSE}).}
#' \item{\code{variable.block}}{Character vector (length \eqn{p_{tot}})
#' indicating the block name of each row in \code{P.loadings}.}
#' \item{\code{runtime}}{Total computation time in seconds.}
#' }
#' @examples
#' b1 <- matrix(rnorm(500), 10, 50) # 10 samples, 50 variables
#' b2 <- matrix(rnorm(800), 10, 80) # 10 samples, 80 variables
#' mb <- MultiBlock(Data = list(b1 = b1, b2 = b2))
#'
#' if (requireNamespace("ica", quietly = TRUE)) {
#' fun.ICA <- function(W, ndim, ...) {
#' # W is the concatenated MB.
#' # ndim is the number of components.
#' result <- ica::ica(W, ndim)
#' # The function must return the source estimates (analogous to PCA scores).
#' return(result$S)
#' }
#' resultsICA <- ComDim_Exploratory(mb,
#' ndim = 2,
#' FUN = fun.ICA,
#' method = "ICA"
#' )
#' }
#' @references Jouan-Rimbaud Bouveresse D, Rutledge DN (2024).
#' A synthetic review of some recent extensions of ComDim.
#' \emph{Journal of Chemometrics}, 38(5), e3454.
#' \doi{10.1002/cem.3454}
#' @export
ComDim_Exploratory <- function(MB = MB, ndim = NULL,
                               FUN = FUN,
                               normalise = FALSE, threshold = 1e-10,
                               loquace = FALSE,
                               method = "FUN",
                               ...) {
  # Multi-block exploratory ComDim driven by a user-supplied decomposition
  # function FUN. Stages: (1) sanity checks and zero-variance filtering,
  # (2) optional block normalisation, (3) iterative extraction of ndim common
  # components (global scores + block saliences) with deflation after each,
  # (4) local scores and global loadings, (5) assembly of a ComDim S4 object.
  # INITIALISATION
  # The progress bar spans 0..80 units; each stage advances it by 'pieceBar'.
  progress_bar <- utils::txtProgressBar(min = 0, max = 80, style = 3, char = "=")
  start_ini_time <- Sys.time() # To start counting calculation time for the initialization
  if (!inherits(MB, "MultiBlock")) {
    stop("'MB' is not a MultiBlock.")
  }
  ntable <- length(blockNames(MB)) # number of blocks
  nrowMB <- length(sampleNames(MB)) # number of samples
  variable_number <- ncol(MB) # Number of variables per block
  give_error <- 0
  # Remove all variables with 0 variance. If there are no remaining variables, give_error
  numvar <- 0 # running count of zero-variance variables removed
  numblock0 <- vector() # indices of blocks whose variables ALL have zero variance
  for (i in 1:ntable) {
    var0 <- apply(MB@Data[[i]], 2, function(x) {
      var(x)
    })
    var0 <- which(var0 == 0)
    if (length(var0) != 0) {
      numvar <- numvar + length(var0)
      if (length(var0) == length(MB@Variables[[i]])) {
        numblock0 <- append(numblock0, i)
      }
      # Drop the constant columns from both the data and the variable names,
      # and record the block's new width.
      MB@Data[[i]] <- MB@Data[[i]][, setdiff(1:variable_number[i], var0)]
      MB@Variables[[i]] <- MB@Variables[[i]][setdiff(1:variable_number[i], var0)]
      variable_number[i] <- length(MB@Variables[[i]])
    }
  }
  if (numvar != 0) {
    warning(sprintf("Number of variables excluded from the analysis because of their zero variance: %d", numvar))
  }
  if (length(numblock0) != 0) {
    numblock0 <- rev(numblock0) # To sort in decreasing order.
    # Delete from the highest index down so earlier indices stay valid.
    for (i in seq_along(numblock0)) {
      MB@Data[[numblock0[i]]] <- NULL
      MB@Variables[[numblock0[i]]] <- NULL
      # NOTE(review): testing a block INDEX with %in% against MB@Batch /
      # MB@Metadata looks suspect; it was probably meant to check whether an
      # entry for that block exists before NULLing it — confirm intent.
      if (numblock0[[i]] %in% MB@Batch) {
        MB@Batch[[numblock0[i]]] <- NULL
      }
      if (numblock0[[i]] %in% MB@Metadata) {
        MB@Metadata[[numblock0[i]]] <- NULL
      }
    }
    warning(sprintf("Number of blocks excluded from the analysis because of their zero variance: %d", length(numblock0)))
  }
  ntable <- length(blockNames(MB)) # number of blocks
  variable_number <- ncol(MB) # In case the number of blocks has changed.
  if (length(MB@Data) == 0) { # In case all blocks had 0 variance...
    warning("All blocks had 0 variance")
    give_error <- 1
  }
  # Check for infinite or missing values (they cannot be handled with svd)
  for (i in 1:ntable) {
    if (any(is.na(MB@Data[[i]]))) {
      stop("The MB contains NA values. They must be removed first for ComDim analysis.")
    }
    if (any(is.infinite(MB@Data[[i]]))) {
      stop("The MB contains infinite values. They must be removed first for ComDim analysis.")
    }
  }
  if (give_error) {
    stop("The data is not ready for ComDim.")
  } else {
    message("The data can be used for ComDim.")
  }
  if (is.null(ndim)) {
    ndim <- ntable # If the number of components is not defined,
    # the number of components to extract is equal to the number of blocks.
  }
  pieceBar <- 4 + 2 * ntable + ndim # Number of updates in the progress bar.
  pieceBar <- 80 / pieceBar
  total_progress <- pieceBar
  DimLabels <- paste0("CC", 1:ndim) # One label per component.
  TableLabels <- blockNames(MB) # One label per block.
  end_ini_time <- Sys.time() # To end the count of the calculation time.
  if (loquace) {
    # NOTE(review): multiplying a difftime by 1000 assumes its units are
    # seconds; difftime may auto-select other units for long runs — confirm.
    message(sprintf("Initialisation finished after : %s millisecs", (end_ini_time - start_ini_time) * 1000))
  }
  utils::setTxtProgressBar(progress_bar, value = total_progress)
  # NORMALISATION
  # X_mat: raw concatenated blocks; Xnorm_mat: (optionally) normalised version.
  X_mat <- matrix(, nrow = nrowMB, ncol = sum(variable_number))
  Xnorm_mat <- matrix(, nrow = nrowMB, ncol = sum(variable_number))
  res_calib <- list()
  temp_tabCalib <- list() # per-block working copies, deflated later for local scores
  s_r <- list() # per-block working copies, deflated during score extraction
  res_calib$SingVal <- as.vector(NULL)
  res_calib$NormMB <- as.vector(NULL)
  res_calib$MeanMB <- list()
  for (i in 1:ntable) {
    res_calib$MeanMB[[TableLabels[i]]] <- colMeans(MB@Data[[i]])
    names(res_calib$MeanMB[[TableLabels[i]]]) <- MB@Variables[[i]]
    if (normalise) {
      # Normalise original blocks
      # Mean-centre, then scale the whole block to unit Frobenius norm.
      X_mean <- MB@Data[[i]] - matrix(data = rep(1, nrowMB), ncol = 1, nrow = nrowMB) %*% res_calib$MeanMB[[i]]
      XX <- X_mean * X_mean
      Norm_X <- sqrt(sum(XX))
      X_Normed <- X_mean / Norm_X
      res_calib$NormMB[i] <- Norm_X
      temp_tabCalib[[i]] <- X_Normed
      s_r[[i]] <- X_Normed
    } else {
      # No normalisation: store per-variable unit norms and keep the raw data.
      # NOTE(review): in this branch the blocks are NOT mean-centred either,
      # unlike the normalise = TRUE branch — confirm this is intended.
      res_calib$NormMB[[TableLabels[i]]] <- rep(1, length(MB@Variables[[i]]))
      names(res_calib$NormMB[[TableLabels[i]]]) <- MB@Variables[[i]]
      temp_tabCalib[[i]] <- MB@Data[[i]]
      s_r[[i]] <- MB@Data[[i]]
    }
    if (i == 1) {
      X_mat[, 1:variable_number[1]] <- MB@Data[[i]]
      Xnorm_mat[, 1:variable_number[1]] <- temp_tabCalib[[i]]
    } else {
      # Column offsets of block i inside the concatenated matrices.
      beg <- sum(variable_number[1:(i - 1)]) + 1
      ending <- sum(variable_number[1:i])
      X_mat[, beg:ending] <- MB@Data[[i]]
      Xnorm_mat[, beg:ending] <- temp_tabCalib[[i]]
    }
    norm_comdim <- Sys.time()
    if (loquace) {
      message(sprintf("Normalization of block %s finished after : %s millisecs", i, (norm_comdim - start_ini_time) * 1000))
    }
    total_progress <- total_progress + pieceBar
    utils::setTxtProgressBar(progress_bar, value = total_progress)
  }
  names(res_calib$NormMB) <- TableLabels
  nR <- nrow(Xnorm_mat)
  nC <- ncol(Xnorm_mat)
  total_progress <- total_progress + pieceBar * ntable
  utils::setTxtProgressBar(progress_bar, value = total_progress)
  ## DO ComDim
  saliences <- matrix(, ncol = ndim, nrow = ntable) # lambda(block, component)
  Q <- matrix(, ncol = ndim, nrow = nrowMB) # global scores, one column per component
  unexplained <- ntable # Warning!: true only if the tables were set to norm=1
  varexp <- as.vector(NULL) # per-component variance proxy, used for R2X
  for (dims in 1:ndim) {
    previousfit <- 10000
    deltafit <- 1000000
    lambda <- matrix(rep(1, ntable), nrow = ntable, ncol = 1) # saliences start at 1
    qini <- as.vector(s_r[[1]][, 1])
    qini <- qini / sqrt(as.vector(qini %*% qini)) # random initialization of t + set to unit length
    qold <- 100
    iters <- 0
    # Alternate between (a) scores from FUN applied to the salience-weighted
    # concatenation W and (b) salience updates, until the global score changes
    # by less than 'threshold' (L2 norm) or 100 iterations are reached.
    while (norm(qold - qini, "2") > threshold && iters < 100) {
      iters <- iters + 1
      qold <- qini
      q <- 0
      # W = [sqrt(lambda_1) X_1 | ... | sqrt(lambda_B) X_B], rebuilt each pass.
      W <- matrix(, nrow = nrowMB, ncol = 0)
      for (j in 1:ntable) {
        W <- cbind(W, sqrt(lambda[j]) * s_r[[j]])
      }
      # HERE IS THE CORE COMDIM
      scores <- FUN(W = W, ndim = ndim, ...)
      sv <- sqrt(t(scores[, dims]) %*% scores[, dims]) # For R2X
      qtmp <- scores[, dims] / as.vector(sv) # unit-norm candidate global score
      for (j in 1:ntable) {
        # takes each table into account for lambda after PCA
        # lambda_j = q' X_j X_j' q: variance of block j along the candidate.
        lambda[j] <- t(qtmp) %*% (s_r[[j]] %*% t(s_r[[j]])) %*% qtmp
        q <- q + lambda[j] * qtmp
      }
      q <- q / sqrt(as.vector(t(q) %*% q)) # standardizes t
      # Sign convention: flip so the largest-magnitude entry is positive.
      if (abs(min(q)) > abs(max(q))) {
        q <- -q
      }
      qini <- q
    } # deltafit>threshold
    saliences[, dims] <- lambda
    Q[, dims] <- q
    res_calib$SingVal[dims] <- sv^2 # squared L2 norm of the FUN score vector
    varexp[dims] <- res_calib$SingVal[dims]^2
    # Deflate blocks
    # Project every block onto the orthogonal complement of q.
    aux <- diag(nrowMB) - as.matrix(q) %*% t(as.matrix(q))
    for (j in 1:ntable) {
      s_r[[j]] <- aux %*% s_r[[j]]
    }
    iter_comdim <- Sys.time()
    if (loquace) {
      message(sprintf("Component %s determined after : %s millisecs", dims, (iter_comdim - start_ini_time) * 1000))
    }
    total_progress <- total_progress + pieceBar
    utils::setTxtProgressBar(progress_bar, value = total_progress)
  }
  # res_calib$SingVal$i <- 'Singular value'
  names(res_calib$SingVal) <- DimLabels # Dimensions
  # Q$i <- data[[1]]$Samples
  # Q$v <- DimLabels
  colnames(Q) <- DimLabels
  rownames(Q) <- MB@Samples
  res_calib$Q <- Q
  rm(Q)
  ## Adding metadata. Metadata extracted from the first block
  res_calib$metadata <- list()
  if (length(MB@Metadata) != 0) {
    res_calib$metadata[[1]] <- MB@Metadata
  }
  end_comdim <- Sys.time()
  if (loquace) {
    message(sprintf("Scores finished after : %s millisecs", (end_comdim - start_ini_time) * 1000))
  }
  total_progress <- total_progress + pieceBar
  utils::setTxtProgressBar(progress_bar, value = total_progress)
  # NOTE(review): meanW is computed but never used afterwards — dead code.
  meanW <- as.vector(NULL)
  for (j in 1:ntable) {
    meanW <- c(meanW, mean(s_r[[j]]))
  }
  # res_calib$s_n <- s_n
  # res_calib$s_r <- s_r
  rm(s_r)
  ##
  # explained <- list()
  # R2X: each component's share of the summed variance proxy (sv^4 terms).
  explained <- varexp / sum(varexp)
  names(explained) <- DimLabels
  # explained$i <-'% explained'
  # explained$v <- DimLabels #Dimensions
  res_calib$explained <- explained
  rm(varexp)
  rm(explained)
  ## Calculate Sums of saliences for each Dim
  # Sum_saliences_Dim <- list()
  Sum_saliences_Dim <- colSums(saliences)
  names(Sum_saliences_Dim) <- DimLabels
  # Sum_saliences_Dim$i <-'Sum Dim Saliences'
  # Sum_saliences_Dim$v <- DimLabels #Dimensions
  res_calib$Sum_saliences_Dim <- Sum_saliences_Dim
  rm(Sum_saliences_Dim)
  # Calculate Sums of saliences for each Table
  # Sum_saliences_Tab <- list()
  Sum_saliences_Tab <- rowSums(saliences)
  # Sum_saliences_Tab$i <- 'Sum Tab Saliences'
  names(Sum_saliences_Tab) <- TableLabels
  # Sum_saliences_Tab$v <- TableLabels #tables
  res_calib$Sum_saliences_Tab <- Sum_saliences_Tab
  rm(Sum_saliences_Tab)
  # saliences$i <- TableLabels #tables
  # saliences$v <- DimLabels #Dimensions
  res_calib$saliences <- saliences
  colnames(res_calib$saliences) <- DimLabels
  rownames(res_calib$saliences) <- TableLabels
  ## Calculate Normalised concatenated Xs ('Calib') from col
  # Calculate concatenated CD loadings
  # Reorganise Loadings - 1 matrix / LV
  ## If Output = NULL, nothing else is calculated
  # if(!(is.null(output))){
  L_CD_Vec <- NULL
  L_X_Vec <- NULL
  Calib <- NULL
  nCalib <- nrowMB
  # }
  ## Already defined in during ComDim initiallization
  # isT <- grepl(output, 'T')
  # isL <- grepl(output, 'L')
  # isP <- grepl(output, 'P')
  # isLP <- c(isP,isL)
  # isTLP <- c(isT,isP,isL)
  # tic
  # Calculate concatenated CD loadings
  # Reorganise Loadings - 1 matrix / LV
  # L_CD <- list()
  # L_X <- list()
  T_Loc <- list() # block-local scores: one (nrowMB x ndim) matrix per block
  for (i in 1:ntable) { # Prepare lists
    # L_CD[[TableLabels[i]]] <- matrix(,ncol = ndim, nrow = ncol(temp_tabCalib[[i]]))
    # L_X[[TableLabels[i]]] <- matrix(,ncol = ndim, nrow = ncol(temp_tabCalib[[i]]))
    T_Loc[[TableLabels[i]]] <- matrix(, ncol = ndim, nrow = nrowMB)
  }
  # b <- matrix(,ncol = ndim, nrow = ntable)
  if (!requireNamespace("pracma", quietly = TRUE)) {
    stop("The package pracma is needed.")
  }
  for (j in 1:ndim) {
    for (i in 1:ntable) {
      temp <- t(temp_tabCalib[[i]]) %*% res_calib$Q[, j] # Scaled CD 'local' Loadings
      # Local scores: t_b = X_b p_b (p_b' p_b)^-1; pinv guards against a
      # singular (zero) denominator.
      T_Loc[[TableLabels[i]]][, j] <- temp_tabCalib[[i]] %*% (temp %*% pracma::pinv(t(temp) %*% temp)) # local Scores
      # Deflate each temp_tabCalib
      temp_tabCalib[[i]] <- temp_tabCalib[[i]] - res_calib$Q[, j] %*% t(temp)
    }
  }
  for (i in 1:ntable) {
    rownames(T_Loc[[TableLabels[i]]]) <- rownames(res_calib$Q)
    colnames(T_Loc[[TableLabels[i]]]) <- colnames(res_calib$Q)
  }
  # If Output==[], nothing else is calculated
  # if(!(is.null(output))){
  # Calculate Global Loadings
  # if(!(is.null(isP))){
  L_CD_Vec <- t(Xnorm_mat) %*% res_calib$Q # Scaled CD 'global' Loadings
  # }
  # if(!(is.null(isL))){
  # L_X_Vec <- t(X_mat)%*%Q$Data # Unscaled X 'global' Loadings
  # }
  # }
  #### If Output==[], nothing else is calculated
  load_comdim <- Sys.time()
  if (loquace) {
    message(sprintf("Loadings finished after : %s millisecs", (load_comdim - start_ini_time) * 1000))
  }
  total_progress <- total_progress + pieceBar
  utils::setTxtProgressBar(progress_bar, value = total_progress)
  rm(X_mat)
  rm(temp_tabCalib)
  rm(temp)
  ## Output
  end_function <- Sys.time()
  # if(!(is.null(isT))){
  res_calib$T_Loc <- T_Loc
  # T_Loc is no longer needed
  rm(T_Loc)
  # if(!(is.null(isP))){
  varnames <- vector()
  for (i in 1:ntable) {
    varnames <- append(varnames, MB@Variables[[i]])
  }
  # if(length(which(duplicated(varnames))) != 0){
  #   warning('There are at least two variables with the same name. Their name in the rownames of P has been altered.\n')
  # }
  res_calib$P <- L_CD_Vec
  # res_calib$P$i <- varnames # numbers of all variables;
  # res_calib$P$v <- DimLabels # Dimensions
  colnames(res_calib$P) <- DimLabels
  rownames(res_calib$P) <- varnames
  rm(L_CD_Vec)
  # rm(L_CD)
  ##
  rm(saliences)
  # Define block length
  # belong_block maps each row of P.loadings to the name of its source block.
  belong_block <- rep(0, nrow(res_calib$P))
  k <- 1
  for (i in 1:ntable) {
    belong_block[k:(k + variable_number[i] - 1)] <- TableLabels[i]
    k <- k + variable_number[i]
  }
  end_output <- Sys.time()
  running_time <- (end_output - start_ini_time)
  if (loquace) {
    message(sprintf("Analysis finished after : %s seconds", running_time))
  }
  res_calib$runtime <- running_time # Total time of analysis
  progress_bar <- utils::txtProgressBar(min = 0, max = 80, style = 3, char = "=")
  utils::setTxtProgressBar(progress_bar, value = 80)
  close(progress_bar)
  # Save data in a ComDim structure
  # Supervised-analysis slots (R2Y, Q2, DQ2, PLS.model, cv, Prediction) are
  # intentionally left empty for this exploratory variant.
  res_calib <- new("ComDim",
    Method = method,
    ndim = ndim,
    Q.scores = res_calib$Q,
    T.scores = res_calib$T_Loc,
    P.loadings = res_calib$P,
    Saliences = res_calib$saliences,
    Orthogonal = list(),
    R2X = res_calib$explained,
    R2Y = vector(),
    Q2 = vector(),
    DQ2 = vector(),
    Singular = res_calib$SingVal,
    Mean = list(MeanMB = res_calib$MeanMB, MeanY = NULL),
    Norm = list(NormMB = res_calib$NormMB),
    PLS.model = list(),
    cv = list(),
    Prediction = list(),
    Metadata = res_calib$metadata,
    variable.block = belong_block,
    runtime = as.numeric(res_calib$runtime)
  )
  return(res_calib)
}