# NOTE(review): stray word "Nothing" (web-scrape residue) commented out so the file parses as valid R.
#' ComDim_PCA
#'
#' Finding common dimensions in multi-block datasets.
#' @param MB A MultiBlock object.
#' @param ndim Number of Common Dimensions.
#' @param normalise To apply normalisation. FALSE == no (default), TRUE == yes.
#' @param threshold The threshold limit to stop the iterations. If the "difference of fit" < threshold (1e-10 as default).
#' @param loquace To display the calculation times. TRUE == yes, FALSE == no (default).
#' @param CompMethod To speed-up the analysis for really big MultiBlocks. 'Normal' (default), 'Kernel', 'PCT', 'Tall' or 'Wide'.
#' @param Partitions To speed-up the analysis for really big MultiBlocks. This parameter is used if CompMethod is 'Tall' or 'Wide'.
#' @return A \code{ComDim} object. Slots for supervised analysis
#' (\code{R2Y}, \code{Q2}, \code{DQ2}, \code{VIP}, \code{VIP.block},
#' \code{PLS.model}, \code{cv}, \code{Prediction}) are empty. The
#' populated slots are:
#' \describe{
#' \item{\code{Method}}{\code{"PCA"}.}
#' \item{\code{ndim}}{Number of Common Dimensions extracted.}
#' \item{\code{Q.scores}}{Global scores matrix (\eqn{n \times ndim}).
#' Column names are \code{CC1}, \code{CC2}, etc.; row names are sample
#' names. Each column \eqn{\mathbf{q}_a} is a unit-norm consensus score,
#' the dominant left singular vector of the salience-weighted concatenated
#' blocks
#' \eqn{\mathbf{W} = [\sqrt{\lambda_1}\mathbf{X}_1 \mid \cdots \mid
#' \sqrt{\lambda_B}\mathbf{X}_B]}.}
#' \item{\code{T.scores}}{Named list of block-specific local scores matrices
#' (\eqn{n \times ndim} each). For block \eqn{b} and component \eqn{a}:
#' local loading \eqn{\mathbf{p}_{ba} = \mathbf{X}_b'\mathbf{q}_a} and
#' local score
#' \eqn{\mathbf{t}_{ba} =
#' \mathbf{X}_b\,\mathbf{p}_{ba}(\mathbf{p}_{ba}'\mathbf{p}_{ba})^{-1}}.}
#' \item{\code{P.loadings}}{Global loadings matrix (\eqn{p_{tot} \times
#' ndim}). Column \eqn{a} is
#' \eqn{\mathbf{P}_a = \mathbf{X}'\mathbf{q}_a},
#' where \eqn{\mathbf{X}} is the mean-centred (and optionally normalised)
#' concatenated blocks.}
#' \item{\code{Saliences}}{Block salience (weight) matrix (\eqn{ntable
#' \times ndim}, row names = block names). Entry \eqn{(b,a)} is
#' \eqn{\lambda_{ba} =
#' \mathbf{q}_a'\mathbf{X}_b\mathbf{X}_b'\mathbf{q}_a},
#' the variance of block \eqn{b} captured by global score \eqn{a}.}
#' \item{\code{R2X}}{Proportion of multi-block inertia captured by each
#' component (named vector, length \eqn{ndim}). Let
#' \eqn{d_a} be the leading singular value of \eqn{\mathbf{W}} for
#' component \eqn{a} (stored as \eqn{Singular_a = d_a^2}); then
#' \deqn{R2X_a = Singular_a^2 \big/ \sum_k Singular_k^2 = d_a^4 \big/
#' \sum_k d_k^4.}}
#' \item{\code{Singular}}{Squared leading singular values of
#' \eqn{\mathbf{W}}, one per component:
#' \eqn{Singular_a = d_a^2}.}
#' \item{\code{Mean}}{List with \code{MeanMB}: named list of column-mean
#' vectors per block, used for mean-centring.}
#' \item{\code{Norm}}{List with \code{NormMB}: Frobenius norms used for
#' block normalisation (all ones when \code{normalise = FALSE}).}
#' \item{\code{variable.block}}{Character vector (length \eqn{p_{tot}})
#' indicating the block name of each row in \code{P.loadings}.}
#' \item{\code{runtime}}{Total computation time in seconds.}
#' }
#' @examples
#' # Example 1: two data blocks.
#' b1 <- matrix(rnorm(500), 10, 50) # 10 samples, 50 variables
#' b2 <- matrix(rnorm(800), 10, 80) # 10 samples, 80 variables
#' mb <- MultiBlock(Data = list(b1 = b1, b2 = b2))
#' results <- ComDim_PCA(mb, 2)
#'
#' # Example 2: two data blocks, each with different replicate number
#' b1 <- matrix(rnorm(500), 10, 50)
#' batch_b1 <- rep(1, 10)
#' b2 <- matrix(rnorm(2400), 30, 80)
#' batch_b2 <- c(rep(1, 10), rep(2, 10), rep(3, 10))
#' mb <- MultiBlock(
#' Samples = list(
#' b1 = paste0("samples_", 1:10),
#' b2 = rep(paste0("samples_", 1:10), 3)
#' ),
#' Data = list(b1 = b1, b2 = b2),
#' Batch = list(b1 = batch_b1, b2 = batch_b2),
#' ignore.size = TRUE
#' )
#' rw <- SplitRW(mb)
#' results <- ComDim_PCA(rw, 2)
#' @references Jouan-Rimbaud Bouveresse D, Rutledge DN (2024).
#' A synthetic review of some recent extensions of ComDim.
#' \emph{Journal of Chemometrics}, 38(5), e3454.
#' \doi{10.1002/cem.3454}
#'
#' Original MATLAB implementation: \url{https://github.com/DNRutledge/ComDim/}
#' @export
ComDim_PCA <- function(MB = MB, ndim = NULL,
                       normalise = FALSE, threshold = 1e-10,
                       loquace = FALSE,
                       CompMethod = "Normal", Partitions = 1) {
  # INITIALISATION ----
  progress_bar <- utils::txtProgressBar(min = 0, max = 80, style = 3, char = "=")
  start_ini_time <- Sys.time() # reference point for every timing message

  # Elapsed time since 'since', always in milliseconds.
  # (Subtracting two POSIXct values yields a difftime whose unit is
  # auto-chosen by R, so the previous '(t2 - t1) * 1000' was only
  # milliseconds when the unit happened to be seconds; difftime() with
  # explicit units is always correct.)
  elapsed_ms <- function(since) {
    as.numeric(difftime(Sys.time(), since, units = "secs")) * 1000
  }

  if (!inherits(MB, "MultiBlock")) {
    stop("'MB' is not a MultiBlock.")
  }
  ntable <- length(blockNames(MB)) # number of blocks
  nrowMB <- length(sampleNames(MB)) # number of samples
  variable_number <- ncol(MB) # number of variables per block
  give_error <- 0

  # Remove all variables with 0 variance. Blocks left without any
  # variables after this filtering are removed entirely below.
  numvar <- 0
  numblock0 <- vector()
  for (i in seq_len(ntable)) {
    var0 <- apply(MB@Data[[i]], 2, function(x) {
      var(x)
    })
    var0 <- which(var0 == 0)
    if (length(var0) != 0) {
      numvar <- numvar + length(var0)
      if (length(var0) == length(MB@Variables[[i]])) {
        numblock0 <- append(numblock0, i)
      }
      keep <- setdiff(seq_len(variable_number[i]), var0)
      # drop = FALSE keeps a block with a single surviving variable a matrix
      MB@Data[[i]] <- MB@Data[[i]][, keep, drop = FALSE]
      MB@Variables[[i]] <- MB@Variables[[i]][keep]
      variable_number[i] <- length(MB@Variables[[i]])
    }
  }
  if (numvar != 0) {
    warning(sprintf("Number of variables excluded from the analysis because of their zero variance: %d", numvar))
  }
  if (length(numblock0) != 0) {
    numblock0 <- rev(numblock0) # delete from the end so indices stay valid
    for (i in seq_along(numblock0)) {
      MB@Data[[numblock0[i]]] <- NULL
      MB@Variables[[numblock0[i]]] <- NULL
      # BUGFIX: the original tested 'numblock0[[i]] %in% MB@Batch', i.e.
      # whether the block *index* occurred among the Batch *values*, which
      # never matched. The intent is to drop the Batch/Metadata entry of
      # the removed block when such an entry exists.
      if (numblock0[i] <= length(MB@Batch)) {
        MB@Batch[[numblock0[i]]] <- NULL
      }
      if (numblock0[i] <= length(MB@Metadata)) {
        MB@Metadata[[numblock0[i]]] <- NULL
      }
    }
    warning(sprintf("Number of blocks excluded from the analysis because of their zero variance: %d", length(numblock0)))
  }
  ntable <- length(blockNames(MB)) # number of blocks (may have changed)
  variable_number <- ncol(MB) # in case the number of blocks has changed
  if (length(MB@Data) == 0) { # all blocks had zero variance
    warning("All blocks had 0 variance")
    give_error <- 1
  }
  # Check for infinite or missing values (they cannot be handled with svd).
  # seq_len() (not 1:ntable) skips the loop when ntable == 0; the old form
  # crashed on MB@Data[[1]] before reaching the give_error stop below.
  for (i in seq_len(ntable)) {
    if (any(is.na(MB@Data[[i]]))) {
      stop("The MB contains NA values. They must be removed first for ComDim analysis.")
    }
    if (any(is.infinite(MB@Data[[i]]))) {
      stop("The MB contains infinite values. They must be removed first for ComDim analysis.")
    }
  }
  if (give_error) {
    stop("The data is not ready for ComDim.")
  } else {
    message("The data can be used for ComDim.")
  }
  if (is.null(ndim)) {
    # Default: extract as many Common Dimensions as there are blocks.
    ndim <- ntable
  }
  pieceBar <- 4 + 2 * ntable + ndim # number of updates of the progress bar
  pieceBar <- 80 / pieceBar
  total_progress <- pieceBar
  DimLabels <- paste0("CC", 1:ndim) # one label per component
  TableLabels <- blockNames(MB) # one label per block

  # Interactive safeguard: 'Tall' is slow when blocks have many columns.
  if (CompMethod == "Tall" && any(variable_number > 1000)) {
    message("The method 'Tall' is not recommended for large blocks (> 1000 columns).")
    message("Do you still want to continue with the analysis?")
    message("You can change now the CompMethod if you prefer: (yes/no)")
    ans1 <- tolower(readline("Type your choice: (y/n)"))
    if (ans1 == "y" || ans1 == "yes") {
      message("Which method do you want to use instead (Normal/Kernel/PCT/Wide)?")
      ans2 <- tolower(readline("Type your choice: (n/k/p/w)"))
      if (ans2 == "n" || ans2 == "normal") {
        CompMethod <- "Normal"
      } else if (ans2 == "k" || ans2 == "kernel") {
        CompMethod <- "Kernel"
      } else if (ans2 == "p" || ans2 == "pct") {
        CompMethod <- "PCT"
      } else if (ans2 == "w" || ans2 == "wide") {
        CompMethod <- "Wide"
      } else {
        stop("Wrong answer typed.")
      }
    } else if (ans1 == "n" || ans1 == "no") {
      message("The method 'Tall' will be used")
    } else {
      stop("Wrong answer typed.")
    }
  }
  if (CompMethod == "Kernel" && nrowMB >= min(variable_number)) {
    stop(sprintf(
      paste0(
        "CompMethod = 'Kernel' requires n < p for every block ",
        "(n = %d, smallest block p = %d). ",
        "Use 'Normal', 'PCT', or 'Wide' instead."
      ),
      nrowMB, min(variable_number)
    ))
  }
  if (loquace) {
    message(sprintf("Initialisation finished after : %s millisecs", elapsed_ms(start_ini_time)))
  }
  utils::setTxtProgressBar(progress_bar, value = total_progress)

  # NORMALISATION ----
  X_mat <- matrix(, nrow = nrowMB, ncol = sum(variable_number))
  Xnorm_mat <- matrix(, nrow = nrowMB, ncol = sum(variable_number))
  res_calib <- list()
  temp_tabCalib <- list()
  s_n <- list()
  res_calib$SingVal <- as.vector(NULL)
  res_calib$NormMB <- as.vector(NULL)
  res_calib$MeanMB <- list()
  for (i in seq_len(ntable)) {
    res_calib$MeanMB[[TableLabels[i]]] <- colMeans(MB@Data[[i]])
    names(res_calib$MeanMB[[TableLabels[i]]]) <- MB@Variables[[i]]
    if (normalise) {
      # Mean-centre, then scale the block to unit Frobenius norm.
      X_mean <- MB@Data[[i]] - matrix(data = rep(1, nrowMB), ncol = 1, nrow = nrowMB) %*% res_calib$MeanMB[[i]]
      XX <- X_mean * X_mean
      Norm_X <- sqrt(sum(XX))
      X_Normed <- X_mean / Norm_X
      res_calib$NormMB[i] <- Norm_X
      temp_tabCalib[[i]] <- X_Normed
      s_n[[i]] <- X_Normed
    } else {
      # No normalisation: norms recorded as ones, data used as-is.
      res_calib$NormMB[[TableLabels[i]]] <- rep(1, length(MB@Variables[[i]]))
      names(res_calib$NormMB[[TableLabels[i]]]) <- MB@Variables[[i]]
      temp_tabCalib[[i]] <- MB@Data[[i]]
      s_n[[i]] <- MB@Data[[i]]
    }
    # Fill the concatenated (raw and normalised) matrices, block by block.
    if (i == 1) {
      X_mat[, 1:variable_number[1]] <- MB@Data[[i]]
      Xnorm_mat[, 1:variable_number[1]] <- temp_tabCalib[[i]]
    } else {
      beg <- sum(variable_number[1:(i - 1)]) + 1
      ending <- sum(variable_number[1:i])
      X_mat[, beg:ending] <- MB@Data[[i]]
      Xnorm_mat[, beg:ending] <- temp_tabCalib[[i]]
    }
    if (loquace) {
      message(sprintf("Normalization of block %s finished after : %s millisecs", i, elapsed_ms(start_ini_time)))
    }
    total_progress <- total_progress + pieceBar
    utils::setTxtProgressBar(progress_bar, value = total_progress)
  }
  names(res_calib$NormMB) <- TableLabels

  # COMPRESSION ----
  s_r <- Compress_Data_2020(s_n, CompMethod, Partitions)
  if (loquace) {
    message(sprintf("Compression finished after : %s millisecs", elapsed_ms(start_ini_time)))
  }
  total_progress <- total_progress + pieceBar * ntable
  utils::setTxtProgressBar(progress_bar, value = total_progress)

  # COMMON DIMENSIONS ----
  saliences <- matrix(, ncol = ndim, nrow = ntable)
  Q <- matrix(, ncol = ndim, nrow = nrowMB)
  varexp <- as.vector(NULL)
  for (dims in seq_len(ndim)) {
    lambda <- matrix(rep(1, ntable), nrow = ntable, ncol = 1) # block saliences
    qini <- as.vector(s_r[[1]][, 1])
    qini <- qini / sqrt(as.vector(qini %*% qini)) # initial score, unit length
    qold <- 100
    iters <- 0
    # Alternate between the consensus score (leading left singular vector
    # of the salience-weighted concatenation W) and the saliences, until
    # the score stops changing or 100 iterations are reached.
    while (norm(qold - qini, "2") > threshold && iters < 100) {
      iters <- iters + 1
      qold <- qini
      q <- 0
      W <- matrix(, nrow = nrowMB, ncol = 0)
      for (j in seq_len(ntable)) {
        W <- cbind(W, sqrt(lambda[j]) * s_r[[j]])
      }
      usv <- svd(W)
      qtmp <- usv$u[, 1]
      for (j in seq_len(ntable)) {
        # Salience: variance of block j captured by the candidate score.
        lambda[j] <- t(qtmp) %*% (s_r[[j]] %*% t(s_r[[j]])) %*% qtmp
        q <- q + lambda[j] * qtmp
      }
      q <- q / sqrt(as.vector(t(q) %*% q)) # back to unit length
      # Sign convention: make the largest-magnitude coordinate positive.
      if (abs(min(q)) > abs(max(q))) {
        q <- -q
      }
      qini <- q
    }
    saliences[, dims] <- lambda
    Q[, dims] <- q
    # SingVal is the square of the SVD singular value because it is based
    # on X, not X.X'. Not available under Kernel/Wide/Tall compression;
    # for those it is recomputed from the b-coefficients further below.
    if (CompMethod == "Normal" || CompMethod == "PCT") {
      res_calib$SingVal[dims] <- usv$d[1] * usv$d[1] # from SVD on W
      varexp[dims] <- res_calib$SingVal[dims]^2
    } else {
      varexp[dims] <- 0 # overwritten at the end
      res_calib$SingVal[dims] <- 0 # overwritten at the end
    }
    # Deflate the blocks: project the extracted component out of each one.
    aux <- diag(nrowMB) - as.matrix(q) %*% t(as.matrix(q))
    for (j in seq_len(ntable)) {
      s_r[[j]] <- aux %*% s_r[[j]]
    }
    if (loquace) {
      message(sprintf("Component %s determined after : %s millisecs", dims, elapsed_ms(start_ini_time)))
    }
    total_progress <- total_progress + pieceBar
    utils::setTxtProgressBar(progress_bar, value = total_progress)
  }
  names(res_calib$SingVal) <- DimLabels
  colnames(Q) <- DimLabels
  rownames(Q) <- MB@Samples
  res_calib$Q <- Q
  rm(Q)

  # Metadata carried over from the MultiBlock (if any).
  res_calib$metadata <- list()
  if (length(MB@Metadata) != 0) {
    res_calib$metadata[[1]] <- MB@Metadata
  }
  if (loquace) {
    message(sprintf("Scores finished after : %s millisecs", elapsed_ms(start_ini_time)))
  }
  total_progress <- total_progress + pieceBar
  utils::setTxtProgressBar(progress_bar, value = total_progress)
  rm(s_n)
  rm(s_r)

  # Proportion of multi-block inertia captured by each component.
  explained <- varexp / sum(varexp)
  names(explained) <- DimLabels
  res_calib$explained <- explained
  rm(varexp)
  rm(explained)
  # Sums of saliences per Common Dimension.
  Sum_saliences_Dim <- colSums(saliences)
  names(Sum_saliences_Dim) <- DimLabels
  res_calib$Sum_saliences_Dim <- Sum_saliences_Dim
  rm(Sum_saliences_Dim)
  # Sums of saliences per block.
  Sum_saliences_Tab <- rowSums(saliences)
  names(Sum_saliences_Tab) <- TableLabels
  res_calib$Sum_saliences_Tab <- Sum_saliences_Tab
  rm(Sum_saliences_Tab)
  res_calib$saliences <- saliences
  colnames(res_calib$saliences) <- DimLabels
  rownames(res_calib$saliences) <- TableLabels

  # LOADINGS AND LOCAL SCORES ----
  T_Loc <- list()
  for (i in seq_len(ntable)) {
    T_Loc[[TableLabels[i]]] <- matrix(, ncol = ndim, nrow = nrowMB)
  }
  b <- matrix(, ncol = ndim, nrow = ntable) # used for R2X in Kernel/Tall/Wide
  if (!requireNamespace("pracma", quietly = TRUE)) {
    stop("The package pracma is needed.")
  }
  for (j in seq_len(ndim)) {
    T_mat <- matrix(, nrow = nrowMB, ncol = 0)
    for (i in seq_len(ntable)) {
      # Q columns are orthonormal in ICA & PCA.
      temp <- t(temp_tabCalib[[i]]) %*% res_calib$Q[, j] # scaled CD 'local' loadings
      # Local scores (computed once; the original evaluated this twice).
      localscore <- temp_tabCalib[[i]] %*% (temp %*% pracma::pinv(t(temp) %*% temp))
      T_mat <- cbind(T_mat, localscore)
      T_Loc[[TableLabels[i]]][, j] <- localscore
      # Deflate each block copy before extracting the next component.
      temp_tabCalib[[i]] <- temp_tabCalib[[i]] - res_calib$Q[, j] %*% t(temp)
    }
    # MLR b-coefficients between local and global scores: b = pinv(X'X) X'Y.
    b[, j] <- pracma::pinv(t(T_mat) %*% T_mat) %*% t(T_mat) %*% res_calib$Q[, j]
  }
  for (i in seq_len(ntable)) {
    rownames(T_Loc[[TableLabels[i]]]) <- rownames(res_calib$Q)
    colnames(T_Loc[[TableLabels[i]]]) <- colnames(res_calib$Q)
  }
  # Global loadings: projection of the normalised concatenated data on Q.
  L_CD_Vec <- t(Xnorm_mat) %*% res_calib$Q
  if (loquace) {
    message(sprintf("Loadings finished after : %s millisecs", elapsed_ms(start_ini_time)))
  }
  total_progress <- total_progress + pieceBar
  utils::setTxtProgressBar(progress_bar, value = total_progress)
  rm(X_mat)
  rm(temp_tabCalib)

  # OUTPUT ----
  res_calib$T_Loc <- T_Loc
  rm(T_Loc)
  varnames <- vector()
  for (i in seq_len(ntable)) {
    varnames <- append(varnames, MB@Variables[[i]])
  }
  res_calib$P <- L_CD_Vec
  colnames(res_calib$P) <- DimLabels
  rownames(res_calib$P) <- varnames
  rm(L_CD_Vec)
  # Singular values recomputed from b and the saliences, since
  # b_T_Q[, j] = saliences[, j]^2 / SingVal[j]; the SVD-based values are
  # not available under Kernel/Tall/Wide compression.
  if (CompMethod == "Kernel" || CompMethod == "Tall" || CompMethod == "Wide") {
    for (dims in seq_len(ndim)) {
      res_calib$SingVal[dims] <- t(b[, dims]) %*% ((saliences[, dims]^2) / as.vector(t(b[, dims]) %*% b[, dims]))
    }
    names(res_calib$SingVal) <- DimLabels
    varexp <- res_calib$SingVal^2
    res_calib$explained <- varexp / sum(varexp)
    names(res_calib$explained) <- DimLabels
  }
  rm(saliences)
  # Block membership of every row of P (every entry is overwritten).
  belong_block <- rep(NA_character_, nrow(res_calib$P))
  k <- 1
  for (i in seq_len(ntable)) {
    belong_block[k:(k + variable_number[i] - 1)] <- TableLabels[i]
    k <- k + variable_number[i]
  }
  # Total analysis time, guaranteed to be in seconds (see elapsed_ms note).
  running_time <- as.numeric(difftime(Sys.time(), start_ini_time, units = "secs"))
  if (loquace) {
    message(sprintf("Analysis finished after : %s seconds", running_time))
  }
  res_calib$runtime <- running_time
  # Complete and close the existing progress bar (the original opened a
  # second bar here, leaving the first connection dangling).
  utils::setTxtProgressBar(progress_bar, value = 80)
  close(progress_bar)
  # Save the results in a ComDim object.
  res_calib <- new("ComDim",
    Method = "PCA",
    ndim = ndim,
    Q.scores = res_calib$Q,
    T.scores = res_calib$T_Loc,
    P.loadings = res_calib$P,
    Saliences = res_calib$saliences,
    Orthogonal = list(),
    R2X = res_calib$explained,
    R2Y = vector(),
    Q2 = vector(),
    DQ2 = vector(),
    Singular = res_calib$SingVal,
    Mean = list(MeanMB = res_calib$MeanMB, MeanY = NULL),
    Norm = list(NormMB = res_calib$NormMB),
    PLS.model = list(),
    cv = list(),
    Prediction = list(),
    Metadata = res_calib$metadata,
    variable.block = belong_block,
    runtime = as.numeric(res_calib$runtime)
  )
  return(res_calib)
}
#' Compress_Data_2020
#'
#' Compress large multi-block objects. Internal function of ComDim_PCA().
#' @param s_n The multi-block object.
#' @param CompMethod It can be 'Normal' (default), 'Kernel', 'PCT', 'Tall' or 'Wide'.
#' @param Partitions The number of partitions.
#' @return The compressed multi-block.
#' @references Original MATLAB implementation:
#' \url{https://github.com/DNRutledge/ComDim/blob/main/Compress_Data_2020.m}
#' @keywords internal
#' Compress_Data_2020
#'
#' Compress large multi-block objects. Internal function of ComDim_PCA().
#' @param s_n A list of numeric matrices (one per block), all with the
#' same number of rows.
#' @param CompMethod It can be 'Normal' (default), 'Kernel', 'PCT', 'Tall' or 'Wide'.
#' @param Partitions The number of partitions (used by 'Tall' and 'Wide').
#' @return A list of compressed blocks (same length as \code{s_n}).
#' @references Original MATLAB implementation:
#' \url{https://github.com/DNRutledge/ComDim/blob/main/Compress_Data_2020.m}
#' @keywords internal
Compress_Data_2020 <- function(s_n, CompMethod = "Normal", Partitions = 1) {
  # Note: the original signature used self-referential defaults
  # (s_n = s_n, ...), which error under R's lazy evaluation if an
  # argument is missing; plain defaults are backward-compatible.
  s_r <- list()
  ntable <- length(s_n)
  nR <- nrow(s_n[[1]])
  if (!is.null(CompMethod)) {
    if (CompMethod == "Tall") {
      # Tall segmented PCT: full-rank scores computed partition-wise.
      for (i in seq_len(ntable)) {
        MaxRank <- min(nrow(s_n[[i]]), ncol(s_n[[i]]))
        s_r[[i]] <- PCA_Tall_PCT_DNR(Xn = s_n[[i]], PCs = MaxRank, Partitions = Partitions)
      }
    } else if (CompMethod == "Wide") {
      # Compress each column partition separately, then concatenate scores.
      for (i in seq_len(ntable)) {
        Xi <- ColumnsPartition(Xn = s_n[[i]], Partitions = Partitions)
        T_all <- matrix(, nrow = nrow(s_n[[i]]), ncol = 0)
        for (p in seq_len(Partitions)) {
          usv <- svd(Xi[[p]])
          # diag(x, nrow =) guards the single-singular-value case, where
          # plain diag(scalar) would build an identity matrix instead of
          # a 1x1 diagonal matrix.
          T_in <- usv$u %*% diag(usv$d, nrow = length(usv$d))
          T_all <- cbind(T_all, T_in)
        }
        s_r[[i]] <- T_all
      }
    } else if (CompMethod == "Kernel") {
      # Scores from the eigendecomposition of the (scaled) kernel X X'/n;
      # the result satisfies s_r %*% t(s_r) == X %*% t(X).
      for (i in seq_len(ntable)) {
        Kern <- (s_n[[i]] %*% t(s_n[[i]])) / nR
        usv <- svd(Kern)
        s_r[[i]] <- sqrt(nR) * usv$u %*% diag(sqrt(usv$d), nrow = length(usv$d))
      }
    } else if (CompMethod == "PCT") {
      # Principal-component transform: scores U D of the full SVD.
      for (i in seq_len(ntable)) {
        usv <- svd(s_n[[i]])
        s_r[[i]] <- usv$u %*% diag(usv$d, nrow = length(usv$d))
      }
    } else if (CompMethod == "Normal") {
      # No compression.
      for (i in seq_len(ntable)) {
        s_r[[i]] <- s_n[[i]]
      }
    }
  }
  return(s_r)
}
#' PCA_Tall_PCT_DNR
#'
#' Compress a block using the PCT method. Internal function of ComDim_PCA().
#' @param Xn The block to compress.
#' @param PCs Number of PCs to keep.
#' @param Partitions The number of partitions.
#' @return The compressed block.
#' @keywords internal
#' PCA_Tall_PCT_DNR
#'
#' Compress a block using the PCT method. Internal function of ComDim_PCA().
#' The cross-product matrix X'X is accumulated over row partitions so that
#' only one partition of rows is handled at a time.
#' @param Xn The block to compress (numeric matrix).
#' @param PCs Number of PCs to keep.
#' @param Partitions The number of row partitions.
#' @return The compressed block: the first \code{PCs} score columns.
#' @keywords internal
PCA_Tall_PCT_DNR <- function(Xn, PCs, Partitions = 1) {
  W <- RowsPartition(Xn, Partitions)
  # Accumulate D = X'X partition by partition.
  D <- t(W[[1]]) %*% W[[1]]
  if (Partitions > 1) {
    for (par in 2:Partitions) {
      D <- D + (t(W[[par]]) %*% W[[par]])
    }
  }
  # eigen() returns eigenvalues in decreasing order; columns of $vectors
  # are the corresponding (principal-axis) eigenvectors.
  vs <- eigen(D)
  scores <- matrix(, nrow = 0, ncol = PCs)
  for (par in seq_len(Partitions)) {
    part_scores <- W[[par]] %*% vs$vectors
    # drop = FALSE keeps the slice a matrix when PCs == 1; the original
    # 'to[, 1:PCs]' collapsed to a vector and corrupted the rbind.
    scores <- rbind(scores, part_scores[, seq_len(PCs), drop = FALSE])
  }
  return(scores)
}
#' ColumnsPartition
#'
#' Calculate vertical partitions in a block. Internal function of ComDim_PCA().
#' @param Xn A block.
#' @param Partitions The number of partitions.
#' @return A list of column-partitioned sub-matrices.
#' @keywords internal
#' ColumnsPartition
#'
#' Calculate vertical partitions in a block. Internal function of ComDim_PCA().
#' Columns are split into \code{Partitions} contiguous slices; the last
#' slice absorbs the remainder when \code{ncol(Xn)} is not a multiple of
#' \code{Partitions}.
#' @param Xn A block (numeric matrix).
#' @param Partitions The number of partitions.
#' @return A list of column-partitioned sub-matrices.
#' @keywords internal
ColumnsPartition <- function(Xn, Partitions = 1) {
  cols <- ncol(Xn)
  stride <- floor(cols / Partitions)
  remaining <- cols %% Partitions
  count <- 1
  W <- list()
  for (i in seq_len(Partitions)) {
    step <- count + stride - 1
    if (i == Partitions) {
      step <- step + remaining # last slice takes the leftover columns
    }
    # drop = FALSE keeps one-column slices as matrices (callers run svd()
    # on each slice and expect a matrix).
    W[[i]] <- Xn[, count:step, drop = FALSE]
    count <- count + stride
  }
  return(W)
}
#' RowsPartition
#'
#' Calculate horizontal partitions in a block. Internal function of ComDim_PCA().
#' @param Xn A block.
#' @param Partitions The number of partitions.
#' @return A list of row-partitioned sub-matrices.
#' @keywords internal
#' RowsPartition
#'
#' Calculate horizontal partitions in a block. Internal function of ComDim_PCA().
#' Rows are split into \code{Partitions} contiguous slices; the last slice
#' absorbs the remainder when \code{nrow(Xn)} is not a multiple of
#' \code{Partitions}.
#' @param Xn A block (numeric matrix).
#' @param Partitions The number of partitions.
#' @return A list of row-partitioned sub-matrices.
#' @keywords internal
RowsPartition <- function(Xn, Partitions = 1) {
  rows <- nrow(Xn)
  stride <- floor(rows / Partitions)
  remaining <- rows %% Partitions
  count <- 1
  W <- list()
  for (i in seq_len(Partitions)) {
    step <- count + stride - 1
    if (i == Partitions) {
      step <- step + remaining # last slice takes the leftover rows
    }
    # drop = FALSE keeps one-row slices as matrices (callers form
    # t(W) %*% W and expect a matrix).
    W[[i]] <- Xn[count:step, , drop = FALSE]
    count <- count + stride
  }
  return(W)
}
# NOTE(review): the three lines below are website page-footer text captured
# when this file was scraped; commented out so the file parses as valid R.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.