# print.rgcca <- function(x,verbose=FALSE,...) {
# nbloc <- length(x$Y)
# cat("Outputs for RGCCA: \n")
# }
#' Regularized Generalized Canonical Correlation Analysis (RGCCA) is a generalization
#' of regularized canonical correlation analysis to three or more sets of variables.
#' Given \eqn{J} matrices \eqn{X_1, X_2, ..., X_J} that represent
#' \eqn{J} sets of variables observed on the same set of \eqn{n} individuals. The matrices
#' \eqn{X_1, X_2, ..., X_J} must have the same number of rows,
#' but may (and usually will) have different numbers of columns. The aim of RGCCA is to study
#' the relationships between these \eqn{J} blocks of variables. It constitutes a general
#' framework for many multi-block data analysis methods. It combines the power of
#' multi-block data analysis methods (maximization of well identified criteria)
#' and the flexibility of PLS path modeling (the researcher decides which blocks
#' are connected and which are not). Hence, the use of RGCCA requires the construction
#' (user specified) of a design matrix \eqn{C}, that characterize
#' the connections between blocks. Elements of the symmetric design matrix \eqn{C = (c_{jk})}
#' is equal to 1 if block \eqn{j} and block \eqn{k} are connected, and 0 otherwise.
#' The function rgcca() implements a monotonically convergent algorithm (i.e. the bounded
#' criteria to be maximized increases at each step of the iterative procedure) that is very
#' similar to the PLS algorithm proposed by Herman Wold and finds at convergence a stationnary point
#' of the RGCCA optimization problem. . Moreover, depending on the
#' dimensionality of each block \eqn{X_j}, \eqn{j = 1, ..., J}, the primal (when \eqn{n > p_j}) algorithm or
#' the dual (when \eqn{n < p_j}) algorithm is used (see Tenenhaus et al. 2015).
#' Moreover, by deflation strategy, rgcca() allow to compute several RGCCA block
#' components (specified by ncomp) for each block. Within each block, block components are guaranteed to
#' be orthogonal using the deflation procedure. The so-called symmetric deflation is considered in
#' this implementation, i.e. each block is deflated with respect to its own component(s).
#' It should be noted that the numbers of components per block can differ from one block to another.
#' @param A A list that contains the \eqn{J} blocks of variables \eqn{X_1, X_2, ..., X_J}.
#' @param C A design matrix that describes the relationships between blocks (default: complete design).
#' @param tau tau is either a \eqn{1 * J} vector or a \eqn{max(ncomp) * J} matrix, and contains the values
#' of the shrinkage parameters (default: tau = 1, for each block and each dimension).
#' If tau = "optimal" the shrinkage paramaters are estimated for each block and each dimension using the Schafer and Strimmer (2005)
#' analytical formula . If tau is a \eqn{1* J} numeric vector, tau[j] is identical across the dimensions of block \eqn{X_j}.
#' If tau is a matrix, tau[k, j] is associated with \eqn{X_{jk}} (\eqn{k}th residual matrix for block \eqn{j})
#' @param ncomp A \eqn{1 * J} vector that contains the numbers of components for each block (default: rep(1, length(A)), which gives one component per block.)
#' @param scheme The value is "horst", "factorial", "centroid" or any diffentiable convex scheme function g designed by the user (default: "centroid").
#' @param scale If scale = TRUE, each block is standardized to zero means and unit variances and then divided by the square root of its number of variables (default: TRUE).
#' @param verbose If verbose = TRUE, the progress will be report while computing (default: TRUE).
#' @param init The mode of initialization to use in RGCCA algorithm. The alternatives are either by Singular Value Decompostion ("svd") or random ("random") (Default: "svd").
#' @param bias A logical value for biaised or unbiaised estimator of the var/cov (default: bias = TRUE).
#' @param tol The stopping value for convergence.
#' @return \item{Y}{A list of \eqn{J} elements. Each element of \eqn{Y} is a matrix that contains the RGCCA components for the corresponding block.}
#' @return \item{a}{A list of \eqn{J} elements. Each element of \eqn{a} is a matrix that contains the outer weight vectors for each block.}
#' @return \item{astar}{A list of \eqn{J} elements. Each element of astar is a matrix defined as Y[[j]][, h] = A[[j]]\%*\%astar[[j]][, h].}
#' @return \item{C}{A design matrix that describes the relation between blocks (user specified).}
#' @return \item{tau}{A vector or matrix that contains the values of the shrinkage parameters applied to each block and each dimension (user specified).}
#' @return \item{scheme}{The scheme chosen by the user (user specified).}
#' @return \item{ncomp}{A \eqn{1 * J} vector that contains the numbers of components for each block (user specified).}
#' @return \item{crit}{A vector that contains the values of the criteria across iterations.}
#' @return \item{primal_dual}{A \eqn{1 * J} vector that contains the formulation ("primal" or "dual") applied to each of the \eqn{J} blocks within the RGCCA alogrithm}
#' @return \item{AVE}{indicators of model quality based on the Average Variance Explained (AVE): AVE(for one block), AVE(outer model), AVE(inner model).}
#' @references Tenenhaus M., Tenenhaus A. and Groenen PJF (2017), Regularized generalized canonical correlation analysis: A framework for sequential multiblock component methods, Psychometrika, in press
#' @references Tenenhaus A., Philippe C., & Frouin V. (2015). Kernel Generalized Canonical Correlation Analysis. Computational Statistics and Data Analysis, 90, 114-131.
#' @references Tenenhaus A. and Tenenhaus M., (2011), Regularized Generalized Canonical Correlation Analysis, Psychometrika, Vol. 76, Nr 2, pp 257-284.
#' @references Schafer J. and Strimmer K., (2005), A shrinkage approach to large-scale covariance matrix estimation and implications for functional genomics. Statist. Appl. Genet. Mol. Biol. 4:32.
#' @title Regularized Generalized Canonical Correlation Analysis (RGCCA)
#' @examples
#' #############
#' # Example 1 #
#' #############
#' data(Russett)
#' X_agric =as.matrix(Russett[,c("gini","farm","rent")])
#' X_ind = as.matrix(Russett[,c("gnpr","labo")])
#' X_polit = as.matrix(Russett[ , c("demostab", "dictator")])
#' A = list(X_agric, X_ind, X_polit)
#' #Define the design matrix (output = C)
#' C = matrix(c(0, 0, 1, 0, 0, 1, 1, 1, 0), 3, 3)
#' result.rgcca = rgcca(A, C, tau = c(1, 1, 1), scheme = "factorial", scale = TRUE)
#' lab = as.vector(apply(Russett[, 9:11], 1, which.max))
#' plot(result.rgcca$Y[[1]], result.rgcca$Y[[2]], col = "white",
#' xlab = "Y1 (Agric. inequality)", ylab = "Y2 (Industrial Development)")
#' text(result.rgcca$Y[[1]], result.rgcca$Y[[2]], rownames(Russett), col = lab, cex = .7)
#'
#' #############
#' # Example 2 #
#' #############
#' data(Russett)
#' X_agric =as.matrix(Russett[,c("gini","farm","rent")])
#' X_ind = as.matrix(Russett[,c("gnpr","labo")])
#' X_polit = as.matrix(Russett[ , c("inst", "ecks", "death",
#' "demostab", "dictator")])
#' A = list(X_agric, X_ind, X_polit, cbind(X_agric, X_ind, X_polit))
#'
#' #Define the design matrix (output = C)
#' C = matrix(c(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0), 4, 4)
#' result.rgcca = rgcca(A, C, tau = c(1, 1, 1, 0), ncomp = rep(2, 4),
#' scheme = function(x) x^4, scale = TRUE) # HPCA
#' lab = as.vector(apply(Russett[, 9:11], 1, which.max))
#' plot(result.rgcca$Y[[4]][, 1], result.rgcca$Y[[4]][, 2], col = "white",
#' xlab = "Global Component 1", ylab = "Global Component 2")
#' text(result.rgcca$Y[[4]][, 1], result.rgcca$Y[[4]][, 2], rownames(Russett),
#' col = lab, cex = .7)
#'
#' \dontrun{
#' ######################################
#' # example 3: RGCCA and leave one out #
#' ######################################
#' Ytest = matrix(0, 47, 3)
#' X_agric =as.matrix(Russett[,c("gini","farm","rent")])
#' X_ind = as.matrix(Russett[,c("gnpr","labo")])
#' X_polit = as.matrix(Russett[ , c("demostab", "dictator")])
#' A = list(X_agric, X_ind, X_polit)
#' #Define the design matrix (output = C)
#' C = matrix(c(0, 0, 1, 0, 0, 1, 1, 1, 0), 3, 3)
#' result.rgcca = rgcca(A, C, tau = rep(1, 3), ncomp = rep(1, 3),
#' scheme = "factorial", verbose = TRUE)
#'
#' for (i in 1:nrow(Russett)){
#' B = lapply(A, function(x) x[-i, ])
#' B = lapply(B, scale2)
#' resB = rgcca(B, C, tau = rep(1, 3), scheme = "factorial", scale = FALSE, verbose = FALSE)
#' # look for potential conflicting sign among components within the loo loop.
#' for (k in 1:length(B)){
#' if (cor(result.rgcca$a[[k]], resB$a[[k]]) >= 0)
#' resB$a[[k]] = resB$a[[k]] else resB$a[[k]] = -resB$a[[k]]
#' }
#' Btest =lapply(A, function(x) x[i, ])
#' Btest[[1]]=(Btest[[1]]-attr(B[[1]],"scaled:center")) /
#' (attr(B[[1]],"scaled:scale"))/sqrt(NCOL(B[[1]]))
#' Btest[[2]]=(Btest[[2]]-attr(B[[2]],"scaled:center")) /
#' (attr(B[[2]],"scaled:scale"))/sqrt(NCOL(B[[2]]))
#' Btest[[3]]=(Btest[[3]]-attr(B[[3]],"scaled:center")) /
#' (attr(B[[3]],"scaled:scale"))/sqrt(NCOL(B[[3]]))
#' Ytest[i, 1] = Btest[[1]]%*%resB$a[[1]]
#' Ytest[i, 2] = Btest[[2]]%*%resB$a[[2]]
#' Ytest[i, 3] = Btest[[3]]%*%resB$a[[3]]
#' }
#' lab = apply(Russett[, 9:11], 1, which.max)
#' plot(result.rgcca$Y[[1]], result.rgcca$Y[[2]], col = "white",
#' xlab = "Y1 (Agric. inequality)", ylab = "Y2 (Ind. Development)")
#' text(result.rgcca$Y[[1]], result.rgcca$Y[[2]], rownames(Russett),
#' col = lab, cex = .7)
#' text(Ytest[, 1], Ytest[, 2], substr(rownames(Russett), 1, 1),
#' col = lab, cex = .7)
#'}
#' @export rgcca
rgcca <- function(A, C = 1-diag(length(A)), tau = rep(1, length(A)), ncomp = rep(1, length(A)), scheme = "centroid", scale = TRUE , init="svd", bias = TRUE, tol = 1e-8, verbose=TRUE) {
if (any(ncomp < 1)) stop("Compute at least one component per block!")
pjs <- sapply(A, NCOL)
nb_row <- NROW(A[[1]])
if (any(ncomp-pjs > 0)) stop("For each block, choose a number of components smaller than the number of variables!")
#-------------------------------------------------------
if (mode(scheme) != "function") {
if ((scheme != "horst" ) & (scheme != "factorial") & (scheme != "centroid")) {
stop("Choose one of the three following schemes: horst, centroid, factorial or design the g function")
}
if (verbose) cat("Computation of the RGCCA block components based on the", scheme, "scheme \n")
}
if (mode(scheme) == "function" & verbose) {
cat("Computation of the RGCCA block components based on the g scheme \n")
}
#-------------------------------------------------------
if (scale == TRUE) {
A = lapply(A, function(x) scale2(x, bias = bias))
A = lapply(A, function(x) x/sqrt(NCOL(x)))
}
if (!is.numeric(tau) & verbose) {
cat("Optimal Shrinkage intensity paramaters are estimated \n")
}
else {
if (is.numeric(tau) & verbose) {
cat("Shrinkage intensity paramaters are chosen manually \n")
}
}
AVE_X = list()
AVE_outer <- vector()
ndefl <- ncomp-1
N <- max(ndefl)
nb_ind <- NROW(A[[1]])
J <- length(A)
primal_dual = rep("primal", J)
primal_dual[which(nb_row<pjs)] = "dual"
if (N == 0) {
result <- rgccak(A, C, tau=tau , scheme=scheme, init = init, bias = bias, tol = tol, verbose=verbose)
Y <- NULL
for (b in 1:J) Y[[b]] <- result$Y[,b, drop = FALSE]
#Average Variance Explained (AVE) per block
for (j in 1:J) AVE_X[[j]] = mean(cor(A[[j]], Y[[j]])^2)
#AVE outer
AVE_outer <- sum(pjs * unlist(AVE_X))/sum(pjs)
AVE <- list(AVE_X = AVE_X,
AVE_outer = AVE_outer,
AVE_inner = result$AVE_inner)
a <- lapply(result$a, cbind)
for (b in 1:J) {
rownames(a[[b]]) = colnames(A[[b]])
rownames(Y[[b]]) = rownames(A[[b]])
colnames(Y[[b]]) = "comp1"
}
out <- list(Y=Y, a =a, astar=a, C=C,
tau=result$tau, scheme=scheme,
ncomp=ncomp, crit=result$crit,
primal_dual = primal_dual,
AVE=AVE)
class(out) <- "rgcca"
return(out)
}
Y <- NULL
crit = list()
AVE_inner <- rep(NA,max(ncomp))
R <- A
P <- a <- astar <- NULL
if (is.numeric(tau)) tau_mat = tau
else tau_mat = matrix(NA, max(ncomp), J)
for (b in 1:J) P[[b]] <- a[[b]] <- astar[[b]] <- matrix(NA,pjs[[b]],N+1)
for (b in 1:J) Y[[b]] <- matrix(NA,nb_ind,N+1)
for (n in 1:N) {
if (verbose) cat(paste0("Computation of the RGCCA block components #", n, " is under progress...\n"))
if(is.vector(tau))
rgcca.result <- rgccak(R, C, tau = tau , scheme=scheme, init = init, bias = bias, tol = tol, verbose=verbose)
else
rgcca.result <- rgccak(R, C, tau = tau[n, ] , scheme=scheme, init = init, bias = bias, tol = tol, verbose=verbose)
if (!is.numeric(tau)) tau_mat[n, ] = rgcca.result$tau
AVE_inner[n] <- rgcca.result$AVE_inner
crit[[n]] <- rgcca.result$crit
for (b in 1:J) Y[[b]][,n] <- rgcca.result$Y[ , b]
defla.result <- defl.select(rgcca.result$Y, R, ndefl, n, nbloc = J)
R <- defla.result$resdefl
for (b in 1:J) P[[b]][,n] <- defla.result$pdefl[[b]]
for (b in 1:J) a[[b]][,n] <- rgcca.result$a[[b]]
if (n==1) {
for (b in 1:J) astar[[b]][,n] <- rgcca.result$a[[b]]
}
else {
for (b in 1:J) astar[[b]][,n] <- rgcca.result$a[[b]] - astar[[b]][,(1:n-1), drop=F] %*% drop( t(a[[b]][,n]) %*% P[[b]][,1:(n-1),drop=F] )
}
}
if (verbose) cat(paste0("Computation of the RGCCA block components #", N+1, " is under progress ... \n"))
if(is.vector(tau))
rgcca.result <- rgccak(R, C, tau = tau , scheme=scheme, init = init, bias = bias, tol = tol, verbose=verbose)
else
rgcca.result <- rgccak(R, C, tau = tau[N+1, ] , scheme=scheme, init = init, bias = bias, tol = tol, verbose=verbose)
crit[[N+1]] <- rgcca.result$crit
if (!is.numeric(tau)) tau_mat[N+1, ] = rgcca.result$tau
AVE_inner[max(ncomp)] <- rgcca.result$AVE_inner
for (b in 1:J) {
Y[[b]][,N+1] <- rgcca.result$Y[ ,b]
a[[b]][,N+1] <- rgcca.result$a[[b]]
astar[[b]][,N+1] <- rgcca.result$a[[b]] - astar[[b]][,(1:N),drop=F] %*% drop( t(a[[b]][,(N+1)]) %*% P[[b]][,1:(N),drop=F] )
rownames(a[[b]]) = rownames(astar[[b]]) = colnames(A[[b]])
rownames(Y[[b]]) = rownames(A[[b]])
colnames(Y[[b]]) = paste0("comp", 1:max(ncomp))
}
shave.matlist <- function(mat_list, nb_cols) mapply(function(m, nbcomp) m[, 1:nbcomp, drop = FALSE], mat_list, nb_cols,SIMPLIFY=FALSE)
shave.veclist <- function(vec_list, nb_elts) mapply(function(m, nbcomp) m[1:nbcomp], vec_list, nb_elts, SIMPLIFY=FALSE)
#Average Variance Explained (AVE) per block
for (j in 1:J) AVE_X[[j]] = apply(cor(A[[j]], Y[[j]])^2, 2, mean)
#AVE outer
outer = matrix(unlist(AVE_X), nrow = max(ncomp))
for (j in 1:max(ncomp)) AVE_outer[j] <- sum(pjs * outer[j, ])/sum(pjs)
Y = shave.matlist(Y, ncomp)
AVE_X = shave.veclist(AVE_X, ncomp)
AVE <- list(AVE_X = AVE_X,
AVE_outer_model = AVE_outer,
AVE_inner_model = AVE_inner)
out <- list(Y = shave.matlist(Y, ncomp),
a = shave.matlist(a, ncomp),
astar = shave.matlist(astar, ncomp),
C = C, tau = tau_mat,
scheme = scheme, ncomp=ncomp, crit = crit,
primal_dual = primal_dual,
AVE = AVE)
class(out) <- "rgcca"
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.