# R/ComDim.R

#' Common Dimensions analysis (ComDim)
#'
#' Performs ComDim analysis on a set of quantitative blocks of variables. ComDim can be viewed as a Multiblock Weighted Principal Components Analysis (MBWPCA).
#'
#' @param X  Dataset obtained by horizontally merging all the blocks of variables.
#' @param block  Vector indicating the number of variables in each block.
#' @param name.block  Names of the blocks of variables (NULL by default).
#' @param ncomp  Number of dimensions to compute. By default (NULL), all the global components are extracted.
#' @param scale  Logical, if TRUE (by default) then variables are scaled to unit variance (all variables are centered anyway).
#' @param scale.block  Logical, if TRUE (by default) each block of variables is divided by the square root of its inertia (Frobenius norm).
#' @param threshold  Convergence threshold of the iterative algorithm used to compute each dimension (1e-8 by default).
#'
#' @return Returns a list of the following elements:\cr
#' @return \item{optimalcrit}{Numeric vector of the optimal value of the criterion (sum of squared saliences) obtained for each dimension.}
#' @return \item{saliences}{Matrix of the specific weights of each block of variables on the global components, for each dimension.}
#' @return \item{T.g}{Matrix of normed global components.}
#' @return \item{Scor.g}{Matrix of global components (scores of individuals).}
#' @return \item{W.g}{Matrix of global weights (normed) associated with deflated X.}
#' @return \item{Load.g}{Matrix of global loadings (normed).}
#' @return \item{Proj.g}{Matrix of global projection (to compute scores from pretreated X).}
#' @return \item{explained.X}{Matrix of percentages of inertia explained in each block of variables.}
#' @return \item{cumexplained}{Matrix giving the percentages, and cumulative percentages, of total inertia of X blocks explained by the global components.}
#' @return \item{Block}{A list containing block components (T.b) and block weights (W.b).}
#'
#' @references
#' E.M. Qannari, I. Wakeling, P. Courcoux, J.M. MacFie (2000). Defining the underlying sensory dimensions, Food Quality and Preference, 11: 151-154.\cr
#'
#' E. Tchandao Mangamana, V. Cariou, E. Vigneau, R. Glèlè Kakaï, E.M. Qannari (2019). Unsupervised multiblock data analysis: A unified approach and extensions, Chemometrics and Intelligent Laboratory Systems, 194, 103856.
#'
#' @seealso \code{\link{summary.ComDim}}    \code{\link{plot.ComDim}}
#'
#' @examples
#' data(ham)
#' X=ham$X
#' block=ham$block
#' res.comdim <- ComDim(X,block,name.block=names(block))
#' summary(res.comdim)
#' plot(res.comdim)
#'
#' @export



# Creates an object of class ComDim.
ComDim <- function(X, block, name.block=NULL, ncomp=NULL, scale=TRUE, scale.block=TRUE, threshold=1e-8){
  # ---------------------------------------------------------------------------
  # 0. Preliminary tests
  # ---------------------------------------------------------------------------

  if (any(is.na(X))){   # to be modified if missing values allowed
    stop("No NA values are allowed")
  }
  if (sum(block) != ncol(X)){
    stop("The sum of block must be equal to the total number of variables ofX")
  }
  # if (!algo %in% c("eigen", "nipals"))
  #   stop("algo must be either eigen or nipals")
  if (!is.logical(scale)){
    stop("scale must be must be logical")
  }
  if (!is.logical(scale.block)){
    stop("scale.block must be logical")
  }
  if (!is.null(name.block) & (length(name.block)!=length(block))){
    stop("name.block and block must be of same length")
  }
  X             <- as.matrix(X)

  # ---------------------------------------------------------------------------
  # 1. Output preparation
  # ---------------------------------------------------------------------------

  if (is.null(rownames(X))){
    rownames(X) <- paste("Ind.", seq_len(nrow(X)), sep="") # Name the rows if X has no row names.
  }
  if (is.null(colnames(X))){
    colnames(X) <- paste("X", seq_len(ncol(X)), sep=".")   # Name the columns if X has no column names.
  }
  if (is.null(name.block)){
    names(block) <- paste("Block", 1:length(block), sep="") # Give the names for the blocks if it was not defined.
  }else{
    names(block) <- name.block
  }

  ntab          <- length(block)     # Number of blocks of variables
  nind          <- nrow(X)           # Number of individuals
  ncolX         <- sum(block)        # Number of columns in all the blocks of variables
  ncompmax      <- min(nind-1, ncolX)
  if (is.null(ncomp)){
    ncomp   <- ncompmax              # Number of global components of the analysis
  }else{
    ncomp   <- min(ncomp, ncompmax)
  }

  J             <- rep(1:ntab,block) # Will be used to identify the variables of each dataset
  optimalcrit   <- vector("numeric",length=ncomp)    # Gives the optimal value of the criterion
  names(optimalcrit) <- paste("Dim.",1:ncomp,sep="")
  Trace        <- vector("numeric",length=ntab)      # Trace of (XbXb')

  criterion    <- vector("numeric",length=ntab)      # Storage of the criterion to be maximized
  LAMBDA       <- matrix(1,ntab,ncomp)               # Specific weights for each dataset and each dimension
  dimnames(LAMBDA) <- list(names(block), paste("Dim.",1:ncomp,sep=""))
  T.g          <- matrix(0,nrow=nind,ncol=ncomp)     # Global components associated with X (normed)
  dimnames(T.g)<- list(rownames(X), paste("Dim.",1:ncomp,sep=""))
  T.b   <- array(0,dim=c(nind,ncomp,ntab))           # Block components (not normed)
  dimnames(T.b)<- list(rownames(X), paste("Dim.",1:ncomp,sep=""), names(block))

  W.b          <- vector("list",length=ntab)   # Weights for the block components
  for (j in 1:ntab) {
    W.b[[j]]   <-  matrix(0,nrow=block[j],ncol=ncomp)
    dimnames(W.b[[j]]) <-list(colnames(X[,J==j]), paste("Dim.",1:ncomp,sep=""))
  }
  wb            <- matrix(0,nrow=ncolX,ncol=ncomp)         # W.b in matrix form
  tb            <- matrix(0,nrow=nind,ncol=ntab)           # Concatenated Xb block components at current dimension comp

  W.g           <- matrix(0,nrow=ncolX,ncol=ncomp)      # Weights for the global components
  Proj.g        <- matrix(0,nrow=ncolX,ncol=ncomp)      # Modified Weights to take account of deflation
  Load.g        <- matrix(0,nrow=ncolX,ncol=ncomp)
  dimnames(W.g)<- dimnames(Proj.g)<- dimnames(Load.g) <- list(colnames(X), paste("Dim.",1:ncomp,sep=""))
  IT.X          <- vector("numeric",length=ntab+1)
  explained.X   <- matrix(0,nrow=ntab+1,ncol=ncomp)      # Percentage of explained inertia of each Xb block and global
  dimnames(explained.X)<-  list(c(names(block),'Global'), paste("Dim.",1:ncomp, sep=""))
  cumexplained  <- matrix(0,nrow=ncomp,ncol=2)
  rownames(cumexplained) <- paste("Dim.",1:ncomp,sep="")
  colnames(cumexplained) <- c("%explX", "cum%explX")


  Xscale        <- NULL
  Block         <- NULL
  call          <- NULL
  res           <- NULL


  # ---------------------------------------------------------------------------
  # 2. Required parameters and data preparation
  # ---------------------------------------------------------------------------
  Xinput        <- X
  Xscale$mean   <- apply(X, 2, mean)
  X             <-sweep(X, 2, Xscale$mean, "-")              # Default centering

  if (!scale) {         # no scaling
    Xscale$scale <- rep(1, times=ncol(X))
  } else {              # scaling (division by sd) of each variable
    sd.tab    <- apply(X, 2, function (x) {return(sqrt(sum(x^2)/length(x)))})   # sd based on biased variance
    temp      <- sd.tab < 1e-14
    if (any(temp)) {
      warning("Variables with null variance not standardized.")
      sd.tab[temp] <- 1
    }
    X         <- sweep(X, 2, sd.tab, "/")
    Xscale$scale <-sd.tab
  }

  #  Pre-processing: block weighting to set each block inertia equal to 1
  if (scale.block) {
    inertia     <- sapply(1:ntab, function(j) inertie(X[, J==j]))
    w.tab       <- rep(sqrt(inertia) , times =  block )     # Weighting parameter applied to each variable
    X           <- sweep(X, 2, w.tab, "/")
    Xscale$scale<- Xscale$scale*w.tab
  }
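  # When scale.block is TRUE, every block of the pretreated X now has inertia
  # (sum of squared entries) equal to 1, so no block dominates the analysis
  # merely because of its size or scale. The column scaling applied so far is
  # accumulated in Xscale$scale, which maps raw data to the pretreated space.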

  IT.X[1:ntab]     <- sapply(1:ntab, function(j) {inertie(X[,J==j]) })  # Inertia of each block of variable
  IT.X[ntab+1]     <- sum(IT.X[1:ntab])

  X00 <- X        # Keep the pretreated X as a single matrix
  X   <- lapply(seq_len(ntab), function(j) {as.matrix(X[,J==j])}) # Split X into a list of blocks
  X0  <- X        # Keep the pretreated blocks (as a list), before deflation


  # ---------------------------------------------------------------------------
  # 3. computation of T.g and LAMBDA for the various dimensions
  # ---------------------------------------------------------------------------
  #pb <- txtProgressBar(min = 0, max = ncomp, style = 3)

  for (comp in 1:ncomp)  {           # Iterative computation of the various components

    algo="eigen"  # fixed parameter
    if  (algo=="eigen"){     # fixed parameter in this version

      # initialization
      P       <- matrix(0,nrow=nind,ncol=nind)   # Cross product matrix
      for(j in 1:ntab) {P <- P + tcrossprod(X[[j]])}
      ressvd <- svd(P,nu=1,nv=1)
      t<- ressvd$u
      lambda <- matrix(sapply(seq_len(ntab),
                              function(j){t(t)%*%tcrossprod(X[[j]])%*%t}),nrow=ntab,byrow=FALSE)
      previousfit=sum(lambda^2)
      deltafit=threshold+1

      # loop to estimate ComDim saliences
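      # Alternating scheme: given the saliences lambda_j, t is taken as the first
      # eigenvector of P = sum_j lambda_j Xj Xj'; given t, each salience is updated
      # as lambda_j = t' Xj Xj' t. Each pass does not decrease the criterion
      # sum_j lambda_j^2, whose relative increase is monitored for convergence.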
      while(deltafit>threshold) {
        P       <- matrix(0,nrow=nind,ncol=nind)   # Cross product matrix
        for(j in 1:ntab){
          P <- P + lambda[j]*tcrossprod(X[[j]])
        }
        ressvd <- svd(P,nu=1,nv=1)
        t<- ressvd$u
        lambda <- matrix(sapply(seq_len(ntab),
                                function(j){t(t)%*%tcrossprod(X[[j]])%*%t}),nrow=ntab,byrow=FALSE)
        fit=sum(lambda^2)
        deltafit=(fit-previousfit)/previousfit
        previousfit=fit
      }# end while(deltafit>threshold)
      LAMBDA[,comp]<-lambda
      T.g[,comp]<-t
      optimalcrit[comp]<-sum(lambda^2)
    } # end if (algo==eigen)

    # ---------------------------------------------------------------------------
    # 3.2 Storage of the results associated with dimension h
    # ---------------------------------------------------------------------------

    for(j in 1:ntab) {
      W.b[[j]][,comp] <- t(X[[j]])%*%T.g[,comp]
      T.b[,comp,j]    <- X[[j]]%*%W.b[[j]][,comp]     # tcrossprod(X[[j]])%*%T[,comp]
    }
    wb[,comp]  <- unlist(sapply(1:ntab, function(j){W.b[[j]][,comp]}))  # Weights in matrix form

    # global weights
    W.g[,comp]   <- unlist(sapply(1:ntab,function(j){LAMBDA[j,comp]*W.b[[j]][,comp]}))  # Weights
    W.g[,comp]   <- normv(W.g[,comp])

    # ---------------------------------------------------------------------------
    # 3.3 Deflation
    # ---------------------------------------------------------------------------

    X.exp                   <- lapply(X,function(Xj) {tcrossprod(T.g[,comp]) %*% Xj})
    X0.exp                  <- lapply(X0,function(Xj){tcrossprod(T.g[,comp]) %*% Xj})
    explained.X[1:ntab,comp]   <- sapply(X0.exp,function(x) {sum(x^2)})
    explained.X[ntab+1,comp]   <- sum(explained.X[1:ntab,comp])
    X         <- lapply(seq_len(ntab),function(j) {X[[j]]-X.exp[[j]]}) # Deflation of X
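    # The deflation projects each block onto the orthogonal complement of the
    # current global component, Xj <- (I - t t') Xj with t = T.g[,comp] (normed),
    # so the components extracted afterwards are orthogonal to those already found.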

    #setTxtProgressBar(pb, comp)
  }# end for (comp in 1:ncomp)

  explained.X       <- sweep(explained.X[,1:ncomp,drop=FALSE] ,1,IT.X,"/")  # Explained inertia for X
  cumexplained[,1]  <- explained.X[ntab+1,1:ncomp]
  cumexplained[,2]  <- cumsum(cumexplained[,1])

  # Unnormed global components
  if (ncomp==1) {
    Scor.g=T.g*sqrt(colSums(LAMBDA))
  }  else {
    Scor.g=T.g%*%sqrt(diag(colSums(LAMBDA)))
  }
  dimnames(Scor.g) <- dimnames(T.g)    # Unnormed global components

  # Global Loadings
  Load.g=crossprod(X00,Scor.g)
  Load.g=t(t(Load.g)/colSums(Scor.g^2))

  # Global Projection
  Proj.g <- W.g %*% solve(crossprod(Load.g,W.g),tol=1e-300)      # Weights that take into account the deflation procedure
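  # By construction, multiplying the pretreated (non-deflated) matrix X00 by
  # Proj.g recovers the unnormed global scores Scor.g (up to numerical precision);
  # see the commented sketch at the end of this file.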



  # ---------------------------------------------------------------------------
  # 4.1 Preparation of the results Global
  # ---------------------------------------------------------------------------

  res$optimalcrit         <- optimalcrit[1:ncomp]
  res$saliences           <- LAMBDA[,1:ncomp,drop=FALSE]
  res$T.g                 <- T.g[,1:ncomp,drop=FALSE]     # Storage of the normed global components associated with X (pre-treated)
  res$Scor.g              <- Scor.g[,1:ncomp,drop=FALSE]  # Storage of the global components (unnormed) associated with X (pre-treated)
  res$W.g                 <- W.g[,1:ncomp,drop=FALSE]     # Storage of the normed global weights associated with deflated X (pre-treated)
  res$Load.g              <- Load.g[,1:ncomp,drop=FALSE]  # Storage of the normed global loadings associated with X (pre-treated)
  res$Proj.g              <- Proj.g[,1:ncomp,drop=FALSE]  # Storage of the global projection weights associated with X (pre-treated)
  res$explained.X         <- round(100*explained.X[1:ntab,1:ncomp,drop=FALSE],2)
  res$cumexplained        <- round(100*cumexplained[1:ncomp,],2)


  # ---------------------------------------------------------------------------
  # 4.2 Preparation of the results Block
  # ---------------------------------------------------------------------------

  Block$T.b         <-  T.b[,1:ncomp,]
  Block$W.b         <-  W.b
  res$Block         <-  Block                         # Results for each block


  # ---------------------------------------------------------------------------
  # Return res
  # ---------------------------------------------------------------------------

  call$X<-Xinput
  call$size.block<-block
  call$name.block<-names(block)
  call$ncomp<-ncomp
  call$scale<-scale
  call$scale.block<-scale.block
  call$Xscale<-Xscale
  call$call <- match.call()
  res$call<-call

  class(res) <- c("ComDim","list")

  return(res)
}
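
# ---------------------------------------------------------------------------
# Illustrative sketch (kept as comments so it is never evaluated): recovering
# the global scores from raw data with the pretreatment parameters stored in
# res$call$Xscale and the projection matrix Proj.g. It assumes the 'ham'
# dataset used in the examples above; the result is expected to be numerically
# close to 0, not an exact guarantee.
# ---------------------------------------------------------------------------
# data(ham)
# res <- ComDim(ham$X, ham$block, name.block = names(ham$block), ncomp = 2)
# Xs  <- sweep(as.matrix(ham$X), 2, res$call$Xscale$mean, "-")  # centering
# Xs  <- sweep(Xs, 2, res$call$Xscale$scale, "/")               # variable scaling + block weighting
# max(abs(Xs %*% res$Proj.g - res$Scor.g))                      # should be close to 0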
