MultBiplotR: Multivariate Analysis Using Biplots in R

Documented in CanonicalStatisBiplot

# The original matrix will be provided as a list 
# each cell of the list is the data matrix for one ocassion
# the number of rows for each occasion must be the same

CanonicalStatisBiplot <- function(X, Groups, InitTransform = "Standardize columns", dimens=2, SameVar=FALSE) {
  
  ContinuousDataTransform = c("Raw Data", "Substract the global mean", "Double centering", "Column centering", "Standardize columns", "Row centering", 
                              "Standardize rows", "Divide by the column means and center", "Normalized residuals from independence")
  if (is.numeric(InitialTransform)) 
    InitialTransform = ContinuousDataTransform[InitialTransform]
  
  if (is.factor(Groups)) {
    GroupNames = levels(Groups)
  }
  else{
    stop("You must provide a factor with the groups")
  }
  
  nst = length(X) #Number of occasions
  StudyNames=names(X)
  nri = matrix(0, nst, 1) #Number of rows in each group
  nci = matrix(0, nst, 1) #Number of cols in each group
  for (i in 1:nst) {
    nri[i] = dim(X[[i]])[1]
    nci[i] = dim(X[[i]])[2]
  }
  nr = nri[1]
  if (sum(nri == nr) < nst) 
    stop("The number of rows must be the same in all ocassions")
  if (SameVar){
    nc = nci[1]
    if (sum(nci == nc) < nst) 
      stop("The number of variables can not be the same in all ocassions (use SameVar=FALSE)")
  }
  
  OccNames=names(X)
  if (is.null(OccNames)) {
    for (i in 1:nst) OccNames= c(OccNames, paste("Occasion_",i,sep=""))
  }
  
  # Initial transformation of data and calculaton of statistics for the biplot
  BiplotStatis=MultiTableStatistics(X)
  BiplotStatis$Type="Statis"
  BiplotStatis$Title="Biplot induced by Canonical Statis"
  BiplotStatis$Initial_Transformation=InitTransform
  BiplotStatis$alpha=1
  BiplotStatis$nrows=nr
  BiplotStatis$ncols=sum(nci)
  StatisRes=list()
  StatisRes$Title="CANONICAL STATIS-ACT Biplot"
  StatisRes$Type="CANONICAL STATIS-ACT"
  StatisRes$NTables=nst
  StatisRes$NRows=nr
  if (SameVar)
    StatisRes$NVars=nc
  else
    StatisRes$NVars=nci
  X=MultiTableTransform(X, InitTransform = InitTransform)
  
  StatisRes$RowLabels=levels(Groups)
  StatisRes$TableLabels=names(X)
  if (SameVar) StatisRes$VarLabels=colnames(X[[1]])
  
  # Calculation of the means, between groups and within groups covariance matrices for each occassion
  Z = Factor2Binary(Groups)
  ng = colSums(Z)
  nr=length(ng)
  S11 = t(Z) %*% Z
  Xbt = list()
  Bt = list()
  St = list()
  
  for (i in 1:nst) {
    Xbt[[i]] = solve(S11) %*% t(Z) %*% X[[i]]
    Bt[[i]] = t(Xbt[[i]]) %*% S11 %*% Xbt[[i]]
    St[[i]] = t(X[[i]]) %*% X[[i]] - Bt[[i]]
  }
  
  #Calculation of the objects
  Wt = list()
  for (i in 1:nst) {
    Wt[[i]] = Xbt[[i]] %*% solve(St[[i]]) %*% t(Xbt[[i]])
  }
  
  # Calculation of the scalar products
  P = matrix(0, nst, nst)
  for (i in 1:nst) for (j in 1:nst) {
    P[i, j] = sum(diag(Wt[[i]] %*% t(Wt[[j]])))
    P[j, i] = P[i, j]
  }
  
  #RV: correlations among the occasions
  RV = solve(diag(sqrt(diag(P)))) %*% P %*% solve(diag(sqrt(diag(P))))
  
  # This information will be common to all the techniques, then a single
  # function to plot is necassary
  StatisRes$Data = X
  StatisRes$Means=Xbt
  StatisRes$Between=Bt
  StatisRes$Within=Xbt
  
  StatisRes$Objects = Wt
  StatisRes$ScalarProducts = P
  StatisRes$RV = RV
  Inter = svd(RV)
  StatisRes$EigInter = Inter$d
  StatisRes$InerInter=(Inter$d/sum(Inter$d))*100
  names(StatisRes$EigInter)=paste("Dim",1:length(OccNames))
  StatisRes$InterStructure = -1 * Inter$u %*% diag(sqrt(Inter$d))
  
  rownames(StatisRes$InterStructure)=OccNames
  colnames(StatisRes$InterStructure)=paste("Dim", 1:nst)
  
  # Weigths for the Compromise
  StatisRes$Weights = abs(Inter$u[, 1])/sum(abs(Inter$u[, 1]))
  
  # Compromise
  W=matrix(0,nr,nr)
 
  for (i in 1:nst)
    W=W + StatisRes$Weights[i]*Wt[[i]]
  StatisRes$Compromise=W
  
  # Euclidean Configuration of the compromise
  Intra = svd(W)
  r=sum(Intra$d>0.00001)
  
  BiplotStatis$EigenValues=Intra$d[1:r]
  BiplotStatis$Inertia=(Intra$d[1:r]/sum(diag(Intra$d[1:r])))*100
  BiplotStatis$CumInertia = cumsum(BiplotStatis$Inertia)
  BiplotStatis$alpha=1
  BiplotStatis$Dimension=dimens
  # Compromise-Consensus Coordinates and contributions
  BiplotStatis$RowCoordinates = Intra$u[,1:dimens] %*% diag(sqrt(Intra$d[1:dimens]))
  sf=apply((Intra$u[,1:r] %*% diag(sqrt(Intra$d[1:r])))^2,1,sum)
  BiplotStatis$RowContributions=(diag(1/sf) %*% BiplotStatis$RowCoordinates^2)*100
  rownames(BiplotStatis$RowCoordinates)=levels(Groups)
  colnames(BiplotStatis$RowCoordinates)=paste("Dim", 1:dimens)
  rownames(BiplotStatis$RowContributions)=levels(Groups)
  colnames(BiplotStatis$RowContributions)=paste("Dim", 1:dimens)
  
  # Trajectories for individuals (old kind)
  trajin=list()
  for (j in 1:nst)
    trajin[[j]] = Wt[[j]] %*% Intra$u[,1:r] %*% diag(sqrt(1/Intra$d[1:r]))
  
  StatisRes$TrajInd=list()
  for (i in 1:nr){
    Traj=NULL
    for (j in 1:nst)
      Traj=rbind(Traj , trajin[[j]][i,1:dimens])
    rownames(Traj)=OccNames
    colnames(Traj)=paste("Dim", 1:dimens)
    StatisRes$TrajInd[[i]]=Traj
  }
  names(StatisRes$TrajInd)=levels(Groups)
  
  # Squared cosines of the individual objects projected onto the compromise
  contbt=list()
  for (j in 1:nst){
    sf=apply(trajin[[j]]^2,1,sum)
    contbt[[j]] = (diag(1/sf) %*% trajin[[j]]^2)*100
  }
  
  StatisRes$ContribInd=list()
  for (i in 1:nr){
    Traj=NULL
    for (j in 1:nst)
      Traj=rbind(Traj , contbt[[j]][i,1:dimens])
    rownames(Traj)=OccNames
    colnames(Traj)=paste("Dim", 1:dimens)
    StatisRes$ContribInd[[i]]=Traj
  }
  names(StatisRes$ContribInd)=levels(Groups)
  
  # Coordinates and contributions of the variables on the biplot
  # First we calculate the principal coordinates in order to calculate the contributions
  trajvar=list()
  for (j in 1:nst){
    trajvar[[j]]=t(Xbt[[j]]) %*% Intra$u[,1:r]
    }
  

  # Coordinates of the variables on the biplot
  contvar=list()
  for (j in 1:nst){
    sf=apply(trajvar[[j]]^2,1,sum)
    contvar[[j]] = (diag(1/sf) %*% trajvar[[j]]^2)*100
    rownames(contvar[[j]])=paste(colnames(X[[j]]), "-", StudyNames[j], sep="")
  }
  
  # Standard coodinates to represent on the biplot (RMP-Biplot)
  # Standard coordinates are needed to represent the biplot axes with graded scales
  for (j in 1:nst){
    trajvar[[j]]=t(Xbt[[j]]) %*% Intra$u[,1:r] %*% diag(1/Intra$d[1:r])
  rownames(trajvar[[j]])=paste(rownames(trajvar[[j]]), "-", StudyNames[j], sep="")}
  
  # This is just a reorganization of the column coordinates in order to represent them
  # as a biplot using the standard procedure
  
  BiplotStatis$ColCoordinates=NULL
  for (j in 1:nst)
    BiplotStatis$ColCoordinates=rbind(BiplotStatis$ColCoordinates, trajvar[[j]][,1:dimens])
  
  BiplotStatis$ColContributions=NULL
  for (j in 1:nst)
    BiplotStatis$ColContributions=rbind(BiplotStatis$ColContributions, contvar[[j]][,1:dimens])
  
  # Rescaling the biplot for better visualization. Used to be optional
  # but now I do for every biplot. 
  # The row coordinates are multiplied for a constant and the coumn coordinates divided
  # by the same constant. The scalar (inner product) in which biplot interpretation
  # is based does not change, but the visuatization is much better.
  # In a biplot with scales for each varibale, the rescaling does not affect 
  # the calculation of the scale marks.
  # The rescaling is performed with all the dimensiond retained in the final solution
  
  sca = sum(BiplotStatis$RowCoordinates^2)
  scb = sum(BiplotStatis$ColCoordinates^2)
  p=sum(nci)
  sca = sca/nr
  scb = scb/p
  scf = sqrt(sqrt(scb/sca))
  BiplotStatis$RowCoordinates = BiplotStatis$RowCoordinates * scf
  BiplotStatis$ColCoordinates = BiplotStatis$ColCoordinates/scf
  BiplotStatis$Scale_Factor=scf
  print(scf)
  for (i in 1:nr){
    StatisRes$TrajInd[[i]]=StatisRes$TrajInd[[i]]*scf
  }
  
  BiplotStatis$ClusterType="us"
  BiplotStatis$Clusters = as.factor(matrix(1,nrow(BiplotStatis$RowContributions), 1))
  BiplotStatis$ClusterColors="blue"
  BiplotStatis$ClusterNames="Cluster 1"
  
  class(BiplotStatis) ="ContinuousBiplot"
  # If the variables are the same for all the occasions or studies
  # aditional trajectories for the variables can be constructed
  # selecting conveniently the c¡var coordinates. This are original
  # in our work, together with the simultaneous representation
  # of rows and columns.
  StatisRes$Biplot=BiplotStatis
  StatisRes$SameIndiv = TRUE
  StatisRes$SameVar = SameVar

 
  if (SameVar){
    p=nci[1]
    StatisRes$TrajVar=list()
    for (i in 1:p){
      Traj=NULL
      for (j in 1:nst)
        Traj=rbind(Traj , trajvar[[j]][i,1:dimens])
      rownames(Traj)=StudyNames
      colnames(Traj)=paste("Dim", 1:dimens)
      Traj=Traj/scf
      StatisRes$TrajVar[[i]]=Traj}
    names(StatisRes$TrajVar)=colnames(X[[1]])
  }
  

  
  # Calculation of the Inertia of each study (or occasion) accounted for the STATIS solution.
  # Comparing this with the inertia accounted by the biplot for each separate study
  # we have an index of the goodness of the consensus (or compromise) in explaining the 
  # study.
  
  class(StatisRes) = "StatisBiplot"
  return(StatisRes)
}