Nothing
#' @title Multigroup Principal Component Analysis
#'
#' @description
#' Multigroup PCA algorithm (NIPALS for Multigroup PCA)
#'
#' @param Data a numeric matrix or data frame
#' @param Group a vector of factors associated with group structure
#' @param ncomp number of components, if NULL number of components is equal to 2
#' @param Scale scaling variables, by defalt is FALSE. By default data are centered within groups
#' @param graph should loading and component be plotted
#' @return list with the following results:
#' @return \item{Data}{ Original data}
#' @return \item{Con.Data}{ Concatenated centered data}
#' @return \item{split.Data}{ Group centered data}
#' @return \item{Group}{ Group as a factor vector}
#' @return \item{loadings.group}{ Loadings associated with each group}
#' @return \item{score.group}{ Scores associated with each group}
#' @return \item{loadings.common}{ Matrix of common loadings}
#' @return \item{score.Global}{ Global scores}
#' @return \item{cumper.inertigroup}{ Cumulative percentage of group components inertia}
#' @return \item{cumper.inertiglobal}{ Cumulative percentage of global component inertia}
#' @return \item{noncumper.inertiglobal}{ Percentage of global component inertia}
#' @return \item{lambda}{ The specific variances of groups}
#' @return \item{exp.var}{ Percentages of total variance recovered associated with each dimension }
#' @return \item{Similarity.Common.Group.load}{Cumulative similarity between group and common loadings}
#' @return \item{Similarity.noncum.Common.Group.load}{ NonCumulative similarity between group and common loadings}
#' @seealso \code{\link{BGC}}, \code{\link{FCPCA}}, \code{\link{DCCSWA}}, \code{\link{DSTATIS}}, \code{\link{DGPA}}, \code{\link{summarize}}, \code{\link{TBWvariance}}, \code{\link{loadingsplot}}, \code{\link{scoreplot}}, \code{\link{iris}}
#' @export
#'
#'
#' @references A. Eslami, E. M. Qannari, A. Kohler and S. Bougeard (2013). General overview
#' of methods of analysis of multi-group datasets,
#' \emph{Revue des Nouvelles Technologies de l'Information}, 25, 108-123.
#'
#' @references A. Eslami, E. M. Qannari, A. Kohler and S. Bougeard (2013). Analyses
#' factorielles de donnces structurces en groupes d'individus,
#' \emph{Journal de la Societe Francaise de Statistique}, 154(3), 44-57.
#'
#'
#' @examples
#' Data = iris[,-5]
#' Group = iris[,5]
#' res.mgPCA = mgPCA (Data, Group)
#' barplot(res.mgPCA$noncumper.inertiglobal)
#' #----------------
#' #Similarity index: group loadings are compared to the common structure (first dimension)
#' Xzero = rep(0, 3)
#' MIN = min(res.mgPCA$Similarity.noncum.Common.Group.load[[1]][-1, 1])-0.0005
#' XLAB = paste("Dim1, %",res.mgPCA$noncumper.inertiglobal[1])
#' plot(Xzero, res.mgPCA$Similarity.noncum.Common.Group.load[[1]][-1, 1], pch=15, ylim=c(MIN, 1),
#' main="Similarity between groups and common structure", xlab=XLAB, ylab="", xaxt="n")
#' abline(v=0)
#' abline(h=seq(MIN, 1, by=0.05), col="black", lty=3)
#' XX=res.mgPCA$Similarity.noncum.Common.Group.load[[1]][-1, 1, drop=FALSE]
#' text(Xzero, XX, labels=rownames(XX), pos=4)
#' #----------------
#' # Similarity index: group loadings are compared to the common structure (dimensions 1 and 2)
#' XX1=res.mgPCA$Similarity.noncum.Common.Group.load[[1]][-1, 1]
#' XX2=res.mgPCA$Similarity.noncum.Common.Group.load[[2]][-1, 1]
#' simil <- cbind(XX1, XX2)
#' YLAB = paste("Dim1, %",res.mgPCA$noncumper.inertiglobal[2])
#' plot(simil, xlab=XLAB, ylab=YLAB, main="Similarity between groups and common structure", pch=20)
#' text(simil, labels=rownames(simil), cex=1, font.lab=1, pos=3)
#' #------------------
#' loadingsplot(res.mgPCA, axes=c(1,2), INERTIE=res.mgPCA$noncumper.inertiglobal)
#' scoreplot(res.mgPCA, axes=c(1,2))
mgPCA <- function(Data, Group, ncomp=NULL, Scale=FALSE, graph=FALSE){
#=========================================================================
# 1. Checking the inputs
#=========================================================================
check(Data, Group)
#=========================================================================
# 2. preparing Data
#=========================================================================
if (is.data.frame(Data) == TRUE) {
Data=as.matrix(Data)
}
if(is.null(ncomp)) {ncomp=2}
if(is.null(colnames(Data))) {
colnames(Data) = paste('V', 1:ncol(Data), sep='')
}
Group = as.factor(Group)
rownames(Data) = Group #---- rownames of data=groups
M = length(levels(Group)) #----number of groups: M
P = dim(Data)[2] #----number of variables: P
n = as.vector(table(Group)) #----number of individuals in each group
N = sum(n) #----number of individuals
split.Data = split(Data,Group) #----split Data to M parts
# centering and scaling if TRUE
for(m in 1:M){
split.Data[[m]] = matrix(split.Data[[m]], nrow=n[m])
split.Data[[m]] = scale(split.Data[[m]], center=TRUE, scale=Scale)
}
# concatinated dataset by row as groups
Con.Data = split.Data[[1]]
for(m in 2:M) {
Con.Data = rbind(Con.Data, split.Data[[m]])
}
rownames(Con.Data) = Group
colnames(Con.Data) = colnames(Data)
# Variance-covariance matrix for each group
cov.Group = vector("list", M)
for(m in 1:M){
cov.Group[[m]] = t(split.Data[[m]]) %*% split.Data[[m]] / n[m]
}
#==========================================================================
# 3. Outputs
#==========================================================================
res <- list(
Data = Data,
Con.Data = Con.Data,
split.Data = split.Data,
Group=Group)
res$loadings.group = vector("list", M)
res$score.group = vector("list", M)
for(m in 1:M){
res$score.group[[m]]=matrix(0, nrow=n[m], ncol=ncomp)
#rownames(res$score.group[[m]])=Group
colnames(res$score.group[[m]]) = paste("Dim", 1:ncomp, sep="")
res$loadings.group[[m]] = matrix(0, nrow=P, ncol=ncomp)
rownames(res$loadings.group[[m]]) = colnames(Data)
colnames(res$loadings.group[[m]]) = paste("Dim", 1:ncomp, sep="")
}
res$loadings.common = matrix(0, nrow=P, ncol=ncomp)
rownames(res$loadings.common) = colnames(Data)
colnames(res$loadings.common) = paste("Dim", 1:ncomp, sep="")
res$score.Global = matrix(0, nrow=N, ncol=ncomp)
rownames(res$score.Global) = Group
colnames(res$score.Global) = paste("Dim", 1:ncomp, sep="")
res$Group.weight=matrix(0, nrow=M, ncol=ncomp)
rownames(res$Group.weight)=levels(Group)
colnames(res$Group.weight)=paste("Dim", 1:ncomp, sep="")
res$cumper.inertigroup=matrix(0, ncol=ncomp,nrow=M)
rownames(res$cumper.inertigroup)=levels(Group)
colnames(res$cumper.inertigroup)=paste("Dim", 1:ncomp, sep="")
res$cumper.inertiglobal=matrix(0, ncol=ncomp, nrow=1)
colnames(res$cumper.inertiglobal)=paste("Dim", 1:ncomp, sep="")
res$noncumper.inertiglobal = matrix(0, ncol=ncomp, nrow=1)
colnames(res$noncumper.inertiglobal) = paste("Dim", 1:ncomp, sep="")
#=========================================================================
# 4. NIPALS for multi_group CPCA
#=========================================================================
eps = 1e-7
for(h in 1:ncomp){ #------ for iteration loops
threshold = 1.0;
iNIPALS = 0;
critt = 0
#======================= 4.1 initial value of vector of common loadings
w.common = rnorm(P)
w.common = normv(w.common)
W=matrix(0, ncol=P, nrow=M) #------ combined group loadings
#======================= 4.2 starting iteration
while (threshold > eps) {
t.global = matrix(0, ncol=1) # concatinated components
# 4.2.1 group analysis
for (m in 1:M){
t.group = (split.Data[[m]]) %*% w.common
t.group = normv(t.group)
res$score.group[[m]][,h] = t.group
t.global=rbind(t.global,t.group)
w.group=t(split.Data[[m]])%*%t.group # group loadings
res$loadings.group[[m]][,h]= normv(w.group) #w.group
W[m,] = w.group # combined group loadings
}
res$score.Global[,h]= Con.Data %*% w.common
# 4.2.2 new value of common loadings (w.common)
Group.weight = W %*% w.common # global loading weights, W is projected to w
Group.weight = normv(Group.weight)
res$Group.weight[,h] = Group.weight
w.common = t(t(Group.weight) %*% W )
w.common = normv(w.common)
# 4.2.3 criterion
iNIPALS=iNIPALS+1
Crit=0
for(m in 1:M){
Crit= Crit+(t(res$score.group[[m]][,h])%*%
(split.Data[[m]]) %*% w.common %*% t(w.common)
%*% t(split.Data[[m]]) %*%
res$score.group[[m]][,h] )
}
critt[iNIPALS]=Crit
if (iNIPALS>1){
threshold=critt[iNIPALS]-critt[(iNIPALS-1)]
}
res$loadings.common[,h]=w.common
} # end of iteration
#=========================================================================
# 5. Explained variance
#=========================================================================
for(m in 1:M){
project.group=res$score.group[[m]][,1:h] %*% ginv( t(res$score.group[[m]][,1:h]) %*% res$score.group[[m]][,1:h]) %*% t(res$score.group[[m]][,1:h])
projet.data.group=project.group %*% res$split.Data[[m]]
res$cumper.inertigroup[m,h]=100*sum(diag(t(projet.data.group) %*% projet.data.group))/ sum(diag(t(res$split.Data[[m]]) %*% res$split.Data[[m]]))
}
project.global=res$score.Global[,1:h] %*% ginv( t(res$score.Global[,1:h]) %*% res$score.Global[,1:h]) %*% t(res$score.Global[,1:h])
projet.data.global=project.global%*% res$Con.Data
res$cumper.inertiglobal[h]=100*sum(diag(t(projet.data.global) %*% projet.data.global))/ sum(diag(t(res$Con.Data) %*% res$Con.Data))
projectn.global=res$score.Global[,h] %*% ginv( t(res$score.Global[,h]) %*% res$score.Global[,h]) %*% t(res$score.Global[,h])
projetn.data.global=projectn.global %*% res$Con.Data
res$noncumper.inertiglobal[h] = round(100*sum(diag(t(projetn.data.global) %*% projetn.data.global))/ sum(diag(t(res$Con.Data) %*%res$Con.Data)),1)
#=========================================================================
# Deflation
#=========================================================================
Con.Data= deflation(Con.Data, res$score.Global[,h])
rownames(Con.Data) = Group
split.Data = split(Con.Data,Group)
for(m in 1:M){
split.Data[[m]] = matrix(split.Data[[m]],ncol=P)
colnames(split.Data[[m]]) = colnames(Con.Data)
}
} #------ END OF DIMENSION
lambda = matrix(0, nrow=M, ncol=ncomp)
for(m in 1:M){
lambda[m,]=round(diag(t(res$loadings.common) %*% cov.Group[[m]] %*% res$loadings.common),3)
}
res$lambda = lambda
rownames(res$lambda) = levels(Group)
colnames(res$lambda) = paste("Dim", 1:ncomp, sep="")
exp.var = matrix(0,M,ncomp)
for(m in 1:M){
exp.var[m,] = 100 * lambda[m,]/ sum(diag(cov.Group[[m]]))
}
res$exp.var = exp.var
rownames(res$exp.var) = levels(Group)
colnames(res$exp.var) = paste("Dim", 1:ncomp, sep="")
##----------------------------------------------------------------------------
## Similarity between partial and common loadings
##----------------------------------------------------------------------------
loadings_matrices = list()
loadings_matrices[[1]] = res$loadings.common
for(m in 2:(M+1)){
loadings_matrices[[m]] = res$loadings.group[[(m-1)]]
}
NAMES = c("Commonload", levels(Group))
Similarity =similarity_function(loadings_matrices=loadings_matrices, NAMES)
Similarity_noncum = similarity_noncum(loadings_matrices=loadings_matrices, NAMES)
res$Similarity.Common.Group.load = Similarity
res$Similarity.noncum.Common.Group.load = Similarity_noncum
#--------------------------------------
if(graph) {plot.mg(res)}
# add class
class(res) = c("mgpca", "mg")
return(res)
}
#' @S3method print mgpca
print.mgpca <- function(x, ...)
{
cat("\nMulti-group Principal Component Analysis\n")
cat(rep("-",43), sep="")
cat("\n$score.group ", "score groups")
cat("\n$loadings.common ", "common loadings")
cat("\n$Data ", "Data set")
cat("\n")
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.