Nothing
## =============================================================================
##' @title Perform a cluster analysis of subjects on CATA/liking combination
##'
##' @description
##' Clustering of subjects (blocks) from combination of CATA and liking experiments.
##'
##' @usage
##' cluscata_liking(Data, nblo, NameBlocks=NULL, NameVar=NULL, Itermax=30,
##' Graph_dend=TRUE, Graph_bar=TRUE,
##' printlevel=FALSE,gpmax=min(6, nblo-1))
##'
##' @param Data data frame or matrix where the blocks of variables (attributes) are merged horizontally thanks to \code{\link{combinCATALiking}} function
##'
##' @param nblo numerical. Number of blocks (subjects).
##'
##' @param NameBlocks string vector. Name of each block (subject). Length must be equal to the number of blocks. If NULL, the names are S1,...Sm. Default: NULL
##'
##' @param NameVar string vector. Name of each variable (attribute, the same names for each subject). Length must be equal to the number of attributes. If NULL, the colnames of the first block are taken. Default: NULL
##'
##' @param Itermax numerical. Maximum of iteration for the partitioning algorithm. Default:30
##'
##' @param Graph_dend logical. Should the dendrogram be plotted? Default: TRUE
##'
##' @param Graph_bar logical. Should the barplot of the difference of the criterion and the barplot of the overall homogeneity at each merging step of the hierarchical algorithm be plotted? Default: TRUE
##'
##' @param printlevel logical. Print the number of remaining levels during the hierarchical clustering algorithm? Default: FALSE
##'
##' @param gpmax logical. What is maximum number of clusters to consider? Default: min(6, nblo-2)
##'
##' @return Each partitionK contains a list for each number of clusters of the partition, K=1 to gpmax with:
##' \itemize{
##' \item group: the clustering partition after consolidation.
##' \item compromise: the compromise of each cluster
##' \item dist_all_cluster: the distance between each subject and each cluster compromise
##' \item criterion: the CLUSCATA-liking criterion error
##' \item param: parameters called
##' \item type: parameter passed to other functions
##' }
##' There is also at the end of the list:
##' \itemize{
##' \item dend: The CLUSCATA dendrogram
##' \item cutree_k: the partition obtained by cutting the dendrogram in K clusters (before consolidation).
##' \item diff_crit_ng: variation of criterion when a merging is done before consolidation (and after)
##' \item param: parameters called
##' \item type: parameter passed to other functions
##' }
##'
##' @keywords CATA liking
##'
##' @references
##' Vigneau, E., Cariou, V., Giacalone, D., Berget, I., & Llobell, F. (2022). Combining hedonic information and CATA description for consumer segmentation. Food Quality and Preference, 95, 104358.
##'
##' @importFrom FactoMineR tab.disjonctif
##'
##' @examples
##' data(cata_ryebread)
##' data(liking_ryebread)
##' cataliking=combinCATALiking(cata_ryebread, liking_ryebread)
##'
##'#with only 40 subjects
##' resclustcatal=cluscata_liking(Data=cataliking[,1:(40*14)], nblo=40, gpmax=5)
##' plot(resclustcatal, cata_ryebread[,1:(40*14)], liking_ryebread[,1:40])
##'
##'
##' @seealso \code{\link{plot.cluscata_liking}}, \code{\link{summary.cluscata_liking}} , \code{\link{combinCATALiking}}
##'
##' @export
## =============================================================================
cluscata_liking=function(Data, nblo, NameBlocks=NULL, NameVar=NULL, Itermax=30,
Graph_dend=TRUE, Graph_bar=TRUE, printlevel=FALSE,gpmax=min(6, nblo-1)){
#initialisation
n=nrow(Data)
p=ncol(Data)
nvar=p/nblo
#parapet for nblo
if (as.integer(nvar)!=nvar)
{
stop("number of columns modulo nblo != 0")
}
Blocks=rep(nvar,nblo)
J=rep(1:nblo , times = Blocks )# indicates which block each variable belongs to
#rownames, colnames, NameBlocks
if (is.null(NameBlocks)) NameBlocks=paste("S",1:nblo,sep="-")
if(is.null(rownames(Data))) rownames(Data)=paste0("X", 1:nrow(Data))
if(is.null(colnames(Data))) colnames(Data)=rep(paste0("Y",1:Blocks[1]), nblo)
X=Data
#Parapet for NameBlocks
if(length(NameBlocks)!=nblo)
{
stop("Error with the length of NameBlocks")
}
#parapet for number of objects
if(n<3)
{
stop("At least 3 objects are required")
}
#parapet for number of blocks
if(nblo<4)
{
stop("At least 4 blocks are required")
}
#parapet for gpmax
if (gpmax>(nblo-1))
{
stop(paste("gpmax > number of blocks-1"))
}
#no NA
if(sum(is.na(Data))>0)
{
print("NA detected:")
tabna=which(is.na(Data), arr.ind = TRUE)
print(tabna)
stop(paste("NA are not accepted"))
}
#prepare data
Xi=array(0,dim=c(n,Blocks[1],nblo)) # array with all subjects matrices
muk=NULL
for(j in 1:nblo)
{
Aj=as.matrix(X[,J==j])
normXi=sqrt(sum(diag(Aj%*%t(Aj))))
muk[j]=normXi
if(normXi==0)
{
stop(paste("error: the subject",NameBlocks[j], "has only 0"))
}
Xi[,,j]=Aj/normXi #standardization
}
# criterion when each data table forms a group itself: 0
crit=rep(0,nblo)
Q_current=0
critS=rep(1,nblo)
S_current=nblo
cc=1:nblo
# the names of the clusters are 1 to 'number of clusters'
code=1:nblo;
# the names of the clusters are taken from the HCA
cvar=1:nblo
# when a new cluster is formed from clusters i and j, it is named min(i,j))
mergelist=matrix(0,nblo-1,2)
results=matrix(0,nblo-1,6)
colnames(results)=c("merg1", "merg2", "new.clust", "agg.crit.hac","clust.crit.hac","S")
idgr=matrix(0,nblo,nblo)
idgr[nblo,]=1:nblo;
ordr = c()
fusions = -(1:nblo)
hmerge =matrix(0,nblo-1,2)
ncluster=nblo
for (level in 1:(nblo-1)) {
################
# loops i et j
################
# We search the two clusters that will be merged.
deltamin=10^100
for (i in 1:(ncluster-1)) {
for (j in (i+1):(ncluster)) {
# merging the i'th and j'th cluster:
newcluster=c(which(cc==i), which(cc==j))
Xj=list()
for (tab in 1:length(newcluster)) {
Xj[[tab]]=Xi[,,newcluster[tab]]
}
newcatatislik=.crit_cataMean(Xj)
deltacurrent=newcatatislik$Q-sum(crit[c(i,j)])
# if deltacurrent is smaller than deltamin, the current information is saved:
if (deltacurrent<deltamin) {
deltamin=deltacurrent
c1=cvar[cc==i]
c2=cvar[cc==j]
merge1=c(c1[1], c2[1])
c1=code[cc==i]
c2=code[cc==j]
merge=c(c1[1], c2[1])
cl_1=i
cl_2=j
catatislikmerge=newcatatislik
}
} # end of loop j
} # end of loop i
ncluster=ncluster-1
###############################
# renewal of the parameters
###############################
Q_current=Q_current+deltamin
mergelist[level,]=merge1
results[level,1:5]=c(merge, nblo+level, deltamin, Q_current)
crit[cl_1]=catatislikmerge$Q
crit=crit[-cl_2]
cc[which(cc==cl_2)]=cl_1
cc[which(cc>cl_2)]=cc[which(cc>cl_2)]-1
cvar[cc==cl_1]=min(merge1)
code[cc==cl_1]=nblo+level
idgr[ncluster,]=cc
indic= c(fusions[merge1[1]],fusions[merge1[2]])
hmerge[level,]<-indic
fusions[which(idgr[ncluster,]==cl_1)]<- level
ordr <- .order_var(ordr,which(idgr[ncluster,]==cl_1))
if(printlevel==TRUE)
{
print(nblo-1-level)
}
} # end of level
# Dendogram:
resultscah=list(labels=NameBlocks, height=results[,4], merge=hmerge,order=ordr)
mytot<-resultscah
class(mytot)="hclust"
mydendC=as.dendrogram(mytot)
#show the dendrogram
if (Graph_dend==TRUE)
{
dev.new()
cex=0.6
par(cex=cex)
par(mar=c(7, 4, 4, 2) + 0.1)
plot(mydendC, type ="rectangle", main="CLUSCATA liking Dendrogram", axes=TRUE, cex=cex,ylab="Height")
par(cex=1)
par(mar=c(5, 4, 4, 2) + 0.1)
}
#show the criterion and homogeneity evolutions
if (Graph_bar==TRUE)
{
dev.new()
barplot(results[,4][(nblo-gpmax):(nblo-1)],xlab="Nb clusters",ylab="delta", main="Variation of criterion",
axisnames = TRUE,names.arg = paste((gpmax+1):2,"->",(gpmax):1),las=2,cex.names = 0.6,cex.main=1.2,col = "blue")
}
#number of clusters advised
criter=sort(results[,5],decreasing = TRUE)
H=NULL
for (k in 1:(gpmax-1))
{
H[k]=((criter[k]/criter[k+1]-1))*(nblo-k-1)
}
#barplot(H[-(gpmax-1)]-H[-1], names.arg=2:gpmax)
nbgroup_hart=which.max(H[-(gpmax-1)]-H[-1])+1
cat(paste("Recommended number of clusters =", nbgroup_hart),"\n")
###### consolidation
res.consol=list()
cutree_k=list()
for (K in 1:gpmax)
{
coupe=cutree(mytot,K)
cutree_k[[K]]=coupe
#consolidation
res.consol[[K]]=.cluscata_kmeans_liking(Data, nblo, coupe,
NameBlocks = NameBlocks, NameVar=NameVar, Itermax=Itermax)
}
names(cutree_k)=names(res.consol)=paste0("partition", 1:gpmax)
#after consolidation
diff_crit_bef=results[,4][(nblo-gpmax+1):(nblo-1)]
#after consolidation
overall_after=NULL
crit_after=NULL
for (i in 1: gpmax)
{
crit_after[i]=res.consol[[i]]$criterion
}
diff_crit_after=crit_after[-gpmax]-crit_after[-1]
diff_crit_after=sort(diff_crit_after)
diff_crit_ng=rbind(diff_crit_bef, diff_crit_after)
rownames(diff_crit_ng)=c("before consolidation", "after consolidation")
colnames(diff_crit_ng)=paste((gpmax):2,"->",(gpmax-1):1)
#results
res=c(res.consol, list(dend=mydendC, cutree_k=cutree_k,
diff_crit_ng=round(diff_crit_ng,2), param=list(nblo=nblo, ng=nbgroup_hart,
gpmax=gpmax, n=n, nvar=nvar), type="H+C"))
class(res)="cluscata_liking"
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.