R/NbClust_fanny.R

Defines functions NbClust_fanny

Documented in NbClust_fanny

#' Calculate cluster validation metrics for FANNY method. See NbClust::NbClust() and cluster::fanny() for more details.
#'
#' @param data matrix or dataset
#' @param diss dissimilarity matrix to be used. By default, diss=NULL, but if it is replaced by a dissimilarity matrix, distance should be "NULL".
#' @param distance the distance measure to be used to compute the dissimilarity matrix. This must be one of: "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski" or "NULL". By default, distance="euclidean". If the distance is "NULL", the dissimilarity matrix (diss) should be given by the user. If distance is not "NULL", the dissimilarity matrix should be "NULL".
#' @param min.nc minimal number of clusters, between 1 and (number of objects - 1)
#' @param max.nc maximal number of clusters, between 2 and (number of objects - 1), greater or equal to min.nc. By default, max.nc=15
#' @param method the cluster analysis method to be used. This should be one of: "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid", "kmeans"
#' @param index the index to be calculated. This should be one of : "kl", "ch", "hartigan", "ccc", "scott", "marriot", "trcovw", "tracew", "friedman", "rubin", "cindex", "db", "silhouette", "duda", "pseudot2", "beale", "ratkowsky", "ball", "ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn", "hubert", "sdindex", "dindex", "sdbw", "all" (all indices except GAP, Gamma, Gplus and Tau), "alllong" (all indices with Gap, Gamma, Gplus and Tau included).
#' @param alphaBeale significance value for Beale's index.
#' @param memb.exp value to pass to cluster::fanny()
#'
#' @return list containing index values, critical values, best number of clusters from each metric, and the best partitioning results
#' @export

# data = ord.df
# diss = chord.df
# distance = NULL
# min.nc = 2
# max.nc = 12
# method = "fanny"
# index = "all"
# alphaBeale = 0.1
# memb.exp= 1.2

NbClust_fanny <-function(data = NULL,
                         diss=NULL,
                         distance ="euclidean",
                         min.nc=2,
                         max.nc=15,
                         method =NULL,
                         index = "all",
                         alphaBeale = 0.1,
                         memb.exp= 2
                         ){

  x<-0
  min_nc <- min.nc
  max_nc <- max.nc

  if(is.null(method))stop("method is NULL")
  method <- pmatch(method, c("ward.D2", "single", "complete", "average",
                             "mcquitty", "median", "centroid", "kmeans","ward.D", "fanny"))


  indice <- pmatch(index, c("kl","ch","hartigan","ccc","scott","marriot","trcovw","tracew","friedman",
                            "rubin","cindex","db","silhouette","duda","pseudot2","beale","ratkowsky","ball",
                            "ptbiserial","gap", "frey", "mcclain",  "gamma", "gplus", "tau", "dunn",
                            "hubert", "sdindex", "dindex", "sdbw", "all","alllong"))
  if (is.na(indice))stop("invalid clustering index")

  if (indice == -1)stop("ambiguous index")

  if ((indice == 3)|| (indice == 5)|| (indice == 6)|| (indice == 7)|| (indice == 8)|| (indice == 9)|| (indice == 10)|| (indice == 11)|| (indice == 18)|| (indice == 27)|| (indice == 29)|| (indice == 31)|| (indice == 32)){
    if((max.nc-min.nc)<2)
      stop("The difference between the minimum and the maximum number of clusters must be at least equal to 2")
  }



  if(is.null(data)){

    if(method==8){
      stop("\n","method = kmeans, data matrix is needed")
    }else{
      if ((indice == 1 )|| (indice == 2)|| (indice == 3 )|| (indice == 4 )|| (indice == 5)|| (indice == 6)|| (indice == 7)|| (indice == 8)|| (indice == 9)|| (indice == 10)|| (indice == 12)|| (indice == 14)|| (indice == 15)|| (indice == 16)|| (indice == 17)|| (indice == 18)
          || (indice == 19)|| (indice == 20)|| (indice == 23)|| (indice == 24)|| (indice == 25) || (indice == 27)|| (indice == 28)
          || (indice == 29) || (indice == 30)|| (indice == 31) || (indice == 32))
        stop("\n","Data matrix is needed. Only frey, mcclain, cindex, sihouette and dunn can be computed.", "\n")

      if(is.null(diss)){
        stop("data matrix and dissimilarity matrix are both null")
      }else{
        cat("\n","Only frey, mcclain, cindex, sihouette and dunn can be computed. To compute the other indices, data matrix is needed","\n")
      }
    }
  }else{
    jeu1 <- as.matrix(data)
    numberObsBefore <- dim(jeu1)[1]
    jeu <- na.omit(jeu1) # returns the object with incomplete cases removed
    nn <- numberObsAfter <- dim(jeu)[1]
    pp <- dim(jeu)[2]
    TT <- t(jeu)%*%jeu
    sizeEigenTT <- length(eigen(TT)$value)
    eigenValues <- eigen(TT/(nn-1))$value

    # Only for indices using vv : CCC, Scott, marriot, tracecovw, tracew, friedman, rubin

    if (any(indice == 4) || (indice == 5) || (indice == 6) || (indice == 7) || (indice == 8) || (indice == 9) || (indice == 10) || (indice == 31) || (indice == 32)){
      for (i in 1:sizeEigenTT) {
        if (eigenValues[i] < 0) {
          #cat(paste("There are only", numberObsAfter,"nonmissing observations out of a possible", numberObsBefore ,"observations."))
          stop("The TSS matrix is indefinite. There must be too many missing values. The index cannot be calculated.")
        }
      }
      s1 <- sqrt(eigenValues)
      ss <- rep(1,sizeEigenTT)
      for (i in 1:sizeEigenTT)
      {
        if (s1[i]!=0)
          ss[i]=s1[i]
      }
      vv <- prod(ss)
    }

  }





  #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&#
  #                                                                                                                      #
  #                                              Distances                                                               #
  #                                                                                                                      #
  #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&#

  if(is.null(distance)){
    distanceM<-7}
  if(!is.null(distance)){
    distanceM <- pmatch(distance, c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"))
  }

  if (is.na(distanceM)){
    stop("invalid distance")
  }

  if(is.null(diss)){

    if (distanceM == 1) {
      md <- dist(jeu, method="euclidean")	# "dist" function computes and returns the distance matrix computed by using the specified distance measure to compute the distances between the rows of a data matrix.
    }
    if (distanceM == 2) {
      md <- dist(jeu, method="maximum")
    }
    if (distanceM == 3) {
      md <- dist(jeu, method="manhattan")
    }
    if (distanceM == 4) {
      md <- dist(jeu, method="canberra")
    }
    if (distanceM == 5) {
      md <- dist(jeu, method="binary")
    }
    if (distanceM == 6) {
      md <- dist(jeu, method="minkowski")
    }

    if (distanceM == 7) {
      stop("dissimilarity matrix and distance are both NULL")
    }
  }

  if(!is.null(diss)){
    if((distanceM==1)||(distanceM==2)|| (distanceM==3)|| (distanceM==4)|| (distanceM==5)|| (distanceM==6)){
      stop("dissimilarity matrix and distance are both not null")
    }else{
      md <- diss
    }
  }



  #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&#
  #                                                                                                                      #
  #                                              Methods                                                                 #
  #                                                                                                                      #
  #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&#


  res <- array(0, c(max_nc-min_nc+1,30))
  x_axis <- min_nc:max_nc
  resCritical <- array(0, c(max_nc-min_nc+1,4))
  rownames(resCritical) <- min_nc:max_nc
  colnames(resCritical) <- c("CritValue_Duda", "CritValue_PseudoT2", "Fvalue_Beale", "CritValue_Gap")
  rownames(res) <- min_nc:max_nc
  colnames(res) <- c("KL","CH","Hartigan","CCC","Scott","Marriot", "TrCovW", "TraceW","Friedman","Rubin","Cindex","DB",
                     "Silhouette", "Duda", "Pseudot2", "Beale", "Ratkowsky", "Ball", "Ptbiserial", "Gap", "Frey", "McClain","Gamma", "Gplus", "Tau", "Dunn",
                     "Hubert", "SDindex", "Dindex", "SDbw")

  if (is.na(method))
    stop("invalid clustering method")
  if (method == -1)
    stop("ambiguous method")
  if (method == 1) {
    hc<-hclust(md,method = "ward.D2")
  }
  if (method == 2) {
    hc<-hclust(md,method = "single")
  }
  if (method == 3){
    hc<-hclust(md,method = "complete")
  }

  if (method == 4) {
    hc<-hclust(md,method = "average")
  }

  if (method == 5) {
    hc<-hclust(md,method = "mcquitty")

  }
  if (method == 6) {
    hc<-hclust(md,method = "median")

  }
  if (method == 7) {
    hc<-hclust(md,method = "centroid")

  }
  if (method == 9) {
    hc<-hclust(md,method = "ward.D")

  }



  #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&#
  #                                                                                                                      #
  #                                              Indices                                                                 #
  #                                                                                                                      #
  #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&#




  ##############################
  #                            #
  #        SD and SDbw         #
  #                            #
  ##############################


  centers<-function(cl,x){
    x <- as.matrix(x)
    n <- length(cl)
    k <- max(cl)
    centers <- matrix(nrow = k, ncol = ncol(x))
    {
      for (i in 1:k)
      {
        for (j in 1:ncol(x))
        {
          centers[i, j] <- mean(x[cl == i, j])
        }
      }
    }
    return(centers)
  }

  Average.scattering <- function (cl, x){
    x <- as.matrix(x)
    n <- length(cl)
    k <- max(cl)
    centers.matrix <- centers(cl,x)

    cluster.size <- numeric(0)
    variance.clusters <- matrix(0, ncol = ncol(x), nrow = k)
    var <- matrix(0, ncol = ncol(x), nrow = k)

    for (u in 1:k)
      cluster.size[u] <- sum(cl == u)

    for (u in 1:k) {
      for (j in 1:ncol(x)) {
        for(i in 1:n) {
          if(cl[i]==u)
            variance.clusters[u,j]<- variance.clusters[u,j]+(x[i, j]-centers.matrix[u,j])^2
        }
      }
    }

    for (u in 1:k) {
      for (j in 1:ncol(x))
        variance.clusters[u,j]= variance.clusters[u,j]/ cluster.size[u]
    }


    variance.matrix <- numeric(0)
    for(j in 1:ncol(x))
      variance.matrix[j]=var(x[,j])*(n-1)/n


    Somme.variance.clusters<-0
    for (u in 1:k)
      Somme.variance.clusters<-Somme.variance.clusters+sqrt((variance.clusters[u,]%*%(variance.clusters[u,])))


    # Standard deviation
    stdev<-(1/k)*sqrt(Somme.variance.clusters)

    #Average scattering for clusters
    scat<- (1/k)* (Somme.variance.clusters /sqrt(variance.matrix %*% variance.matrix))

    scat <- list(stdev=stdev, centers=centers.matrix, variance.intraclusters= variance.clusters, scatt=scat)
    return(scat)
  }

  density.clusters<-function(cl, x){
    x <- as.matrix(x)
    k <- max(cl)
    n <- length(cl)

    distance <- matrix(0, ncol = 1, nrow = n)
    density <-  matrix(0, ncol = 1, nrow = k)
    centers.matrix<-centers(cl,x)
    stdev<-Average.scattering(cl,x)$stdev
    for(i in 1:n) {
      u=1
      while(cl[i] != u )
        u<-u+1
      for (j in 1:ncol(x)){
        distance[i]<- distance[i]+(x[i,j]-centers.matrix[u,j])^2
      }
      distance[i]<-sqrt(distance[i])
      if (distance[i] <= stdev)
        density[u]= density[u]+1
    }
    dens<-list(distance=distance, density=density)
    return(dens)

  }


  density.bw<-function(cl, x){
    x <- as.matrix(x)
    k <- max(cl)
    n <- length(cl)
    centers.matrix<-centers(cl,x)
    stdev<-Average.scattering(cl,x)$stdev
    density.bw<- matrix(0, ncol = k, nrow = k)
    u<-1

    for(u in 1:k){
      for(v in 1:k){
        if(v!=u){
          distance<- matrix(0, ncol = 1, nrow = n)
          moy<-(centers.matrix[u,]+centers.matrix[v,])/2
          for(i in 1:n){
            if((cl[i]==u)||(cl[i]==v)){
              for (j in 1:ncol(x)){
                distance[i]<- distance[i]+(x[i,j]-moy[j])^2
              }
              distance[i]<- sqrt(distance[i])
              if(distance[i]<= stdev){
                density.bw[u,v]<-density.bw[u,v]+1
              }
            }
          }
        }
      }
    }
    density.clust<-density.clusters(cl,x)$density
    S<-0
    for(u in 1:k)
      for(v in 1:k){
        if(max(density.clust[u], density.clust[v])!=0)
          S=S+ (density.bw[u,v]/max(density.clust[u], density.clust[v]))
      }
    density.bw<-S/(k*(k-1))
    return(density.bw)

  }



  Dis <- function (cl, x){   # Dis : Total separation between clusters

    x <- as.matrix(x)
    k <- max(cl)
    centers.matrix <- centers(cl,x)
    Distance.centers<-dist(centers.matrix)
    Dmin<-min(Distance.centers)
    Dmax<-max(Distance.centers)
    Distance.centers<-as.matrix(Distance.centers)
    s2<-0
    for (u in 1:k){
      s1=0
      for(j in 1:ncol(Distance.centers)){
        s1<-s1 + Distance.centers[u,j]
      }
      s2<-s2+1/s1
    }
    Dis<-(Dmax/Dmin)*s2
    return(Dis)
  }



  ##################################
  #                                #
  #         Hubert index           #
  #                                #
  ##################################



  Index.Hubert<-function(x, cl){

    k <- max(cl)
    n<-dim(x)[1]
    y <- matrix(0, ncol = dim(x)[2], nrow = n)
    P<- as.matrix(md)
    meanP<-mean(P)
    variance.matrix <- numeric(0)
    md <- dist(x, method="euclidean")
    for(j in 1:n)
      variance.matrix[j]=var(P[,j])*(n-1)/n
    varP<-sqrt(variance.matrix %*% variance.matrix)

    centers.clusters<-centers(cl,x)
    for(i in 1:n){
      for(u in 1:k){
        if(cl[i]==u)
          y[i,]<-centers.clusters[u,]
      }
    }

    Q<- as.matrix(dist(y, method="euclidean"))
    meanQ<-mean(Q)
    for(j in 1:n)
      variance.matrix[j]=var(Q[,j])*(n-1)/n
    varQ<-sqrt(variance.matrix %*% variance.matrix)

    M<-n*(n-1)/2
    S<-0
    n1<-n-1

    for(i in 1:n1){
      j<-i+1
      while(j<=n){
        S<-S+(P[i,j]-meanP)*(Q[i,j]-meanQ)
        j<-j+1
      }

    }
    gamma<-S/(M*varP*varQ)

    return(gamma)
  }



  ##################################
  #                                #
  #   Gamma, Gplus and Tau         #
  #                                #
  ##################################


  Index.sPlussMoins <- function (cl1,md){
    cn1 <- max(cl1)
    n1 <- length(cl1)
    dmat <- as.matrix(md)
    average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
    separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
    di <- list()
    for (u in 1:cn1) {
      cluster.size[u] <- sum(cl1 == u)
      du <- as.dist(dmat[cl1 == u, cl1 == u])
      within.dist1 <- c(within.dist1, du)
      average.distance[u] <- mean(du)
      median.distance[u] <- median(du)
      bv <- numeric(0)
      for (v in 1:cn1) {
        if (v != u) {
          suv <- dmat[cl1 == u, cl1 == v]
          bv <- c(bv, suv)
          if (u < v) {
            separation.matrix[u, v] <- separation.matrix[v,u] <- min(suv)
            between.dist1 <- c(between.dist1, suv)
          }
        }
      }
    }

    nwithin1 <- length(within.dist1)
    nbetween1 <- length(between.dist1)
    meanwithin1 <- mean(within.dist1)
    meanbetween1 <- mean(between.dist1)

    s.plus <- s.moins <- 0
    #s.moins<-sum(rank(c(within.dist1,between.dist1),ties="first")[1:nwithin1]-rank(within.dist1,ties="first"))
    #s.plus  <-sum(rank(c(-within.dist1,-between.dist1),ties="first")[1:nwithin1]-rank(-within.dist1,ties="first"))
    for (k in 1: nwithin1){
      s.plus <- s.plus+(colSums(outer(between.dist1,within.dist1[k], ">")))
      s.moins <- s.moins+(colSums(outer(between.dist1,within.dist1[k], "<")))
    }

    Index.Gamma <- (s.plus-s.moins)/(s.plus+s.moins)
    Index.Gplus <- (2*s.moins)/(n1*(n1-1))
    t.tau  <- (nwithin1*nbetween1)-(s.plus+s.moins)
    Index.Tau <- (s.plus-s.moins)/(((n1*(n1-1)/2-t.tau)*(n1*(n1-1)/2))^(1/2))

    results <- list(gamma=Index.Gamma, gplus=Index.Gplus, tau=Index.Tau)
    return(results)
  }




  ##################################
  #                                #
  #      Frey and McClain          #
  #                                #
  ##################################




  Index.15and28  <- function (cl1,cl2,md){
    cn1 <- max(cl1)
    n1 <- length(cl1)
    dmat <- as.matrix(md)
    average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
    separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
    di <- list()
    for (u in 1:cn1){
      cluster.size[u] <- sum(cl1 == u)
      du <- as.dist(dmat[cl1 == u, cl1 == u])
      within.dist1 <- c(within.dist1, du)
      #average.distance[u] <- mean(du)
      #median.distance[u] <- median(du)
      #bv <- numeric(0)
      for (v in 1:cn1) {
        if (v != u) {
          suv <- dmat[cl1 == u, cl1 == v]
          #bv <- c(bv, suv)
          if (u < v) {
            separation.matrix[u, v] <- separation.matrix[v,u] <- min(suv)
            between.dist1 <- c(between.dist1, suv)
          }
        }
      }
    }
    cn2 <- max(cl2)
    n2 <- length(cl2)
    dmat <- as.matrix(md)
    average.distance <- median.distance <- separation <- cluster.size <- within.dist2 <- between.dist2 <- numeric(0)
    separation.matrix <- matrix(0, ncol = cn2, nrow = cn2)
    di <- list()
    for (w in 1:cn2) {
      cluster.size[w] <- sum(cl2 == w)
      dw <- as.dist(dmat[cl2 == w, cl2 == w])
      within.dist2 <- c(within.dist2, dw)
      #average.distance[w] <- mean(dw)
      #median.distance[w] <- median(dw)
      bx <- numeric(0)
      for (x in 1:cn2) {
        if (x != w) {
          swx <- dmat[cl2 == w, cl2 == x]
          bx <- c(bx, swx)
          if (w < x) {
            separation.matrix[w, x] <- separation.matrix[x,w] <- min(swx)
            between.dist2 <- c(between.dist2, swx)
          }
        }
      }
    }
    nwithin1 <- length(within.dist1)
    nbetween1 <- length(between.dist1)
    meanwithin1 <- mean(within.dist1)
    meanbetween1 <- mean(between.dist1)
    meanwithin2 <- mean(within.dist2)
    meanbetween2 <- mean(between.dist2)
    Index.15 <- (meanbetween2-meanbetween1)/(meanwithin2-meanwithin1)
    Index.28 <- (meanwithin1/nwithin1)/(meanbetween1/nbetween1)

    results <- list(frey=Index.15,mcclain=Index.28)
    return(results)
  }


  ##################################
  #                                #
  #      Point-biserial            #
  #                                #
  ##################################



  Indice.ptbiserial <- function (x,md,cl1){
    nn <- dim(x)[1]
    pp <- dim(x)[2]

    md2 <- as.matrix(md)
    m01 <- array(NA, c(nn,nn))
    nbr <- (nn*(nn-1))/2
    pb <- array(0,c(nbr,2))

    m3 <- 1
    for (m1 in 2:nn)
    {
      m12 <- m1-1
      for (m2 in 1:m12)
      {
        if (cl1[m1]==cl1[m2]) m01[m1,m2]<-0
        if (cl1[m1]!=cl1[m2]) m01[m1,m2]<-1
        pb[m3,1] <- m01[m1,m2]
        pb[m3,2] <- md2[m1,m2]
        m3 <- m3+1
      }
    }

    y <- pb[,1]
    x <- pb[,2]

    biserial.cor <- function (x, y, use = c("all.obs", "complete.obs"), level = 1){
      if (!is.numeric(x))
        stop("'x' must be a numeric variable.\n")
      y <- as.factor(y)
      if (length(levs <- levels(y)) > 2)
        stop("'y' must be a dichotomous variable.\n")
      if (length(x) != length(y))
        stop("'x' and 'y' do not have the same length")
      use <- match.arg(use)
      if (use == "complete.obs") {
        cc.ind <- complete.cases(x, y)
        x <- x[cc.ind]
        y <- y[cc.ind]
      }
      ind <- y == levs[level]
      diff.mu <- mean(x[ind]) - mean(x[!ind])
      prob <- mean(ind)
      diff.mu * sqrt(prob * (1 - prob))/sd(x)
    }

    ptbiserial <- biserial.cor(x=pb[,2], y=pb[,1], level = 2)
    return(ptbiserial)
  }


  ##########################################
  #                                        #
  #       Duda, pseudot2 and beale         #
  #                                        #
  ##########################################


  Indices.WKWL <- function (x,cl1=cl1,cl2=cl2){
    dim2 <- dim(x)[2]
    wss <- function(x) {
      x <- as.matrix(x)
      n <- length(x)
      centers <- matrix(nrow = 1, ncol = ncol(x))

      if (ncol(x) == 1) {
        centers[1, ] <- mean(x) 	}
      if (is.null(dim(x))){
        bb <- matrix(x,byrow=FALSE,nrow=1,ncol=ncol(x))
        centers[1, ] <- apply(bb, 2, mean)
      }else{
        centers[1, ] <- apply(x, 2, mean)
      }

      x.2 <- sweep(x,2,centers[1,],"-")
      withins <- sum(x.2^2)
      wss <- sum(withins)
      return(wss)
    }

    ncg1 <- 1
    ncg1max <- max(cl1)
    while((sum(cl1==ncg1)==sum(cl2==ncg1)) && ncg1 <=ncg1max){
      ncg1 <- ncg1+1 }
    g1 <- ncg1

    ncg2 <- max(cl2)
    nc2g2 <- ncg2-1
    while((sum(cl1==nc2g2)==sum(cl2==ncg2)) && nc2g2 >=1){
      ncg2 <- ncg2-1
      nc2g2 <- nc2g2-1
    }
    g2 <- ncg2

    NK <- sum(cl2==g1)
    WK.x <- x[cl2==g1,]
    WK <- wss(x=WK.x)

    NL <- sum(cl2==g2)
    WL.x <- x[cl2==g2,]
    WL <- wss(x=WL.x)

    NM <- sum(cl1==g1)
    WM.x <- x[cl1==g1,]
    WM <- wss(x=WM.x)

    duda <- (WK+WL)/WM

    BKL <- WM-WK-WL
    pseudot2 <- BKL/((WK+WL)/(NK+NL-2))

    beale <- (BKL/(WK+WL))/(((NM-1)/(NM-2))*(2^(2/dim2)-1))

    results <- list(duda=duda,pseudot2=pseudot2,NM=NM,NK=NK,NL=NL,beale=beale)
    return(results)
  }


  ########################################################################
  #                                                                      #
  #       ccc, scott, marriot, trcovw, tracew, friedman and rubin        #
  #                                                                      #
  ########################################################################



  Indices.WBT <- function(x,cl,P,s,vv){
    n <- dim(x)[1]
    pp <- dim(x)[2]
    qq <- max(cl)
    z <- matrix(0,ncol=qq,nrow=n)
    clX <- as.matrix(cl)

    for (i in 1:n)
      for (j in 1:qq){
        z[i,j]==0
        if (clX[i,1]==j)
        {z[i,j]=1}
      }

    xbar <- solve(t(z)%*%z)%*%t(z)%*%x
    B <- t(xbar)%*%t(z)%*%z%*%xbar
    W <- P-B
    marriot <- (qq^2)*det(W)
    trcovw <- sum(diag(cov(W)))
    tracew <- sum(diag(W))
    if(det(W)!=0){
      scott <- n*log(det(P)/det(W))
    }else{
      cat("Error: division by zero!")}
    friedman <- sum(diag(solve(W)*B))
    rubin <- sum(diag(P))/sum(diag(W))


    R2 <- 1-sum(diag(W))/sum(diag(P))
    v1 <- 1
    u <- rep(0,pp)
    c <- (vv/(qq))^(1/pp)
    u <- s/c
    k1 <- sum((u>=1)==TRUE)
    p1 <- min(k1,qq-1)
    if (all(p1>0,p1<pp)){
      for (i in 1:p1)
        v1 <- v1*s[i]
      c <- (v1/(qq))^(1/p1)
      u <- s/c
      b1 <- sum(1/(n+u[1:p1]))
      b2 <- sum(u[p1+1:pp]^2/(n+u[p1+1:pp]),na.rm=TRUE)
      E_R2 <- 1-((b1+b2)/sum(u^2))*((n-qq)^2/n)*(1+4/n)
      ccc <- log((1-E_R2)/(1-R2))*(sqrt(n*p1/2)/((0.001+E_R2)^1.2))
    }else{
      b1 <- sum(1/(n+u))
      E_R2 <- 1-(b1/sum(u^2))*((n-qq)^2/n)*(1+4/n)
      ccc <- log((1-E_R2)/(1-R2))*(sqrt(n*pp/2)/((0.001+E_R2)^1.2))
    }
    results <- list(ccc=ccc,scott=scott,marriot=marriot,trcovw=trcovw,tracew=tracew,friedman=friedman,rubin=rubin)
    return(results)
  }



  ########################################################################
  #                                                                      #
  #                   Kl, Ch, Hartigan, Ratkowsky and Ball               #
  #                                                                      #
  ########################################################################




  Indices.Traces <- function(data, d, clall, index = "all"){
    x <- data
    cl0 <- clall[,1]
    cl1 <- clall[,2]
    cl2 <- clall[,3]
    clall <- clall
    nb.cl0 <- table(cl0)
    nb.cl1 <- table(cl1)
    nb.cl2 <- table(cl2)
    nb1.cl0 <- sum(nb.cl0==1)
    nb1.cl1 <- sum(nb.cl1==1)
    nb1.cl2 <- sum(nb.cl2==1)

    gss <- function(x, cl, d){
      n <- length(cl)
      k <- max(cl)
      centers <- matrix(nrow = k, ncol = ncol(x))
      for (i in 1:k){

        if (ncol(x) == 1){
          centers[i, ] <- mean(x[cl == i, ])
        }
        if (is.null(dim(x[cl == i, ]))){
          bb <- matrix(x[cl == i, ],byrow=FALSE,nrow=1,ncol=ncol(x))
          centers[i, ] <- apply(bb, 2, mean)
        }else {
          centers[i, ] <- apply(x[cl == i, ], 2, mean)
        }

      }
      allmean <- apply(x, 2, mean)
      dmean <- sweep(x, 2, allmean, "-")
      allmeandist <- sum(dmean^2)
      withins <- rep(0, k)
      x.2 <- (x - centers[cl, ])^2
      for (i in 1:k){
        withins[i] <- sum(x.2[cl == i, ])
      }
      wgss <- sum(withins)
      bgss <- allmeandist - wgss

      results <- list(wgss=wgss,bgss=bgss, centers=centers)
      return(results)
    }

    vargss <- function(x, clsize, varwithins){
      nvar <- dim(x)[2]
      n <- sum(clsize)
      k <- length(clsize)
      varallmean <- rep(0, nvar)
      varallmeandist <- rep(0, nvar)
      varwgss <- rep(0, nvar)
      for (l in 1:nvar) varallmean[l] <- mean(x[, l])
      vardmean <- sweep(x, 2, varallmean, "-")
      for (l in 1:nvar) {
        varallmeandist[l] <- sum((vardmean[, l])^2)
        varwgss[l] <- sum(varwithins[, l])
      }
      varbgss <- varallmeandist - varwgss
      vartss <- varbgss + varwgss
      zvargss <- list(vartss = vartss, varbgss = varbgss)
      return(zvargss)
    }
    varwithinss <- function(x, centers, cluster) {
      nrow <- dim(centers)[1]
      nvar <- dim(x)[2]
      varwithins <- matrix(0, nrow, nvar)
      x <- (x - centers[cluster, ])^2
      for (l in 1:nvar) {
        for (k in 1:nrow) {
          varwithins[k, l] <- sum(x[cluster == k, l])
        }
      }
      return(varwithins)
    }



    indice.kl <- function (x, clall, d = NULL, centrotypes = "centroids"){
      if (nb1.cl1 > 0){
        KL <- NA
      }
      if (sum(c("centroids", "medoids") == centrotypes) == 0)
        stop("Wrong centrotypes argument")
      if ("medoids" == centrotypes && is.null(d))
        stop("For argument centrotypes = 'medoids' d cannot be null")
      if (!is.null(d)) {
        if (!is.matrix(d)) {
          d <- as.matrix(d)
        }
        row.names(d) <- row.names(x)
      }
      #if (is.null(dim(x))) {
      #	    dim(x) <- c(length(x), 1)
      #}
      m <- ncol(x)
      g <- k <- max(clall[, 2])
      KL <- abs((g - 1)^(2/m) * gss(x, clall[, 1], d)$wgss -
                  g^(2/m) * gss(x, clall[, 2], d)$wgss)/abs((g)^(2/m) *
                                                              gss(x, clall[, 2], d)$wgss - (g + 1)^(2/m) *
                                                              gss(x, clall[, 3], d)$wgss)
      return(KL)
    }



    indice.ch <- function (x, cl, d = NULL, centrotypes = "centroids"){
      if (nb1.cl1 > 0){
        CH <- NA
      }
      if (sum(c("centroids", "medoids") == centrotypes) == 0)
        stop("Wrong centrotypes argument")
      if ("medoids" == centrotypes && is.null(d))
        stop("For argument centrotypes = 'medoids' d cannot be null")
      if (!is.null(d)) {
        if (!is.matrix(d)) {
          d <- as.matrix(d)
        }
        row.names(d) <- row.names(x)
      }
      #if (is.null(dim(x))) {
      #	    dim(x) <- c(length(x), 1)
      #}
      n <- length(cl)
      k <- max(cl)
      CH <- (gss(x, cl, d)$bgss/(k-1))/
        (gss(x, cl, d)$wgss/(n-k))
      return(CH)
    }


    # hartigan

    indice.hart <- function(x, clall, d = NULL, centrotypes = "centroids"){
      if (sum(c("centroids", "medoids") == centrotypes) == 0)
        stop("Wrong centrotypes argument")
      if ("medoids" == centrotypes && is.null(d))
        stop("For argument centrotypes = 'medoids' d cannot be null")
      if (!is.null(d)) {
        if (!is.matrix(d)) {
          d <- as.matrix(d)
        }
        row.names(d) <- row.names(x)
      }
      #if (is.null(dim(x))) {
      #    dim(x) <- c(length(x), 1)
      #}
      n <- nrow(x)
      g <- max(clall[, 1])
      HART <- (gss(x, clall[, 2], d)$wgss/gss(x, clall[, 3],d)$wgss - 1) * (n - g - 1)
      return(HART)
    }




    indice.ball <- function(x, cl, d = NULL, centrotypes = "centroids"){
      wgssB <- gss(x, cl, d)$wgss
      qq <- max(cl)
      ball <- wgssB/qq
      return(ball)
    }




    indice.ratkowsky <- function(x, cl, d, centrotypes = "centroids"){
      qq <- max(cl)
      clsize <- table(cl)
      centers <- gss(x, cl, d)$centers
      varwithins <- varwithinss(x, centers, cl)
      zvargss <- vargss(x, clsize, varwithins)
      ratio <- mean(sqrt(zvargss$varbgss/zvargss$vartss))
      ratkowsky <- ratio/sqrt(qq)
      return(ratkowsky)
    }

    indice <- pmatch(index, c("kl", "ch", "hart", "ratkowsky", "ball", "all"))
    if (is.na(indice))
      stop("invalid clustering index")
    if (indice == -1)
      stop("ambiguous index")
    vecallindex <- numeric(5)
    if (any(indice == 1) || (indice == 6))
      vecallindex[1] <- indice.kl(x,clall,d)
    if (any(indice == 2) || (indice == 6))
      vecallindex[2] <- indice.ch(x,cl=clall[,2],d)
    if (any(indice == 3) || (indice == 6))
      vecallindex[3] <- indice.hart(x,clall,d)
    if (any(indice == 4) || (indice == 6))
      vecallindex[4] <- indice.ratkowsky(x,cl=cl1, d)
    if (any(indice == 5) || (indice == 6))
      vecallindex[5] <- indice.ball(x,cl=cl1,d)
    names(vecallindex) <- c("kl", "ch", "hart", "ratkowsky", "ball")
    if (indice < 6)
      vecallindex <- vecallindex[indice]
    return(vecallindex)

  }



  ########################################################################
  #                                                                      #
  #                              C-index                                 #
  #                                                                      #
  ########################################################################



  Indice.cindex <- function (d, cl){
    d <- data.matrix(d)
    DU <- 0
    r <- 0
    v_max <- array(1, max(cl))
    v_min <- array(1, max(cl))
    for (i in 1:max(cl)) {
      n <- sum(cl == i)
      if (n > 1) {
        t <- d[cl == i, cl == i]
        DU = DU + sum(t)/2
        v_max[i] = max(t)
        if (sum(t == 0) == n){
          v_min[i] <- min(t[t != 0])
        }else{
          v_min[i] <- 0}
        r <- r + n * (n - 1)/2
      }
    }
    Dmin = min(v_min)
    Dmax = max(v_max)
    if (Dmin == Dmax) {
      result <- NA
    }else{
      result <- (DU - r * Dmin)/(Dmax * r - Dmin * r)
    }
    result
  }



  ########################################################################
  #                                                                      #
  #                                 DB                                   #
  #                                                                      #
  ########################################################################


  Indice.DB <- function (x, cl, d = NULL, centrotypes = "centroids", p = 2, q = 2){
    if (sum(c("centroids") == centrotypes) == 0)
      stop("Wrong centrotypes argument")
    if (!is.null(d)) {
      if (!is.matrix(d)) {
        d <- as.matrix(d)
      }
      row.names(d) <- row.names(x)
    }
    if (is.null(dim(x))) {
      dim(x) <- c(length(x), 1)
    }
    x <- as.matrix(x)
    n <- length(cl)
    k <- max(cl)
    dAm <- d
    centers <- matrix(nrow = k, ncol = ncol(x))
    if (centrotypes == "centroids") {
      for (i in 1:k) {
        for (j in 1:ncol(x)) {
          centers[i, j] <- mean(x[cl == i, j])
        }
      }
    }else {
      stop("wrong centrotypes argument")
    }
    S <- rep(0, k)
    for (i in 1:k) {
      ind <- (cl == i)
      if (sum(ind) > 1) {
        centerI <- centers[i, ]
        centerI <- rep(centerI, sum(ind))
        centerI <- matrix(centerI, nrow = sum(ind), ncol = ncol(x),
                          byrow = TRUE)
        S[i] <- mean(sqrt(apply((x[ind, ] - centerI)^2, 1,
                                sum))^q)^(1/q)
      }else{ S[i] <- 0}
    }
    M <- as.matrix(dist(centers, p = p))
    R <- array(Inf, c(k, k))
    r = rep(0, k)
    for (i in 1:k) {
      for (j in 1:k) {
        R[i, j] = (S[i] + S[j])/M[i, j]
      }
      r[i] = max(R[i, ][is.finite(R[i, ])])
    }
    DB = mean(r[is.finite(r)])
    resul <- list(DB = DB, r = r, R = R, d = M, S = S, centers = centers)
    resul
  }




  ########################################################################
  #                                                                      #
  #                             Silhouette                               #
  #                                                                      #
  ########################################################################



  Indice.S <- function (d, cl) {
    d <- as.matrix(d)
    Si <- 0
    for (k in 1:max(cl)) {
      if ((sum(cl == k)) <= 1){
        Sil <- 1
      }else{
        Sil <- 0
        for (i in 1:length(cl)) {
          if (cl[i] == k) {
            ai <- sum(d[i, cl == k])/(sum(cl == k) - 1)
            dips <- NULL
            for (j in 1:max(cl)) if (cl[i] != j)
              if (sum(cl == j) != 1)
                dips <- cbind(dips, c((sum(d[i, cl == j]))/(sum(cl ==
                                                                  j))))
            else dips <- cbind(dips, c((sum(d[i, cl ==
                                                j]))))
            bi <- min(dips)
            Sil <- Sil + (bi - ai)/max(c(ai, bi))
          }
        }
      }
      Si <- Si + Sil
    }
    Si/length(cl)
  }



  ########################################################################
  #                                                                      #
  #                                  Gap                                 #
  #                                                                      #
  ########################################################################

  Indice.Gap <- function (x, clall, reference.distribution = "unif", B = 10,
                          method = "ward.D2", d = NULL, centrotypes = "centroids"){
    GAP <- function(X, cl, referenceDistribution, B, method, d, centrotypes)
    {
      set.seed(1)
      simgap <- function(Xvec){
        ma <- max(Xvec)
        mi <- min(Xvec)
        set.seed(1)
        Xout <- runif(length(Xvec), min = mi, max = ma)
        return(Xout)
      }
      pcsim <- function(X, d, centrotypes){
        if (centrotypes == "centroids") {
          Xmm <- apply(X, 2, mean)
        }

        for (k in (1:dim(X)[2])) {
          X[, k] <- X[, k] - Xmm[k]
        }
        ss <- svd(X)
        Xs <- X %*% ss$v
        Xnew <- apply(Xs, 2, simgap)
        Xt <- Xnew %*% t(ss$v)
        for (k in (1:dim(X)[2])) {
          Xt[, k] <- Xt[, k] + Xmm[k]
        }
        return(Xt)
      }
      if (is.null(dim(x))){
        dim(x) <- c(length(x), 1)
      }
      ClassNr <- max(cl)
      Wk0 <- 0
      WkB <- matrix(0, 1, B)
      for (bb in (1:B)) {
        if (reference.distribution == "unif")
          Xnew <- apply(X, 2, simgap)
        else if (reference.distribution == "pc")
          Xnew <- pcsim(X, d, centrotypes)
        else stop("Wrong reference distribution type")
        if (bb == 1) {
          pp <- cl
          if (ClassNr == length(cl))
            pp2 <- 1:ClassNr
          else if (method == "k-means")
          { set.seed(1)
            pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
          }
          else if (method == "single" || method == "complete" ||
                   method == "average" || method == "ward.D2" ||
                   method == "mcquitty" || method == "median" ||
                   method == "centroid"|| method=="ward.D")
            pp2 <- cutree(hclust(dist(Xnew), method = method),
                          ClassNr)
          else stop("Wrong clustering method")
          if (ClassNr > 1) {
            for (zz in (1:ClassNr)) {
              Xuse <- X[pp == zz, ]
              Wk0 <- Wk0 + sum(diag(var(Xuse))) * (length(pp[pp ==
                                                               zz]) - 1)/(dim(X)[1] - ClassNr)
              Xuse2 <- Xnew[pp2 == zz, ]
              WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
                (length(pp2[pp2 == zz]) - 1)/(dim(X)[1] -
                                                ClassNr)
            }
          }
          if (ClassNr == 1)
          {
            Wk0 <- sum(diag(var(X)))
            WkB[1, bb] <- sum(diag(var(Xnew)))
          }
        }
        if (bb > 1) {
          if (ClassNr == length(cl))
            pp2 <- 1:ClassNr
          else if (method == "k-means")
          {
            set.seed(1)
            pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
          }
          else if (method == "single" || method == "complete" ||
                   method == "average" || method == "ward.D2" ||
                   method == "mcquitty" || method == "median" ||
                   method == "centroid"||method == "ward.D")
            pp2 <- cutree(hclust(dist(Xnew), method = method),
                          ClassNr)
          else stop("Wrong clustering method")
          if (ClassNr > 1) {
            for (zz in (1:ClassNr)) {
              Xuse2 <- Xnew[pp2 == zz, ]
              WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
                length(pp2[pp2 == zz])/(dim(X)[1] - ClassNr)
            }
          }
          if (ClassNr == 1) {
            WkB[1, bb] <- sum(diag(var(Xnew)))
          }
        }
      }
      Sgap <- mean(log(WkB[1, ])) - log(Wk0)
      Sdgap <- sqrt(1 + 1/B) * sqrt(var(log(WkB[1, ]))) * sqrt((B -
                                                                  1)/B)
      resul <- list(Sgap = Sgap, Sdgap = Sdgap)
      resul
    }
    if (sum(c("centroids", "medoids") == centrotypes) == 0)
      stop("Wrong centrotypes argument")
    if ("medoids" == centrotypes && is.null(d))
      stop("For argument centrotypes = 'medoids' d can not be null")
    if (!is.null(d)) {
      if (!is.matrix(d)) {
        d <- as.matrix(d)
      }
      row.names(d) <- row.names(x)
    }
    X <- as.matrix(x)
    gap1 <- GAP(X, clall[, 1], reference.distribution, B, method,
                d, centrotypes)
    gap <- gap1$Sgap
    gap2 <- GAP(X, clall[, 2], reference.distribution, B, method,
                d, centrotypes)
    diffu <- gap - (gap2$Sgap - gap2$Sdgap)
    resul <- list(gap = gap, diffu = diffu)
    resul

  }




  ########################################################################
  #                                                                      #
  #                              SD, sdbw, dunn                          #
  #                                                                      #
  ########################################################################




  Index.sdindex<-function(x, clmax, cl)
  {
    x <- as.matrix(x)
    Alpha<-Dis(clmax,x)
    Scatt<-Average.scattering(cl,x)$scatt
    Dis0<-Dis(cl,x)
    SD.indice<-Alpha*Scatt + Dis0
    return(SD.indice)
  }

  Index.SDbw<-function(x, cl)
  {
    x <- as.matrix(x)
    Scatt<-Average.scattering(cl,x)$scatt
    Dens.bw<-density.bw(cl,x)
    SDbw<-Scatt+Dens.bw
    return(SDbw)
  }



  ########################################################################
  #                                                                      #
  #                              D index                                 #
  #                                                                      #
  ########################################################################




  Index.Dindex<- function(cl, x)
  {
    x <- as.matrix(x)
    distance<-density.clusters(cl, x)$distance
    n<-length(distance)
    S<-0
    for(i in 1:n)
      S<-S+distance[i]
    inertieIntra<-S/n
    return(inertieIntra)
  }


  #####################################################################
  #                                                                   #
  #                            Dunn index                             #
  #                                                                   #
  #####################################################################



  Index.dunn <- function(md, clusters, Data=NULL, method="euclidean")
  {

    distance <- as.matrix(md)
    nc <- max(clusters)
    interClust <- matrix(NA, nc, nc)
    intraClust <- rep(NA, nc)

    for (i in 1:nc)
    {
      c1 <- which(clusters==i)
      for (j in i:nc) {
        if (j==i) intraClust[i] <- max(distance[c1,c1])
        if (j>i) {
          c2 <- which(clusters==j)
          interClust[i,j] <- min(distance[c1,c2])
        }
      }
    }
    dunn <- min(interClust,na.rm=TRUE)/max(intraClust)
    return(dunn)
  }



  ################


  for (nc in min_nc:max_nc)
  {

    if (any(method == 1) || (method == 2) || (method == 3) || (method == 4) ||
        (method == 5) || (method == 6) || (method == 7)||(method == 9))
    {
      cl1 <- cutree(hc, k=nc)
      cl2 <- cutree(hc, k=nc+1)
      clall <- cbind(cl1, cl2)
      clmax <- cutree(hc, k=max_nc)


      if (nc >= 2)
      {
        cl0 <- cutree(hc, k=nc-1)
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 1)
      {
        cl0 <-rep(NA,nn)
        clall1 <- cbind(cl0, cl1, cl2)
      }
    }

    if (method == 8)
    {
      set.seed(1)
      cl2 <- kmeans(jeu,nc+1)$cluster
      set.seed(1)
      clmax <- kmeans(jeu,max_nc)$cluster
      if (nc > 2)
      {
        set.seed(1)
        cl1 <- kmeans(jeu,nc)$cluster
        clall <- cbind(cl1, cl2)
        set.seed(1)
        cl0 <- kmeans(jeu,nc-1)$cluster
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 2)
      {
        set.seed(1)
        cl1 <- kmeans(jeu,nc)$cluster
        clall <- cbind(cl1, cl2)
        cl0 <- rep(1,nn)
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 1)
      {
        stop("Number of clusters must be higher than 2")
      }

    }

    if (method == 10)
    {
      set.seed(1)
      cl2 <- fanny(x = md, k = nc+1, diss = T, memb.exp = memb.exp)$clustering
      set.seed(1)
      clmax <-fanny(x = md, k = max_nc, diss = T, memb.exp = memb.exp)$clustering
      if (nc > 2)
      {
        set.seed(1)
        cl1 <- fanny(x = md, k = nc, diss = T, memb.exp = memb.exp)$clustering
        clall <- cbind(cl1, cl2)
        set.seed(1)
        cl0 <- fanny(x = md, k = nc-1, diss = T, memb.exp = memb.exp)$clustering
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 2)
      {
        set.seed(1)
        cl1 <- fanny(x = md, k = nc, diss = T, memb.exp = memb.exp)$clustering
        clall <- cbind(cl1, cl2)
        cl0 <- rep(1,nn)
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 1)
      {
        stop("Number of clusters must be higher than 2")
      }

    }

    j <- table(cl1)  # table uses the cross-classifying factors to build a contingency table of the counts at each combination of factor levels.
    s <- sum(j==1)
    j2 <- table(cl2)
    s2 <- sum(j2==1)


    ########### Indices.Traces-hartigan - 3e colonne de res ############
    if (any(indice == 3) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,3] <- Indices.Traces(jeu, md, clall1, index = "hart")
    }

    ########### Cubic Clustering Criterion-CCC  - 4e colonne de res ############
    if (any(indice == 4) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,4] <- Indices.WBT(x=jeu, cl=cl1, P=TT,s=ss,vv=vv)$ccc
    }

    ########### Scott and Symons - 5e colonne de res ############
    if (any(indice == 5) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,5] <- Indices.WBT(x=jeu, cl=cl1, P=TT,s=ss,vv=vv)$scott
    }

    ########### Marriot - 6e colonne de res ############
    if (any(indice == 6) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,6] <- Indices.WBT(x=jeu, cl=cl1, P=TT,s=ss,vv=vv)$marriot
    }

    ########### Trace Cov W - 7e colonne de res ############
    if (any(indice == 7) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,7] <- Indices.WBT(x=jeu, cl=cl1, P=TT,s=ss,vv=vv)$trcovw
    }

    ########### Trace W - 8e colonne de res ############
    if (any(indice == 8) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,8] <- Indices.WBT(x=jeu, cl=cl1, P=TT,s=ss,vv=vv)$tracew
    }

    ########### Friedman - 9e colonne de res ############
    if (any(indice == 9) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,9] <- Indices.WBT(x=jeu, cl=cl1, P=TT,s=ss,vv=vv)$friedman
    }

    ########### Rubin - 10e colonne de res ############
    if (any(indice == 10) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,10] <- Indices.WBT(x=jeu, cl=cl1, P=TT,s=ss,vv=vv)$rubin
    }


    ########### Indices.WKWL-duda - 14e colonne de res ############
    if (any(indice == 14) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,14] <- Indices.WKWL(x=jeu,cl1=cl1,cl2=cl2)$duda
    }


    ########### Indices.WKWL-pseudot2 - 15e colonne de res ############
    if (any(indice == 15) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,15] <- Indices.WKWL(x=jeu,cl1=cl1,cl2=cl2)$pseudot2
    }

    ########### Indices.WKWL-beale - 16e colonne de res ############
    if (any(indice == 16) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,16] <- beale <- Indices.WKWL(x=jeu,cl1=cl1,cl2=cl2)$beale
    }

    ########### Indices.WKWL- duda or pseudot2 or beale ############
    if (any(indice == 14) || (indice == 15) || (indice == 16) || (indice == 31) || (indice == 32))
    {
      NM <- Indices.WKWL(x=jeu,cl1=cl1,cl2=cl2)$NM
      NK <- Indices.WKWL(x=jeu,cl1=cl1,cl2=cl2)$NK
      NL <- Indices.WKWL(x=jeu,cl1=cl1,cl2=cl2)$NL
      zz <- 3.20 # Best standard score in Milligan and Cooper 1985
      zzz <- zz*sqrt(2*(1-8/((pi^2)*pp))/(NM*pp))



      if (any(indice == 14) || (indice == 31) || (indice == 32))
      {
        resCritical[nc-min_nc+1,1] <- critValue <- 1-(2/(pi*pp))-zzz
      }

      if ((indice == 15)|| (indice == 31) || (indice == 32))
      {
        critValue <- 1-(2/(pi*pp))-zzz
        resCritical[nc-min_nc+1,2] <- ((1-critValue)/critValue)*(NK+NL-2)

      }


      if (any(indice == 16) || (indice == 31) || (indice == 32))
      {
        df2 <- (NM-2)*pp
        resCritical[nc-min_nc+1,3] <- 1-pf(beale,pp,df2)
      }
    }

    ########### Indices.TracesL-ball - 18e colonne de res ############
    if (any(indice == 18) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,18] <- Indices.Traces(jeu, md, clall1, index = "ball")
    }

    ########### Indice.Point-Biserial - 19e colonne de res ############
    if (any(indice == 19) || (indice == 31) || (indice == 32))
    {
      res[nc-min_nc+1,19] <- Indice.ptbiserial(x=jeu, md=md, cl1=cl1)
    }

    ########### Gap - 20e colonne de res ############
    if (any(indice == 20) || (indice == 32))
    {

      if (method == 1) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "ward.D2", d = NULL, centrotypes = "centroids")
      }
      if (method == 2) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "single", d = NULL, centrotypes = "centroids")
      }
      if (method == 3) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "complete", d = NULL, centrotypes = "centroids")
      }
      if (method == 4) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "average", d = NULL, centrotypes = "centroids")
      }
      if (method == 5) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "mcquitty", d = NULL, centrotypes = "centroids")
      }
      if (method == 6) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "median", d = NULL, centrotypes = "centroids")
      }
      if (method == 7) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "centroid", d = NULL, centrotypes = "centroids")
      }
      if (method == 9) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "ward.D", d = NULL, centrotypes = "centroids")
      }
      if (method == 8) {
        resultSGAP <- Indice.Gap(x=jeu, clall=clall, reference.distribution = "unif", B = 10, method = "k-means", d = NULL, centrotypes = "centroids")
      }
      res[nc-min_nc+1,20] <- resultSGAP$gap
      resCritical[nc-min_nc+1,4] <- resultSGAP$diffu
    }

    if (nc >=2)
    {
      ########### Indices.Traces-kl - 1e colonne de res ############
      if (any(indice == 1) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,1] <- Indices.Traces(jeu, md, clall1, index = "kl")
      }

      ########### Indices.Traces-ch - 2e colonne de res ############
      if (any(indice == 2) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,2] <- Indices.Traces(jeu, md, clall1, index = "ch")
      }

      ########### Indice.cindex - 11e colonne de res ############
      if (any(indice == 11) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,11] <- Indice.cindex(d=md, cl=cl1)
      }

      ########### Indice.DB  - 12e colonne de res ############
      if (any(indice == 12) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,12] <- Indice.DB(x=jeu, cl=cl1, d = NULL, centrotypes = "centroids", p = 2, q = 2)$DB
      }

      ########### Silhouette - 13e colonne de res ############
      if (any(indice == 13) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,13] <- Indice.S(d=md, cl=cl1)
      }

      ########### Indices.Traces-ratkowsky- 17e colonne de res ############
      if (any(indice == 17) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,17] <- Indices.Traces(jeu, md, clall1, index = "ratkowsky")
      }

      ########### Indice.Frey - 21e colonne de res ############
      if (any(indice == 21) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,21] <- Index.15and28(cl1=cl1,cl2=cl2,md=md)$frey
      }

      ########### Indice.McClain - 22e colonne de res ############
      if (any(indice == 22) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,22] <- Index.15and28(cl1=cl1,cl2=cl2,md=md)$mcclain
      }

      ########### Indice.Gamma - 23e colonne de res ############
      if (any(indice == 23) || (indice == 32))
      {

        res[nc-min_nc+1,23] <- Index.sPlussMoins(cl1=cl1,md=md)$gamma
      }

      ########### Indice.Gplus- 24e colonne de res ############
      if (any(indice == 24) || (indice == 32))
      {
        res[nc-min_nc+1,24] <- Index.sPlussMoins(cl1=cl1,md=md)$gplus
      }

      ########### Indice.Tau  - 25e colonne de res ############
      if (any(indice == 25) || (indice == 32))
      {
        res[nc-min_nc+1,25] <- Index.sPlussMoins(cl1=cl1,md=md)$tau
      }

      ########### Indices.Dunn  - 26e colonne de res ############
      if (any(indice == 26 ) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,26] <- Index.dunn(md, cl1, Data=jeu, method=NULL)
      }

      ########### Indices.Hubert - 27e colonne de res ############
      if (any(indice == 27 ) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,27] <- Index.Hubert(jeu, cl1)
      }

      ########### Indices.SD - 28e colonne de res ############
      if (any(indice == 28 ) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,28] <- Index.sdindex(jeu, clmax, cl1)
      }

      ########### Indices.Dindex - 29e colonne de res ############
      if (any(indice == 29 ) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,29] <- Index.Dindex(cl1, jeu)
      }

      ########### Indices.SDbw - 30e colonne de res ############
      if (any(indice == 30 ) || (indice == 31) || (indice == 32))
      {
        res[nc-min_nc+1,30] <- Index.SDbw(jeu, cl1)
      }

    }else{
      res[nc-min_nc+1,1] <- NA
      res[nc-min_nc+1,2] <- NA
      res[nc-min_nc+1,11] <- NA
      res[nc-min_nc+1,12] <- NA
      res[nc-min_nc+1,13] <- NA
      res[nc-min_nc+1,17] <- NA
      res[nc-min_nc+1,21] <- NA
      res[nc-min_nc+1,22] <- NA
      res[nc-min_nc+1,23] <- NA
      res[nc-min_nc+1,24] <- NA
      res[nc-min_nc+1,25] <- NA
      res[nc-min_nc+1,26] <- NA
      res[nc-min_nc+1,27] <- NA
      res[nc-min_nc+1,28] <- NA
      res[nc-min_nc+1,29] <- NA
      res[nc-min_nc+1,30] <- NA
    }
  }

  #########################################################################################################
  #######################                      Best Number of Clusters                  ###################
  #########################################################################################################



  nc.KL<-indice.KL<-0
  if (any(indice == 1) || (indice == 31) || (indice == 32))
  {
    # KL - The value of u, which maximizes KL(u), is regarded as specifying the number of clusters [ClusterSim package].
    nc.KL <- (min_nc:max_nc)[which.max(res[,1])]
    indice.KL <- max(res[,1],na.rm = TRUE)
    best.nc<-nc.KL
  }

  nc.CH<-indice.CH<-0
  if (any(indice == 2) || (indice == 31) || (indice == 32))
  {
    # CH - The value of u, which maximizes CH(u), is regarded as specifying the number of clusters [ClusterSim package].
    nc.CH <- (min_nc:max_nc)[which.max(res[,2])]
    indice.CH <- max(res[,2],na.rm = TRUE)
    best.nc<-nc.CH
  }

  nc.CCC<-indice.CCC<-0
  if (any(indice == 4) || (indice == 31) || (indice == 32))
  {
    # CCC - The maximum value accross the hierarchy levels is used to indicate the optimal number of clusters in data [29].
    nc.CCC <- (min_nc:max_nc)[which.max(res[,4])]
    indice.CCC <- max(res[,4],na.rm = TRUE)
    best.nc<-nc.CCC
  }

  nc.DB<-indice.DB<-0
  if (any(indice == 12) || (indice == 31) || (indice == 32))
  {
    # DB - The value of u, which minimizes DB(u), is regarded as specifying the number of clusters [clusterSim package].
    nc.DB <- (min_nc:max_nc)[which.min(res[,12])]
    indice.DB <- min(res[,12],na.rm = TRUE)
    best.nc<-nc.DB
  }

  nc.Silhouette<-indice.Silhouette<-0
  if (any(indice == 13) || (indice == 31) || (indice == 32))
  {
    # SILHOUETTE - The value of u, which maximizes S(u), is regarded as specifying the number of clusters [ClusterSim package].
    nc.Silhouette <- (min_nc:max_nc)[which.max(res[,13])]
    indice.Silhouette <- max(res[,13],na.rm = TRUE)
    best.nc<-nc.Silhouette
  }

  nc.Gap<-indice.Gap<-0
  # GAP - Choose the number of clusters via finding the smallest q such that: Gap(q)=Gap(q+1)-Sq+1 (q=1,\u{85},n-2) [ClusterSim package].
  if (any(indice == 20) || (indice == 32))
  {
    found <- FALSE
    for (ncG in min_nc:max_nc){
      if ((resCritical[ncG-min_nc+1,4] >=0) && (!found)){
        ncGap <- ncG
        indiceGap <- res[ncG-min_nc+1,20]
        found <- TRUE
      }
    }
    if (found){
      nc.Gap <- ncGap
      indice.Gap <- indiceGap
      best.nc<-nc.Gap
    }else{
      nc.Gap <- NA
      indice.Gap <- NA
    }

  }

  nc.Duda<-indice.Duda<-0
  # DUDA - Choose the number of clusters via finding the smallest q such that: duda >= critical_value [Duda and Hart (1973)].


  if (any(indice == 14) || (indice == 31) || (indice == 32))
  {

    foundDuda <- FALSE
    for (ncD in min_nc:max_nc)
    {

      if ((res[ncD-min_nc+1,14]>=resCritical[ncD-min_nc+1,1]) && (!foundDuda))
      {
        ncDuda <- ncD
        indiceDuda <- res[ncD-min_nc+1,14]
        foundDuda <- TRUE
      }
    }
    if (foundDuda)
    {
      nc.Duda <- ncDuda
      indice.Duda <- indiceDuda
      best.nc<-nc.Duda
    }
    else
    {
      nc.Duda <- NA
      indice.Duda <- NA
    }


  }

  nc.Pseudo<-indice.Pseudo<-0
  # PSEUDOT2 - Chooses the number of clusters via finding the smallest q such that: pseudot2 <= critical_value [SAS User's guide].
  if (any(indice == 15) || (indice == 31) || (indice == 32))
  {

    foundPseudo <- FALSE
    for (ncP in min_nc:max_nc)
    {

      if ((res[ncP-min_nc+1,15]<=resCritical[ncP-min_nc+1,2]) && (!foundPseudo))
      {
        ncPseudo <- ncP
        indicePseudo <- res[ncP-min_nc+1,15]
        foundPseudo <- TRUE
      }
    }
    if (foundPseudo)
    {
      nc.Pseudo <- ncPseudo
      indice.Pseudo <- indicePseudo
      best.nc<-nc.Pseudo
    }
    else
    {
      nc.Pseudo <- NA
      indice.Pseudo <- NA
    }
  }


  nc.Beale<-indice.Beale<-0
  if (any(indice == 16) || (indice == 31) || (indice == 32))
  {
    # BEALE - Chooses the number of clusters via finding the smallest q such that: Fvalue_beale >= 0.1 [Gordon (1999)].
    foundBeale <- FALSE
    for (ncB in min_nc:max_nc)
    {

      if ((resCritical[ncB-min_nc+1,3]>=alphaBeale) && (!foundBeale)){
        ncBeale <- ncB
        indiceBeale <- res[ncB-min_nc+1,16]
        foundBeale <- TRUE
      }
    }
    if (foundBeale){
      nc.Beale <- ncBeale
      indice.Beale <- indiceBeale
      best.nc<-nc.Beale
    }
    else
    {
      nc.Beale <- NA
      indice.Beale <- NA
    }
  }


  nc.ptbiserial<-indice.ptbiserial<-0
  if (any(indice == 19) || (indice == 31) || (indice == 32))
  {
    # POINT-BISERIAL - The maximum value was used to suggest the optimal number of clusters in the data [29].
    nc.ptbiserial <- (min_nc:max_nc)[which.max(res[,19])]
    indice.ptbiserial <- max(res[,19],na.rm = TRUE)
    best.nc<-nc.ptbiserial
  }

  foundNC<-foundIndice<-numeric(0)
  nc.Frey<-indice.Frey<-0
  if (any(indice == 21) || (indice == 31) || (indice == 32))
  {
    # FREY AND VAN GROENEWOUD - The best results occured when clustering was continued until the ratio fell below 1.00 for the last
    #			      series of times. At this point, the cluster level before this series was taken as the optimal partition.
    #			      If the ration never fell below 1.00, a one cluster solution was assumed [29].

    foundFrey <- FALSE
    i<-1
    for (ncF in min_nc:max_nc)
    {

      if (res[ncF-min_nc+1,21] < 1)
      {

        ncFrey <- ncF-1
        indiceFrey <- res[ncF-1-min_nc+1,21]
        foundFrey <- TRUE
        foundNC[i]<-ncFrey
        foundIndice[i]<-indiceFrey
        i<-i+1

      }

    }
    if (foundFrey)
    {
      nc.Frey <- foundNC[1]
      indice.Frey <- foundIndice[1]
      best.nc<-nc.Frey
    }
    else
    {
      nc.Frey <- NA
      indice.Frey <- NA
      print(paste("Frey index : No clustering structure in this data set"))
    }


  }


  nc.McClain<-indice.McClain<-0
  if (any(indice == 22) || (indice == 31) || (indice == 32))
  {
    # MCCLAIN AND RAO - The minimum value of the index was found to give the best recovery information [29].
    nc.McClain <- (min_nc:max_nc)[which.min(res[,22])]
    indice.McClain <- min(res[,22],na.rm = TRUE)
    best.nc<-nc.McClain

  }

  nc.Gamma<-indice.Gamma<-0
  if (any(indice == 23) || (indice == 31) || (indice == 32))
  {
    # GAMMA - Maximum values were taken to represent the correct hierarchy level [29].
    nc.Gamma <- (min_nc:max_nc)[which.max(res[,23])]
    indice.Gamma <- max(res[,23],na.rm = TRUE)
    best.nc<-nc.Gamma

  }

  nc.Gplus<-indice.Gplus<-0
  if (any(indice == 24) || (indice == 31) || (indice == 32))
  {
    # GPLUS - Minimum values were used to determine the number of clusters in the data [29].
    nc.Gplus  <- (min_nc:max_nc)[which.min(res[,24])]
    indice.Gplus <- min(res[,24],na.rm = TRUE)
    best.nc<-nc.Gplus
  }

  nc.Tau<-indice.Tau<-0
  if (any(indice == 25) || (indice == 31) || (indice == 32))
  {
    # TAU - The maximum value in the hierarchy sequence was taken as indicating the correct number of clusters [29].
    nc.Tau <- (min_nc:max_nc)[which.max(res[,25])]
    indice.Tau <- max(res[,25],na.rm = TRUE)
    best.nc<-nc.Tau
  }


  #Some indices need to compute difference between hierarchy levels to identify relevant number of clusters


  if((indice==3)||(indice == 5)||(indice == 6)||(indice == 7)||(indice == 8)||(indice == 9)||(indice == 10)||(indice == 18)||(indice == 27)||(indice == 11)||(indice == 29)||(indice == 31)||(indice == 32))
  {

    DiffLev <- array(0, c(max_nc-min_nc+1,12))
    DiffLev[,1] <- min_nc:max_nc
    for (nc3 in min_nc:max_nc)
    {
      if (nc3==min_nc)
      {
        DiffLev[nc3-min_nc+1,2] <- abs(res[nc3-min_nc+1,3]-NA)   # Hartigan
        DiffLev[nc3-min_nc+1,3] <- abs(res[nc3-min_nc+1,5]-NA)   #Scott
        DiffLev[nc3-min_nc+1,4] <- abs(res[nc3-min_nc+1,6]-NA)   # Marriot
        DiffLev[nc3-min_nc+1,5] <- abs(res[nc3-min_nc+1,7]-NA)   #Trcovw
        DiffLev[nc3-min_nc+1,6] <- abs(res[nc3-min_nc+1,8]-NA)   #Tracew
        DiffLev[nc3-min_nc+1,7] <- abs(res[nc3-min_nc+1,9]-NA)   #Friedman
        DiffLev[nc3-min_nc+1,8] <- abs(res[nc3-min_nc+1,10]-NA)  #Rubin
        DiffLev[nc3-min_nc+1,9] <- abs(res[nc3-min_nc+1,18]-NA)  # Ball
        DiffLev[nc3-min_nc+1,10] <- abs(res[nc3-min_nc+1,27]-NA) # Hubert
        DiffLev[nc3-min_nc+1,12] <- abs(res[nc3-min_nc+1,29]-NA) # D index


      }
      else
      {
        if(nc3==max_nc)
        {
          DiffLev[nc3-min_nc+1,2] <- abs(res[nc3-min_nc+1,3]-res[nc3-min_nc,3])
          DiffLev[nc3-min_nc+1,3] <- abs(res[nc3-min_nc+1,5]-res[nc3-min_nc,5])
          DiffLev[nc3-min_nc+1,4] <- abs(res[nc3-min_nc+1,6]-NA) # Marriot
          DiffLev[nc3-min_nc+1,5] <- abs(res[nc3-min_nc+1,7]-res[nc3-min_nc,7])  # trcovw
          DiffLev[nc3-min_nc+1,6] <- abs(res[nc3-min_nc+1,8]-NA) #traceW
          DiffLev[nc3-min_nc+1,7] <- abs(res[nc3-min_nc+1,9]-res[nc3-min_nc,9])
          DiffLev[nc3-min_nc+1,8] <- abs(res[nc3-min_nc+1,10]-NA) #Rubin
          DiffLev[nc3-min_nc+1,9] <- abs(res[nc3-min_nc+1,18]-res[nc3-min_nc,18])
          DiffLev[nc3-min_nc+1,10] <- abs(res[nc3-min_nc+1,27]-NA)
          DiffLev[nc3-min_nc+1,12] <- abs(res[nc3-min_nc+1,29]-NA) # D index


        }
        else
        {

          DiffLev[nc3-min_nc+1,2] <- abs(res[nc3-min_nc+1,3]-res[nc3-min_nc,3]) # Hartigan
          DiffLev[nc3-min_nc+1,3] <- abs(res[nc3-min_nc+1,5]-res[nc3-min_nc,5])
          DiffLev[nc3-min_nc+1,4] <- ((res[nc3-min_nc+2,6]-res[nc3-min_nc+1,6])-(res[nc3-min_nc+1,6]-res[nc3-min_nc,6]))
          DiffLev[nc3-min_nc+1,5] <- abs(res[nc3-min_nc+1,7]-res[nc3-min_nc,7])
          DiffLev[nc3-min_nc+1,6] <- ((res[nc3-min_nc+2,8]-res[nc3-min_nc+1,8])-(res[nc3-min_nc+1,8]-res[nc3-min_nc,8]))
          DiffLev[nc3-min_nc+1,7] <- abs(res[nc3-min_nc+1,9]-res[nc3-min_nc,9])
          DiffLev[nc3-min_nc+1,8] <- ((res[nc3-min_nc+2,10]-res[nc3-min_nc+1,10])-(res[nc3-min_nc+1,10]-res[nc3-min_nc,10]))
          DiffLev[nc3-min_nc+1,9] <- abs(res[nc3-min_nc+1,18]-res[nc3-min_nc,18])
          DiffLev[nc3-min_nc+1,10] <- abs((res[nc3-min_nc+1,27]-res[nc3-min_nc,27]))
          DiffLev[nc3-min_nc+1,12] <-((res[nc3-min_nc+2,29]-res[nc3-min_nc+1,29])-(res[nc3-min_nc+1,29]-res[nc3-min_nc,29])) #Dindex

        }
      }
    }
  }

  nc.Hartigan<-indice.Hartigan<-0
  if (any(indice == 3) || (indice == 31) || (indice == 32))
  {
    # HARTIGAN - The maximum differences between hierarchy levels were taken as indicating the correct number of clusters in the data [29].
    nc.Hartigan <- DiffLev[,1][which.max(DiffLev[,2])]
    indice.Hartigan <- max(DiffLev[,2],na.rm = TRUE)
    best.nc<-nc.Hartigan
  }

  nc.Ratkowsky<-indice.Ratkowsky<-0
  if (any(indice == 17) || (indice == 31) || (indice == 32))
  {
    # RATKOWSKY - The optimal number of groups is taken as the level where this criterion exhibits its maximum value [29].
    nc.Ratkowsky <- (min_nc:max_nc)[which.max(res[,17])]
    indice.Ratkowsky <- max(res[,17],na.rm = TRUE)
    best.nc<-nc.Ratkowsky
  }

  nc.cindex<-indice.cindex<-0
  if (any(indice == 11) || (indice == 31) || (indice == 32))
  {
    #CINDEX - The minimum value across the hierarchy levels was used to indicate the optimal number of clusters [29].
    nc.cindex <- (min_nc:max_nc)[which.min(res[,11])]
    indice.cindex <- min(res[,11],na.rm = TRUE)
    best.nc<-nc.cindex
  }

  nc.Scott<-indice.Scott<-0
  if (any(indice == 5) || (indice == 31) || (indice == 32))
  {
    # SCOTT - The maximum difference between hierarchy levels was used to suggest the correct number of partitions [29].
    nc.Scott <- DiffLev[,1][which.max(DiffLev[,3])]
    indice.Scott <- max(DiffLev[,3],na.rm = TRUE)
    best.nc<-nc.Scott
  }

  nc.Marriot<-indice.Marriot<-0
  if (any(indice == 6) || (indice == 31) || (indice == 32))
  {
    # MARRIOT - The maximum difference between successive levels was used to determine the best partition level [29].
    nc.Marriot <- DiffLev[,1][which.max(DiffLev[,4])]
    round(nc.Marriot, digits=1)
    indice.Marriot <- max(DiffLev[,4],na.rm = TRUE)
    best.nc<-nc.Marriot
  }

  nc.TrCovW<-indice.TrCovW<-0
  if (any(indice == 7) || (indice == 31) || (indice == 32))
  {
    nc.TrCovW <- DiffLev[,1][which.max(DiffLev[,5])]
    indice.TrCovW <- max(DiffLev[,5],na.rm = TRUE)
    best.nc<-nc.TrCovW
  }


  nc.TraceW<-indice.TraceW<-0
  if (any(indice == 8) || (indice == 31) || (indice == 32))
  {
    # TRACE W - To determine the number of clusters in the data, maximum difference scores were used [29].
    nc.TraceW <- DiffLev[,1][which.max(DiffLev[,6])]
    indice.TraceW <- max(DiffLev[,6],na.rm = TRUE)
    best.nc<-nc.TraceW
  }

  nc.Friedman<-indice.Friedman<-0
  if (any(indice == 9) || (indice == 31) || (indice == 32))
  {
    # FRIEDMAN - The maximum difference in values of trace W-1B criterion was used to indicate the optimal number of clusters [29].
    nc.Friedman <- DiffLev[,1][which.max(DiffLev[,7])]
    indice.Friedman <- max(DiffLev[,7],na.rm = TRUE)
    best.nc<-nc.Friedman
  }

  nc.Rubin<-indice.Rubin<-0
  if (any(indice == 10) || (indice == 31) || (indice == 32))
  {
    # RUBIN - The difference between levels was used [29].
    nc.Rubin <- DiffLev[,1][which.min(DiffLev[,8])]
    indice.Rubin <- min(DiffLev[,8],na.rm = TRUE)
    best.nc<-nc.Rubin
  }

  nc.Ball<-indice.Ball<-0
  if (any(indice == 18) || (indice == 31) || (indice == 32))
  {
    # BALL - The largest difference between levels was used to indicate the optimal solution [29].
    nc.Ball <- DiffLev[,1][which.max(DiffLev[,9])]
    indice.Ball <- max(DiffLev[,9],na.rm = TRUE)
    best.nc<-nc.Ball
  }


  nc.Dunn<-indice.Dunn<-0
  if (any(indice == 26) || (indice == 31) || (indice == 32))
  {
    # Dunn -
    nc.Dunn <- (min_nc:max_nc)[which.max(res[,26])]
    indice.Dunn <- max(res[,26],na.rm = TRUE)
    best.nc<-nc.Dunn
  }


  nc.Hubert<-indice.Hubert<-0
  if (any(indice == 27) || (indice == 31) || (indice == 32))
  {
    # Hubert -
    nc.Hubert  <- 0.00
    indice.Hubert  <- 0.00
    #x11()
    par(mfrow = c(1,2))
    plot(x_axis,res[,27], tck=0, type="b", col="red", xlab= expression(paste("Number of clusters ")), ylab= expression(paste("Hubert Statistic values")))
    plot(DiffLev[,1],DiffLev[,10], tck=0, type="b", col="blue", xlab= expression(paste("Number of clusters ")), ylab= expression(paste("Hubert statistic second differences")))
    cat(paste ("*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.", "\n", "\n"))
  }

  nc.sdindex<-indice.sdindex<-0
  if (any(indice == 28) || (indice == 31) || (indice == 32))
  {
    # SD -
    nc.sdindex <- (min_nc:max_nc)[which.min(res[,28])]
    indice.sdindex<- min(res[,28],na.rm = TRUE)
    best.nc<-nc.sdindex
  }


  nc.Dindex<-indice.Dindex<-0
  if (any(indice == 29) || (indice == 31) || (indice == 32))
  {

    nc.Dindex <- 0.00
    indice.Dindex<- 0.00
    #x11()
    par(mfrow = c(1,2))
    plot(x_axis,res[,29], tck=0, type="b", col="red", xlab= expression(paste("Number of clusters ")), ylab= expression(paste("Dindex Values")))
    plot(DiffLev[,1],DiffLev[,12], tck=0, type="b", col="blue", xlab= expression(paste("Number of clusters ")), ylab= expression(paste("Second differences Dindex Values")))
    cat(paste ("*** : The D index is a graphical method of determining the number of clusters.
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure.", "\n", "\n"))
  }

  nc.SDbw<-indice.SDbw<-0
  if (any(indice == 30) || (indice == 31) || (indice == 32))
  {
    # SDbw -
    nc.SDbw <- (min_nc:max_nc)[which.min(res[,30])]
    indice.SDbw<- min(res[,30],na.rm = TRUE)
    best.nc<-nc.SDbw
  }



  ######################################################################################################################
  ########################                Displaying results             #########################################
  ######################################################################################################################

  if (indice < 31)
  {
    res <- res[,c(indice)]

    if (indice == 14) { resCritical <- resCritical[,1]  }
    if (indice == 15) { resCritical <- resCritical[,2] }
    if (indice == 16) { resCritical <- resCritical[,3] }
    if (indice == 20) { resCritical <- resCritical[,4] }

  }

  if (indice == 31)
  {
    res <- res[,c(1:19,21:22,26:30)]
    resCritical <- resCritical[,c(1:3)]

  }



  if (any(indice == 20) || (indice == 23) || (indice == 24) || (indice == 25) || (indice == 32)){

    results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan, indice.Hartigan, nc.CCC, indice.CCC, nc.Scott, indice.Scott,
                 nc.Marriot, indice.Marriot, nc.TrCovW, indice.TrCovW, nc.TraceW, indice.TraceW, nc.Friedman,
                 indice.Friedman, nc.Rubin, indice.Rubin, nc.cindex, indice.cindex, nc.DB, indice.DB, nc.Silhouette,
                 indice.Silhouette, nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo, nc.Beale, indice.Beale, nc.Ratkowsky,
                 indice.Ratkowsky, nc.Ball, indice.Ball, nc.ptbiserial, indice.ptbiserial, nc.Gap, indice.Gap,
                 nc.Frey, indice.Frey, nc.McClain, indice.McClain, nc.Gamma, indice.Gamma, nc.Gplus, indice.Gplus,
                 nc.Tau, indice.Tau, nc.Dunn, indice.Dunn, nc.Hubert, indice.Hubert, nc.sdindex, indice.sdindex, nc.Dindex, indice.Dindex, nc.SDbw, indice.SDbw)
    results1<-matrix(c(results),nrow=2,ncol=30)
    resultats <- matrix(c(results),nrow=2,ncol=30,dimnames = list(c("Number_clusters","Value_Index"),
                                                                  c("KL","CH","Hartigan","CCC", "Scott", "Marriot", "TrCovW",
                                                                    "TraceW", "Friedman", "Rubin", "Cindex", "DB", "Silhouette",
                                                                    "Duda","PseudoT2", "Beale", "Ratkowsky", "Ball", "PtBiserial",
                                                                    "Gap", "Frey", "McClain", "Gamma", "Gplus", "Tau", "Dunn",
                                                                    "Hubert", "SDindex", "Dindex", "SDbw")))
  }else{

    results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan, indice.Hartigan, nc.CCC, indice.CCC, nc.Scott, indice.Scott,
                 nc.Marriot, indice.Marriot, nc.TrCovW, indice.TrCovW, nc.TraceW, indice.TraceW, nc.Friedman, indice.Friedman,
                 nc.Rubin, indice.Rubin, nc.cindex, indice.cindex, nc.DB, indice.DB, nc.Silhouette, indice.Silhouette,
                 nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo, nc.Beale, indice.Beale, nc.Ratkowsky, indice.Ratkowsky,
                 nc.Ball, indice.Ball, nc.ptbiserial, indice.ptbiserial, nc.Frey, indice.Frey, nc.McClain, indice.McClain,
                 nc.Dunn, indice.Dunn, nc.Hubert, indice.Hubert, nc.sdindex, indice.sdindex, nc.Dindex, indice.Dindex, nc.SDbw, indice.SDbw
    )
    results1<-matrix(c(results),nrow=2,ncol=26)
    resultats <- matrix(c(results),nrow=2,ncol=26,dimnames = list(c("Number_clusters","Value_Index"),
                                                                  c("KL","CH","Hartigan","CCC", "Scott", "Marriot", "TrCovW",
                                                                    "TraceW", "Friedman", "Rubin", "Cindex", "DB", "Silhouette",
                                                                    "Duda","PseudoT2", "Beale", "Ratkowsky", "Ball", "PtBiserial",
                                                                    "Frey", "McClain", "Dunn", 		"Hubert", "SDindex", "Dindex", "SDbw")))

  }


  if (any(indice <= 20)||(indice == 23)||(indice == 24)||(indice == 25))
  {
    resultats <- resultats[,c(indice)]
  }

  if (any(indice == 21)|| (indice == 22))
  {
    indice3 <-indice-1
    resultats <- resultats[,c(indice3)]
  }

  if (any(indice == 26) || (indice == 27) || (indice == 28) || ( indice == 29)|| ( indice == 30))
  {
    indice4 <- indice-4
    resultats <- resultats[,c(indice4)]
  }


  resultats<-round(resultats, digits=4)
  res<-round(res, digits=4)
  resCritical<-round(resCritical, digits=4)

  #  if (numberObsAfter != numberObsBefore)
  #  {
  #     cat(paste(numberObsAfter,"observations were used out of", numberObsBefore ,"possible observations due to missing values."))
  #  }

  #  if (numberObsAfter == numberObsBefore)
  #  {
  #     cat(paste("All", numberObsAfter,"observations were used.", "\n", "\n"))
  #  }



  ######################## Summary results #####################################


  if(any(indice == 31) || (indice == 32))
  {
    cat("*******************************************************************", "\n")
    cat("* Among all indices:                                               ", "\n")
    BestCluster<-results1[1,]
    c=0
    for(i in min.nc:max.nc)
    {
      vect<-which(BestCluster==i)
      if(length(vect)>0)
        cat("*",length(vect), "proposed", i,"as the best number of clusters", "\n")

      if(c<length(vect))
      {
        j=i
        c<-length(vect)
      }
    }

    cat("\n","                  ***** Conclusion *****                           ", "\n", "\n")
    cat("* According to the majority rule, the best number of clusters is ",j , "\n", "\n", "\n")
    cat("*******************************************************************", "\n")


    ########################## The Best partition    ###################

    if (any(method == 1) || (method == 2) || (method == 3) || (method == 4) ||
        (method == 5) || (method == 6) || (method == 7)||(method == 9))
      partition<- cutree(hc, k=j)

    else
    {
      set.seed(1)
      partition<-kmeans(jeu,j)$cluster
    }

  }


  if (any(indice==1)||(indice==2)||(indice==3)||(indice==4)||(indice==5)||(indice==6)||(indice==7)
      ||(indice==8)||(indice==9)||(indice==10)||(indice==11)||(indice==12)||(indice==13)||(indice==14)
      ||(indice==15)||(indice==16)||(indice==17)||(indice==18)||(indice==19)||(indice==20)
      ||(indice==21)||(indice==22)||(indice==23)||(indice==24)||(indice==25)||(indice==26)
      ||(indice==28)||(indice==30))
  {
    if (any(method == 1) || (method == 2) || (method == 3) || (method == 4) ||
        (method == 5) || (method == 6) || (method == 7) || (method == 9))

      partition<- cutree(hc, k=best.nc)

    else
    {
      set.seed(1)
      partition<-kmeans(jeu,best.nc)$cluster
    }

  }


  #########################  Summary results   ############################



  if ((indice == 14)|| (indice == 15)|| (indice == 16)|| (indice == 20)|| (indice == 31)|| (indice == 32))
  {
    results.final <- list(All.index=res,All.CriticalValues=resCritical,Best.nc=resultats, Best.partition=partition)
  }

  if ((indice == 27)|| (indice == 29))
    results.final <- list(All.index=res)

  if (any(indice==1)||(indice==2)||(indice==3)||(indice==4)||(indice==5)||(indice==6)||(indice==7)
      ||(indice==8)||(indice==9)||(indice==10)||(indice==11)||(indice==12)||(indice==13)
      ||(indice==17)||(indice==18)||(indice==19)||(indice==21)||(indice==22)||(indice==23)||(indice==24)
      ||(indice==25)||(indice==26)||(indice==28)||(indice==30))

    results.final <- list(All.index=res,Best.nc=resultats, Best.partition=partition)



  return(results.final)


}
annack84/STMdevelopment documentation built on April 12, 2024, 6:46 p.m.