R/NbClust_mod.R

# NbClust_mod is a modified version of NbClust:
# - it does not compute some indices:
#       - Some that required nonsingular scatter matrix (of the transposed data)
#         ccc, scott, marriott, friedman, beale
#       - Two graphical methods: hubert and dindex.
# - I reduced as much as possible repeated calls to the same function with the
#     same parameters, when specifying indiex = "all" or "alllong", to make
#     the computations slightly faster.
# The available options for index are:
#  "kl", "ch", "hartigan", "cindex", "db", "silhouette", "duda", "pseudot2",
#  "ratkowsky", "ball", "ptbiserial", "gap", "frey", "mclain", "gamma", "gplus",
#  "tau", "dunn", "sdindex", "sdbw", "trcovw", "tracew", "rubin", "all", "alllong".
# refer to the help of package NbClust for other options
#
# To verify that the function works, just compare its results to the original NbClust
# in a situation in which NbClust works (i.e., use a data matrix with less subjects
# than variables). Inspect each index separately, otherwise the original NbClust
# will probably crash anyway. Gap index produces slightly different results every time.


NbClust_mod <- function (data, diss = "NULL", min.nc = 2, 
                      max.nc = 10, method = "average", index = "alllong",
                      alphaBeale = 0.1) 
{
  x <- 0
  min_nc <- min.nc
  max_nc <- max.nc
  jeu1 <- as.matrix(data)
  numberObsBefore <- dim(jeu1)[1]
  jeu <- na.omit(jeu1)
  nn <- numberObsAfter <- dim(jeu)[1]
  pp <- dim(jeu)[2]
  
  TT <- t(jeu) %*% jeu
  
  if (diss[1] == "NULL") {
    stop("ambiguous distance or dissimilarity matrix")
  } else {
    md <- diss
  }
  
  
  # define dataframes for output.
  # I reduced res from 31 to 24, because of the 7 methods excluded.
  # I reduced resCritical by removing Beale column, because beale method is not
  # considered.
  res <- array(0, c(max_nc - min_nc + 1, 24))
  res[, 1] <- min_nc:max_nc
  rownames(res) <- min_nc:max_nc
  resCritical <- array(0, c(max_nc - min_nc + 1, 4))
  resCritical[, 1] <- min_nc:max_nc
  rownames(resCritical) <- min_nc:max_nc
  colnames(resCritical) <- c("nc.CritValue", "CritValue_Duda", 
                             "CritValue_PseudoT2", "CritValue_Gap")
  colnames(res) <- c("nc.Ward", "index.KL", "index.CH", 
                     "index.Hartigan", "index.TrCovW", "index.TraceW", 
                     "index.Rubin", "index.Cindex", "index.DB",
                     "index.Silhouette", "index.Duda", "index.Pseudot2",
                     "index.Ratkowsky", "index.Ball", "index.ptbiserial",
                     "index.Gap", "index.Frey",  "index.McClain", "index.Gamma",
                     "index.Gplus", "index.Tau", "index.Dunn", "index.SDindex",
                     "index.SDbw")
  
  # perform hierarchical cluster analysis according to the specified method
  # if the method is kmeans, the cluster analysis will be performed later
  if (is.na(method)) 
    stop("invalid clustering method")
  if (method != "kmeans") {
    hc <- hclust(md, method = method)
  }
  method <- pmatch(method, c("ward", "single", "complete", 
                             "average", "mcquitty", "median",
                             "centroid", "kmeans"))
  
  
  indice <- pmatch(index, c("kl", "ch", "hartigan", "ccc_removed", 
                            "scott_removed", "marriot_removed", "trcovw", "tracew",
                            "friedman_removed", "rubin", 
                            "cindex", "db", "silhouette", "duda", "pseudot2", "beale_removed", 
                            "ratkowsky", "ball", "ptbiserial", "gap", "frey", "mcclain", 
                            "gamma", "gplus", "tau", "dunn", "hubert_removed", "sdindex", 
                            "dindex_removed", "sdbw", "all", "alllong"))
  
  
  if ((indice == 3) || (indice == 5) || (indice == 6) || (indice == 7) || 
        (indice == 8) || (indice == 9) || (indice == 10) || 
        (indice == 11) || (indice == 18) || (indice == 27) || 
        (indice == 29) || (indice == 31) || (indice == 32)) {
    if ((max.nc - min.nc) < 2) 
      stop("The difference between the minimum and the maximum number of clusters must be at least equal to 2")
  }
  
  
  # here the computation starts
  for (nc in min_nc:max_nc) {
    
    # create the partitions according to nc and method
    if (any(method == 1) || (method == 2) || (method == 3) || 
          (method == 4) || (method == 5) || (method == 6) || 
          (method == 7)) {
      cl1 <- cutree(hc, k = nc)
      cl2 <- cutree(hc, k = nc + 1)
      clall <- cbind(cl1, cl2)
      clmax <- cutree(hc, k = max_nc)
      if (nc >= 2) {
        cl0 <- cutree(hc, k = nc - 1)
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 1) {
        cl0 <- rep(NA, nn)
        clall1 <- cbind(cl0, cl1, cl2)
      }
    }
    if (method == 8) {
      cl2 <- kmeans(jeu, nc + 1)$cluster
      clmax <- kmeans(jeu, max_nc)$cluster
      if (nc > 2) {
        cl1 <- kmeans(jeu, nc)$cluster
        clall <- cbind(cl1, cl2)
        cl0 <- kmeans(jeu, nc - 1)$cluster
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 2) {
        cl1 <- kmeans(jeu, nc)$cluster
        clall <- cbind(cl1, cl2)
        cl0 <- rep(1, nn)
        clall1 <- cbind(cl0, cl1, cl2)
      }
      if (nc == 1) {
        stop("Number of clusters must be higher than 2")
      }
    }
    j <- table(cl1)
    s <- sum(j == 1)
    j2 <- table(cl2)
    s2 <- sum(j2 == 1)
    
    
    if(any(indice == 31 || indice == 32))
    {
      # indices.traces
      if(nc < 2)
      {
        res[nc - min_nc + 1, "index.Hartigan"] <- Indices.Traces(jeu, md, clall1, index = "hart")
        res[nc - min_nc + 1, "index.Ball"] <-  Indices.Traces(jeu, md, clall1, index = "ball")
        res[nc - min_nc + 1, c("index.KL", "index.CH",
                               "index.Ratkowsky")]  <- NA
      } else if (nc >= 2)
      {
        res[nc - min_nc + 1, c("index.KL", "index.CH", "index.Hartigan",
                               "index.Ratkowsky", "index.Ball")]  <-
          Indices.Traces(jeu, md, clall1, index = "all")
      }
      
      # indices WBT (mod)
      WBT <- Indices.WBT_mod(x = jeu, cl = cl1, 
                             P = TT)
      res[nc - min_nc + 1, c("index.TrCovW", "index.TraceW",
                             "index.Rubin")] <- unlist(WBT)
      
      
      # indices WKWL
      WKWL <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)
      res[nc - min_nc + 1, "index.Duda"] <- WKWL$duda
      if(method != 8)
      {
        res[nc - min_nc + 1, "index.Pseudot2"] <- WKWL$pseudot2
      } else  res[nc - min_nc + 1, "index.Pseudot2"] <- NA
      
      NM <- WKWL$NM
      NK <- WKWL$NK
      NL <- WKWL$NL
      zz <- 3.2
      zzz <- zz * sqrt(2 * (1 - 8/((pi^2) * pp))/(NM * pp))
      resCritical[nc - min_nc + 1, "CritValue_Duda"] <- critValue <- 1 - 
        (2/(pi * pp)) - zzz
      if (method == 8) 
      {resCritical[nc - min_nc + 1, "CritValue_PseudoT2"] <- NA
      } else {
        critValue <- 1 - (2/(pi * pp)) - zzz
        resCritical[nc - min_nc + 1, "CritValue_PseudoT2"] <- ((1 - critValue)/critValue) * 
          (NK + NL - 2)
      }
      
      # indices frey and mcclain
      if(nc >= 2)
      {
        I1528 <- Index.15and28(cl1 = cl1, cl2 = cl2, md = md)
        if (method != 8)
        {
          res[nc - min_nc + 1, c("index.Frey", "index.McClain")] <- unlist(I1528)
        } else if(method == 8)
        {
          res[nc - min_nc + 1, "index.Frey"] <- NA
          res[nc - min_nc + 1, "index.McClain"] <- I1528$mcclain
        }
      } else if (nc < 2)
      {
        res[nc - min_nc + 1, c("index.Frey", "index.McClain")] <- NA
      }
      
      # indices gamma gplus and tau
      if(nc >=2)
      {
        ISPM <- Index.sPlussMoins(cl1 = cl1, md = md)
        if (method == 8) 
        {
          res[nc - min_nc + 1, "index.Gamma"] <- NA
        } else res[nc - min_nc + 1, "index.Gamma"] <- ISPM$gamma 
        res[nc - min_nc + 1, "index.Gplus"] <- ISPM$gplus
        res[nc - min_nc + 1, "index.Tau"] <- ISPM$tau
      } else if (nc < 2)
      {
        res[nc - min_nc + 1, c("index.Gamma", "index.Gplus", "index.Tau")] <- NA
      }
    }
    
    # compute the specific indices according to value "indice"
    if (indice == 3) {
      res[nc - min_nc + 1, "index.Hartigan"] <- Indices.Traces(jeu, md, 
                                                               clall1, index = "hart")
    }
    # method ccc excluded
    #   if (any(indice == 4) || (indice == 31) || (indice == 
    #                                                32)) {
    #     res[nc - min_nc + 1, 5] <- Indices.WBT_mod(x = jeu, cl = cl1, 
    #                                                P = TT)$ccc
    #  }
    # method scott excluded
    #   if (any(indice == 5) || (indice == 31) || (indice == 
    #                                                32)) {
    #     res[nc - min_nc + 1, 6] <- Indices.WBT_mod(x = jeu, cl = cl1, 
    #                                                P = TT)$scott
    #   }
    # method marriott excluded
    #   if (any(indice == 6) || (indice == 31) || (indice == 
    #                                                32)) {
    #     res[nc - min_nc + 1, 7] <- Indices.WBT_mod(x = jeu, cl = cl1, 
    #                                                P = TT)$marriot
    #   }
    
    if (indice == 7) {
      res[nc - min_nc + 1, "index.TrCovW"] <- Indices.WBT_mod(x = jeu, cl = cl1, 
                                                              P = TT)$trcovw
    }
    if (indice == 8) {
      res[nc - min_nc + 1, "index.TraceW"] <- Indices.WBT_mod(x = jeu, cl = cl1, 
                                                              P = TT)$tracew
    }
    # friedman removed
    #   if (any(indice == 9) || (indice == 31) || (indice == 
    #                                                32)) {
    #     res[nc - min_nc + 1, 10] <- Indices.WBT_mod(x = jeu, 
    #                                                 cl = cl1, P = TT)$friedman
    #   }
    if (indice == 10) {
      res[nc - min_nc + 1, "index.Rubin"] <- Indices.WBT_mod(x = jeu, 
                                                             cl = cl1, P = TT)$rubin
    }
    if (indice == 14) {
      res[nc - min_nc + 1, "index.Duda"] <- Indices.WKWL(x = jeu, 
                                                         cl1 = cl1, cl2 = cl2)$duda
    }
    if (indice == 15) {
      if (method == 8) 
      { res[nc - min_nc + 1, 16] <- NA
      } else res[nc - min_nc + 1, "index.Pseudot2"] <- Indices.WKWL(x = jeu, 
                                                                    cl1 = cl1, cl2 = cl2)$pseudot2
    }
    # index beale removed
    #   if (any(indice == 16) || (indice == 31) || (indice == 
    #                                                 32)) {
    #     res[nc - min_nc + 1, 17] <- beale <- Indices.WKWL(x = jeu, 
    #                                                       cl1 = cl1, cl2 = cl2)$beale
    #   }
    
    
    # compute critical values for indices duda, pseudot2. Beale is not computed.
    if (any(indice == 14) || (indice == 15) || (indice == 
                                                  16)) {
      NM <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NM
      NK <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NK
      NL <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NL
      zz <- 3.2
      zzz <- zz * sqrt(2 * (1 - 8/((pi^2) * pp))/(NM * 
                                                    pp))
      if (any(indice == 14) || (indice == 31) || (indice == 
                                                    32)) {
        resCritical[nc - min_nc + 1, 2] <- critValue <- 1 - 
          (2/(pi * pp)) - zzz
      }
      if ((indice == 15) || (indice == 31) || (indice == 
                                                 32)) {
        if (method == 8) 
        {resCritical[nc - min_nc + 1, 3] <- NA
        } else {
          critValue <- 1 - (2/(pi * pp)) - zzz
          resCritical[nc - min_nc + 1, 3] <- ((1 - critValue)/critValue) * 
            (NK + NL - 2)
        }
      }
      # beale removed
      #     if (any(indice == 16) || (indice == 31) || (indice == 
      #                                                   32)) {
      #       df2 <- (NM - 2) * pp
      #       resCritical[nc - min_nc + 1, 4] <- 1 - pf(beale, 
      #                                                 pp, df2)
      #     }
    }
    
    if (any(indice == 18)) {
      res[nc - min_nc + 1, "index.Ball"] <- Indices.Traces(jeu, md, 
                                                           clall1, index = "ball")
    }
    if (any(indice == 19) || (indice == 31) || (indice == 
                                                  32)) {
      res[nc - min_nc + 1, "index.ptbiserial"] <- Indice.ptbiserial(x = jeu, 
                                                                    md = md, cl1 = cl1)
    }
    
    # GAP index
    if (any(indice == 20) || (indice == 32)) {
      if (method == 1) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "ward", 
                                 d = NULL, centrotypes = "centroids")
      }
      if (method == 2) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "single", 
                                 d = NULL, centrotypes = "centroids")
      }
      if (method == 3) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "complete", 
                                 d = NULL, centrotypes = "centroids")
      }
      if (method == 4) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "average", 
                                 d = NULL, centrotypes = "centroids")
      }
      if (method == 5) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "mcquitty", 
                                 d = NULL, centrotypes = "centroids")
      }
      if (method == 6) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "median", 
                                 d = NULL, centrotypes = "centroids")
      }
      if (method == 7) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "centroid", 
                                 d = NULL, centrotypes = "centroids")
      }
      if (method == 8) {
        resultSGAP <- Indice.Gap(x = jeu, clall = clall, 
                                 reference.distribution = "unif", B = 10, method = "k-means", 
                                 d = NULL, centrotypes = "centroids")
      }
      res[nc - min_nc + 1, "index.Gap"] <- resultSGAP$gap
      resCritical[nc - min_nc + 1, "CritValue_Gap" ] <- resultSGAP$diffu
    }
    
    
    if (nc >= 2) {
      if (any(indice == 1)) {
        res[nc - min_nc + 1, "index.KL"] <- Indices.Traces(jeu, 
                                                           md, clall1, index = "kl")
      }
      if (any(indice == 2)) {
        res[nc - min_nc + 1, "index.CH"] <- Indices.Traces(jeu, 
                                                           md, clall1, index = "ch")
      }
      
      if (any(indice == 11) || (indice == 31) || (indice == 
                                                    32)) {
        res[nc - min_nc + 1, "index.Cindex"] <- Indice.cindex(d = md, 
                                                              cl = cl1)
      }
      if (any(indice == 12) || (indice == 31) || (indice == 
                                                    32)) {
        res[nc - min_nc + 1, "index.DB"] <- Indice.DB(x = jeu, 
                                                      cl = cl1, d = NULL, centrotypes = "centroids", 
                                                      p = 2, q = 2)$DB
      }
      if (any(indice == 13) || (indice == 31) || (indice == 
                                                    32)) {
        res[nc - min_nc + 1, "index.Silhouette"] <- Indice.S(d = md, 
                                                             cl = cl1)
      }
      if (any(indice == 17)) {
        res[nc - min_nc + 1, "index.Ratkowsky"] <- Indices.Traces(jeu, 
                                                                  md, clall1, index = "ratkowsky")
      }
      if (indice == 21) {
        if (method == 8) 
        { res[nc - min_nc + 1, "index.Frey"] <- NA
        } else res[nc - min_nc + 1, "index.Frey"] <- Index.15and28(cl1 = cl1, 
                                                                   cl2 = cl2, md = md)$frey
      }
      if (indice == 22) {
        res[nc - min_nc + 1, "index.McClain"] <- Index.15and28(cl1 = cl1, 
                                                               cl2 = cl2, md = md)$mcclain
      }
      
      if (indice == 23) {
        if (method == 8) 
        {res[nc - min_nc + 1, "index.Gamma"] <- NA
        } else res[nc - min_nc + 1, "index.Gamma"] <- Index.sPlussMoins(cl1 = cl1, 
                                                                        md = md)$gamma
      }
      if (indice == 24) {
        res[nc - min_nc + 1, "index.Gplus"] <- Index.sPlussMoins(cl1 = cl1, 
                                                                 md = md)$gplus
      }
      if (indice == 25) {
        res[nc - min_nc + 1, "index.Tau"] <- Index.sPlussMoins(cl1 = cl1, 
                                                               md = md)$tau
      }
      
      
      if (any(indice == 26) || (indice == 31) || (indice == 
                                                    32)) {
        res[nc - min_nc + 1, "index.Dunn"] <- Index.dunn(md, cl1, 
                                                         Data = jeu, method = NULL)
      }
      # hubert removed
      #     if (any(indice == 27) || (indice == 31) || (indice == 
      #                                                   32)) {
      #       res[nc - min_nc + 1, 28] <- Index.Hubert(jeu, 
      #                                                cl1)
      #     }
      if (any(indice == 28) || (indice == 31) || (indice == 
                                                    32)) {
        res[nc - min_nc + 1, "index.SDindex"] <- Index.sdindex(jeu, 
                                                               clmax, cl1)
      }
      # dindex excluded
      #     if (any(indice == 29) || (indice == 31) || (indice == 
      #                                                   32)) {
      #       res[nc - min_nc + 1, 30] <- Index.Dindex(cl1, 
      #                                                jeu)
      #     }
      if (any(indice == 30) || (indice == 31) || (indice == 
                                                    32)) {
        res[nc - min_nc + 1, "index.SDbw"] <- Index.SDbw(jeu, cl1)
      }
    } else {
      res[nc - min_nc + 1, c("index.KL", "index.CH",  "index.Cindex", "index.DB",
                             "index.Silhouette", "index.Ratkowsky", "index.Frey", 
                             "index.McClain", "index.Gamma", "index.Gplus",
                             "index.Tau", "index.Dunn", "index.SDindex",
                             "index.SDbw")] <- NA
    }
  }
  
  
  
  
  # identify the number of clusters suggested by each index
  nc.KL <- indice.KL <- 0
  if (any(indice == 1) || (indice == 31) || (indice == 32)) {
    nc.KL <- (min_nc:max_nc)[which.max(res[, "index.KL" ])]
    indice.KL <- max(res[, "index.KL" ], na.rm = TRUE)
  }
  nc.CH <- indice.CH <- 0
  if (any(indice == 2) || (indice == 31) || (indice == 32)) {
    nc.CH <- (min_nc:max_nc)[which.max(res[, "index.CH"])]
    indice.CH <- max(res[, "index.CH"], na.rm = TRUE)
  }
  # nc.CCC <- indice.CCC <- 0
  # if (any(indice == 4) || (indice == 31) || (indice == 32)) {
  #   nc.CCC <- (min_nc:max_nc)[which.max(res[, 5])]
  #   indice.CCC <- max(res[, 5], na.rm = TRUE)
  # }
  nc.DB <- indice.DB <- 0
  if (any(indice == 12) || (indice == 31) || (indice == 32)) {
    nc.DB <- (min_nc:max_nc)[which.min(res[, "index.DB"])]
    indice.DB <- min(res[, "index.DB"], na.rm = TRUE)
  }
  nc.Silhouette <- indice.Silhouette <- 0
  if (any(indice == 13) || (indice == 31) || (indice == 32)) {
    nc.Silhouette <- (min_nc:max_nc)[which.max(res[, "index.Silhouette"])]
    indice.Silhouette <- max(res[, "index.Silhouette"], na.rm = TRUE)
  }
  
  nc.Gap <- indice.Gap <- 0
  if (any(indice == 20) || (indice == 32)) {
    found <- FALSE
    for (ncG in min_nc:max_nc) {
      if ((resCritical[ncG - min_nc + 1, "CritValue_Gap"] >= 0) && (!found)) {
        ncGap <- ncG
        indiceGap <- res[ncG - min_nc + 1, "index.Gap"]
        found <- TRUE
      }
    }
    if (found) {
      nc.Gap <- ncGap
      indice.Gap <- indiceGap
    } else {
      nc.Gap <- NA
      indice.Gap <- NA
    }
  }
  
  nc.Duda <- indice.Duda <- 0
  if (any(indice == 14) || (indice == 31) || (indice == 32)) {
    foundDuda <- FALSE
    for (ncD in min_nc:max_nc) {
      if ((res[ncD - min_nc + 1, "index.Duda"] >=
             resCritical[ncD - min_nc + 1,"CritValue_Duda"]) && (!foundDuda))
      {
        ncDuda <- ncD
        indiceDuda <- res[ncD - min_nc + 1, "index.Duda"]
        foundDuda <- TRUE
      }
    }
    if (foundDuda) {
      nc.Duda <- ncDuda
      indice.Duda <- indiceDuda
    } else {
      nc.Duda <- NA
      indice.Duda <- NA
    }
  }
  nc.Pseudo <- indice.Pseudo <- 0
  if (any(indice == 15) || (indice == 31) || (indice == 32)) {
    if (method == 8) {
      nc.Pseudo <- NA
      indice.Pseudo <- NA
    } else {
      foundPseudo <- FALSE
      for (ncP in min_nc:max_nc) {
        if ((res[ncP - min_nc + 1, "index.Pseudot2"] <=
               resCritical[ncP - min_nc + 1, "CritValue_PseudoT2"]) && (!foundPseudo))
        {
          ncPseudo <- ncP
          indicePseudo <- res[ncP - min_nc + 1, "index.Pseudot2"]
          foundPseudo <- TRUE
        }
      }
      if (foundPseudo) {
        nc.Pseudo <- ncPseudo
        indice.Pseudo <- indicePseudo
      } else {
        nc.Pseudo <- NA
        indice.Pseudo <- NA
      }
    }
  }
  # nc.Beale <- indice.Beale <- 0
  # if (any(indice == 16) || (indice == 31) || (indice == 32)) {
  #   foundBeale <- FALSE
  #   for (ncB in min_nc:max_nc) {
  #     if ((resCritical[ncB - min_nc + 1, 4] >= alphaBeale) && 
  #           (!foundBeale)) {
  #       ncBeale <- ncB
  #       indiceBeale <- res[ncB - min_nc + 1, 17]
  #       foundBeale <- TRUE
  #     }
  #   }
  #   if (foundBeale) {
  #     nc.Beale <- ncBeale
  #     indice.Beale <- indiceBeale
  #   } else {
  #     nc.Beale <- NA
  #     indice.Beale <- NA
  #   }
  # }
  nc.ptbiserial <- indice.ptbiserial <- 0
  if (any(indice == 19) || (indice == 31) || (indice == 32)) {
    nc.ptbiserial <- (min_nc:max_nc)[which.max(res[, "index.ptbiserial"])]
    indice.ptbiserial <- max(res[, "index.ptbiserial"], na.rm = TRUE)
  }
  foundNC <- foundIndice <- numeric(0)
  nc.Frey <- indice.Frey <- 0
  if (any(indice == 21) || (indice == 31) || (indice == 32)) {
    if (method == 8) {
      nc.Frey <- NA
      indice.Frey <- NA
    } else {
      foundFrey <- FALSE
      i <- 1
      for (ncF in min_nc:max_nc) {
        if (!is.na(res[ncF - min_nc + 1, "index.Frey"]))
        {
          if (res[ncF - min_nc + 1, "index.Frey"] < 1)
          {
            if (ncF != min_nc) {
              ncFrey <- ncF - 1
              indiceFrey <- res[ncF - min_nc, "index.Frey"]
              foundFrey <- TRUE
              foundNC[i] <- ncFrey
              foundIndice[i] <- indiceFrey
              i <- i + 1
            }
          }
        }
      }
      if (foundFrey) {
        nc.Frey <- foundNC[1]
        indice.Frey <- foundIndice[1]
      } else {
        nc.Frey <- NA
        indice.Frey <- NA
        print(paste("No clustering structure in this data set"))
      }
    }
  }
  nc.McClain <- indice.McClain <- 0
  if (any(indice == 22) || (indice == 31) || (indice == 32)) {
    nc.McClain <- (min_nc:max_nc)[which.min(res[, "index.McClain"])]
    indice.McClain <- min(res[, "index.McClain"], na.rm = TRUE)
  }
  nc.Gamma <- indice.Gamma <- 0
  if (any(indice == 23) || (indice == 31) || (indice == 32)) {
    if (method == 8) {
      nc.Gamma <- NA
      indice.Gamma <- NA
    } else {
      nc.Gamma <- (min_nc:max_nc)[which.max(res[, "index.Gamma"])]
      indice.Gamma <- max(res[, "index.Gamma"], na.rm = TRUE)
    }
  }
  
  nc.Gplus <- indice.Gplus <- 0
  if (any(indice == 24) || (indice == 31) || (indice == 32)) {
    nc.Gplus <- (min_nc:max_nc)[which.min(res[, "index.Gplus"])]
    indice.Gplus <- min(res[, "index.Gplus"], na.rm = TRUE)
  }
  nc.Tau <- indice.Tau <- 0
  if (any(indice == 25) || (indice == 31) || (indice == 32)) {
    nc.Tau <- (min_nc:max_nc)[which.max(res[, "index.Tau"])]
    indice.Tau <- max(res[, "index.Tau"], na.rm = TRUE)
  }
  
  if ((indice == 3) || (indice == 5) || (indice == 6) ||
        (indice == 7) || (indice == 8) || (indice == 9) || (indice == 10) || 
        (indice == 18) || (indice == 27) || (indice == 11) || 
        (indice == 29) || (indice == 31) || (indice == 32)) {
    DiffLev <- array(0, c(max_nc - min_nc + 1, 6))
    DiffLev[, 1] <- min_nc:max_nc
    colnames(DiffLev) <- c("N", "hartigan", "trcovw", "tracew", "rubin", "ball")
    
    for (nc3 in min_nc:max_nc) {
      if (nc3 == min_nc) {
        DiffLev[nc3 - min_nc + 1, c("hartigan", "trcovw", "tracew", 
                                    "rubin",  "ball")] <- NA
      } else {
        if (nc3 == max_nc) {
          DiffLev[nc3 - min_nc + 1, "hartigan"] <- abs(res[
            nc3 - min_nc + 1, "index.Hartigan"] - res[nc3 - min_nc, "index.Hartigan"])
          # scott removed 
          #         DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 - 
          #                                                   min_nc + 1, 6] - res[nc3 - min_nc, 6])
          # marriott removed
          #         DiffLev[nc3 - min_nc + 1, 4] <- abs(res[nc3 - 
          #                                                   min_nc + 1, 7] - NA)
          DiffLev[nc3 - min_nc + 1, "trcovw"] <- abs(res[
            nc3 - min_nc + 1, "index.TrCovW"] - res[nc3 - min_nc, 8])
          DiffLev[nc3 - min_nc + 1,  "tracew"] <- NA
          # friedman removed
          #         DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 - 
          #                                                   min_nc + 1, 10] - res[nc3 - min_nc, 10])
          DiffLev[nc3 - min_nc + 1, "rubin"] <- NA
          DiffLev[nc3 - min_nc + 1, "ball"] <- abs(res[
            nc3 - min_nc + 1,  "index.Ball"] - res[nc3 - min_nc,  "index.Ball"])
          # hubert removed
          #         DiffLev[nc3 - min_nc + 1, 10] <- abs(res[nc3 - 
          #                                                    min_nc + 1, 28] - NA)
          # dindex removed
          #         DiffLev[nc3 - min_nc + 1, 12] <- abs(res[nc3 - 
          #                                                    min_nc + 1, 30] - NA)
        } else {
          DiffLev[nc3 - min_nc + 1, "hartigan"] <-
            abs(res[nc3 - min_nc + 1, "index.Hartigan"] - res[nc3 - min_nc, "index.Hartigan"])
          # scott removed
          #         DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 - 
          #                                                   min_nc + 1, 6] - res[nc3 - min_nc, 6])
          # marriott removed
          #         DiffLev[nc3 - min_nc + 1, 4] <- ((res[nc3 - 
          #                                                 min_nc + 2, 7] - res[nc3 - min_nc + 1, 7]) - 
          #                                            (res[nc3 - min_nc + 1, 7] - res[nc3 - min_nc, 
          #                                                                            7]))
          
          DiffLev[nc3 - min_nc + 1, "trcovw"] <- abs(res[
            nc3 - min_nc + 1, "index.TrCovW"] - res[nc3 - min_nc, "index.TrCovW"])
          
          DiffLev[nc3 - min_nc + 1, "tracew"] <- 
            ((res[nc3 -  min_nc + 2, "index.TraceW"] - res[nc3 - min_nc + 1, "index.TraceW"]) -
               (res[nc3 - min_nc + 1, "index.TraceW" ] - res[nc3 - min_nc, "index.TraceW" ]))
          # friedman removed
          #         DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 - 
          #                                                   min_nc + 1, 10] - res[nc3 - min_nc, 10])
          DiffLev[nc3 - min_nc + 1, "rubin"] <-
            ((res[nc3 - min_nc + 2, "index.Rubin"] - res[nc3 - min_nc + 1, "index.Rubin"]) - 
               (res[nc3 - min_nc + 1, "index.Rubin"] - res[nc3 - min_nc,  "index.Rubin"]))
          DiffLev[nc3 - min_nc + 1, "ball"] <-
            abs(res[nc3 - min_nc + 1, "index.Ball"] - res[nc3 - min_nc, "index.Ball"])
          # hubert removed
          #         DiffLev[nc3 - min_nc + 1, 10] <- abs((res[nc3 - 
          #                                                     min_nc + 1, 28] - res[nc3 - min_nc, 28]))
          # dindex removed
          #         DiffLev[nc3 - min_nc + 1, 12] <- ((res[nc3 - 
          #                                                  min_nc + 2, 30] - res[nc3 - min_nc + 1, 30]) - 
          #                                             (res[nc3 - min_nc + 1, 30] - res[nc3 - min_nc, 
          #                                                                              30]))
        }
      }
    }
  }
  
  
  
  nc.Hartigan <- indice.Hartigan <- 0
  if (any(indice == 3) || (indice == 31) || (indice == 32)) {
    nc.Hartigan <- DiffLev[, "N"][which.max(DiffLev[, "hartigan"])]
    indice.Hartigan <- max(DiffLev[, "hartigan"], na.rm = TRUE)
  }
  nc.Ratkowsky <- indice.Ratkowsky <- 0
  if (any(indice == 17) || (indice == 31) || (indice == 32)) {
    nc.Ratkowsky <- (min_nc:max_nc)[which.max(res[, "index.Ratkowsky" ])]
    indice.Ratkowsky <- max(res[, "index.Ratkowsky" ], na.rm = TRUE)
  }
  nc.cindex <- indice.cindex <- 0
  if (any(indice == 11) || (indice == 31) || (indice == 32)) {
    nc.cindex <- (min_nc:max_nc)[which.min(res[, "index.Cindex"])]
    indice.cindex <- min(res[, "index.Cindex"], na.rm = TRUE)
  }
  # scott removed 
  # nc.Scott <- indice.Scott <- 0
  # if (any(indice == 5) || (indice == 31) || (indice == 32)) {
  #   nc.Scott <- DiffLev[, 1][which.max(DiffLev[, 3])]
  #   indice.Scott <- max(DiffLev[, 3], na.rm = TRUE)
  # }
  # marriott removed
  # nc.Marriot <- indice.Marriot <- 0
  # if (any(indice == 6) || (indice == 31) || (indice == 32)) {
  #   nc.Marriot <- DiffLev[, 1][which.max(DiffLev[, 4])]
  #   round(nc.Marriot, digits = 1)
  #   indice.Marriot <- max(DiffLev[, 4], na.rm = TRUE)
  # }
  nc.TrCovW <- indice.TrCovW <- 0
  if (any(indice == 7) || (indice == 31) || (indice == 32)) {
    nc.TrCovW <- DiffLev[, "N"][which.max(DiffLev[, "trcovw"])]
    indice.TrCovW <- max(DiffLev[, "trcovw"], na.rm = TRUE)
  }
  nc.TraceW <- indice.TraceW <- 0
  if (any(indice == 8) || (indice == 31) || (indice == 32)) {
    nc.TraceW <- DiffLev[, "N"][which.max(DiffLev[, "tracew"])]
    indice.TraceW <- max(DiffLev[, "tracew"], na.rm = TRUE)
  }
  # friedman removed
  # nc.Friedman <- indice.Friedman <- 0
  # if (any(indice == 9) || (indice == 31) || (indice == 32)) {
  #   nc.Friedman <- DiffLev[, 1][which.max(DiffLev[, 7])]
  #   indice.Friedman <- max(DiffLev[, 7], na.rm = TRUE)
  # }
  nc.Rubin <- indice.Rubin <- 0
  if (any(indice == 10) || (indice == 31) || (indice == 32)) {
    nc.Rubin <- DiffLev[, "N"][which.min(DiffLev[, "rubin"])]
    indice.Rubin <- min(DiffLev[, "rubin"], na.rm = TRUE)
  }
  nc.Ball <- indice.Ball <- 0
  if (any(indice == 18) || (indice == 31) || (indice == 32)) {
    nc.Ball <- DiffLev[, "N"][which.max(DiffLev[, "ball"])]
    indice.Ball <- max(DiffLev[, "ball"], na.rm = TRUE)
  }
  nc.Dunn <- indice.Dunn <- 0
  if (any(indice == 26) || (indice == 31) || (indice == 32)) {
    nc.Dunn <- (min_nc:max_nc)[which.max(res[, "index.Dunn"])]
    indice.Dunn <- max(res[, "index.Dunn"], na.rm = TRUE)
  }
  # hubert removed
  # nc.Hubert <- indice.Hubert <- 0
  # if (any(indice == 27) || (indice == 31) || (indice == 32)) {
  #   nc.Hubert <- 0
  #   indice.Hubert <- 0
  #   par(mfrow = c(1, 2))
  #   plot(res[, 1], res[, 28], tck = 0, type = "b", col = "red", 
  #        xlab = expression(paste("Number of clusters ")), 
  #        ylab = expression(paste("Index Value")), main = "Normalized Hubert Statistic")
  #   plot(DiffLev[, 1], DiffLev[, 10], tck = 0, type = "b", 
  #        col = "blue", xlab = expression(paste("Number of clusters ")), 
  #        ylab = expression(paste("Index Value")), main = "Second Differences of Hubert index")
  #   print(paste("*** : The Hubert index is a graphical method of determining the number of clusters. In the plot of Hubert index, we seek a significant knee that corresponds to a significant increase of the value of the measure i.e the significant peak in Hubert index second differences plot."))
  # }
  nc.sdindex <- indice.sdindex <- 0
  if (any(indice == 28) || (indice == 31) || (indice == 32)) {
    nc.sdindex <- (min_nc:max_nc)[which.min(res[, "index.SDindex"])]
    indice.sdindex <- min(res[, "index.SDindex"], na.rm = TRUE)
  }
  # Dindex removed
  # nc.Dindex <- indice.Dindex <- 0
  # if (any(indice == 29) || (indice == 31) || (indice == 32)) {
  #   nc.Dindex <- 0
  #   indice.Dindex <- 0
  #   par(mfrow = c(1, 2))
  #   plot(res[, 1], res[, 30], tck = 0, type = "b", col = "red", 
  #        xlab = expression(paste("Number of clusters ")), 
  #        ylab = expression(paste("Index Value")), main = "Dindex")
  #   plot(DiffLev[, 1], DiffLev[, 12], tck = 0, type = "b", 
  #        col = "blue", xlab = expression(paste("Number of clusters ")), 
  #        ylab = expression(paste("Index Value")), main = "Second Differences of D index")
  #   print(paste("*** : The D index is a graphical method of determining the number of clusters. In the plot of D index, we seek a significant knee (the significant peak in Dindex second differences plot) that corresponds to a significant increase of the value of the measure."))
  # }
  nc.SDbw <- indice.SDbw <- 0
  if (any(indice == 30) || (indice == 31) || (indice == 32)) {
    nc.SDbw <- (min_nc:max_nc)[which.min(res[, "index.SDbw"])]
    indice.SDbw <- min(res[, "index.SDbw"], na.rm = TRUE)
  }
  
  # prepare the output
  if (indice < 31) {
    indice2 <- c("index.KL", "index.CH", "index.Hartigan", "ccc_removed", "scott_removed",
                 "marriot_removed", "index.TrCovW", "index.TraceW", "friedman_removed",
                 "index.Rubin", "index.Cindex", "index.DB", "index.Silhouette",
                 "index.Duda", "index.Pseudot2", "beale_removed",
                 "index.Ratkowsky", "index.Ball", "index.ptbiserial",
                 "index.Gap", "index.Frey", "index.McClain", "index.Gamma", "index.Gplus",     
                 "index.Tau", "index.Dunn",  "hubert_removed", "index.SDindex",
                 "dindex_removed", "index.SDbw")[indice]
    
    res <- res[, c("nc.Ward", indice2)]
    if (indice == 15) {
      if (method == 8) {
        res <- NA
        resCritical <- NA
        print(paste("Pseudot2 index can be applied only to hierarchical methods "))
      } else resCritical <- resCritical[, c(1, "CritValue_PseudoT2")]
    }
    if ((indice == 21)) {
      if (method == 8) {
        res <- NA
        print(paste("Frey index can be applied only to hierarchical methods "))
      }
    }
    if ((indice == 23)) {
      if (method == 8) {
        res <- NA
        print(paste("Gamma index can be applied only to hierarchical methods "))
      }
    }
    if (indice == 14) {
      resCritical <- resCritical[, c(1, "CritValue_Duda")]
    }
    # beale removed   
    #   if (indice == 16) {
    #     resCritical <- resCritical[, c(1, 4)]
    #   }
    if (indice == 20) {
      resCritical <- resCritical[, c(1,  "CritValue_Gap")]
    }
  }
  if (indice == 31) {
    res <- res[, c("nc.Ward", "index.KL", "index.CH", "index.Hartigan", "index.TrCovW",    
                   "index.TraceW", "index.Rubin", "index.Cindex", "index.DB", "index.Silhouette",
                   "index.Duda", "index.Pseudot2", "index.Ratkowsky", "index.Ball", "index.ptbiserial",
                   "index.Frey", "index.McClain", "index.Dunn", "index.SDindex", "index.SDbw")]
    resCritical <- resCritical[, c( "nc.CritValue", "CritValue_Duda", "CritValue_PseudoT2")]
    if (method == 8) {
      print(paste("Pseudot2 and Frey indices can be applied only to hierarchical methods "))
    }
  }
  
  if (indice == 32) {
    if (method == 8) {
      print(paste("Pseudot2, Gamma and Frey indices can be applied only to hierarchical methods "))
    }
  }
  
  # resultats is the matrix of results
  if (any(indice == 20) || (indice == 23) || (indice == 24) || 
        (indice == 25) || (indice == 32)) {
    results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan, 
                 indice.Hartigan, # nc.CCC, indice.CCC, nc.Scott, indice.Scott, 
                 # nc.Marriot, indice.Marriot,
                 nc.TrCovW, indice.TrCovW, 
                 nc.TraceW, indice.TraceW, # nc.Friedman, indice.Friedman, 
                 nc.Rubin, indice.Rubin, nc.cindex, indice.cindex, 
                 nc.DB, indice.DB, nc.Silhouette, indice.Silhouette, 
                 nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
                 # nc.Beale, indice.Beale,
                 nc.Ratkowsky, indice.Ratkowsky, nc.Ball, 
                 indice.Ball, nc.ptbiserial, indice.ptbiserial, nc.Gap, 
                 indice.Gap, nc.Frey, indice.Frey, nc.McClain, indice.McClain, 
                 nc.Gamma, indice.Gamma, nc.Gplus, indice.Gplus, nc.Tau, 
                 indice.Tau, nc.Dunn, indice.Dunn, # nc.Hubert, indice.Hubert, 
                 nc.sdindex, indice.sdindex, # nc.Dindex, indice.Dindex, 
                 nc.SDbw, indice.SDbw)
    resultats <- matrix(c(results), nrow = 2, ncol = 23, 
                        dimnames = list(c("Number_clusters", "Value_Index"), 
                                        c("index.KL", "index.CH", "index.Hartigan", #"index.CCC", 
                                          #"index.Scott", "index.Marriot",
                                          "index.TrCovW", 
                                          "index.TraceW", #"index.Friedman",
                                          "index.Rubin", 
                                          "index.Cindex", "index.DB", "index.Silhouette", 
                                          "index.Duda", "index.PseudoT2", # "index.Beale", 
                                          "index.Ratkowsky", "index.Ball", "index.PtBiserial", 
                                          "index.Gap", "index.Frey", "index.McClain", 
                                          "index.Gamma", "index.Gplus", "index.Tau", 
                                          "index.Dunn", #"index.Hubert",
                                          "index.SDindex", 
                                          #"index.Dindex",
                                          "index.SDbw")))
  } else {
    results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan, 
                 indice.Hartigan, #nc.CCC, indice.CCC, nc.Scott, indice.Scott, 
                 #nc.Marriot, indice.Marriot,
                 nc.TrCovW, indice.TrCovW, 
                 nc.TraceW, indice.TraceW, # nc.Friedman, indice.Friedman, 
                 nc.Rubin, indice.Rubin, nc.cindex, indice.cindex, 
                 nc.DB, indice.DB, nc.Silhouette, indice.Silhouette, 
                 nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
                 # nc.Beale,  indice.Beale,
                 nc.Ratkowsky, indice.Ratkowsky, nc.Ball, 
                 indice.Ball, nc.ptbiserial, indice.ptbiserial, nc.Frey, 
                 indice.Frey, nc.McClain, indice.McClain, nc.Dunn, 
                 indice.Dunn, #nc.Hubert, indice.Hubert,
                 nc.sdindex, indice.sdindex, #nc.Dindex, indice.Dindex,
                 nc.SDbw, indice.SDbw)
    resultats <- matrix(c(results), nrow = 2, ncol = 19, 
                        dimnames = list(c("Number_clusters", "Value_Index"), 
                                        c("index.KL", "index.CH", "index.Hartigan", #"index.CCC", 
                                          #"index.Scott", "index.Marriot",
                                          "index.TrCovW", 
                                          "index.TraceW", #"index.Friedman",
                                          "index.Rubin", 
                                          "index.Cindex", "index.DB", "index.Silhouette", 
                                          "index.Duda", "index.PseudoT2", #"index.Beale", 
                                          "index.Ratkowsky", "index.Ball", "index.PtBiserial", 
                                          "index.Frey", "index.McClain", "index.Dunn", 
                                          #"index.Hubert", "index.SDindex",
                                          "index.Dindex", 
                                          "index.SDbw")))
  }
  
  if(indice < 31)
    resultats <- resultats[, colnames(resultats) == indice2]
  
  resultats <- as.matrix(resultats)
  resultats["Number_clusters", ] <- round(resultats["Number_clusters", ], digits = 2)
  resultats <- round(resultats, digits = 4)
  if (numberObsAfter != numberObsBefore) {
    print(paste(numberObsAfter, "observations were used out of", 
                numberObsBefore, "possible observations due to missing values."))
  }
  # I did not love this message
  # if (numberObsAfter == numberObsBefore) {
  #   print(paste("All", numberObsAfter, "observations were used."))
  # }
  if ((indice == 14) || (indice == 15) || (indice == 16) || 
        (indice == 20) || (indice == 31) || (indice == 32)) {
    results.final <- list(All.index = res, All.CriticalValues = resCritical, 
                          Best.nc = resultats)
  } else {
    results.final <- list(All.index = res, Best.nc = resultats)
  }
  return(results.final)
}







# Load the necessary functions
# FUNCTION INDICES.WBT HAS BEEN MODIFIED
# to prevent crashes connected to singular matrices
centers <- function(cl, x) {
  x <- as.matrix(x)
  n <- length(cl)
  k <- max(cl)
  centers <- matrix(nrow = k, ncol = ncol(x))
{
    for (i in 1:k) {
      for (j in 1:ncol(x)) {
        centers[i, j] <- mean(x[cl == i, j])
      }
    }
  }
return(centers)
}
Average.scattering <- function(cl, x) {
  x <- as.matrix(x)
  n <- length(cl)
  k <- max(cl)
  centers.matrix <- centers(cl, x)
  cluster.size <- numeric(0)
  variance.clusters <- matrix(0, ncol = ncol(x), nrow = k)
  var <- matrix(0, ncol = ncol(x), nrow = k)
  for (u in 1:k) cluster.size[u] <- sum(cl == u)
  for (u in 1:k) {
    for (j in 1:ncol(x)) {
      for (i in 1:n) {
        if (cl[i] == u) 
          variance.clusters[u, j] <- variance.clusters[u, 
                                                       j] + (x[i, j] - centers.matrix[u, j])^2
      }
    }
  }
  for (u in 1:k) {
    for (j in 1:ncol(x)) variance.clusters[u, j] = variance.clusters[u, 
                                                                     j]/cluster.size[u]
  }
  variance.matrix <- numeric(0)
  for (j in 1:ncol(x)) variance.matrix[j] = var(x[, j]) * 
    (n - 1)/n
  Somme.variance.clusters <- 0
  for (u in 1:k) Somme.variance.clusters <- Somme.variance.clusters + 
    sqrt((variance.clusters[u, ] %*% (variance.clusters[u, 
                                                        ])))
  stdev <- (1/k) * sqrt(Somme.variance.clusters)
  scat <- (1/k) * (Somme.variance.clusters/sqrt(variance.matrix %*% 
                                                  variance.matrix))
  scat <- list(stdev = stdev, centers = centers.matrix, 
               variance.intraclusters = variance.clusters, scatt = scat)
  return(scat)
}
density.clusters <- function(cl, x) {
  x <- as.matrix(x)
  k <- max(cl)
  n <- length(cl)
  distance <- matrix(0, ncol = 1, nrow = n)
  density <- matrix(0, ncol = 1, nrow = k)
  centers.matrix <- centers(cl, x)
  stdev <- Average.scattering(cl, x)$stdev
  for (i in 1:n) {
    u = 1
    while (cl[i] != u) u <- u + 1
    for (j in 1:ncol(x)) {
      distance[i] <- distance[i] + (x[i, j] - centers.matrix[u, 
                                                             j])^2
    }
    distance[i] <- sqrt(distance[i])
    if (distance[i] <= stdev) 
      density[u] = density[u] + 1
  }
  dens <- list(distance = distance, density = density)
  return(dens)
}
density.bw <- function(cl, x) {
  x <- as.matrix(x)
  k <- max(cl)
  n <- length(cl)
  centers.matrix <- centers(cl, x)
  stdev <- Average.scattering(cl, x)$stdev
  density.bw <- matrix(0, ncol = k, nrow = k)
  u <- 1
  for (u in 1:k) {
    for (v in 1:k) {
      if (v != u) {
        distance <- matrix(0, ncol = 1, nrow = n)
        moy <- (centers.matrix[u, ] + centers.matrix[v, 
                                                     ])/2
        for (i in 1:n) {
          if ((cl[i] == u) || (cl[i] == v)) {
            for (j in 1:ncol(x)) {
              distance[i] <- distance[i] + (x[i, j] - 
                                              moy[j])^2
            }
            distance[i] <- sqrt(distance[i])
            if (distance[i] <= stdev) {
              density.bw[u, v] <- density.bw[u, v] + 
                1
            }
          }
        }
      }
    }
  }
  density.clust <- density.clusters(cl, x)$density
  S <- 0
  for (u in 1:k) for (v in 1:k) {
    if (max(density.clust[u], density.clust[v]) != 0) 
      S = S + (density.bw[u, v]/max(density.clust[u], 
                                    density.clust[v]))
  }
  density.bw <- S/(k * (k - 1))
  return(density.bw)
}
Dis <- function(cl, x) {
  x <- as.matrix(x)
  k <- max(cl)
  centers.matrix <- centers(cl, x)
  Distance.centers <- dist(centers.matrix)
  Dmin <- min(Distance.centers)
  Dmax <- max(Distance.centers)
  Distance.centers <- as.matrix(Distance.centers)
  s2 <- 0
  for (u in 1:k) {
    s1 = 0
    for (j in 1:ncol(Distance.centers)) {
      s1 <- s1 + Distance.centers[u, j]
    }
    s2 <- s2 + 1/s1
  }
  Dis <- (Dmax/Dmin) * s2
  return(Dis)
}
Index.Hubert <- function(x, cl) {
  k <- max(cl)
  n <- dim(x)[1]
  y <- matrix(0, ncol = dim(x)[2], nrow = n)
  P <- as.matrix(md)
  meanP <- mean(P)
  variance.matrix <- numeric(0)
  md <- dist(x, method = "euclidean")
  for (j in 1:n) variance.matrix[j] = var(P[, j]) * (n - 
                                                       1)/n
  varP <- sqrt(variance.matrix %*% variance.matrix)
  centers.clusters <- centers(cl, x)
  for (i in 1:n) {
    for (u in 1:k) {
      if (cl[i] == u) 
        y[i, ] <- centers.clusters[u, ]
    }
  }
  Q <- as.matrix(dist(y, method = "euclidean"))
  meanQ <- mean(Q)
  for (j in 1:n) variance.matrix[j] = var(Q[, j]) * (n - 
                                                       1)/n
  varQ <- sqrt(variance.matrix %*% variance.matrix)
  M <- n * (n - 1)/2
  S <- 0
  n1 <- n - 1
  for (i in 1:n1) {
    j <- i + 1
    while (j <= n) {
      S <- S + (P[i, j] - meanP) * (Q[i, j] - meanQ)
      j <- j + 1
    }
  }
  gamma <- S/(M * varP * varQ)
  return(gamma)
}
Index.sPlussMoins <- function(cl1, md) {
  cn1 <- max(cl1)
  n1 <- length(cl1)
  dmat <- as.matrix(md)
  average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
  separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
  di <- list()
  for (u in 1:cn1) {
    cluster.size[u] <- sum(cl1 == u)
    du <- as.dist(dmat[cl1 == u, cl1 == u])
    within.dist1 <- c(within.dist1, du)
    average.distance[u] <- mean(du)
    median.distance[u] <- median(du)
    bv <- numeric(0)
    for (v in 1:cn1) {
      if (v != u) {
        suv <- dmat[cl1 == u, cl1 == v]
        bv <- c(bv, suv)
        if (u < v) {
          separation.matrix[u, v] <- separation.matrix[v, 
                                                       u] <- min(suv)
          between.dist1 <- c(between.dist1, suv)
        }
      }
    }
  }
  nwithin1 <- length(within.dist1)
  nbetween1 <- length(between.dist1)
  meanwithin1 <- mean(within.dist1)
  meanbetween1 <- mean(between.dist1)
  s.plus <- s.moins <- 0
  for (k in 1:nwithin1) {
    s.plus <- s.plus + (colSums(outer(between.dist1, 
                                      within.dist1[k], ">")))
    s.moins <- s.moins + (colSums(outer(between.dist1, 
                                        within.dist1[k], "<")))
  }
  Index.Gamma <- (s.plus - s.moins)/(s.plus + s.moins)
  Index.Gplus <- (2 * s.moins)/(n1 * (n1 - 1))
  t.tau <- (nwithin1 * nbetween1) - (s.plus + s.moins)
  Index.Tau <- (s.plus - s.moins)/(((n1 * (n1 - 1)/2 - 
                                       t.tau) * (n1 * (n1 - 1)/2))^(1/2))
  results <- list(gamma = Index.Gamma, gplus = Index.Gplus, 
                  tau = Index.Tau)
  return(results)
}
Index.15and28 <- function(cl1, cl2, md) {
  cn1 <- max(cl1)
  n1 <- length(cl1)
  dmat <- as.matrix(md)
  average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
  separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
  di <- list()
  for (u in 1:cn1) {
    cluster.size[u] <- sum(cl1 == u)
    du <- as.dist(dmat[cl1 == u, cl1 == u])
    within.dist1 <- c(within.dist1, du)
    bv <- numeric(0)
    for (v in 1:cn1) {
      if (v != u) {
        suv <- dmat[cl1 == u, cl1 == v]
        bv <- c(bv, suv)
        if (u < v) {
          separation.matrix[u, v] <- separation.matrix[v, 
                                                       u] <- min(suv)
          between.dist1 <- c(between.dist1, suv)
        }
      }
    }
  }
  cn2 <- max(cl2)
  n2 <- length(cl2)
  dmat <- as.matrix(md)
  average.distance <- median.distance <- separation <- cluster.size <- within.dist2 <- between.dist2 <- numeric(0)
  separation.matrix <- matrix(0, ncol = cn2, nrow = cn2)
  di <- list()
  for (w in 1:cn2) {
    cluster.size[w] <- sum(cl2 == w)
    dw <- as.dist(dmat[cl2 == w, cl2 == w])
    within.dist2 <- c(within.dist2, dw)
    bx <- numeric(0)
    for (x in 1:cn2) {
      if (x != w) {
        swx <- dmat[cl2 == w, cl2 == x]
        bx <- c(bx, swx)
        if (w < x) {
          separation.matrix[w, x] <- separation.matrix[x, 
                                                       w] <- min(swx)
          between.dist2 <- c(between.dist2, swx)
        }
      }
    }
  }
  nwithin1 <- length(within.dist1)
  nbetween1 <- length(between.dist1)
  meanwithin1 <- mean(within.dist1)
  meanbetween1 <- mean(between.dist1)
  meanwithin2 <- mean(within.dist2)
  meanbetween2 <- mean(between.dist2)
  Index.15 <- (meanbetween2 - meanbetween1)/(meanwithin2 - 
                                               meanwithin1)
  Index.28 <- (meanwithin1/nwithin1)/(meanbetween1/nbetween1)
  results <- list(frey = Index.15, mcclain = Index.28)
  return(results)
}
Indice.ptbiserial <- function(x, md, cl1) {
  nn <- dim(x)[1]
  pp <- dim(x)[2]
  md2 <- as.matrix(md)
  m01 <- array(NA, c(nn, nn))
  nbr <- (nn * (nn - 1))/2
  pb <- array(0, c(nbr, 2))
  m3 <- 1
  for (m1 in 2:nn) {
    m12 <- m1 - 1
    for (m2 in 1:m12) {
      if (cl1[m1] == cl1[m2]) 
        m01[m1, m2] <- 0
      if (cl1[m1] != cl1[m2]) 
        m01[m1, m2] <- 1
      pb[m3, 1] <- m01[m1, m2]
      pb[m3, 2] <- md2[m1, m2]
      m3 <- m3 + 1
    }
  }
  y <- pb[, 1]
  x <- pb[, 2]
  biserial.cor <- function(x, y, use = c("all.obs", "complete.obs"), 
                           level = 1) {
    if (!is.numeric(x)) 
      stop("'x' must be a numeric variable.\n")
    y <- as.factor(y)
    if (length(levs <- levels(y)) > 2) 
      stop("'y' must be a dichotomous variable.\n")
    if (length(x) != length(y)) 
      stop("'x' and 'y' do not have the same length")
    use <- match.arg(use)
    if (use == "complete.obs") {
      cc.ind <- complete.cases(x, y)
      x <- x[cc.ind]
      y <- y[cc.ind]
    }
    ind <- y == levs[level]
    diff.mu <- mean(x[ind]) - mean(x[!ind])
    prob <- mean(ind)
    diff.mu * sqrt(prob * (1 - prob))/sd(x)
  }
  ptbiserial <- biserial.cor(x = pb[, 2], y = pb[, 1], 
                             level = 2)
  return(ptbiserial)
}
Indices.WKWL <- function(x, cl1 = cl1, cl2 = cl2) {
  pp <- dim(x)[2]
  wss <- function(x) {
    x <- as.matrix(x)
    n <- length(x)
    centers <- matrix(nrow = 1, ncol = ncol(x))
    if (ncol(x) == 1) {
      centers[1, ] <- mean(x)
    }
    if (is.null(dim(x))) {
      bb <- matrix(x, byrow = FALSE, nrow = 1, ncol = ncol(x))
      centers[1, ] <- apply(bb, 2, mean)
    }    else {
      centers[1, ] <- apply(x, 2, mean)
    }
    x.2 <- sweep(x, 2, centers[1, ], "-")
    withins <- sum(x.2^2)
    wss <- sum(withins)
    return(wss)
  }
  ncg1 <- 1
  ncg1max <- max(cl1)
  while ((sum(cl1 == ncg1) == sum(cl2 == ncg1)) && ncg1 <= 
           ncg1max) {
    ncg1 <- ncg1 + 1
  }
  g1 <- ncg1
  ncg2 <- max(cl2)
  nc2g2 <- ncg2 - 1
  while ((sum(cl1 == nc2g2) == sum(cl2 == ncg2)) && nc2g2 >= 
           1) {
    ncg2 <- ncg2 - 1
    nc2g2 <- nc2g2 - 1
  }
  g2 <- ncg2
  NK <- sum(cl2 == g1)
  WK.x <- x[cl2 == g1, ]
  WK <- wss(x = WK.x)
  NL <- sum(cl2 == g2)
  WL.x <- x[cl2 == g2, ]
  WL <- wss(x = WL.x)
  NM <- sum(cl1 == g1)
  WM.x <- x[cl1 == g1, ]
  WM <- wss(x = WM.x)
  duda <- (WK + WL)/WM
  BKL <- WM - WK - WL
  pseudot2 <- BKL/((WK + WL)/(NK + NL - 2))
  # beale removed
  #   beale <- (BKL/(WK + WL))/(((NM - 1)/(NM - 2)) * (2^(2/pp) - 
  #                                                      1))
  results <- list(duda = duda, pseudot2 = pseudot2, NM = NM, 
                  NK = NK, NL = NL)
  return(results)
}
# Indices.WBT <- function(x, cl, P, s, vv) {
#   n <- dim(x)[1]
#   pp <- dim(x)[2]
#   qq <- max(cl)
#   z <- matrix(0, ncol = qq, nrow = n)
#   clX <- as.matrix(cl)
#   for (i in 1:n) for (j in 1:qq) {
#     z[i, j] == 0
#     if (clX[i, 1] == j) {
#       z[i, j] = 1
#     }
#   }
#   xbar <- solve(t(z) %*% z) %*% t(z) %*% x
#   B <- t(xbar) %*% t(z) %*% z %*% xbar
#   W <- P - B
#   marriot <- (qq^2) * det(W)
#   trcovw <- sum(diag(cov(W)))
#   tracew <- sum(diag(W))
#   if (det(W) != 0) 
#   {  scott <- n * log(det(P)/det(W))
#   } else {
#     print("Error: division by zero!")
#   }
#   friedman <- sum(diag(solve(W) * B))
#   rubin <- sum(diag(P))/sum(diag(W))
#   R2 <- 1 - sum(diag(W))/sum(diag(P))
#   v1 <- 1
#   u <- rep(0, pp)
#   c <- (vv/(qq))^(1/pp)
#   u <- s/c
#   k1 <- sum((u >= 1) == TRUE)
#   p1 <- min(k1, qq - 1)
#   if (all(p1 > 0, p1 < pp)) {
#     for (i in 1:p1) v1 <- v1 * s[i]
#     c <- (v1/(qq))^(1/p1)
#     u <- s/c
#     b1 <- sum(1/(n + u[1:p1]))
#     b2 <- sum(u[p1 + 1:pp]^2/(n + u[p1 + 1:pp]), na.rm = TRUE)
#     E_R2 <- 1 - ((b1 + b2)/sum(u^2)) * ((n - qq)^2/n) * 
#       (1 + 4/n)
#     ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * p1/2)/((0.001 + 
#                                                           E_R2)^1.2))
#   }  else {
#     b1 <- sum(1/(n + u))
#     E_R2 <- 1 - (b1/sum(u^2)) * ((n - qq)^2/n) * (1 + 
#                                                     4/n)
#     ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * pp/2)/((0.001 + 
#                                                           E_R2)^1.2))
#   }
#   results <- list(ccc = ccc, scott = scott, marriot = marriot, 
#                   trcovw = trcovw, tracew = tracew, friedman = friedman, 
#                   rubin = rubin)
#   return(results)
# }

# a modded (crippled) version of WBT to deal with singular W, B and T matrices
# I simply removed indices ccc, scott, marriot, and friedman, which
# required nonsingular matrices.
Indices.WBT_mod <- function(x, cl, P) {
  n <- dim(x)[1]
  pp <- dim(x)[2]
  qq <- max(cl)
  z <- matrix(0, ncol = qq, nrow = n)
  clX <- as.matrix(cl)
  for (i in 1:n) for (j in 1:qq) {
    z[i, j] == 0
    if (clX[i, 1] == j) {
      z[i, j] = 1
    }
  }
  xbar <- solve(t(z) %*% z) %*% t(z) %*% x
  B <- t(xbar) %*% t(z) %*% z %*% xbar
  W <- P - B
  
  # marriot <- (qq^2) * det(W) # detW zero if pp > n
  
  trcovw <- sum(diag(cov(W)))
  tracew <- sum(diag(W))
  #   if (det(W) != 0) 
  #   {  scott <- n * log(det(P)/det(W))
  #   }
  #   else {
  #     print("Error: division by zero!")
  #   }
  #friedman <- sum(diag(solve(W) * B))
  rubin <- sum(diag(P))/sum(diag(W))
  #   R2 <- 1 - sum(diag(W))/sum(diag(P))
  #   v1 <- 1
  #   u <- rep(0, pp)
  #   c <- (vv/(qq))^(1/pp)
  #   u <- s/c
  #   k1 <- sum((u >= 1) == TRUE)
  #   p1 <- min(k1, qq - 1)
  #   if (all(p1 > 0, p1 < pp)) {
  #     for (i in 1:p1) v1 <- v1 * s[i]
  #     c <- (v1/(qq))^(1/p1)
  #     u <- s/c
  #     b1 <- sum(1/(n + u[1:p1]))
  #     b2 <- sum(u[p1 + 1:pp]^2/(n + u[p1 + 1:pp]), na.rm = TRUE)
  #     E_R2 <- 1 - ((b1 + b2)/sum(u^2)) * ((n - qq)^2/n) * 
  #       (1 + 4/n)
  #     ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * p1/2)/((0.001 + 
  #                                                           E_R2)^1.2))
  #   }  else {
  #     b1 <- sum(1/(n + u))
  #     E_R2 <- 1 - (b1/sum(u^2)) * ((n - qq)^2/n) * (1 + 
  #                                                     4/n)
  #     ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * pp/2)/((0.001 + 
  #                                                           E_R2)^1.2))
  #   }
  results <- list(trcovw = trcovw, tracew = tracew, rubin = rubin)
  return(results)
}


Indices.Traces <- function(data, d, clall, index = "all") {
  x <- data
  cl0 <- clall[, 1]
  cl1 <- clall[, 2]
  cl2 <- clall[, 3]
  clall <- clall
  nb.cl0 <- table(cl0)
  nb.cl1 <- table(cl1)
  nb.cl2 <- table(cl2)
  nb1.cl0 <- sum(nb.cl0 == 1)
  nb1.cl1 <- sum(nb.cl1 == 1)
  nb1.cl2 <- sum(nb.cl2 == 1)
  gss <- function(x, cl, d) {
    n <- length(cl)
    k <- max(cl)
    centers <- matrix(nrow = k, ncol = ncol(x))
    for (i in 1:k) {
      if (ncol(x) == 1) {
        centers[i, ] <- mean(x[cl == i, ])
      }
      if (is.null(dim(x[cl == i, ]))) {
        bb <- matrix(x[cl == i, ], byrow = FALSE, nrow = 1, 
                     ncol = ncol(x))
        centers[i, ] <- apply(bb, 2, mean)
      } else {
        centers[i, ] <- apply(x[cl == i, ], 2, mean)
      }
    }
    allmean <- apply(x, 2, mean)
    dmean <- sweep(x, 2, allmean, "-")
    allmeandist <- sum(dmean^2)
    withins <- rep(0, k)
    x.2 <- (x - centers[cl, ])^2
    for (i in 1:k) {
      withins[i] <- sum(x.2[cl == i, ])
    }
    wgss <- sum(withins)
    bgss <- allmeandist - wgss
    results <- list(wgss = wgss, bgss = bgss, centers = centers)
    return(results)
  }
  vargss <- function(x, clsize, varwithins) {
    nvar <- dim(x)[2]
    n <- sum(clsize)
    k <- length(clsize)
    varallmean <- rep(0, nvar)
    varallmeandist <- rep(0, nvar)
    varwgss <- rep(0, nvar)
    for (l in 1:nvar) varallmean[l] <- mean(x[, l])
    vardmean <- sweep(x, 2, varallmean, "-")
    for (l in 1:nvar) {
      varallmeandist[l] <- sum((vardmean[, l])^2)
      varwgss[l] <- sum(varwithins[, l])
    }
    varbgss <- varallmeandist - varwgss
    vartss <- varbgss + varwgss
    zvargss <- list(vartss = vartss, varbgss = varbgss)
    return(zvargss)
  }
  varwithinss <- function(x, centers, cluster) {
    nrow <- dim(centers)[1]
    nvar <- dim(x)[2]
    varwithins <- matrix(0, nrow, nvar)
    x <- (x - centers[cluster, ])^2
    for (l in 1:nvar) {
      for (k in 1:nrow) {
        varwithins[k, l] <- sum(x[cluster == k, l])
      }
    }
    return(varwithins)
  }
  indice.kl <- function(x, clall, d = NULL, centrotypes = "centroids") {
    if (nb1.cl1 > 0) {
      KL <- NA
    }
    if (sum(c("centroids", "medoids") == centrotypes) == 
          0) 
      stop("Wrong centrotypes argument")
    if ("medoids" == centrotypes && is.null(d)) 
      stop("For argument centrotypes = 'medoids' d cannot be null")
    if (!is.null(d)) {
      if (!is.matrix(d)) {
        d <- as.matrix(d)
      }
      row.names(d) <- row.names(x)
    }
    m <- ncol(x)
    g <- k <- max(clall[, 2])
    KL <- abs((g - 1)^(2/m) * gss(x, clall[, 1], d)$wgss - 
                g^(2/m) * gss(x, clall[, 2], d)$wgss)/abs((g)^(2/m) * 
                                                            gss(x, clall[, 2], d)$wgss - (g + 1)^(2/m) * 
                                                            gss(x, clall[, 3], d)$wgss)
    return(KL)
  }
  indice.ch <- function(x, cl, d = NULL, centrotypes = "centroids") {
    if (nb1.cl1 > 0) {
      CH <- NA
    }
    if (sum(c("centroids", "medoids") == centrotypes) == 
          0) 
      stop("Wrong centrotypes argument")
    if ("medoids" == centrotypes && is.null(d)) 
      stop("For argument centrotypes = 'medoids' d cannot be null")
    if (!is.null(d)) {
      if (!is.matrix(d)) {
        d <- as.matrix(d)
      }
      row.names(d) <- row.names(x)
    }
    n <- length(cl)
    k <- max(cl)
    CH <- (gss(x, cl, d)$bgss/(k - 1))/(gss(x, cl, d)$wgss/(n - 
                                                              k))
    return(CH)
  }
  indice.hart <- function(x, clall, d = NULL, centrotypes = "centroids") {
    if (sum(c("centroids", "medoids") == centrotypes) == 
          0) 
      stop("Wrong centrotypes argument")
    if ("medoids" == centrotypes && is.null(d)) 
      stop("For argument centrotypes = 'medoids' d cannot be null")
    if (!is.null(d)) {
      if (!is.matrix(d)) {
        d <- as.matrix(d)
      }
      row.names(d) <- row.names(x)
    }
    n <- nrow(x)
    g <- max(clall[, 1])
    HART <- (gss(x, clall[, 2], d)$wgss/gss(x, clall[, 
                                                     3], d)$wgss - 1) * (n - g - 1)
    return(HART)
  }
  indice.ball <- function(x, cl, d = NULL, centrotypes = "centroids") {
    wgssB <- gss(x, cl, d)$wgss
    qq <- max(cl)
    ball <- wgssB/qq
    return(ball)
  }
  indice.ratkowsky <- function(x, cl, d, centrotypes = "centroids") {
    qq <- max(cl)
    clsize <- table(cl)
    centers <- gss(x, cl, d)$centers
    varwithins <- varwithinss(x, centers, cl)
    zvargss <- vargss(x, clsize, varwithins)
    ratio <- mean(sqrt(zvargss$varbgss/zvargss$vartss))
    ratkowsky <- ratio/sqrt(qq)
    return(ratkowsky)
  }
  indice <- pmatch(index, c("kl", "ch", "hart", "ratkowsky", 
                            "ball", "all"))
  if (is.na(indice)) 
    stop("invalid clustering index")
  if (indice == -1) 
    stop("ambiguous index")
  vecallindex <- numeric(5)
  if (any(indice == 1) || (indice == 6)) 
    vecallindex[1] <- indice.kl(x, clall, d)
  if (any(indice == 2) || (indice == 6)) 
    vecallindex[2] <- indice.ch(x, cl = clall[, 2], d)
  if (any(indice == 3) || (indice == 6)) 
    vecallindex[3] <- indice.hart(x, clall, d)
  if (any(indice == 4) || (indice == 6)) 
    vecallindex[4] <- indice.ratkowsky(x, cl = cl1, d)
  if (any(indice == 5) || (indice == 6)) 
    vecallindex[5] <- indice.ball(x, cl = cl1, d)
  names(vecallindex) <- c("kl", "ch", "hart", "ratkowsky", 
                          "ball")
  if (indice < 6) 
    vecallindex <- vecallindex[indice]
  return(vecallindex)
}
Indice.cindex <- function(d, cl) {
  d <- data.matrix(d)
  DU <- 0
  r <- 0
  v_max <- array(1, max(cl))
  v_min <- array(1, max(cl))
  for (i in 1:max(cl)) {
    n <- sum(cl == i)
    if (n > 1) {
      t <- d[cl == i, cl == i]
      DU = DU + sum(t)/2
      v_max[i] = max(t)
      if (sum(t == 0) == n) 
      {  v_min[i] <- min(t[t != 0])
      } else v_min[i] <- 0
      r <- r + n * (n - 1)/2
    }
  }
  Dmin = min(v_min)
  Dmax = max(v_max)
  if (Dmin == Dmax) 
  { result <- NA
  } else result <- (DU - r * Dmin)/(Dmax * r - Dmin * r)
  result
}
Indice.DB <- function(x, cl, d = NULL, centrotypes = "centroids", 
                      p = 2, q = 2) {
  if (sum(c("centroids") == centrotypes) == 0) 
    stop("Wrong centrotypes argument")
  if (!is.null(d)) {
    if (!is.matrix(d)) {
      d <- as.matrix(d)
    }
    row.names(d) <- row.names(x)
  }
  if (is.null(dim(x))) {
    dim(x) <- c(length(x), 1)
  }
  x <- as.matrix(x)
  n <- length(cl)
  k <- max(cl)
  dAm <- d
  centers <- matrix(nrow = k, ncol = ncol(x))
  if (centrotypes == "centroids") {
    for (i in 1:k) {
      for (j in 1:ncol(x)) {
        centers[i, j] <- mean(x[cl == i, j])
      }
    }
  } else {
    stop("wrong centrotypes argument")
  }
  S <- rep(0, k)
  for (i in 1:k) {
    ind <- (cl == i)
    if (sum(ind) > 1) {
      centerI <- centers[i, ]
      centerI <- rep(centerI, sum(ind))
      centerI <- matrix(centerI, nrow = sum(ind), ncol = ncol(x), 
                        byrow = TRUE)
      S[i] <- mean(sqrt(apply((x[ind, ] - centerI)^2, 
                              1, sum))^q)^(1/q)
    } else S[i] <- 0
  }
  M <- as.matrix(dist(centers, p = p))
  R <- array(Inf, c(k, k))
  r = rep(0, k)
  for (i in 1:k) {
    for (j in 1:k) {
      R[i, j] = (S[i] + S[j])/M[i, j]
    }
    r[i] = max(R[i, ][is.finite(R[i, ])])
  }
  DB = mean(r[is.finite(r)])
  resul <- list(DB = DB, r = r, R = R, d = M, S = S, centers = centers)
  resul
}
Indice.S <- function(d, cl) {
  d <- as.matrix(d)
  Si <- 0
  for (k in 1:max(cl)) {
    if ((sum(cl == k)) <= 1) 
    { Sil <- 1
    } else {
      Sil <- 0
      for (i in 1:length(cl)) {
        if (cl[i] == k) {
          ai <- sum(d[i, cl == k])/(sum(cl == k) - 
                                      1)
          dips <- NULL
          for (j in 1:max(cl)) if (cl[i] != j) 
            if (sum(cl == j) != 1) 
            { dips <- cbind(dips, c((sum(d[i, cl == 
                                             j]))/(sum(cl == j))))
            } else dips <- cbind(dips, c((sum(d[i, cl == 
                                                  j]))))
          bi <- min(dips)
          Sil <- Sil + (bi - ai)/max(c(ai, bi))
        }
      }
    }
    Si <- Si + Sil
  }
  Si/length(cl)
}
Indice.Gap <- function(x, clall, reference.distribution = "unif", 
                       B = 10, method = "ward", d = NULL, centrotypes = "centroids") {
  GAP <- function(X, cl, referenceDistribution, B, method, 
                  d, centrotypes) {
    simgap <- function(Xvec) {
      ma <- max(Xvec)
      mi <- min(Xvec)
      Xout <- runif(length(Xvec), min = mi, max = ma)
      return(Xout)
    }
    pcsim <- function(X, d, centrotypes) {
      if (centrotypes == "centroids") {
        Xmm <- apply(X, 2, mean)
      }
      for (k in (1:dim(X)[2])) {
        X[, k] <- X[, k] - Xmm[k]
      }
      ss <- svd(X)
      Xs <- X %*% ss$v
      Xnew <- apply(Xs, 2, simgap)
      Xt <- Xnew %*% t(ss$v)
      for (k in (1:dim(X)[2])) {
        Xt[, k] <- Xt[, k] + Xmm[k]
      }
      return(Xt)
    }
    if (is.null(dim(x))) {
      dim(x) <- c(length(x), 1)
    }
    ClassNr <- max(cl)
    Wk0 <- 0
    WkB <- matrix(0, 1, B)
    for (bb in (1:B)) {
      if (reference.distribution == "unif") 
      { Xnew <- apply(X, 2, simgap)
      } else if (reference.distribution == "pc") 
      {  Xnew <- pcsim(X, d, centrotypes)
      } else stop("Wrong reference distribution type")
      if (bb == 1) {
        pp <- cl
        if (ClassNr == length(cl)) 
        { pp2 <- 1:ClassNr
        } else if (method == "k-means") 
        {  pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
        } else if (method == "single" || method == "complete" || 
                     method == "average" || method == "ward" || 
                     method == "mcquitty" || method == "median" || 
                     method == "centroid") 
        {  pp2 <- cutree(hclust(dist(Xnew), method = method), 
                         ClassNr)
        } else stop("Wrong clustering method")
        if (ClassNr > 1) {
          for (zz in (1:ClassNr)) {
            Xuse <- X[pp == zz, ]
            Wk0 <- Wk0 + sum(diag(var(Xuse))) * (length(pp[pp == 
                                                             zz]) - 1)/(dim(X)[1] - ClassNr)
            Xuse2 <- Xnew[pp2 == zz, ]
            WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) * 
              (length(pp2[pp2 == zz]) - 1)/(dim(X)[1] - 
                                              ClassNr)
          }
        }
        if (ClassNr == 1) {
          Wk0 <- sum(diag(var(X)))
          WkB[1, bb] <- sum(diag(var(Xnew)))
        }
      }
      if (bb > 1) {
        if (ClassNr == length(cl)) 
        {  pp2 <- 1:ClassNr
        }else if (method == "k-means") 
        {  pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
        }else if (method == "single" || method == "complete" || 
                    method == "average" || method == "ward" || 
                    method == "mcquitty" || method == "median" || 
                    method == "centroid") 
        { pp2 <- cutree(hclust(dist(Xnew), method = method), 
                        ClassNr)
        } else stop("Wrong clustering method")
        if (ClassNr > 1) {
          for (zz in (1:ClassNr)) {
            Xuse2 <- Xnew[pp2 == zz, ]
            WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) * 
              length(pp2[pp2 == zz])/(dim(X)[1] - ClassNr)
          }
        }
        if (ClassNr == 1) {
          WkB[1, bb] <- sum(diag(var(Xnew)))
        }
      }
    }
    Sgap <- mean(log(WkB[1, ])) - log(Wk0)
    Sdgap <- sqrt(1 + 1/B) * sqrt(var(log(WkB[1, ]))) * 
      sqrt((B - 1)/B)
    resul <- list(Sgap = Sgap, Sdgap = Sdgap)
    resul
  }
  if (sum(c("centroids", "medoids") == centrotypes) == 
        0) 
    stop("Wrong centrotypes argument")
  if ("medoids" == centrotypes && is.null(d)) 
    stop("For argument centrotypes = 'medoids' d cannot be null")
  if (!is.null(d)) {
    if (!is.matrix(d)) {
      d <- as.matrix(d)
    }
    row.names(d) <- row.names(x)
  }
  X <- as.matrix(x)
  gap1 <- GAP(X, clall[, 1], reference.distribution, B, 
              method, d, centrotypes)
  gap <- gap1$Sgap
  gap2 <- GAP(X, clall[, 2], reference.distribution, B, 
              method, d, centrotypes)
  diffu <- gap - (gap2$Sgap - gap2$Sdgap)
  resul <- list(gap = gap, diffu = diffu)
  resul
}
Index.sdindex <- function(x, clmax, cl) {
  x <- as.matrix(x)
  Alpha <- Dis(clmax, x)
  Scatt <- Average.scattering(cl, x)$scatt
  Dis0 <- Dis(cl, x)
  SD.indice <- Alpha * Scatt + Dis0
  return(SD.indice)
}
Index.SDbw <- function(x, cl) {
  x <- as.matrix(x)
  Scatt <- Average.scattering(cl, x)$scatt
  Dens.bw <- density.bw(cl, x)
  SDbw <- Scatt + Dens.bw
  return(SDbw)
}
Index.Dindex <- function(cl, x) {
  x <- as.matrix(x)
  distance <- density.clusters(cl, x)$distance
  n <- length(distance)
  S <- 0
  for (i in 1:n) S <- S + distance[i]
  inertieIntra <- S/n
  return(inertieIntra)
}
Index.dunn <- function(md, clusters, Data = NULL, method = "euclidean") {
  distance <- as.matrix(md)
  nc <- max(clusters)
  interClust <- matrix(NA, nc, nc)
  intraClust <- rep(NA, nc)
  for (i in 1:nc) {
    c1 <- which(clusters == i)
    for (j in i:nc) {
      if (j == i) 
        intraClust[i] <- max(distance[c1, c1])
      if (j > i) {
        c2 <- which(clusters == j)
        interClust[i, j] <- min(distance[c1, c2])
      }
    }
  }
  dunn <- min(interClust, na.rm = TRUE)/max(intraClust)
  return(dunn)
}

## end functions
GiulioCostantini/TOMproject documentation built on May 6, 2019, 6:29 p.m.