# NbClust_mod is a modified version of NbClust:
# - it does not compute some indices:
# - Some that required nonsingular scatter matrix (of the transposed data)
# ccc, scott, marriott, friedman, beale
# - Two graphical methods: hubert and dindex.
# - I reduced as much as possible repeated calls to the same function with the
# same parameters, when specifying indiex = "all" or "alllong", to make
# the computations slightly faster.
# The available options for index are:
# "kl", "ch", "hartigan", "cindex", "db", "silhouette", "duda", "pseudot2",
# "ratkowsky", "ball", "ptbiserial", "gap", "frey", "mclain", "gamma", "gplus",
# "tau", "dunn", "sdindex", "sdbw", "trcovw", "tracew", "rubin", "all", "alllong".
# refer to the help of package NbClust for other options
#
# To verify that the function works, just compare its results to the original NbClust
# in a situation in which NbClust works (i.e., use a data matrix with less subjects
# than variables). Inspect each index separately, otherwise the original NbClust
# will probably crash anyway. Gap index produces slightly different results every time.
NbClust_mod <- function (data, diss = "NULL", min.nc = 2,
max.nc = 10, method = "average", index = "alllong",
alphaBeale = 0.1)
{
x <- 0
min_nc <- min.nc
max_nc <- max.nc
jeu1 <- as.matrix(data)
numberObsBefore <- dim(jeu1)[1]
jeu <- na.omit(jeu1)
nn <- numberObsAfter <- dim(jeu)[1]
pp <- dim(jeu)[2]
TT <- t(jeu) %*% jeu
if (diss[1] == "NULL") {
stop("ambiguous distance or dissimilarity matrix")
} else {
md <- diss
}
# define dataframes for output.
# I reduced res from 31 to 24, because of the 7 methods excluded.
# I reduced resCritical by removing Beale column, because beale method is not
# considered.
res <- array(0, c(max_nc - min_nc + 1, 24))
res[, 1] <- min_nc:max_nc
rownames(res) <- min_nc:max_nc
resCritical <- array(0, c(max_nc - min_nc + 1, 4))
resCritical[, 1] <- min_nc:max_nc
rownames(resCritical) <- min_nc:max_nc
colnames(resCritical) <- c("nc.CritValue", "CritValue_Duda",
"CritValue_PseudoT2", "CritValue_Gap")
colnames(res) <- c("nc.Ward", "index.KL", "index.CH",
"index.Hartigan", "index.TrCovW", "index.TraceW",
"index.Rubin", "index.Cindex", "index.DB",
"index.Silhouette", "index.Duda", "index.Pseudot2",
"index.Ratkowsky", "index.Ball", "index.ptbiserial",
"index.Gap", "index.Frey", "index.McClain", "index.Gamma",
"index.Gplus", "index.Tau", "index.Dunn", "index.SDindex",
"index.SDbw")
# perform hierarchical cluster analysis according to the specified method
# if the method is kmeans, the cluster analysis will be performed later
if (is.na(method))
stop("invalid clustering method")
if (method != "kmeans") {
hc <- hclust(md, method = method)
}
method <- pmatch(method, c("ward", "single", "complete",
"average", "mcquitty", "median",
"centroid", "kmeans"))
indice <- pmatch(index, c("kl", "ch", "hartigan", "ccc_removed",
"scott_removed", "marriot_removed", "trcovw", "tracew",
"friedman_removed", "rubin",
"cindex", "db", "silhouette", "duda", "pseudot2", "beale_removed",
"ratkowsky", "ball", "ptbiserial", "gap", "frey", "mcclain",
"gamma", "gplus", "tau", "dunn", "hubert_removed", "sdindex",
"dindex_removed", "sdbw", "all", "alllong"))
if ((indice == 3) || (indice == 5) || (indice == 6) || (indice == 7) ||
(indice == 8) || (indice == 9) || (indice == 10) ||
(indice == 11) || (indice == 18) || (indice == 27) ||
(indice == 29) || (indice == 31) || (indice == 32)) {
if ((max.nc - min.nc) < 2)
stop("The difference between the minimum and the maximum number of clusters must be at least equal to 2")
}
# here the computation starts
for (nc in min_nc:max_nc) {
# create the partitions according to nc and method
if (any(method == 1) || (method == 2) || (method == 3) ||
(method == 4) || (method == 5) || (method == 6) ||
(method == 7)) {
cl1 <- cutree(hc, k = nc)
cl2 <- cutree(hc, k = nc + 1)
clall <- cbind(cl1, cl2)
clmax <- cutree(hc, k = max_nc)
if (nc >= 2) {
cl0 <- cutree(hc, k = nc - 1)
clall1 <- cbind(cl0, cl1, cl2)
}
if (nc == 1) {
cl0 <- rep(NA, nn)
clall1 <- cbind(cl0, cl1, cl2)
}
}
if (method == 8) {
cl2 <- kmeans(jeu, nc + 1)$cluster
clmax <- kmeans(jeu, max_nc)$cluster
if (nc > 2) {
cl1 <- kmeans(jeu, nc)$cluster
clall <- cbind(cl1, cl2)
cl0 <- kmeans(jeu, nc - 1)$cluster
clall1 <- cbind(cl0, cl1, cl2)
}
if (nc == 2) {
cl1 <- kmeans(jeu, nc)$cluster
clall <- cbind(cl1, cl2)
cl0 <- rep(1, nn)
clall1 <- cbind(cl0, cl1, cl2)
}
if (nc == 1) {
stop("Number of clusters must be higher than 2")
}
}
j <- table(cl1)
s <- sum(j == 1)
j2 <- table(cl2)
s2 <- sum(j2 == 1)
if(any(indice == 31 || indice == 32))
{
# indices.traces
if(nc < 2)
{
res[nc - min_nc + 1, "index.Hartigan"] <- Indices.Traces(jeu, md, clall1, index = "hart")
res[nc - min_nc + 1, "index.Ball"] <- Indices.Traces(jeu, md, clall1, index = "ball")
res[nc - min_nc + 1, c("index.KL", "index.CH",
"index.Ratkowsky")] <- NA
} else if (nc >= 2)
{
res[nc - min_nc + 1, c("index.KL", "index.CH", "index.Hartigan",
"index.Ratkowsky", "index.Ball")] <-
Indices.Traces(jeu, md, clall1, index = "all")
}
# indices WBT (mod)
WBT <- Indices.WBT_mod(x = jeu, cl = cl1,
P = TT)
res[nc - min_nc + 1, c("index.TrCovW", "index.TraceW",
"index.Rubin")] <- unlist(WBT)
# indices WKWL
WKWL <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)
res[nc - min_nc + 1, "index.Duda"] <- WKWL$duda
if(method != 8)
{
res[nc - min_nc + 1, "index.Pseudot2"] <- WKWL$pseudot2
} else res[nc - min_nc + 1, "index.Pseudot2"] <- NA
NM <- WKWL$NM
NK <- WKWL$NK
NL <- WKWL$NL
zz <- 3.2
zzz <- zz * sqrt(2 * (1 - 8/((pi^2) * pp))/(NM * pp))
resCritical[nc - min_nc + 1, "CritValue_Duda"] <- critValue <- 1 -
(2/(pi * pp)) - zzz
if (method == 8)
{resCritical[nc - min_nc + 1, "CritValue_PseudoT2"] <- NA
} else {
critValue <- 1 - (2/(pi * pp)) - zzz
resCritical[nc - min_nc + 1, "CritValue_PseudoT2"] <- ((1 - critValue)/critValue) *
(NK + NL - 2)
}
# indices frey and mcclain
if(nc >= 2)
{
I1528 <- Index.15and28(cl1 = cl1, cl2 = cl2, md = md)
if (method != 8)
{
res[nc - min_nc + 1, c("index.Frey", "index.McClain")] <- unlist(I1528)
} else if(method == 8)
{
res[nc - min_nc + 1, "index.Frey"] <- NA
res[nc - min_nc + 1, "index.McClain"] <- I1528$mcclain
}
} else if (nc < 2)
{
res[nc - min_nc + 1, c("index.Frey", "index.McClain")] <- NA
}
# indices gamma gplus and tau
if(nc >=2)
{
ISPM <- Index.sPlussMoins(cl1 = cl1, md = md)
if (method == 8)
{
res[nc - min_nc + 1, "index.Gamma"] <- NA
} else res[nc - min_nc + 1, "index.Gamma"] <- ISPM$gamma
res[nc - min_nc + 1, "index.Gplus"] <- ISPM$gplus
res[nc - min_nc + 1, "index.Tau"] <- ISPM$tau
} else if (nc < 2)
{
res[nc - min_nc + 1, c("index.Gamma", "index.Gplus", "index.Tau")] <- NA
}
}
# compute the specific indices according to value "indice"
if (indice == 3) {
res[nc - min_nc + 1, "index.Hartigan"] <- Indices.Traces(jeu, md,
clall1, index = "hart")
}
# method ccc excluded
# if (any(indice == 4) || (indice == 31) || (indice ==
# 32)) {
# res[nc - min_nc + 1, 5] <- Indices.WBT_mod(x = jeu, cl = cl1,
# P = TT)$ccc
# }
# method scott excluded
# if (any(indice == 5) || (indice == 31) || (indice ==
# 32)) {
# res[nc - min_nc + 1, 6] <- Indices.WBT_mod(x = jeu, cl = cl1,
# P = TT)$scott
# }
# method marriott excluded
# if (any(indice == 6) || (indice == 31) || (indice ==
# 32)) {
# res[nc - min_nc + 1, 7] <- Indices.WBT_mod(x = jeu, cl = cl1,
# P = TT)$marriot
# }
if (indice == 7) {
res[nc - min_nc + 1, "index.TrCovW"] <- Indices.WBT_mod(x = jeu, cl = cl1,
P = TT)$trcovw
}
if (indice == 8) {
res[nc - min_nc + 1, "index.TraceW"] <- Indices.WBT_mod(x = jeu, cl = cl1,
P = TT)$tracew
}
# friedman removed
# if (any(indice == 9) || (indice == 31) || (indice ==
# 32)) {
# res[nc - min_nc + 1, 10] <- Indices.WBT_mod(x = jeu,
# cl = cl1, P = TT)$friedman
# }
if (indice == 10) {
res[nc - min_nc + 1, "index.Rubin"] <- Indices.WBT_mod(x = jeu,
cl = cl1, P = TT)$rubin
}
if (indice == 14) {
res[nc - min_nc + 1, "index.Duda"] <- Indices.WKWL(x = jeu,
cl1 = cl1, cl2 = cl2)$duda
}
if (indice == 15) {
if (method == 8)
{ res[nc - min_nc + 1, 16] <- NA
} else res[nc - min_nc + 1, "index.Pseudot2"] <- Indices.WKWL(x = jeu,
cl1 = cl1, cl2 = cl2)$pseudot2
}
# index beale removed
# if (any(indice == 16) || (indice == 31) || (indice ==
# 32)) {
# res[nc - min_nc + 1, 17] <- beale <- Indices.WKWL(x = jeu,
# cl1 = cl1, cl2 = cl2)$beale
# }
# compute critical values for indices duda, pseudot2. Beale is not computed.
if (any(indice == 14) || (indice == 15) || (indice ==
16)) {
NM <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NM
NK <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NK
NL <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NL
zz <- 3.2
zzz <- zz * sqrt(2 * (1 - 8/((pi^2) * pp))/(NM *
pp))
if (any(indice == 14) || (indice == 31) || (indice ==
32)) {
resCritical[nc - min_nc + 1, 2] <- critValue <- 1 -
(2/(pi * pp)) - zzz
}
if ((indice == 15) || (indice == 31) || (indice ==
32)) {
if (method == 8)
{resCritical[nc - min_nc + 1, 3] <- NA
} else {
critValue <- 1 - (2/(pi * pp)) - zzz
resCritical[nc - min_nc + 1, 3] <- ((1 - critValue)/critValue) *
(NK + NL - 2)
}
}
# beale removed
# if (any(indice == 16) || (indice == 31) || (indice ==
# 32)) {
# df2 <- (NM - 2) * pp
# resCritical[nc - min_nc + 1, 4] <- 1 - pf(beale,
# pp, df2)
# }
}
if (any(indice == 18)) {
res[nc - min_nc + 1, "index.Ball"] <- Indices.Traces(jeu, md,
clall1, index = "ball")
}
if (any(indice == 19) || (indice == 31) || (indice ==
32)) {
res[nc - min_nc + 1, "index.ptbiserial"] <- Indice.ptbiserial(x = jeu,
md = md, cl1 = cl1)
}
# GAP index
if (any(indice == 20) || (indice == 32)) {
if (method == 1) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "ward",
d = NULL, centrotypes = "centroids")
}
if (method == 2) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "single",
d = NULL, centrotypes = "centroids")
}
if (method == 3) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "complete",
d = NULL, centrotypes = "centroids")
}
if (method == 4) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "average",
d = NULL, centrotypes = "centroids")
}
if (method == 5) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "mcquitty",
d = NULL, centrotypes = "centroids")
}
if (method == 6) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "median",
d = NULL, centrotypes = "centroids")
}
if (method == 7) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "centroid",
d = NULL, centrotypes = "centroids")
}
if (method == 8) {
resultSGAP <- Indice.Gap(x = jeu, clall = clall,
reference.distribution = "unif", B = 10, method = "k-means",
d = NULL, centrotypes = "centroids")
}
res[nc - min_nc + 1, "index.Gap"] <- resultSGAP$gap
resCritical[nc - min_nc + 1, "CritValue_Gap" ] <- resultSGAP$diffu
}
if (nc >= 2) {
if (any(indice == 1)) {
res[nc - min_nc + 1, "index.KL"] <- Indices.Traces(jeu,
md, clall1, index = "kl")
}
if (any(indice == 2)) {
res[nc - min_nc + 1, "index.CH"] <- Indices.Traces(jeu,
md, clall1, index = "ch")
}
if (any(indice == 11) || (indice == 31) || (indice ==
32)) {
res[nc - min_nc + 1, "index.Cindex"] <- Indice.cindex(d = md,
cl = cl1)
}
if (any(indice == 12) || (indice == 31) || (indice ==
32)) {
res[nc - min_nc + 1, "index.DB"] <- Indice.DB(x = jeu,
cl = cl1, d = NULL, centrotypes = "centroids",
p = 2, q = 2)$DB
}
if (any(indice == 13) || (indice == 31) || (indice ==
32)) {
res[nc - min_nc + 1, "index.Silhouette"] <- Indice.S(d = md,
cl = cl1)
}
if (any(indice == 17)) {
res[nc - min_nc + 1, "index.Ratkowsky"] <- Indices.Traces(jeu,
md, clall1, index = "ratkowsky")
}
if (indice == 21) {
if (method == 8)
{ res[nc - min_nc + 1, "index.Frey"] <- NA
} else res[nc - min_nc + 1, "index.Frey"] <- Index.15and28(cl1 = cl1,
cl2 = cl2, md = md)$frey
}
if (indice == 22) {
res[nc - min_nc + 1, "index.McClain"] <- Index.15and28(cl1 = cl1,
cl2 = cl2, md = md)$mcclain
}
if (indice == 23) {
if (method == 8)
{res[nc - min_nc + 1, "index.Gamma"] <- NA
} else res[nc - min_nc + 1, "index.Gamma"] <- Index.sPlussMoins(cl1 = cl1,
md = md)$gamma
}
if (indice == 24) {
res[nc - min_nc + 1, "index.Gplus"] <- Index.sPlussMoins(cl1 = cl1,
md = md)$gplus
}
if (indice == 25) {
res[nc - min_nc + 1, "index.Tau"] <- Index.sPlussMoins(cl1 = cl1,
md = md)$tau
}
if (any(indice == 26) || (indice == 31) || (indice ==
32)) {
res[nc - min_nc + 1, "index.Dunn"] <- Index.dunn(md, cl1,
Data = jeu, method = NULL)
}
# hubert removed
# if (any(indice == 27) || (indice == 31) || (indice ==
# 32)) {
# res[nc - min_nc + 1, 28] <- Index.Hubert(jeu,
# cl1)
# }
if (any(indice == 28) || (indice == 31) || (indice ==
32)) {
res[nc - min_nc + 1, "index.SDindex"] <- Index.sdindex(jeu,
clmax, cl1)
}
# dindex excluded
# if (any(indice == 29) || (indice == 31) || (indice ==
# 32)) {
# res[nc - min_nc + 1, 30] <- Index.Dindex(cl1,
# jeu)
# }
if (any(indice == 30) || (indice == 31) || (indice ==
32)) {
res[nc - min_nc + 1, "index.SDbw"] <- Index.SDbw(jeu, cl1)
}
} else {
res[nc - min_nc + 1, c("index.KL", "index.CH", "index.Cindex", "index.DB",
"index.Silhouette", "index.Ratkowsky", "index.Frey",
"index.McClain", "index.Gamma", "index.Gplus",
"index.Tau", "index.Dunn", "index.SDindex",
"index.SDbw")] <- NA
}
}
# identify the number of clusters suggested by each index
nc.KL <- indice.KL <- 0
if (any(indice == 1) || (indice == 31) || (indice == 32)) {
nc.KL <- (min_nc:max_nc)[which.max(res[, "index.KL" ])]
indice.KL <- max(res[, "index.KL" ], na.rm = TRUE)
}
nc.CH <- indice.CH <- 0
if (any(indice == 2) || (indice == 31) || (indice == 32)) {
nc.CH <- (min_nc:max_nc)[which.max(res[, "index.CH"])]
indice.CH <- max(res[, "index.CH"], na.rm = TRUE)
}
# nc.CCC <- indice.CCC <- 0
# if (any(indice == 4) || (indice == 31) || (indice == 32)) {
# nc.CCC <- (min_nc:max_nc)[which.max(res[, 5])]
# indice.CCC <- max(res[, 5], na.rm = TRUE)
# }
nc.DB <- indice.DB <- 0
if (any(indice == 12) || (indice == 31) || (indice == 32)) {
nc.DB <- (min_nc:max_nc)[which.min(res[, "index.DB"])]
indice.DB <- min(res[, "index.DB"], na.rm = TRUE)
}
nc.Silhouette <- indice.Silhouette <- 0
if (any(indice == 13) || (indice == 31) || (indice == 32)) {
nc.Silhouette <- (min_nc:max_nc)[which.max(res[, "index.Silhouette"])]
indice.Silhouette <- max(res[, "index.Silhouette"], na.rm = TRUE)
}
nc.Gap <- indice.Gap <- 0
if (any(indice == 20) || (indice == 32)) {
found <- FALSE
for (ncG in min_nc:max_nc) {
if ((resCritical[ncG - min_nc + 1, "CritValue_Gap"] >= 0) && (!found)) {
ncGap <- ncG
indiceGap <- res[ncG - min_nc + 1, "index.Gap"]
found <- TRUE
}
}
if (found) {
nc.Gap <- ncGap
indice.Gap <- indiceGap
} else {
nc.Gap <- NA
indice.Gap <- NA
}
}
nc.Duda <- indice.Duda <- 0
if (any(indice == 14) || (indice == 31) || (indice == 32)) {
foundDuda <- FALSE
for (ncD in min_nc:max_nc) {
if ((res[ncD - min_nc + 1, "index.Duda"] >=
resCritical[ncD - min_nc + 1,"CritValue_Duda"]) && (!foundDuda))
{
ncDuda <- ncD
indiceDuda <- res[ncD - min_nc + 1, "index.Duda"]
foundDuda <- TRUE
}
}
if (foundDuda) {
nc.Duda <- ncDuda
indice.Duda <- indiceDuda
} else {
nc.Duda <- NA
indice.Duda <- NA
}
}
nc.Pseudo <- indice.Pseudo <- 0
if (any(indice == 15) || (indice == 31) || (indice == 32)) {
if (method == 8) {
nc.Pseudo <- NA
indice.Pseudo <- NA
} else {
foundPseudo <- FALSE
for (ncP in min_nc:max_nc) {
if ((res[ncP - min_nc + 1, "index.Pseudot2"] <=
resCritical[ncP - min_nc + 1, "CritValue_PseudoT2"]) && (!foundPseudo))
{
ncPseudo <- ncP
indicePseudo <- res[ncP - min_nc + 1, "index.Pseudot2"]
foundPseudo <- TRUE
}
}
if (foundPseudo) {
nc.Pseudo <- ncPseudo
indice.Pseudo <- indicePseudo
} else {
nc.Pseudo <- NA
indice.Pseudo <- NA
}
}
}
# nc.Beale <- indice.Beale <- 0
# if (any(indice == 16) || (indice == 31) || (indice == 32)) {
# foundBeale <- FALSE
# for (ncB in min_nc:max_nc) {
# if ((resCritical[ncB - min_nc + 1, 4] >= alphaBeale) &&
# (!foundBeale)) {
# ncBeale <- ncB
# indiceBeale <- res[ncB - min_nc + 1, 17]
# foundBeale <- TRUE
# }
# }
# if (foundBeale) {
# nc.Beale <- ncBeale
# indice.Beale <- indiceBeale
# } else {
# nc.Beale <- NA
# indice.Beale <- NA
# }
# }
nc.ptbiserial <- indice.ptbiserial <- 0
if (any(indice == 19) || (indice == 31) || (indice == 32)) {
nc.ptbiserial <- (min_nc:max_nc)[which.max(res[, "index.ptbiserial"])]
indice.ptbiserial <- max(res[, "index.ptbiserial"], na.rm = TRUE)
}
foundNC <- foundIndice <- numeric(0)
nc.Frey <- indice.Frey <- 0
if (any(indice == 21) || (indice == 31) || (indice == 32)) {
if (method == 8) {
nc.Frey <- NA
indice.Frey <- NA
} else {
foundFrey <- FALSE
i <- 1
for (ncF in min_nc:max_nc) {
if (!is.na(res[ncF - min_nc + 1, "index.Frey"]))
{
if (res[ncF - min_nc + 1, "index.Frey"] < 1)
{
if (ncF != min_nc) {
ncFrey <- ncF - 1
indiceFrey <- res[ncF - min_nc, "index.Frey"]
foundFrey <- TRUE
foundNC[i] <- ncFrey
foundIndice[i] <- indiceFrey
i <- i + 1
}
}
}
}
if (foundFrey) {
nc.Frey <- foundNC[1]
indice.Frey <- foundIndice[1]
} else {
nc.Frey <- NA
indice.Frey <- NA
print(paste("No clustering structure in this data set"))
}
}
}
nc.McClain <- indice.McClain <- 0
if (any(indice == 22) || (indice == 31) || (indice == 32)) {
nc.McClain <- (min_nc:max_nc)[which.min(res[, "index.McClain"])]
indice.McClain <- min(res[, "index.McClain"], na.rm = TRUE)
}
nc.Gamma <- indice.Gamma <- 0
if (any(indice == 23) || (indice == 31) || (indice == 32)) {
if (method == 8) {
nc.Gamma <- NA
indice.Gamma <- NA
} else {
nc.Gamma <- (min_nc:max_nc)[which.max(res[, "index.Gamma"])]
indice.Gamma <- max(res[, "index.Gamma"], na.rm = TRUE)
}
}
nc.Gplus <- indice.Gplus <- 0
if (any(indice == 24) || (indice == 31) || (indice == 32)) {
nc.Gplus <- (min_nc:max_nc)[which.min(res[, "index.Gplus"])]
indice.Gplus <- min(res[, "index.Gplus"], na.rm = TRUE)
}
nc.Tau <- indice.Tau <- 0
if (any(indice == 25) || (indice == 31) || (indice == 32)) {
nc.Tau <- (min_nc:max_nc)[which.max(res[, "index.Tau"])]
indice.Tau <- max(res[, "index.Tau"], na.rm = TRUE)
}
if ((indice == 3) || (indice == 5) || (indice == 6) ||
(indice == 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
(indice == 18) || (indice == 27) || (indice == 11) ||
(indice == 29) || (indice == 31) || (indice == 32)) {
DiffLev <- array(0, c(max_nc - min_nc + 1, 6))
DiffLev[, 1] <- min_nc:max_nc
colnames(DiffLev) <- c("N", "hartigan", "trcovw", "tracew", "rubin", "ball")
for (nc3 in min_nc:max_nc) {
if (nc3 == min_nc) {
DiffLev[nc3 - min_nc + 1, c("hartigan", "trcovw", "tracew",
"rubin", "ball")] <- NA
} else {
if (nc3 == max_nc) {
DiffLev[nc3 - min_nc + 1, "hartigan"] <- abs(res[
nc3 - min_nc + 1, "index.Hartigan"] - res[nc3 - min_nc, "index.Hartigan"])
# scott removed
# DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
# min_nc + 1, 6] - res[nc3 - min_nc, 6])
# marriott removed
# DiffLev[nc3 - min_nc + 1, 4] <- abs(res[nc3 -
# min_nc + 1, 7] - NA)
DiffLev[nc3 - min_nc + 1, "trcovw"] <- abs(res[
nc3 - min_nc + 1, "index.TrCovW"] - res[nc3 - min_nc, 8])
DiffLev[nc3 - min_nc + 1, "tracew"] <- NA
# friedman removed
# DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
# min_nc + 1, 10] - res[nc3 - min_nc, 10])
DiffLev[nc3 - min_nc + 1, "rubin"] <- NA
DiffLev[nc3 - min_nc + 1, "ball"] <- abs(res[
nc3 - min_nc + 1, "index.Ball"] - res[nc3 - min_nc, "index.Ball"])
# hubert removed
# DiffLev[nc3 - min_nc + 1, 10] <- abs(res[nc3 -
# min_nc + 1, 28] - NA)
# dindex removed
# DiffLev[nc3 - min_nc + 1, 12] <- abs(res[nc3 -
# min_nc + 1, 30] - NA)
} else {
DiffLev[nc3 - min_nc + 1, "hartigan"] <-
abs(res[nc3 - min_nc + 1, "index.Hartigan"] - res[nc3 - min_nc, "index.Hartigan"])
# scott removed
# DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
# min_nc + 1, 6] - res[nc3 - min_nc, 6])
# marriott removed
# DiffLev[nc3 - min_nc + 1, 4] <- ((res[nc3 -
# min_nc + 2, 7] - res[nc3 - min_nc + 1, 7]) -
# (res[nc3 - min_nc + 1, 7] - res[nc3 - min_nc,
# 7]))
DiffLev[nc3 - min_nc + 1, "trcovw"] <- abs(res[
nc3 - min_nc + 1, "index.TrCovW"] - res[nc3 - min_nc, "index.TrCovW"])
DiffLev[nc3 - min_nc + 1, "tracew"] <-
((res[nc3 - min_nc + 2, "index.TraceW"] - res[nc3 - min_nc + 1, "index.TraceW"]) -
(res[nc3 - min_nc + 1, "index.TraceW" ] - res[nc3 - min_nc, "index.TraceW" ]))
# friedman removed
# DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
# min_nc + 1, 10] - res[nc3 - min_nc, 10])
DiffLev[nc3 - min_nc + 1, "rubin"] <-
((res[nc3 - min_nc + 2, "index.Rubin"] - res[nc3 - min_nc + 1, "index.Rubin"]) -
(res[nc3 - min_nc + 1, "index.Rubin"] - res[nc3 - min_nc, "index.Rubin"]))
DiffLev[nc3 - min_nc + 1, "ball"] <-
abs(res[nc3 - min_nc + 1, "index.Ball"] - res[nc3 - min_nc, "index.Ball"])
# hubert removed
# DiffLev[nc3 - min_nc + 1, 10] <- abs((res[nc3 -
# min_nc + 1, 28] - res[nc3 - min_nc, 28]))
# dindex removed
# DiffLev[nc3 - min_nc + 1, 12] <- ((res[nc3 -
# min_nc + 2, 30] - res[nc3 - min_nc + 1, 30]) -
# (res[nc3 - min_nc + 1, 30] - res[nc3 - min_nc,
# 30]))
}
}
}
}
nc.Hartigan <- indice.Hartigan <- 0
if (any(indice == 3) || (indice == 31) || (indice == 32)) {
nc.Hartigan <- DiffLev[, "N"][which.max(DiffLev[, "hartigan"])]
indice.Hartigan <- max(DiffLev[, "hartigan"], na.rm = TRUE)
}
nc.Ratkowsky <- indice.Ratkowsky <- 0
if (any(indice == 17) || (indice == 31) || (indice == 32)) {
nc.Ratkowsky <- (min_nc:max_nc)[which.max(res[, "index.Ratkowsky" ])]
indice.Ratkowsky <- max(res[, "index.Ratkowsky" ], na.rm = TRUE)
}
nc.cindex <- indice.cindex <- 0
if (any(indice == 11) || (indice == 31) || (indice == 32)) {
nc.cindex <- (min_nc:max_nc)[which.min(res[, "index.Cindex"])]
indice.cindex <- min(res[, "index.Cindex"], na.rm = TRUE)
}
# scott removed
# nc.Scott <- indice.Scott <- 0
# if (any(indice == 5) || (indice == 31) || (indice == 32)) {
# nc.Scott <- DiffLev[, 1][which.max(DiffLev[, 3])]
# indice.Scott <- max(DiffLev[, 3], na.rm = TRUE)
# }
# marriott removed
# nc.Marriot <- indice.Marriot <- 0
# if (any(indice == 6) || (indice == 31) || (indice == 32)) {
# nc.Marriot <- DiffLev[, 1][which.max(DiffLev[, 4])]
# round(nc.Marriot, digits = 1)
# indice.Marriot <- max(DiffLev[, 4], na.rm = TRUE)
# }
nc.TrCovW <- indice.TrCovW <- 0
if (any(indice == 7) || (indice == 31) || (indice == 32)) {
nc.TrCovW <- DiffLev[, "N"][which.max(DiffLev[, "trcovw"])]
indice.TrCovW <- max(DiffLev[, "trcovw"], na.rm = TRUE)
}
nc.TraceW <- indice.TraceW <- 0
if (any(indice == 8) || (indice == 31) || (indice == 32)) {
nc.TraceW <- DiffLev[, "N"][which.max(DiffLev[, "tracew"])]
indice.TraceW <- max(DiffLev[, "tracew"], na.rm = TRUE)
}
# friedman removed
# nc.Friedman <- indice.Friedman <- 0
# if (any(indice == 9) || (indice == 31) || (indice == 32)) {
# nc.Friedman <- DiffLev[, 1][which.max(DiffLev[, 7])]
# indice.Friedman <- max(DiffLev[, 7], na.rm = TRUE)
# }
nc.Rubin <- indice.Rubin <- 0
if (any(indice == 10) || (indice == 31) || (indice == 32)) {
nc.Rubin <- DiffLev[, "N"][which.min(DiffLev[, "rubin"])]
indice.Rubin <- min(DiffLev[, "rubin"], na.rm = TRUE)
}
nc.Ball <- indice.Ball <- 0
if (any(indice == 18) || (indice == 31) || (indice == 32)) {
nc.Ball <- DiffLev[, "N"][which.max(DiffLev[, "ball"])]
indice.Ball <- max(DiffLev[, "ball"], na.rm = TRUE)
}
nc.Dunn <- indice.Dunn <- 0
if (any(indice == 26) || (indice == 31) || (indice == 32)) {
nc.Dunn <- (min_nc:max_nc)[which.max(res[, "index.Dunn"])]
indice.Dunn <- max(res[, "index.Dunn"], na.rm = TRUE)
}
# hubert removed
# nc.Hubert <- indice.Hubert <- 0
# if (any(indice == 27) || (indice == 31) || (indice == 32)) {
# nc.Hubert <- 0
# indice.Hubert <- 0
# par(mfrow = c(1, 2))
# plot(res[, 1], res[, 28], tck = 0, type = "b", col = "red",
# xlab = expression(paste("Number of clusters ")),
# ylab = expression(paste("Index Value")), main = "Normalized Hubert Statistic")
# plot(DiffLev[, 1], DiffLev[, 10], tck = 0, type = "b",
# col = "blue", xlab = expression(paste("Number of clusters ")),
# ylab = expression(paste("Index Value")), main = "Second Differences of Hubert index")
# print(paste("*** : The Hubert index is a graphical method of determining the number of clusters. In the plot of Hubert index, we seek a significant knee that corresponds to a significant increase of the value of the measure i.e the significant peak in Hubert index second differences plot."))
# }
nc.sdindex <- indice.sdindex <- 0
if (any(indice == 28) || (indice == 31) || (indice == 32)) {
nc.sdindex <- (min_nc:max_nc)[which.min(res[, "index.SDindex"])]
indice.sdindex <- min(res[, "index.SDindex"], na.rm = TRUE)
}
# Dindex removed
# nc.Dindex <- indice.Dindex <- 0
# if (any(indice == 29) || (indice == 31) || (indice == 32)) {
# nc.Dindex <- 0
# indice.Dindex <- 0
# par(mfrow = c(1, 2))
# plot(res[, 1], res[, 30], tck = 0, type = "b", col = "red",
# xlab = expression(paste("Number of clusters ")),
# ylab = expression(paste("Index Value")), main = "Dindex")
# plot(DiffLev[, 1], DiffLev[, 12], tck = 0, type = "b",
# col = "blue", xlab = expression(paste("Number of clusters ")),
# ylab = expression(paste("Index Value")), main = "Second Differences of D index")
# print(paste("*** : The D index is a graphical method of determining the number of clusters. In the plot of D index, we seek a significant knee (the significant peak in Dindex second differences plot) that corresponds to a significant increase of the value of the measure."))
# }
nc.SDbw <- indice.SDbw <- 0
if (any(indice == 30) || (indice == 31) || (indice == 32)) {
nc.SDbw <- (min_nc:max_nc)[which.min(res[, "index.SDbw"])]
indice.SDbw <- min(res[, "index.SDbw"], na.rm = TRUE)
}
# prepare the output
if (indice < 31) {
indice2 <- c("index.KL", "index.CH", "index.Hartigan", "ccc_removed", "scott_removed",
"marriot_removed", "index.TrCovW", "index.TraceW", "friedman_removed",
"index.Rubin", "index.Cindex", "index.DB", "index.Silhouette",
"index.Duda", "index.Pseudot2", "beale_removed",
"index.Ratkowsky", "index.Ball", "index.ptbiserial",
"index.Gap", "index.Frey", "index.McClain", "index.Gamma", "index.Gplus",
"index.Tau", "index.Dunn", "hubert_removed", "index.SDindex",
"dindex_removed", "index.SDbw")[indice]
res <- res[, c("nc.Ward", indice2)]
if (indice == 15) {
if (method == 8) {
res <- NA
resCritical <- NA
print(paste("Pseudot2 index can be applied only to hierarchical methods "))
} else resCritical <- resCritical[, c(1, "CritValue_PseudoT2")]
}
if ((indice == 21)) {
if (method == 8) {
res <- NA
print(paste("Frey index can be applied only to hierarchical methods "))
}
}
if ((indice == 23)) {
if (method == 8) {
res <- NA
print(paste("Gamma index can be applied only to hierarchical methods "))
}
}
if (indice == 14) {
resCritical <- resCritical[, c(1, "CritValue_Duda")]
}
# beale removed
# if (indice == 16) {
# resCritical <- resCritical[, c(1, 4)]
# }
if (indice == 20) {
resCritical <- resCritical[, c(1, "CritValue_Gap")]
}
}
if (indice == 31) {
res <- res[, c("nc.Ward", "index.KL", "index.CH", "index.Hartigan", "index.TrCovW",
"index.TraceW", "index.Rubin", "index.Cindex", "index.DB", "index.Silhouette",
"index.Duda", "index.Pseudot2", "index.Ratkowsky", "index.Ball", "index.ptbiserial",
"index.Frey", "index.McClain", "index.Dunn", "index.SDindex", "index.SDbw")]
resCritical <- resCritical[, c( "nc.CritValue", "CritValue_Duda", "CritValue_PseudoT2")]
if (method == 8) {
print(paste("Pseudot2 and Frey indices can be applied only to hierarchical methods "))
}
}
if (indice == 32) {
if (method == 8) {
print(paste("Pseudot2, Gamma and Frey indices can be applied only to hierarchical methods "))
}
}
# resultats is the matrix of results
if (any(indice == 20) || (indice == 23) || (indice == 24) ||
(indice == 25) || (indice == 32)) {
results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan,
indice.Hartigan, # nc.CCC, indice.CCC, nc.Scott, indice.Scott,
# nc.Marriot, indice.Marriot,
nc.TrCovW, indice.TrCovW,
nc.TraceW, indice.TraceW, # nc.Friedman, indice.Friedman,
nc.Rubin, indice.Rubin, nc.cindex, indice.cindex,
nc.DB, indice.DB, nc.Silhouette, indice.Silhouette,
nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
# nc.Beale, indice.Beale,
nc.Ratkowsky, indice.Ratkowsky, nc.Ball,
indice.Ball, nc.ptbiserial, indice.ptbiserial, nc.Gap,
indice.Gap, nc.Frey, indice.Frey, nc.McClain, indice.McClain,
nc.Gamma, indice.Gamma, nc.Gplus, indice.Gplus, nc.Tau,
indice.Tau, nc.Dunn, indice.Dunn, # nc.Hubert, indice.Hubert,
nc.sdindex, indice.sdindex, # nc.Dindex, indice.Dindex,
nc.SDbw, indice.SDbw)
resultats <- matrix(c(results), nrow = 2, ncol = 23,
dimnames = list(c("Number_clusters", "Value_Index"),
c("index.KL", "index.CH", "index.Hartigan", #"index.CCC",
#"index.Scott", "index.Marriot",
"index.TrCovW",
"index.TraceW", #"index.Friedman",
"index.Rubin",
"index.Cindex", "index.DB", "index.Silhouette",
"index.Duda", "index.PseudoT2", # "index.Beale",
"index.Ratkowsky", "index.Ball", "index.PtBiserial",
"index.Gap", "index.Frey", "index.McClain",
"index.Gamma", "index.Gplus", "index.Tau",
"index.Dunn", #"index.Hubert",
"index.SDindex",
#"index.Dindex",
"index.SDbw")))
} else {
results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan,
indice.Hartigan, #nc.CCC, indice.CCC, nc.Scott, indice.Scott,
#nc.Marriot, indice.Marriot,
nc.TrCovW, indice.TrCovW,
nc.TraceW, indice.TraceW, # nc.Friedman, indice.Friedman,
nc.Rubin, indice.Rubin, nc.cindex, indice.cindex,
nc.DB, indice.DB, nc.Silhouette, indice.Silhouette,
nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
# nc.Beale, indice.Beale,
nc.Ratkowsky, indice.Ratkowsky, nc.Ball,
indice.Ball, nc.ptbiserial, indice.ptbiserial, nc.Frey,
indice.Frey, nc.McClain, indice.McClain, nc.Dunn,
indice.Dunn, #nc.Hubert, indice.Hubert,
nc.sdindex, indice.sdindex, #nc.Dindex, indice.Dindex,
nc.SDbw, indice.SDbw)
resultats <- matrix(c(results), nrow = 2, ncol = 19,
dimnames = list(c("Number_clusters", "Value_Index"),
c("index.KL", "index.CH", "index.Hartigan", #"index.CCC",
#"index.Scott", "index.Marriot",
"index.TrCovW",
"index.TraceW", #"index.Friedman",
"index.Rubin",
"index.Cindex", "index.DB", "index.Silhouette",
"index.Duda", "index.PseudoT2", #"index.Beale",
"index.Ratkowsky", "index.Ball", "index.PtBiserial",
"index.Frey", "index.McClain", "index.Dunn",
#"index.Hubert", "index.SDindex",
"index.Dindex",
"index.SDbw")))
}
if(indice < 31)
resultats <- resultats[, colnames(resultats) == indice2]
resultats <- as.matrix(resultats)
resultats["Number_clusters", ] <- round(resultats["Number_clusters", ], digits = 2)
resultats <- round(resultats, digits = 4)
if (numberObsAfter != numberObsBefore) {
print(paste(numberObsAfter, "observations were used out of",
numberObsBefore, "possible observations due to missing values."))
}
# I did not love this message
# if (numberObsAfter == numberObsBefore) {
# print(paste("All", numberObsAfter, "observations were used."))
# }
if ((indice == 14) || (indice == 15) || (indice == 16) ||
(indice == 20) || (indice == 31) || (indice == 32)) {
results.final <- list(All.index = res, All.CriticalValues = resCritical,
Best.nc = resultats)
} else {
results.final <- list(All.index = res, Best.nc = resultats)
}
return(results.final)
}
# Load the necessary functions
# FUNCTION INDICES.WBT HAS BEEN MODIFIED
# to prevent crashes connected to singular matrices
centers <- function(cl, x) {
x <- as.matrix(x)
n <- length(cl)
k <- max(cl)
centers <- matrix(nrow = k, ncol = ncol(x))
{
for (i in 1:k) {
for (j in 1:ncol(x)) {
centers[i, j] <- mean(x[cl == i, j])
}
}
}
return(centers)
}
Average.scattering <- function(cl, x) {
x <- as.matrix(x)
n <- length(cl)
k <- max(cl)
centers.matrix <- centers(cl, x)
cluster.size <- numeric(0)
variance.clusters <- matrix(0, ncol = ncol(x), nrow = k)
var <- matrix(0, ncol = ncol(x), nrow = k)
for (u in 1:k) cluster.size[u] <- sum(cl == u)
for (u in 1:k) {
for (j in 1:ncol(x)) {
for (i in 1:n) {
if (cl[i] == u)
variance.clusters[u, j] <- variance.clusters[u,
j] + (x[i, j] - centers.matrix[u, j])^2
}
}
}
for (u in 1:k) {
for (j in 1:ncol(x)) variance.clusters[u, j] = variance.clusters[u,
j]/cluster.size[u]
}
variance.matrix <- numeric(0)
for (j in 1:ncol(x)) variance.matrix[j] = var(x[, j]) *
(n - 1)/n
Somme.variance.clusters <- 0
for (u in 1:k) Somme.variance.clusters <- Somme.variance.clusters +
sqrt((variance.clusters[u, ] %*% (variance.clusters[u,
])))
stdev <- (1/k) * sqrt(Somme.variance.clusters)
scat <- (1/k) * (Somme.variance.clusters/sqrt(variance.matrix %*%
variance.matrix))
scat <- list(stdev = stdev, centers = centers.matrix,
variance.intraclusters = variance.clusters, scatt = scat)
return(scat)
}
density.clusters <- function(cl, x) {
x <- as.matrix(x)
k <- max(cl)
n <- length(cl)
distance <- matrix(0, ncol = 1, nrow = n)
density <- matrix(0, ncol = 1, nrow = k)
centers.matrix <- centers(cl, x)
stdev <- Average.scattering(cl, x)$stdev
for (i in 1:n) {
u = 1
while (cl[i] != u) u <- u + 1
for (j in 1:ncol(x)) {
distance[i] <- distance[i] + (x[i, j] - centers.matrix[u,
j])^2
}
distance[i] <- sqrt(distance[i])
if (distance[i] <= stdev)
density[u] = density[u] + 1
}
dens <- list(distance = distance, density = density)
return(dens)
}
density.bw <- function(cl, x) {
x <- as.matrix(x)
k <- max(cl)
n <- length(cl)
centers.matrix <- centers(cl, x)
stdev <- Average.scattering(cl, x)$stdev
density.bw <- matrix(0, ncol = k, nrow = k)
u <- 1
for (u in 1:k) {
for (v in 1:k) {
if (v != u) {
distance <- matrix(0, ncol = 1, nrow = n)
moy <- (centers.matrix[u, ] + centers.matrix[v,
])/2
for (i in 1:n) {
if ((cl[i] == u) || (cl[i] == v)) {
for (j in 1:ncol(x)) {
distance[i] <- distance[i] + (x[i, j] -
moy[j])^2
}
distance[i] <- sqrt(distance[i])
if (distance[i] <= stdev) {
density.bw[u, v] <- density.bw[u, v] +
1
}
}
}
}
}
}
density.clust <- density.clusters(cl, x)$density
S <- 0
for (u in 1:k) for (v in 1:k) {
if (max(density.clust[u], density.clust[v]) != 0)
S = S + (density.bw[u, v]/max(density.clust[u],
density.clust[v]))
}
density.bw <- S/(k * (k - 1))
return(density.bw)
}
Dis <- function(cl, x) {
x <- as.matrix(x)
k <- max(cl)
centers.matrix <- centers(cl, x)
Distance.centers <- dist(centers.matrix)
Dmin <- min(Distance.centers)
Dmax <- max(Distance.centers)
Distance.centers <- as.matrix(Distance.centers)
s2 <- 0
for (u in 1:k) {
s1 = 0
for (j in 1:ncol(Distance.centers)) {
s1 <- s1 + Distance.centers[u, j]
}
s2 <- s2 + 1/s1
}
Dis <- (Dmax/Dmin) * s2
return(Dis)
}
Index.Hubert <- function(x, cl) {
k <- max(cl)
n <- dim(x)[1]
y <- matrix(0, ncol = dim(x)[2], nrow = n)
P <- as.matrix(md)
meanP <- mean(P)
variance.matrix <- numeric(0)
md <- dist(x, method = "euclidean")
for (j in 1:n) variance.matrix[j] = var(P[, j]) * (n -
1)/n
varP <- sqrt(variance.matrix %*% variance.matrix)
centers.clusters <- centers(cl, x)
for (i in 1:n) {
for (u in 1:k) {
if (cl[i] == u)
y[i, ] <- centers.clusters[u, ]
}
}
Q <- as.matrix(dist(y, method = "euclidean"))
meanQ <- mean(Q)
for (j in 1:n) variance.matrix[j] = var(Q[, j]) * (n -
1)/n
varQ <- sqrt(variance.matrix %*% variance.matrix)
M <- n * (n - 1)/2
S <- 0
n1 <- n - 1
for (i in 1:n1) {
j <- i + 1
while (j <= n) {
S <- S + (P[i, j] - meanP) * (Q[i, j] - meanQ)
j <- j + 1
}
}
gamma <- S/(M * varP * varQ)
return(gamma)
}
Index.sPlussMoins <- function(cl1, md) {
cn1 <- max(cl1)
n1 <- length(cl1)
dmat <- as.matrix(md)
average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
di <- list()
for (u in 1:cn1) {
cluster.size[u] <- sum(cl1 == u)
du <- as.dist(dmat[cl1 == u, cl1 == u])
within.dist1 <- c(within.dist1, du)
average.distance[u] <- mean(du)
median.distance[u] <- median(du)
bv <- numeric(0)
for (v in 1:cn1) {
if (v != u) {
suv <- dmat[cl1 == u, cl1 == v]
bv <- c(bv, suv)
if (u < v) {
separation.matrix[u, v] <- separation.matrix[v,
u] <- min(suv)
between.dist1 <- c(between.dist1, suv)
}
}
}
}
nwithin1 <- length(within.dist1)
nbetween1 <- length(between.dist1)
meanwithin1 <- mean(within.dist1)
meanbetween1 <- mean(between.dist1)
s.plus <- s.moins <- 0
for (k in 1:nwithin1) {
s.plus <- s.plus + (colSums(outer(between.dist1,
within.dist1[k], ">")))
s.moins <- s.moins + (colSums(outer(between.dist1,
within.dist1[k], "<")))
}
Index.Gamma <- (s.plus - s.moins)/(s.plus + s.moins)
Index.Gplus <- (2 * s.moins)/(n1 * (n1 - 1))
t.tau <- (nwithin1 * nbetween1) - (s.plus + s.moins)
Index.Tau <- (s.plus - s.moins)/(((n1 * (n1 - 1)/2 -
t.tau) * (n1 * (n1 - 1)/2))^(1/2))
results <- list(gamma = Index.Gamma, gplus = Index.Gplus,
tau = Index.Tau)
return(results)
}
Index.15and28 <- function(cl1, cl2, md) {
cn1 <- max(cl1)
n1 <- length(cl1)
dmat <- as.matrix(md)
average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
di <- list()
for (u in 1:cn1) {
cluster.size[u] <- sum(cl1 == u)
du <- as.dist(dmat[cl1 == u, cl1 == u])
within.dist1 <- c(within.dist1, du)
bv <- numeric(0)
for (v in 1:cn1) {
if (v != u) {
suv <- dmat[cl1 == u, cl1 == v]
bv <- c(bv, suv)
if (u < v) {
separation.matrix[u, v] <- separation.matrix[v,
u] <- min(suv)
between.dist1 <- c(between.dist1, suv)
}
}
}
}
cn2 <- max(cl2)
n2 <- length(cl2)
dmat <- as.matrix(md)
average.distance <- median.distance <- separation <- cluster.size <- within.dist2 <- between.dist2 <- numeric(0)
separation.matrix <- matrix(0, ncol = cn2, nrow = cn2)
di <- list()
for (w in 1:cn2) {
cluster.size[w] <- sum(cl2 == w)
dw <- as.dist(dmat[cl2 == w, cl2 == w])
within.dist2 <- c(within.dist2, dw)
bx <- numeric(0)
for (x in 1:cn2) {
if (x != w) {
swx <- dmat[cl2 == w, cl2 == x]
bx <- c(bx, swx)
if (w < x) {
separation.matrix[w, x] <- separation.matrix[x,
w] <- min(swx)
between.dist2 <- c(between.dist2, swx)
}
}
}
}
nwithin1 <- length(within.dist1)
nbetween1 <- length(between.dist1)
meanwithin1 <- mean(within.dist1)
meanbetween1 <- mean(between.dist1)
meanwithin2 <- mean(within.dist2)
meanbetween2 <- mean(between.dist2)
Index.15 <- (meanbetween2 - meanbetween1)/(meanwithin2 -
meanwithin1)
Index.28 <- (meanwithin1/nwithin1)/(meanbetween1/nbetween1)
results <- list(frey = Index.15, mcclain = Index.28)
return(results)
}
Indice.ptbiserial <- function(x, md, cl1) {
nn <- dim(x)[1]
pp <- dim(x)[2]
md2 <- as.matrix(md)
m01 <- array(NA, c(nn, nn))
nbr <- (nn * (nn - 1))/2
pb <- array(0, c(nbr, 2))
m3 <- 1
for (m1 in 2:nn) {
m12 <- m1 - 1
for (m2 in 1:m12) {
if (cl1[m1] == cl1[m2])
m01[m1, m2] <- 0
if (cl1[m1] != cl1[m2])
m01[m1, m2] <- 1
pb[m3, 1] <- m01[m1, m2]
pb[m3, 2] <- md2[m1, m2]
m3 <- m3 + 1
}
}
y <- pb[, 1]
x <- pb[, 2]
biserial.cor <- function(x, y, use = c("all.obs", "complete.obs"),
level = 1) {
if (!is.numeric(x))
stop("'x' must be a numeric variable.\n")
y <- as.factor(y)
if (length(levs <- levels(y)) > 2)
stop("'y' must be a dichotomous variable.\n")
if (length(x) != length(y))
stop("'x' and 'y' do not have the same length")
use <- match.arg(use)
if (use == "complete.obs") {
cc.ind <- complete.cases(x, y)
x <- x[cc.ind]
y <- y[cc.ind]
}
ind <- y == levs[level]
diff.mu <- mean(x[ind]) - mean(x[!ind])
prob <- mean(ind)
diff.mu * sqrt(prob * (1 - prob))/sd(x)
}
ptbiserial <- biserial.cor(x = pb[, 2], y = pb[, 1],
level = 2)
return(ptbiserial)
}
Indices.WKWL <- function(x, cl1 = cl1, cl2 = cl2) {
pp <- dim(x)[2]
wss <- function(x) {
x <- as.matrix(x)
n <- length(x)
centers <- matrix(nrow = 1, ncol = ncol(x))
if (ncol(x) == 1) {
centers[1, ] <- mean(x)
}
if (is.null(dim(x))) {
bb <- matrix(x, byrow = FALSE, nrow = 1, ncol = ncol(x))
centers[1, ] <- apply(bb, 2, mean)
} else {
centers[1, ] <- apply(x, 2, mean)
}
x.2 <- sweep(x, 2, centers[1, ], "-")
withins <- sum(x.2^2)
wss <- sum(withins)
return(wss)
}
ncg1 <- 1
ncg1max <- max(cl1)
while ((sum(cl1 == ncg1) == sum(cl2 == ncg1)) && ncg1 <=
ncg1max) {
ncg1 <- ncg1 + 1
}
g1 <- ncg1
ncg2 <- max(cl2)
nc2g2 <- ncg2 - 1
while ((sum(cl1 == nc2g2) == sum(cl2 == ncg2)) && nc2g2 >=
1) {
ncg2 <- ncg2 - 1
nc2g2 <- nc2g2 - 1
}
g2 <- ncg2
NK <- sum(cl2 == g1)
WK.x <- x[cl2 == g1, ]
WK <- wss(x = WK.x)
NL <- sum(cl2 == g2)
WL.x <- x[cl2 == g2, ]
WL <- wss(x = WL.x)
NM <- sum(cl1 == g1)
WM.x <- x[cl1 == g1, ]
WM <- wss(x = WM.x)
duda <- (WK + WL)/WM
BKL <- WM - WK - WL
pseudot2 <- BKL/((WK + WL)/(NK + NL - 2))
# beale removed
# beale <- (BKL/(WK + WL))/(((NM - 1)/(NM - 2)) * (2^(2/pp) -
# 1))
results <- list(duda = duda, pseudot2 = pseudot2, NM = NM,
NK = NK, NL = NL)
return(results)
}
# Indices.WBT <- function(x, cl, P, s, vv) {
# n <- dim(x)[1]
# pp <- dim(x)[2]
# qq <- max(cl)
# z <- matrix(0, ncol = qq, nrow = n)
# clX <- as.matrix(cl)
# for (i in 1:n) for (j in 1:qq) {
# z[i, j] == 0
# if (clX[i, 1] == j) {
# z[i, j] = 1
# }
# }
# xbar <- solve(t(z) %*% z) %*% t(z) %*% x
# B <- t(xbar) %*% t(z) %*% z %*% xbar
# W <- P - B
# marriot <- (qq^2) * det(W)
# trcovw <- sum(diag(cov(W)))
# tracew <- sum(diag(W))
# if (det(W) != 0)
# { scott <- n * log(det(P)/det(W))
# } else {
# print("Error: division by zero!")
# }
# friedman <- sum(diag(solve(W) * B))
# rubin <- sum(diag(P))/sum(diag(W))
# R2 <- 1 - sum(diag(W))/sum(diag(P))
# v1 <- 1
# u <- rep(0, pp)
# c <- (vv/(qq))^(1/pp)
# u <- s/c
# k1 <- sum((u >= 1) == TRUE)
# p1 <- min(k1, qq - 1)
# if (all(p1 > 0, p1 < pp)) {
# for (i in 1:p1) v1 <- v1 * s[i]
# c <- (v1/(qq))^(1/p1)
# u <- s/c
# b1 <- sum(1/(n + u[1:p1]))
# b2 <- sum(u[p1 + 1:pp]^2/(n + u[p1 + 1:pp]), na.rm = TRUE)
# E_R2 <- 1 - ((b1 + b2)/sum(u^2)) * ((n - qq)^2/n) *
# (1 + 4/n)
# ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * p1/2)/((0.001 +
# E_R2)^1.2))
# } else {
# b1 <- sum(1/(n + u))
# E_R2 <- 1 - (b1/sum(u^2)) * ((n - qq)^2/n) * (1 +
# 4/n)
# ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * pp/2)/((0.001 +
# E_R2)^1.2))
# }
# results <- list(ccc = ccc, scott = scott, marriot = marriot,
# trcovw = trcovw, tracew = tracew, friedman = friedman,
# rubin = rubin)
# return(results)
# }
# a modded (crippled) version of WBT to deal with singular W, B and T matrices
# I simply removed indices ccc, scott, marriot, and friedman, which
# required nonsingular matrices.
Indices.WBT_mod <- function(x, cl, P) {
n <- dim(x)[1]
pp <- dim(x)[2]
qq <- max(cl)
z <- matrix(0, ncol = qq, nrow = n)
clX <- as.matrix(cl)
for (i in 1:n) for (j in 1:qq) {
z[i, j] == 0
if (clX[i, 1] == j) {
z[i, j] = 1
}
}
xbar <- solve(t(z) %*% z) %*% t(z) %*% x
B <- t(xbar) %*% t(z) %*% z %*% xbar
W <- P - B
# marriot <- (qq^2) * det(W) # detW zero if pp > n
trcovw <- sum(diag(cov(W)))
tracew <- sum(diag(W))
# if (det(W) != 0)
# { scott <- n * log(det(P)/det(W))
# }
# else {
# print("Error: division by zero!")
# }
#friedman <- sum(diag(solve(W) * B))
rubin <- sum(diag(P))/sum(diag(W))
# R2 <- 1 - sum(diag(W))/sum(diag(P))
# v1 <- 1
# u <- rep(0, pp)
# c <- (vv/(qq))^(1/pp)
# u <- s/c
# k1 <- sum((u >= 1) == TRUE)
# p1 <- min(k1, qq - 1)
# if (all(p1 > 0, p1 < pp)) {
# for (i in 1:p1) v1 <- v1 * s[i]
# c <- (v1/(qq))^(1/p1)
# u <- s/c
# b1 <- sum(1/(n + u[1:p1]))
# b2 <- sum(u[p1 + 1:pp]^2/(n + u[p1 + 1:pp]), na.rm = TRUE)
# E_R2 <- 1 - ((b1 + b2)/sum(u^2)) * ((n - qq)^2/n) *
# (1 + 4/n)
# ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * p1/2)/((0.001 +
# E_R2)^1.2))
# } else {
# b1 <- sum(1/(n + u))
# E_R2 <- 1 - (b1/sum(u^2)) * ((n - qq)^2/n) * (1 +
# 4/n)
# ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * pp/2)/((0.001 +
# E_R2)^1.2))
# }
results <- list(trcovw = trcovw, tracew = tracew, rubin = rubin)
return(results)
}
Indices.Traces <- function(data, d, clall, index = "all") {
x <- data
cl0 <- clall[, 1]
cl1 <- clall[, 2]
cl2 <- clall[, 3]
clall <- clall
nb.cl0 <- table(cl0)
nb.cl1 <- table(cl1)
nb.cl2 <- table(cl2)
nb1.cl0 <- sum(nb.cl0 == 1)
nb1.cl1 <- sum(nb.cl1 == 1)
nb1.cl2 <- sum(nb.cl2 == 1)
gss <- function(x, cl, d) {
n <- length(cl)
k <- max(cl)
centers <- matrix(nrow = k, ncol = ncol(x))
for (i in 1:k) {
if (ncol(x) == 1) {
centers[i, ] <- mean(x[cl == i, ])
}
if (is.null(dim(x[cl == i, ]))) {
bb <- matrix(x[cl == i, ], byrow = FALSE, nrow = 1,
ncol = ncol(x))
centers[i, ] <- apply(bb, 2, mean)
} else {
centers[i, ] <- apply(x[cl == i, ], 2, mean)
}
}
allmean <- apply(x, 2, mean)
dmean <- sweep(x, 2, allmean, "-")
allmeandist <- sum(dmean^2)
withins <- rep(0, k)
x.2 <- (x - centers[cl, ])^2
for (i in 1:k) {
withins[i] <- sum(x.2[cl == i, ])
}
wgss <- sum(withins)
bgss <- allmeandist - wgss
results <- list(wgss = wgss, bgss = bgss, centers = centers)
return(results)
}
vargss <- function(x, clsize, varwithins) {
nvar <- dim(x)[2]
n <- sum(clsize)
k <- length(clsize)
varallmean <- rep(0, nvar)
varallmeandist <- rep(0, nvar)
varwgss <- rep(0, nvar)
for (l in 1:nvar) varallmean[l] <- mean(x[, l])
vardmean <- sweep(x, 2, varallmean, "-")
for (l in 1:nvar) {
varallmeandist[l] <- sum((vardmean[, l])^2)
varwgss[l] <- sum(varwithins[, l])
}
varbgss <- varallmeandist - varwgss
vartss <- varbgss + varwgss
zvargss <- list(vartss = vartss, varbgss = varbgss)
return(zvargss)
}
varwithinss <- function(x, centers, cluster) {
nrow <- dim(centers)[1]
nvar <- dim(x)[2]
varwithins <- matrix(0, nrow, nvar)
x <- (x - centers[cluster, ])^2
for (l in 1:nvar) {
for (k in 1:nrow) {
varwithins[k, l] <- sum(x[cluster == k, l])
}
}
return(varwithins)
}
indice.kl <- function(x, clall, d = NULL, centrotypes = "centroids") {
if (nb1.cl1 > 0) {
KL <- NA
}
if (sum(c("centroids", "medoids") == centrotypes) ==
0)
stop("Wrong centrotypes argument")
if ("medoids" == centrotypes && is.null(d))
stop("For argument centrotypes = 'medoids' d cannot be null")
if (!is.null(d)) {
if (!is.matrix(d)) {
d <- as.matrix(d)
}
row.names(d) <- row.names(x)
}
m <- ncol(x)
g <- k <- max(clall[, 2])
KL <- abs((g - 1)^(2/m) * gss(x, clall[, 1], d)$wgss -
g^(2/m) * gss(x, clall[, 2], d)$wgss)/abs((g)^(2/m) *
gss(x, clall[, 2], d)$wgss - (g + 1)^(2/m) *
gss(x, clall[, 3], d)$wgss)
return(KL)
}
indice.ch <- function(x, cl, d = NULL, centrotypes = "centroids") {
if (nb1.cl1 > 0) {
CH <- NA
}
if (sum(c("centroids", "medoids") == centrotypes) ==
0)
stop("Wrong centrotypes argument")
if ("medoids" == centrotypes && is.null(d))
stop("For argument centrotypes = 'medoids' d cannot be null")
if (!is.null(d)) {
if (!is.matrix(d)) {
d <- as.matrix(d)
}
row.names(d) <- row.names(x)
}
n <- length(cl)
k <- max(cl)
CH <- (gss(x, cl, d)$bgss/(k - 1))/(gss(x, cl, d)$wgss/(n -
k))
return(CH)
}
indice.hart <- function(x, clall, d = NULL, centrotypes = "centroids") {
if (sum(c("centroids", "medoids") == centrotypes) ==
0)
stop("Wrong centrotypes argument")
if ("medoids" == centrotypes && is.null(d))
stop("For argument centrotypes = 'medoids' d cannot be null")
if (!is.null(d)) {
if (!is.matrix(d)) {
d <- as.matrix(d)
}
row.names(d) <- row.names(x)
}
n <- nrow(x)
g <- max(clall[, 1])
HART <- (gss(x, clall[, 2], d)$wgss/gss(x, clall[,
3], d)$wgss - 1) * (n - g - 1)
return(HART)
}
indice.ball <- function(x, cl, d = NULL, centrotypes = "centroids") {
wgssB <- gss(x, cl, d)$wgss
qq <- max(cl)
ball <- wgssB/qq
return(ball)
}
indice.ratkowsky <- function(x, cl, d, centrotypes = "centroids") {
qq <- max(cl)
clsize <- table(cl)
centers <- gss(x, cl, d)$centers
varwithins <- varwithinss(x, centers, cl)
zvargss <- vargss(x, clsize, varwithins)
ratio <- mean(sqrt(zvargss$varbgss/zvargss$vartss))
ratkowsky <- ratio/sqrt(qq)
return(ratkowsky)
}
indice <- pmatch(index, c("kl", "ch", "hart", "ratkowsky",
"ball", "all"))
if (is.na(indice))
stop("invalid clustering index")
if (indice == -1)
stop("ambiguous index")
vecallindex <- numeric(5)
if (any(indice == 1) || (indice == 6))
vecallindex[1] <- indice.kl(x, clall, d)
if (any(indice == 2) || (indice == 6))
vecallindex[2] <- indice.ch(x, cl = clall[, 2], d)
if (any(indice == 3) || (indice == 6))
vecallindex[3] <- indice.hart(x, clall, d)
if (any(indice == 4) || (indice == 6))
vecallindex[4] <- indice.ratkowsky(x, cl = cl1, d)
if (any(indice == 5) || (indice == 6))
vecallindex[5] <- indice.ball(x, cl = cl1, d)
names(vecallindex) <- c("kl", "ch", "hart", "ratkowsky",
"ball")
if (indice < 6)
vecallindex <- vecallindex[indice]
return(vecallindex)
}
Indice.cindex <- function(d, cl) {
d <- data.matrix(d)
DU <- 0
r <- 0
v_max <- array(1, max(cl))
v_min <- array(1, max(cl))
for (i in 1:max(cl)) {
n <- sum(cl == i)
if (n > 1) {
t <- d[cl == i, cl == i]
DU = DU + sum(t)/2
v_max[i] = max(t)
if (sum(t == 0) == n)
{ v_min[i] <- min(t[t != 0])
} else v_min[i] <- 0
r <- r + n * (n - 1)/2
}
}
Dmin = min(v_min)
Dmax = max(v_max)
if (Dmin == Dmax)
{ result <- NA
} else result <- (DU - r * Dmin)/(Dmax * r - Dmin * r)
result
}
Indice.DB <- function(x, cl, d = NULL, centrotypes = "centroids",
p = 2, q = 2) {
if (sum(c("centroids") == centrotypes) == 0)
stop("Wrong centrotypes argument")
if (!is.null(d)) {
if (!is.matrix(d)) {
d <- as.matrix(d)
}
row.names(d) <- row.names(x)
}
if (is.null(dim(x))) {
dim(x) <- c(length(x), 1)
}
x <- as.matrix(x)
n <- length(cl)
k <- max(cl)
dAm <- d
centers <- matrix(nrow = k, ncol = ncol(x))
if (centrotypes == "centroids") {
for (i in 1:k) {
for (j in 1:ncol(x)) {
centers[i, j] <- mean(x[cl == i, j])
}
}
} else {
stop("wrong centrotypes argument")
}
S <- rep(0, k)
for (i in 1:k) {
ind <- (cl == i)
if (sum(ind) > 1) {
centerI <- centers[i, ]
centerI <- rep(centerI, sum(ind))
centerI <- matrix(centerI, nrow = sum(ind), ncol = ncol(x),
byrow = TRUE)
S[i] <- mean(sqrt(apply((x[ind, ] - centerI)^2,
1, sum))^q)^(1/q)
} else S[i] <- 0
}
M <- as.matrix(dist(centers, p = p))
R <- array(Inf, c(k, k))
r = rep(0, k)
for (i in 1:k) {
for (j in 1:k) {
R[i, j] = (S[i] + S[j])/M[i, j]
}
r[i] = max(R[i, ][is.finite(R[i, ])])
}
DB = mean(r[is.finite(r)])
resul <- list(DB = DB, r = r, R = R, d = M, S = S, centers = centers)
resul
}
Indice.S <- function(d, cl) {
d <- as.matrix(d)
Si <- 0
for (k in 1:max(cl)) {
if ((sum(cl == k)) <= 1)
{ Sil <- 1
} else {
Sil <- 0
for (i in 1:length(cl)) {
if (cl[i] == k) {
ai <- sum(d[i, cl == k])/(sum(cl == k) -
1)
dips <- NULL
for (j in 1:max(cl)) if (cl[i] != j)
if (sum(cl == j) != 1)
{ dips <- cbind(dips, c((sum(d[i, cl ==
j]))/(sum(cl == j))))
} else dips <- cbind(dips, c((sum(d[i, cl ==
j]))))
bi <- min(dips)
Sil <- Sil + (bi - ai)/max(c(ai, bi))
}
}
}
Si <- Si + Sil
}
Si/length(cl)
}
Indice.Gap <- function(x, clall, reference.distribution = "unif",
B = 10, method = "ward", d = NULL, centrotypes = "centroids") {
GAP <- function(X, cl, referenceDistribution, B, method,
d, centrotypes) {
simgap <- function(Xvec) {
ma <- max(Xvec)
mi <- min(Xvec)
Xout <- runif(length(Xvec), min = mi, max = ma)
return(Xout)
}
pcsim <- function(X, d, centrotypes) {
if (centrotypes == "centroids") {
Xmm <- apply(X, 2, mean)
}
for (k in (1:dim(X)[2])) {
X[, k] <- X[, k] - Xmm[k]
}
ss <- svd(X)
Xs <- X %*% ss$v
Xnew <- apply(Xs, 2, simgap)
Xt <- Xnew %*% t(ss$v)
for (k in (1:dim(X)[2])) {
Xt[, k] <- Xt[, k] + Xmm[k]
}
return(Xt)
}
if (is.null(dim(x))) {
dim(x) <- c(length(x), 1)
}
ClassNr <- max(cl)
Wk0 <- 0
WkB <- matrix(0, 1, B)
for (bb in (1:B)) {
if (reference.distribution == "unif")
{ Xnew <- apply(X, 2, simgap)
} else if (reference.distribution == "pc")
{ Xnew <- pcsim(X, d, centrotypes)
} else stop("Wrong reference distribution type")
if (bb == 1) {
pp <- cl
if (ClassNr == length(cl))
{ pp2 <- 1:ClassNr
} else if (method == "k-means")
{ pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
} else if (method == "single" || method == "complete" ||
method == "average" || method == "ward" ||
method == "mcquitty" || method == "median" ||
method == "centroid")
{ pp2 <- cutree(hclust(dist(Xnew), method = method),
ClassNr)
} else stop("Wrong clustering method")
if (ClassNr > 1) {
for (zz in (1:ClassNr)) {
Xuse <- X[pp == zz, ]
Wk0 <- Wk0 + sum(diag(var(Xuse))) * (length(pp[pp ==
zz]) - 1)/(dim(X)[1] - ClassNr)
Xuse2 <- Xnew[pp2 == zz, ]
WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
(length(pp2[pp2 == zz]) - 1)/(dim(X)[1] -
ClassNr)
}
}
if (ClassNr == 1) {
Wk0 <- sum(diag(var(X)))
WkB[1, bb] <- sum(diag(var(Xnew)))
}
}
if (bb > 1) {
if (ClassNr == length(cl))
{ pp2 <- 1:ClassNr
}else if (method == "k-means")
{ pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
}else if (method == "single" || method == "complete" ||
method == "average" || method == "ward" ||
method == "mcquitty" || method == "median" ||
method == "centroid")
{ pp2 <- cutree(hclust(dist(Xnew), method = method),
ClassNr)
} else stop("Wrong clustering method")
if (ClassNr > 1) {
for (zz in (1:ClassNr)) {
Xuse2 <- Xnew[pp2 == zz, ]
WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
length(pp2[pp2 == zz])/(dim(X)[1] - ClassNr)
}
}
if (ClassNr == 1) {
WkB[1, bb] <- sum(diag(var(Xnew)))
}
}
}
Sgap <- mean(log(WkB[1, ])) - log(Wk0)
Sdgap <- sqrt(1 + 1/B) * sqrt(var(log(WkB[1, ]))) *
sqrt((B - 1)/B)
resul <- list(Sgap = Sgap, Sdgap = Sdgap)
resul
}
if (sum(c("centroids", "medoids") == centrotypes) ==
0)
stop("Wrong centrotypes argument")
if ("medoids" == centrotypes && is.null(d))
stop("For argument centrotypes = 'medoids' d cannot be null")
if (!is.null(d)) {
if (!is.matrix(d)) {
d <- as.matrix(d)
}
row.names(d) <- row.names(x)
}
X <- as.matrix(x)
gap1 <- GAP(X, clall[, 1], reference.distribution, B,
method, d, centrotypes)
gap <- gap1$Sgap
gap2 <- GAP(X, clall[, 2], reference.distribution, B,
method, d, centrotypes)
diffu <- gap - (gap2$Sgap - gap2$Sdgap)
resul <- list(gap = gap, diffu = diffu)
resul
}
Index.sdindex <- function(x, clmax, cl) {
x <- as.matrix(x)
Alpha <- Dis(clmax, x)
Scatt <- Average.scattering(cl, x)$scatt
Dis0 <- Dis(cl, x)
SD.indice <- Alpha * Scatt + Dis0
return(SD.indice)
}
Index.SDbw <- function(x, cl) {
x <- as.matrix(x)
Scatt <- Average.scattering(cl, x)$scatt
Dens.bw <- density.bw(cl, x)
SDbw <- Scatt + Dens.bw
return(SDbw)
}
Index.Dindex <- function(cl, x) {
x <- as.matrix(x)
distance <- density.clusters(cl, x)$distance
n <- length(distance)
S <- 0
for (i in 1:n) S <- S + distance[i]
inertieIntra <- S/n
return(inertieIntra)
}
Index.dunn <- function(md, clusters, Data = NULL, method = "euclidean") {
distance <- as.matrix(md)
nc <- max(clusters)
interClust <- matrix(NA, nc, nc)
intraClust <- rep(NA, nc)
for (i in 1:nc) {
c1 <- which(clusters == i)
for (j in i:nc) {
if (j == i)
intraClust[i] <- max(distance[c1, c1])
if (j > i) {
c2 <- which(clusters == j)
interClust[i, j] <- min(distance[c1, c2])
}
}
}
dunn <- min(interClust, na.rm = TRUE)/max(intraClust)
return(dunn)
}
## end functions
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.