index.H | R Documentation |
Calculates Hartigan index
index.H (x,clall,d=NULL,centrotypes="centroids")
x |
data |
clall |
Two vectors of integers indicating the cluster to which each object is allocated in partition of n objects into u and u+1 clusters |
d |
optional distance matrix, used for calculations if centrotypes="medoids" |
centrotypes |
"centroids" or "medoids" |
See file $R_HOME\library\clusterSim\pdf\indexH_details.pdf for further details
Hartigan index
Marek Walesiak marek.walesiak@ue.wroc.pl, Andrzej Dudek andrzej.dudek@ue.wroc.pl
Department of Econometrics and Computer Science, University of Economics, Wroclaw, Poland
Hartigan, J. (1975), Clustering algorithms, Wiley, New York. ISBN 047135645X.
Milligan, G.W., Cooper, M.C. (1985), An examination of procedures of determining the number of cluster in a data set, "Psychometrika", vol. 50, no. 2, 159-179. Available at: \Sexpr[results=rd]{tools:::Rd_expr_doi("10.1007/BF02294245")}.
Tibshirani, R., Walther, G., Hastie, T. (2001), Estimating the number of clusters in a data set via the gap statistic, "Journal of the Royal Statistical Society", ser. B, vol. 63, part 2, 411-423. Available at: \Sexpr[results=rd]{tools:::Rd_expr_doi("10.1111/1467-9868.00293")}.
index.G1
, index.G2
, index.G3
, index.C
,
index.S
, index.KL
, index.Gap
, index.DB
# Example 1
library(clusterSim)
data(data_ratio)
cl1<-pam(data_ratio,4)
cl2<-pam(data_ratio,5)
clall<-cbind(cl1$clustering,cl2$clustering)
index.H(data_ratio,clall)
# Example 2
library(clusterSim)
data(data_ratio)
md <- dist(data_ratio, method="euclidean")
# nc - number_of_clusters
min_nc=1
max_nc=20
min <- 0
res <- array(0, c(max_nc-min_nc+1, 2))
res[,1] <- min_nc:max_nc
found <- FALSE
clusters <- NULL
for (nc in min_nc:max_nc)
{
print(nc)
hc <- hclust(md, method="complete")
cl1 <- cutree(hc, k=nc)
cl2 <- cutree(hc, k=nc+1)
clall <- cbind(cl1,cl2)
res[nc-min_nc+1,2] <- H <- index.H(data_ratio,clall,centrotypes="centroids")
if ((res[nc-min_nc+1, 2]<10) && (!found)){
nc1 <- nc
min <- H
clopt <- cl1
found <- TRUE
}
}
if (found)
{
print(paste("minimal nc for H<=10 equals",nc1,"for H=",min))
print("clustering for minimal nc where H<=10")
print(clopt)
}else
{
print("Clustering not found with H<=10")
}
#write.table(res,file="H_res.csv",sep=";",dec=",",row.names=TRUE,col.names=FALSE)
plot(res,type="p",pch=0,xlab="Number of clusters",ylab="H",xaxt="n")
abline(h=10, untf=FALSE)
axis(1, c(min_nc:max_nc))
# Example 3
library(clusterSim)
data(data_ratio)
md <- dist(data_ratio, method="manhattan")
# nc - number_of_clusters
min_nc=1
max_nc=20
min <- 0
res <- array(0, c(max_nc-min_nc+1, 2))
res[,1] <- min_nc:max_nc
found <- FALSE
clusters <- NULL
for (nc in min_nc:max_nc)
{
print(nc)
hc <- hclust(md, method="complete")
cl1 <- cutree(hc, k=nc)
cl2 <- cutree(hc, k=nc+1)
clall <- cbind(cl1,cl2)
res[nc-min_nc+1,2] <- H <- index.H(data_ratio,clall,d=md,centrotypes="medoids")
if ((res[nc-min_nc+1, 2]<10) && (!found)){
nc1 <- nc
min <- H
clopt <- cl1
found <- TRUE
}
}
if (found)
{
print(paste("minimal nc for H<=10 equals",nc1,"for H=",min))
print("clustering for minimal nc where H<=10")
print(clopt)
}else
{
print("Clustering not found with H<=10")
}
#write.table(res,file="H_res.csv",sep=";",dec=",",row.names=TRUE,col.names=FALSE)
plot(res,type="p",pch=0,xlab="Number of clusters",ylab="H",xaxt="n")
abline(h=10, untf=FALSE)
axis(1, c(min_nc:max_nc))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.