index.DB: Calculates Davies-Bouldin's index

Description Usage Arguments Details Value Author(s) References See Also Examples

View source: R/index.DB.r

Description

Calculates Davies-Bouldin's cluster separation measure

Usage

1
index.DB(x, cl, d=NULL, centrotypes="centroids", p=2, q=2)

Arguments

x

data

cl

vector of integers indicating the cluster to which each object is allocated

d

optional distance matrix, used for calculations if centrotypes="medoids"

centrotypes

"centroids" or "medoids"

p

the power of the Minkowski distance between centroids or medoids of clusters: p=1 - Manhattan distance; p=2 - Euclidean distance

q

the power of the dispersion measure of a cluster: q=1 - the average distance of objects in the r-th cluster to the centroid or medoid of the r-th cluster; q=2 - the standard deviation of the distance of objects in the r-th cluster to the centroid or medoid of the r-th cluster

Details

See file ../doc/indexDB_details.pdf for further details

Thanks to Prof. Christian Hennig (c.hennig@ucl.ac.uk) for finding and fixing the "immutable p" error.

Value

DB

Davies-Bouldin's index

r

vector of maximal R values for each cluster

R

R matrix with entries $R_{rs} = (S_r + S_s)/d_{rs}$

d

matrix of distances between centroids or medoids of clusters

S

vector of dispersion measures for each cluster

centers

coordinates of centroids or medoids for all clusters

Author(s)

Marek Walesiak marek.walesiak@ue.wroc.pl, Andrzej Dudek andrzej.dudek@ue.wroc.pl

Department of Econometrics and Computer Science, University of Economics, Wroclaw, Poland http://keii.ue.wroc.pl/clusterSim/

References

Davies, D.L., Bouldin, D.W. (1979), A cluster separation measure, IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 1, no. 2, 224-227. Available at: doi: 10.1109/TPAMI.1979.4766909.

See Also

index.G1, index.G2, index.G3, index.C, index.S, index.H, index.Gap, index.KL

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Example 1
# Davies-Bouldin index of a 4-cluster PAM partition, computed around medoids
# (the distance matrix is passed through the `d` argument).
library(clusterSim)
data(data_ratio)
pam_fit <- pam(data_ratio, 4)
dist_mat <- dist(data_ratio)
db_medoids <- index.DB(
  data_ratio,
  pam_fit$clustering,
  d = dist_mat,
  centrotypes = "medoids"
)
print(db_medoids)

# Example 2
# Davies-Bouldin index of a 5-cluster PAM partition, computed around centroids
# (no distance matrix is supplied in this variant).
library(clusterSim)
data(data_ratio)
pam_fit <- pam(data_ratio, 5)
db_centroids <- index.DB(
  data_ratio,
  pam_fit$clustering,
  centrotypes = "centroids"
)
print(db_centroids)

# Example 3
# Scan complete-linkage partitions with min_nc..max_nc clusters and report
# the one with the smallest Davies-Bouldin index (centroid-based).
library(clusterSim)
data(data_ratio)
md <- dist(data_ratio, method = "euclidean")
# nc - number_of_clusters
min_nc <- 2
max_nc <- 8
nc_range <- min_nc:max_nc
# Column 1: number of clusters; column 2: DB index of that partition.
res <- array(0, c(length(nc_range), 2))
res[, 1] <- nc_range
# The dendrogram does not depend on nc, so it is built once and only
# re-cut inside the loop.
hc <- hclust(md, method = "complete")
partitions <- vector("list", length(nc_range))
for (nc in nc_range) {
  row <- nc - min_nc + 1
  cl2 <- cutree(hc, k = nc)
  res[row, 2] <- index.DB(data_ratio, cl2, centrotypes = "centroids")$DB
  partitions[[row]] <- cl2
}
# One row per candidate number of clusters, bound in a single call rather
# than growing the matrix with rbind() on every iteration.
clusters <- do.call(rbind, partitions)
best <- which.min(res[, 2])
print(paste("min DB for", nc_range[best], "clusters=", min(res[, 2])))
print("clustering for min DB")
print(clusters[best, ])
#write.table(res,file="DB_res.csv",sep=";",dec=",",row.names=TRUE,col.names=FALSE)
plot(res, type = "p", pch = 0, xlab = "Number of clusters", ylab = "DB",
     xaxt = "n")
axis(1, c(min_nc:max_nc))

# Example 4
# Same scan as a model-selection loop, but on ordinal data: GDM2 distances,
# complete-linkage partitions with min_nc..max_nc clusters, and the
# Davies-Bouldin index computed around medoids (distance matrix passed as d).
library(clusterSim)
data(data_ordinal)
md <- dist.GDM(data_ordinal, method = "GDM2")
# nc - number_of_clusters
min_nc <- 2
max_nc <- 6
nc_range <- min_nc:max_nc
# Column 1: number of clusters; column 2: DB index of that partition.
res <- array(0, c(length(nc_range), 2))
res[, 1] <- nc_range
# The dendrogram does not depend on nc, so it is built once and only
# re-cut inside the loop.
hc <- hclust(md, method = "complete")
partitions <- vector("list", length(nc_range))
for (nc in nc_range) {
  row <- nc - min_nc + 1
  cl2 <- cutree(hc, k = nc)
  res[row, 2] <- index.DB(data_ordinal, cl2, d = md,
                          centrotypes = "medoids")$DB
  partitions[[row]] <- cl2
}
# One row per candidate number of clusters, bound in a single call rather
# than growing the matrix with rbind() on every iteration.
clusters <- do.call(rbind, partitions)
best <- which.min(res[, 2])
print(paste("min DB for", nc_range[best], "clusters=", min(res[, 2])))
print("clustering for min DB")
print(clusters[best, ])
#write.table(res,file="DB_res.csv",sep=";",dec=",",row.names=TRUE,col.names=FALSE)
plot(res, type = "p", pch = 0, xlab = "Number of clusters", ylab = "DB",
     xaxt = "n")
axis(1, c(min_nc:max_nc))

Example output

Loading required package: cluster
Loading required package: MASS
Warning messages:
1: In rgl.init(initValue, onlyNULL) : RGL: unable to open X11 display
2: 'rgl_init' failed, running with rgl.useNULL = TRUE 
3: .onUnload failed in unloadNamespace() for 'rgl', details:
  call: fun(...)
  error: object 'rgl_quit' not found 
$DB
[1] 1.554805

$r
[1] 1.503425 1.239799 1.737997 1.737997

$R
          [,1]      [,2]     [,3]      [,4]
[1,]       Inf 0.9747895 1.503425 0.8465858
[2,] 0.9747895       Inf 1.202350 1.2397988
[3,] 1.5034250 1.2023504      Inf 1.7379974
[4,] 0.8465858 1.2397988 1.737997       Inf

$d
          1        2        3         4
1  0.000000 9.985020 7.363969 13.325498
2  9.985020 0.000000 8.773559  8.677936
3  7.363969 8.773559 0.000000  6.960181
4 13.325498 8.677936 6.960181  0.000000

$S
[1] 5.127787 4.605506 5.943387 6.153389

$centers
          [,1]      [,2]      [,3]      [,4]     [,5]
[1,]  5.045025  4.951188 11.628062  9.599697 11.73980
[2,]  5.271406 14.745776 10.936124  9.189059 13.49138
[3,] 11.360173  8.595020 11.130270 10.162903 12.45003
[4,] 13.440925 14.850236  9.170466 11.229936 12.36360

$DB
[1] 1.323947

$r
[1] 1.296852 1.229451 1.432706 1.228020 1.432706

$R
          [,1]      [,2]     [,3]      [,4]      [,5]
[1,]       Inf 0.9147717 1.296852 0.9158899 0.7486153
[2,] 0.9147717       Inf 1.229451 0.6148297 1.0109233
[3,] 1.2968522 1.2294510      Inf 1.2280198 1.4327057
[4,] 0.9158899 0.6148297 1.228020       Inf 1.0119114
[5,] 0.7486153 1.0109233 1.432706 1.0119114       Inf

$d
          1         2        3         4         5
1  0.000000 10.005518 7.078517 10.009291 14.144205
2 10.005518  0.000000 7.084201 14.145862 10.009124
3  7.078517  7.084201 0.000000  7.104381  7.081344
4 10.009291 14.145862 7.104381  0.000000 10.013820
5 14.144205 10.009124 7.081344 10.013820  0.000000

$S
[1] 4.811439 4.341327 4.368352 4.355969 5.777130

$centers
     [,1] [,2]      [,3]      [,4]     [,5]
[1,]    5    5 10.009904  9.931666 12.38105
[2,]    5   15 10.103268 10.098965 12.65251
[3,]   10   10 10.009018  9.682935 12.58971
[4,]   15    5 10.104154 10.347696 12.44385
[5,]   15   15  9.773656  9.938738 12.43287

[1] "min DB for 5 clusters= 1.22279793154775"
[1] "clustering for min DB"
 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 
 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2  2 
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 
 2  2  2  2  1  1  3  1  3  1  1  1  1  1  1  3  1  3  3  4  4  4  4  4  4  4 
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 
 4  4  4  4  4  4  4  4  5  3  5  3  3  5  5  3  5  3  5  3  3  5  5 
[1] "min DB for 4 clusters= 1.44588132257998"
[1] "clustering for min DB"
 [1] 1 2 3 1 3 1 2 2 3 2 2 2 4 1 1 3 2 1 3 1 2 2 3 3 4 4

clusterSim documentation built on Jan. 8, 2021, 2:13 a.m.