Description Usage Arguments Author(s) Examples
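Takes a data frame and a number of clusters, runs k-means repeatedly on the scaled data, groups the resulting centroids with hierarchical clustering, and uses the averaged centroids of each group as the starting centres of a final k-means run. The function returns the original data frame with an added cluster_number column giving each row's cluster.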
hkclustering(df, numbk, t)
df |
Original dataframe to cluster |
numbk |
The number of clusters: used for every k-means run and as the number of groups when cutting the hierarchical tree |
t |
Number of k-means runs whose centroids are pooled before the hierarchical clustering step |
Kaloyan S, kaloyanS@profusion.com
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
# Build a two-column data frame of uniformly distributed random values
a <- runif(500, min = 3.5, max = 2000)
b <- runif(500, min = 1.5, max = 2000)
df <- data.frame(a, b)
# Cluster the rows into 4 groups, pooling centroids from 100 k-means runs
results.hkclust <- hkclustering(df, 4, 100)
centroidssummary(results.hkclust)
# Scatterplot matrix of the two features, coloured by the cluster in column 3
pairs(results.hkclust[, 1:2], col = (1:10)[results.hkclust[, 3]])
## The function is currently defined as
# Hybrid hierarchical / k-means clustering.
#
# Runs k-means `t` times on the scaled data, pools the centres found by all
# runs, groups the pooled centres into `numbk` groups with centroid-linkage
# hierarchical clustering, and uses the mean centre of each group as the
# starting centres of one final k-means run.
#
# Arguments:
#   df     Data frame to cluster; it is passed to scale(), so its columns
#          must be numeric.
#   numbk  Number of clusters, used for every k-means run and for cutting
#          the hierarchical tree.
#   t      Number of k-means runs to pool; a single number >= 1.
#
# Returns `df` with one extra column, `cluster_number`, holding the cluster
# assigned to each row by the final k-means run.
#
# The global `.Random.seed` is deliberately left alone: every kmeans() call
# advances the RNG stream and therefore already starts from different random
# centres, while deleting the seed modified the caller's global environment
# and made results impossible to reproduce with set.seed().
hkclustering <- function(df, numbk, t) {
  if (!is.numeric(t) || length(t) != 1L || is.na(t) || t < 1) {
    stop("`t` must be a single number >= 1", call. = FALSE)
  }

  scaled_df <- scale(df)

  # One centre matrix per run. seq_len() gives exactly `t` runs, including
  # t = 1, where the former `2:t` loop counted backwards and ran twice more.
  run_centers <- lapply(
    seq_len(t),
    function(i) kmeans(scaled_df, numbk)$centers
  )
  pooled <- do.call(rbind, run_centers)
  # Cluster labels repeat across runs, so drop them instead of keeping
  # duplicated row names.
  rownames(pooled) <- NULL
  pooled <- as.data.frame(pooled)

  # NOTE(review): the hclust() documentation indicates that centroid linkage
  # is meant for squared Euclidean distances; plain distances are kept here
  # so existing results do not change -- confirm whether that is intended.
  tree <- hclust(dist(pooled, method = "euclidean"), method = "centroid")
  group <- cutree(tree, k = numbk)

  # Mean centre per group; the first column of the aggregate() result is the
  # grouping key, everything after it is a feature column.
  group_means <- aggregate(pooled, by = list(group), FUN = mean)
  # kmeans() needs a matrix of centres; a one-column data frame would be
  # mistaken for a cluster count because its length is 1.
  seeds <- as.matrix(group_means[, -1L, drop = FALSE])

  final <- kmeans(scaled_df, seeds)
  cbind(df, cluster_number = final$cluster)
}
|
b a cluster_number
1 1611.0023 1555.4959 1
2 1507.2915 519.5899 2
3 546.3772 1498.6844 3
4 489.2429 496.5099 4
# Hybrid hierarchical / k-means clustering.
#
# Runs k-means `t` times on the scaled data, pools the centres found by all
# runs, groups the pooled centres into `numbk` groups with centroid-linkage
# hierarchical clustering, and uses the mean centre of each group as the
# starting centres of one final k-means run.
#
# Arguments:
#   df     Data frame to cluster; it is passed to scale(), so its columns
#          must be numeric.
#   numbk  Number of clusters, used for every k-means run and for cutting
#          the hierarchical tree.
#   t      Number of k-means runs to pool; a single number >= 1.
#
# Returns `df` with one extra column, `cluster_number`, holding the cluster
# assigned to each row by the final k-means run.
#
# The global `.Random.seed` is deliberately left alone: every kmeans() call
# advances the RNG stream and therefore already starts from different random
# centres, while deleting the seed modified the caller's global environment
# and made results impossible to reproduce with set.seed().
hkclustering <- function(df, numbk, t) {
  if (!is.numeric(t) || length(t) != 1L || is.na(t) || t < 1) {
    stop("`t` must be a single number >= 1", call. = FALSE)
  }

  scaled_df <- scale(df)

  # One centre matrix per run. seq_len() gives exactly `t` runs, including
  # t = 1, where the former `2:t` loop counted backwards and ran twice more.
  run_centers <- lapply(
    seq_len(t),
    function(i) kmeans(scaled_df, numbk)$centers
  )
  pooled <- do.call(rbind, run_centers)
  # Cluster labels repeat across runs, so drop them instead of keeping
  # duplicated row names.
  rownames(pooled) <- NULL
  pooled <- as.data.frame(pooled)

  # NOTE(review): the hclust() documentation indicates that centroid linkage
  # is meant for squared Euclidean distances; plain distances are kept here
  # so existing results do not change -- confirm whether that is intended.
  tree <- hclust(dist(pooled, method = "euclidean"), method = "centroid")
  group <- cutree(tree, k = numbk)

  # Mean centre per group; the first column of the aggregate() result is the
  # grouping key, everything after it is a feature column.
  group_means <- aggregate(pooled, by = list(group), FUN = mean)
  # kmeans() needs a matrix of centres; a one-column data frame would be
  # mistaken for a cluster count because its length is 1.
  seeds <- as.matrix(group_means[, -1L, drop = FALSE])

  final <- kmeans(scaled_df, seeds)
  cbind(df, cluster_number = final$cluster)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.