#' Lloyd's algorithm for k-means clustering
#'
#' Alternates between assigning every observation to its nearest centroid and
#' recomputing each centroid as the column means of its assigned observations,
#' until the centroids stop changing (up to numerical tolerance) or
#' `maxIterations` is exhausted.
#'
#' @param z Data with one observation per row; it is converted with
#'   `as.data.frame()` before being split by cluster.
#' @param centers Starting centroids, one per row (`nrow(centers)` clusters).
#' @param maxIterations Maximum number of iterations; a single number >= 1.
#' @return A list with elements
#'   \describe{
#'     \item{cluster}{cluster index of every observation,}
#'     \item{centroids}{matrix of final centroids, one row per cluster,}
#'     \item{iterations}{iteration at which convergence was detected, or
#'       `maxIterations + 1` if the algorithm did not converge,}
#'     \item{withinSS}{within-cluster sum of squares, one value per cluster,}
#'     \item{withinTot}{sum of `withinSS`.}
#'   }
#'   An error is raised when a cluster becomes empty, and a warning is issued
#'   when no convergence is reached within `maxIterations` iterations.
lloyd <- function(z, centers, maxIterations) {
  # Without at least one pass there is no clustering to report, so fail early
  # with a clear message.
  if (!is.numeric(maxIterations) || length(maxIterations) != 1L ||
      is.na(maxIterations) || maxIterations < 1) {
    stop("maxIterations must be a single number >= 1", call. = FALSE)
  }
  # initialization
  iter <- 1L
  conv <- FALSE
  numCent <- as.integer(nrow(centers))
  # update until convergence or maximum number of iterations is reached
  while (!conv && (iter <= maxIterations)) {
    # `distances()` is defined elsewhere; presumably it returns one row per
    # observation and one column per centroid -- confirm against its source.
    distanceMatrix <- distances(z, centers)
    # cluster allocation: index of the closest centroid for every row
    cluster <- apply(distanceMatrix, 1, which.min)
    # check for empty clusters
    if (length(unique(cluster)) < numCent) {
      stop("empty cluster: please provide better starting centroids",
           call. = FALSE)
    }
    # updated centroids; rbind() over a list always yields one row per
    # cluster, even when the data has a single column (t(sapply()) does not)
    splittedData <- split(as.data.frame(z), f = cluster)
    centroidsNew <- do.call(rbind, lapply(splittedData, colMeans))
    # check for convergence on the values only: the new centroids carry
    # dimnames that user-supplied starting centers usually lack
    if (isTRUE(all.equal(centers, centroidsNew, check.attributes = FALSE))) {
      conv <- TRUE
    } else {
      iter <- iter + 1L
      centers <- centroidsNew
    }
  }
  # warning if no convergence after maxIterations
  if (!conv) {
    warning(paste("No convergence after", maxIterations, "iterations"),
            call. = FALSE)
  }
  # compute within sum of squares for each cluster: squared deviations of the
  # cluster's observations from its centroid (row i of centroidsNew)
  withinSS <- vapply(
    seq_along(splittedData),
    function(i) {
      sum(sweep(as.matrix(splittedData[[i]]), 2, centroidsNew[i, ])^2)
    },
    numeric(1)
  )
  withinTot <- sum(withinSS)
  # store output
  list(
    cluster = cluster,
    centroids = centroidsNew,
    iterations = iter,
    withinSS = withinSS,
    withinTot = withinTot
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.