R/lloyd.R

Defines functions lloyd

#' Lloyd's algorithm for k-means clustering
#'
#' Alternates between assigning every observation to its nearest centroid and
#' recomputing each centroid as the column means of its assigned observations,
#' until the centroids stop changing or `maxIterations` passes have been made.
#'
#' @param z Observations, one per row (matrix or data frame of numeric
#'   columns).
#' @param centers Starting centroids, one per row, with the same number of
#'   columns as `z`.
#' @param maxIterations Maximum number of assignment/update passes; a single
#'   number >= 1.
#'
#' @return A list with elements
#'   \describe{
#'     \item{cluster}{cluster index (row of `centroids`) for each observation}
#'     \item{centroids}{matrix of final centroids, one row per cluster}
#'     \item{iterations}{pass at which the loop stopped; equals
#'       `maxIterations + 1` when convergence was not reached}
#'     \item{withinSS}{within-cluster sum of squares, one value per cluster}
#'     \item{withinTot}{sum of `withinSS`}
#'   }
#'
#' @details Stops with an error if any cluster ends up without observations,
#'   and warns if the centroids have not converged after `maxIterations`
#'   passes. Relies on the helper `distances()` defined elsewhere in the
#'   package; it is assumed to return a matrix with one row per observation
#'   and one column per centroid (confirm against its definition).
lloyd <- function(z, centers, maxIterations) {
  # without at least one pass there is no clustering to report
  if (!is.numeric(maxIterations) || length(maxIterations) != 1L ||
      is.na(maxIterations) || maxIterations < 1) {
    stop("maxIterations must be a single number >= 1", call. = FALSE)
  }

  # initialization
  iter <- 1L
  conv <- FALSE
  numCent <- as.integer(nrow(centers))

  # update until convergence or maximum number of iterations is reached
  while (!conv && (iter <= maxIterations)) {
    distanceMatrix <- distances(z, centers)

    # cluster allocation: index of the nearest centroid for every row
    cluster <- apply(distanceMatrix, 1, which.min)

    # check for empty clusters
    if (length(unique(cluster)) < numCent) {
      stop("empty cluster: please provide better starting centroids",
           call. = FALSE)
    }

    # updated centroids; rbind() over a list always yields one row per
    # cluster, even when z has a single column (sapply() would collapse
    # that case to a plain vector and flip the orientation)
    splittedData <- split(as.data.frame(z), f = cluster)
    centroidsNew <- do.call(rbind, lapply(splittedData, colMeans))

    # check for convergence on the values only: user-supplied centers rarely
    # carry the dimnames that the recomputed centroids get
    unchanged <- isTRUE(all.equal(as.matrix(centers), centroidsNew,
                                  check.attributes = FALSE))
    if (unchanged) {
      conv <- TRUE
    } else {
      iter <- iter + 1L
      centers <- centroidsNew
    }
  }

  # warning if no convergence after maxIterations
  if (!conv) {
    warning(paste("No convergence after", maxIterations, "iterations"),
            call. = FALSE)
  }

  # compute within sum of squares for each cluster; transposing puts the
  # variables in rows so the centroid vector recycles along each observation
  withinSS <- vapply(seq_along(splittedData), function(k) {
    deviations <- t(as.matrix(splittedData[[k]])) - centroidsNew[k, ]
    sum(deviations^2)
  }, numeric(1))
  withinTot <- sum(withinSS)

  # store output
  list(
    cluster = cluster,
    centroids = centroidsNew,
    iterations = iter,
    withinSS = withinSS,
    withinTot = withinTot
  )
}
heiligerl/kMeans_Rpackage documentation built on Aug. 16, 2020, 4:04 p.m.