R/find-centers-kpp.R

Defines functions find_centers_kpp

Documented in find_centers_kpp

#' Generate initial centroids for clustering randomly
#'
#' @param x
#'   An object which can be coerced to [`matrix`],
#'   typically [`matrix`] itself or [`data.frame`].
#' @param k A number of clusters.
#' @param given Given centers. Not yet implemented.
#'
#' @importFrom matrixStats colSums2
#'
#'
find_centers_kpp <- function(x, k, given = NULL) {
  # check parameters
  if (!is.null(given) && nrow(x) < k)
    stop("Number of given centroids must be smaller than k")
  if (k < 2L) stop("k must be a numeric >= 2")

  # transform parameters
  x <- as.matrix(x)
  x_trans <- t(x)

  # calculation
  n <- nrow(x) # number of data points
  n_seq <- seq(n)
  centers <- numeric(k) # IDs of centers
  distances <- matrix(numeric(n * (k - 1L)), ncol = k - 1L)
  # distances[i, j]: The distance between x[i,] and x[centers[j],]
  pr <- rep(1L, n) # probability for sampling centers
  for (i in seq(k - 1L)) {
    centers[i] <- sample.int(n, 1L, prob = pr) # Pick up the ith center
    distances[, i] <- colSums2((x_trans - x[centers[i], ])^2)
    # Compute (the square of) distances to the center
    pr <- distances[cbind(n_seq, max.col(-distances[, 1:i, drop = FALSE]))]
    # Compute probaiblity for the next sampling
  }
  centers[k] <- sample.int(n, 1L, prob = pr)

  data.frame(phase = centers, x[centers, ])
}
atusy/qntmap documentation built on April 11, 2021, 4:45 p.m.