R/SphericalKmeans.r

Defines functions SphericalKmeans

# Spherical k-means: clusters data objects by cosine similarity to a set of
# unit-length "concept vectors" (normalised cluster centroids).
#
# Arguments:
#   X:        input data (matrix or data frame) of dimension N by P, one data
#             object per row; every row is rescaled with norml2() before use
#   groups:   a pre-specified number of clusters
#   tol:      convergence threshold on the summed absolute change of all
#             concept vectors between two consecutive iterations
#   iter.max: maximum number of iterations; the default (Inf) iterates until
#             the concept vectors stop moving, as the function always did
#
# Value: a list with components
#   cluster:  cluster membership (index of the best concept vector) per object
#   concepts: the final concept vectors, one per row
#   dist:     for each object, its largest inner product with any concept
#             vector, i.e. the similarity to its own cluster's concept vector
#             (larger = closer; the name is kept for backward compatibility)
SphericalKmeans = function(X, groups, tol = .0001, iter.max = Inf)
{
  # step 0: initialization

  # make sure the data is normalized to have unit length
  # (norml2 is defined elsewhere in the package -- presumably L2 normalisation)
  X = apply(X, 1, norml2) # dimension P by N
  iter = 0 # iteration counter

  # step 1: initialize K concept vectors from an ordinary k-means fit
  centroids = kmeans(t(X), centers = groups, nstart = 10)$centers
  concepts = t(apply(centroids, 1, norml2)) # groups x P

  repeat
  {
    iter = iter + 1

    # similarity of every data object to every concept vector (N x groups);
    # dimnames are stripped so the outputs stay unnamed vectors
    sim = unname(crossprod(X, t(concepts)))

    # assign data objects that share the same closest concept vector into the
    # same cluster; closest concept vector <=> largest cosine similarity
    cluster = apply(sim, 1, which.max)

    # update the location of all concept vectors
    tmpConcepts = concepts
    for (i in seq_len(groups)) {
      members = which(cluster == i)

      # an empty cluster has no mean direction: keep its previous concept
      # vector instead of producing NaN (which used to abort the loop)
      if (length(members) == 0) next

      # drop = FALSE keeps a one-member cluster as a P x 1 matrix
      centre = rowSums(X[, members, drop = FALSE]) / length(members)
      updated = as.numeric(norml2(centre))

      # members that cancel out give a zero mean that cannot be normalised;
      # keep the previous concept vector in that case as well
      if (all(is.finite(updated))) tmpConcepts[i, ] = updated
    }

    # stop once the concept vectors have (almost) stopped moving
    if (sum(abs(tmpConcepts - concepts)) <= tol) break

    # stop before adopting the update so that the returned concepts are the
    # ones the returned memberships and similarities were computed against
    if (iter >= iter.max) {
      warning("SphericalKmeans did not converge in ", iter, " iterations",
              call. = FALSE)
      break
    }

    concepts = tmpConcepts
  }

  # construct a list of outputs
  spherical.obj = list()
  spherical.obj$cluster = cluster # the cluster memberships
  spherical.obj$concepts = concepts # the final location of K concept vectors
  spherical.obj$dist = apply(sim, 1, max) # similarity to the closest concept
  return(spherical.obj)
}
hankuipeng/HKCluster documentation built on May 27, 2019, 8:45 a.m.