#' Spherical k-means clustering
#'
#' Groups the rows of `X` by direction rather than by Euclidean position:
#' every data object and every cluster representative ("concept vector") is
#' passed through `norml2`, and each object is assigned to the concept vector
#' with which it has the largest inner product.
#'
#' NOTE(review): `norml2` is defined outside this function; it is assumed to
#' rescale a numeric vector to unit L2 length (in which case the inner
#' products below are cosine similarities) -- confirm against its definition.
#'
#' @param X Input data frame (or numeric matrix) of dimension N by P, one
#'   data object per row.
#' @param groups A pre-specified number of clusters.
#' @param tol Convergence tolerance: iteration stops once the summed absolute
#'   change of all concept vectors between two iterations is at most `tol`.
#'   Defaults to the previously hard-coded value of 1e-4.
#' @param max_iter Upper bound on the number of iterations. A warning is
#'   issued when it is reached without convergence.
#' @return A list with elements
#'   * `cluster`: cluster membership (1..groups) of every data object,
#'   * `concepts`: a groups-by-P matrix holding the final concept vectors,
#'   * `dist`: for every data object, its largest inner product with any
#'     concept vector (i.e. the similarity to its own cluster's concept),
#'   * `iter`: the number of iterations that were run.
SphericalKmeans <- function(X, groups, tol = 1e-4, max_iter = 1000L) {
  stopifnot(
    is.numeric(groups), length(groups) == 1L, groups >= 1,
    is.numeric(tol), length(tol) == 1L, tol >= 0,
    is.numeric(max_iter), length(max_iter) == 1L, max_iter >= 1
  )

  # step 0: initialization
  # Normalize every data object (row of X). apply() stores each result as a
  # column, so from here on X has dimension P by N.
  X <- apply(X, 1, norml2)
  if (anyNA(X)) {
    # Fail with a clear message instead of an obscure error further down.
    stop(
      "normalized data contains NA/NaN values ",
      "(missing input values or rows that cannot be normalized)",
      call. = FALSE
    )
  }
  N <- ncol(X) # number of data objects
  iter <- 0L
  converged <- FALSE
  cluster <- rep(0, N) # cluster membership of every data object
  # N x groups matrix of inner products between data objects and concept
  # vectors (larger means closer). Preallocated without dimnames and filled
  # in place so that `cluster` and the returned `dist` stay unnamed.
  sim <- matrix(0, nrow = N, ncol = groups)

  # step 1: initialize the concept vectors from an ordinary k-means run
  centroids <- kmeans(t(X), centers = groups, nstart = 10)$centers
  concepts <- t(apply(centroids, 1, norml2)) # groups x P
  new_concepts <- concepts # candidate concept vectors for the next iteration

  repeat {
    iter <- iter + 1L

    # inner product of every data object with every concept vector
    sim[] <- crossprod(X, t(concepts))

    # closest concept vector <=> largest inner product
    cluster <- apply(sim, 1, which.max)

    # update the location of all concept vectors
    for (i in seq_len(groups)) {
      members <- X[, cluster == i, drop = FALSE]
      if (ncol(members) == 0L) {
        # An empty cluster has no mean (0 / 0 = NaN would poison the
        # convergence test), so its concept vector keeps its previous value.
        next
      }
      new_concepts[i, ] <- as.vector(
        norml2(rowSums(members) / ncol(members))
      )
    }

    # Stop before overwriting `concepts`, so that the returned concept
    # vectors are exactly the ones `cluster` and `sim` were computed from.
    if (sum(abs(new_concepts - concepts)) <= tol) {
      converged <- TRUE
      break
    }
    if (iter >= max_iter) {
      break
    }
    concepts <- new_concepts
  }

  if (!converged) {
    warning(
      "SphericalKmeans did not converge in ", iter, " iterations",
      call. = FALSE
    )
  }

  # construct a list of outputs
  list(
    cluster = cluster,
    concepts = concepts,
    dist = apply(sim, 1, max),
    iter = iter
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.