R/hcluster.torus.R

Defines functions hcluster.torus

hcluster.torus <- function(data, J = 4, d, method = "complete", members = NULL){

  # hierarchical clustering for the data
  # this function will not be exported: only use for initializing method for ellip.kmeans.torus
  n <- nrow(data)
  p <- ncol(data)

  hc = stats::hclust(d, method = method, members = members)

  membership <- stats::cutree(hc, J)

  # -------- initializing ---------------
  hcluster <- list(data = data, centers = NULL,
                   membership = membership, totss = NULL, withinss = NULL,
                   betweenss = NULL, size = NULL)

  hcluster$size <- rep(0, J)
  hcluster$centers <- matrix(0, nrow = J, ncol = p)
  hcluster$withinss <- rep(0, J)

  # -------- calculate withinss -----------
  for(j in 1:J){
    hcluster$size[j] <- sum(membership == j)
    nj <- hcluster$size[j]
    hcluster$centers[j, ] <- wtd.stat.ang(data[membership == j, ],
                                          w = rep(1, nj) / nj)$Mean

    # if the size of cluster is 1, withinss is 0
    if (nj == 1) { next }

    # the case for the cluster size larger than 1
    j.mean <- hcluster$centers[j, ]
    hcluster$withinss[j] <- sum(tor.minus(data[hcluster$membership == j, ], j.mean)^2)
  }

  # --------- calculate totss ------------

  tot.mean <- wtd.stat.ang(data, rep(1, n) / n)$Mean
  hcluster$totss <- sum(tor.minus(data, tot.mean)^2)

  # --------- calculate betweenss ----------

  # if there is only one cluster, then center.distmat must be 0
  center.distmat <- ang.pdist(hcluster$centers)
  hcluster$betweenss <- sum(center.distmat^2)

  return(hcluster)
}
sungkyujung/ClusTorus documentation built on April 30, 2022, 9:18 p.m.