R/SLCluster.r

Defines functions SLCluster

# Single-linkage agglomerative hierarchical clustering.
#
# Arguments:
#   X      - numeric data frame or matrix, one observation per row; pairwise
#            Euclidean distances are computed with dist().
#   labels - optional observation labels, stored in the result and used when
#            plotting the cluster dendrogram.
#
# Value:
#   A list of class 'hclust' with components
#     merge  - (N-1) x 2 matrix; row i holds the two clusters joined at step i
#              (negative = single observation, positive = the cluster formed
#              at that earlier step),
#     height - the distance at which each merge happened,
#     labels - the supplied labels (component is absent when labels is NULL),
#     order  - a permutation of the observations, derived from merge, in
#              which every merged cluster occupies a contiguous run.
#
# Stops with an error when X has fewer than two rows or when any pairwise
# distance is not finite.
SLCluster <- function(X, labels = NULL) {
  # full symmetric matrix of pairwise Euclidean distances
  dist.mat <- as.matrix(dist(X, method = "euclidean"))
  N <- nrow(dist.mat)

  if (N < 2) {
    stop("must have n >= 2 objects to cluster", call. = FALSE)
  }
  if (!all(is.finite(dist.mat))) {
    stop("all pairwise distances must be finite (check X for NA/NaN/Inf)",
         call. = FALSE)
  }

  # Inf marks "never merge": the diagonal now, within-cluster cells later
  dimnames(dist.mat) <- NULL
  diag(dist.mat) <- Inf

  merge <- matrix(0, nrow = N - 1, ncol = 2)
  height <- numeric(N - 1)

  # clus.id[k]: id of the cluster currently holding observation k
  # (-k while it is still a singleton, otherwise the step that formed it)
  clus.id <- -seq_len(N)

  # leaves[[i]]: observations of the cluster formed at step i, in plot order
  leaves <- vector("list", N - 1)

  for (i in seq_len(N - 1)) {

    # step 1: pick ONE closest pair. which.min() returns the first minimum in
    # column-major order, so tied pairs are merged one per iteration instead
    # of all being fused into the same cluster at once.
    pos <- which.min(dist.mat)
    r <- (pos - 1) %% N + 1
    k <- (pos - 1) %/% N + 1

    ids <- clus.id[c(r, k)]
    merge[i, ] <- ids
    height[i] <- dist.mat[pos]

    # leaf order of the new cluster: leaves of the first id, then the second
    first <- if (ids[1] < 0) -ids[1] else leaves[[ids[1]]]
    second <- if (ids[2] < 0) -ids[2] else leaves[[ids[2]]]
    leaves[[i]] <- c(first, second)

    # step 2: single-linkage update. All rows belonging to one cluster are
    # kept identical, so rows r and k stand for their whole clusters and the
    # element-wise minimum is the new cluster's distance to everything else.
    members <- clus.id == ids[1] | clus.id == ids[2]
    new.dist <- pmin(dist.mat[r, ], dist.mat[k, ])
    new.dist[members] <- Inf

    dist.mat[members, ] <- rep(new.dist, each = sum(members))
    dist.mat[, members] <- new.dist  # recycled once per member column

    # name the new cluster by the current iteration number
    clus.id[members] <- i
  }

  # assemble the hclust-compatible result; assigning a NULL labels value
  # leaves the component out of the list
  slc.obj <- list(merge = merge, height = height)
  slc.obj$labels <- labels
  slc.obj$order <- leaves[[N - 1]]
  class(slc.obj) <- "hclust"

  slc.obj
}
hankuipeng/HKCluster documentation built on May 27, 2019, 8:45 a.m.