R/cluster.dis.R

#' cluster.dis function
#'
#' This is one of a few functions created by Joe Cauteruccio, Jessie Li, Andrew West of Yale University that are used together to create the hclust_eval() function.
#'
#'@keywords cluster.dis
#'@export
#'
cluster.dis <- function(data, hclust.obj, SPRS_V){
  meth <- hclust.obj$method
  cd_vec <- rep(0, nrow(data)-1)
  cluster.mat <- affected.rows(hclust.obj$merge)[[1]]
  affected.rows <- affected.rows(hclust.obj$merge)[[2]]

  if (meth == "ward") cd_vec <- SPRS_V

  if (meth == "average"){
    for (i in 1:length(cd_vec)){
      merged.set <- hclust.obj$merge[nrow(data)-i, ]
      if (sum(sign(merged.set)) == -2){
        temp.1 <- -merged.set[1]
        temp.2 <- -merged.set[2]
      }

      if (sum(sign(merged.set)) == 0){
        temp.1 <- -merged.set[sign(merged.set) == -1]
        temp.2 <- which(cluster.mat[nrow(data)-merged.set[sign(merged.set) == 1], ] == merged.set[sign(merged.set) == 1])
      }

      if (sum(sign(merged.set)) == 2){
        temp.1 <- which(cluster.mat[nrow(data)-merged.set[1], ] == merged.set[1])
        temp.2 <- which(cluster.mat[nrow(data)-merged.set[2], ] == merged.set[2])
      }

      each.dis <- rep(0, length(temp.1)*length(temp.2))
      temp.sum <- 0
      for (j in 1:length(temp.1)){
        for (k in 1:length(temp.2)){
          temp.sum <- temp.sum+dist(rbind(data[temp.1[j], ], data[temp.2[k], ]))
        }
      }
      cd_vec[i] <- temp.sum/(length(temp.1)*length(temp.2))
    }
  }

  if (meth == "single"){
    for (i in 1:length(cd_vec)){
      merged.set <- hclust.obj$merge[nrow(data)-i, ]
      if (sum(sign(merged.set)) == -2){
        temp.1 <- -merged.set[1]
        temp.2 <- -merged.set[2]
      }

      if (sum(sign(merged.set)) == 0){
        temp.1 <- -merged.set[sign(merged.set) == -1]
        temp.2 <- which(cluster.mat[nrow(data)-merged.set[sign(merged.set) == 1], ] == merged.set[sign(merged.set) == 1])
      }

      if (sum(sign(merged.set)) == 2){
        temp.1 <- which(cluster.mat[nrow(data)-merged.set[1], ] == merged.set[1])
        temp.2 <- which(cluster.mat[nrow(data)-merged.set[2], ] == merged.set[2])
      }

      each.dis <- matrix(0, nrow=length(temp.1), ncol=length(temp.2))
      for (j in 1:length(temp.1)){
        for (k in 1:length(temp.2)){
          each.dis[j, k] <- dist(rbind(data[temp.1[j], ], data[temp.2[k], ]))
        }
      }
      cd_vec[i] <- min(each.dis)
    }
  }

  if (meth == "complete"){
    for (i in 1:length(cd_vec)){
      merged.set <- hclust.obj$merge[nrow(data)-i, ]
      if (sum(sign(merged.set)) == -2){
        temp.1 <- -merged.set[1]
        temp.2 <- -merged.set[2]
      }

      if (sum(sign(merged.set)) == 0){
        temp.1 <- -merged.set[sign(merged.set) == -1]
        temp.2 <- which(cluster.mat[nrow(data)-merged.set[sign(merged.set) == 1], ] == merged.set[sign(merged.set) == 1])
      }

      if (sum(sign(merged.set)) == 2){
        temp.1 <- which(cluster.mat[nrow(data)-merged.set[1], ] == merged.set[1])
        temp.2 <- which(cluster.mat[nrow(data)-merged.set[2], ] == merged.set[2])
      }

      each.dis <- matrix(0, nrow=length(temp.1), ncol=length(temp.2))
      for (j in 1:length(temp.1)){
        for (k in 1:length(temp.2)){
          each.dis[j, k] <- dist(rbind(data[temp.1[j], ], data[temp.2[k], ]))
        }
      }
      cd_vec[i] <- max(each.dis)
    }
  }

  if (meth == "mcquitty"){
    print("Mcquitty agglomeration for cluster distance metric not currently supported")
  }

  if (meth == "median"){
    print("Median agglomeration for cluster distance metric not currently supported")
  }

  if (meth == "centroid"){
    print("Centroid agglomeration for cluster distance metric not currently supported")
  }

  return(cd_vec)
}
18kimn/yalestats documentation built on May 9, 2019, 2:17 a.m.