R/hc_connections.R

Defines functions hc_connections

# Hierarchical clustering connections
#
# Cuts a hierarchical clustering at height `threshold` and returns every pair
# of elements that (a) fall in the same cluster and (b) are no further apart
# than `threshold` in `distance_matrix`.
#
# Arguments:
#   distance_matrix  Distances between elements; anything `as.matrix()` can
#                    turn into a matrix. Row/column labels are used as element
#                    ids and are coerced with `as.numeric()`. When `tree` is
#                    NULL it is passed unchanged to `stats::hclust()`.
#   threshold        Single number: the height at which the tree is cut and
#                    the maximum distance kept in the result.
#   method           Agglomeration method forwarded to `stats::hclust()`.
#                    Only used (and then required) when `tree` is NULL.
#   tree             Optional precomputed clustering inheriting from "hclust".
#
# Returns:
#   A data.frame with columns `row` and `col`, one line per retained
#   connection. Only pairs with `row <= col` are reported, which includes the
#   self-connections on the diagonal.
hc_connections = function(distance_matrix = NULL, threshold = NULL, method = NULL, tree = NULL) {

  # ---- Argument validation ----
  if (is.null(distance_matrix)) {
    stop("distance_matrix must be supplied", call. = FALSE)
  }
  if (!is.numeric(threshold) || length(threshold) != 1L || is.na(threshold)) {
    stop("threshold must be a single non-missing number", call. = FALSE)
  }

  # ---- Hierarchical clustering for the selected threshold ----
  if (is.null(tree)) {
    if (is.null(method)) {
      stop("method must be supplied when tree is NULL", call. = FALSE)
    }
    tree = stats::hclust(distance_matrix, method = method)
  } else if (!inherits(tree, "hclust")) {
    # inherits() also accepts objects that carry additional classes
    stop("tree object is not an hclust object", call. = FALSE)
  }
  membership = stats::cutree(tree, h = threshold)

  # ---- Transformation of the distance_matrix into data.frame format ----
  dist_mat = as.matrix(distance_matrix)
  # Ids come from the dimnames; fall back to positions when there are none
  axis_ids = function(labels, n) {
    if (is.null(labels)) seq_len(n) else as.numeric(labels)
  }
  row_ids = axis_ids(rownames(dist_mat), nrow(dist_mat))
  col_ids = axis_ids(colnames(dist_mat), ncol(dist_mat))

  if (length(membership) != length(row_ids)) {
    stop("tree and distance_matrix describe a different number of elements",
         call. = FALSE)
  }

  # One line per matrix cell, in the column-major order of as.vector()
  distance_matrix_df = data.frame(
    row = row_ids[as.vector(row(dist_mat))],
    col = col_ids[as.vector(col(dist_mat))],
    value = as.vector(dist_mat)
  )

  # ---- All possible connections inside each cluster ----
  members_by_cluster = unname(split(row_ids, membership))
  candidate_pairs = do.call(
    rbind,
    lapply(members_by_cluster, function(ids) expand.grid(row = ids, col = ids))
  )

  # ---- Upper-triangle connections that are not above the threshold ----
  # which() discards comparisons that evaluate to NA instead of keeping them
  keep = which(distance_matrix_df$value <= threshold &
                 distance_matrix_df$row <= distance_matrix_df$col)
  close_pairs = distance_matrix_df[keep, c("row", "col"), drop = FALSE]

  # Inner join on the shared `row`/`col` columns
  merge(candidate_pairs, close_pairs)
}
danielalcaide/mclean documentation built on May 28, 2019, 7:51 p.m.