# R/barcode_tree.R

#' Barcode tree
#'
#' Evaluation of the mclean approach through a range of thresholds. A single
#' hierarchical clustering is shared by all thresholds: \code{mclean()} is
#' called once per value of \code{sequence}, the \code{idnodes} table of each
#' call is tagged with its threshold, and the stacked result is passed through
#' \code{sort_components()}.
#'
#' @param distance_matrix a dissimilarity structure as produced by dist.
#' @param sequence numeric vector of thresholds or cutoffs used to cluster
#'   elements. It is equivalent to the height in the dendrogram. Must contain
#'   at least one value.
#' @param method the agglomeration method to be used. This should be (an
#'   unambiguous abbreviation of) one of "ward.D", "ward.D2", "single",
#'   "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC)
#'   or "centroid" (= UPGMC).
#' @param tree a tree as produced by hclust. If the tree is not provided
#'   (\code{NULL}, the default), the function generates it automatically
#'   through the hclust() function using \code{method}.
#'
#' @return It returns a data frame with the following variables:
#' \itemize{
#'   \item threshold: cutoff evaluated
#'   \item id: data element
#'   \item name: id of the node
#'   \item components: cluster id
#'   \item informap: result of infomap algorithm
#'   \item plot_components: cluster id created for the plot representation
#' }
#'
#' @author Daniel Alcaide, \email{daniel.alcaide@@esat.kuleuven.be}
#' @references Alcaide D, Aerts J. (2018) MCLEAN: Multilevel Clustering Exploration As Network. PeerJ Computer Science 4:e145 \url{https://doi.org/10.7717/peerj-cs.145}
#'
#' @importFrom igraph graph_from_data_frame
#' @importFrom igraph components
#' @importFrom igraph cluster_infomap
#' @importFrom dplyr select
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom dplyr ungroup
#' @importFrom dplyr mutate
#' @importFrom dplyr rename
#' @importFrom dplyr arrange
#' @importFrom reshape2 melt
#' @importFrom stats hclust
#' @importFrom stats cutree
#' @importFrom stats reshape
#' @importFrom magrittr %>%
#'
#' @examples
#' data("synthetic_distances")
#' barcode <- barcode_tree(
#'   distance_matrix = synthetic_distances,
#'   sequence = seq(from = 100, to = 300, by = 25)
#' )
#' plot_barcode_tree(barcode)
#'
#' @export
barcode_tree <- function(distance_matrix = NULL,
                         sequence = NULL,
                         method = "single",
                         tree = NULL) {

  # ---- Input validation ----
  # Fail early with a clear message instead of an obscure error raised deep
  # inside hclust() or sort_components().
  if (is.null(distance_matrix)) {
    stop("`distance_matrix` must be provided.", call. = FALSE)
  }
  if (length(sequence) == 0L) {
    stop("`sequence` must contain at least one threshold.", call. = FALSE)
  }

  # ---- Clustering ----
  # Build the tree only when the caller did not supply one; the same tree is
  # reused for every threshold.
  if (is.null(tree)) {
    tree <- stats::hclust(distance_matrix, method = method)
  }

  # ---- Loop and concatenation ----
  # One mclean() run per threshold; each idnodes table gets a `threshold`
  # column so the rows remain identifiable after stacking.
  per_threshold <- lapply(sequence, function(threshold) {
    cbind(
      threshold = threshold,
      mclean(
        distance_matrix = distance_matrix,
        threshold = threshold,
        method = method,
        tree = tree
      )$idnodes
    )
  })

  sort_components(do.call(rbind, per_threshold))
}
# danielalcaide/mclean documentation built on May 28, 2019, 7:51 p.m.