#' Multilevel clustering exploration as network
#'
#' MCLEAN performs hierarchical clustering and transforms the output into a network.
#'
#' @param distance_matrix a dissimilarity structure as produced by dist.
#' @param threshold numeric scalar cutoff used to cluster elements. It is equivalent to the height in the dendrogram.
#' @param method the agglomeration method to be used. This should be (an unambiguous abbreviation of) one of "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).
#' @param tree a tree as produced by hclust. If the tree is not provided, the function generates it automatically through hclust() function.
#' @param node_simplification logical value controlling the node aggregation. If TRUE (the default) the nodes are aggregated; set it to FALSE to disable the node aggregation.
#'
#' @return It returns a list containing the following elements:
#' \itemize{
#' \item nodes: a data frame with the definition of the nodes. The variable \code{name} is the node identifier used internally; \code{nodes} is the best candidate rowname used in the simplification procedure; \code{size} indicates the number of data elements in the node; \code{components} is the cluster id (connected component identification); \code{infomap} is the result of the infomap algorithm.
#' \item links: a data frame with the links of the network defined by \code{source} and \code{target}. The variable \code{size} determines the number of data elements connected.
#' \item idnodes: a data frame with the id and clusters of each data element. The variable \code{id} represents the id (rowname) of the element in the distance matrix; \code{name} is the identifier of the node where the data element is assigned; \code{components} is the cluster id; \code{infomap} is the result of the infomap algorithm.
#' \item nameList: the \code{id} values of \code{idnodes} grouped by node \code{name}, as returned by \code{tapply}.
#' }
#'
#' @author Daniel Alcaide, \email{daniel.alcaide@@esat.kuleuven.be}
#' @references Alcaide D, Aerts J. (2018) MCLEAN: Multilevel Clustering Exploration As Network. PeerJ Computer Science 4:e145 \url{https://doi.org/10.7717/peerj-cs.145}
#'
#' @importFrom igraph graph_from_data_frame
#' @importFrom igraph components
#' @importFrom igraph cluster_infomap
#' @importFrom dplyr select
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#' @importFrom dplyr ungroup
#' @importFrom dplyr mutate
#' @importFrom dplyr rename
#' @importFrom reshape2 melt
#' @importFrom stats hclust
#' @importFrom stats cutree
#' @importFrom magrittr %>%
#'
#' @examples
#' data("synthetic_distances")
#' output = mclean(distance_matrix = synthetic_distances, threshold = 187, method = "single")
#' plot_network(output)
#'
#' # Without node aggregation
#' distance_matrix = as.dist(matrix ( c( 0, 2, 3, 5, 7, 8,10, 9,11,11,
#' 2, 0, 2, 5, 6, 7, 9, 8, 9,10,
#' 3, 2, 0, 2, 6,10,13,10, 9,11,
#' 5, 5, 2, 0, 4, 6, 8, 6, 7, 9,
#' 7, 6, 6, 4, 0, 6, 7, 6, 6, 8,
#' 8, 7,10, 6, 6, 0, 3, 5, 5, 6,
#' 10, 9,13, 8, 7, 3, 0, 2, 6, 4,
#' 9, 8,10, 6, 6, 5, 2, 0, 4, 6,
#' 11, 9, 9, 7, 6, 5, 6, 4, 0, 7,
#' 11,10,11, 9, 8, 6, 4, 6, 7, 0), byrow = TRUE, ncol = 10))
#'
#' plot_network( mclean(distance_matrix = distance_matrix, threshold = 4.1, method = "average", node_simplification = FALSE) )
#'
#' @export
mclean <- function(distance_matrix = NULL,
                   threshold = NULL,
                   method = "single",
                   tree = NULL,
                   node_simplification = TRUE,
                   ...) {
  # NOTE: `...` is accepted but not used anywhere in this function body.

  # ---- Argument checks ----
  # Both the distance matrix and the cut-off are mandatory.
  if (is.null(distance_matrix)) {
    stop("A distance matrix must be provided")
  }
  if (is.null(threshold)) {
    stop("A threshold value must be provided")
  }

  # ---- Clustering stage: elements are grouped according to the threshold ----
  connections <- hc_connections(
    distance_matrix = distance_matrix,
    threshold = threshold,
    method = method,
    tree = tree
  )

  # ---- Node simplification ----
  # Pick the node builder once, then call it: aggregation is used when
  # `node_simplification` is TRUE, the non-grouping variant otherwise.
  build_nodes <- if (node_simplification) grouping_nodes else no_grouping_nodes
  aggregated_nodes <- build_nodes(connections = connections)

  # ---- Network transformation ----
  network <- transform_network(
    connections = connections,
    aggregated_nodes = aggregated_nodes
  )
  node_table <- network$nodes
  link_table <- network$links

  # ---- igraph structure ----
  # The `nodes` column is dropped from the vertex table handed to igraph.
  graph <- igraph::graph_from_data_frame(
    link_table,
    vertices = dplyr::select(node_table, -nodes),
    directed = FALSE
  )

  # ---- Connected components of the nodes ----
  component_ids <- igraph::components(graph)$membership

  # ---- Infomap ----
  # Vertex weights are the node sizes scaled by their Euclidean norm.
  node_sizes <- node_table$size
  infomap_ids <- igraph::cluster_infomap(
    graph,
    v.weights = node_sizes / sqrt(sum(node_sizes^2)),
    modularity = FALSE
  )$membership

  # ---- Append connected components and infomap clustering ----
  node_table <- cbind(
    node_table,
    components = component_ids,
    infomap = infomap_ids
  )

  # Table used to highlight ids in the network representation in shiny:
  # join the aggregated nodes with the node table (without `size`) on their
  # shared columns, then drop the `nodes` column.
  node_lookup <- dplyr::select(node_table, -size)
  merged_nodes <- merge(aggregated_nodes, node_lookup)
  idnodes <- dplyr::select(merged_nodes, -nodes)

  # Ids of the data elements grouped by the node they belong to.
  nameList <- tapply(idnodes$id, idnodes$name, identity)

  list(
    nodes = node_table,
    links = link_table,
    idnodes = idnodes,
    nameList = nameList
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.