#' Berenhaut Community Detection on Networks
#'
#' Performs community detection on networks using average shortest path distance
#' of vertex neighbors as the set of dissimilarities for heirarcal clustering
#' with "average" as the agglomeration method. If shortest paths are supplied,
#' this function is ~50\% faster. It is also best to use a symmetric "matrix"
#' rather than a "dist" object if performance is critical. Time complexity is
#' O(n^2 log n) and space complexity is O(n^2). This function returns an
#' \code{\link{hclust}} object rather than an igraph \code{\link{communities}}
#' object because the algorithm does not attempt to use modularity use or
#' maximize modularity (although the resulting partitioning results in a maximum
#' modularity that is competitive with the top community detection algorithms).
#'
#' @param graph an igraph object.
#' @param nodes a subset of the nodes in the graph to cluster, defaults to all
#' nodes (full community detection)
#' @param short_paths either a \eqn{|V|x|V|} symmetric dissimilarity matrix of the
#' vertices in \code{graph} or a \code{\link{dist}} object from the
#' \pkg{\link{stats}} package. If \code{NULL} (default), the shortest paths will
#' be calculated. Note: much faster if \code{short_paths} is
#' provided and much faster if \code{short_paths} is of class "matrix" rather than
#' "dist".
#' @return \code{cluster_redwalk} returns a \code{\link{hclust}} object from
#' the \pkg{\link{stats}} package.
#' @export
#' @keywords cluster graphs
#'
#' @examples
#' require(stats, quietly = TRUE)
#' require(Rcpp, quietly = TRUE)
#' cb <- cluster_redwalk(karate)
#' ## cb is an hclust object
#' plot(cb)
#' ## membership for different community sizes (k)
#' cutree(cb, k = 2)
#' cutree(cb, k = 3)
#'
#' ## using precomputed shortest paths
#' sp <- shortest_path_lengths(dolphins)
#' cbd <- cluster_redwalk(dolphins, short_paths = sp)
#' plot(cbd)
#' cutree(cbd, k = 2)
#' cutree(cbd, k = 4)
#'
cluster_redwalk <- function(graph, nodes = V(graph), short_paths = NULL)
{
check_graph(graph);
nv <- vcount(graph);
edglst <- as_edgelist(graph, names = FALSE) - 1;
storage.mode(edglst) <- "integer";
ns0 <- length(nodes);
nodes <- sort(unique(as.integer(nodes)));
ns <- length(nodes);
stopifnot(ns0 == ns, ns >= 2, all(nodes >= 1), all(nodes <= nv));
all_nodes <- (ns == nv);
# Check if short_paths is provided, calculate if not
# TODO: really bad that shortest paths is a matrix rather than dist
if(is.null(short_paths)) {
if(all_nodes) {
D <- dissimilarity_c(nv, edglst);
} else {
D <- dissimilarity_subsets_c(nv, edglst, nodes-1);
}
} else {
# if given shortest paths as dist, convert to matrix
if("dist" %in% class(short_paths)) {
short_paths <- as.matrix(short_paths);
}
# need short_paths to be a matrix
if(!("matrix" %in% class(short_paths))) {
stop("short_paths must be of class matrix or dist");
}
# check square
dim_sp <- dim(short_paths);
if(dim_sp[1] != dim_sp[2])
stop("short_paths has incorrect dimensions"); #need square
if(all_nodes) {
if(dim_sp[1] != nv)
stop("short_paths has incorrect dimensions");
D <- dissimilarity_sp_c(nv, edglst, short_paths);
} else {
if(dim_sp[1] != nv & dim_sp[1] != ns)
stop("short_paths has incorrect dimensions");
if(dim_sp[1] == nv)
short_paths <- short_paths[nodes,nodes];
D <- dissimilarity_subsets_sp_c(nv, edglst, nodes-1, short_paths);
}
}
# return the hclust object
if(requireNamespace("fastcluster", quietly = TRUE)) {
return(fastcluster::hclust(D, method = "average"));
} else {
return(stats::hclust(D, method = "average"));
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.