# jamenrich-communities-to-nodegroups.R
#' Convert communities object to nodegroups list format
#'
#' Convert communities object to nodegroups list format
#'
#' Note that this function is "lossy", in that the output `list`
#' does not contain all the information necessary to reconstitute
#' the input `communities` object in detail. However, the output
#' `list` can be converted to a `communities` object that will
#' be accepted by most `igraph` related functions that require
#' that object type as an input value.
#'
#'
#' @family jam igraph functions
#'
#' @return `list` of `character` vectors, where each vector contains
#' names of `igraph` nodes. When `algorithm` is defined in the
#' input object, it is included as an attribute of the output `list`,
#' accessible with `attr(out, "algorithm")`.
#'
#' When optional value `"cluster_names"` is present in the `communities`
#' object, they are used to define the output `list` names.
#'
#' @param wc `communities` object as returned by `igraph` functions
#' such as `cluster_optimal()`, `cluster_walktrap()`, or
#' `cluster_leading_eigen()`.
#' @param ... additional arguments are ignored.
#'
#' @export
communities2nodegroups <- function
(wc,
...)
{
#
if (length(wc$names) == 0) {
wc$names <- seq_along(wc$membership);
}
nodegroups <- split(
wc$names,
wc$membership)
if ("cluster_names" %in% names(wc)) {
names(nodegroups) <- wc$cluster_names
}
if ("algorithm" %in% names(wc)) {
attr(nodegroups, "algorithm") <- wc$algorithm;
} else {
attr(nodegroups, "algorithm") <- "nodegroups";
}
return(nodegroups)
}
#' Convert nodegroups list to communities object
#'
#' Convert nodegroups list to communities object
#'
#' Note that this function is "lossy", in that the output `communities`
#' object will not contain any supporting data specific to the
#' community detection algorithm originally used.
#' However, the output `communities` object will be accepted
#' by most `igraph` related functions that require
#' that object type as an input value.
#'
#' The `names(nodegroups)` are used to define a new element in the
#' output `communities` object `"cluster_names"`, so the names
#' will be maintained in the data. Default `igraph` functions
#' do not use these names, but they are used by `multienrichjam`
#' for example by function `make_point_hull()` which uses these
#' names to label each cluster during plotting.
#'
#' @family jam igraph functions
#'
#' @return `community` object, which is essentially a `list` with
#' specific required elements:
#' * `"membership"` - `integer` assignment of nodes to clusters
#' * `"names"` - `character` list of node names
#' * `"vcount"` - `integer` number of nodes
#' * `"algorithm"` - `character` string with the name of the community
#' detection method used.
#' * `"cluster_names"` - `character` labels associated with `membership`
#' index values. These names are not generated by `igraph` community
#' detection, and are therefore optional for use in most `igraph`
#' workflows. However, they are used in some `multienrichjam` functions,
#' specifically `make_point_hull()` which optionally displays a
#' label beside each node cluster during plotting.
#'
#' @param wc `communities` object as returned by `igraph` functions
#' such as `cluster_optimal()`, `cluster_walktrap()`, or
#' `cluster_leading_eigen()`.
#' @param algorithm `character` or `NULL`, indicating the name of the
#' community detection algorithm used.
#' * When `algorithm` is defined, it is used instead of
#' `attr(nodegroups, "algorithm")`.
#' * When `algorithm` is `NULL`, `attribute(nodegroups, "algorithm")` is
#' used if defined, otherwise `algorithm="nodegroups"`.
#' @param ... additional arguments are ignored.
#'
#' @export
nodegroups2communities <- function
(nodegroups,
algorithm=NULL,
...)
{
#
if (length(nodegroups) == 0) {
stop("Input nodegroups was empty.")
}
if (length(names(nodegroups)) == 0) {
names(nodegroups) <- seq_along(nodegroups)
}
if (length(algorithm) == 0) {
if ("algorithm" %in% names(attributes(nodegroups))) {
algorithm <- attr(nodegroups, "algorithm");
} else {
algorithm <- "nodegroups";
}
}
nodegroup_factors <- factor(names(nodegroups),
levels=names(nodegroups));
nodegroup_df <- data.frame(check.names=FALSE,
stringsAsFactors=FALSE,
name=unlist(unname(nodegroups)),
nodegroup=rep(nodegroup_factors, lengths(nodegroups)),
membership=as.integer(rep(nodegroup_factors, lengths(nodegroups))))
if (is.numeric(nodegroup_df$name)) {
nodegroup_df <- jamba::mixedSortDF(nodegroup_df,
byCols=c("name"));
}
wc <- list(
membership=nodegroup_df$membership,
names=nodegroup_df$name,
vcount=nrow(nodegroup_df),
algorithm=algorithm,
cluster_names=levels(nodegroup_factors))
class(wc) <- "communities";
return(wc)
}
#' Assign labels to igraph communities
#'
#' Assign labels to igraph communities
#'
#' @family jam igraph functions
#'
#' @return `communities` or `list` format matching the input `wc` format.
#' * When `communities` is input, additional value `cluster_names`
#' will contain a `character` vector of names corresponding to each
#' integer index in `wc$membership`.
#' * When `nodegroups` is input, the `list` names will be a `character`
#' vector of cluster labels.
#'
#' @param wc `communities` object, or `list` in form of nodegroups,
#' which is a `list` of `character` vectors that contain `igraph`
#' node names.
#' @param labels `character` vector of optional labels to assign directly
#' to community clusters. When not defined, the auto-detection method
#' is used.
#' @param add_catchwords `character` of optional words to include as
#' catchwords, to be excluded from use in the final label.
#' @param num_keep_terms `integer` maximum number of terms to be included
#' in the final output label, when auto-detection is used.
#' @param keep_terms_sep `character` string used as a delimited to separate
#' each term when multiple terms are concatenated together to form
#' the cluster label.
#' @param ... additional arguments are ignored.
#'
#' @export
label_communities <- function
(wc,
labels=NULL,
add_catchwords=NULL,
num_keep_terms=3,
keep_terms_sep=",\n",
...)
{
# define catchwords
catchwords <- unique(c(
add_catchwords,
"the", "an", "a", "of", "in", "between", "to", "and",
"peptide", "peptides",
"protein", "proteins",
"gene", "genes",
"system", "systems",
"role", "roles",
"base", "bases", "based", "basing", "basic",
"acid", "acids", "acidic",
"cells", "cell", "cellular",
"space", "spaces", "spaced", "spacing",
"positive", "positives", "positively",
"negative", "negatives", "negatively",
"pathway", "pathways",
"set", "sets",
"position", "positions", "positioned", "positioning",
"function", "functions", "functioning", "functioned",
"signaling", "signal", "signaling", "signals", "signaled",
"activity", "activation", "activate", "activates", "activated", "activating",
"involve", "involved", "involves", "involving",
"response", "responses", "respond", "responds", "responded", "responding",
"transcript", "transcripts", "transcribe", "transcribed", "transcribes",
"organization", "organize", "organizes", "organized", "organizing",
"formation", "form", "forms", "formed", "forming",
"enhanced", "enhance", "enhances", "enhancing",
"mediated", "mediate", "mediates", "mediating",
"expression", "express", "expresses", "expressed", "expressing",
"compound", "compounds", "compounding", "compounded",
"process", "processes", "processed", "processing",
"regulation", "regulate", "regulates", "regulated", "regulating",
# "up-regulation", "up-regulate", "up-regulates", "up-regulated", "up-regulating",
# "down-regulation", "down-regulate", "down-regulates", "down-regulated", "down-regulating",
"the"))
hyphen_pattern <- paste0(
"-(",
paste(catchwords, collapse="|"),
")$")
input_type <- NULL;
if ("communities" %in% class(wc) ||
(is.list(wc) && all(c("membership", "names") %in% names(wc)))) {
# define list
input_type <- "communities";
nodegroups_wc <- communities2nodegroups(wc);
} else if (is.list(wc)) {
input_type <- "nodegroups";
nodegroups_wc <- wc;
} else {
stop("Input wc must be 'communities' or 'nodegroups' list object.");
}
if (length(labels) > 0) {
if (length(labels) != length(nodegroups_wc)) {
stop("length(labels) must equal the number of clusters in wc")
}
}
# assign most common terms as a cluster label
nodegroup_labels <- lapply(seq_along(nodegroups_wc), function(inum){
i <- nodegroups_wc[[inum]];
# split on whitespace, tab, or newline
j <- tolower(unlist(strsplit(i, "[\t\r\n ]+")));
# remove catchword from the second word of hyphenated phrases
j <- gsub(hyphen_pattern, "", j);
# remove non-alphanumeric characters
j <- gsub("[():;,]+", "", j)
# keep words which are not catchwords, and with two or more characters
j_keep <- (!j %in% catchwords & nchar(j) > 1);
j <- j[j_keep];
if (length(j) == 0) {
# if no words remain, name the cluster by number
return(inum)
}
names(head(tcount(j), num_keep_terms))
})
names(nodegroups_wc) <- nodegroup_labels;
if ("nodegroups" %in% input_type) {
return(nodegroups_wc)
}
# assign cluster_names to communities object
wc$cluster_names <- nodegroup_labels;
return(wc);
}
#' Sync igraph nodes and communities
#'
#' Sync igraph nodes and communities
#'
#' This function ensures that `igraph` nodes and corresponding
#' community clusters are synchronized for proper downstream use.
#' In particular, when using a subgraph, or when communities only
#' assign a subset of nodes to clusters, this function ensures the
#' two objects are in sync, the same order, and with the same nodes.
#'
#' @return `list` with two elements:
#' * `"g"` - the `igraph` object after subsetting to match node names
#' shared with `wc`, as necessary.
#' * `"wc'` - the `communities` object after subsetting to match
#' node names shared with `g`, as necessary. When input `wc` is
#' in `list` nodegroups format, that same format is returned.
#'
#' @family jam igraph functions
#'
#' @param g `igraph` object
#' @param wc `communities` object, or `list` in form of nodegroups,
#' which is a `list` of `character` vectors that contain `igraph`
#' node names.
#' @param verbose `logical` indicating whether to print verbose output.
#' @param ... additional arguments are passed to `nodegroups2communities()`
#' only when input `wc` is supplied in `list` nodegroups format.
#'
#' @export
sync_igraph_communities <- function
(g,
wc,
verbose=TRUE,
...)
{
# validate input
input_type <- NULL;
if ("communities" %in% class(wc) ||
(is.list(wc) && all(c("membership", "names") %in% names(wc)))) {
# define list
input_type <- "communities";
} else if (is.list(wc)) {
input_type <- "nodegroups";
nodegroups_wc <- wc;
wc <- nodegroups2communities(nodegroups_wc,
...)
} else {
stop("Input wc must be 'communities' or 'nodegroups' list object.");
}
g_nodes <- igraph::V(g)$name;
wc_nodes <- wc$names;
observed_nodes <- intersect(g_nodes, wc_nodes)
# subset igraph
if (!all(g_nodes %in% observed_nodes)) {
g_keep <- which(g_nodes %in% observed_nodes)
g <- igraph::subgraph(g, v=g_keep)
# if layout is defined, subset in place
if ("layout" %in% igraph::graph_attr_names(g)) {
if (verbose) {
jamba::printDebug("sync_igraph_communities(): ",
"igraph layout was subset to match the remaining nodes.")
}
g_layout <- igraph::graph_attr(g, "layout");
igraph::graph_attr(g, "layout") <- g_layout[g_keep, , drop=FALSE];
}
}
# subset (and order) communities
wc_keep <- match(igraph::V(g)$name, wc_nodes)
wc$membership <- wc$membership[wc_keep]
wc$names <- wc$names[wc_keep]
if (length(wc$modularity) > 1) {
wc$modularity <- wc$modularity[wc_keep]
}
if (length(wc$memberships) > 1) {
wc$memberships <- wc$memberships[wc_keep, , drop=FALSE]
}
if (length(wc$merges) > 0) {
if (verbose) {
jamba::printDebug("sync_igraph_communities(): ",
"community merges were removed.")
}
wc$merges <- NULL;
}
wc$vcount <- igraph::vcount(g);
class(wc) <- "communities";
if ("cluster_names" %in% names(wc)) {
cluster_names <- wc$cluster_names;
names(cluster_names) <- seq_along(cluster_names);
if (!all(names(cluster_names) %in% as.character(wc$membership))) {
if (verbose) {
jamba::printDebug("sync_igraph_communities(): ",
"cluster_names were reduced due to match the remaining nodes.")
}
cn_keep <- (names(cluster_names) %in% as.character(wc$membership));
new_cluster_names <- unname(cluster_names[cn_keep]);
new_membership <- as.numeric(as.factor(wc$membership));
wc$cluster_names <- new_cluster_names;
wc$membership <- new_membership
}
}
if ("nodegroups" %in% input_type) {
wc <- communities2nodegroups(wc);
}
return(list(
g=g,
wc=wc));
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.