#' Clique Percolation Community Detection
#'
#' Function for clique percolation community detection algorithms for weighted
#' and unweighted networks.
#'
#' @param W A qgraph object; see also \link[qgraph]{qgraph}
#' @param k Clique size (number of nodes that should form a clique)
#' @param method A string indicating the method to use
#' (\code{"unweighted"}, \code{"weighted"}, or \code{"weighted.CFinder"}); see Details
#' @param I Intensity threshold for weighted networks
#'
#' @return
#' A list object with the following elements:
#' \describe{
#' \item{list.of.communities.numbers}{list of communities with numbers as identifiers
#' of nodes}
#' \item{list.of.communities.labels}{list of communities with labels from qgraph object
#' as identifiers of nodes}
#' \item{shared.nodes.numbers}{vector with all nodes that belong to multiple communities
#' with numbers as identifiers of nodes}
#' \item{shared.nodes.labels}{vector with all nodes that belong to multiple communities
#' with labels from qgraph object as identifiers of nodes}
#' \item{isolated.nodes.numbers}{vector with all nodes that belong to no community
#' with numbers as identifiers of nodes}
#' \item{isolated.nodes.labels}{vector with all nodes that belong to no community
#' with labels from qgraph object as identifiers of nodes}
#' \item{k}{user-specified \code{k}}
#' \item{method}{user-specified method}
#' \item{I}{user-specified \code{I} (if method was \code{"weighted"}
#' or \code{"weighted.CFinder"})}
#' }
#'
#' @details
#' \code{method = "unweighted"} conducts clique percolation for unweighted networks as
#' described in Palla et al. (2005). \code{method = "weighted"} conducts clique percolation
#' for weighted graphs with inclusion of cliques if their Intensity is higher than the
#' specified Intensity (\code{I}), which is the method described in Farkas et al. (2007).
#' \code{method = "weighted.CFinder"} conducts clique percolation as in the CFinder program.
#' The Intensity (\code{I}) threshold is applied twice, namely first to the Intensity of the
#' cliques (as before) and then also to their \code{k-1} overlap with other cliques
#' (e.g., in the case of \code{k = 3}, it is applied to the edge that two cliques share).
#'
#' For weighted networks, the absolute value of the edge weights is taken.
#' Therefore, negative edges are treated like positive edges just like in the CFinder program.
#' Thus, the Intensity threshold \code{I} can only be positive.
#'
#' cpAlgorithm produces a solution for all networks, even if there are no communities
#' or communities have no overlap. The respective output is empty in such cases.
#'
#' @examples
#' ## Example for unweighted networks
#'
#' # create qgraph object
#' W <- matrix(c(0,1,1,1,0,0,0,0,
#' 0,0,1,1,0,0,0,0,
#' 0,0,0,0,0,0,0,0,
#' 0,0,0,0,1,1,1,0,
#' 0,0,0,0,0,1,1,0,
#' 0,0,0,0,0,0,1,0,
#' 0,0,0,0,0,0,0,1,
#' 0,0,0,0,0,0,0,0), nrow = 8, ncol = 8, byrow = TRUE)
#' W <- Matrix::forceSymmetric(W)
#' W <- qgraph::qgraph(W)
#'
#' # run clique percolation for unweighted networks
#' results <- cpAlgorithm(W = W, k = 3, method = "unweighted")
#'
#' ## Example for weighted networks
#'
#' # create qgraph object
#' W <- matrix(c(0,1,1,1,0,0,0,0,
#' 0,0,1,1,0,0,0,0,
#' 0,0,0,0,0,0,0,0,
#' 0,0,0,0,1,1,1,0,
#' 0,0,0,0,0,1,1,0,
#' 0,0,0,0,0,0,1,0,
#' 0,0,0,0,0,0,0,1,
#' 0,0,0,0,0,0,0,0), nrow = 8, ncol = 8, byrow = TRUE)
#' set.seed(4186)
#' rand_w <- stats::rnorm(length(which(W == 1)), mean = 0.3, sd = 0.1)
#' W[which(W == 1)] <- rand_w
#' W <- Matrix::forceSymmetric(W)
#' W <- qgraph::qgraph(W)
#'
#' # run clique percolation for weighted networks
#' results <- cpAlgorithm(W = W, k = 3, method = "weighted", I = 0.1)
#'
#' @references
#' Farkas, I., Abel, D., Palla, G., & Vicsek, T. (2007). Weighted network modules.
#' \emph{New Journal of Physics, 9}, 180-180. http://doi.org/10.1088/1367-2630/9/6/180
#'
#' Palla, G., Derenyi, I., Farkas, I., & Vicsek, T. (2005). Uncovering the overlapping community
#' structure of complex networks in nature and society. \emph{Nature, 435},
#' 814-818. http://doi.org/10.1038/nature03607
#'
#' @author Jens Lange, \email{lange.jens@@outlook.com}
#'
#' @importFrom magrittr "%>%"
#'
#' @export cpAlgorithm
cpAlgorithm <- function(W, k, method = c("unweighted","weighted","weighted.CFinder"), I){
###error message if W is not a qgraph object
if (methods::is(W, "qgraph") == FALSE) {
stop("W (network object) must be a qgraph object.")
}
#extract weights matrix
Wmat <- qgraph::getWmat(W)
labels <- as.vector(W$graphAttributes$Nodes$labels)
return(cpAlgorithmRaw(Wmat, k, method, I, labels, all_k_cliques = NULL))
}
cpAlgorithmRaw <- function(Wmat, k, method = c("unweighted","weighted","weighted.CFinder"), I, labels = NULL, all_k_cliques = NULL) {
###error message if k is not larger than 2
if (k < 3) {
stop("k must be larger than 2, because this is the first reasonable clique size.")
}
###error message if method is not "unweighted", "weighted", or "weighted.CFinder"
if (method != "unweighted" & method != "weighted" & method != "weighted.CFinder") {
stop("method must be 'unweighted', 'weighted', or 'weighted.CFinder' (depending on the network).")
}
## ###function to determine list of cliques for unweighted networks
## unweighted <- function(W_unweighted){
## #transform to igraph object to use specific functions
## W_i_unweighted <- igraph::graph_from_adjacency_matrix(W_unweighted,
## mode = "undirected", weighted = NULL)
## #extract cliques
## cliques_unweighted <- igraph::cliques(W_i_unweighted, min = k, max = k) %>% lapply(as.vector)
## #return cliques
## return(cliques_unweighted)
## }
#function to derive communities, shared nodes, and isolated nodes
results_cp <- function(Wmat, cliques, labels){
#loop to compare all cliques with each other
#if cliques share k-1 nodes, a vector is created stating their indices
#if there are not at least two cliques, there can be no edge; thus, the edge list is empty
#CFinder applies the Intensity threshold twice, once for the cliques and once for the overlap of cliques
#this is not stated in the paper, but to increase comparability, this is also implemented here for method = weighted.CFinder
#each k-1 set of nodes coded by an edge is taken and this subnetwork is again tested against I
#each subnetwork coded by an edge is excluded, when it does not exceed I
#this is done only when there are edges
## if (method == "weighted.CFinder" & length(edges) > 0) {
## intensity_weighted_edge_net <- list()
## for (i in 1:length(edges)) {
## edge_net_nodes <- Reduce(intersect, list(cliques[[edges[[i]][1]]],cliques[[edges[[i]][2]]]))
## edge_net_W <- abs(W)[edge_net_nodes,edge_net_nodes]
## weights_edge_net <- c(edge_net_W[upper.tri(edge_net_W)])
## exponent_edge_net <- 2/(length(edge_net_nodes) * (length(edge_net_nodes) - 1))
## intensity_weighted_edge_net[[i]] <- prod(weights_edge_net)^exponent_edge_net
## }
## edges_include_weighted <- lapply(intensity_weighted_edge_net, function(x) as.numeric(as.character(x)) > I)
## edges <- edges[which(edges_include_weighted == TRUE)]
## }
if (length(cliques) > 1) {
communities = calculate_community_membership(cliques, nrow(Wmat))
} else {
#communities list is empty
communities <- list()
}
#create vector of nodes that do not belong to a community
#if there are no isolated nodes, create empty variable
isolated <- subset(1:nrow(Wmat),
subset = !(1:nrow(Wmat)%in%unique(unlist(communities))))
if (length(isolated) == 0) {isolated <- c()}
#if there is more than one community...
#get shared nodes
#if there are no shared nodes, create empty variable
if (length(communities) > 1) {
shared <- list()
count <- 1
for (i in 1:(length(communities) - 1)) {
for (j in (i + 1):length(communities)) {
shared[[count]] <- intersect(communities[[i]],communities[[j]])
count <- count + 1
}
}
shared <- sort(unique(unlist(shared)))
if (length(shared) == 0) {shared <- c()}
}
#if there are communities...
#create list of communities with labels from qgraph object instead of node numbers
if (length(communities) > 0) {
communities_labels <- vector("list",length(communities))
for (i in 1:length(communities)) {
communities_labels[[i]] <- labels[communities[[i]]]
}
}
#if there are no communities...
if (length(communities) == 0) {
communities_labels <- list()
}
#create vector of isolated nodes with labels from qgraph object instead of node numbers
#if there are no isolated nodes, create empty variable
isolated_labels <- labels[isolated]
if (length(isolated_labels) == 0) {isolated_labels <- c()}
#if there is more than one community...
#create vector with shared nodes with labels from qgraph object instead of node numbers
if (length(communities) > 1) {
shared_labels <- labels[shared]
}
#if there is less than two communities
if (length(communities) < 2) {
shared <- c()
shared_labels <- c()
}
#if there are no shared nodes, create empty variable
if (length(shared_labels) == 0) {shared_labels <- c()}
#return community lists, shared nodes vectors, and isolated nodes vectors
return(list(communities,communities_labels,
shared,shared_labels,
isolated,isolated_labels))
}
#run corresponding functions for respective method
## if (method == "unweighted") {
## cliques_unweighted <- unweighted(Wmat)
## results_unweighted <- results_cp(Wmat, cliques_unweighted, labels)
## names(results_unweighted) <- c("list.of.communities.numbers","list.of.communities.labels",
## "shared.nodes.numbers","shared.nodes.labels",
## "isolated.nodes.numbers","isolated.nodes.labels")
## return(c(results_unweighted,list(k = k, method = method)))
## }
if (method == "weighted" | method == "weighted.CFinder") {
###error message if I is not larger than zero
if (I <= 0) {
stop("Intensity (I) must be larger than zero.\nThis is because all edges are considered positive.\nComparing intensities of cliques or their overlap to zero or negative value therefore makes no sense.")
}
# First find all cliques (if not provided as an argument), with their associated intensities
if (is.null(all_k_cliques)) {
all_k_cliques <- calculate_all_clique_intensities(Wmat, k)
}
# Restrict to cliques above this threshold
cliques_above_thresh <- threshold_cliques(all_k_cliques, I)$cliques
# Run algorithm (i.e. find communities)
results_weighted <- results_cp(Wmat, cliques_above_thresh, labels)
names(results_weighted) <- c("list.of.communities.numbers","list.of.communities.labels",
"shared.nodes.numbers","shared.nodes.labels",
"isolated.nodes.numbers","isolated.nodes.labels")
return(c(results_weighted,list(k = k, method = method, I = I)))
}
}
threshold_cliques <- function(cliques_result, I) {
include_clique <- lapply(cliques_result$intensity_weighted, function(x) as.numeric(as.character(x)) > I)
cliques_weighted_thr <- cliques_result$cliques[which(include_clique == TRUE)]
intensity_weighted_thr <- cliques_result$intensity_weighted[which(include_clique == TRUE)]
return(list("intensity_weighted" = intensity_weighted_thr,
"cliques" = cliques_weighted_thr))
}
##function to find all cliques and calculate their intensities
calculate_all_clique_intensities_raw <- function(W_weighted, k) {
#take absolute Value of weights matrix
#deals with negative edges such that they are simply considered like positive edges
W_weighted <- abs(W_weighted)
#transform to igraph object to use specific functions
W_i_weighted <- igraph::graph_from_adjacency_matrix(W_weighted,
mode = "undirected", weighted = TRUE)
#extract cliques
cliques_weighted <- igraph::cliques(W_i_weighted, min = k, max = k) %>% lapply(as.vector)
intensity_weighted <- vector("list",length(cliques_weighted))
if (length(cliques_weighted) > 0) {
for (i in 1:length(cliques_weighted)) {
## weights <- c()
weights <- numeric( (length(cliques_weighted[[i]])-1) * (length(cliques_weighted[[i]])) / 2 )
m <- 1
for (j in 1:(length(cliques_weighted[[i]]) - 1)) {
for (k in (j+1):length(cliques_weighted[[i]])) {
weights[m] <- W_weighted[cliques_weighted[[i]][j],cliques_weighted[[i]][k]]
m <- m + 1
}
}
exponent <- 2/(length(cliques_weighted[[i]]) * (length(cliques_weighted[[i]]) - 1))
intensity_weighted[[i]] <- prod(weights)^exponent
}
}
return(list("intensity_weighted" = intensity_weighted,
"cliques" = cliques_weighted))
}
# Use memoization to cache the results of this function call. Thus if the function is called
# twice in a session with the same parameters, the second call will be almost instant.
calculate_all_clique_intensities <- R.cache::addMemoization(calculate_all_clique_intensities_raw)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.