R/gs_cluster.R
In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

Documented in cluster_markov

#' Markov Clustering (MCL) for community detection
#' 
#' This function implements the Markov Clustering (MCL) algorithm for finding community 
#' structure, in an analogous way to other existing algorithms in `igraph`.
#' 
#' This implementation has been driven by the nice explanations provided in
#' * https://sites.cs.ucsb.edu/~xyan/classes/CS595D-2009winter/MCL_Presentation2.pdf
#' * https://medium.com/analytics-vidhya/demystifying-markov-clustering-aeb6cdabbfc7
#' * https://github.com/GuyAllard/markov_clustering (python implementation)
#' 
#' More info on the MCL: https://micans.org/mcl/index.html, and 
#' https://micans.org/mcl/sec_description1.html
#' 
#' @references van Dongen, S.M., Graph clustering by flow simulation (2000) PhD thesis, 
#' Utrecht University Repository - https://dspace.library.uu.nl/handle/1874/848
#' @references  Enright AJ, van Dongen SM, Ouzounis CA, An efficient algorithm for 
#' large-scale detection of protein families (2002) Nucleic Acids Research, Volume 30, 
#' Issue 7, 1 April 2002, Pages 1575–1584, https://doi.org/10.1093/nar/30.7.1575
#'
#' @param g The input graph object
#' @param add_self_loops Logical, whether to add self-loops to the matrix by 
#' setting the diagonal to `loop_value`
#' @param loop_value Numeric, the value to use for self-loops
#' @param mcl_expansion Numeric, cluster expansion factor for the Markov clustering
#' iteration - defaults to 2
#' @param mcl_inflation Numeric, cluster inflation factor for the Markov clustering
#' iteration - defaults to 2
#' @param allow_singletons Logical; if `TRUE`, single isolated vertices are allowed 
#' to form their own cluster. If set to `FALSE`, all clusters of size = 1 are 
#' grouped in one cluster (to be interpreted as background noise).
#' @param max_iter Numeric value for the maximum number of iterations for the 
#' Markov clustering
#' @param return_node_names Logical, if the graph is named and set to `TRUE`, returns
#' the node names. 
#' @param return_esm Logical, controlling whether the equilibrium state matrix should be returned 
#'
#' @return This function returns a `communities` object, containing the numbers of
#' the assigned membership (in the slot `membership`). Please see the 
#' [igraph::communities()] manual page for additional details
#' 
#' @export
#'
#' @examples
#' library("igraph")
#' g <- make_full_graph(5) %du% make_full_graph(5) %du% make_full_graph(5)
#' g <- add_edges(g, c(1,6, 1,11, 6, 11))
#' cluster_markov(g)
#' V(g)$color <- cluster_markov(g)$membership
#' plot(g)
cluster_markov <- function(g, 
                           add_self_loops = TRUE,
                           loop_value = 1,
                           mcl_expansion = 2, 
                           mcl_inflation = 2, 
                           allow_singletons = TRUE, 
                           max_iter = 100, 
                           return_node_names = TRUE,
                           return_esm = FALSE) {
  # g must be a graph
  if (!is(g, "igraph"))
    stop("You need to provide an igraph object as input")
  
  stopifnot(is.logical(add_self_loops))
  stopifnot(loop_value >= 0)
  stopifnot(mcl_expansion > 1)
  stopifnot(mcl_inflation > 1)
  stopifnot(loop_value >= 0) 
  stopifnot(max_iter > 0)
  
  # graph to adjacency matrix
  adj_mat <- igraph::as_adjacency_matrix(g) 
  adj_mat <- as.matrix(adj_mat) # to enforce non-sparse matrix
  
  converged <- FALSE
  
  # Initialize self-loops
  if (add_self_loops) 
    diag(adj_mat) <- loop_value
  
  # Normalize (sum by col must be 1)
  adj_mat_norm <- t(adj_mat/colSums(adj_mat))
  
  cur_mat_norm <- adj_mat_norm
  for (i_mcl in seq_len(max_iter)) {
    # message(i_mcl)
    last_mat <- cur_mat_norm
    
    exp_mat <- cur_mat_norm %^% mcl_expansion
    inf_mat <- exp_mat ^ mcl_inflation
    # inf_mat_norm <- t(inf_mat/colSums(inf_mat))
    inf_mat_norm <- apply(inf_mat, MARGIN = 2, FUN = function(matcol) {
      matcol/sum(matcol)
    })
    
    ## TODO: optional pruning?
    # idea: inspect matrix and set small values directly to zero (assume they would have reached there eventually anyways).
    if (identical(inf_mat_norm, last_mat)) {
      converged <- TRUE
      break
    }
    
    cur_mat_norm <- inf_mat_norm
  }
  
  if (converged & is.na(cur_mat_norm[1, 1]))
    stop("An error occurred after convergence - maybe you set `add_self_loops` to FALSE?")
  
  # getting the attractors - non-zero elements of the matrix diagonal
  clu_attractors <- which(diag(cur_mat_norm) > 0)
  # store the clusters
  clusters <- vector(mode = "list", length(clu_attractors))
  
  # the nodes in the same row as each attractor form a cluster
  for(i in seq_along(clu_attractors)) {
    cur_att <- clu_attractors[i]
    cur_members <- which(cur_mat_norm[cur_att, ] > 0)
    clusters[[i]] <- cur_members
  }
  
  # chop off the identical ones
  clusters <- unique(clusters)
  
  # from clusters sets to membership as label
  clu_memberships <- rep(NA, nrow(adj_mat))
  for(i in seq_along(clusters)) {
    this_cluster <- clusters[[i]]
    clu_memberships[this_cluster] <- i
  }
  
  # handling the singletons
  if (!allow_singletons) {
    dub <- duplicated(clu_memberships) + duplicated(clu_memberships, fromLast = TRUE)
    n_singletons <- sum(dub == 0)
    clu_memberships[dub == 0] <- 0 
    # reshifting to not having gaps in the cluster numbers
    clu_memberships[dub != 0] <- clu_memberships[dub != 0] - n_singletons
  }
  
  res <- list()
  if (return_node_names & igraph::is_named(g))
    res$names <- V(g)$name
  res$vcount <- vcount(g)
  res$algorithm <- "mcl"
  res$iterations <- i_mcl - 1
  res$membership <- clu_memberships
  if (return_esm)
    res$esm <- cur_mat_norm
  
  class(res) <- "communities" # to take advantage of the goodies for printing and co
  
  return(res)
}


# nice to test on the set of igraph examples - say, louvain

Any scripts or data that you put into this service are public.

GeneTonic documentation built on Nov. 8, 2020, 5:27 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GeneTonic
Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

R/gs_cluster.R
In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

Defines functions cluster_markov

Documented in cluster_markov

Try the GeneTonic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GeneTonic Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

R/gs_cluster.R In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

Defines functions cluster_markov

Documented in cluster_markov

Try the GeneTonic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GeneTonic
Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

R/gs_cluster.R
In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis