# Start from an empty workspace so objects left over from a previous session
# cannot leak into this notebook.
rm(list = ls())

# A single seed is shared by R's random number generator and, further below,
# by the OSLOM calls, so repeated runs of the notebook are reproducible.
seed <- 1
set.seed(seed)
require(kaiaulu) require(visNetwork) require(reactable) require(data.table) require(igraph) require(yaml) require(stringi) require(knitr) require(RColorBrewer)
This notebook showcases the use of community detection using OSLOM [1], which requires a separate installation to obtain the binary (see Kaiaulu README.md for details). OSLOM differs from other community detection algorithms in that its algorithm can account for directed and weighted graphs. In addition, OSLOM will also identify nodes that belong to multiple communities and automatically verify how statistically significant the identified communities are.
[1] Finding statistically significant communities in networks. A. Lancichinetti, F. Radicchi, J.J. Ramasco and S. Fortunato PLoS ONE 6, e18961 (2011).
To facilitate the usage of the OSLOM algorithm, Kaiaulu currently provides two interface functions to its two binaries: `community_oslom()` and `recolor_network_by_community()`. The former requires the path to the OSLOM binary to be provided and parses the OSLOM outputs into R memory as a list of tables that stays close to the raw output. The latter leverages said list of tables to re-color a network by assigned community, for up to 12 different communities (the limitation is only on color; the OSLOM output will contain as many communities as were detected). Nodes that are assigned to more than one community are colored black.
`recolor_network_by_community()` leverages the existing visualization library in Kaiaulu, which means communities can be explored interactively. This is of particular interest for some of the social smells, where community detection is a required step and a poor community detection would therefore lead to erroneous results.
Although the algorithm is commonly called community detection, it can be applied to any graph. Therefore, any network obtained by the family of `transform_to_network_*` functions can be used as input for community detection. For example, applying the algorithm to a network obtained from `transform_reply_to_bipartite_network()` would yield communities of developers based on their e-mail exchange, while `transform_dependencies_to_network()` would identify groups of files that contain several static dependencies on one another (e.g. function calls, class inheritance, etc.). Applying community detection to a co-change network could identify files that are co-changed across multiple commits, and so on.
The remainder of this notebook illustrates these two functions on some of Kaiaulu's networks for the APR project.
As usual, the first step is to load the project configuration file.
# Configuration files: one for the third-party tools, one for the project.
tool <- parse_config("../tools.yml")
conf <- parse_config("../conf/apr.yml")

# Paths of the third-party tools, looked up in the tools configuration.
# OSLOM has two entries: one for directed and one for undirected networks.
perceval_path    <- get_tool_project("perceval", tool)
utags_path       <- get_tool_project("utags", tool)
oslom_dir_path   <- get_tool_project("oslom_dir", tool)
oslom_undir_path <- get_tool_project("oslom_undir", tool)

# Project-specific settings, looked up in the project configuration.
kinds              <- get_uctags_line_types(conf)   # Ctags line types
git_repo_path      <- get_git_repo_path(conf)       # local git repo folder
file_extensions    <- get_file_extensions(conf)     # file filter: extensions
substring_filepath <- get_substring_filepath(conf)  # file filter: substrings
# Parse the git log of the local repository through Perceval.
project_git <- parse_gitlog(perceval_path, git_repo_path)

# Apply the configured file filters to the "file_pathname" column:
# first the extension filter, then the filepath-substring filter.
project_git <- filter_by_file_extension(project_git,
                                        file_extensions,
                                        "file_pathname")
project_git <- filter_by_filepath_substring(project_git,
                                            substring_filepath,
                                            "file_pathname")
# Convert the git log timestamp strings into POSIXct values in UTC so they can
# be compared against date boundaries (see the time slice used for the
# co-change network further below).
#
# Each column is parsed from, and written back to, itself. Previously the
# parsed committer timestamps were assigned to `author_datetimetz`, which
# silently replaced the author timestamps and left `committer_datetimetz`
# as unparsed text.
project_git$author_datetimetz <- as.POSIXct(
  project_git$author_datetimetz,
  format = "%a %b %d %H:%M:%S %Y %z",
  tz = "UTC"
)
project_git$committer_datetimetz <- as.POSIXct(
  project_git$committer_datetimetz,
  format = "%a %b %d %H:%M:%S %Y %z",
  tz = "UTC"
)
#project_git_slice <- project_git[author_datetimetz >= as.POSIXct("2015-01-01", format = "%Y-%m-%d",tz = "UTC") & author_datetimetz < as.POSIXct("2015-12-31", format = "%Y-%m-%d",tz = "UTC")]
# identity_match() receives a named list of tables, so the git log is wrapped
# in a one-element list and read back from the result under the same name.
project_log <- identity_match(
  list(project_git = project_git),
  name_column = c("author_name_email"),
  assign_exact_identity,
  use_name_only = TRUE,
  label = "raw_name"
)
project_git <- project_log[["project_git"]]
# Build the temporal collaboration network between authors from the git log.
project_collaboration_network <- transform_gitlog_to_temporal_network(
  project_git,
  mode = "author"
)

# Convert the network (edge list + node table) into a directed igraph object
# and display it interactively; the fixed seed keeps the layout stable.
i_project_temporal_collaboration_network <- igraph::graph_from_data_frame(
  d = project_collaboration_network[["edgelist"]],
  directed = TRUE,
  vertices = project_collaboration_network[["nodes"]]
)
visIgraph(i_project_temporal_collaboration_network, randomSeed = 1)
Use `oslom_dir_path` or `oslom_undir_path`, as preferred, for directed or undirected community detection.
# Detect communities in the collaboration network with the directed OSLOM
# binary, treating the network as weighted and using 100 runs. The notebook
# seed is passed along so the result is reproducible.
community <- community_oslom(
  oslom_dir_path,
  project_collaboration_network,
  seed = seed,
  n_runs = 100,
  is_weighted = TRUE
)

# Show the parsed OSLOM output.
community
# Re-color the collaboration network's nodes by their assigned community.
project_collaboration_network <- recolor_network_by_community(
  project_collaboration_network,
  community
)

# Rebuild the directed igraph object from the re-colored network and plot it.
gcid <- igraph::graph_from_data_frame(
  d = project_collaboration_network[["edgelist"]],
  directed = TRUE,
  vertices = project_collaboration_network[["nodes"]]
)
visIgraph(gcid, randomSeed = 1)
# Depends parameters: the tool's jar path comes from the tools configuration,
# the language and the dependency types to keep from the project configuration.
depends_jar_path       <- get_tool_project("depends", tool)
language               <- get_depends_code_language(conf)
keep_dependencies_type <- get_depends_keep_dependencies_type(conf)
# Extract the static dependencies of the repository with Depends.
project_dependencies <- parse_dependencies(depends_jar_path,
                                           git_repo_path,
                                           language = language)

# Node table: apply the extension filter, then the filepath-substring filter.
dependency_nodes <- project_dependencies[["nodes"]]
dependency_nodes <- filter_by_file_extension(dependency_nodes,
                                             file_extensions,
                                             "filepath")
dependency_nodes <- filter_by_filepath_substring(dependency_nodes,
                                                 substring_filepath,
                                                 "filepath")
project_dependencies[["nodes"]] <- dependency_nodes

# Edge list: apply the same two filters to both endpoints of every edge
# (extension filter on source and destination first, then the substring
# filter on source and destination).
dependency_edges <- project_dependencies[["edgelist"]]
dependency_edges <- filter_by_file_extension(dependency_edges,
                                             file_extensions,
                                             "src_filepath")
dependency_edges <- filter_by_file_extension(dependency_edges,
                                             file_extensions,
                                             "dest_filepath")
dependency_edges <- filter_by_filepath_substring(dependency_edges,
                                                 substring_filepath,
                                                 "src_filepath")
dependency_edges <- filter_by_filepath_substring(dependency_edges,
                                                 substring_filepath,
                                                 "dest_filepath")
project_dependencies[["edgelist"]] <- dependency_edges

# Turn the filtered dependencies into a file network, weighting edges by the
# dependency types selected in the project configuration.
project_file_network <- transform_dependencies_to_network(
  project_dependencies,
  weight_types = keep_dependencies_type
)
# Detect groups of files in the dependency network with the undirected OSLOM
# binary, treating the network as weighted and using 100 runs.
module <- community_oslom(
  oslom_undir_path,
  project_file_network,
  seed = seed,
  n_runs = 100,
  is_weighted = TRUE
)

# Show the parsed OSLOM output.
module
# Re-color the file network's nodes by their assigned community.
project_file_network <- recolor_network_by_community(
  project_file_network,
  module
)

# Build an undirected igraph object from the re-colored network and plot it.
gcid <- igraph::graph_from_data_frame(
  d = project_file_network[["edgelist"]],
  directed = FALSE,
  vertices = project_file_network[["nodes"]]
)
visIgraph(gcid, randomSeed = 1)
# Keep only the git log rows whose author timestamp falls in
# [2015-01-01, 2019-12-31) UTC.
project_git_slice <- project_git[
  author_datetimetz >= as.POSIXct("2015-01-01", format = "%Y-%m-%d", tz = "UTC") &
    author_datetimetz < as.POSIXct("2019-12-31", format = "%Y-%m-%d", tz = "UTC")
]

# Bipartite commit-file network for the selected time slice.
project_commit_network <- transform_gitlog_to_bipartite_network(
  project_git_slice,
  mode = "commit-file"
)

# Project the bipartite network onto a single node type to obtain the
# co-change network, combining edge weights with the sum-of-edges scheme.
co_change_network <- bipartite_graph_projection(
  project_commit_network,
  mode = FALSE,
  weight_scheme_function = weight_scheme_sum_edges
)
# Detect groups in the co-change network with the undirected OSLOM binary,
# treating the network as weighted. Only 5 runs are used here, compared with
# 100 for the other networks in this notebook.
co_change_module <- community_oslom(
  oslom_undir_path,
  co_change_network,
  seed = seed,
  n_runs = 5,
  is_weighted = TRUE
)

# Show the parsed OSLOM output.
co_change_module
# Re-color the co-change network's nodes by their assigned community.
co_change_network <- recolor_network_by_community(
  co_change_network,
  co_change_module
)

# Build an undirected igraph object from the re-colored network and plot it.
gcid <- igraph::graph_from_data_frame(
  d = co_change_network[["edgelist"]],
  directed = FALSE,
  vertices = co_change_network[["nodes"]]
)
visIgraph(gcid, randomSeed = 1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.