# NOTE(review): rm(list = ls()) removes every object in the current
# environment; kept so the notebook behaves as before, but consider dropping it.
rm(list = ls())
# Fix the random seed so that repeated runs are reproducible.
seed <- 1
set.seed(seed)
# Attach dependencies with library(): it stops with an error as soon as a
# package is missing, whereas require() only warns and returns FALSE, which
# postpones the failure to the first call into the missing package.
library(kaiaulu)
library(visNetwork)
library(data.table)
library(stringi)
library(igraph)
library(gh)
library(yaml)
library(magrittr)
library(knitr)
# Parse the tool configuration and the project configuration.
tool <- parse_config("../tools.yml")
# Alternative project configuration:
#conf <- parse_config("../conf/apr.yml")
conf <- parse_config("../conf/helix.yml")

# Repository path taken from the project configuration
git_repo_path <- get_git_repo_path(conf)

# File filters taken from the project configuration
file_extensions <- get_file_extensions(conf)
substring_filepath <- get_substring_filepath(conf)

# Depends settings: jar location from the tool configuration, language and
# dependency types to keep from the project configuration
depends_jar_path <- get_tool_project("depends", tool)
language <- get_depends_code_language(conf)
keep_dependencies_type <- get_depends_keep_dependencies_type(conf)

Construct File Network

Project Language

We can use the gh package with GitHub's languages endpoint to determine which languages a project uses. Paste the project URL below and run the code block.

# Repository whose language distribution is queried.
# NOTE(review): the project configuration loaded in this notebook is
# helix.yml, while this URL points to apache/apr -- confirm which project is
# intended.
project_github_url <- "https://github.com/apache/apr"


# Split the URL on "/" and discard empty pieces, so that neither the "//"
# after the scheme nor a trailing "/" yields an empty owner or repo.
project_github_url <- stri_split_regex(project_github_url,
                                       pattern = "/",
                                       omit_empty = TRUE)[[1]]
# The last two components are the repository owner and the repository name.
owner <- project_github_url[length(project_github_url) - 1]
repo <- project_github_url[length(project_github_url)]
# Ask the GitHub API for the per-language figures of the repository and
# flatten the response into a named numeric vector.
language_distribution_byte <- unlist(gh("GET /repos/:owner/:repo/languages",
                                        owner = owner,
                                        repo = repo))
# Normalise to proportions of the total, then show them with two decimals.
language_distribution_byte <- language_distribution_byte / sum(language_distribution_byte)
format(round(language_distribution_byte, 2), nsmall = 2)

Parse Dependencies

Based on the most used language in the project, choose the language parameter passed to Depends below. Accepted languages are: cpp, java, ruby, python, pom.

# Run Depends (jar at depends_jar_path) on the repository at git_repo_path
# for the configured language.
project_dependencies <- parse_dependencies(
  depends_jar_path,
  git_repo_path,
  language = language
)

Filter files

# Nodes: apply the extension filter, then the filepath-substring filter,
# both on the "filepath" column.
project_dependencies[["nodes"]] <- filter_by_file_extension(
  project_dependencies[["nodes"]], file_extensions, "filepath")
project_dependencies[["nodes"]] <- filter_by_filepath_substring(
  project_dependencies[["nodes"]], substring_filepath, "filepath")

# Edges: apply the same two filters to both endpoints, in the same order as
# before (extension on source, extension on destination, substring on source,
# substring on destination).
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "src_filepath")
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "dest_filepath")
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "src_filepath")
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "dest_filepath")

Example network visualization with igraph

# Plot only a sample, as file networks can be very large 
# Plot only a sample, as file networks can be very large.
# head() returns at most the rows that exist; indexing with 1:500 reaches
# past the end of a shorter edgelist (on a data.table this adds NA rows).
project_dependencies_slice <- project_dependencies
project_dependencies_slice[["edgelist"]] <- head(project_dependencies[["edgelist"]], 500)


# Convert the sampled dependencies into a network, keeping only the
# configured dependency types as weights.
project_file_network <- transform_dependencies_to_network(project_dependencies_slice,
                                                   weight_types = keep_dependencies_type)

# Show the first rows of the full (unsampled) edgelist as a table.
kable(head(project_dependencies[["edgelist"]], 10))
# Build a directed igraph object from the network's edgelist and nodes.
project_file_network <- igraph::graph_from_data_frame(d = project_file_network[["edgelist"]],
                      directed = TRUE,
                      vertices = project_file_network[["nodes"]])
# Reuse the notebook's seed (set at the top) so the layout is reproducible.
visIgraph(project_file_network, randomSeed = seed)


sailuh/kaiaulu documentation built on Dec. 10, 2024, 3:14 a.m.