# Notebook setup: fix the random seed, attach the packages used below, and
# read the tool and project configuration files.
#
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects without producing a clean session (attached
# packages and options survive). Restart R to get a clean run instead.
seed <- 1
set.seed(seed)

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the failure surface later as a confusing
# "could not find function" error.
library(kaiaulu)
library(visNetwork)
library(data.table)
library(stringi)
library(igraph)
library(gh)
library(yaml)
library(magrittr)
library(knitr)
library(openxlsx)

# Paths are relative to the notebook's working directory.
tool <- parse_config("../tools.yml")
conf <- parse_config("../conf/calculator.yml")
perceval_path <- get_tool_project("perceval", tool)
dv8_path <- get_tool_project("dv8", tool)

# Gitlog parameters
git_repo_path <- get_git_repo_path(conf)

# Depends parameters
depends_jar_path <- get_tool_project("depends", tool)
language <- get_depends_code_language(conf)
keep_dependencies_type <- get_depends_keep_dependencies_type(conf)

# DV8 parameters
project_path <- get_dv8_folder_path(conf)
# The project name is the last component of the DV8 folder path and is used
# as the prefix of every file generated below. basename() drops trailing path
# separators first, so a configured folder such as "analysis/dv8/" still
# yields "dv8" instead of an empty name.
project_name <- basename(project_path)

flaws_params <- get_dv8_flaws_params(conf)

# Filters
file_extensions <- get_file_extensions(conf)
substring_filepath <- get_substring_filepath(conf)
filter_commit_size <- get_filter_commit_size(conf)
Preparing Gitlog and Dependencies for DV8

Our first step is to parse the Git log and Dependencies for DV8. These will be used to construct historical and structural dependencies in DV8. There are two ways to perform this step. The legacy approach is deferred to the end of this notebook for completeness' sake; however, it does not allow for customizing the filtering step in Kaiaulu.

Git Log

For the git log, we first parse it into Kaiaulu as a table, and filter it based on the criteria in the project configuration file. Here's a sample:

# Parse the Git log into a table, then apply the configured filters one step
# at a time: file extension, file path substring, and commit size.
#git_repo_path <- "~/Downloads/testing-cochange/.git"
project_git <- parse_gitlog(perceval_path, git_repo_path)
project_git <- filter_by_file_extension(
  project_git, file_extensions, "file_pathname"
)
project_git <- filter_by_filepath_substring(
  project_git, substring_filepath, "file_pathname"
)
project_git <- filter_by_commit_size(
  project_git,
  commit_size = filter_commit_size
)

# Show the first rows of the filtered log.
kable(head(project_git))

We then transform it into a historical DSM JSON file (i.e. hdsmj):

# Transform the filtered Git log table into "<project_name>-hdsm.json" inside
# the DV8 project folder; the result is kept in hdsmj_path.
hdsmj_path <- transform_gitlog_to_hdsmj(
  project_git,
  hdsmj_path = file.path(project_path, paste0(project_name, "-hdsm.json"))
)

Next, we parse file dependencies using Depends, filter the files by the project configuration file criteria, and proceed to convert the result into a DV8 Structural Dependency Matrix JSON (i.e. sdsmj).

# Parse file dependencies with Depends for the configured language.
project_dependencies <- parse_dependencies(
  depends_jar_path,
  git_repo_path,
  language = language
)

# Nodes: filter on the "filepath" column, extension first and then substring.
project_dependencies[["nodes"]] <- filter_by_file_extension(
  project_dependencies[["nodes"]], file_extensions, "filepath"
)
project_dependencies[["nodes"]] <- filter_by_filepath_substring(
  project_dependencies[["nodes"]], substring_filepath, "filepath"
)

# Edges: both endpoints must pass the filters. The order is extension on
# source, extension on destination, substring on source, substring on
# destination.
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "src_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "dest_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "src_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "dest_filepath"
)

# Transform the filtered dependencies into "<project_name>-sdsm.json".
sdsmj_path <- transform_dependencies_to_sdsmj(
  project_dependencies,
  sdsmj_path = file.path(project_path, paste0(project_name, "-sdsm.json"))
)

# List the files now present in the DV8 project folder.
list.files(project_path)

Converting to DV8 Binaries

We are now ready to convert the json files into binary format, so they can be merged:

# Convert the historical DSM JSON into "<project_name>-hdsm.dv8-dsm".
hdsmb_path <- dv8_dsmj_to_dsmb(
  dv8_path = dv8_path,
  dsmj_path = hdsmj_path,
  dsmb_path = file.path(project_path, paste0(project_name, "-hdsm.dv8-dsm"))
)

# Convert the structural DSM JSON into "<project_name>-sdsm.dv8-dsm".
sdsmb_path <- dv8_dsmj_to_dsmb(
  dv8_path = dv8_path,
  dsmj_path = sdsmj_path,
  dsmb_path = file.path(project_path, paste0(project_name, "-sdsm.dv8-dsm"))
)

# List the files now present in the DV8 project folder.
list.files(project_path)

Merging DSMs

Our next step in the pipeline is to combine both of our binary DSMs, i.e. *-sdsm.dv8-dsm and *-hdsm.dv8-dsm, into a merged DSM, *-merge.dv8-dsm.

# Merge the historical and structural binary DSMs into
# "<project_name>-merge.dv8-dsm".
mdsmb_path <- dv8_hdsmb_sdsmb_to_mdsmb(
  dv8_path = dv8_path,
  hdsmb_path = hdsmb_path,
  sdsmb_path = sdsmb_path,
  mdsmb_path = file.path(project_path, paste0(project_name, "-merge.dv8-dsm"))
)
list.files(project_path)

Exploring Merge DSMs

We can load the -hdsm, -sdsm or -mdsm files in DV8-GUI to inspect their matrices, or export them via dv8-console to their .xlsx counterpart. Let's inspect the -mdsm. First, we will perform clustering over the files. This will later be displayed in the DSM as black rectangles. This is an optional step:

# Cluster the files of the merged DSM and store the result as
# "<project_name>-clsx.dv8-clsx".
hierclsxb_path <- dv8_mdsmb_to_hierclsxb(
  dv8_path = dv8_path,
  mdsmb_path = mdsmb_path,
  hierclsxb_path = file.path(
    project_path,
    paste0(project_name, "-clsx.dv8-clsx")
  )
)
list.files(project_path)

We then export the file as a .xlsx, including the clusters just generated like so:

# Export the merged DSM to "<project_name>-mdsm.xlsx". The cluster file
# argument is currently disabled and kept below as a commented-out line.
mdsmb_xlsx <- dv8_mdsmb_drhier_to_excel(
  dv8_path = dv8_path,
  mdsmb_path = mdsmb_path,
  excel_path = file.path(project_path, paste0(project_name, "-mdsm.xlsx"))
  # , hierclsxm_path = hierclsxb_path
)
list.files(project_path)

Once the .xlsx is exported, we can open it in Excel or load it in R. Observe that only a small sample of the exported table is shown here. Note the clustering will not be displayed when loading in R, as it is visually represented using black lines around the cells, something R does not display natively. The DSM files can also be loaded in DV8 GUI for inspection instead of exporting and opening them in Excel. To obtain cluster information in R, we will use another set of functions discussed towards the end of this Notebook. For now, we will present analyses that can be done with the -merge.dv8-dsm.

# Load the exported merged DSM spreadsheet as a data.table.
mdsmb_xlsx_table <- data.table(openxlsx::read.xlsx(mdsmb_xlsx))

# Preview at most the top-left 10 x 10 corner. Clamping to the table's actual
# dimensions avoids an out-of-range column error (and NA padding rows) when
# the project has fewer than 10 files.
preview_rows <- seq_len(min(10L, nrow(mdsmb_xlsx_table)))
preview_cols <- seq_len(min(10L, ncol(mdsmb_xlsx_table)))
kable(mdsmb_xlsx_table[preview_rows, preview_cols, with = FALSE])

DV8 Analysis

Now that we have a merged DSM, we can perform various analyses in DV8.

Architectural Flaws

DV8 computes a variety of metrics. Let's first observe Architectural Flaws.

# Format Architectural Flaw Parameters to DV8 Command:
# the multi-valued parameters are collapsed into one comma-separated string.
flaws_params[["cliqueDepends"]] <- stringi::stri_c(
  flaws_params[["cliqueDepends"]],
  collapse = ","
)
flaws_params[["uihDepends"]] <- stringi::stri_c(
  flaws_params[["uihDepends"]],
  collapse = ","
)
flaws_params[["uihInheritance"]] <- stringi::stri_c(
  flaws_params[["uihInheritance"]],
  collapse = ","
)

# Run the architectural flaw detection on the merged DSM; the output goes to
# "<project_name>_flaws" inside the DV8 project folder.
flaws_folder <- dv8_mdsmb_to_flaws(
  dv8_path = dv8_path,
  mdsmb_path = mdsmb_path,
  flaws_path = file.path(project_path, paste0(project_name, "_flaws")),
  cliqueDepends = flaws_params[["cliqueDepends"]],
  crossingCochange = flaws_params[["crossingCochange"]],
  crossingFanIn = flaws_params[["crossingFanIn"]],
  crossingFanOut = flaws_params[["crossingFanOut"]],
  mvCochange = flaws_params[["mvCochange"]],
  uiCochange = flaws_params[["uiCochange"]],
  uihDepends = flaws_params[["uihDepends"]],
  uihInheritance = flaws_params[["uihInheritance"]],
  uiHistoryImpact = flaws_params[["uiHistoryImpact"]],
  uiStructImpact = flaws_params[["uiStructImpact"]]
)
list.files(project_path)

Exploring Flaws DSMs

We can see a folder of Architectural Flaws was generated. The folder organization provides us with means to understand what files are assigned to various types of architectural flaws. The following is a general example used for APR (not computed here, but the structure carries over for other larger projects):

apr_flaws/
├── modularity-violation
└── package-cycle

This shows that two types of architectural flaws were found in APR: modularity violation and package-cycle. Moreover, inside each of the folders we see numbered folders:

apr_flaws/
├── modularity-violation
│   ├── 1
│   │   ├── 1-clsx.dv8-clsx
│   │   ├── 1-hdsm.dv8-dsm
│   │   ├── 1-merge.dv8-dsm
│   │   ├── 1-sdsm.dv8-dsm
│   │   └── 1.dv8-issue
│   ├── 10
│   │   ├── 10-clsx.dv8-clsx
│   │   ├── 10-hdsm.dv8-dsm
│   │   ├── 10-merge.dv8-dsm
│   │   ├── 10-sdsm.dv8-dsm
│   │   └── 10.dv8-issue

Each numbered folder represents an architectural flaw ID. For example, above we have the modularity violation flaws ID 1 and ID 10. We can further see that the sdsm, hdsm, merge dsm, and cluster files occur within both ID folders. If we load any of these files in DV8 GUI or export them to Excel (as done in the prior section), we can see which files participate in these architectural flaws, and the clusters they are assigned to. We will now see how to represent this information, or more specifically the file-to-architectural-flaw assignment, as a table we can parse in R, so it can be combined with other analyses in Kaiaulu.

Creating a Flaws Mapping

# Build the file-to-flaw mapping table from the flaws folder, save it as
# "<project_name>-flaws_map.csv", and preview its first rows.
file_to_flaws_map <- parse_dv8_architectural_flaws(
  dv8_path,
  flaws_folder,
  progress_bar = TRUE
)
fwrite(
  file_to_flaws_map,
  file.path(project_path, paste0(project_name, "-flaws_map.csv"))
)
kable(head(file_to_flaws_map))

We can see in the output above that the CHANGES file was included as part of the DV8 analysis. We discuss filtering in subsequent sections. For now, let's continue building on this analysis. With the file mapping, we can now aggregate the number of flaws a single file participates in. For example, grouping by file_path and architectural issue type (the following shows a sample of the table):

# Number of mapping rows (i.e. flaw assignments) per file and flaw type;
# only the first rows are shown.
head(
  file_to_flaws_map[
    ,
    .(n_flaws = .N),
    by = c("file_path", "architecture_issue_type")
  ]
)

This gives us the same information as anti-pattern-summary.csv. However, we can also perform other aggregations, which would be incorrect to do on anti-pattern-summary, as a file would then be counted twice on further aggregation.

For example, for social smells, we compute the total number of flaws of each type that occurred:

# Number of distinct flaws per flaw type. Counting distinct ids is correct
# whether the ids are stored as numbers or as text, and even if ids are not
# contiguous; max() only coincides with the count when ids are numeric and
# run 1..N without gaps.
file_to_flaws_map[
  ,
  .(n_flaws = data.table::uniqueN(architecture_issue_id)),
  by = "architecture_issue_type"
]

Note the value above differs from asking the total number of unique files that participated in each architectural flaw:

# Number of unique files that participate in each flaw type. A file involved
# in several flaws of the same type has one mapping row per flaw, so counting
# rows would count that file more than once; distinct file paths are counted
# instead.
file_to_flaws_map[
  ,
  .(n_files = data.table::uniqueN(file_path)),
  by = "architecture_issue_type"
]

Augmenting a Flaws Mapping with Clustering

We can augment the Flaws Mapping with cluster assignment information. Recall we already calculated the cluster assignment of every file, in order to generate the excel output of -merge.dv8-dsm, resulting in the -clsx.dv8-clsx file. To utilize it, we must first convert this binary file to json, and subsequently convert it into a table:

# Convert the binary cluster file into "<project_name>-clsxj.json", parse
# that JSON into a table, and preview its first rows.
hierclsxj_path <- dv8_clsxb_to_clsxj(
  dv8_path,
  clsxb_path = hierclsxb_path,
  clsxj_path = file.path(project_path, paste0(project_name, "-clsxj.json"))
)
hierclsxj_table <- parse_dv8_clusters(hierclsxj_path)
kable(head(hierclsxj_table))

We can now combine this information to the mapping obtained before by simple joining both tables:

# Full outer join on file_path: rows from either table are kept even when the
# other table has no match.
file_to_flaws_to_cluster_map <- merge(
  file_to_flaws_map,
  hierclsxj_table,
  by = "file_path",
  all = TRUE
)

kable(head(file_to_flaws_to_cluster_map))

Observe in this join that all files should be assigned a module (there should not be any NA cells under the module column), but it may be the case that some files are not assigned any architectural flaw type. Indeed, one would hope the architectural flaws to be kept to a minimum!

DL Metrics

An additional metric that can be computed from our merge DSM are DL Metrics. We can do so as follows:

# Compute the decoupling level of the merged DSM, written to
# "<project_name>-dl.json". The value stored in dl_metrics is handed straight
# to the parser below (presumably the path of that JSON file).
dl_metrics <- dv8_mdsmb_to_decoupling_level(
  dv8_path = dv8_path,
  mdsmb_path = mdsmb_path,
  dl_path = file.path(project_path, paste0(project_name, "-dl.json"))
)

# Parse the metrics into a table and display it in full.
dl_metrics_table <- parse_dv8_metrics_decoupling_level(dl_metrics)
kable(dl_metrics_table)

(Legacy) Preparing Gitlog and Dependencies for DV8

This section documents the legacy approach to construct both datasets for DV8. The code below is not executed in the Notebook.

Git Log

For the git log, this is done in two steps. First we obtain a gitlog numstat .txt file out of the git log, then we convert it to a DV8 Historical Dependency Matrix binary, or *-hdsm.dv8-dsm for short. Since DV8 can represent a DSM in both json and binary format, we differentiate the function names in Kaiaulu with the suffix "j" and "b" respectively.

# Legacy step 1: dump the Git log as "<project_name>-numstat.txt".
numstat_path <- dv8_gitlog_to_gitnumstat(
  git_repo_path,
  file.path(project_path, paste0(project_name, "-numstat.txt"))
)

# Legacy step 2: convert the numstat file into the historical DSM binary.
# The output is named "<project_name>-hdsm.dv8-dsm", matching the non-legacy
# pipeline. The previous "_flaws" suffix is the name of the architectural
# flaws output folder and would collide with it.
hdsmb_path <- dv8_gitnumstat_to_hdsmb(
  dv8_path,
  numstat_path,
  hdsmb_path = file.path(project_path, paste0(project_name, "-hdsm.dv8-dsm"))
)

Next, we parse file dependencies using Depends, and proceed to convert it into a DV8 Structural Dependency Matrix binary.

# Legacy: let Depends write the structural DSM JSON directly as
# "<project_name>-sdsm.json", without Kaiaulu-side filtering.
sdsmj_path <- dv8_depends_to_sdsmj(
  depends_jar = depends_jar_path,
  git_repo_path = git_repo_path,
  language = language,
  sdsmj_path = file.path(project_path, paste0(project_name, "-sdsm.json"))
)

# Convert that JSON into "<project_name>-sdsm.dv8-dsm".
sdsmb_path <- dv8_dsmj_to_dsmb(
  dv8_path = dv8_path,
  dsmj_path = sdsmj_path,
  dsmb_path = file.path(project_path, paste0(project_name, "-sdsm.dv8-dsm"))
)

list.files(project_path)


sailuh/kaiaulu documentation built on Dec. 10, 2024, 3:14 a.m.