# Chunk defaults for every code chunk rendered in this README.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)

# `knitr.kable.NA` is a global R option consulted by knitr::kable(), not a
# chunk option, so it has to be set through options() to blank out NA cells.
options(knitr.kable.NA = "")
The aim of this package is to provide a wrapper on gh
to quickly get you
the key GitHub repo information you need. The code here is used within Roche to
quickly answer simple questions like:
You can install the released version of GithubMetrics from CRAN with:
# Install the released version of the package.
install.packages(pkgs = "GithubMetrics")
# Packages used throughout the examples below.
library(GithubMetrics)
library(tidyverse)
library(glue)

# Organisation queried in the examples below.
organisation <- "openpharma"
Pull all the repos present within an org (that I can see).
# Fetch the repository listing for the organisation, pass it through
# gh_repos_clean(), and preview the result.
repos_raw <- gh_repos_get(org = organisation)
repos_clean <- repos_raw %>% gh_repos_clean()
repos_clean %>% glimpse()
Realistically, research code is likely to be on GitHub Enterprise, so
the .api_url
and .token
parameters can be passed through to gh().
Commented code below shows how you can use an on-premise Github server.
# repos_raw <- gh_repos_get(
#   org = organisation,
#   .api_url = "https://github.roche.com/api/v3",
#   .token = Sys.getenv("GITHUB_PAT_ROCHE")
# )
Get every commit for all the repos in this organisation.
# Collect commits for every repo with a non-zero size, looking back
# 365 * 10 days, then preview the result.
repo_all_commits <- repos_clean %>%
  filter(size > 0) %>%
  pull(full_name) %>%
  gh_commits_get(days_back = 365 * 10)

repo_all_commits %>% glimpse()
Pull all the people that have committed in `r organisation`.
# Count commits per author, dropping the two placeholder/automation
# identities that are excluded from the contributor table.
contributors <- repo_all_commits %>%
  count(author, name = "commits") %>%
  filter(!author %in% c(".gitconfig missing email", "actions-user"))

# Attach the profile details returned by gh_user_get() to each author.
contributors <- contributors %>%
  left_join(
    gh_user_get(contributors$author),
    by = c("author" = "username")
  )

# Render the contributor table, most active committers first.
contributors %>%
  arrange(desc(commits)) %>%
  mutate(
    # Time elapsed since the contributor was last active.
    last_active = Sys.Date() - last_active,
    contributor = glue('<img src="{avatar}" alt="" height="30"> {author}'),
    blog = case_when(
      # A missing blog must be treated like an empty one; otherwise
      # `blog == ""` is NA and the row would get a link pointing at "NA".
      is.na(blog) | blog == "" ~ "",
      TRUE ~ as.character(glue('<a href="{blog}">link</a>'))
    )
  ) %>%
  select(contributor, commits, name, last_active, company, location, blog) %>%
  knitr::kable()
Pull a specific file using gh_file_get().
# Fetch the DESCRIPTION file and hand its text to desc::desc().
desc_formatted <- desc::desc(
  text = gh_file_get(
    repo = "GithubMetrics",
    org = "OpenPharma",
    file = "DESCRIPTION"
  )
)

# Print a few selected fields as a table.
knitr::kable(
  tibble::enframe(
    desc_formatted$get(c("Package", "Title", "Version"))
  )
)
Get all of the files present in the last commit of all the repos using
gh_repo_files_get().
# List the files in the last commit of each repo and preview them.
repo_files <- gh_repo_files_get(
  repo_commits = repo_all_commits,
  only_last_commit = TRUE
)
repo_files %>% glimpse()

# Per-repo file counts: all files, R files, and Python/Jupyter files.
knitr::kable(
  x = repo_files %>%
    group_by(repo) %>%
    summarise(
      Files = n(),
      `R files` = sum(lang %in% "R"),
      `Python files` = sum(lang %in% c("Python", "Jupyter Notebook"))
    ),
  caption = "Types of files in the organisation"
)
# Search the organisation's code for "tidyverse" and preview the hits.
results <- gh_repo_search(code = "tidyverse", organisation = organisation)
results %>% glimpse()
#' Count the repos in one organisation whose code matches a search term.
#'
#' @param x Search term, passed to `gh_repo_search()` as `code`.
#' @param org Organisation to search within.
#' @param pause Seconds to wait before searching (default 5).
#' @return A one-row summary with columns `Organisation`, `Package` and
#'   `Repos`, or `NULL` when the search does not return a data frame.
helper_gh_repo_search <- function(x, org = "openpharma", pause = 5) {
  ## Slow it down! Search has a rate limit of 30 calls a minute.
  ## On-premise servers usually have a higher limit, so the pause is
  ## usually not needed there.
  if (interactive()) {
    message("Wait ", pause, " seconds")
  }
  Sys.sleep(pause)
  ## End slow down

  results <- gh_repo_search(
    code = x,
    organisation = org
  )

  # The original guard was `if (is.na(results))`, which errors as soon as
  # `results` is a data frame (the condition has length > 1). Presumably an
  # empty search is signalled with NA -- TODO confirm; anything that is not
  # a data frame is treated as "no hits" here.
  if (!is.data.frame(results)) {
    return(NULL)
  }

  results %>%
    mutate(Package = x, Organisation = org) %>%
    group_by(Organisation, Package) %>%
    summarise(
      Repos = n_distinct(full_name),
      .groups = "drop"
    )
}

packages <- c("tidyverse", "pkgdown", "dplyr", "data.table")

# Organisations to compare, in the order they are searched.
pharma_orgs <- c(
  "PHCAnalytics", "openpharma", "AstraZeneca",
  "Roche", "Genentech", "Novartis"
)

# One search per (organisation, package) pair; NULL results are dropped
# when the rows are bound together.
package_use <- pharma_orgs %>%
  map_df(function(org) {
    map_df(packages, helper_gh_repo_search, org = org)
  })

package_use %>%
  pivot_wider(names_from = "Package", values_from = "Repos") %>%
  # Sum every package column; Organisation is the only non-count column.
  mutate(Total = rowSums(across(-Organisation), na.rm = TRUE)) %>%
  arrange(desc(Total)) %>%
  knitr::kable(
    caption = "Package use detected within repositories in Pharma orgs"
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.