# Chunk defaults for the whole document: collapse source and output into one
# block and prefix every output line with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Interpret an environment variable as a logical flag.
#
# Sys.getenv() always returns a character string, so its result can never be
# identical() to the logical TRUE. The value is therefore parsed with
# as.logical(), which accepts "true", "TRUE", "True" and "T"; an unset or
# unparsable variable yields NA and is treated as FALSE.
#
# name: name of the environment variable (character scalar).
# Returns TRUE or FALSE, never NA.
env_flag_is_true <- function(name) {
  isTRUE(as.logical(Sys.getenv(name)))
}

# TRUE only when running on CI (CI == "true") and the project-specific
# GH_ACTIONS_KWB_R flag is switched on.
is_ghactions <- identical(Sys.getenv("CI"), "true") &&
  env_flag_is_true("GH_ACTIONS_KWB_R")
library(jsonld)
library(jsonlite)
library(magrittr)
library(codemetar)
library(purrr)
library(dplyr)
library(printr)
library(tibble)

# Importing "codemetar.json" ----
#
# Option 1 (disabled): frame the file with the codemetar schema frame.
# Not working correctly: it gives a list that is twice as long as the one
# obtained from jsonlite::fromJSON() in option 2 below.
#
# frame <- system.file("schema/frame_schema.json", package="codemetar")
#
# corpus <- jsonld::jsonld_frame("codemetar.json", frame) %>%
#   jsonlite::fromJSON("codemetar.json",
#                      simplifyVector = FALSE) %>%
#   getElement("@graph")
#
# "https://kwb-r.github.io/pkgmeta/codemetar.json" %>%
#   jsonlite::fromJSON() %>%
#   jsonlite::write_json("codemetar.json")

# Option 2 (active, working as expected): read the file as nested lists
# without simplifying anything to vectors or data frames.
corpus <- jsonlite::fromJSON("codemetar.json", simplifyVector = FALSE)

# Attach the GitHub topics to the R package names: one row per corpus entry
# holding its identifier and all of its keywords joined by ", ". An entry
# without keywords gets an empty string.
pkg_topics <- tibble::tibble(
  name = purrr::map_chr(corpus, "identifier"),
  topic = purrr::map_chr(
    corpus,
    function(pkg) paste(unlist(purrr::pluck(pkg, "keywords")), collapse = ", ")
  )
)

pkg_topics
# Frequency of every keyword (GitHub topic) across all packages, most
# frequent first, in the word/freq layout expected by wordcloud2.
topics <- pkg_topics %>%
  # One row per package/keyword pair.
  tidyr::separate_rows(topic, sep = ",\\s+") %>%
  dplyr::count(topic) %>%
  # Drop the generic language tags. Also drop the empty string: a package
  # without any keywords carries "" as its topic, which is not a keyword and
  # would otherwise show up as a blank row and a blank word in the cloud.
  dplyr::filter(nzchar(topic), !topic %in% c("r", "rstats")) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  dplyr::rename(word = topic, freq = n)

knitr::kable(topics)

wordcloud2::wordcloud2(topics)
# Package names with missing ones handled explicitly: map() yields NULL for
# an entry without a name and compact() removes those NULLs.
pkgs <- as.character(purrr::compact(purrr::map(corpus, "name")))

# Keep only the corpus entries that carry a package identifier.
keep <- purrr::map_lgl(corpus, function(pkg) length(pkg$identifier) > 0)
corpus <- corpus[keep]

# From here on a typed map is used directly.
# NOTE(review): this assumes every entry with an identifier also has a name.
all_pkgs <- purrr::map_chr(corpus, "name")
head(all_pkgs)

# Number of distinct maintainer family names, taken from the first element
# of each maintainer entry (3 according to the original author's note).
length(unique(purrr::map(corpus, list("maintainer", 1, "familyName"))))
# Frequency table of maintainer family names, most frequent first, in the
# word/freq layout expected by wordcloud2. The family name is read from the
# first element of each package's maintainer entry.
# (Original author's observation on the data: mostly Hauke.)
maintainer <- corpus %>%
  purrr::map(list("maintainer", 1, "familyName")) %>%
  unlist() %>%
  tibble::enframe(name = NULL) %>%
  dplyr::count(value, sort = TRUE) %>%
  dplyr::rename(word = value, freq = n)

knitr::kable(maintainer)

wordcloud2::wordcloud2(data = maintainer, size = 0.6, minSize = 0.4)
# Number of co-authors: how many packages list how many authors, most
# common author count first.
corpus %>%
  purrr::map_int(~ length(.x$author)) %>%
  tibble::enframe() %>%
  dplyr::count(value, sort = TRUE)

# The same tabulation for contributors (a field the original author notes
# is not used as much).
corpus %>%
  purrr::map_int(~ length(.x$contributor)) %>%
  tibble::enframe() %>%
  dplyr::count(value, sort = TRUE)
# How many packages declare how many softwareRequirements entries, most
# common count first.
purrr::map_int(corpus, function(r) length(r$softwareRequirements)) %>%
  tibble::enframe() %>%
  dplyr::group_by(value) %>%
  dplyr::tally(sort = TRUE)

# Extract the names of the software requirements of one corpus entry.
#
# reqs: the "softwareRequirements" element of a corpus entry. It may be
#   NULL/empty (no requirements), a single unboxed requirement object (a
#   named list that itself has a "name" field) or a list of such objects.
# Returns a character vector of requirement names; character(0) when there
# are none. Requirements without a "name" field are skipped.
requirement_names <- function(reqs) {
  if (length(reqs) == 0L) {
    return(character(0))
  }
  # A single, unboxed requirement is the object itself rather than a list of
  # objects; wrap it so that both shapes are processed the same way.
  if ("name" %in% names(reqs)) {
    reqs <- list(reqs)
  }
  req_names <- vapply(
    reqs,
    function(req) {
      req_name <- if (is.list(req)) req[["name"]] else NULL
      if (length(req_name) == 0L) {
        NA_character_
      } else {
        as.character(req_name)[[1L]]
      }
    },
    character(1),
    USE.NAMES = FALSE
  )
  req_names[!is.na(req_names)]
}

# Long table with one row per package/requirement pair; a package without
# requirements gets a single row with a missing requirement.
# The previous version returned the package identifier for an unboxed
# requirement and could never reach its multi-requirement branch because
# both conditions were identical.
corpus %>%
  purrr::map_df(function(x) {
    dep <- requirement_names(x[["softwareRequirements"]])
    if (length(dep) == 0L) {
      dep <- NA_character_
    }
    tibble::tibble(identifier = x$identifier, dep = dep)
  })

# One row per package with all requirement names joined by "," (NA when the
# package has none). `deps` is a plain character column so that
# tidyr::separate_rows() can split it without relying on list coercion.
deps_df <- tibble::tibble(
  identifier = purrr::map_chr(corpus, "identifier"),
  deps = purrr::map_chr(corpus, function(x) {
    dep <- requirement_names(x[["softwareRequirements"]])
    if (length(dep) > 0L) {
      paste(dep, collapse = ",")
    } else {
      NA_character_
    }
  })
)

# Back to one row per requirement/package pair, ordered by requirement.
dep_df <- tidyr::separate_rows(deps_df, deps, sep = ",") %>%
  dplyr::select(deps, identifier) %>%
  dplyr::arrange(deps, identifier) %>%
  dplyr::rename(dependancy = deps, used_in_r_package = identifier)

# Which dependencies are used most frequently?
dep_count <- dep_df %>%
  dplyr::group_by(dependancy) %>%
  dplyr::tally(sort = TRUE) %>%
  dplyr::rename(word = dependancy, freq = n)

knitr::kable(dep_count)

wordcloud2::wordcloud2(dep_count, minSize = 0.4, size = 1)
# Alternate approach: select the data with a JSON-LD frame instead of
# subsetting the nested lists with purrr functions.
# Note that this gets all Depends and Suggests (really all
# SoftwareApplication types mentioned).
dep_frame <- '{ "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", "@explicit": "true", "name": {} }'

# Frame the file, keep the SoftwareApplication nodes of the resulting graph
# and count how often each name occurs (word/freq layout for wordcloud2).
dep_sug_count <- "codemetar.json" %>%
  jsonld::jsonld_frame(dep_frame) %>%
  jsonlite::fromJSON() %>%
  getElement("@graph") %>%
  dplyr::filter(type == "SoftwareApplication") %>%
  dplyr::group_by(name) %>%
  dplyr::tally(sort = TRUE) %>%
  dplyr::rename(word = name, freq = n)

knitr::kable(dep_sug_count)

wordcloud2::wordcloud2(data = dep_sug_count, size = 1, minSize = 0.4)
# Capture the details of the R session that rendered this document.
environment <- sessioninfo::session_info()

# Platform section flattened into a two-column name/value table.
environment$platform %>%
  unlist() %>%
  tibble::enframe() %>%
  knitr::kable()

# Packages section of the session info.
print(environment$packages)

# Pandoc executable path and version, or a notice when pandoc is missing.
if (!rmarkdown::pandoc_available()) {
  print("No PANDOC installed!")
} else {
  data.frame(
    pandoc_directory = rmarkdown::pandoc_exec(),
    pandoc_version = as.character(rmarkdown::pandoc_version()),
    stringsAsFactors = FALSE
  )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.