## Global chunk options: no output prefix, suppress warnings.
knitr::opts_chunk$set(comment = "", warning = FALSE)

## On Windows, additionally allow chunks to error without stopping the build.
on_windows <- grepl("windows", tolower(Sys.info()[["sysname"]]))
if (on_windows) {
  knitr::opts_chunk$set(comment = "", error = TRUE)
}
Here we illustrate some example use cases that involve parsing codemeta data.
library(jsonld) library(jsonlite) library(magrittr) library(codemetar) library(purrr) library(dplyr) library(printr) library(tibble)
We then digest this input using a JSON-LD "frame." While not strictly necessary, this helps ensure the data matches the format we expect, even if the original file had errors or missing data. (See the vignette "Validating in JSON-LD" in this package and the official JSON-LD docs for details.) The codemetar
package includes a reasonably explicit frame to get us started:
## Locate the JSON-LD frame and the example codemeta.json shipped with codemetar.
frame <- system.file("schema/frame_schema.json", package = "codemetar")
codemeta <- system.file("codemeta.json", package = "codemetar")

## Frame the document, parse it without simplification, and take the first
## node of the resulting "@graph" array.
meta <- jsonld_frame(codemeta, frame) %>%
  fromJSON(FALSE) %>%
  getElement("@graph") %>%
  getElement(1)
Construct a citation
## Turn each author record into a person() object with the "aut" role.
authors <- lapply(meta$author, function(a) {
  person(given = a$given, family = a$family, email = a$email, role = "aut")
})

## Fall back to the current year when no publication date is recorded.
year <- meta$datePublished
if (is.null(year)) {
  year <- format(Sys.Date(), "%Y")
}

## Assemble a "Manual"-type citation entry from the codemeta fields.
bibitem <- bibentry(
  bibtype = "Manual",
  title = meta$name,
  author = authors,
  year = year,
  note = paste0("R package version ", meta$version),
  url = meta$URL,
  key = meta$identifier
)

## Show the BibTeX form, then the default printed form.
cat(format(bibitem, "bibtex"))
bibitem
The ropensci corpus consists of a list of codemeta files for all packages provided by the rOpenSci project:
## Fetch the pre-built corpus of codemeta records for rOpenSci packages.
download.file(
  url = "https://github.com/codemeta/codemetar/raw/main/inst/notebook/ropensci.json",
  destfile = "ropensci.json"
)
As before, it is helpful, though not essential, to start off by framing the input data.
## Frame the downloaded corpus with the same schema frame, keeping the full
## list structure (no vector simplification) and extracting the node list.
frame <- system.file("schema/frame_schema.json", package = "codemetar")
corpus <- jsonld_frame("ropensci.json", frame) %>%
  fromJSON(simplifyVector = FALSE) %>%
  getElement("@graph")
We're now ready to start exploring. As usual, functions from purrr
prove very useful for iterating through large JSON files. First, we look at some basic summary data:
## map() keeps NULL entries, so drop them explicitly before coercing.
pkgs <- map(corpus, "name") %>%
  compact() %>%
  as.character()

## Keep only records that carry a package identifier (i.e. a name).
keep <- map_lgl(corpus, ~ length(.x$identifier) > 0)
corpus <- corpus[keep]

## With identifiers guaranteed, the typed map_chr() is now safe to use.
all_pkgs <- map_chr(corpus, "name")
head(all_pkgs)
## 60 unique maintainers
maintainers <- map_chr(corpus, c("maintainer", "familyName"))
maintainers %>%
  unique() %>%
  length()

## Mostly Scott
maintainers %>%
  as_tibble() %>%
  group_by(value) %>%
  tally(sort = TRUE)
## Distribution of co-author counts across the corpus.
map_int(corpus, function(pkg) length(pkg$author)) %>%
  as_tibble() %>%
  group_by(value) %>%
  tally(sort = TRUE)
## The contributor field sees much less use than author.
map_int(corpus, function(pkg) length(pkg$contributor)) %>%
  as_tibble() %>%
  group_by(value) %>%
  tally(sort = TRUE)
Numbers (n) of packages with a total of (value) dependencies:
## How many dependencies (value) do how many packages (n) declare?
map_int(corpus, function(pkg) length(pkg$softwareRequirements)) %>%
  as_tibble() %>%
  group_by(value) %>%
  tally(sort = TRUE)
Which dependencies are used most frequently?
## Build a long table of (package identifier, dependency name) pairs.
##
## Bug fix: the single-unboxed-dependency branch previously assigned
## `dep <- x$name` — the package's OWN name — instead of the name of the
## requirement node. It now reads the dependency's name. Also guards the
## empty-requirements case before indexing reqs[[1]].
dep_df <- map_df(corpus, function(x) {
  reqs <- x$softwareRequirements
  if (length(reqs) == 0) {
    ## No requirements recorded.
    dep <- NA
  } else if ("name" %in% names(reqs)) {
    ## Single, unboxed dependency: the node itself carries "name".
    dep <- reqs$name
  } else if ("name" %in% names(reqs[[1]])) {
    ## A list of requirement nodes: pull each one's "name".
    dep <- map_chr(reqs, "name")
  } else {
    ## Malformed node with no recognizable name.
    dep <- NA
  }
  tibble(identifier = x$identifier, dep = dep)
})

## Which dependencies appear most often?
dep_df %>%
  group_by(dep) %>%
  tally(sort = TRUE)
Alternate approach using a frame instead of purrr
functions for subsetting the data. Note that this gets all Depends and suggests (really all SoftwareApplication
types mentioned)
## A minimal frame that extracts only the "name" of every node; framing then
## replaces the purrr-based subsetting above.
dep_frame <- '{ "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", "@explicit": "true", "name": {} }'

jsonld_frame("ropensci.json", dep_frame) %>%
  fromJSON() %>%
  getElement("@graph") %>%
  filter(type == "SoftwareApplication") %>%
  group_by(name) %>%
  tally(sort = TRUE)
## Remove the files this vignette created in the working directory.
unlink("ropensci.json")
unlink("codemeta.json")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.