In cboettig/emld: Ecological Metadata as Linked Data

library(jsonld)
library(jsonlite)
library(tidyverse)
library(printr)

# Does `x` look like an http:// or https:// URL?
#
# x: any R object.
# Returns FALSE for anything that is not a character vector; otherwise a
# logical vector with one entry per element of `x`.
is.url <- function(x) {
  if (is.character(x)) {
    grepl("^http[s]?://.*", x)
  } else {
    FALSE
  }
}

# Attach a JSON-LD "@context" to a JSON document and return it as JSON text.
#
# json:    either a character value handed to jsonlite::fromJSON(), or an
#          already-parsed list. When a character input parses to more than
#          one element it is wrapped in a "@graph" entry first.
# context: one of
#            - NULL (default): any existing "@context" entry is removed,
#            - a single http(s) URL: stored as-is as the "@context" value,
#            - a JSON string or parsed list holding a "@context" key: that
#              key's value is copied onto the document.
#
# Returns the result of jsonlite::toJSON() (pretty-printed, length-one
# vectors unboxed).
add_context <- function(json, context = NULL) {
  if (is.character(json)) { ## does not handle case of local files
    json <- jsonlite::fromJSON(json, simplifyVector = FALSE)
    if (length(json) > 1) {
      json <- list("@graph" = json)
    }
  }
  # isTRUE() keeps the condition scalar even if `context` is a multi-element
  # character vector (e.g. JSON text split across lines).
  if (isTRUE(is.url(context))) {
    # A URL has no "@context" member to extract; the URL itself is the
    # context reference.
    json[["@context"]] <- context
  } else {
    if (is.character(context)) {
      context <- jsonlite::fromJSON(context, simplifyVector = FALSE)
    }
    json[["@context"]] <- context[["@context"]]
  }
  jsonlite::toJSON(json, pretty = TRUE, auto_unbox = TRUE)
}

# Strip the JSON-LD "@context" from a document and return plain JSON text.
#
# json: a character value (parsed with jsonlite::fromJSON()) or an
#       already-parsed list.
#
# If the document has a "@graph" entry, only the contents of that entry are
# serialized. Returns the result of jsonlite::toJSON().
drop_context <- function(json) {
  doc <- if (is.character(json)) {
    jsonlite::fromJSON(json, simplifyVector = FALSE)
  } else {
    json
  }
  doc[["@context"]] <- NULL
  graph <- doc[["@graph"]]
  if (!is.null(graph)) {
    doc <- graph
  }
  jsonlite::toJSON(doc, pretty = TRUE, auto_unbox = TRUE)
}

Switching Contexts: Expansion & Compaction

Works well for: homonyms (the same word with different meanings) and synonyms (different words with roughly the same meaning).
Fails when: the mapping between vocabularies isn't 1:1.

A simple example

# A single observation record as plain JSON text. It carries no "@context"
# of its own; its keys are date, site, scientificName, area and count.
ex <- 
 '{ 
      "date": "1993-10-02",
      "site": "N654",
      "scientificName": "Picea rubens",
      "area": 2,
      "count": 26
 }'

No need to alter your own data files/source data files. Simply define a context that maps those terms into the reference vocabulary. Data can use whatever fields it likes.

# Context for the source record: every key falls under the default
# vocabulary http://example.com/, except "scientificName", which is mapped
# to http://example.com/species.
map <- '{ 
  "@context": {
    "@vocab": "http://example.com/",
    "scientificName": "http://example.com/species"
  }
}'
# Target context: only the default vocabulary, no per-term overrides.
target <- '{"@context": {"@vocab": "http://example.com/"}}'
# Attach `map` to the record, then compact the result against `target`.
jsonld_compact(add_context(ex, map), target)

Using tabular schema

# Example survey table: three observations, one column per field.
df <- tibble(
  Date    = c("10/1/1993", "10/3/1994", "10/1/1993"),
  Site    = c("N654", "N654", "N654"),
  Species = c("PIRU", "PIRU", "BEPA"),
  Area    = c(2.0, 2.0, 1.0),
  Count   = c(26, 29, 3)
)

# Context for the table's column names: everything falls under the default
# vocabulary http://example.com/, except "Species", which is mapped to
# http://example.com/scientificName.
map <- 
'{ 
  "@context": {
    "@vocab": "http://example.com/",
    "Species": "http://example.com/scientificName"
  }
}'
# Target context: only the default vocabulary, no per-term overrides.
target <- '{"@context": {"@vocab": "http://example.com/"}}'

# Round trip: serialize the table to JSON, attach `map` as its context,
# compact against `target`, strip the context (and "@graph" wrapper) again,
# and parse the JSON back into an R object.
df %>% 
  toJSON() %>% 
  add_context(map) %>% 
  jsonld_compact(target) %>% 
  drop_context() %>%
  fromJSON()

Wow, that seems like a roundabout way to just do:

# The direct dplyr equivalent: rename the Species column to scientificName.
rename(df, scientificName = Species)

and it is. But a few things are worth noting:

Robust to what columns are actually present. Instead of a default vocabulary we can map all terms explicitly.
Terms not expanded by the map (that is, not part of our context) will be dropped. (Count in the example below)
Terms not in our target context remain as external keys, and are not compacted.
Type-safe transforms. Terms (keys/column names) will not compact if their type does not match the type declared for that term in the target context. This way they are not actually lost, but they cannot be confused with the target term either.

# Explicit source context: no default vocabulary. Only Species, Site, Date
# and Area are given definitions under the "ex" prefix; Count is left out.
map <- 
'{ 
  "@context": {
    "ex": "http://example.com/",
    "Species": "ex:scientificName",
    "Site": "ex:Site",
    "Date": "ex:Date",
    "Area": "ex:Area"
  }
}'

# Explicit target context: defines scientificName, Site, Count and Date.
# It has no entry for ex:Area, maps Count to ex:count, and declares Date
# with "@type": "ex:Date" in addition to its "@id".
target <- '{
"@context": {
  "ex": "http://example.com/",
  "scientificName": "ex:scientificName",
  "Site": "ex:Site",
  "Count": "ex:count",
  "Date": {"@id": "ex:Date", "@type": "ex:Date"}
}}'

# Same round trip as before: table -> JSON -> add `map` -> compact against
# `target` -> drop the context -> parse back into R.
df %>% 
  toJSON() %>% 
  add_context(map) %>% 
  jsonld_compact(target) %>% 
  drop_context() %>%
  fromJSON()

# Source context: every column name falls under the default vocabulary
# https://data.berkeley.edu/.
map <- '{ "@context": { "@vocab": "https://data.berkeley.edu/" }}'

# Target context: terms under the "ucb" prefix (same IRI as the source
# vocabulary) plus one term, Density, under the "hav" prefix. The source
# term Species is exposed under the name scientificName.
target <- '{
"@context": {
  "ucb": "https://data.berkeley.edu/",
  "hav": "http://data.harvard.edu/",
  "scientificName": "ucb:Species",
  "Site": "ucb:Site",
  "Count": "ucb:Count",
  "Date": "ucb:Date",
  "Area": "ucb:Area",
  "Density": "hav:density"
}}'

# Round trip the table through the two contexts and keep the result.
# The assignment leads the pipeline so the target name is visible up front.
ucsb_df <- df %>%
  toJSON() %>%
  add_context(map) %>%
  jsonld_compact(target) %>%
  drop_context() %>%
  fromJSON()

# Derive a density column from the remapped table (printed, not stored).
ucsb_df %>% mutate(density = Count / Area)

Integration: Flattening merges nodes that share an @id

# Context used for the flattened output: default vocabulary only.
context <- '{   "@context": { "@vocab": "http://example.com/" } }'

# A document whose "datasets" array holds two entries with the same "@id"
# and "@type" but different properties: the first has "image" and "name",
# the second only a different "name".
ex <- '{
  "@context": { "@vocab": "http://example.com/" },
  "datasets": [
    {
      "@id": "http://global_unique_id",
      "@type": "dataset",
      "image": "http://image_uri",
      "name": "stuff"
    },

    {
      "@type": "dataset",
      "@id": "http://global_unique_id",
      "name": "Species name"
    }
  ]
}'

# Flatten the document using `context`.
# NOTE(review): the two entries are presumably combined into one node
# because they share an "@id" -- confirm against the printed output.
jsonld_flatten(ex, context)