#' @export
atoml_to_val = function(atoml)
{
if (length(atoml) > 0) {
return(atoml[[1]]$text_value)
}
else {
return (NA)
}
}
#' @export
metadata = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
pub_date = function(year, month, day) {
literal(paste0(text_value = unlist(year)["text_value"],
"-", unlist(month)["text_value"], "-", unlist(day)["text_value"]),
xsd_type = rdf4r::xsd_date)
}
if (length(unlist(atoms$pensoft_pub)) > 0) {
stop("Pensoft publication")
}
doi = unlist(atoms$doi)["text_value"]
article_root = identifiers$root_id
publisher_lit = toString(unlist(atoms$publisher)["text_value"])
df = set_component_frame(label = publisher_lit, mongo_key = c(publisher = NA), type = "publisher", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
publisher_id = get_or_set_mongoid(df, prefix )
publisher_id = identifier(publisher_id, prefix)
journal_lit = toString(unlist(atoms$journal)["text_value"])
journal_id = toString(unlist(atoms$journal_id)["text_value"])
journal_id = gsub("urn:lsid:arphahub.com:pub:", "", journal_id )
journal_id = identifier(journal_id, prefix)
if(is.null(journal_id)){
df = set_component_frame(label = journal_lit, mongo_key = c(journal = NA), type = "journal", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
journal_id = get_or_set_mongoid(df, prefix )
journal_id = identifier(journal_id, prefix)
}
paper_label = unlist(atoms$title)["text_value"]
research_paper_df = set_component_frame(label = paper_label, mongo_key = NA, type = "researchPaper", orcid = NA, parent = article_root$uri, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
paper_id = get_or_set_mongoid(research_paper_df, prefix)
paper_id = identifier(paper_id, prefix)
tt = ResourceDescriptionFramework$new()
tt$add_triple(journal_id, rdf_type, Journal)
sapply(atoms$journal, function(j) {
tt$add_triple(journal_id, pref_label, j)
})
sapply(atoms$journal_abbrev, function(j) {
tt$add_triple(journal_id, alt_label, j)
})
sapply(atoms$issn, function(i) {
tt$add_triple(journal_id, issn, i)
})
sapply(atoms$eIssn, function(i) {
tt$add_triple(journal_id, eissn, i)
})
if(length(unlist(atoms$journal_zoobank))>0){
for (n in 1:length(atoms$journal_zoobank)){
if (grepl("zoobank", unlist(atoms$journal_zoobank[[n]])["text_value"]) == TRUE){
text_value = gsub("^(.*):", "", unlist(atoms$journal_zoobank[n])["text_value"])
text_value = gsub(" ", "", text_value)
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
journal_zoobank_literal = ll
zoobank_id = identifier(text_value, c(zoobank = "http://zoobank.org/"))
zoobank_url = paste0("http://zoobank.org/",text_value)
tt$add_triple(journal_id, has_identifier, zoobank_id)
tt$add_triple(zoobank_id, rdf_type, ResourceIdentifier)
tt$add_triple(zoobank_id, identifier_scheme, zoobank)
tt$add_triple(zoobank_id, rdfs_label, journal_zoobank_literal)
tt$add_triple(zoobank_id, has_url, literal(zoobank_url, xsd_type = xsd_uri))
}
}
}
tt$add_triple(journal_id, frbr_part, article_root)
tt$add_triple(article_root, rdf_type, Article)
articleTitle = atoms$title[[1]]
articleTitle = escape_special(articleTitle$text_value)
tt$add_triple(article_root, rdfs_label, literal(articleTitle))
tt$add_triple(article_root, realization_of, paper_id)
tt$add_triple(article_root, dc_title, literal(articleTitle))
sapply(atoms$doi, function(i) {
tt$add_triple(article_root, has_doi, i)
})
#the article zoobank id is the one containing the words "zoobank"
if(length(unlist(atoms$article_zoobank)) > 0){
for (n in 1:length(atoms$article_zoobank)){
text_value = unlist(atoms$article_zoobank[n])["text_value"]
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
article_zoobank_literal = ll
article_zoobank_id = identifier(text_value, c(zoobank = "http://zoobank.org/"))
tt$add_triple(article_root, has_identifier, article_zoobank_id)
tt$add_triple(article_zoobank_id, rdf_type, ResourceIdentifier)
tt$add_triple(article_zoobank_id, identifier_scheme, zoobank)
tt$add_triple(article_zoobank_id, rdfs_label, article_zoobank_literal)
tt$add_triple(article_zoobank_id, has_url, literal(strip_angle(article_zoobank_id$uri), xsd_type = xsd_uri))
}
}
#Zenodo
if(length(unlist(atoms$zenodo)) > 0){
for (n in 1:length(atoms$zenodo)){
text_value = unlist(atoms$zenodo[n])["text_value"]
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
zenodo_literal = ll
zenodo_id = identifier(text_value, c(zenodo = "http://zenodo.org/record/"))
tt$add_triple(article_root, has_identifier, zenodo_id)
tt$add_triple(zenodo_id, rdf_type, ResourceIdentifier)
tt$add_triple(zenodo_id, identifier_scheme, zenodo)
tt$add_triple(zenodo_id, rdfs_label, zenodo_literal)
tt$add_triple(zenodo_id, has_url, literal(strip_angle(zenodo_id$uri), xsd_type = xsd_uri))
}
}
if(length(unlist(atoms$plazi_id))> 0){
for (n in 1:length(atoms$plazi_id)){
text_value = unlist(atoms$plazi_id[n])["text_value"]
text_value = gsub(" ", "", text_value)
text_value = gsub("http://tb.plazi.org/GgServer/summary/", "", text_value)
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
plazi_article_id_lit = ll
plazi_article_id = identifier(text_value, c(plazi = "http://tb.plazi.org/GgServer/summary/"))
plazi_url = paste0("http://tb.plazi.org/GgServer/summary/",text_value)
tt$add_triple(article_root, has_identifier, plazi_article_id)
tt$add_triple(plazi_article_id, rdf_type, ResourceIdentifier)
tt$add_triple(plazi_article_id, identifier_scheme, plazi)
tt$add_triple(plazi_article_id, rdfs_label, plazi_article_id_lit)
tt$add_triple(plazi_article_id, has_url, literal(plazi_url, xsd_type = xsd_uri))
}
}
sapply(atoms$publisher, function(i) {
tt$add_triple(article_root, has_publisher, i)
})
sapply(atoms$date, function(i) {
tt$add_triple(article_root, publication_date, i)
})
sapply(list(pub_date(atoms$pub_year, atoms$pub_month, atoms$pub_day)),
function(i) {
tt$add_triple(article_root, publication_date, i)
})
tt$add_triple(article_root, has_publisher_id, publisher_id)
sapply(atoms$issue, function(i) {
tt$add_triple(article_root, has_issue, i)
})
tt$add_triple(publisher_id, rdf_type, Publisher)
sapply(atoms$publisher, function(i) {
tt$add_triple(publisher_id, rdfs_label, i)
})
tt$add_triple(paper_id, rdf_type, Paper)
sapply(atoms$keyword, function(i) {
tt$add_triple(identifiers$nid, has_keyword, i)
})
return(tt)
}
#' @export
plazi_metadata = function (atoms, identifiers, prefix, new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc,doi, article_id){
if (length(unlist(atoms$pensoft_pub)) > 0) {
stop("Pensoft publication")
#tt = ResourceDescriptionFramework$new()
}
doi = unlist(atoms$doi)["text_value"]
article_root = identifiers$root_id
journal_id = gsub("<http://openbiodiv.net/", "", journal_id)
journal_id = gsub(">", "", journal_id)
journal_id = identifier(journal_id, prefix)
paper_label = unlist(atoms$title)["text_value"]
if (is.null(doi)){
doi = NA
}
research_paper_df = set_component_frame(label = paper_label, mongo_key = NA, type = "researchPaper", orcid = NA, parent = article_root$uri, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = TRUE, doi = doi, article_id = article_id)
paper_id = get_or_set_mongoid(research_paper_df, prefix)
paper_id = identifier(paper_id, prefix)
tt = ResourceDescriptionFramework$new()
tt$add_triple(journal_id, rdf_type, Journal)
sapply(atoms$journal, function(j) {
tt$add_triple(journal_id, pref_label, j)
})
sapply(atoms$issn, function(i) {
tt$add_triple(journal_id, issn, i)
})
sapply(atoms$eIssn, function(i) {
tt$add_triple(journal_id, eissn, i)
})
tt$add_triple(journal_id, frbr_part, article_root)
tt$add_triple(article_root, rdf_type, Article)
articleTitle = atoms$title[[1]]
articleTitle = escape_special(articleTitle$text_value)
tt$add_triple(article_root, rdfs_label, literal(articleTitle))
tt$add_triple(article_root, realization_of, paper_id)
tt$add_triple(article_root, dc_title, literal(articleTitle))
sapply(atoms$doi, function(i) {
tt$add_triple(article_root, has_doi, i)
})
if(length(atoms$zoobank) > 0){
for (n in 1:length(atoms$zoobank)){
text_value = gsub("^(.*):", "", unlist(atoms$zoobank[n])["text_value"])
text_value = gsub(" ", "", text_value)
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
article_zoobank_literal = ll
article_zoobank_id = identifier(text_value, c(zoobank = "http://zoobank.org/"))
tt$add_triple(article_root, has_identifier, article_zoobank_id)
tt$add_triple(article_zoobank_id, rdf_type, ResourceIdentifier)
tt$add_triple(article_zoobank_id, identifier_scheme, zoobank)
tt$add_triple(article_zoobank_id, rdfs_label, article_zoobank_literal)
tt$add_triple(article_zoobank_id, has_url, literal(article_zoobank_id, xsd_type = xsd_uri))
}
}
if(length(unlist(atoms$zenodo)) > 0){
for (n in 1:length(atoms$zenodo)){
text_value = unlist(atoms$zenodo[n])["text_value"]
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
zenodo_literal = ll
zenodo_id = identifier(text_value, c(zenodo = "http://zenodo.org/record/"))
tt$add_triple(article_root, has_identifier, zenodo_id)
tt$add_triple(zenodo_id, rdf_type, ResourceIdentifier)
tt$add_triple(zenodo_id, identifier_scheme, zenodo)
tt$add_triple(zenodo_id, rdfs_label, zenodo_literal)
tt$add_triple(zenodo_id, has_url, literal(strip_angle(zenodo_id$uri), xsd_type = xsd_uri))
}
}
if(length(unlist(atoms$gbif_dataset)) > 0){
for (n in 1:length(atoms$gbif_dataset)){
text_value = unlist(atoms$gbif_dataset[n])["text_value"]
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
gbif_dataset_literal = ll
gbif_dataset_prefix = c(gbif_dataset = "https://www.gbif.org/dataset/")
gbif_dataset_id = identifier(text_value, gbif_dataset_prefix)
tt$add_triple(article_root, has_identifier, gbif_dataset_id)
tt$add_triple(gbif_dataset_id, rdf_type, ResourceIdentifier)
tt$add_triple(gbif_dataset_id, identifier_scheme, gbif_dataset)
tt$add_triple(gbif_dataset_id, rdfs_label, gbif_dataset_literal)
tt$add_triple(gbif_dataset_id, has_url, literal(strip_angle(gbif_dataset_id$uri), xsd_type = xsd_uri))
}
}
sapply(atoms$date, function(i) {
tt$add_triple(article_root, publication_date, i)
})
sapply(atoms$issue, function(i) {
tt$add_triple(article_root, has_issue, i)
})
tt$add_triple(paper_id, rdf_type, Paper)
return(tt)
}
#' Keyword Group Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
keyword_group = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, KeywordGroup)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
sapply(atoms$keyword, function(i) {
tt$add_triple(identifiers$nid, has_keyword, i)
})
return(tt)
}
#' Title Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
title = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
title_content = atoms$text_content[[1]]
title_content = escape_special(title_content$text_value)
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, Title)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
tt$add_triple(identifiers$nid, has_content, literal(title_content))
return(tt)
}
#' Abstract Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
abstract =function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
#abstract_content = atoms$text_content[[1]]
#abstract_content = escape_special(abstract_content$text_value)
# trans_abstract = escape_special(atoms$trans_abstract)
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, Abstract)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
#tt$add_triple(identifiers$nid, has_content, literal(abstract_content))
# if(length(unlist(atoms$trans_abstract)) > 0)
# {
# trans_content = atoms$trans_abstract[[1]]
# trans_content = escape_special(trans_content$text_value)
# tt$add_triple(identifiers$nid, has_content, literal(trans_content))
# }
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' Author constructor
#'
#' TODO rewrite the atoms as individual arguments. I would need a
#' do.call in the extractor for that.
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
author = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
full_name = function(lsurname, lgiven_name) {
if (length(lsurname) == 1 && length(lgiven_name) == 1) {
paste(lgiven_name[[1]]$text_value, lsurname[[1]]$text_value)
}
else if (length(lsurname) == 1) {
lsurname[[1]]$text_value
}
else {
NA
}
}
atoms$full_name = ifelse(length(unlist(atoms$full_name)) == 0, list(literal(full_name(atoms$surname,
atoms$given_names), xsd_type = rdf4r::xsd_string)), atoms$full_name)
aid = sapply(atoms$aff_id, function(a) {
as.integer(gsub("[^0-9.]", "", a$text_value))
})
article_root = identifiers$root_id
author_id = identifiers$nid
paper_id = check_mongo_key_via_parent(parent = article_root$uri, type = "researchPaper", collection = general_collection)
paper_id = gsub("http://openbiodiv.net/", "", paper_id)
paper_id = identifier(paper_id, prefix)
tt = ResourceDescriptionFramework$new()
tt$add_triple(paper_id, creator, author_id)
tt$add_triple(author_id, rdf_type, Person)
sapply(atoms$full_name, function(j) {
tt$add_triple(author_id, rdfs_label, j)
})
if (length(aid)>0){
sapply(atoms$all_affiliations[aid], function(j) {
tt$add_triple(author_id, has_affiliation, j)
})
}
sapply(atoms$email, function(j) {
tt$add_triple(author_id, has_email, j)
})
if(length(unlist(atoms$orcid))>0){
orcid_value = atoms$orcid[[1]]$text_value
orcid_value = gsub(" ", "", orcid_value)
orcid_value = gsub("^(.*)orcid.org\\/", "", orcid_value)
orcid_id = identifier(orcid_value, c(orcid = "https://orcid.org/"))
}else{
orcid_id = NULL
}
#following the datacite ontology: http://www.sparontologies.net/ontologies/datacite
tt$add_triple(author_id, has_identifier, orcid_id)
tt$add_triple(orcid_id, rdf_type, PersonalIdentifier)
tt$add_triple(orcid_id, identifier_scheme, orcid)
tt$add_triple(orcid_id, rdfs_label, literal(orcid_value))
tt$add_triple(orcid_id, has_url, literal(strip_angle(orcid$uri), xsd_type = xsd_uri))
return(tt)
}
#' @export
plazi_author = function (atoms, identifiers, prefix, new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id){
article_root = identifiers$root_id
author_id = identifiers$nid
paper_id = check_mongo_key_via_parent(parent = article_root$uri, type = "researchPaper", collection = general_collection)
paper_id = gsub("http://openbiodiv.net/", "", paper_id)
paper_id = identifier(paper_id, prefix)
tt = ResourceDescriptionFramework$new()
tt$add_triple(paper_id, creator, author_id)
tt$add_triple(author_id, rdf_type, Person)
sapply(atoms$full_name, function(j) {
tt$add_triple(author_id, rdfs_label, j)
})
name_literal = unlist(atoms$full_name[1])["text_value"]
if (grepl(",", name_literal)==TRUE){
names = strsplit(name_literal, ",")
names = sapply(names, function(n){ #remove leading and trailing whitespaces
n = gsub("^\\s", "", n)
n = gsub("\\s$", "", n)
})
last_name = names[1]
first_name = names[2]
tt$add_triple(author_id, surname, literal(last_name))
tt$add_triple(author_id, givenName, literal(first_name))
}
return(tt)
}
#' Introduction Section Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
introduction_section = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
# intro_content = atoms$text_content[[1]]
# intro_content = escape_special(intro_content$text_value)
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, Introduction)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
# tt$add_triple(identifiers$nid, has_content, literal(intro_content))
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' @export
treatment = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
treatment_id = identifiers$nid
tt = ResourceDescriptionFramework$new()
#get or set taxonomic concept id
tc_df = set_component_frame(label = NA, mongo_key = NA, type = "taxonomicConcept", orcid = NA, parent = treatment_id$uri, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
tc_identifier = get_or_set_mongoid(tc_df, prefix)
tc_identifier = identifier(tc_identifier, prefix)
tt$add_triple(treatment_id, rdf_type, Treatment)
tt$add_triple(treatment_id, is_contained_by, identifiers$pid)
if (length(unlist(atoms$status))>0 ){
status = atoms$status[[1]]$text_value
if (!(is.null(status)))
{
tt$add_triple(treatment_id, taxonStatus, literal(status))
if (status %in% new_taxons ==TRUE)
tt$add_triple(treatment_id, mentions, TaxonomicDiscovery)
}
}
#for (a in 1:length(atoms$text_content)){
# atoms$text_content[[a]]$text_value = escape_special(atoms$text_content[[a]]$text_value)
#}
# treatment_content = atoms$text_content[[1]]
#treatment_id_literal = treatment_id$id
#treatment_content = gsub(treatment_id_literal, "", treatment_content$text_value)
#treatment_content = escape_special(treatment_content)
#tt$add_triple(treatment_id, has_content, literal(treatment_content))
if (length(unlist(atoms$habitat))>0 ){
sapply(atoms$habitat, function(i){
tt$add_triple(treatment_id, mentionsHabitat, i)
})
}
tt$add_triple(tc_identifier, rdf_type, TaxonomicConcept)
tt$add_triple(tc_identifier, realization, treatment_id)
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
occurrenceID = check_dwc_occurrence(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$occurrenceID = occurrenceID
tt = serialize_occurrence(tt, atoms, identifiers$nid)
locationID = check_dwc_location(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$locationID = locationID
tt = serialize_location(tt, atoms, identifiers$nid)
identificationID = check_dwc_identification(atoms = atoms, typeMaterialID =identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$identificationID = identificationID
tt = serialize_identification(tt, atoms, identifiers$nid)
eventID = check_dwc_event(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$eventID = eventID
tt = serialize_event(tt, atoms, identifiers$nid)
return(tt)
}
plazi_treatment = function (atoms, identifiers, prefix, new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id){
treatment_id = identifiers$nid
tt = ResourceDescriptionFramework$new()
tc_df = set_component_frame(label = NA, mongo_key = NA, type = "taxonomicConcept", orcid = NA, parent = treatment_id$uri, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc,doi = NA, article_id = NA)
tc_identifier = get_or_set_mongoid(tc_df, prefix)
tc_identifier = identifier(tc_identifier, prefix)
plazi_treatment_id = treatment_id$id
plazi_prefix = c(plazi_treatment = "http://treatment.plazi.org/id/")
plazi_treatment_id = identifier(plazi_treatment_id, plazi_prefix)
tt$add_triple(treatment_id, rdf_type, Treatment)
tt$add_triple(treatment_id, exact_match, plazi_treatment_id)
tt$add_triple(treatment_id, is_contained_by, identifiers$pid)
tt$add_triple(tc_identifier, rdf_type, TaxonomicConcept)
tt$add_triple(tc_identifier, realization, treatment_id)
if (plazi_doc == TRUE){
occurrenceID = check_dwc_occurrence(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$occurrenceID = occurrenceID
tt = serialize_occurrence(tt, atoms, identifiers$nid)
locationID = check_dwc_location(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$locationID = locationID
tt = serialize_location(tt, atoms, identifiers$nid)
identificationID = check_dwc_identification(atoms = atoms, typeMaterialID =identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$identificationID = identificationID
tt = serialize_identification(tt, atoms, identifiers$nid)
eventID = check_dwc_event(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$eventID = eventID
tt = serialize_event(tt, atoms, identifiers$nid)
}
return(tt)
}
#' @export
nomenclature = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
nomenclature_id = identifiers$nid #remove any ids from the text contents
nomenclature_parent_id = identifiers$pid$id
nomenclature_content = atoms$text_content[[1]]
nomenclature_content = gsub(nomenclature_id, "", nomenclature_content$text_value)
#nomenclature_content = gsub(nomenclature_parent_id, "", nomenclature_content)
nomenclature_content = escape_special(nomenclature_content)
nomenclature_content = gsub("\"", "'", nomenclature_content)
tt = ResourceDescriptionFramework$new()
tt$add_triple(nomenclature_id, rdf_type, Nomenclature)
tt$add_triple(nomenclature_id, is_contained_by, identifiers$pid)
tt$add_triple(nomenclature_id, has_content, literal(nomenclature_content))
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' @export
nomenclature_citations = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
citations = atoms$text_content[[1]]
citations = escape_special(citations$text_value)
citations = gsub("\"", "'", citations)
tt$add_triple(identifiers$nid, rdf_type, NomenclatureCitationsList)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
tt$add_triple(identifiers$nid, has_content, literal(citations))
return(tt)
}
#' @export
nomenclature_citation =function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
#citations = atoms$text_content[[1]]
#citations = escape_special(citations$text_value)
tt$add_triple(identifiers$nid, rdf_type, NomenclatureCitation)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
sapply(atoms$text_content, function(n){
tt$add_triple(identifiers$nid, has_content, literal(gsub("\"", "'", n$text_value)))
})
sapply(atoms$bibr, function(a) {
bibr = as.integer(gsub("B", "", a$text_value))
tt$add_triple(identifiers$nid, has_ref_id, literal(bibr))
})
if(length(atoms$comment)>0){
for (n in 1:length(atoms$comment)){
comment = unlist(atoms$comment[n])["text_value"]
if(!(is.null(comment))){
verbatim_citations = strsplit(comment, ";")
sapply(unlist(verbatim_citations), function(i){
i = strip_trailing_whitespace(i)
i = gsub("^ ", "", i)
#create an id for each 'verbatim cit'
df = set_component_frame(label = i, mongo_key = NA, type = "nomenclature_litCit", orcid = NA, parent = identifiers$nid$uri, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
citID = get_or_set_mongoid(df, prefix)
citID = identifier(citID, prefix)
tt$add_triple(identifiers$nid, mentions, citID)
tt$add_triple(citID, rdf4r::rdf_type, LitCitation)
tt$add_triple(citID, has_content, literal(i))
author_name = stringr::str_extract(i, "^(.*?)(?=[0-9])")
author_name = gsub(",", "", author_name)
author_name = strip_trailing_whitespace(author_name)
author_name = stringr::str_extract_all(author_name, "[A-Z].*?(?=[\\s(])|[A-Z].*?$")
sapply(unlist(author_name), function(n){
tt$add_triple(citID, verbatimAuthor, literal(n))
})
year = stringr::str_extract(i, "[1-2][0-9]{3}")
tt$add_triple(citID, verbatimYear, literal(year))
})
}
}
}
return(tt)
}
#' @export
plazi_nomenclature_citation = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, NomenclatureCitation)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
text_content = unlist(atoms$text_content[1])["text_value"]
text_content = gsub("(?<=[a-z])(?=[A-Z])", " ", text_content, perl = TRUE)
text_content = gsub("\"", "'", text_content)
tt$add_triple(identifiers$nid, has_content, literal(text_content))
sapply(atoms$bibr, function(a) {
tt$add_triple(identifiers$nid, has_ref_id, a)
})
return(tt)
}
#For now: no author disambiguation -> separate id for authors
#' @export
bibliography = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, ReferenceList)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
return(tt)
}
#For now: no author disambiguation -> separate id for authors
#' @export
reference = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, Reference)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
sapply(atoms$reference_id, function(n){
reference_num = n$text_value
reference_num = as.integer(gsub("B", "", reference_num))
tt$add_triple(identifiers$nid, has_ref_id, literal(reference_num))
})
# sapply(atoms$verbatimContent, function(n){
# tt$add_triple(reference, rdfs_label, n)
# })
#get or set an id for the cited article
check_mongo_citation = function(value, parent, collection)
{
if (is.na(parent) && is.na(value)){
key = NULL
}else{
if (is.na(parent)){
# query = sprintf("{\"%s\":\"%s\",\"%s\":\"%s\"}", "type", "bibResource", "value", value)
query = sprintf("{\"$text\":{\"$search\":\"\\\"%s\\\"\"}, \"type\": \"%s\"}", value, "bibResource")
df = collection$find(query)
key = NULL
if (!(is.null(df)) && nrow(df) > 0){
if (!(is.na(df$value)) %% !(is.na(value))){
df <- df[which(df$value == value),]
if (nrow(df)>0){
for (n in 1:nrow(df)){
if (df[n,]$value == value){
key = df[n,]$key
}
}
}
}
}
}else {
query = sprintf("{\"%s\":\"%s\",\"%s\":\"%s\"}", "type", "bibResource", "parent", parent)
key = collection$find(query)$key
}
}
return(key)
}
doi = unlist(atoms$doi[1])["text_value"]
title = unlist(atoms$title[1])["text_value"]
if (is.null(doi))
doi = NA
if (is.null(title))
title = NA
title = gsub("\"", "", title)
key = check_mongo_citation(value = title, parent = doi, collection = general_collection)
df = set_component_frame(label = title, mongo_key = NA, type = "bibResource", orcid = NA, parent = doi, key = NA, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
bibResource = get_or_set(key, df)
bibResource = identifier(bibResource, prefix)
tt$add_triple(bibResource, rdf_type, BibResource)
tt$add_triple(bibResource, rdf_type, Work)
sapply(atoms$year, function(n){
tt$add_triple(bibResource, publication_date, n)
})
title = escape_special(title)
# title = gsub("\"", " ", title)
tt$add_triple(bibResource, dc_title, literal(title))
sapply(atoms$issue, function(n){
tt$add_triple(bibResource, has_issue, n)
})
sapply(atoms$doi, function(n){
tt$add_triple(bibResource, has_doi, n)
})
sapply(atoms$http_doi, function(n){
tt$add_triple(bibResource, has_url, n)
})
#verbatimContent = unlist(atoms$verbatimContent[1])["text_value"]
#verbatimContent = gsub("\"", " ", verbatimContent)
# tt$add_triple(bibResource, rdfs_label, literal(verbatimContent))
tt$add_triple(identifiers$nid, mentions, bibResource) #link the reference to the article it references
full_name = function(lsurname, lgiven_name) {
if (length(lsurname) == 1 && length(lgiven_name) == 1) {
paste(lgiven_name[[1]]$text_value, lsurname[[1]]$text_value)
}
else if (length(lsurname) == 1) {
lsurname[[1]]$text_value
}
else {
NA
}
}
if(length(atoms$author_name)>0){
for (n in 1:length(atoms$author_name)){
fullname = full_name(atoms$author_surname[n], atoms$author_fname[n])
df = set_component_frame(label = fullname, mongo_key = NA, type = "author", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
author = get_or_set_mongoid(df, prefix)
author = identifier(author, prefix)
tt$add_triple(bibResource, creator, author)
tt$add_triple(author, rdf_type, Person)
tt$add_triple(author, rdfs_label, literal(fullname))
if (length(unlist(atoms$author_surname))>0){
tt$add_triple(author, surname, atoms$author_surname[[n]])
}
}
}
sapply(atoms$source, function(n){
source_name = n$text_value
if (!(is.null(source_name))){
df = set_component_frame(label = toString(source_name), mongo_key = NA, type = "journal", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
source = get_or_set_mongoid(df, prefix)
source = identifier(source, prefix)
tt$add_triple(source, rdf_type, ExpressionCollection) #we are not actually sure if this is a journal (can be a website or other source; fabio:ExpressionCollection is more general)
tt$add_triple(source, frbr_part, bibResource)
n = gsub("\"", " ", n)
tt$add_triple(source, rdfs_label, n)
}
})
return(tt)
}
#' @export
plazi_reference = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, Reference)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
sapply(atoms$reference_id, function(n){
tt$add_triple(identifiers$nid, has_ref_id, n)
})
#get or set an id for the cited article
check_mongo_citation = function(value, parent, collection)
{
if (is.na(parent) && is.na(value)){
key = NULL
}else{
if (is.na(parent)){
# query = sprintf("{\"%s\":\"%s\",\"%s\":\"%s\"}", "type", "bibResource", "value", value)
query = sprintf("{\"$text\":{\"$search\":\"\\\"%s\\\"\"}, \"type\": \"%s\"}", value, "bibResource")
df = collection$find(query)
key = NULL
if (!(is.null(df)) && nrow(df) > 0){
df <- df[which(df$value == value),]
for (n in 1:nrow(df)){
if (df[n,]$value == value){
key = df[n,]$key
}
}
}
}else {
query = sprintf("{\"%s\":\"%s\",\"%s\":\"%s\"}", "type", "bibResource", "parent", parent)
key = collection$find(query)$key
}
}
return(key)
}
sapply(atoms$refString, function(n){
tt$add_triple(identifiers$nid, rdfs_label, n)
})
value = unlist(atoms$refString[[1]])["text_value"]
value = gsub("\"", "\\\\", value)
key = check_mongo_citation(value = value, parent = NA, collection = general_collection)
df = set_component_frame(label = value, mongo_key = NA, type = "bibResource", orcid = NA, parent = NA, key = NA, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
bibResource = get_or_set(key, df)
bibResource = identifier(bibResource, prefix)
tt$add_triple(bibResource, rdf_type, BibResource)
tt$add_triple(bibResource, rdf_type, Work)
sapply(atoms$year, function(n){
tt$add_triple(bibResource, publication_date, n)
})
tt$add_triple(identifiers$nid, mentions, bibResource) #link the reference to the article it references
sapply(atoms$author, function(n){
tt$add_triple(bibResource, creator, n)
})
return(tt)
}
#' Diagnosis Section Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
diagnosis = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
diagnosis_id = identifiers$nid
#diagnosis_content = atoms$text_content[[1]]
#diagnosis_content = escape_special(diagnosis_content$text_value)
tt = ResourceDescriptionFramework$new()
tt$add_triple(diagnosis_id, rdf_type, Diagnosis)
tt$add_triple(diagnosis_id, is_contained_by, identifiers$pid)
# tt$add_triple(diagnosis_id, has_content, literal(diagnosis_content))
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' Discussion Section Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
discussion = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
# discussion_content = atoms$text_content[[1]]
# discussion_content = escape_special(discussion_content$text_value)
tt$add_triple(identifiers$nid, rdf_type, Discussion)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
# tt$add_triple(identifiers$nid, has_content, literal(discussion_content))
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
if (plazi_doc == TRUE){
occurrenceID = check_dwc_occurrence(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$occurrenceID = occurrenceID
tt = serialize_occurrence(tt, atoms, identifiers$nid)
locationID = check_dwc_location(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$locationID = locationID
tt = serialize_location(tt, atoms, identifiers$nid)
identificationID = check_dwc_identification(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$identificationID = identificationID
tt = serialize_identification(tt, atoms, identifiers$nid)
eventID = check_dwc_event(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$eventID = eventID
tt = serialize_event(tt, atoms, identifiers$nid)
}
return(tt)
}
#' @export
methods = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
# methods_content = atoms$text_content[[1]]
# methods_content = escape_special(methods_content$text_value)
tt$add_triple(identifiers$nid, rdf_type, Methods)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
# tt$add_triple(identifiers$nid, has_content, literal(methods_content))
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' Checklist Section Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
checklist = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
# checklist_content = atoms$text_content[[1]]
# checklist_content = escape_special(checklist_content$text_value)
tt$add_triple(identifiers$nid, rdf_type, Checklist)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
# tt$add_triple(identifiers$nid, has_content, literal(checklist_content))
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' Distribution Section Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
distribution = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
#distribution_content = atoms$text_content[[1]]
#distribution_content = escape_special(distribution_content$text_value)
tt$add_triple(identifiers$nid, rdf_type, Distribution)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
#tt$add_triple(identifiers$nid, has_content, literal(distribution_content))
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
if (plazi_doc == TRUE){
occurrenceID = check_dwc_occurrence(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$occurrenceID = occurrenceID
tt = serialize_occurrence(tt, atoms, identifiers$nid)
locationID = check_dwc_location(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$locationID = locationID
tt = serialize_location(tt, atoms, identifiers$nid)
identificationID = check_dwc_identification(atoms = atoms, typeMaterialID = identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$identificationID = identificationID
tt = serialize_identification(tt, atoms, identifiers$nid)
eventID = check_dwc_event(atoms = atoms, typeMaterialID= identifiers$nid, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$eventID = eventID
tt = serialize_event(tt, atoms, identifiers$nid)
}
return(tt)
}
#' @export
figure = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, Figure)
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
if (length(unlist(atoms$caption))>0){
fig_caption = atoms$caption[[1]]
fig_caption = escape_special(fig_caption$text_value)
tt$add_triple(identifiers$nid, has_content, literal(fig_caption))
}
sapply(atoms$doi, function(i){
tt$add_triple(identifiers$nid, has_doi, i)
})
sapply(atoms$download_link, function(i){
tt$add_triple(identifiers$nid, has_link, i)
})
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' @export
type_material = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
#first check the status
tt = ResourceDescriptionFramework$new()
#get the name:
#escape special chars
typeMaterialID = identifiers$nid
tt$add_triple(typeMaterialID, rdf_type, MaterialsExamined)
tt$add_triple(typeMaterialID, is_contained_by, identifiers$pid)
#material_content = atoms$text_content[[1]]
#material_content = escape_special(material_content$text_value)
#tt$add_triple(typeMaterialID, has_content, literal(material_content))
if (length(unlist(atoms$holotype))>0){
sapply(atoms$holotype, function(n){
#n$text_value = escape_special(n$text_value)
#label = escape_special_json(n$text_value)
df = set_component_frame(label = escape_special_json(n$text_value), mongo_key = NA, type = "holotype", orcid = NA, parent = identifiers$nid$uri, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
holotypeID = get_or_set_mongoid(df, prefix)
tt$add_triple(identifier(holotypeID, prefix), rdf_type, HolotypeDescription)
tt$add_triple(identifier(holotypeID, prefix), is_contained_by, typeMaterialID)
})
}
if (length(unlist(atoms$collection_code))>0){
sapply(atoms$collection_code, function(n){
tt$add_triple(typeMaterialID, dwc_collection_code, n)
})
}
occurrenceID = check_dwc_occurrence(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$occurrenceID = occurrenceID
tt = serialize_occurrence(tt, atoms, typeMaterialID)
locationID = check_dwc_location(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$locationID = locationID
tt = serialize_location(tt, atoms, typeMaterialID)
identificationID = check_dwc_identification(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$identificationID = identificationID
tt = serialize_identification(tt, atoms, typeMaterialID)
eventID = check_dwc_event(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$eventID = eventID
tt = serialize_event(tt, atoms, typeMaterialID)
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
tt = institution_serializer(tt, atoms, identifiers)
}
#' @export
occurrence_list = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
#first check the status
tt = ResourceDescriptionFramework$new()
typeMaterialID = identifiers$pid
if (plazi_doc == TRUE)
{
nid = typeMaterialID
}else{
nid = identifiers$nid
tt$add_triple(nid, is_contained_by, typeMaterialID)
}
if (length(unlist(atoms$collection_code))>0){
sapply(atoms$collection_code, function(n){
tt$add_triple(typeMaterialID, dwc_collection_code, n)
})
}
occurrenceID = check_dwc_occurrence(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$occurrenceID = occurrenceID
tt = serialize_occurrence(tt, atoms, typeMaterialID)
locationID = check_dwc_location(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$locationID = locationID
tt = serialize_location(tt, atoms, typeMaterialID)
identificationID = check_dwc_identification(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$identificationID = identificationID
tt = serialize_identification(tt, atoms, typeMaterialID)
eventID = check_dwc_event(atoms = atoms, typeMaterialID= typeMaterialID, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
atoms$eventID = eventID
tt = serialize_event(tt, atoms, typeMaterialID)
tt = serialize_dwc_relations(tt, atoms)
identifiers = list(nid = nid,
pid = identifiers$root_id,
root_id = identifiers$root_id)
tt = institution_serializer(tt, atoms, identifiers)
return(tt)
}
#' @export
taxonomic_name_usage = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tnu_id = identifiers$nid
tt = ResourceDescriptionFramework$new()
atoml_to_val = function(atoml)
{
if (length(atoml) > 0) {
return(atoml[[1]]$text_value)
}
else {
return (NA)
}
}
scName = get_scientific_name_or_tcl(kingdom = atoml_to_val(atoms$kingdom), phylum = atoml_to_val(atoms$phylum), class = atoml_to_val(atoms$class), order = atoml_to_val(atoms$order),
family = atoml_to_val(atoms$family), subfamily = atoml_to_val(atoms$subfamily), genus = atoml_to_val(atoms$genus), subgenus = atoml_to_val(atoms$subgenus), species = atoml_to_val(atoms$species),
subspecies = atoml_to_val(atoms$subspecies), authorship = atoml_to_val(atoms$authorship), secundum_literal = NA)
#if there is no genus, species, etc. just take the whole node content (e.g. <tp:taxon-name obkms_id="540d2809-0e18-4beb-9bf0-a1118d0a6d37" obkms_process="TRUE">Flueggea suffruticosa</tp:taxon-name>)
if(scName==""){
scName = atoms$text_content[[1]]$text_value
}
if( !(is.null(scName))){
if ( !(scName == "")){
scName_df = set_component_frame(label = scName, mongo_key = NA, type = "scName", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
scNameID = get_or_set_mongoid(scName_df, prefix)
scNameID = identifier(scNameID, prefix)
#get or set scName identifier - mongoDB
scName_gbif = gbif_taxonomy_mapping(scName = scName, collection = checklistCol)
if (is.null(scName_gbif)){
scNameParent = NA
}
else
{
scNameParent = scName_gbif$uri
update_parent(key=scNameID$uri, parent = scNameParent, collection = general_collection)
}
#add the parent to mongo
update_parent = function(key, parent, collection = general_collection){
query = sprintf("{\"%s\":\"%s\"}", "key", key)
update = sprintf("{\"$set\":{\"%s\":\"%s\"}}", "parent", parent)
collection$update(query = query, update = update)
}
tt$add_triple(tnu_id, rdf_type, TaxonomicNameUsage)
tt$add_triple(tnu_id, is_contained_by, identifiers$pid)
tt$add_triple(tnu_id, mentions, scNameID)
tt$add_triple(scNameID, rdf_type, ScientificName)
tt$add_triple(scNameID, has_gbifID, scName_gbif)
tt$add_triple(scNameID, rdfs_label, literal(scName))
sapply(atoms$kingdom, function(i) {
tt$add_triple(scNameID, dwc_kingdom, i)
})
sapply(atoms$phylum, function(i) {
tt$add_triple(scNameID, dwc_phylum, i)
})
sapply(atoms$class, function(i) {
tt$add_triple(scNameID, dwc_class, i)
})
sapply(atoms$order, function(i) {
tt$add_triple(scNameID, dwc_order, i)
})
sapply(atoms$family, function(i) {
tt$add_triple(scNameID, dwc_family, i)
})
sapply(atoms$subfamily, function(i) {
tt$add_triple(scNameID, dwc_family, i)
})
sapply(atoms$genus, function(i) {
tt$add_triple(scNameID, dwc_genus, i)
})
sapply(atoms$subgenus, function(i) {
tt$add_triple(scNameID, dwc_subgenus, i)
})
sapply(atoms$species, function(i) {
tt$add_triple(scNameID, dwc_species_ep, i)
})
sapply(atoms$subspecies, function(i) {
tt$add_triple(scNameID, dwc_subspecies_ep, i)
})
sapply(atoms$verbatim_rank, function(i) {
tt$add_triple(scNameID, has_verbatim_rank, i)
})
sapply(atoms$taxonomic_rank, function(i) {
tt$add_triple(scNameID, has_taxonomic_rank_id, i)
})
sapply(atoms$authorship, function(i) {
tt$add_triple(scNameID, dwc_authorship, i)
})
sapply(atoms$verbatim_status, function(i) { tt$add_triple(tnu_id, taxonStatus, i ) } )
if(length(atoms$verbatim_status) >= 1)
{
atoms$status = list(verbstat2openbiodiv(atoms$verbatim_status[[1]]$text_value, def_prefix = prefix))
}
sapply(atoms$status, function(i) { tt$add_triple(tnu_id, has_taxonomic_status_id, i ) } )
}
}
return(tt)
}
#' @export
plazi_taxonomic_name_usage = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tnu_id = identifiers$nid
tt = ResourceDescriptionFramework$new()
atoml_to_val = function(atoml)
{
if (length(atoml) > 0) {
return(atoml[[1]]$text_value)
}
else {
return (NA)
}
}
scName = get_scientific_name_or_tcl(kingdom = atoml_to_val(atoms$kingdom), phylum = atoml_to_val(atoms$phylum), class = atoml_to_val(atoms$class), order = atoml_to_val(atoms$order),
family = atoml_to_val(atoms$family), subfamily = atoml_to_val(atoms$subfamily), genus = atoml_to_val(atoms$genus), subgenus = atoml_to_val(atoms$subgenus), species = atoml_to_val(atoms$species),
subspecies = atoml_to_val(atoms$subspecies), authorship = atoml_to_val(atoms$authorship), secundum_literal = NA)
#if there is no genus, species, etc. just take the whole node content (e.g. <tp:taxon-name obkms_id="540d2809-0e18-4beb-9bf0-a1118d0a6d37" obkms_process="TRUE">Flueggea suffruticosa</tp:taxon-name>)
if(scName==""){
scName = atoms$text_content[[1]]$text_value
}
if( !(is.null(scName))){
if ( !(scName == "")){
scName_df = set_component_frame(label = scName, mongo_key = NA, type = "scName", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
scNameID = get_or_set_mongoid(scName_df, prefix)
scNameID = identifier(scNameID, prefix)
#get or set scName identifier - mongoDB
scName_gbif = gbif_taxonomy_mapping(scName = scName, collection = checklistCol)
if (is.null(scName_gbif)){
scNameParent = NA
}
else
{
scNameParent = scName_gbif$uri
update_parent(key=scNameID$uri, parent = scNameParent, collection = general_collection)
}
#add the parent to mongo
update_parent = function(key, parent, collection = general_collection){
query = sprintf("{\"%s\":\"%s\"}", "key", key)
update = sprintf("{\"$set\":{\"%s\":\"%s\"}}", "parent", parent)
collection$update(query = query, update = update)
}
tt$add_triple(tnu_id, rdf_type, TaxonomicNameUsage)
tt$add_triple(tnu_id, is_contained_by, identifiers$pid)
tt$add_triple(tnu_id, mentions, scNameID)
tt$add_triple(scNameID, rdf_type, ScientificName)
tt$add_triple(scNameID, has_gbifID, scName_gbif)
tt$add_triple(scNameID, rdfs_label, literal(scName))
sapply(atoms$kingdom, function(i) {
tt$add_triple(scNameID, dwc_kingdom, i)
})
sapply(atoms$phylum, function(i) {
tt$add_triple(scNameID, dwc_phylum, i)
})
sapply(atoms$class, function(i) {
tt$add_triple(scNameID, dwc_class, i)
})
sapply(atoms$order, function(i) {
tt$add_triple(scNameID, dwc_order, i)
})
sapply(atoms$family, function(i) {
tt$add_triple(scNameID, dwc_family, i)
})
sapply(atoms$subfamily, function(i) {
tt$add_triple(scNameID, dwc_family, i)
})
sapply(atoms$genus, function(i) {
tt$add_triple(scNameID, dwc_genus, i)
})
sapply(atoms$subgenus, function(i) {
tt$add_triple(scNameID, dwc_subgenus, i)
})
sapply(atoms$species, function(i) {
tt$add_triple(scNameID, dwc_species_ep, i)
})
sapply(atoms$subspecies, function(i) {
tt$add_triple(scNameID, dwc_subspecies_ep, i)
})
sapply(atoms$verbatim_rank, function(i) {
tt$add_triple(scNameID, has_verbatim_rank, i)
})
sapply(atoms$taxonomic_rank, function(i) {
tt$add_triple(scNameID, has_taxonomic_rank_id, i)
})
sapply(atoms$authorship, function(i) {
tt$add_triple(scNameID, dwc_authorship, i)
})
if (length(unlist(atoms$status))>0 ){
status = atoms$status[[1]]$text_value
tt$add_triple(scNameID, taxonStatus, literal(status))
}
}
}
return(tt)
}
#' Taxonomic Key Section Constructor
#'
#' @param atoms a list of literals
#' @param identifiers a list of identifiers
#' @param prefix
#'
#' @return \code{ResourceDescriptionFramework}
#' @export
taxonomic_key = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, TaxonomicKey)
title = atoms$title[[1]]
tt$add_triple(identifiers$nid, dc_title, title)
# if (length(unlist(atoms$text_content))>0){
# table_content = atoms$text_content[[1]]
# table_content = escape_special(table_content$text_value)
# tt$add_triple(identifiers$nid, has_content, literal(table_content))
# }
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid)
tt = bold_genbank_serializer(tt, atoms, identifiers, publisher_id, journal_id, plazi_doc, doi, article_id)
return(tt)
}
#' @export
materials_examined = function(){
}
#' @export
institution_code_usage = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, rdf_type, InstitutionalCodeUsage) # type
tt$add_triple(identifiers$nid, is_contained_by, identifiers$pid) # containtment
sapply(atoms$text_content, function(i) { tt$add_triple(identifiers$nid, institutional_code, i) })
# TODO add Institions as resources
return(tt)
}
#' @export
metadata_en = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
pub_date = function(year, month, day) {
literal(paste0(text_value = unlist(year)["text_value"],
"-", unlist(month)["text_value"], "-", unlist(day)["text_value"]),
xsd_type = rdf4r::xsd_date)
}
if (length(unlist(atoms$pensoft_pub)) > 0) {
stop("Pensoft publication")
}
doi = unlist(atoms$doi)["text_value"]
article_root = identifiers$root_id
publisher_lit = toString(unlist(atoms$publisher)["text_value"])
df = set_component_frame(label = publisher_lit, mongo_key = c(publisher = NA), type = "publisher", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
publisher_id = get_or_set_mongoid(df, prefix )
publisher_id = identifier(publisher_id, prefix)
journal_lit = toString(unlist(atoms$journal)["text_value"])
df = set_component_frame(label = journal_lit, mongo_key = c(journal = NA), type = "journal", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = NA, article_id = NA)
journal_id = get_or_set_mongoid(df, prefix )
journal_id = identifier(journal_id, prefix)
paper_label = unlist(atoms$title)["text_value"]
research_paper_df = set_component_frame(label = paper_label, mongo_key = NA, type = "researchPaper", orcid = NA, parent = article_root$uri, key = NA, publisher_id = NA, journal_id = NA, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
paper_id = get_or_set_mongoid(research_paper_df, prefix)
paper_id = identifier(paper_id, prefix)
tt = ResourceDescriptionFramework$new()
tt$add_triple(journal_id, rdf_type, Journal)
sapply(atoms$journal, function(j) {
tt$add_triple(journal_id, pref_label, j)
})
sapply(atoms$journal_abbrev, function(j) {
tt$add_triple(journal_id, alt_label, j)
})
sapply(atoms$issn, function(i) {
tt$add_triple(journal_id, issn, i)
})
sapply(atoms$eIssn, function(i) {
tt$add_triple(journal_id, eissn, i)
})
if(length(atoms$journal_zoobank)>0){
for (n in 1:length(atoms$journal_zoobank)){
if (grepl("zoobank", unlist(atoms$journal_zoobank[[n]])["text_value"]) == TRUE){
text_value = gsub("^(.*):", "", unlist(atoms$journal_zoobank[n])["text_value"])
text_value = gsub(" ", "", text_value)
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
journal_zoobank_literal = ll
}
}
zoobank_id = identifier(text_value, c(zoobank = "http://zoobank.org/"))
tt$add_triple(journal_id, has_identifier, zoobank_id)
tt$add_triple(zoobank_id, rdf_type, ResourceIdentifier)
tt$add_triple(zoobank_id, identifier_scheme, zoobank)
tt$add_triple(zoobank_id, rdfs_label, journal_zoobank_literal)
tt$add_triple(zoobank_id, has_url, literal(strip_angle(zoobank_id$uri), xsd_type = xsd_uri))
}
tt$add_triple(journal_id, frbr_part, article_root)
tt$add_triple(article_root, rdf_type, Article)
articleTitle = atoms$title[[1]]
articleTitle = escape_special(articleTitle$text_value)
tt$add_triple(article_root, rdfs_label, literal(articleTitle))
tt$add_triple(article_root, realization_of, paper_id)
sapply(atoms$title, function(i) {
tt$add_triple(article_root, dc_title, i)
})
sapply(atoms$doi, function(i) {
tt$add_triple(article_root, has_doi, i)
})
#the article zoobank id is the one containing the words "zoobank"
if(length(atoms$article_zoobank) > 0){
for (n in 1:length(atoms$article_zoobank)){
if (grepl("zoobank", unlist(atoms$article_zoobank[[n]])["text_value"]) == TRUE){
text_value = gsub("^(.*):", "", unlist(atoms$article_zoobank[n])["text_value"])
text_value = gsub(" ", "", text_value)
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
article_zoobank_literal = ll
article_zoobank_id = identifier(text_value, c(zoobank = "http://zoobank.org/"))
tt$add_triple(article_root, has_identifier, article_zoobank_id)
tt$add_triple(article_zoobank_id, rdf_type, ResourceIdentifier)
tt$add_triple(article_zoobank_id, identifier_scheme, zoobank)
tt$add_triple(article_zoobank_id, rdfs_label, article_zoobank_literal)
tt$add_triple(article_zoobank_id, has_url, literal(article_zoobank_id, xsd_type = xsd_uri))
}
}
}
if(length(atoms$plazi_id) > 0){
for (n in 1:length(atoms$plazi_id)){
text_value = unlist(atoms$plazi_id[n])["text_value"]
text_value = gsub(" ", "", text_value)
text_value = gsub("http://tb.plazi.org/GgServer/summary/", "", text_value)
ll = list(text_value = text_value, xsd_type = xsd_string, lang = "",
squote = paste0("\"", text_value, "\"", ""))
class(ll) = "literal"
plazi_article_id_lit = ll
plazi_url = paste0("http://tb.plazi.org/GgServer/summary/",text_value)
plazi_article_id = identifier(text_value, c(plazi = "http://tb.plazi.org/GgServer/summary/"))
tt$add_triple(article_root, has_identifier, plazi_article_id)
tt$add_triple(plazi_article_id, rdf_type, ResourceIdentifier)
tt$add_triple(plazi_article_id, identifier_scheme, plazi)
tt$add_triple(plazi_article_id, rdfs_label, plazi_article_id_lit)
tt$add_triple(plazi_article_id, has_url, literal(plazi_url, xsd_type = xsd_uri))
}
}
sapply(atoms$publisher, function(i) {
tt$add_triple(article_root, has_publisher, i)
})
sapply(atoms$date, function(i) {
tt$add_triple(article_root, publication_date, i)
})
sapply(list(pub_date(atoms$pub_year, atoms$pub_month, atoms$pub_day)),
function(i) {
tt$add_triple(article_root, publication_date, i)
})
tt$add_triple(article_root, has_publisher_id, publisher_id)
sapply(atoms$issue, function(i) {
tt$add_triple(article_root, has_issue, i)
})
tt$add_triple(publisher_id, rdf_type, Publisher)
sapply(atoms$publisher, function(i) {
tt$add_triple(publisher_id, rdfs_label, i)
})
tt$add_triple(paper_id, rdf_type, Paper)
sapply(atoms$keyword, function(i) {
tt$add_triple(identifiers$nid, has_keyword, i)
})
return(tt)
}
#' @export
tnu = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tnu_id = identifiers$nid
tt = ResourceDescriptionFramework$new()
atoml_to_val = function(atoml)
{
if (length(atoml) > 0) {
return(atoml[[1]]$text_value)
}
else {
return (NA)
}
}
#get the name:
#escape special chars
#atoms$text_content = double_quote_replacer(atoms$text_content)
scName = get_scientific_name_or_tcl(kingdom = atoml_to_val(atoms$kingdom), phylum = atoml_to_val(atoms$phylum), class = atoml_to_val(atoms$class), order = atoml_to_val(atoms$order),
family = atoml_to_val(atoms$family), subfamily = atoml_to_val(atoms$subfamily), genus = atoml_to_val(atoms$genus), subgenus = atoml_to_val(atoms$subgenus), species = atoml_to_val(atoms$species),
subspecies = atoml_to_val(atoms$subspecies), authorship = atoml_to_val(atoms$authorship), secundum_literal = NA)
#if there is no genus, species, etc. just take the whole node content (e.g. <tp:taxon-name obkms_id="540d2809-0e18-4beb-9bf0-a1118d0a6d37" obkms_process="TRUE">Flueggea suffruticosa</tp:taxon-name>)
if(scName==""){
scName = atoms$text_content[[1]]$text_value
}
if( !(is.null(scName))){
if ( !(scName == "")){
scName_df = set_component_frame(label = scName, mongo_key = NA, type = "scName", orcid = NA, parent = NA, key = NA, publisher_id = NA, journal_id=NULL, plazi_doc = plazi_doc, doi = NA, article_id = NA)
scNameID = get_or_set_mongoid(scName_df, prefix)
scNameID = identifier(scNameID, prefix)
#get or set scName identifier - mongoDB
scName_gbif = gbif_taxonomy_mapping(scName = scName, collection = checklistCol)
if (is.null(scName_gbif)){
scNameParent = NA
}
else
{
scNameParent = scName_gbif$uri
update_parent(key=scNameID$uri, parent = scNameParent, collection = general_collection)
}
#add the parent to mongo
update_parent = function(key, parent, collection = general_collection){
query = sprintf("{\"%s\":\"%s\"}", "key", key)
update = sprintf("{\"$set\":{\"%s\":\"%s\"}}", "parent", parent)
collection$update(query = query, update = update)
}
tt$add_triple(tnu_id, rdf_type, TaxonomicNameUsage)
tt$add_triple(tnu_id, is_contained_by, identifiers$pid)
tt$add_triple(tnu_id, mentions, scNameID)
tt$add_triple(scNameID, rdf_type, ScientificName)
tt$add_triple(scNameID, has_gbifID, scName_gbif)
tt$add_triple(scNameID, rdfs_label, literal(scName))
sapply(atoms$kingdom, function(i) {
tt$add_triple(scNameID, dwc_kingdom, i)
})
sapply(atoms$phylum, function(i) {
tt$add_triple(scNameID, dwc_phylum, i)
})
sapply(atoms$class, function(i) {
tt$add_triple(scNameID, dwc_class, i)
})
sapply(atoms$order, function(i) {
tt$add_triple(scNameID, dwc_order, i)
})
sapply(atoms$family, function(i) {
tt$add_triple(scNameID, dwc_family, i)
})
sapply(atoms$subfamily, function(i) {
tt$add_triple(scNameID, dwc_family, i)
})
sapply(atoms$genus, function(i) {
tt$add_triple(scNameID, dwc_genus, i)
})
sapply(atoms$subgenus, function(i) {
tt$add_triple(scNameID, dwc_subgenus, i)
})
sapply(atoms$species, function(i) {
tt$add_triple(scNameID, dwc_species_ep, i)
})
sapply(atoms$subspecies, function(i) {
tt$add_triple(scNameID, dwc_subspecies_ep, i)
})
sapply(atoms$verbatim_rank, function(i) {
tt$add_triple(scNameID, has_verbatim_rank, i)
})
sapply(atoms$taxonomic_rank, function(i) {
tt$add_triple(scNameID, has_taxonomic_rank_id, i)
})
sapply(atoms$authorship, function(i) {
tt$add_triple(scNameID, dwc_authorship, i)
})
if (length(atoms$status)>0 ){
status = atoms$status[[1]]$text_value
tt$add_triple(scNameID, taxonStatus, literal(status))
}
}
}
return(tt)
}
#' @export
table = function (atoms, identifiers, prefix,new_taxons, mongo_key, publisher_id,
journal_id, plazi_doc, doi, article_id)
{
tt = ResourceDescriptionFramework$new()
tt$add_triple(identifiers$nid, is_contained_by, identifiers$root_id)
tt$add_triple(identifiers$nid, rdf_type, Table)
return(tt)
}
###############################3
#dwc checks
#' @export
check_dwc_occurrence = function(atoms, typeMaterialID, publisher_id, journal_id, plazi_doc, doi, article_id){
if (length(atoms$catalog_number)>0 || length(atoms$other_catalog_numbers)>0 || length(atoms$record_number)>0 || length(atoms$recorded_by)>0 || length(atoms$individual_count)>0 || length(atoms$sex)>0 || length(atoms$life_stage)>0 ){
occurrence_content_label = escape_special(atoms$text_content[[1]]$text_value)
occurrence_df = set_component_frame(label = paste0("Occurrence: ", occurrence_content_label), mongo_key = NA, type = "occurrence", orcid = NA, parent = typeMaterialID$uri, key = NA, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
occurrenceID = identifier(get_or_set_mongoid(occurrence_df, prefix), prefix)
}else{
occurrenceID = NULL
}
return(occurrenceID)
}
#' @export
serialize_occurrence = function(tt, atoms, typeMaterialID){
# atoms$occurrenceID = ifelse(length(unlist(atoms$occurrenceID)) == 0, list(occurrenceID), atoms$occurrenceID)
occurrenceID = atoms$occurrenceID
#Occurrence
tt$add_triple(typeMaterialID, dwc_occurrence_id, occurrenceID)
tt$add_triple(occurrenceID, rdf_type, Occurrence)
sapply(atoms$record_number, function(n){
tt$add_triple(occurrenceID, dwc_record_number, n)
})
sapply(atoms$recorded_by, function(n){
tt$add_triple(occurrenceID, dwc_recorded_by, n)
})
sapply(atoms$catalog_number, function(n){
tt$add_triple(occurrenceID, dwc_catalog_number, n)
})
sapply(atoms$other_catalog_numbers, function(n){
tt$add_triple(occurrenceID, dwc_other_catalog_numbers, n)
})
sapply(atoms$individual_count, function(n){
tt$add_triple(occurrenceID, dwc_individual_count, n)
})
sapply(atoms$sex, function(n){
tt$add_triple(occurrenceID, dwc_sex, n)
})
sapply(atoms$life_stage, function(n){
tt$add_triple(occurrenceID, dwc_life_stage, n)
})
return(tt)
}
#' @export
check_dwc_location = function(atoms, typeMaterialID, publisher_id, journal_id, plazi_doc, doi, article_id){
if (length(atoms$coordinates)>0 || length(atoms$verbatim_lat)>0 || length(atoms$verbatim_long)>0 || length(atoms$decimal_long)>0 || length(atoms$decimal_lat)>0 || length(atoms$country)>0 || length(atoms$state_province)>0 || length(atoms$decimal_lat)>0 || length(atoms$country)>0 || length(atoms$locality)>0 || length(atoms$elevation)>0 || length(atoms$depth)>0 || length(atoms$water_body)>0){
occurrence_content_label = escape_special(atoms$text_content[[1]]$text_value)
location_df = set_component_frame(label = paste0("Location: ", occurrence_content_label), mongo_key = NA, type = "location", orcid = NA, parent = typeMaterialID$uri, key = NA, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
locationID = identifier(get_or_set_mongoid(location_df, prefix), prefix)
}else{
locationID = NULL
}
return(locationID)
}
#' @export
serialize_location = function(tt, atoms, typeMaterialID){
#atoms$locationID = ifelse(length(unlist(atoms$locationID)) == 0, list(locationID), atoms$locationID)
locationID = atoms$locationID
#Location
tt$add_triple(typeMaterialID, dwc_location_id, locationID)
tt$add_triple(locationID, rdf_type, Location)
verbatim_coord = function(lat, long) {
if (length(lat) == 1 && length(long) == 1) {
latitude = lat[[1]]$text_value
latitude = escape_special(latitude)
longitude = long[[1]]$text_value
longitude = escape_special(longitude)
coord = paste0(latitude, ", ", longitude )
}
else {
NA
}
}
if (length(unlist(atoms$decimal_lat)) > 0 && length(unlist(atoms$decimal_long)) > 0){
atoms$coordinates = list(literal(verbatim_coord(atoms$decimal_lat,atoms$decimal_long), xsd_type = rdf4r::xsd_string))
}else{
atoms$coordinates = ifelse(length(unlist(atoms$coordinates)) == 0, list(literal(verbatim_coord(atoms$verbatim_lat,
atoms$verbatim_long), xsd_type = rdf4r::xsd_string)), atoms$coordinates)
}
sapply(atoms$coordinates , function(n){
coordinates = escape_special(n$text_value)
tt$add_triple(locationID, dwc_coordinates, literal(coordinates))
})
sapply(atoms$decimal_long , function(n){
tt$add_triple(locationID, dwc_decimal_long, n)
})
sapply(atoms$decimal_lat , function(n){
tt$add_triple(locationID, dwc_decimal_lat, n)
})
sapply(atoms$country, function(n){
tt$add_triple(locationID, dwc_country, n)
})
sapply(atoms$state_province, function(n){
tt$add_triple(locationID, dwc_state_province, n)
})
sapply(atoms$locality, function(n){
tt$add_triple(locationID, dwc_locality, n)
})
sapply(atoms$elevation, function(n){
tt$add_triple(locationID, dwc_elevation, n)
})
sapply(atoms$depth, function(n){
tt$add_triple(locationID, dwc_depth, n)
})
sapply(atoms$water_body, function(n){
tt$add_triple(locationID, dwc_water_body, n)
})
return(tt)
}
#' @export
check_dwc_identification = function(atoms, typeMaterialID, publisher_id, journal_id, plazi_doc = plazi_doc, doi, article_id){
if (length(atoms$identified_by)>0 || length(atoms$type_status)>0){
occurrence_content_label = escape_special(atoms$text_content[[1]]$text_value)
identification_df = set_component_frame(label = paste0("Identification: ", occurrence_content_label), mongo_key = NA, type = "identification", orcid = NA, parent = typeMaterialID$uri, key = NA, publisher_id = publisher_id, journal_id = journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
identificationID = identifier(get_or_set_mongoid(identification_df, prefix), prefix)
}else{
identificationID = NULL
}
return(identificationID)
}
#' @export
serialize_identification = function(tt, atoms, typeMaterialID){
# atoms$identificationID = ifelse(length(unlist(atoms$identificationID)) == 0, list(identificationID), atoms$identificationID)
identificationID = atoms$identificationID
tt$add_triple(typeMaterialID, dwc_identification_id, identificationID)
tt$add_triple(identificationID, rdf_type, Identification)
sapply(atoms$identified_by, function(n){
tt$add_triple(identificationID, dwc_identified_by, n)
})
sapply(atoms$type_status, function(n){
tt$add_triple(identificationID, dwc_type_status, n)
})
return(tt)
}
#' @export
check_dwc_event = function(atoms, typeMaterialID, publisher_id, journal_id, plazi_doc = plazi_doc, doi, article_id){
if (length(atoms$collection_year)>0 || length(atoms$collection_month)>0 || length(atoms$collection_day)>0 || length(atoms$event_date)>0 || length(atoms$collection_date)>0 || length(atoms$samplingProtocol)>0 || length(atoms$habitat) > 0){
occurrence_content_label = escape_special(atoms$text_content[[1]]$text_value)
event_df = set_component_frame(label = paste0("Event: ", occurrence_content_label), mongo_key = NA, type = "event", orcid = NA, parent = typeMaterialID$uri, key = NA, publisher_id=publisher_id, journal_id=journal_id, plazi_doc = plazi_doc, doi = doi, article_id = article_id)
eventID = identifier(get_or_set_mongoid(event_df, prefix), prefix)
}else{
eventID = NULL
}
return(eventID)
}
#' @export
serialize_event = function(tt, atoms, typeMaterialID){
eventID = atoms$eventID
#atoms$eventID = ifelse(length(unlist(atoms$eventID)) == 0, list(eventID), atoms$eventID)
tt$add_triple(typeMaterialID, dwc_event_id, eventID)
tt$add_triple(eventID, rdf_type, Event)
collection_date = function(year, month, day) {
if (length(year) == 1 && length(month) == 1 && length(day) == 1) {
paste0(year[[1]]$text_value, "-", month[[1]]$text_value, "-", day[[1]]$text_value)
}
else if (length(year) == 1) {
year[[1]]$text_value
}
else {
NA
}
}
if (length(atoms$collection_month)>0){
for (n in 1:length(atoms$collection_month)){
collection_month_text = atoms$collection_month[[n]]$text_value
if (nchar(collection_month_text)<2){
collection_month_text = paste0("0", collection_month_text)
}
atoms$collection_month[[n]]=literal(collection_month_text)
}
}
atoms$collection_date = ifelse(length(unlist(atoms$collection_date)) == 0, list(literal(collection_date(atoms$collection_year,
atoms$collection_month, atoms$collection_day), xsd_type = rdf4r::xsd_date)), atoms$collection_date)
sapply(atoms$collection_date, function(n){
tt$add_triple(eventID, dwc_event_date, n)
})
sapply(atoms$event_date, function(n){
tt$add_triple(eventID, dwc_event_date, n)
})
sapply(atoms$collection_year, function(n){
tt$add_triple(eventID, dwc_collection_year, n)
})
sapply(atoms$collection_month, function(n){
tt$add_triple(eventID, dwc_collection_month, n)
})
sapply(atoms$collection_day, function(n){
tt$add_triple(eventID, dwc_collection_day, n)
})
sapply(atoms$sampling_protocol, function(n){
tt$add_triple(eventID, dwc_sampling_protocol, n)
})
sapply(atoms$habitat, function(n){
tt$add_triple(eventID, dwc_habitat, n)
})
return(tt)
}
#' @export
serialize_dwc_relations = function(tt, atoms){
tt$add_triple(atoms$occurrenceID, relation, atoms$eventID)
tt$add_triple(atoms$occurrenceID, relation, atoms$locationID)
tt$add_triple(atoms$occurrenceID, relation, atoms$identificationID)
tt$add_triple(atoms$eventID, relation, atoms$occurrenceID)
tt$add_triple(atoms$eventID, relation, atoms$locationID)
tt$add_triple(atoms$eventID, relation, atoms$identificationID)
tt$add_triple(atoms$locationID, relation, atoms$occurrenceID)
tt$add_triple(atoms$locationID, relation, atoms$eventID)
tt$add_triple(atoms$locationID, relation, atoms$identificationID)
tt$add_triple(atoms$identificationID, relation, atoms$occurrenceID)
tt$add_triple(atoms$identificationID, relation, atoms$eventID)
tt$add_triple(atoms$identificationID, relation, atoms$locationID)
return(tt)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.