#' XML schema for TaxPub articles annotated with BOLD identifiers
#'
#' Declarative \code{XmlSchema} tree. Every schema node carries three maps
#' that share the same keys:
#' \itemize{
#'   \item \code{atoms}: atom name -> XPath of the value (\code{NA} = no XPath;
#'     presumably filled in by the constructor -- confirm against the
#'     constructor functions),
#'   \item \code{atom_lang}: atom name -> XPath of the language attribute,
#'   \item \code{atom_types}: atom name -> \code{rdf4r} XSD datatype.
#' }
#' \code{components} lists the child schemas; XPaths starting with \code{./}
#' are relative to the node matched by the enclosing schema.
#'
#' @export
mySchema = XmlSchema$new(
  schema_name = "mySchema",
  xpath = "/",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = NA,
  atoms = c(
    title = "/article/front/article-meta/title-group/article-title",
    date = NA,
    pub_year = "/article/front/article-meta/pub-date[@pub-type='epub']/year",
    pub_month = "/article/front/article-meta/pub-date[@pub-type='epub']/month",
    pub_day = "/article/front/article-meta/pub-date[@pub-type='epub']/day",
    doi = "/article/front/article-meta/article-id[@pub-id-type='doi']",
    zenodo = NA,
    zoobank = NA,
    publisher = "/article/front/journal-meta/publisher/publisher-name",
    journal = "/article/front/journal-meta/journal-title-group/journal-title",
    journal_abbrev = "/article/front/journal-meta/journal-title-group/abbrev-journal-title",
    issn = "/article/front/journal-meta/issn[@pub-type='ppub']",
    eIssn = "/article/front/journal-meta/issn[@pub-type='epub']",
    issue = "/article/front/article-meta/issue",
    volume = NA,
    starting_page = NA,
    ending_page = NA,
    keyword = "/article/front/article-meta/kwd-group/kwd",
    # bold-ids / bins are the containers that process_bold() adds to article-meta
    bold_id = "/article/front/article-meta/bold-ids/bold-id",
    bin = "/article/front/article-meta/bins/bin"
    #pensoft_pub = NA # because we skip only Pensoft pubs from Plazi
  ),
  atom_lang = c(
    title = NA,
    date = NA,
    pub_year = NA,
    pub_month = NA,
    pub_day = NA,
    doi = NA,
    zenodo = NA,
    zoobank = NA, # added: every atom needs a matching atom_lang entry
    publisher = NA,
    journal = "/article/front/journal-meta/journal-title-group/journal-title/@xml:lang",
    journal_abbrev = "/article/front/journal-meta/journal-title-group/abbrev-journal-title/@xml:lang",
    issn = NA,
    eIssn = NA,
    issue = NA,
    volume = NA,
    starting_page = NA,
    ending_page = NA,
    #pensoft_pub = NA
    keyword = NA,
    bold_id = NA,
    bin = NA
  ),
  atom_types = list(
    title = rdf4r::xsd_string,
    date = rdf4r::xsd_date,
    pub_year = rdf4r::xsd_integer,
    pub_month = rdf4r::xsd_integer,
    pub_day = rdf4r::xsd_integer,
    doi = rdf4r::xsd_string,
    zenodo = rdf4r::xsd_string,
    zoobank = rdf4r::xsd_string, # added: every atom needs a matching atom_types entry
    publisher = rdf4r::xsd_string,
    journal = rdf4r::xsd_string,
    journal_abbrev = rdf4r::xsd_string,
    issn = rdf4r::xsd_string,
    eIssn = rdf4r::xsd_string,
    issue = rdf4r::xsd_integer,
    volume = rdf4r::xsd_integer,
    starting_page = rdf4r::xsd_integer,
    ending_page = rdf4r::xsd_integer,
    #pensoft_pub = rdf4r::xsd_string
    keyword = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    bin = rdf4r::xsd_string
  ),
  constructor = my_metadata,
  components = list(
    # BOLD id (one schema node per <bold-id>)
    XmlSchema$new(
      schema_name = "myschema_bold_id",
      xpath = "/article/front/article-meta/bold-ids/bold-id",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      constructor = bold_id_constr,
      components = NULL
    ),
    # BIN (one schema node for the <bins> container, atoms are its <bin> children)
    XmlSchema$new(
      schema_name = "myschema_bin",
      xpath = "/article/front/article-meta/bins",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        bin = "./bin"
      ),
      atom_lang = c(
        bin = NA
      ),
      atom_types = list(
        bin = rdf4r::xsd_string
      ),
      constructor = bin_constr,
      components = NULL
    ),
    # Keyword group
    XmlSchema$new(
      schema_name = "mySchema_keyword_group",
      xpath = "/article/front/article-meta/kwd-group",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        keyword = "./kwd"
      ),
      atom_lang = c(
        keyword = NA
      ),
      atom_types = list(
        keyword = rdf4r::xsd_string
      ),
      constructor = keyword_group,
      components = NULL
    ),
    # Abstract (the translated abstract is a sibling element, hence "..")
    XmlSchema$new(
      schema_name = "taxpub_abstract",
      xpath = "/article/front/article-meta/abstract",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = ".",
        trans_abstract = "../trans-abstract"
      ),
      atom_lang = c(
        text_content = NA,
        trans_abstract = "../trans-abstract/@xml:lang"
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string,
        trans_abstract = rdf4r::xsd_string
      ),
      constructor = abstract,
      components = NULL
    ),
    # Title
    XmlSchema$new(
      schema_name = "taxpub_title",
      xpath = "/article/front/article-meta/title-group/article-title",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      constructor = title,
      components = NULL
    ),
    # Author
    XmlSchema$new(
      schema_name = "taxpub_author",
      xpath = "/article/front/article-meta/contrib-group/contrib",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        full_name = NA,
        surname = "./name/surname",
        given_names = "./name/given-names",
        email = "./email",
        aff_id = "./xref/@rid",
        # absolute path: collects the address lines of all affiliations of the article
        all_affiliations = "/article/front/article-meta/aff/addr-line"
        # role = "./mods:role/mods:roleTerm"
      ),
      atom_lang = c(
        full_name = NA,
        surname = NA,
        given_names = NA,
        email = NA,
        aff_id = NA,
        all_affiliations = NA
        #role = NA
      ),
      atom_types = list(
        full_name = rdf4r::xsd_string,
        surname = rdf4r::xsd_string,
        given_names = rdf4r::xsd_string,
        email = rdf4r::xsd_string,
        aff_id = rdf4r::xsd_integer,
        all_affiliations = rdf4r::xsd_string
      ),
      constructor = author,
      components = NULL
    ),
    # Introduction
    XmlSchema$new(
      schema_name = "taxpub_introduction_section",
      xpath = "/article/body/sec[@sec-type='Introduction']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      constructor = introduction_section
    ),
    # Discussion
    XmlSchema$new(
      schema_name = "discussion_section",
      xpath = "//sec[@sec-type='Discussion']", # matches Discussion sections anywhere in the document
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      constructor = discussion
    ),
    # Treatment
    XmlSchema$new(
      schema_name = "taxpub_treatment",
      xpath = "/article/body/sec/tp:taxon-treatment",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      constructor = treatment,
      components = list(
        # Nomenclature
        XmlSchema$new(
          schema_name = "taxpub_nomenclature_section",
          xpath = "./tp:nomenclature", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          # nomenclature citations
          components = list(
            XmlSchema$new(
              schema_name = "taxpub_nomenclature_citations",
              xpath = "./tp:nomenclature-citation-list", # rel path from nomenclature
              file_pattern = ".*\\.xml",
              extension = ".xml",
              prefix = NA,
              atoms = c(
                text_content = "."
              ),
              atom_lang = c(
                text_content = NA
              ),
              atom_types = list(
                text_content = rdf4r::xsd_string
              ),
              components = NULL,
              constructor = nomenclature_citations
            )
          ),
          constructor = nomenclature
        ),
        # Materials Examined
        XmlSchema$new(
          schema_name = "taxpub_materials_examined",
          xpath = "./tp:treatment-sec[@sec-type='materials']", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          constructor = materials_examined
        ),
        # Diagnosis
        XmlSchema$new(
          schema_name = "taxpub_diagnosis_section",
          xpath = "./tp:treatment-sec[@sec-type='Diagnosis']", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          constructor = diagnosis
        ),
        # Distribution
        XmlSchema$new(
          schema_name = "taxpub_distribution_section",
          xpath = "./tp:treatment-sec[@sec-type='Distribution']", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          constructor = distribution
        )
      )
    ),
    # Taxonomic Key
    XmlSchema$new(
      schema_name = "taxpub_taxonomic_key",
      # was "/sec[...]", which only matches a root element named <sec>; keys
      # are nested inside the article, so search the whole document
      xpath = "//sec[@sec-type='key']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      constructor = taxonomic_key,
      components = NULL
    ),
    # Figure
    XmlSchema$new(
      schema_name = "taxpub_figure",
      xpath = "//fig|//fig-group",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = ".",
        caption = "./caption"
      ),
      atom_lang = c(
        text_content = NA,
        caption = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string,
        caption = rdf4r::xsd_string
      ),
      constructor = figure,
      components = NULL
    ),
    # Taxonomic Name Usage
    XmlSchema$new(
      schema_name = "taxpub_taxonomic_name_usage",
      xpath = "//tp:taxon-name",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        date = NA,
        pub_year = "/article/front/article-meta/pub-date/year",
        pub_month = "/article/front/article-meta/pub-date/month",
        pub_day = "/article/front/article-meta/pub-date/day",
        # Rank atoms accept English and Latin spellings, lower- and capitalised.
        # "regnum" is Latin for kingdom and "divisio"/"division" is the
        # botanical equivalent of phylum; the two groups were swapped before.
        kingdom = "./tp:taxon-name-part[@taxon-name-part-type='kingdom' or @taxon-name-part-type='Kingdom' or @taxon-name-part-type='regnum' or @taxon-name-part-type='Regnum']",
        phylum = "./tp:taxon-name-part[@taxon-name-part-type='phylum' or @taxon-name-part-type='Phylum' or @taxon-name-part-type='divisio' or @taxon-name-part-type='Divisio' or @taxon-name-part-type='division' or @taxon-name-part-type='Division']",
        class = "./tp:taxon-name-part[@taxon-name-part-type='class' or @taxon-name-part-type='Class' or @taxon-name-part-type='classis' or @taxon-name-part-type='Classis']",
        order = "./tp:taxon-name-part[@taxon-name-part-type='order' or @taxon-name-part-type='Order' or @taxon-name-part-type='ordo' or @taxon-name-part-type='Ordo']",
        family = "./tp:taxon-name-part[@taxon-name-part-type='family' or @taxon-name-part-type='Family' or @taxon-name-part-type='familia' or @taxon-name-part-type='Familia' or @taxon-name-part-type='famil' or @taxon-name-part-type='Famil']",
        # tribe-level ranks are folded into subfamily
        subfamily = "./tp:taxon-name-part[@taxon-name-part-type='subfamily' or @taxon-name-part-type='Subfamily' or @taxon-name-part-type='subfamilia' or @taxon-name-part-type='Subfamilia' or @taxon-name-part-type='subfamil' or @taxon-name-part-type='Subfamil' or @taxon-name-part-type='tribe' or @taxon-name-part-type='Tribe' or @taxon-name-part-type='tribus' or @taxon-name-part-type='Tribus' or @taxon-name-part-type='subtribe' or @taxon-name-part-type='Subtribe' or @taxon-name-part-type='subtribus' or @taxon-name-part-type='Subtribus']",
        genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']",
        regularzied_genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']/@reg",
        # sections are folded into subgenus
        subgenus = "./tp:taxon-name-part[@taxon-name-part-type='subgenus' or @taxon-name-part-type='Subgenus' or @taxon-name-part-type='subgenera' or @taxon-name-part-type='Subgenera' or @taxon-name-part-type='section' or @taxon-name-part-type='Section' or @taxon-name-part-type='sectio' or @taxon-name-part-type='Sectio']",
        species = "./tp:taxon-name-part[@taxon-name-part-type='Species' or @taxon-name-part-type='species']",
        # all infraspecific ranks are folded into subspecies; the path was
        # absolute ("/tp:...") and therefore never matched -- now relative
        # like its siblings
        subspecies = "./tp:taxon-name-part[@taxon-name-part-type='Subspecies' or @taxon-name-part-type='subspecies' or @taxon-name-part-type='Variety' or @taxon-name-part-type='variety' or @taxon-name-part-type='varietas' or @taxon-name-part-type='Varietas' or @taxon-name-part-type='variation' or @taxon-name-part-type='Variation' or @taxon-name-part-type='subvariety' or @taxon-name-part-type='Subvariety' or @taxon-name-part-type='subvarietas' or @taxon-name-part-type='Subvarietas' or @taxon-name-part-type='subvariation' or @taxon-name-part-type='Subvariation' or @taxon-name-part-type='Form' or @taxon-name-part-type='form' or @taxon-name-part-type='forma' or @taxon-name-part-type='Forma' or @taxon-name-part-type='aberration' or @taxon-name-part-type='Aberration' or @taxon-name-part-type='race' or @taxon-name-part-type='Race' or @taxon-name-part-type='Subform' or @taxon-name-part-type='subform' or @taxon-name-part-type='subforma' or @taxon-name-part-type='Subforma' or @taxon-name-part-type='subaberation' or @taxon-name-part-type='Subaberation' or @taxon-name-part-type='subrace' or @taxon-name-part-type='Subrace' ]",
        verbatim = ".",
        verbatim_rank = "./tp:taxon-name-part[last()]/@taxon-name-part-type",
        verbatim_status = "following-sibling::tp:taxon-status",
        status = NA,
        authorship = "following-sibling::tp:taxon-authority | ./tp:taxon-name-part[@taxon-name-part-type='authority']",
        external_taxonomic_name_id = "./object-id",
        secundum_literal = NA
      ),
      atom_lang = c(
        date = NA,
        pub_year = NA,
        pub_month = NA,
        pub_day = NA,
        kingdom = NA,
        phylum = NA,
        class = NA,
        order = NA,
        family = NA,
        subfamily = NA,
        genus = NA,
        regularzied_genus = NA,
        subgenus = NA,
        species = NA, ## This is an error in TaxonX, not DwC!
        subspecies = NA,
        verbatim = NA, # added: was missing although "verbatim" is an atom
        verbatim_rank = NA,
        verbatim_status = NA,
        status = NA,
        authorship = NA,
        external_taxonomic_name_id = NA,
        secundum_literal = NA
      ),
      atom_types = list(
        date = rdf4r::xsd_date,
        pub_year = rdf4r::xsd_integer,
        pub_month = rdf4r::xsd_integer,
        pub_day = rdf4r::xsd_integer,
        kingdom = rdf4r::xsd_string,
        phylum = rdf4r::xsd_string, # added: was missing although "phylum" is an atom
        class = rdf4r::xsd_string,
        order = rdf4r::xsd_string,
        family = rdf4r::xsd_string,
        subfamily = rdf4r::xsd_string,
        genus = rdf4r::xsd_string,
        regularzied_genus = rdf4r::xsd_string,
        subgenus = rdf4r::xsd_string,
        species = rdf4r::xsd_string,
        subspecies = rdf4r::xsd_string,
        verbatim = rdf4r::xsd_string, # added: was missing
        verbatim_rank = rdf4r::xsd_string,
        verbatim_status = rdf4r::xsd_string, # added: was missing
        # NOTE(review): the next two keys have no counterpart in `atoms`;
        # kept in case a constructor looks them up -- confirm and drop if unused
        taxonomic_rank = rdf4r::xsd_string,
        taxonomic_status = rdf4r::xsd_string,
        status = rdf4r::xsd_string,
        authorship = rdf4r::xsd_string,
        external_taxonomic_name_id = rdf4r::xsd_string,
        secundum_literal = rdf4r::xsd_string
      ),
      constructor = taxonomic_name_usage,
      components = NULL
    ),
    # Institution Code Usage
    XmlSchema$new(
      schema_name = "institution_code_usage",
      xpath = "//named-content[@content-type='dwc:institutional_code']",
      file_pattern = "",
      extension = "",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      constructor = institution_code_usage,
      components = NULL
    )
  )
)
#' Convert a BOLD-annotated article XML to RDF and write it as Turtle
#'
#' Reads \code{filename}, extracts triples according to \code{xml_schema},
#' and, when the article lists BINs and/or BOLD ids, downloads the matching
#' BOLD records with \code{bold_seqspec()}, stores them as
#' \code{<configuration$bold_dir>/<basename>_bold.xml}, and adds the triples
#' extracted from them (using \code{newSchema}) to the article's triples.
#' Finally the XML is written back to \code{filename} and the serialization
#' goes to \code{<serialization_dir>/<basename>.ttl}.
#'
#' @param filename path of the article XML; it is overwritten with the
#'   processed document.
#' @param xml_schema schema used for both node extraction and literal lookup.
#' @param access_options list passed on to the extractors;
#'   \code{access_options$prefix["openbiodiv"]} is read here.
#' @param serialization_dir directory that receives the \code{.ttl} file.
#' @param reprocess,dry passed unchanged to \code{node_extractor()} and
#'   \code{new_node_extractor()}.
#' @return \code{TRUE} on success; \code{FALSE}, with the error re-issued as a
#'   warning, if any step failed.
#' @export
my_xml2rdf <- function (filename, xml_schema = mySchema, access_options, serialization_dir,reprocess = FALSE, dry = TRUE)
{
  tryCatch({
    xml = xml2::read_xml(filename)
    openbiodiv_prefix = access_options$prefix["openbiodiv"]
    # file name without directory and extension; basis for all output names
    file_stem = strip_filename_extension(last_token(filename, split = "/"))

    triples = ResourceDescriptionFramework$new()
    context_id = identifier(root_id(xml, access_options, xml_schema), openbiodiv_prefix)
    triples$set_context(context_id)
    # honour the xml_schema argument (mySchema used to be hard-coded here,
    # silently ignoring a schema supplied by the caller)
    triples = node_extractor(node = xml, xml_schema = xml_schema,
                             reprocess = reprocess, triples = triples,
                             access_options = access_options,
                             dry = dry, filename = filename)
    atoms = find_literals(xml, xml_schema)

    # NOTE(review): this value is not used below. The calls are kept because
    # get_or_set_obkms_id() may assign an id to the node as a side effect --
    # confirm, then drop if it does not.
    bold_identifiers = list(
      nid = identifier(get_or_set_obkms_id(xml), openbiodiv_prefix),
      pid = identifier(parent_id(xml), openbiodiv_prefix),
      root_id = identifier(root_id(xml, access_options), openbiodiv_prefix)
    )

    # character vector of the `text_value` fields of a list of literals
    text_values = function(literals) {
      as.character(unlist(lapply(literals, function(x) x[c('text_value')])))
    }

    # saves a BOLD response next to the other BOLD files, extracts its triples
    # in the article's context and appends them to the article triples
    add_bold_triples = function(bold_xml) {
      bold_file = paste0(configuration$bold_dir, "/", paste0(file_stem, "_bold.xml"))
      xml2::write_xml(bold_xml, bold_file)
      bold_triples = ResourceDescriptionFramework$new()
      bold_triples$set_context(context_id)
      bold_triples = new_node_extractor(node = bold_xml, xml_schema = newSchema,
                                        reprocess = reprocess, triples = bold_triples,
                                        access_options = access_options,
                                        dry = dry, filename = filename)
      triples$add_triples(bold_triples)
    }

    has_bin = length(atoms$bin) > 0
    has_bold_id = length(atoms$bold_id) > 0

    if (has_bin && has_bold_id)
    {
      # one BOLD request per identifier kind; the BIN records are appended to
      # the document returned for the ids so that a single file is stored
      bin_xml <- bold_seqspec(taxon = NULL, bin = text_values(atoms$bin), format = "xml")
      merged_xml <- bold_seqspec(taxon = NULL, ids = text_values(atoms$bold_id), format = "xml")
      for (child in xml2::xml_children(bin_xml))
      {
        xml2::xml_add_child(merged_xml, child)
      }
      add_bold_triples(merged_xml)
    }
    else if (has_bold_id)
    {
      bold_xml <- bold_seqspec(taxon = NULL, ids = text_values(atoms$bold_id), format = "xml")
      # match_records modifies bold_xml by setting the same obkms_ids
      bold_xml = match_records(xml, bold_xml, access_options)
      add_bold_triples(bold_xml)
    }
    else if (has_bin)
    {
      bold_xml <- bold_seqspec(taxon = NULL, bin = text_values(atoms$bin), format = "xml")
      add_bold_triples(bold_xml)
    }

    xml2::write_xml(xml, filename)
    serialization = triples$serialize()
    cat(serialization, file = paste0(serialization_dir, "/", paste0(file_stem, ".ttl")))
    return(TRUE)
  }, error = function(e) {
    warning(e)
    return(FALSE)
  })
}
#' Collect the BOLD Systems links of an article under article-meta
#'
#' Looks for elements whose \code{xlink:href} starts with a BOLD Systems URL.
#' For every such link the query string is taken apart: a key of \code{bin}
#' or \code{clusteruri} yields a \code{<bin>} node under a new \code{<bins>}
#' element, any other key yields a \code{<bold-id>} node under a new
#' \code{<bold-ids>} element. Both containers are attached to
#' \code{//article-meta}; a container that stays empty is removed again.
#' The result is saved as \code{<dir>/modified/<basename>_mod.xml}.
#'
#' @param file path of the article XML.
#' @return the path of the written file, or \code{NULL} when the article
#'   contains no BOLD links (nothing is written in that case).
#' @export
process_bold <- function(file)
{
  suppressWarnings({
    xml <- read_xml(file)
    #xpath to find all bold systems links within article!
    results <- xml_find_all(xml, "//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')] | //*[starts-with(@xlink:href, 'http://boldsystems.org/')]")
    if (length(results) == 0)
    {
      return(NULL)
    }

    doc <- xmlParse(xml, isHTML = FALSE)
    nodeset <- getNodeSet(doc, "//article-meta")
    #creates parent nodes for the bold-ids or bins section
    bold_ids_node <- newXMLNode("bold-ids", parent = nodeset)
    bins_node <- newXMLNode("bins", parent = nodeset)
    empty_bin <- TRUE
    empty_bold_id <- TRUE
    for (r in results)
    {
      # `r` is coerced to its serialized XML; the greedy match drops
      # everything up to and including the last "http"
      node_string <- sub('.*http', '', r)
      #handles cases where the node value is either a link or just an id
      #if we have 2 closing tags => the value is just an id
      #1 closing tag means the value contained a link and sub('.*http','', r) matched and replaced the second 'http' (within the value)
      if (stringi::stri_count_regex(node_string, '>') > 1)
      {
        string <- sub('\">?.*', '', node_string)
      }
      else
      {
        string <- sub('<.*', '', node_string)
      }
      #adds the http part to url_parse the string => break it down into parts and only take the query part
      string <- paste0("http", string)
      result <- url_parse(string)
      query <- result$query
      # A link without a query string carries no identifier. Previously an NA
      # query made the comparison below NA and aborted the whole file, and an
      # empty query produced an empty <bold-id>.
      if (length(query) != 1 || is.na(query) || !nzchar(query))
      {
        next
      }
      # NOTE(review): key and value are taken around the first / last "="
      # respectively, so a query with several parameters yields the value of
      # the last one -- confirm BOLD links always carry a single parameter.
      key <- sub('=.*', '', query)
      value <- sub('.*=', '', query)
      #if the query part contains "bin" or "clusteruri" the uri contains a bin
      #else - bold-id (which can be anything, like record-id or process-id)
      if (key == "bin" || key == "clusteruri")
      {
        empty_bin <- FALSE
        new_node <- newXMLNode("bin", parent = bins_node)
      }
      else
      {
        empty_bold_id <- FALSE
        new_node <- newXMLNode("bold-id", parent = bold_ids_node)
      }
      xmlValue(new_node) <- value
    }
    #remove the empty nodesets
    if (empty_bin)
    {
      removeNodes(bins_node)
    }
    if (empty_bold_id)
    {
      removeNodes(bold_ids_node)
    }
    saving_file_name <- paste0(dirname(file), "/modified/", basename(file), "_mod.xml")
    # the "modified" sub-directory is not guaranteed to exist yet
    dir.create(dirname(saving_file_name), showWarnings = FALSE, recursive = TRUE)
    saveXML(doc, saving_file_name)
    return(saving_file_name)
  })
}
#' Match BOLD records to the article's bold-id nodes
#'
#' Builds the identifiers of every \code{//bold-id} node of the article and
#' copies the node id of the i-th \code{bold-id} onto the i-th
#' \code{//record} of the BOLD response as an \code{obkms_id} attribute.
#' Matching is purely positional; records beyond the number of
#' \code{bold-id} nodes are left untouched.
#'
#' @param xml the article document.
#' @param bold_xml the BOLD response; it is modified in place.
#' @param access_options list; \code{access_options$prefix["openbiodiv"]} is
#'   read here and the list is passed to \code{root_id()}.
#' @return \code{bold_xml}, with \code{obkms_id} attributes set.
#' @export
match_records <- function(xml, bold_xml, access_options){
  results_bold_xml <- xml_find_all(bold_xml, "//record")
  results_pensoft_xml <- xml_find_all(xml, "//bold-id")
  openbiodiv_prefix = access_options$prefix["openbiodiv"]

  # must start as an empty local list: it was never initialised before, so the
  # first assignment either failed or silently picked up a global of that name
  bold_identifiers = list()
  for (r in results_pensoft_xml){
    bold_identifiers[[length(bold_identifiers) + 1]] = list(
      nid = identifier(get_or_set_obkms_id(r), openbiodiv_prefix),
      pid = identifier(parent_id(r), openbiodiv_prefix),
      root_id = identifier(root_id(r, access_options), openbiodiv_prefix)
    )
  }

  # BOLD may return more records than there are bold-id nodes; stop at the
  # shorter of the two instead of indexing past the end of the list
  # NOTE(review): assumes the BOLD response lists records in the order of the
  # requested ids -- confirm against the bold_seqspec() output.
  n_matched = min(length(results_bold_xml), length(bold_identifiers))
  for (i in seq_len(n_matched)){
    xml_set_attr(results_bold_xml[[i]], "obkms_id", bold_identifiers[[i]]$nid)
  }
  #returns the modified xml
  return(bold_xml)
}
#' XML schema for BOLD record documents
#'
#' Schema tree applied to the XML returned by BOLD: the root schema matches
#' every \code{//record}; its components describe the identifiers, the
#' storing institution, the taxonomy (phylum to species), the nucleotide
#' sequence and the catalog number of a record.
#'
#' @export
newSchema = local({
  xml_pattern = ".*\\.xml"
  xml_extension = ".xml"

  # Schema whose only atom, `text_content`, is the text of the matched node.
  text_schema = function(name, path, constructor, components = NULL) {
    XmlSchema$new(
      schema_name = name,
      xpath = path,
      file_pattern = xml_pattern,
      extension = xml_extension,
      prefix = NA,
      atoms = c(text_content = "."),
      atom_lang = c(text_content = NA),
      atom_types = list(text_content = rdf4r::xsd_string),
      constructor = constructor,
      components = components
    )
  }

  # Schema for one taxonomic rank: the taxon name plus its numeric taxon id.
  rank_schema = function(name, path, constructor) {
    XmlSchema$new(
      schema_name = name,
      xpath = path,
      file_pattern = xml_pattern,
      extension = xml_extension,
      prefix = NA,
      atoms = c(text_content = "./taxon/name", taxonid = "./taxon/taxID"),
      atom_lang = c(text_content = NA, taxonid = NA),
      atom_types = list(text_content = rdf4r::xsd_string, taxonid = rdf4r::xsd_integer),
      constructor = constructor,
      components = NULL
    )
  }

  # Ranks are addressed relative to the <taxonomy> element, broadest first.
  taxonomy_ranks = list(
    rank_schema("newSchema_phylum", "./phylum", phylum),
    rank_schema("newSchema_class", "./class", clas),
    rank_schema("newSchema_order", "./order", order),
    rank_schema("newSchema_family", "./family", family),
    rank_schema("newSchema_genus", "./genus", genus),
    rank_schema("newSchema_species", "./species", species)
  )

  # NOTE(review): except for record_id, the component paths below are
  # absolute, i.e. they select from the document root rather than from the
  # current <record> -- confirm this is what new_node_extractor expects.
  record_components = list(
    # record id
    text_schema("newSchema_record_id", "./record_id", record_id),
    # process id
    text_schema("newSchema_process_id", "/bold_records/record/processid", process_id),
    # bin
    text_schema("newSchema_bin", "/bold_records/record/bin_uri", bin_constr),
    # sample id
    text_schema("newSchema_sample_id", "/bold_records/record/specimen_identifiers/sampleid", sample_id),
    # sequence id
    text_schema("newSchema_sequence_id", "/bold_records/record/sequences/sequence/sequenceID", sequence_id),
    # institution
    text_schema("newSchema_institution", "/bold_records/record/specimen_identifiers/institution_storing", institution),
    # taxonomy with one child schema per rank
    text_schema("newSchema_taxonomy", "/bold_records/record/taxonomy", taxonomy,
                components = taxonomy_ranks),
    # nucleotides
    text_schema("newSchema_nucleotides", "/bold_records/record/sequences/sequence/nucleotides", nucleotides),
    # catalog number
    text_schema("newSchema_catalog_num", "/bold_records/record/specimen_identifiers/catalognum", catalog)
  )

  XmlSchema$new(
    schema_name = "newSchema",
    xpath = "//record",
    file_pattern = xml_pattern,
    extension = xml_extension,
    prefix = NA,
    atoms = c(
      record = "."
      #random = "/bold_records/record/specimen_desc/reproduction"
    ),
    atom_lang = c(
      record = NA
      #random = NA
    ),
    atom_types = list(
      record = rdf4r::xsd_string
      #random = rdf4r::xsd_string
    ),
    constructor = new_metadata,
    components = record_components
  )
})
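# NOTE: the two lines below are leftover text from the web page this file was
# copied from; they are not R code and are kept only as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.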