#' TaxPub Schema
#'
#' @include xml.R
#' @export
taxpub = XmlSchema$new(
schema_name = "taxpub",
xpath = "/",
file_pattern = ".*\\.xml",
extension = ".xml",
prefix = c(openbiodiv = "http://openbiodiv.net/"),
atoms = c(
title = "/article/front/article-meta/title-group/article-title",
date = NA,
pub_year = "/article/front/article-meta/pub-date[@pub-type='epub']/year",
pub_month = "/article/front/article-meta/pub-date[@pub-type='epub']/month",
pub_day = "/article/front/article-meta/pub-date[@pub-type='epub']/day",
doi = "/article/front/article-meta/article-id[@pub-id-type='doi']",
zenodo = "/article/front/article-meta/uri[@content-type='zenodo_dep_id']",
article_zoobank = "/article/front/article-meta/uri[@content-type='zoobank']",
plazi_id = "/article/front/article-meta/article-id[@pub-id-type='other'][contains(.,'http://tb.plazi.org/')]",
publisher = "/article/front/journal-meta/publisher/publisher-name",
journal = "/article/front/journal-meta/journal-title-group/journal-title",
journal_id = "/article/front/journal-meta/journal-id[@journal-id-type='index']",
journal_abbrev = "/article/front/journal-meta/journal-title-group/abbrev-journal-title",
journal_zoobank = "/article/front/journal-meta/journal-id[@journal-id-type='aggregator']",
issn = "/article/front/journal-meta/issn[@pub-type='ppub']",
eIssn = "/article/front/journal-meta/issn[@pub-type='epub']",
issue = "/article/front/article-meta/issue",
volume = NA,
starting_page = NA,
ending_page = NA,
keyword = "/article/front/article-meta/kwd-group/kwd"
),
atom_lang = c(
title = NA,
date = NA,
pub_year = NA,
pub_month = NA,
pub_day = NA,
doi = NA,
zenodo = NA,
article_zoobank = NA,
plazi_id = NA,
publisher = NA,
journal = "/article/front/journal-meta/journal-title-group/journal-title/@xml:lang",
journal_id = NA,
journal_abbrev = "/article/front/journal-meta/journal-title-group/abbrev-journal-title/@xml:lang",
journal_zoobank = NA,
issn = NA,
eIssn = NA,
issue = NA,
volume = NA,
starting_page = NA,
ending_page = NA,
#pensoft_pub = NA
keyword = NA
),
atom_types = list(
title = rdf4r::xsd_string,
date = rdf4r::xsd_date,
pub_year = rdf4r::xsd_integer,
pub_month = rdf4r::xsd_integer,
pub_day = rdf4r::xsd_integer,
doi = rdf4r::xsd_string,
zenodo = rdf4r::xsd_string,
article_zoobank = rdf4r::xsd_string,
plazi_id = rdf4r::xsd_string,
publisher = rdf4r::xsd_string,
journal = rdf4r::xsd_string,
journal_id = rdf4r::xsd_string,
journal_abbrev = rdf4r::xsd_string,
journal_zoobank = rdf4r::xsd_string,
issn = rdf4r::xsd_string,
eIssn = rdf4r::xsd_string,
issue = rdf4r::xsd_integer,
volume = rdf4r::xsd_integer,
starting_page = rdf4r::xsd_integer,
ending_page = rdf4r::xsd_integer,
#pensoft_pub = rdf4r::xsd_string
keyword = rdf4r::xsd_string
),
mongo_key = c(article = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
constructor = metadata,
components = list(
# Keyword group: schema for the <kwd-group> element of the article metadata.
XmlSchema$new(
  schema_name = "keyword_group",
  xpath = "/article/front/article-meta/kwd-group",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  # Relative XPath: the <kwd> children of the context node
  # (presumably each node matched by `xpath` -- TODO confirm).
  atoms = c(keyword = "./kwd"),
  # No language attribute is looked up for keywords.
  atom_lang = c(keyword = NA),
  atom_types = list(keyword = rdf4r::xsd_string),
  mongo_key = c(keywords = "/article/front/article-meta/kwd-group"),
  constructor = keyword_group,
  components = NULL
),
# Abstract: schema for the article abstract; also reads <trans-abstract>
# elements that sit next to it under the same parent.
XmlSchema$new(
  schema_name = "abstract",
  xpath = "/article/front/article-meta/abstract",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    # Steps up to the parent first, because <trans-abstract> is a sibling.
    trans_abstract = "../trans-abstract",
    institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
    institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  # Only the translated abstract has a language lookup (its xml:lang attribute).
  atom_lang = c(
    text_content = NA,
    trans_abstract = "../trans-abstract/@xml:lang",
    institution_name = NA,
    institution_code = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  atom_types = list(
    text_content = rdf4r::xsd_string,
    trans_abstract = rdf4r::xsd_string,
    institution_name = rdf4r::xsd_string,
    institution_code = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string
  ),
  mongo_key = c(abstract = "."),
  constructor = abstract,
  components = NULL
),
# Title: schema for the <article-title> element; its only atom is the node's
# own text.
XmlSchema$new(
  schema_name = "title",
  xpath = "/article/front/article-meta/title-group/article-title",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(text_content = "."),
  atom_lang = c(text_content = NA),
  atom_types = list(text_content = rdf4r::xsd_string),
  mongo_key = c(article_title = "/article/front/article-meta/title-group/article-title"),
  constructor = title,
  components = NULL
),
# Author: schema for the <contrib> elements of the article's contributor group.
XmlSchema$new(
  schema_name = "author",
  xpath = "/article/front/article-meta/contrib-group/contrib",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    # No XPath is given for full_name; presumably it is assembled elsewhere
    # from surname/given_names -- TODO confirm.
    full_name = NA,
    surname = "./name/surname",
    given_names = "./name/given-names",
    email = "./email",
    aff_id = "./xref/@rid",
    # Starts with "//", so this is a document-wide path: it selects the first
    # <addr-line> of every <aff>, not only those of the current contributor.
    all_affiliations = "//article/front/article-meta/aff/addr-line[1]",
    orcid = "./uri[@content-type='orcid']"
    # Disabled atom, kept for reference:
    # role = "./mods:role/mods:roleTerm"
  ),
  atom_lang = c(
    full_name = NA,
    surname = NA,
    given_names = NA,
    email = NA,
    aff_id = NA,
    all_affiliations = NA,
    orcid = NA
    # role = NA
  ),
  atom_types = list(
    full_name = rdf4r::xsd_string,
    surname = rdf4r::xsd_string,
    given_names = rdf4r::xsd_string,
    email = rdf4r::xsd_string,
    # NOTE(review): aff_id is read from an @rid attribute but typed as an
    # integer -- confirm that the rid values are numeric.
    aff_id = rdf4r::xsd_integer,
    all_affiliations = rdf4r::xsd_string,
    orcid = rdf4r::xsd_string
  ),
  # TODO (from the original author): change the mongo key.
  mongo_key = c(author_name = "./name/given-names", surname = "./name/surname"),
  constructor = author,
  components = NULL
),
# Introduction: schema for the body section whose sec-type is 'Introduction'.
XmlSchema$new(
  schema_name = "introduction_section",
  xpath = "/article/body/sec[@sec-type='Introduction']",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
    institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  atom_lang = c(
    text_content = NA,
    institution_name = NA,
    institution_code = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  atom_types = list(
    text_content = rdf4r::xsd_string,
    institution_name = rdf4r::xsd_string,
    institution_code = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string
  ),
  mongo_key = c(introduction = "."),
  constructor = introduction_section
),
# Discussion: schema for any <sec> whose sec-type is 'Discussion', at any depth.
XmlSchema$new(
  schema_name = "discussion_section",
  xpath = "//sec[@sec-type='Discussion']",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
    institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  atom_lang = c(
    text_content = NA,
    institution_name = NA,
    institution_code = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  atom_types = list(
    text_content = rdf4r::xsd_string,
    institution_name = rdf4r::xsd_string,
    institution_code = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string
  ),
  mongo_key = c(discussion = "."),
  constructor = discussion
),
# Methods: schema for the materials-and-methods section(s).
XmlSchema$new(
  schema_name = "methods",
  # 'materials|methods' is one quoted attribute value; only the `|` between
  # the two //sec expressions is an XPath union.
  xpath = "//sec[@sec-type= 'materials|methods'] | //sec[@sec-type= 'Methods']",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
    institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  atom_lang = c(
    text_content = NA,
    institution_name = NA,
    institution_code = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  atom_types = list(
    text_content = rdf4r::xsd_string,
    institution_name = rdf4r::xsd_string,
    institution_code = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string
  ),
  mongo_key = c(methods = "."),
  constructor = methods
),
# Checklist: schema for any <sec> whose sec-type contains 'Checklist'.
XmlSchema$new(
  schema_name = "checklist",
  xpath = "//sec[contains(@sec-type, 'Checklist')]",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
    institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  atom_lang = c(
    text_content = NA,
    institution_name = NA,
    institution_code = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  atom_types = list(
    text_content = rdf4r::xsd_string,
    institution_name = rdf4r::xsd_string,
    institution_code = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string
  ),
  mongo_key = c(checklist = "."),
  constructor = checklist
),
# Treatment: schema for <tp:taxon-treatment> elements. Its nested components
# describe the nomenclature (with its citation list), the type material (with
# its per-occurrence list items), the diagnosis and the distribution sections.
XmlSchema$new(
  schema_name = "treatment",
  # "/article/body/.." steps back up to <article>, so treatments are searched
  # for under the whole article element.
  xpath = "/article/body/..//tp:taxon-treatment",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    status = "tp:nomenclature/tp:taxon-status",
    institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
    institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']",
    habitat = ".//named-content[@content-type='dwc:habitat']"
  ),
  atom_lang = c(
    text_content = NA,
    status = NA,
    institution_name = NA,
    institution_code = NA,
    bold_id = NA,
    genbank_id = NA,
    habitat = NA
  ),
  atom_types = list(
    text_content = rdf4r::xsd_string,
    status = rdf4r::xsd_string,
    institution_name = rdf4r::xsd_string,
    institution_code = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string,
    habitat = rdf4r::xsd_string
  ),
  mongo_key = c(treatment = "/article/body/..//tp:taxon-treatment"),
  constructor = treatment,
  components = list(
    # Nomenclature
    XmlSchema$new(
      schema_name = "nomenclature",
      xpath = "tp:nomenclature",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = c(openbiodiv = "http://openbiodiv.net/"),
      atoms = c(
        text_content = ".",
        zoobank = ".//tp:taxon-name/object-id[@content-type='zoobank']",
        institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
        institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']"
      ),
      atom_lang = c(
        text_content = NA,
        zoobank = NA,
        institution_name = NA,
        institution_code = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string,
        zoobank = rdf4r::xsd_string,
        institution_name = rdf4r::xsd_string,
        institution_code = rdf4r::xsd_string
      ),
      mongo_key = c(nomenclature = "."),
      constructor = nomenclature,
      components = list(
        # Nomenclature citation list
        XmlSchema$new(
          schema_name = "nomenclature_citations-list",
          # Relative path; the original note says "from treatment" -- TODO
          # confirm which node it is actually evaluated against.
          xpath = "tp:nomenclature-citation-list",
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = c(openbiodiv = "http://openbiodiv.net/"),
          atoms = c(text_content = "."),
          atom_lang = c(text_content = NA),
          atom_types = list(text_content = rdf4r::xsd_string),
          mongo_key = c(nomenclature_citations = "."),
          constructor = nomenclature_citations,
          components = list(
            # Single nomenclature citation
            XmlSchema$new(
              schema_name = "nomenclature_citation",
              # Relative path; the original note says "from treatment" -- TODO
              # confirm which node it is actually evaluated against.
              xpath = "tp:nomenclature-citation",
              file_pattern = ".*\\.xml",
              extension = ".xml",
              prefix = c(openbiodiv = "http://openbiodiv.net/"),
              atoms = c(
                text_content = ".",
                # TODO (from the original author): cross-reference with the
                # bibliography section.
                comment = ".//comment",
                bibr = ".//comment/xref[@ref-type]/@rid"
              ),
              atom_lang = c(
                text_content = NA,
                comment = NA,
                bibr = NA
              ),
              atom_types = list(
                text_content = rdf4r::xsd_string,
                comment = rdf4r::xsd_string,
                bibr = rdf4r::xsd_string
              ),
              mongo_key = c(nomenclature_citation = "."),
              constructor = nomenclature_citation,
              components = NULL
            )
          )
        )
      )
    ),
    # Type material
    XmlSchema$new(
      schema_name = "type_material",
      xpath = "tp:treatment-sec[@sec-type='type material'] | tp:treatment-sec[@sec-type='material'] | tp:treatment-sec[@sec-type='materials'] | tp:treatment-sec[@sec-type='Holotype'] | tp:treatment-sec[@sec-type='Types'] | tp:treatment-sec[@sec-type='Typification']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = c(openbiodiv = "http://openbiodiv.net/"),
      # Most Darwin Core atoms below end in "//*[not(ancestor::list-item)]",
      # which excludes anything inside a <list-item>; list items are described
      # by the nested occurrence_info_list component instead.
      atoms = c(
        text_content = ".",
        holotype = "./*[contains(., 'holotype')] | ./*[contains(., 'Holotype')]",
        identified_by = ".//named-content[@content-type='dwc:identifiedBy']//*[not(ancestor::list-item)]",
        type_status = ".//named-content[@content-type='dwc:typeStatus']//*[not(ancestor::list-item)]",
        record_number = ".//named-content[@content-type='dwc:recordNumber']//*[not(ancestor::list-item)]",
        recorded_by = ".//named-content[@content-type='dwc:recordedBy']//*[not(ancestor::list-item)]",
        coordinates = ".//named-content[@content-type='dwc:verbatimCoordinates']/*[not(ancestor::list[@list-type='alpha-lower'])]//*[not(ancestor::list-item)]",
        verbatim_lat = ".//named-content[@content-type='dwc:verbatimLatitude']//*[not(ancestor::list-item)]",
        verbatim_long = ".//named-content[@content-type='dwc:verbatimLongitude']//*[not(ancestor::list-item)]",
        decimal_lat = ".//named-content[@content-type='dwc:decimalLatitude']//*[not(ancestor::list-item)]",
        decimal_long = ".//named-content[@content-type='dwc:decimalLongitude']//*[not(ancestor::list-item)]",
        country = ".//named-content[@content-type='dwc:country']//*[not(ancestor::list-item)]",
        state_province = ".//named-content[@content-type='dwc:stateProvince']//*[not(ancestor::list-item)]",
        locality = ".//named-content[@content-type='dwc:locality']//*[not(ancestor::list-item)]",
        collection_year = ".//named-content[@content-type='dwc:year']//*[not(ancestor::list-item)]",
        collection_month = ".//named-content[@content-type='dwc:month']//*[not(ancestor::list-item)]",
        collection_day = ".//named-content[@content-type='dwc:day']//*[not(ancestor::list-item)]",
        # No XPath is given; presumably derived from year/month/day elsewhere
        # -- TODO confirm.
        collection_date = NA,
        event_date = ".//named-content[@content-type='dwc:eventDate']//*[not(ancestor::list-item)]",
        institution_name = ".//abbrev[@content-type='institution']| .//named-content[@xlink:type='simple'][@content-type='institution'] | .//named-content[@content-type='dwc:institutionID']",
        # The original unioned this exact path with itself; a single operand
        # selects the same node set.
        # NOTE(review): sibling schemas also accept 'institutionCode' and
        # 'dwc:institutional_code' -- confirm whether they should match here.
        institution_code = ".//named-content[@content-type='dwc:institutionCode']",
        individual_count = ".//named-content[@content-type='dwc:individualCount']//*[not(ancestor::list-item)]",
        sex = ".//named-content[@content-type='dwc:sex']//*[not(ancestor::list-item)]",
        life_stage = ".//named-content[@content-type='dwc:lifeStage']//*[not(ancestor::list-item)]",
        sampling_protocol = ".//named-content[@content-type='dwc:samplingProtocol']//*[not(ancestor::list-item)]",
        catalog_number = ".//named-content[@content-type='dwc:catalogNumber']//*[not(ancestor::list-item)]",
        other_catalog_numbers = ".//named-content[@content-type='dwc:otherCatalogNumbers']//*[not(ancestor::list-item)]",
        bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
        genbank_id = ".//ext-link[@ext-link-type='gen']",
        habitat = ".//named-content[@content-type='dwc:habitat']"
      ),
      atom_lang = c(
        text_content = NA,
        holotype = NA,
        identified_by = NA,
        type_status = NA,
        record_number = NA,
        recorded_by = NA,
        coordinates = NA,
        verbatim_lat = NA,
        verbatim_long = NA,
        decimal_lat = NA,
        decimal_long = NA,
        country = NA,
        state_province = NA,
        locality = NA,
        collection_year = NA,
        collection_month = NA,
        collection_day = NA,
        collection_date = NA,
        event_date = NA,
        institution_name = NA,
        institution_code = NA,
        individual_count = NA,
        sex = NA,
        life_stage = NA,
        sampling_protocol = NA,
        catalog_number = NA,
        other_catalog_numbers = NA,
        bold_id = NA,
        genbank_id = NA,
        habitat = NA
      ),
      # Exactly one type per atom, in the same order as `atoms` and
      # `atom_lang`, so the three collections stay aligned by name and
      # by position.
      atom_types = list(
        text_content = rdf4r::xsd_string,
        holotype = rdf4r::xsd_string,
        identified_by = rdf4r::xsd_string,
        type_status = rdf4r::xsd_string,
        record_number = rdf4r::xsd_string,
        recorded_by = rdf4r::xsd_string,
        coordinates = rdf4r::xsd_string,
        verbatim_lat = rdf4r::xsd_string,
        verbatim_long = rdf4r::xsd_string,
        decimal_lat = rdf4r::xsd_string,
        decimal_long = rdf4r::xsd_string,
        country = rdf4r::xsd_string,
        state_province = rdf4r::xsd_string,
        locality = rdf4r::xsd_string,
        collection_year = rdf4r::xsd_string,
        collection_month = rdf4r::xsd_string,
        collection_day = rdf4r::xsd_string,
        collection_date = rdf4r::xsd_date,
        event_date = rdf4r::xsd_string,
        institution_name = rdf4r::xsd_string,
        institution_code = rdf4r::xsd_string,
        individual_count = rdf4r::xsd_string,
        sex = rdf4r::xsd_string,
        life_stage = rdf4r::xsd_string,
        sampling_protocol = rdf4r::xsd_string,
        catalog_number = rdf4r::xsd_string,
        other_catalog_numbers = rdf4r::xsd_string,
        bold_id = rdf4r::xsd_string,
        genbank_id = rdf4r::xsd_string,
        habitat = rdf4r::xsd_string
      ),
      mongo_key = c(type_material = "tp:treatment-sec[@sec-type='type material'] | tp:treatment-sec[@sec-type='material'] | tp:treatment-sec[@sec-type='materials'] | tp:treatment-sec[@sec-type='Holotype'] | tp:treatment-sec[@sec-type='Types'] | tp:treatment-sec[@sec-type='Typification']"),
      constructor = type_material,
      components = list(
        # Occurrence records: the items of the 'alpha-lower' list.
        XmlSchema$new(
          schema_name = "occurrence_info_list",
          # Relative to the type material node (per the original note).
          xpath = "./list[@list-type='alpha-lower']/list-item",
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = c(openbiodiv = "http://openbiodiv.net/"),
          atoms = c(
            # The four identifier atoms have no XPath; presumably they are
            # generated rather than read from the XML -- TODO confirm.
            occurrenceID = NA,
            locationID = NA,
            eventID = NA,
            identificationID = NA,
            text_content = ".",
            identified_by = ".//named-content[@content-type='dwc:identifiedBy']",
            type_status = ".//named-content[@content-type='dwc:typeStatus']",
            record_number = ".//named-content[@content-type='dwc:recordNumber']",
            recorded_by = ".//named-content[@content-type='dwc:recordedBy']",
            coordinates = ".//named-content[@content-type='dwc:verbatimCoordinates']",
            verbatim_lat = ".//named-content[@content-type='dwc:verbatimLatitude']",
            verbatim_long = ".//named-content[@content-type='dwc:verbatimLongitude']",
            decimal_lat = ".//named-content[@content-type='dwc:decimalLatitude']",
            decimal_long = ".//named-content[@content-type='dwc:decimalLongitude']",
            country = ".//named-content[@content-type='dwc:country']",
            state_province = ".//named-content[@content-type='dwc:stateProvince']",
            locality = ".//named-content[@content-type='dwc:locality']",
            collection_year = ".//named-content[@content-type='dwc:year']",
            collection_month = ".//named-content[@content-type='dwc:month']",
            collection_day = ".//named-content[@content-type='dwc:day']",
            collection_date = NA,
            event_date = ".//named-content[@content-type='dwc:eventDate']",
            individual_count = ".//named-content[@content-type='dwc:individualCount']",
            sex = ".//named-content[@content-type='dwc:sex']",
            life_stage = ".//named-content[@content-type='dwc:lifeStage']",
            sampling_protocol = ".//named-content[@content-type='dwc:samplingProtocol']",
            catalog_number = ".//named-content[@content-type='dwc:catalogNumber']",
            other_catalog_numbers = ".//named-content[@content-type='dwc:otherCatalogNumbers']"
          ),
          atom_lang = c(
            occurrenceID = NA,
            locationID = NA,
            eventID = NA,
            identificationID = NA,
            text_content = NA,
            identified_by = NA,
            type_status = NA,
            record_number = NA,
            recorded_by = NA,
            coordinates = NA,
            verbatim_lat = NA,
            verbatim_long = NA,
            decimal_lat = NA,
            decimal_long = NA,
            country = NA,
            state_province = NA,
            locality = NA,
            collection_year = NA,
            collection_month = NA,
            collection_day = NA,
            collection_date = NA,
            event_date = NA,
            individual_count = NA,
            sex = NA,
            life_stage = NA,
            sampling_protocol = NA,
            catalog_number = NA,
            other_catalog_numbers = NA
          ),
          atom_types = list(
            occurrenceID = rdf4r::xsd_string,
            locationID = rdf4r::xsd_string,
            eventID = rdf4r::xsd_string,
            identificationID = rdf4r::xsd_string,
            text_content = rdf4r::xsd_string,
            identified_by = rdf4r::xsd_string,
            type_status = rdf4r::xsd_string,
            record_number = rdf4r::xsd_string,
            recorded_by = rdf4r::xsd_string,
            coordinates = rdf4r::xsd_string,
            verbatim_lat = rdf4r::xsd_string,
            verbatim_long = rdf4r::xsd_string,
            decimal_lat = rdf4r::xsd_string,
            decimal_long = rdf4r::xsd_string,
            country = rdf4r::xsd_string,
            state_province = rdf4r::xsd_string,
            locality = rdf4r::xsd_string,
            collection_year = rdf4r::xsd_string,
            collection_month = rdf4r::xsd_string,
            collection_day = rdf4r::xsd_string,
            collection_date = rdf4r::xsd_date,
            event_date = rdf4r::xsd_string,
            individual_count = rdf4r::xsd_string,
            sex = rdf4r::xsd_string,
            life_stage = rdf4r::xsd_string,
            sampling_protocol = rdf4r::xsd_string,
            catalog_number = rdf4r::xsd_string,
            other_catalog_numbers = rdf4r::xsd_string
          ),
          mongo_key = c(occurrence_info_list = "."),
          constructor = occurrence_list
        )
      )
    ),
    # Diagnosis
    XmlSchema$new(
      schema_name = "diagnosis_section",
      # Relative to the treatment node (per the original note).
      xpath = "./tp:treatment-sec[@sec-type='Diagnosis']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = c(openbiodiv = "http://openbiodiv.net/"),
      atoms = c(
        text_content = ".",
        institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
        institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
        bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
        genbank_id = ".//ext-link[@ext-link-type='gen']"
      ),
      atom_lang = c(
        text_content = NA,
        institution_name = NA,
        institution_code = NA,
        bold_id = NA,
        genbank_id = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string,
        institution_name = rdf4r::xsd_string,
        institution_code = rdf4r::xsd_string,
        bold_id = rdf4r::xsd_string,
        genbank_id = rdf4r::xsd_string
      ),
      mongo_key = c(diagnosis = "."),
      constructor = diagnosis
    ),
    # Distribution
    XmlSchema$new(
      schema_name = "distribution_section",
      # Relative to the treatment node (per the original note).
      xpath = "./tp:treatment-sec[@sec-type='Distribution']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = c(openbiodiv = "http://openbiodiv.net/"),
      atoms = c(
        text_content = ".",
        institution_name = ".//abbrev[@content-type='institution'] | .//named-content[@xlink:type='simple'][@content-type='institution']",
        institution_code = ".//named-content[@content-type='institutionCode'] | .//named-content[@content-type='dwc:institutional_code']",
        # Both alternatives must end in /@xlink:href so that either host form
        # yields the link itself rather than the linking element, as in every
        # other bold_id atom of this schema.
        bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
        genbank_id = ".//ext-link[@ext-link-type='gen']"
      ),
      atom_lang = c(
        text_content = NA,
        institution_name = NA,
        institution_code = NA,
        bold_id = NA,
        genbank_id = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string,
        institution_name = rdf4r::xsd_string,
        institution_code = rdf4r::xsd_string,
        bold_id = rdf4r::xsd_string,
        genbank_id = rdf4r::xsd_string
      ),
      mongo_key = c(distribution = "."),
      constructor = distribution
    )
  )
),
# Taxonomic key: schema for <sec> elements whose sec-type marks an
# identification key.
XmlSchema$new(
  schema_name = "taxonomic_key",
  xpath = "//sec[@sec-type='key'] | //sec[starts-with(@sec-type, 'key')] | //sec[starts-with(@sec-type, 'Key to')] | //sec[starts-with(@sec-type, 'key to')]",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    title = "title",
    # Every child element that is not a <title>.
    text_content = "*[not(name()='title')]",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  atom_lang = c(
    title = NA,
    text_content = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  atom_types = list(
    title = rdf4r::xsd_string,
    text_content = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string
  ),
  # NOTE(review): this key matches with contains() while `xpath` above uses
  # starts-with(); confirm whether the difference is intended.
  mongo_key = c(taxonomic_key = "//sec[@sec-type='key'] | //sec[contains(@sec-type, 'key')] | //sec[contains(@sec-type, 'Key to')] | //sec[contains(@sec-type, 'key to')]"),
  constructor = taxonomic_key,
  components = NULL
),
# Figure: schema for <fig> and <fig-group> elements anywhere in the document.
XmlSchema$new(
  schema_name = "figure",
  xpath = "//fig|//fig-group",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    caption = "caption",
    download_link = "graphic/uri[@content-type='original_file']",
    doi = "object-id[@content-type = 'doi']",
    # BOLD links are recognised by their href prefix, with and without "www.".
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  atom_lang = c(
    text_content = NA,
    caption = NA,
    download_link = NA,
    doi = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  atom_types = list(
    text_content = rdf4r::xsd_string,
    caption = rdf4r::xsd_string,
    download_link = rdf4r::xsd_string,
    doi = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string
  ),
  # NOTE(review): `xpath` also selects <fig-group>, but this key only names
  # //fig; confirm whether the difference is intended.
  mongo_key = c(figure = "//fig"),
  constructor = figure,
  components = NULL
),
# Bibliography: schema for the <ref-list> in the article back matter, with one
# nested component schema for its <ref> entries.
XmlSchema$new(
  schema_name = "bibliography",
  xpath = "/article/back/ref-list",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(text_content = "."),
  atom_lang = c(text_content = NA),
  atom_types = list(text_content = rdf4r::xsd_string),
  mongo_key = c(bibliography = "/article/back/ref-list"),
  constructor = bibliography,
  components = list(
    # Reference
    XmlSchema$new(
      schema_name = "reference",
      xpath = "ref",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = c(openbiodiv = "http://openbiodiv.net/"),
      atoms = c(
        reference_id = "./@id",
        verbatimContent = ".",
        # No XPath is given; presumably assembled elsewhere from the name
        # parts -- TODO confirm.
        author_fullname = NA,
        author_name = ".//person-group/name",
        author_surname = ".//person-group/name/surname",
        author_fname = ".//person-group/name/given-names",
        year = ".//year",
        title = ".//article-title",
        source = ".//source",
        volume = ".//volume",
        issue = ".//issue",
        # `doi` reads the link's href attribute; `http_doi` selects the link
        # element itself.
        doi = ".//ext-link[@ext-link-type='doi']/@xlink:href",
        http_doi = ".//ext-link[@ext-link-type='doi']"
      ),
      atom_lang = c(
        reference_id = NA,
        verbatimContent = NA,
        author_fullname = NA,
        author_name = NA,
        author_surname = NA,
        author_fname = NA,
        year = NA,
        title = NA,
        source = NA,
        volume = NA,
        issue = NA,
        doi = NA,
        http_doi = NA
      ),
      atom_types = list(
        reference_id = rdf4r::xsd_string,
        verbatimContent = rdf4r::xsd_string,
        author_fullname = rdf4r::xsd_string,
        author_name = rdf4r::xsd_string,
        author_surname = rdf4r::xsd_string,
        author_fname = rdf4r::xsd_string,
        year = rdf4r::xsd_string,
        title = rdf4r::xsd_string,
        source = rdf4r::xsd_string,
        volume = rdf4r::xsd_string,
        issue = rdf4r::xsd_string,
        doi = rdf4r::xsd_string,
        http_doi = rdf4r::xsd_string
      ),
      mongo_key = c(reference = "."),
      constructor = reference,
      components = NULL
    )
  )
),
# Taxonomic Name Usage
#
# Schema applied to every `tp:taxon-name` element (`xpath` below). `atoms`,
# `atom_lang` and `atom_types` are parallel collections keyed by atom name:
# the XPath of the value, the XPath of its language (NA = none) and its XSD
# datatype. Paths starting with `./`, `../` or an axis name are relative to
# the matched `tp:taxon-name` node; paths starting with `/article` are
# absolute.
XmlSchema$new(
  schema_name = "taxonomic_name_usage",
  xpath = "//tp:taxon-name",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(
    text_content = ".",
    date = NA,
    pub_year = "/article/front/article-meta/pub-date/year",
    pub_month = "/article/front/article-meta/pub-date/month",
    pub_day = "/article/front/article-meta/pub-date/day",
    # NOTE(review): 'divisio'/'division' are mapped to kingdom here and
    # 'regnum' to phylum below. In nomenclature "regnum" is the Latin term for
    # kingdom and "divisio" is the botanical counterpart of phylum, so these
    # two look swapped -- confirm with a domain expert before changing.
    kingdom = "./tp:taxon-name-part[@taxon-name-part-type='kingdom' or @taxon-name-part-type='Kingdom' or @taxon-name-part-type='divisio' or @taxon-name-part-type='Divisio' or @taxon-name-part-type='division' or @taxon-name-part-type='Division']",
    phylum = "./tp:taxon-name-part[@taxon-name-part-type='phylum' or @taxon-name-part-type='Phylum' or @taxon-name-part-type='regnum' or @taxon-name-part-type='Regnum']",
    class = "./tp:taxon-name-part[@taxon-name-part-type='class' or @taxon-name-part-type='Class' or @taxon-name-part-type='classis' or @taxon-name-part-type='Classis']",
    order = "./tp:taxon-name-part[@taxon-name-part-type='order' or @taxon-name-part-type='Order' or @taxon-name-part-type='ordo' or @taxon-name-part-type='Ordo']",
    family = "./tp:taxon-name-part[@taxon-name-part-type='family' or @taxon-name-part-type='Family' or @taxon-name-part-type='familia' or @taxon-name-part-type='Familia' or @taxon-name-part-type='famil' or @taxon-name-part-type='Famil']",
    # Tribe and subtribe spellings are collected under `subfamily` as well.
    subfamily = "./tp:taxon-name-part[@taxon-name-part-type='subfamily' or @taxon-name-part-type='Subfamily' or @taxon-name-part-type='subfamilia' or @taxon-name-part-type='Subfamilia' or @taxon-name-part-type='subfamil' or @taxon-name-part-type='Subfamil' or @taxon-name-part-type='tribe' or @taxon-name-part-type='Tribe' or @taxon-name-part-type='tribus' or @taxon-name-part-type='Tribus' or @taxon-name-part-type='subtribe' or @taxon-name-part-type='Subtribe' or @taxon-name-part-type='subtribus' or @taxon-name-part-type='Subtribus']",
    genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']",
    # Same selector as `genus`, but reads the `reg` attribute of the part.
    # NOTE(review): the key is misspelled ("regularzied"); it is left as is
    # because code outside this block presumably looks it up by this name.
    regularzied_genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']/@reg",
    # Section spellings are collected under `subgenus` as well.
    subgenus = "./tp:taxon-name-part[@taxon-name-part-type='subgenus' or @taxon-name-part-type='Subgenus' or @taxon-name-part-type='subgenera' or @taxon-name-part-type='Subgenera' or @taxon-name-part-type='section' or @taxon-name-part-type='Section' or @taxon-name-part-type='sectio' or @taxon-name-part-type='Sectio']",
    species = "./tp:taxon-name-part[@taxon-name-part-type='Species' or @taxon-name-part-type='species']",
    # All infraspecific part types (variety, form, aberration, race, ...) are
    # collected under `subspecies`. The path must start with `./` like its
    # siblings: a leading `/` makes it absolute, and since the document root
    # is `article` it would never select anything.
    subspecies = "./tp:taxon-name-part[@taxon-name-part-type='Subspecies' or @taxon-name-part-type='subspecies' or @taxon-name-part-type='Variety' or @taxon-name-part-type='variety' or @taxon-name-part-type='varietas' or @taxon-name-part-type='Varietas' or @taxon-name-part-type='variation' or @taxon-name-part-type='Variation' or @taxon-name-part-type='subvariety' or @taxon-name-part-type='Subvariety' or @taxon-name-part-type='subvarietas' or @taxon-name-part-type='Subvarietas' or @taxon-name-part-type='subvariation' or @taxon-name-part-type='Subvariation' or @taxon-name-part-type='Form' or @taxon-name-part-type='form' or @taxon-name-part-type='forma' or @taxon-name-part-type='Forma' or @taxon-name-part-type='aberration' or @taxon-name-part-type='Aberration' or @taxon-name-part-type='race' or @taxon-name-part-type='Race' or @taxon-name-part-type='Subform' or @taxon-name-part-type='subform' or @taxon-name-part-type='subforma' or @taxon-name-part-type='Subforma' or @taxon-name-part-type='subaberation' or @taxon-name-part-type='Subaberation' or @taxon-name-part-type='subrace' or @taxon-name-part-type='Subrace' ]",
    verbatim = ".",
    # Type attribute of the last name part inside the taxon name.
    verbatim_rank = "./tp:taxon-name-part[last()]/@taxon-name-part-type",
    verbatim_status = "following-sibling::tp:taxon-status",
    status = "../tp:taxon-status",
    authorship = "following-sibling::tp:taxon-authority | ./tp:taxon-name-part[@taxon-name-part-type='authority']",
    zoobank = "../object-id[@content-type='zoobank']",
    secundum_literal = NA,
    bold_id = ".//*[starts-with(@xlink:href, 'http://www.boldsystems.org/')]/@xlink:href | .//*[starts-with(@xlink:href, 'http://boldsystems.org/')]/@xlink:href",
    genbank_id = ".//ext-link[@ext-link-type='gen']"
  ),
  # One entry per atom, in the same order as `atoms`; no atom of this schema
  # carries a language path.
  atom_lang = c(
    text_content = NA,
    date = NA,
    pub_year = NA,
    pub_month = NA,
    pub_day = NA,
    kingdom = NA,
    phylum = NA,
    class = NA,
    order = NA,
    family = NA,
    subfamily = NA,
    genus = NA,
    regularzied_genus = NA,
    subgenus = NA,
    species = NA, ## This is an error in TaxonX, not DwC!
    subspecies = NA,
    verbatim = NA,
    verbatim_rank = NA,
    verbatim_status = NA,
    status = NA,
    authorship = NA,
    zoobank = NA,
    secundum_literal = NA,
    bold_id = NA,
    genbank_id = NA
  ),
  # One entry per atom, in the same order as `atoms`.
  atom_types = list(
    text_content = rdf4r::xsd_string,
    date = rdf4r::xsd_date,
    pub_year = rdf4r::xsd_integer,
    pub_month = rdf4r::xsd_integer,
    pub_day = rdf4r::xsd_integer,
    kingdom = rdf4r::xsd_string,
    phylum = rdf4r::xsd_string,
    class = rdf4r::xsd_string,
    order = rdf4r::xsd_string,
    family = rdf4r::xsd_string,
    subfamily = rdf4r::xsd_string,
    genus = rdf4r::xsd_string,
    regularzied_genus = rdf4r::xsd_string,
    subgenus = rdf4r::xsd_string,
    species = rdf4r::xsd_string,
    subspecies = rdf4r::xsd_string,
    verbatim = rdf4r::xsd_string,
    verbatim_rank = rdf4r::xsd_string,
    verbatim_status = rdf4r::xsd_string,
    status = rdf4r::xsd_string,
    authorship = rdf4r::xsd_string,
    zoobank = rdf4r::xsd_string,
    secundum_literal = rdf4r::xsd_string,
    bold_id = rdf4r::xsd_string,
    genbank_id = rdf4r::xsd_string,
    # The three entries below have no matching atom in this schema. They are
    # kept because code outside this block may look them up by name --
    # TODO confirm and drop if unused.
    taxonomic_rank = rdf4r::xsd_string,
    taxonomic_status = rdf4r::xsd_string,
    external_taxonomic_name_id = rdf4r::xsd_string
  ),
  # Same XPaths as the corresponding `atoms` entries (rank parts plus
  # authorship and secundum_literal).
  mongo_key = c(
    kingdom = "./tp:taxon-name-part[@taxon-name-part-type='kingdom' or @taxon-name-part-type='Kingdom' or @taxon-name-part-type='divisio' or @taxon-name-part-type='Divisio' or @taxon-name-part-type='division' or @taxon-name-part-type='Division']",
    phylum = "./tp:taxon-name-part[@taxon-name-part-type='phylum' or @taxon-name-part-type='Phylum' or @taxon-name-part-type='regnum' or @taxon-name-part-type='Regnum']",
    class = "./tp:taxon-name-part[@taxon-name-part-type='class' or @taxon-name-part-type='Class' or @taxon-name-part-type='classis' or @taxon-name-part-type='Classis']",
    order = "./tp:taxon-name-part[@taxon-name-part-type='order' or @taxon-name-part-type='Order' or @taxon-name-part-type='ordo' or @taxon-name-part-type='Ordo']",
    family = "./tp:taxon-name-part[@taxon-name-part-type='family' or @taxon-name-part-type='Family' or @taxon-name-part-type='familia' or @taxon-name-part-type='Familia' or @taxon-name-part-type='famil' or @taxon-name-part-type='Famil']",
    subfamily = "./tp:taxon-name-part[@taxon-name-part-type='subfamily' or @taxon-name-part-type='Subfamily' or @taxon-name-part-type='subfamilia' or @taxon-name-part-type='Subfamilia' or @taxon-name-part-type='subfamil' or @taxon-name-part-type='Subfamil' or @taxon-name-part-type='tribe' or @taxon-name-part-type='Tribe' or @taxon-name-part-type='tribus' or @taxon-name-part-type='Tribus' or @taxon-name-part-type='subtribe' or @taxon-name-part-type='Subtribe' or @taxon-name-part-type='subtribus' or @taxon-name-part-type='Subtribus']",
    genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']",
    subgenus = "./tp:taxon-name-part[@taxon-name-part-type='subgenus' or @taxon-name-part-type='Subgenus' or @taxon-name-part-type='subgenera' or @taxon-name-part-type='Subgenera' or @taxon-name-part-type='section' or @taxon-name-part-type='Section' or @taxon-name-part-type='sectio' or @taxon-name-part-type='Sectio']",
    species = "./tp:taxon-name-part[@taxon-name-part-type='Species' or @taxon-name-part-type='species']",
    # Relative (`./`) for the same reason as the `subspecies` atom above.
    subspecies = "./tp:taxon-name-part[@taxon-name-part-type='Subspecies' or @taxon-name-part-type='subspecies' or @taxon-name-part-type='Variety' or @taxon-name-part-type='variety' or @taxon-name-part-type='varietas' or @taxon-name-part-type='Varietas' or @taxon-name-part-type='variation' or @taxon-name-part-type='Variation' or @taxon-name-part-type='subvariety' or @taxon-name-part-type='Subvariety' or @taxon-name-part-type='subvarietas' or @taxon-name-part-type='Subvarietas' or @taxon-name-part-type='subvariation' or @taxon-name-part-type='Subvariation' or @taxon-name-part-type='Form' or @taxon-name-part-type='form' or @taxon-name-part-type='forma' or @taxon-name-part-type='Forma' or @taxon-name-part-type='aberration' or @taxon-name-part-type='Aberration' or @taxon-name-part-type='race' or @taxon-name-part-type='Race' or @taxon-name-part-type='Subform' or @taxon-name-part-type='subform' or @taxon-name-part-type='subforma' or @taxon-name-part-type='Subforma' or @taxon-name-part-type='subaberation' or @taxon-name-part-type='Subaberation' or @taxon-name-part-type='subrace' or @taxon-name-part-type='Subrace' ]",
    authorship = "following-sibling::tp:taxon-authority | ./tp:taxon-name-part[@taxon-name-part-type='authority']",
    secundum_literal = NA
  ),
  constructor = taxonomic_name_usage,
  components = NULL
),
# Table
#
# Schema for `table` elements. The `body/..` step in `xpath` climbs back from
# `body` to `article`, so tables anywhere below `/article` are selected as
# long as the article has a `body` child. The only atom is the text content
# of the table node itself.
XmlSchema$new(
  schema_name = "table",
  xpath = "/article/body/..//table",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = c(openbiodiv = "http://openbiodiv.net/"),
  atoms = c(text_content = "."),
  atom_lang = c(text_content = NA),
  atom_types = list(text_content = rdf4r::xsd_string),
  mongo_key = c(table = "."),
  constructor = table,
  components = NULL
)
)
)
#' Plazi Internal
#'
#' `XmlSchema` object named `"plazi_int"` for files matching
#' `.*\\.plazixml`. It declares a single atom, `lang`, read from the
#' `docLanguage` attribute of the root `document` element.
#'
#' @export
plazi_int = XmlSchema$new(
  schema_name = "plazi_int",
  file_pattern = ".*\\.plazixml",
  extension = ".plazixml",
  # NOTE(review): unlike the `prefix` arguments of the taxpub schemas, this
  # one is an unnamed string -- confirm that XmlSchema accepts both forms.
  prefix = "http://tb.plazi.org/GgServer/xml/",
  atoms = c(lang = "/document/@docLanguage")
)
#' Plazi Feed
#'
#' `XmlSchema` object named `"plazi_feed"` with two atoms taken from each
#' `/rss/channel/item` element: `link` and `pub_date` (the `pubDate` child).
#' All other constructor arguments are left at their defaults.
#'
#' @export
plazi_feed_schema = XmlSchema$new(
  schema_name = "plazi_feed",
  atoms = c(
    link = "/rss/channel/item/link",
    pub_date = "/rss/channel/item/pubDate"
  )
)
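# NOTE(review): the two lines below are not R code; they appear to be text
# left over from a web page and are kept as comments so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.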