#' TaxPub Schema
#'
#' `XmlSchema` definition for TaxPub XML articles (`.xml` files).
#'
#' The top-level schema is rooted at `/` and lists article-level metadata
#' atoms (title, publication date parts, DOI, publisher, journal, ISSNs,
#' issue, keywords). Its `components` are nested schemas for the keyword
#' group, abstract, title, authors, introduction, discussion, taxon
#' treatments (which in turn contain nomenclature, materials examined,
#' diagnosis and distribution sections), taxonomic keys, figures and
#' taxonomic name usages.
#'
#' Every schema is declared with the same fields:
#' \describe{
#'   \item{`xpath`}{XPath selecting the node(s) the schema applies to.}
#'   \item{`atoms`}{Named XPath expressions, one per extracted value;
#'     `NA` where no path is defined in this schema.}
#'   \item{`atom_lang`}{For each atom, an XPath to its language attribute,
#'     or `NA`.}
#'   \item{`atom_types`}{For each atom, the `rdf4r` XSD datatype.}
#'   \item{`mongo_key`}{Named XPath expression(s) identifying the record.}
#'   \item{`constructor`}{Function associated with the matched node.}
#'   \item{`components`}{Nested schemas, or `NULL`.}
#' }
#' `atoms`, `atom_lang` and `atom_types` are kept in the same order and with
#' the same names so that they line up whether they are looked up by name or
#' by position.
#'
#' @export
taxpub = XmlSchema$new(
  schema_name = "taxpub",
  xpath = "/",
  file_pattern = ".*\\.xml",
  extension = ".xml",
  prefix = NA,
  atoms = c(
    title = "/article/front/article-meta/title-group/article-title",
    date = NA,
    pub_year = "/article/front/article-meta/pub-date[@pub-type='epub']/year",
    pub_month = "/article/front/article-meta/pub-date[@pub-type='epub']/month",
    pub_day = "/article/front/article-meta/pub-date[@pub-type='epub']/day",
    doi = "/article/front/article-meta/article-id[@pub-id-type='doi']",
    zenodo = NA,
    zoobank = NA,
    publisher = "/article/front/journal-meta/publisher/publisher-name",
    journal = "/article/front/journal-meta/journal-title-group/journal-title",
    journal_abbrev = "/article/front/journal-meta/journal-title-group/abbrev-journal-title",
    issn = "/article/front/journal-meta/issn[@pub-type='ppub']",
    eIssn = "/article/front/journal-meta/issn[@pub-type='epub']",
    issue = "/article/front/article-meta/issue",
    volume = NA,
    starting_page = NA,
    ending_page = NA,
    keyword = "/article/front/article-meta/kwd-group/kwd"
    # pensoft_pub = NA # because we skip only Pensoft pubs from Plazi
  ),
  atom_lang = c(
    title = NA,
    date = NA,
    pub_year = NA,
    pub_month = NA,
    pub_day = NA,
    doi = NA,
    zenodo = NA,
    zoobank = NA, # was missing although `atoms` defines it
    publisher = NA,
    journal = "/article/front/journal-meta/journal-title-group/journal-title/@xml:lang",
    journal_abbrev = "/article/front/journal-meta/journal-title-group/abbrev-journal-title/@xml:lang",
    issn = NA,
    eIssn = NA,
    issue = NA,
    volume = NA,
    starting_page = NA,
    ending_page = NA,
    keyword = NA
    # pensoft_pub = NA
  ),
  atom_types = list(
    title = rdf4r::xsd_string,
    date = rdf4r::xsd_date,
    pub_year = rdf4r::xsd_integer,
    pub_month = rdf4r::xsd_integer,
    pub_day = rdf4r::xsd_integer,
    doi = rdf4r::xsd_string,
    zenodo = rdf4r::xsd_string,
    zoobank = rdf4r::xsd_string, # was missing although `atoms` defines it
    publisher = rdf4r::xsd_string,
    journal = rdf4r::xsd_string,
    journal_abbrev = rdf4r::xsd_string,
    issn = rdf4r::xsd_string,
    eIssn = rdf4r::xsd_string,
    issue = rdf4r::xsd_integer,
    volume = rdf4r::xsd_integer,
    starting_page = rdf4r::xsd_integer,
    ending_page = rdf4r::xsd_integer,
    keyword = rdf4r::xsd_string
    # pensoft_pub = rdf4r::xsd_string
  ),
  mongo_key = c(article_meta = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
  constructor = metadata,
  components = list(
    # Keyword
    XmlSchema$new(
      schema_name = "taxpub_keyword_group",
      xpath = "/article/front/article-meta/kwd-group",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        keyword = "./kwd"
      ),
      atom_lang = c(
        keyword = NA
      ),
      atom_types = list(
        keyword = rdf4r::xsd_string
      ),
      mongo_key = c(keywords = "/article/front/article-meta/kwd-group"),
      constructor = keyword_group,
      components = NULL
    ),
    # Abstract
    XmlSchema$new(
      schema_name = "taxpub_abstract",
      xpath = "/article/front/article-meta/abstract",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = ".",
        # Translated abstracts are siblings of <abstract>, hence the "../".
        trans_abstract = "../trans-abstract"
      ),
      atom_lang = c(
        text_content = NA,
        trans_abstract = "../trans-abstract/@xml:lang"
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string,
        trans_abstract = rdf4r::xsd_string
      ),
      mongo_key = c(abstract = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
      constructor = abstract,
      components = NULL
    ),
    # Title
    XmlSchema$new(
      schema_name = "taxpub_title",
      xpath = "/article/front/article-meta/title-group/article-title",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      mongo_key = c(article_title = "/article/front/article-meta/title-group/article-title"),
      constructor = title,
      components = NULL
    ),
    # Author
    XmlSchema$new(
      schema_name = "taxpub_author",
      xpath = "/article/front/article-meta/contrib-group/contrib",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        full_name = NA,
        surname = "./name/surname",
        given_names = "./name/given-names",
        email = "./email",
        aff_id = "./xref[@rid]",
        # The two affiliation paths are document-wide ("//"), not relative to
        # the contrib node, so they return every affiliation in the article.
        all_affiliations_institutions = "//article/front/article-meta/aff/institution",
        all_affiliations_cities = "//article/front/article-meta/aff/addr-line[@content-type='city']",
        all_affiliations = NA
        # role = "./mods:role/mods:roleTerm"
      ),
      atom_lang = c(
        full_name = NA,
        surname = NA,
        given_names = NA,
        email = NA,
        aff_id = NA,
        all_affiliations_institutions = NA,
        all_affiliations_cities = NA,
        all_affiliations = NA
        # role = NA
      ),
      atom_types = list(
        full_name = rdf4r::xsd_string,
        surname = rdf4r::xsd_string,
        given_names = rdf4r::xsd_string,
        email = rdf4r::xsd_string,
        # NOTE(review): typed as integer; assumes the xref text is numeric.
        aff_id = rdf4r::xsd_integer,
        all_affiliations_institutions = rdf4r::xsd_string,
        all_affiliations_cities = rdf4r::xsd_string,
        all_affiliations = rdf4r::xsd_string
      ),
      # TODO (original note): change mongokey
      mongo_key = c(author_name = "./name/given-names", surname = "./name/surname"),
      constructor = author_new,
      components = NULL
    ),
    # Introduction
    XmlSchema$new(
      schema_name = "taxpub_introduction_section",
      xpath = "/article/body/sec[@sec-type='Introduction']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      mongo_key = c(introduction = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
      constructor = introduction_section
    ),
    # Discussion
    XmlSchema$new(
      schema_name = "discussion_section",
      # "//" matches Discussion sections anywhere in the document.
      xpath = "//sec[@sec-type='Discussion']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      mongo_key = c(discussion = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
      constructor = discussion
    ),
    # Treatment
    XmlSchema$new(
      schema_name = "taxpub_treatment",
      xpath = "/article/body/sec/tp:taxon-treatment",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      mongo_key = c(treatment = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
      constructor = treatment_new,
      components = list(
        # Nomenclature
        XmlSchema$new(
          schema_name = "taxpub_nomenclature_section",
          xpath = "./tp:nomenclature", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          # Nomenclature citations
          components = list(
            XmlSchema$new(
              schema_name = "taxpub_nomenclature_citations",
              # Relative path; presumably resolved against the enclosing
              # nomenclature node since this schema is nested under it.
              xpath = "./tp:nomenclature-citation-list",
              file_pattern = ".*\\.xml",
              extension = ".xml",
              prefix = NA,
              atoms = c(
                text_content = "."
              ),
              atom_lang = c(
                text_content = NA
              ),
              atom_types = list(
                text_content = rdf4r::xsd_string
              ),
              components = NULL,
              mongo_key = c(nomenclature_citations = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
              constructor = nomenclature_citations
            )
          ),
          mongo_key = c(nomenclature = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
          constructor = nomenclature
        ),
        # Materials Examined
        XmlSchema$new(
          schema_name = "taxpub_materials_examined",
          xpath = "./tp:treatment-sec[@sec-type='materials']", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          mongo_key = c(materials_examined = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
          constructor = materials_examined
        ),
        # Diagnosis
        XmlSchema$new(
          schema_name = "taxpub_diagnosis_section",
          xpath = "./tp:treatment-sec[@sec-type='Diagnosis']", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          mongo_key = c(diagnosis = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
          constructor = diagnosis
        ),
        # Distribution
        XmlSchema$new(
          schema_name = "taxpub_distribution_section",
          xpath = "./tp:treatment-sec[@sec-type='Distribution']", # rel path from treatment
          file_pattern = ".*\\.xml",
          extension = ".xml",
          prefix = NA,
          atoms = c(
            text_content = "."
          ),
          atom_lang = c(
            text_content = NA
          ),
          atom_types = list(
            text_content = rdf4r::xsd_string
          ),
          mongo_key = c(distribution = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
          constructor = distribution
        )
      )
    ),
    # Taxonomic Key
    XmlSchema$new(
      schema_name = "taxpub_taxonomic_key",
      # Was "/sec[@sec-type='key']", which only matches a root <sec> element
      # and therefore nothing in a document rooted at <article>; "//" finds
      # key sections wherever they occur, like the Discussion schema does.
      xpath = "//sec[@sec-type='key']",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = "."
      ),
      atom_lang = c(
        text_content = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string
      ),
      mongo_key = c(taxonomic_key = "/article/front/article-meta/article-id[@pub-id-type='doi']"),
      constructor = taxonomic_key,
      components = NULL
    ),
    # Figure
    XmlSchema$new(
      schema_name = "taxpub_figure",
      xpath = "//fig|//fig-group",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      atoms = c(
        text_content = ".",
        caption = "./caption"
      ),
      atom_lang = c(
        text_content = NA,
        caption = NA
      ),
      atom_types = list(
        text_content = rdf4r::xsd_string,
        caption = rdf4r::xsd_string
      ),
      mongo_key = c(figure = "//fig|//fig-group"),
      constructor = figure,
      components = NULL
    ),
    # Taxonomic Name Usage
    XmlSchema$new(
      schema_name = "taxpub_taxonomic_name_usage",
      xpath = "//tp:taxon-name",
      file_pattern = ".*\\.xml",
      extension = ".xml",
      prefix = NA,
      # Each rank atom selects the tp:taxon-name-part children of the current
      # tp:taxon-name whose type attribute matches one of several spellings.
      # NOTE(review): 'regnum' is Latin for kingdom and 'divisio'/'division'
      # is the botanical counterpart of phylum, so the alias sets of `kingdom`
      # and `phylum` look swapped - confirm before changing.
      atoms = c(
        date = NA,
        pub_year = "/article/front/article-meta/pub-date/year",
        pub_month = "/article/front/article-meta/pub-date/month",
        pub_day = "/article/front/article-meta/pub-date/day",
        kingdom = "./tp:taxon-name-part[@taxon-name-part-type='kingdom' or @taxon-name-part-type='Kingdom' or @taxon-name-part-type='divisio' or @taxon-name-part-type='Divisio' or @taxon-name-part-type='division' or @taxon-name-part-type='Division']",
        phylum = "./tp:taxon-name-part[@taxon-name-part-type='phylum' or @taxon-name-part-type='Phylum' or @taxon-name-part-type='regnum' or @taxon-name-part-type='Regnum']",
        class = "./tp:taxon-name-part[@taxon-name-part-type='class' or @taxon-name-part-type='Class' or @taxon-name-part-type='classis' or @taxon-name-part-type='Classis']",
        order = "./tp:taxon-name-part[@taxon-name-part-type='order' or @taxon-name-part-type='Order' or @taxon-name-part-type='ordo' or @taxon-name-part-type='Ordo']",
        family = "./tp:taxon-name-part[@taxon-name-part-type='family' or @taxon-name-part-type='Family' or @taxon-name-part-type='familia' or @taxon-name-part-type='Familia' or @taxon-name-part-type='famil' or @taxon-name-part-type='Famil']",
        subfamily = "./tp:taxon-name-part[@taxon-name-part-type='subfamily' or @taxon-name-part-type='Subfamily' or @taxon-name-part-type='subfamilia' or @taxon-name-part-type='Subfamilia' or @taxon-name-part-type='subfamil' or @taxon-name-part-type='Subfamil' or @taxon-name-part-type='tribe' or @taxon-name-part-type='Tribe' or @taxon-name-part-type='tribus' or @taxon-name-part-type='Tribus' or @taxon-name-part-type='subtribe' or @taxon-name-part-type='Subtribe' or @taxon-name-part-type='subtribus' or @taxon-name-part-type='Subtribus']",
        genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']",
        regularzied_genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']/@reg",
        subgenus = "./tp:taxon-name-part[@taxon-name-part-type='subgenus' or @taxon-name-part-type='Subgenus' or @taxon-name-part-type='subgenera' or @taxon-name-part-type='Subgenera' or @taxon-name-part-type='section' or @taxon-name-part-type='Section' or @taxon-name-part-type='sectio' or @taxon-name-part-type='Sectio']",
        species = "./tp:taxon-name-part[@taxon-name-part-type='Species' or @taxon-name-part-type='species']",
        # Fixed: this path started with "/tp:..." (absolute from the document
        # root) instead of "./tp:..." like every other rank, so it could never
        # match a child of the current tp:taxon-name.
        subspecies = "./tp:taxon-name-part[@taxon-name-part-type='Subspecies' or @taxon-name-part-type='subspecies' or @taxon-name-part-type='Variety' or @taxon-name-part-type='variety' or @taxon-name-part-type='varietas' or @taxon-name-part-type='Varietas' or @taxon-name-part-type='variation' or @taxon-name-part-type='Variation' or @taxon-name-part-type='subvariety' or @taxon-name-part-type='Subvariety' or @taxon-name-part-type='subvarietas' or @taxon-name-part-type='Subvarietas' or @taxon-name-part-type='subvariation' or @taxon-name-part-type='Subvariation' or @taxon-name-part-type='Form' or @taxon-name-part-type='form' or @taxon-name-part-type='forma' or @taxon-name-part-type='Forma' or @taxon-name-part-type='aberration' or @taxon-name-part-type='Aberration' or @taxon-name-part-type='race' or @taxon-name-part-type='Race' or @taxon-name-part-type='Subform' or @taxon-name-part-type='subform' or @taxon-name-part-type='subforma' or @taxon-name-part-type='Subforma' or @taxon-name-part-type='subaberation' or @taxon-name-part-type='Subaberation' or @taxon-name-part-type='subrace' or @taxon-name-part-type='Subrace' ]",
        verbatim = ".",
        verbatim_rank = "./tp:taxon-name-part[last()]/@taxon-name-part-type",
        verbatim_status = "following-sibling::tp:taxon-status",
        status = NA,
        authorship = "following-sibling::tp:taxon-authority | ./tp:taxon-name-part[@taxon-name-part-type='authority']",
        external_taxonomic_name_id = "./object-id",
        secundum_literal = NA
      ),
      atom_lang = c(
        date = NA,
        pub_year = NA,
        pub_month = NA,
        pub_day = NA,
        kingdom = NA,
        phylum = NA,
        class = NA,
        order = NA,
        family = NA,
        subfamily = NA,
        genus = NA,
        regularzied_genus = NA,
        subgenus = NA,
        species = NA, ## This is an error in TaxonX, not DwC!
        subspecies = NA,
        verbatim = NA, # was missing although `atoms` defines it
        verbatim_rank = NA,
        verbatim_status = NA,
        status = NA,
        authorship = NA,
        external_taxonomic_name_id = NA,
        secundum_literal = NA
      ),
      atom_types = list(
        date = rdf4r::xsd_date,
        pub_year = rdf4r::xsd_integer,
        pub_month = rdf4r::xsd_integer,
        pub_day = rdf4r::xsd_integer,
        kingdom = rdf4r::xsd_string,
        phylum = rdf4r::xsd_string, # was missing although `atoms` defines it
        class = rdf4r::xsd_string,
        order = rdf4r::xsd_string,
        family = rdf4r::xsd_string,
        subfamily = rdf4r::xsd_string,
        genus = rdf4r::xsd_string,
        regularzied_genus = rdf4r::xsd_string,
        subgenus = rdf4r::xsd_string,
        species = rdf4r::xsd_string,
        subspecies = rdf4r::xsd_string,
        verbatim = rdf4r::xsd_string, # was missing although `atoms` defines it
        verbatim_rank = rdf4r::xsd_string,
        verbatim_status = rdf4r::xsd_string, # was missing although `atoms` defines it
        status = rdf4r::xsd_string,
        authorship = rdf4r::xsd_string,
        external_taxonomic_name_id = rdf4r::xsd_string,
        secundum_literal = rdf4r::xsd_string,
        # No atom of these names exists above; the entries are kept for
        # backward compatibility and placed last so they cannot shift the
        # positions of the real atoms.
        taxonomic_rank = rdf4r::xsd_string,
        taxonomic_status = rdf4r::xsd_string
      ),
      # The key repeats the rank and authorship XPaths of `atoms`; keep the
      # two in sync when editing either.
      mongo_key = c(
        kingdom = "./tp:taxon-name-part[@taxon-name-part-type='kingdom' or @taxon-name-part-type='Kingdom' or @taxon-name-part-type='divisio' or @taxon-name-part-type='Divisio' or @taxon-name-part-type='division' or @taxon-name-part-type='Division']",
        phylum = "./tp:taxon-name-part[@taxon-name-part-type='phylum' or @taxon-name-part-type='Phylum' or @taxon-name-part-type='regnum' or @taxon-name-part-type='Regnum']",
        class = "./tp:taxon-name-part[@taxon-name-part-type='class' or @taxon-name-part-type='Class' or @taxon-name-part-type='classis' or @taxon-name-part-type='Classis']",
        order = "./tp:taxon-name-part[@taxon-name-part-type='order' or @taxon-name-part-type='Order' or @taxon-name-part-type='ordo' or @taxon-name-part-type='Ordo']",
        family = "./tp:taxon-name-part[@taxon-name-part-type='family' or @taxon-name-part-type='Family' or @taxon-name-part-type='familia' or @taxon-name-part-type='Familia' or @taxon-name-part-type='famil' or @taxon-name-part-type='Famil']",
        subfamily = "./tp:taxon-name-part[@taxon-name-part-type='subfamily' or @taxon-name-part-type='Subfamily' or @taxon-name-part-type='subfamilia' or @taxon-name-part-type='Subfamilia' or @taxon-name-part-type='subfamil' or @taxon-name-part-type='Subfamil' or @taxon-name-part-type='tribe' or @taxon-name-part-type='Tribe' or @taxon-name-part-type='tribus' or @taxon-name-part-type='Tribus' or @taxon-name-part-type='subtribe' or @taxon-name-part-type='Subtribe' or @taxon-name-part-type='subtribus' or @taxon-name-part-type='Subtribus']",
        genus = "./tp:taxon-name-part[@taxon-name-part-type='genus' or @taxon-name-part-type='Genus' or @taxon-name-part-type='genera' or @taxon-name-part-type='Genera']",
        subgenus = "./tp:taxon-name-part[@taxon-name-part-type='subgenus' or @taxon-name-part-type='Subgenus' or @taxon-name-part-type='subgenera' or @taxon-name-part-type='Subgenera' or @taxon-name-part-type='section' or @taxon-name-part-type='Section' or @taxon-name-part-type='sectio' or @taxon-name-part-type='Sectio']",
        species = "./tp:taxon-name-part[@taxon-name-part-type='Species' or @taxon-name-part-type='species']",
        # Fixed: same "/tp:..." -> "./tp:..." correction as in `atoms`.
        subspecies = "./tp:taxon-name-part[@taxon-name-part-type='Subspecies' or @taxon-name-part-type='subspecies' or @taxon-name-part-type='Variety' or @taxon-name-part-type='variety' or @taxon-name-part-type='varietas' or @taxon-name-part-type='Varietas' or @taxon-name-part-type='variation' or @taxon-name-part-type='Variation' or @taxon-name-part-type='subvariety' or @taxon-name-part-type='Subvariety' or @taxon-name-part-type='subvarietas' or @taxon-name-part-type='Subvarietas' or @taxon-name-part-type='subvariation' or @taxon-name-part-type='Subvariation' or @taxon-name-part-type='Form' or @taxon-name-part-type='form' or @taxon-name-part-type='forma' or @taxon-name-part-type='Forma' or @taxon-name-part-type='aberration' or @taxon-name-part-type='Aberration' or @taxon-name-part-type='race' or @taxon-name-part-type='Race' or @taxon-name-part-type='Subform' or @taxon-name-part-type='subform' or @taxon-name-part-type='subforma' or @taxon-name-part-type='Subforma' or @taxon-name-part-type='subaberation' or @taxon-name-part-type='Subaberation' or @taxon-name-part-type='subrace' or @taxon-name-part-type='Subrace' ]",
        authorship = "following-sibling::tp:taxon-authority | ./tp:taxon-name-part[@taxon-name-part-type='authority']",
        secundum_literal = NA
      ),
      constructor = taxonomic_name_usage_new,
      components = NULL
    )
  )
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.