library(tidyverse)
library(glue)
library(xml2)
library(jsonlite)
# Full paths of every file in the EML 2.2.0 schema directory, resolved
# relative to the current working directory.
xsd_files <- list.files(path = "../inst/xsd/eml-2.2.0", full.names = TRUE)

Find arbitrary element

# Search each schema file for the xs:complexType whose name attribute is
# "ConferenceProceedings". `found` holds one node set per file, named by the
# file path; compact() then shows only the files that produced a match.
found <- map(xsd_files, function(path) {
  xml_find_all(
    read_xml(path),
    "//xs:complexType[@name='ConferenceProceedings']"
  )
})
found <- setNames(found, xsd_files)
compact(found)
# Repeat the search for every xs:restriction sitting directly under an
# xs:complexContent node. `found` is overwritten with one node set per schema
# file, named by the file path; compact() shows only the non-empty entries.
found <- map(xsd_files, function(path) {
  xml_find_all(read_xml(path), "//xs:complexContent/xs:restriction")
})
found <- setNames(found, xsd_files)
compact(found)

Explore

Root contains:

- attribute, attributeGroup, complexType, element, group, simpleType
- annotation, import

element: either contains nothing (but has a type declaration), or has no type declaration and contains one of:

- simpleType
- complexType

(In addition, every element has an annotation child element.)

complexType: contains

- annotation
- attribute
- group
- choice
- sequence
- complexContent
- simpleContent

choice, sequence, and group all contain any number of: choice, sequence, group, element.

Note that sequence also appears in restriction and extension, and that groups only contain sequences.

# For each schema file: the child-node names of every xs:element, kept as a
# nested list (one entry per file, one sub-entry per element).
map(xsd_files, function(path) {
  elements <- xml_find_all(read_xml(path), "//xs:element")
  child_sets <- map(elements, xml_children)
  map(child_sets, xml_name)
})

# Same lookup for xs:extension nodes, flattened across all files and counted
# by child-node name.
map(xsd_files, function(path) {
  extensions <- xml_find_all(read_xml(path), "//xs:extension")
  child_sets <- map(extensions, xml_children)
  map(child_sets, xml_name)
}) %>%
  unlist() %>%
  table()

# Keep the non-empty sets of xs:any nodes found directly under an xs:sequence
# in `a` for later inspection.
a <- compact(map(xsd_files, function(path) {
  xml_find_all(read_xml(path), "//xs:sequence/xs:any")
}))

# Count, across all files, the names of the nodes directly under each
# document root.
map(xsd_files, function(path) {
  top_level <- xml_children(xml_root(read_xml(path)))
  map_chr(top_level, xml_name)
}) %>%
  unlist() %>%
  table()

# Names of all child nodes of every xs:element, pooled across the schema files.
terms <- unlist(map(xsd_files, function(path) {
  element_children <- xml_children(xml_find_all(read_xml(path), "//xs:element"))
  map_chr(element_children, xml_name)
}))

# Drop the "annotation" entries and tabulate the child-node names that remain.
types <- terms[terms != "annotation"]
table(types)


# Count, across all schema files, the names of the child nodes of every
# xs:attribute.
map(xsd_files, function(path) {
  attribute_children <- xml_children(
    xml_find_all(read_xml(path), "//xs:attribute")
  )
  map_chr(attribute_children, xml_name)
}) %>%
  unlist() %>%
  table()

# The four tabulations below share one recipe: in every schema file, find all
# nodes of one kind, gather their children, and count the child-node names
# across all files.

# Children of xs:complexType.
map(xsd_files, function(path) {
  kids <- xml_children(xml_find_all(read_xml(path), "//xs:complexType"))
  map_chr(kids, xml_name)
}) %>%
  unlist() %>%
  table()

# Children of xs:simpleType.
map(xsd_files, function(path) {
  kids <- xml_children(xml_find_all(read_xml(path), "//xs:simpleType"))
  map_chr(kids, xml_name)
}) %>%
  unlist() %>%
  table()

# Children of xs:choice.
map(xsd_files, function(path) {
  kids <- xml_children(xml_find_all(read_xml(path), "//xs:choice"))
  map_chr(kids, xml_name)
}) %>%
  unlist() %>%
  table()

# Children of xs:sequence.
map(xsd_files, function(path) {
  kids <- xml_children(xml_find_all(read_xml(path), "//xs:sequence"))
  map_chr(kids, xml_name)
}) %>%
  unlist() %>%
  table()


cboettig/emljson documentation built on Oct. 27, 2020, 1:27 a.m.