Nothing
# Copyright 2011-2023 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package XiMpLe.
#
# XiMpLe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# XiMpLe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with XiMpLe. If not, see <http://www.gnu.org/licenses/>.
## internal functions, not exported
## function child.list()
# convenience function to let single children be provided without list()
child.list <- function(children){
if(is.XiMpLe.node(children)){
children <- list(children)
} else {
# if already a list, check if it's a list in a list and get it out
if(inherits(children, "list") & length(children) == 1){
if(inherits(children[[1]], "list")){
children <- children[[1]]
} else {}
} else {}
}
return(children)
} ## end function child.list()
## function split.chars()
# used to split a character string into parts at each occurrence of the start and end of a regex pattern
split.chars <- function(txt, pattern, perl=FALSE){
found.pattern <- gregexpr(pattern, text=txt, perl=perl)
found.pattern.start <- found.pattern[[1]]
found.pattern.end <- found.pattern.start + attr(found.pattern[[1]], "match.length") - 1
# returned -1 if pattern wasn't found
if(found.pattern.start[1] == -1){
return(txt)
} else {
txt.length <- nchar(txt)
num.found.patterns <- length(found.pattern.start)
result <- unlist(sapply(
0:num.found.patterns,
function(pat.idx){
# 0: chars before first match
if(pat.idx == 0){
if(found.pattern.start[1] > 1){
return(substr(txt, 1, found.pattern.start[1] - 1))
} else {}
} else {
result.match <- substr(txt, found.pattern.start[pat.idx], found.pattern.end[pat.idx])
# check if there's stuff between two matches
aft.match <- found.pattern.end[pat.idx] + 1
if(pat.idx < num.found.patterns){
nxt.match <- found.pattern.start[pat.idx + 1]
} else {
nxt.match <- txt.length + 1
}
if(aft.match < nxt.match){
result.aft.match <- trim(substr(txt, aft.match, nxt.match - 1))
# remove empty space
if(!identical("", result.aft.match)){
result.match <- c(result.match, result.aft.match)
} else {}
} else {}
return(result.match)
}
},
USE.NAMES=FALSE
), use.names=FALSE)
return(result)
}
} ## end function split.chars()
## function XML.single.tags()
# Splits one character string or vector with an XML tree into a vector with its single tags.
# - tree: The XML tree, must be character.
# - drop: A character vector with the possible contens \code{c("comments","declarations","cdata","value")}
XML.single.tags <- function(tree, drop=NULL){
if(!is.character(tree)){
stop(simpleError("'tree' must be character!"))
} else {}
if(length(tree) > 1) {
# force tree into one string
tree <- paste(tree, collapse="")
} else {}
# remove space at beginning (and end)
tree <- trim(tree)
## the main splitting process
# CDATA or comments can contain stuff which might ruin the outcome. we'll deal with those parts first.
tree <- split.chars(txt=tree, pattern="<!\\[CDATA\\[((?s).*?)\\]\\]>|/\\*[[:space:]]*<!\\[CDATA\\[[[:space:]]*\\*/((?s).*?)/\\*[[:space:]]*\\]\\]>[[:space:]]*\\*/|<!--((?s).*?)-->", perl=TRUE)
# now do the splitting
single.tags <- sapply(
tree,
function(this.tree){
# exclude the already cut our comments an CDATA entries
if(XML.comment(this.tree) | XML.cdata(this.tree) | XML.commcdata(this.tree)){
return(this.tree)
} else {
these.tags <- unlist(split.chars(txt=this.tree, "<((?s).*?)>", perl=TRUE), use.names=FALSE)
# remove probably troublesome content like newlines
these.tags[!XML.value(these.tags)] <- gsub("[[:space:]]+", " ", these.tags[!XML.value(these.tags)])
return(these.tags)
}
},
USE.NAMES=FALSE
)
single.tags <- unlist(single.tags, use.names=FALSE)
single.tags <- as.character(single.tags)
if("comments" %in% drop){
single.tags <- single.tags[!XML.comment(single.tags)]
} else {}
if("declarations" %in% drop){
single.tags <- single.tags[!XML.declaration(single.tags)]
} else {}
if("doctype" %in% drop){
single.tags <- single.tags[!XML.doctype(single.tags)]
} else {}
if("cdata" %in% drop){
single.tags <- single.tags[!XML.cdata(single.tags)]
} else {}
# force garbage collection
gc()
return(single.tags)
} ## end function XML.single.tags()
## function setMinIndent()
# takes a string, determines the minimum number of grouped \t strings,
# and adjusts it globally to the given level
setMinIndent <- function(tag, level=1, indent.by="\t"){
currentMinIndent <- nchar(unlist(strsplit(tag, "[^\t ]+"), use.names=FALSE))
currentMinIndent <- ifelse(length(currentMinIndent) > 0L, min(currentMinIndent), 0)
indentDiff <- currentMinIndent - level
tagParts <- unlist(strsplit(tag, "\n"))
# if currentMinIndent is greater than level, reduce indentation
if(indentDiff > 0L){
tagParts <- gsub(paste0("(^|\n)([\t ]){", indentDiff+1, "}"), "\\1", tagParts, perl=TRUE)
} else if(indentDiff < 0L){
tagParts <- paste0(indent(level=level, by=indent.by), tagParts)
} else {}
return(paste0(tagParts, collapse="\n"))
} ## end function setMinIndent()
## function indent()
# will create tabs to format the output
indent <- function(level, by="\t"){
paste(rep(by, max(0, level-1)), collapse="")
} ## end function indent()
# TODO:
## function paste_shine()
# pastes nodes with a given indentation and separator between arguments
#
# output is this for shine=0:
# {start}{ attrs[1] attrs[2]}{ child}{end}
#
# output is this for shine=1:
# {start}{
# attrs[1] attrs[2]}{
# child}
# {end}
#
# output is this for shine=2:
# {start}{
# attrs[1]
# attrs[2]}{
# child}
# {end}
#
# start: the initial tag start, e.g. "<a" or "<br"
# end: how the inital tag ends, e.g. ">" or "/>"
# attrs: optional character vector, arguments to paste
# child: optional character string, a fully indeted child node, used for recursion
# close: closong tag for non-empty tags, e.g. "</a>"
# level: level of indentation for the tag; indentation of arguments or child nodes depends on 'shine'
# indent.by: indentation character
# shine: shine level
# space_child: useful for e.g. comment tags to add a single space between start/closing tags and value
# space_attrs: similar to space_child, but adds an extra space only before the end tag
# as_script: logical, whether to separate by space (FALSE) or comma (TRUE)
paste_shine <- function(
start,
end,
attrs,
child,
close,
level,
indent.by="\t",
shine=1,
space_child=FALSE,
space_attrs=FALSE,
as_script=FALSE
){
if(isTRUE(as_script)){
next_sep <- ","
} else {
next_sep <- ""
}
indent_node <- indent(level=level, by=indent.by)
indent_attrs <- indent(level=level + 1, by=indent.by)
indent_child <- indent(level=level + 1, by=indent.by)
indent_end <- indent(level=level, by=indent.by)
indent_close <- indent(level=level, by=indent.by)
next_node <- "\n"
extra_space_child <-""
extra_space_attrs <-""
if(isTRUE(as_script)){
next_attr <- next_sep
} else {
next_attr <- paste0(next_sep, "\n")
}
next_close <- "\n"
first_attr <- "\n"
first_child <- "\n"
if(shine < 1){
# shine is 0
indent_attrs <- ""
indent_child <- ""
indent_end <- ""
indent_close <- ""
next_node <- ""
next_close <- ""
if(isTRUE(as_script)){
next_attr <- paste0(next_sep, " ")
first_attr <- ""
} else {
next_attr <- next_sep
first_attr <- " "
if(isTRUE(space_child)){
extra_space_child <-" "
} else {}
if(isTRUE(space_attrs)){
extra_space_attrs <-" "
} else {}
}
first_child <- ""
} else if(shine < 2){
# shine is 1
if(isTRUE(as_script)){
indent_attrs <- ""
} else {
indent_attrs <- " "
if(isTRUE(space_attrs)){
extra_space_attrs <-" "
} else {}
}
indent_end <- ""
next_attr <- next_sep
first_attr <- ""
} else {
# shine is 2, keep defaults
}
no_attrs <- no_child <- no_close <- FALSE
if(missing(attrs)){
attrs <- ""
no_attrs <- TRUE
} else if(any(identical(trim(attrs), ""), identical(trim(attrs), character()))){
no_attrs <- TRUE
}
if(missing(child)){
child <- ""
no_child <- TRUE
} else if(any(identical(trim(child), ""), identical(trim(child), character()))){
no_child <- TRUE
}
if(missing(close)){
close <- ""
no_close <- TRUE
} else if(any(identical(trim(close), ""), identical(trim(close), character()))){
no_close <- TRUE
}
if(isTRUE(no_attrs)){
indent_attrs <- ""
indent_end <- ""
next_attr <- ""
first_attr <- ""
if(isTRUE(space_attrs)){
extra_space_attrs <-" "
} else {}
} else {}
if(isTRUE(no_close)){
if(!isTRUE(no_child)){
stop(simpleError("Invalid call to XiMpLe:::paste_shine(): Missing closing tag!"))
} else {}
next_close <- ""
indent_close <- ""
} else {}
if(isTRUE(no_child)){
indent_child <- ""
child <- ""
if(any(shine < 2, as_script)){
next_attr <- ""
} else {}
first_child <- ""
extra_space_child <-""
} else {}
if(
all(
isTRUE(as_script),
any(
identical(attrs, ""),
identical(shine, 1)
),
identical(child, "")
)
){
next_close <- ""
indent_close <- ""
} else {}
## debugging:
# message(
# paste0(
# "level: ", level, "\n",
# "indent_node: ", deparse(indent_node), "\n",
# "start: ", deparse(start), "\n",
# "first_attr: ", deparse(first_attr), "\n",
# "indent_attrs: ", deparse(indent_attrs), "\n",
# "attrs: ", deparse(attrs), "\n",
# "next_attr: ", deparse(next_attr), "\n",
# "extra_space_attrs: ", deparse(extra_space_attrs), "\n",
# "indent_end: ", deparse(indent_end), "\n",
# "end: ", deparse(end), "\n",
# "first_child: ", deparse(first_child), "\n",
# "indent_child: ", deparse(indent_child), "\n",
# "extra_space_child: ", deparse(extra_space_child), "\n",
# "child: ", deparse(child), "\n",
# "next_close: ", deparse(next_close), "\n",
# "indent_close: ", deparse(indent_close), "\n",
# "close: ", deparse(close), "\n",
# "next_node: ", deparse(next_node), "\n"
# )
# )
return(
paste0(
indent_node, start, first_attr,
indent_attrs, attrs, next_attr, extra_space_attrs,
indent_end, end, first_child,
indent_child, extra_space_child, trim(child), extra_space_child,
next_close,
indent_close, close,
next_node
)
)
} ## end function paste_shine()
## function xml.tidy()
# replace special character < and > from attributes or text values
# with harmless entities
xml.tidy <- function(text){
if(is.character(text)){
tidy.text <- gsub("<", "<", gsub(">", ">", gsub("&([#[:alnum:]]{7}[^;]|[[:space:]]|[^;]*$)", "&\\1", text, perl=TRUE)))
} else {
return(text)
}
return(tidy.text)
} ## function xml.tidy()
## function lookupAttrName()
# takes the original input element names and returns
# the according XML attribute name
lookupAttrName <- function(tag, attr, rename){
if(is.null(tag)){
attr.name <- attr
} else {
attr.name <- rename[[tag]][[attr]]
}
return(attr.name)
} ## end function lookupAttrName()
## function pasteXMLAttr()
# pastes all attributes in a nicely readable way
pasteXMLAttr <- function(
attr=NULL,
tag=NULL,
level=1,
rename=NULL,
shine=2,
indent.by="\t",
tidy=FALSE,
as_script=FALSE
){
if(is.null(attr)){
return("")
} else {}
if(isTRUE(tidy)){
attr <- sapply(attr, xml.tidy, USE.NAMES=FALSE)
} else {}
new.indent <- ifelse(shine > 1, indent(level+1, by=indent.by), "")
new.attr <- ifelse(shine > 1, "\n", " ")
paste_collapse <- ifelse(isTRUE(as_script), paste0(",", new.attr, new.indent), paste0(new.attr, new.indent))
# only use formatting if more than one attribute
if(length(attr) > 1){
full.attr <- c()
full.attr <- paste0(sapply(
names(attr),
function(this.attr){
# skip empty elements
if(is.null(attr[[this.attr]])){
return()
} else {
if(!is.null(rename)){
# look up attribute name to paste
attr.name <- lookupAttrName(tag, this.attr, rename=rename)
} else {
attr.name <- this.attr
}
if(identical(attr[[this.attr]], character())){
# empty argument
if(isTRUE(as_script)){
attr_value <- "=character()"
} else {
attr_value <- ""
}
} else {
attr_value <- paste0("=\"", attr[[this.attr]], "\"")
}
return(
trim(paste0(full.attr, new.attr, new.indent, attr.name, attr_value))
)
}
}
), collapse=paste_collapse)
# for (this.attr in seq_along(all_attrs)){
# # skip empty elements
# if(is.null(attr[[this.attr]])){next}
# if(!is.null(rename)){
# # look up attribute name to paste
# attr.name <- lookupAttrName(tag, this.attr, rename=rename)
# } else {
# attr.name <- this.attr
# }
# full.attr <- trim(paste0(full.attr, new.attr, new.indent, attr.name, "=\"", attr[[this.attr]], "\""))
# }
} else {
if(!is.null(rename)){
# look up attribute name to paste
attr.name <- lookupAttrName(tag, names(attr), rename=rename)
} else {
attr.name <- names(attr)
}
if(identical(attr[[1]], character())){
# empty argument
if(isTRUE(as_script)){
attr_value <- "=character()"
} else {
attr_value <- ""
}
} else {
attr_value <- paste0("=\"", attr[[1]], "\"")
}
full.attr <- paste0(attr.name, attr_value)
}
return(full.attr)
} ## end function pasteXMLAttr()
## function args2list()
# takes a string that was separated from a tag, containing only its attributes,
# and tries to turn it into a named list
# drop_empty_tags: if set to TRUE, empty tags will be removed, otherwise they will
# get an empty character value assigned to them
# doctype_args: if TRUE, quoted empty attributes will temporarily be named for
# counting the start and end points of arguments, to not confuse the parser
attr2list <- function(attr, drop_empty_tags=FALSE, doctype_args=FALSE){
# regular expression to detect alphanumeric characters (we'll also accept some more,
# this is mainly needed to safely detect spaces and quotes from argument values
# unicode 00A7 is the section sign ("§", non-ASCII)
alnum_plus <- "[-_/'|*#@+~&%$\u00A7.,:;(){}?![:alnum:]]"
doctype_restore <- FALSE
qr_to_use <- ""
if(isTRUE(doctype_args) & grepl("\"", attr)){
# find a temporary name for quoted empty attributes,
# testing a few that are unlikely all used here
quote_replacer <- c(
"\u0142", # Å‚ (latin small letter "L" with stroke)
"\u014B", # Å‹ (latin small letter "Eng")
"\u0167", # ŧ (latin small letter "T" with stroke)
"\u00E6", # æ (latin small letter "Ae")
"\u00F0", # ð (latin small letter "Eth")
"\u00F8", # ø (latin small letter "O" with stroke)
"\u00FE" # þ (latin small letter "Thorn")
)
qr_in_attrs <- sapply(
quote_replacer,
function(this_qr){
any(grepl(this_qr, x=attr))
}
)
if(!all(qr_in_attrs)){
qr_to_use <- names(which.min(!qr_in_attrs))
attr <- gsub("[[:space:]]\"", paste0(" ", qr_to_use, "=\""), attr)
doctype_restore <- TRUE
} else {
# go on without replacement, this will likely result in an error
}
} else {}
# split into individual characters
attr_chars <- unlist(strsplit(trim(attr), ""))
if(length(attr_chars) > 0){
# which one is alphanumeric (we'll also accept "-" and "_"?
attr_alnum <- grepl(alnum_plus, attr_chars)
# find continuous patterns in the boolean vector, i.e. detect words vs. nonwords
attr_alnum_rle <- rle(attr_alnum)
# add zero for the sapply loop
attr_borders <- c(0, cumsum(attr_alnum_rle[["lengths"]]))
# here we glue the single characters together again,
# this way we'll get words in one string an nonwords in separated strings
# the result should be a vector of strings that is either
# - just alphanumeric, which could be either an attribute name or part of an attribute value
# - non-alphanumeric, which could be
# - space
# - "=\"" (including spaces, start of an attribute value)
# - "\"" (including spaces, end of an attribute value)
# - anything else, part of an attribute value
attr_tokens <- sapply(seq_along(attr_borders)[-1],
function(n){
paste0(attr_chars[(attr_borders[n-1] + 1):attr_borders[n]], collapse="")
}
)
tokens_n <- seq_along(attr_tokens)
# now we'll mark start and end of attributes
attr_on <- grepl("^[[:space:]]*=[[:space:]]*\"[[:space:]]*$", attr_tokens)
attr_off <- grepl("^[[:space:]]*\"[[:space:]]*$", attr_tokens)
# numbers must match, otherwise there was an error in parsing and we should PANIC! :D
if(!identical(sum(attr_on), sum(attr_off))){
stop(
simpleError(
paste0(
"Looks like I failed to parse attributes correctly. This one i couldn't digest:\n \"",
paste0(attr_chars, collapse=""),
"\""
)
)
)
} else {}
if(sum(attr_on, attr_off) > 0){
# now we can assume the range of attribute values
# make it a list to keep them separated
# each list entry is a vector of one full attribute value
attr_values <- lapply(
1:sum(attr_on),
function(n){
which(attr_on)[n]:which(attr_off)[n]
}
)
# right before each attribute value should be the attribute's name
attr_names <- sapply(
attr_values,
function(val){
if(val[1] < 2){
stop(simpleError("I've detected an attribute value without an attribute name!"))
} else {
arg_name_n <- val[1] - 1
}
if(!isTRUE(grepl(alnum_plus, attr_tokens[arg_name_n]))){
warning(paste0("This attribute name might be invalid, please check: \"", attr_tokens[arg_name_n], "\""), call.=FALSE)
} else {}
return(arg_name_n)
}
)
} else {
# only empty attributes?
attr_values <- list()
attr_names <- attr_tokens
}
# for safety reasons, consider putting arg names in quotes when non alphanumeric strings are used
non_alnum_names <- grepl("[^[:alnum:]]", attr_tokens[attr_names])
if(any(non_alnum_names)){
attr_tokens[attr_names[non_alnum_names]] <- paste0("\"", attr_tokens[attr_names[non_alnum_names]], "\"")
} else {}
# if we've gotten this far, all that's neither an attribute name nor its value is probably an empty attribute
attr_done <- sort(c(attr_names, unlist(attr_values)))
attr_unknown <- tokens_n[!tokens_n %in% attr_done]
if(length(attr_unknown) > 0){
attr_unknown <- attr_unknown[!grepl("^[[:space:]]+$", attr_tokens[attr_unknown])]
if(length(attr_unknown) > 0){
# set value for empty attribute
attr_tokens <- unlist(sapply(
tokens_n,
function(n){
if(isTRUE(n %in% attr_unknown)){
if(isTRUE(drop_empty_tags)){
return("")
} else {
return(c(attr_tokens[n], "=character()"))
}
} else {
return(attr_tokens[n])
}
}
))
# recalculate attr_off
attr_off <- grepl("^[[:space:]]*\"[[:space:]]*$|^[[:space:]]*=character\\(\\)[[:space:]]*$", attr_tokens)
} else {}
} else {}
# all but the last value closing will need a comma separator
if(length(which(attr_off)) > 1){
add_comma <- which(attr_off)
add_comma <- add_comma[1:(length(add_comma) - 1)]
attr_tokens[add_comma] <- paste0(attr_tokens[add_comma], ",")
} else {}
result <- eval(parse(text=paste("list(", paste0(attr_tokens, collapse=""), ")")))
if(isTRUE(doctype_restore)){
# restore the original attributes
to_restore <- which(names(result) %in% qr_to_use)
if(length(to_restore) > 0){
new_names <- paste0("\"", result[to_restore], "\"")
for(this_attr in to_restore){
result[[this_attr]] <- character()
}
names(result)[to_restore] <- new_names
} else {}
} else {}
return(result)
} else {
return(list())
}
} ## end function attr2list()
## function parseXMLAttr()
# takes a whole XML tag and returns a named list with its attributes
parseXMLAttr <- function(tag, drop_empty_tags=FALSE){
if(XML.endTag(tag) | XML.comment(tag) | XML.cdata(tag)){
# end tags, comments and CDATA don't have attributes
parsed.list <- ""
} else {
# first strip of start and end characters
stripped.tag <- gsub("<([?[:space:]]*)[^[:space:]]+[[:space:]]*(.*)", "\\2", tag, perl=TRUE)
stripped.tag <- trim(gsub("[/?]*>$", "", stripped.tag, perl=TRUE))
parsed.list <- attr2list(stripped.tag, drop_empty_tags=drop_empty_tags, doctype_args=XML.doctype(tag))
}
if(XML.declaration(tag)){
# only enforce validation for <?xml ... ?>
if(identical(XML.tagName(tag), tolower("?xml"))){
valid.attr <- c("version", "encoding", "standalone")
parsed.list <- parsed.list[tolower(names(parsed.list)) %in% valid.attr]
for (miss.attr in valid.attr[!valid.attr %in% tolower(names(parsed.list))]){
parsed.list[[miss.attr]] <- ""
}
} else {}
} else {}
return(parsed.list)
} ## end function parseXMLAttr()
## function trim()
# cuts off space at start and end of a character string
trim <- function(char){
char <- gsub("^[[:space:]]*", "", char)
char <- gsub("[[:space:]]*$", "", char)
return(char)
} ## end function trim()
## function XML.emptyTag()
# checks if a tag is a pair of start/end tags or an empty tag;
# returns either TRUE/FALSE, or the tag name if it is an empty tag and get=TRUE
XML.emptyTag <- function(tag, get=FALSE){
empty.tags <- sapply(
tag,
function(this.tag){
empty <- grepl("/>$", this.tag)
if(isTRUE(get)){
result <- ifelse(isTRUE(empty), XML.tagName(this.tag), "")
} else {
result <- empty
}
return(result)
},
USE.NAMES=FALSE
)
return(empty.tags)
} ## end function XML.emptyTag()
## function XML.endTag()
# checks if a tag an end tag;
# returns either TRUE/FALSE, or the tag name if it is an end tag and get=TRUE
XML.endTag <- function(tag, get=FALSE){
end.tags <- sapply(
tag,
function(this.tag){
end <- grepl("^</", this.tag)
if(isTRUE(get)){
result <- ifelse(isTRUE(end), XML.tagName(this.tag), "")
} else {
result <- end
}
return(result)
},
USE.NAMES=FALSE
)
return(end.tags)
} ## end function XML.endTag()
## function XML.comment()
# checks if a tag is a comment, returns TRUE or FALSE, or the comment (TRUE & get=TRUE)
XML.comment <- function(tag, get=FALSE, trim=TRUE){
comment.tags <- sapply(
tag,
function(this.tag){
comment <- grepl("<!--((?s).*)-->", this.tag, perl=TRUE)
if(isTRUE(get)){
result <- ifelse(isTRUE(comment), gsub("<!--((?s).*)-->", "\\1", this.tag, perl=TRUE), "")
if(isTRUE(trim)){result <- trim(result)} else {}
} else {
result <- comment
}
return(result)
},
USE.NAMES=FALSE
)
return(comment.tags)
} ## end function XML.comment()
## function XML.cdata()
# checks if a tag is a CDATA declaration, returns TRUE or FALSE, or the data (TRUE & get=TRUE)
XML.cdata <- function(tag, get=FALSE, trim=TRUE){
cdata.tags <- sapply(
tag,
function(this.tag){
cdata <- grepl("<!\\[CDATA\\[((?s).*)\\]\\]>", this.tag, perl=TRUE)
if(isTRUE(get)){
result <- ifelse(isTRUE(cdata), gsub("<!\\[CDATA\\[((?s).*)\\]\\]>", "\\1", this.tag, perl=TRUE), "")
if(isTRUE(trim)){result <- trim(result)} else {}
} else {
result <- cdata
}
return(result)
},
USE.NAMES=FALSE
)
return(cdata.tags)
} ## end function XML.cdata()
## function XML.commcdata()
# checks if a tag is a /* CDATA */ declaration, returns TRUE or FALSE, or the data (TRUE & get=TRUE)
XML.commcdata <- function(tag, get=FALSE, trim=TRUE){
commcdata.tags <- sapply(
tag,
function(this.tag){
commcdata <- grepl("/\\*[[:space:]]*<!\\[CDATA\\[[[:space:]]*\\*/((?s).*?)/\\*[[:space:]]*\\]\\]>[[:space:]]*\\*/", this.tag, perl=TRUE)
if(isTRUE(get)){
result <- ifelse(isTRUE(commcdata), gsub("/\\*[[:space:]]*<!\\[CDATA\\[[[:space:]]*\\*/((?s).*?)/\\*[[:space:]]*\\]\\]>[[:space:]]*\\*/", "\\1", this.tag, perl=TRUE), "")
if(isTRUE(trim)){result <- trim(result)} else {}
} else {
result <- commcdata
}
return(result)
},
USE.NAMES=FALSE
)
return(commcdata.tags)
} ## end function XML.commcdata()
## function XML.value()
# checks if 'tag' is actually not a tag but value/content/data. returns TRUE or FALSE, or the value (TRUE & get=TRUE)
XML.value <- function(tag, get=FALSE, trim=TRUE){
all.values <- sapply(
tag,
function(this.tag){
value <- grepl("^[[:space:]]*[^<]", this.tag)
if(isTRUE(get)){
result <- ifelse(isTRUE(value), this.tag, "")
if(isTRUE(trim)){result <- trim(result)} else {}
} else {
result <- value
}
return(result)
},
USE.NAMES=FALSE
)
return(all.values)
} ## end function XML.value()
## function XML.declaration()
# checks for a declaration, like <?xml bar?>
XML.declaration <- function(tag, get=FALSE){
decl.tags <- sapply(
tag,
function(this.tag){
declaration <- grepl("<\\?((?i)xml).*\\?>", this.tag, perl=TRUE)
if(isTRUE(get)){
result <- ifelse(isTRUE(declaration), XML.tagName(this.tag), "")
} else {
result <- declaration
}
return(result)
},
USE.NAMES=FALSE
)
return(decl.tags)
} ## end function XML.declaration()
## function XML.doctype()
# checks for a doctype declaration, like <!DOCTYPE foo>
XML.doctype <- function(tag, get=FALSE){
decl.tags <- sapply(
tag,
function(this.tag){
declaration <- grepl("<!((?i)DOCTYPE).*>", this.tag, perl=TRUE)
if(isTRUE(get)){
result <- ifelse(isTRUE(declaration), XML.tagName(this.tag), "")
} else {
result <- declaration
}
return(result)
},
USE.NAMES=FALSE
)
return(decl.tags)
} ## end function XML.doctype()
## function XML.def()
XML.def <- function(tag, get=FALSE){
decl.tags <- sapply(
tag,
function(this.tag){
declaration <- grepl("<[!?]+[^-]*>", this.tag)
if(isTRUE(get)){
result <- ifelse(isTRUE(declaration), XML.tagName(this.tag), "")
} else {
result <- declaration
}
return(result)
},
USE.NAMES=FALSE
)
return(decl.tags)
} ## end function XML.def()
## function XML.tagName()
XML.tagName <- function(tag){
tag.names <- sapply(
tag,
function(this.tag){
tagName <- gsub("<([[:space:]!?/]*)([^[:space:]>/]+).*", "\\2", this.tag, perl=TRUE)
return(tagName)
},
USE.NAMES=FALSE
)
return(tag.names)
} ## end function XML.tagName()
## function XML.nodes()
XML.nodes <- function(single.tags, end.here=NA, drop_empty_tags=FALSE, start=1){
# to save memory, we'll put the single.tags object into an environment
# and pass that on to all iterations
if(is.environment(single.tags)){
single.tags.env <- single.tags
num.all.tags <- length(get("single.tags", envir=single.tags.env))
} else {
single.tags.env <- new.env()
assign("single.tags", single.tags, envir=single.tags.env)
num.all.tags <- length(single.tags)
}
# try to iterate through the single tags
children <- list()
tag.no <- start
## uncomment to debug:
# cat(start,"\n")
while (tag.no <= num.all.tags){
## uncomment to debug:
# time.spent <- system.time({
this.tag <- get("single.tags", envir=single.tags.env)[tag.no]
nxt.child <- length(children) + 1
child.name <- XML.tagName(this.tag)
child.end.tag <- paste0("</[[:space:]]*", end.here,"[[:space:]>]+.*")
if(isTRUE(grepl(child.end.tag, this.tag))){
## uncomment to debug:
# cat(this.tag, ": break (",tag.no,")\n")
break
} else {}
# we must test for commented CDATA first, because XML.value() would be TRUE, too
if(XML.commcdata(this.tag)){
children[[nxt.child]] <- XiMpLe_node(
name="*![CDATA[",
value=XML.commcdata(this.tag, get=TRUE)
)
names(children)[nxt.child] <- "*![CDATA["
tag.no <- tag.no + 1
next
} else {}
if(XML.value(this.tag)){
children[[nxt.child]] <- XiMpLe_node(
name="",
value=XML.value(this.tag, get=TRUE)
)
names(children)[nxt.child] <- "!value!"
tag.no <- tag.no + 1
next
} else {
child.attr <- parseXMLAttr(this.tag, drop_empty_tags=drop_empty_tags)
}
if(XML.declaration(this.tag)){
children[[nxt.child]] <- XiMpLe_node(
name=child.name,
attributes=child.attr
)
names(children)[nxt.child] <- child.name
tag.no <- tag.no + 1
next
} else {}
if(XML.comment(this.tag)){
children[[nxt.child]] <- XiMpLe_node(
name="!--",
value=XML.comment(this.tag, get=TRUE)
)
names(children)[nxt.child] <- "!--"
tag.no <- tag.no + 1
next
} else {}
if(XML.cdata(this.tag)){
children[[nxt.child]] <- XiMpLe_node(
name="![CDATA[",
value=XML.cdata(this.tag, get=TRUE)
)
names(children)[nxt.child] <- "![CDATA["
tag.no <- tag.no + 1
next
} else {}
if(XML.endTag(this.tag)){
break
} else {}
if(!XML.emptyTag(this.tag)){
## uncomment to debug:
# cat(child.name, ":", tag.no, "-", child.end.tag,"\n")
rec.nodes <- XML.nodes(single.tags.env, end.here=child.name, drop_empty_tags=drop_empty_tags, start=tag.no + 1)
children[[nxt.child]] <- XiMpLe_node(
name=child.name,
attributes=child.attr,
children=rec.nodes$children,
# this value will force the node to remain non-empty if it had no children,
# it would be turned into an empty tag otherwise
value=""
)
names(children)[nxt.child] <- child.name
tag.no <- rec.nodes$tag.no + 1
next
} else {
children[[nxt.child]] <- XiMpLe_node(
name=child.name,
attributes=child.attr
)
names(children)[nxt.child] <- child.name
tag.no <- tag.no + 1
next
}
## uncomment to debug:
# })
# cat("system.time:", time.spent, "\n")
}
return(list(children=children, tag.no=tag.no))
} ## end function XML.nodes()
## function valid.child()
# - parent: character string, name of the parent node
# - children: (list of) XiMpLe.node objects, child nodes to check
# - validity: definitions of valid child nodes, class XiMpLe.validity
# - warn: warning or stop?
# - section: an optional name for the section for the warning/error
# (if it shouldn't be the parent name)
# - node names: can alternatively be given instead of 'children', as character vector
# - graceful: allow everything inside "!--" comments?
valid.child <- function(parent, children, validity, warn=FALSE, section=parent, node.names=NULL,
caseSens=TRUE, graceful=TRUE){
if(isTRUE(graceful) & identical(parent, "!--")){
# skip all checks and return TRUE
return(TRUE)
} else {}
if(is.null(node.names)){
# check the node names and allow only valid ones
node.names <- unlist(sapply(
child.list(children),
function(this.child){
if(is.XiMpLe.node(this.child)){
this.child.name <- XMLName(this.child)
if(identical(this.child.name, "")){
# special case: empty node name; this is used to combine
# comments with the node they belong to, so rather check
# the children of this special node
return(unlist(sapply(XMLChildren(this.child), XMLName, USE.NAMES=FALSE)))
} else {
return(this.child.name)
}
} else {
stop(simpleError(paste0("Invalid object for <", section, "> node, must be of class XiMpLe.node, but got class ", class(this.child), "!")))
}
},
USE.NAMES=FALSE
))
} else {}
validAllChildren <- slot(validity, "allChildren")
validChildren <- slot(validity, "children")[[parent]]
# check for recursion
if(is.XiMpLe.validity(validChildren)){
validChildren <- c(
names(slot(validChildren, "children")),
slot(validChildren, "allChildren")
)
} else {}
ignoreChildren <- slot(validity, "ignore")
if(!isTRUE(caseSens)){
node.names <- tolower(node.names)
validAllChildren <- tolower(validAllChildren)
validChildren <- tolower(validChildren)
ignoreChildren <- tolower(ignoreChildren)
} else {}
invalid.sets <- !node.names %in% c(validAllChildren, validChildren, ignoreChildren)
if(any(invalid.sets)){
return.message <- paste0("Invalid XML nodes for <", section, "> section: ", paste(node.names[invalid.sets], collapse=", "))
if(isTRUE(warn)){
warning(return.message, call.=FALSE)
return(FALSE)
} else {
stop(simpleError(return.message))
}
} else {
return(TRUE)
}
} ## end function valid.child()
## function valid.attribute()
# similar to valid.child(), but checks the validity of attributes of a given node
# it's a bit simpler
# - node: a character string, node name
# - attrs: a named list of attributes to check
# - validity: definitions of valid child nodes, class XiMpLe.validity
valid.attribute <- function(node, attrs, validity, warn=FALSE, caseSens=TRUE){
if(length(attrs) > 0){
attrsNames <- names(attrs)
validAllAttrs <- slot(validity, "allAttrs")
validAttrs <- slot(validity, "attrs")[[node]]
ignoreNodes <- slot(validity, "ignore")
if(!isTRUE(caseSens)){
attrsNames <- tolower(attrsNames)
validAllAttrs <- tolower(validAllAttrs)
validAttrs <- tolower(validAttrs)
ignoreNodes <- tolower(ignoreNodes)
} else {}
if(node %in% ignoreNodes){
return(NULL)
} else {
invalid.sets <- !attrsNames %in% c(validAllAttrs, validAttrs)
if(any(invalid.sets)){
return.message <- paste0("Invalid XML attributes for <", node, "> node: ", paste(attrsNames[invalid.sets], collapse=", "))
if(isTRUE(warn)){
warning(return.message, call.=FALSE)
return(FALSE)
} else {
stop(simpleError(return.message))
}
} else {
return(TRUE)
}
}
} else {
return(NULL)
}
} ## end function valid.attribute()
## function validParamName()
# called by XMLgenerators() to ensure valid parameter names in the generated function calls
validParamName <- function(name, replacement="_"){
return(gsub(pattern="[^a-zA-Z0-9_.]", replacement=replacement, x=name))
} ## end function validParamName()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.