#xmlRoot.HTMLInternalDocument =
xmlRoot.XMLInternalDocument =
function(x, skip = TRUE, addFinalizer = NA, ...)
{
.Call("R_xmlRootNode", x, as.logical(skip), addFinalizer, PACKAGE = "XML")
}
setAs("XMLNode", "XMLInternalNode",
function(from) {
con = textConnection("tmp", "w", local = TRUE)
sink(con)
on.exit({sink(file = NULL); close(con)})
print(from)
doc = xmlParse(tmp, asText = TRUE)
node = xmlRoot(doc)
removeChildren(node)
node
}
)
setAs("XMLInternalDocument", "character", function(from) saveXML(from))
setAs("XMLInternalDOM", "character", function(from) saveXML(from))
setAs("XMLInternalDocument", "XMLInternalNode",
function(from) xmlRoot(from))
setAs("XMLInternalNode", "XMLInternalDocument",
function(from) {
doc = .Call("R_getXMLNodeDocument", from, PACKAGE = "XML")
addDocFinalizer(doc, TRUE)
if(is(doc, "HTMLInternalDocument"))
class(doc) = c(class(doc), "XMLInternalDocument", "XMLAbstractDocument")
doc
})
setGeneric("free", function(obj) standardGeneric("free"))
setMethod("free", "XMLInternalDocument",
function(obj) {
invisible(.Call("R_XMLInternalDocument_free", obj, PACKAGE = "XML"))
})
addFinalizer =
function(obj, fun, ...)
{
UseMethod("addFinalizer")
}
addCFinalizer.XMLInternalDocument =
function(obj, fun, ...)
{
if(missing(fun) || fun == NULL)
fun = getNativeSymbolInfo("RSXML_free_internal_document")$address
else if(!is.function(obj)) {
}
.Call("R_addXMLInternalDocument_finalizer", obj, fun, PACKAGE = "XML")
}
asRXMLNode =
function(node, converters = NULL, trim = TRUE, ignoreBlanks = TRUE)
{
.Call("R_createXMLNode", node, converters, as.logical(trim), as.logical(ignoreBlanks), PACKAGE = "XML")[[1]]
}
"[.XMLInternalDocument" =
function(x, i, j, ..., namespaces = xmlNamespaceDefinitions(x, simplify = TRUE), addFinalizer = NA)
{
if(is.character(i)) {
getNodeSet(x, i, ..., addFinalizer = addFinalizer)
} else
stop("No method for subsetting an XMLInternalDocument with ", class(i))
}
"[[.XMLInternalDocument" =
function(x, i, j, ..., exact = NA, namespaces = xmlNamespaceDefinitions(x, simplify = TRUE),
addFinalizer = NA)
{
ans = x[i, addFinalizer = addFinalizer]
if(length(ans) > 1)
warning(length(ans), " elements in node set. Returning just the first one! (Use [])")
ans[[1]]
}
xmlName.XMLInternalNode =
function(node, full = FALSE)
{
ans = .Call("RS_XML_xmlNodeName", node, PACKAGE = "XML")
if((is.logical(full) && full) || (!is.logical(full) && length(full))) {
tmp = xmlNamespace(node)
if(length(tmp) && length(names(tmp)) > 0 && names(tmp) != "")
ans = paste(names(tmp), ans, sep = ":")
else if(is.character(full) && full != "")
ans = paste(full, ans, sep = ":")
}
ans
}
if(useS4)
setMethod("xmlName", "XMLInternalNode", xmlName.XMLInternalNode)
xmlNamespace.XMLInternalNode =
function(x)
{
.Call("RS_XML_xmlNodeNamespace", x, PACKAGE = "XML")
}
xmlAttrs.XMLInternalNode =
function(node, addNamespacePrefix = FALSE, addNamespaceURLs = TRUE, ...)
{
ans = .Call("RS_XML_xmlNodeAttributes", node, as.logical(addNamespacePrefix), as.logical(addNamespaceURLs), PACKAGE = "XML")
if(length(attr(ans, "namespaces")))
ans = new("XMLAttributes", ans) # class(ans) = "XMLAttributes"
ans
}
#setOldClass(c("XMLAttributes", "character"))
setClass("XMLAttributes", contains = "character")
setMethod("show", "XMLAttributes",
function(object)
print(unclass(object)))
setMethod('[', c('XMLAttributes', "ANY"),
function(x, i, j, ...)
{
ans = callNextMethod()
i = match(i, names(x))
structure(ans, namespaces = attr(x, "namespaces")[i], class = class(x))
})
xmlChildren.XMLInternalNode =
function(x, addNames = TRUE, omitNodeTypes = c("XMLXIncludeStartNode", "XMLXIncludeEndNode"), addFinalizer = NA, ...)
{
kids = .Call("RS_XML_xmlNodeChildrenReferences", x, as.logical(addNames), addFinalizer, PACKAGE = "XML")
if(length(omitNodeTypes))
kids = kids[! sapply(kids, function(x) any(inherits(x, omitNodeTypes)) )]
structure(kids, class = c("XMLInternalNodeList", "XMLNodeList"))
}
xmlChildren.XMLInternalDocument =
function(x, addNames = TRUE, ...)
{
# .Call("RS_XML_xmlDocumentChildren", x, as.logical(addNames), PACKAGE = "XML")
xmlChildren.XMLInternalNode(x, addNames, ...)
}
if(useS4) {
setMethod("xmlAttrs", "XMLInternalNode", xmlAttrs.XMLInternalNode)
setMethod("xmlChildren", "XMLInternalNode", xmlChildren.XMLInternalNode)
setMethod("xmlChildren", "XMLInternalDocument", xmlChildren.XMLInternalDocument)
}
xmlSize.XMLInternalNode =
function(obj)
.Call("RS_XML_xmlNodeNumChildren", obj, PACKAGE = "XML")
"[[.XMLInternalNode" <-
#setMethod("[[", "XMLInternalNode",
function(x, i, j, ..., addFinalizer = NA)
{
if(inherits(i, "formula")) {
return(getNodeSet(x, i, if(missing(j)) character() else j, addFinalizer = addFinalizer, ...)[[1]])
}
if(is.na(i))
return(NULL)
# Get the individual elements rather than all the children and then subset those
return(
if(is(i, "numeric"))
.Call("R_getChildByIndex", x, as.integer(i), as.logical(addFinalizer), PACKAGE = "XML")
else
.Call("R_getChildByName", x, as.character(i), as.logical(addFinalizer), PACKAGE = "XML")
)
kids = xmlChildren(x, addFinalizer = addFinalizer)
if(length(kids) == 0)
return(NULL)
if(is.numeric(i))
kids[[i]]
else {
id = as.character(i)
which = match(id, sapply(kids, xmlName))
kids[[which]]
}
}
"[.XMLInternalNode" <-
function(x, i, j, ..., addFinalizer = NA)
{
kids = xmlChildren(x, addFinalizer = addFinalizer)
if(is.logical(i))
i = which(i)
if(is(i, "numeric"))
structure(kids[i], class = c("XMLInternalNodeList", "XMLNodeList"))
else {
id = as.character(i)
#
# The following can return the nodes in the wrong order.
# which = match(sapply(kids, xmlName), id)
# structure(kids[!is.na(which)], class = c("XMLInternalNodeList", "XMLNodeList"))
# This is keeps the order of i/id and is simpler.
structure(kids[id], class = c("XMLInternalNodeList", "XMLNodeList"))
}
}
xmlValue.XMLInternalNode =
function(x, ignoreComments = FALSE, recursive = TRUE, encoding = getEncoding(x), trim = FALSE) #CE_NATIVE)
{
encoding = if(is.integer(encoding))
encoding
else
getEncodingREnum(encoding)
if(!recursive) {
if(xmlSize(x) == 0)
return(character())
kids = xmlChildren(x, addFinaliizer = FALSE)
i = sapply(kids, inherits, "XMLInternalTextNode")
if(any(i))
return(paste(unlist(lapply(kids[i], xmlValue, ignoreComments, recursive = TRUE, encoding = encoding, trim = trim)), collapse = ""))
else
return(character())
}
ans = .Call("R_xmlNodeValue", x, NULL, encoding, PACKAGE = "XML") # 2nd argument ignored.
if(trim)
trim(ans)
else
ans
}
xmlValue.XMLAttributeValue =
function(x, ...)
unname( x )
setS3Method("xmlValue", "XMLInternalNode")
setS3Method("xmlValue", "XMLAttributeValue")
setGeneric("xmlValue<-", function(x, ..., value) standardGeneric("xmlValue<-"))
setMethod("xmlValue<-", "XMLInternalTextNode",
function(x, ..., value) {
.Call("R_setXMLInternalTextNode_value", x, as.character(value), PACKAGE = "XML")
x
})
setMethod("xmlValue<-", "XMLTextNode",
function(x, ..., value) {
x$value = as.character(value)
x
})
setMethod("xmlValue<-", "XMLAbstractNode",
function(x, ..., value) {
if(xmlSize(x) == 0) {
x = addChildren(x, as.character(value))
} else if(xmlSize(x) == 1 && any(inherits(x[[1]], c("XMLTextNode", "XMLInternalTextNode")))) {
#XXX Fix the assignment to children.
# should be xmlValue(x[[1]]) = value
tmp = x[[1]]
xmlValue(tmp) = as.character(value)
if(inherits(x[[1]], "XMLTextNode"))
x$children[[1]] = tmp
} else
stop("Cannot set the content of a node that is not an XMLInternalTextNode or a node containing a text node")
x
})
names.XMLInternalNode =
function(x)
xmlSApply(x, xmlName, addFinalizer = FALSE)
xmlApply.XMLInternalNode =
function(X, FUN, ..., omitNodeTypes = c("XMLXIncludeStartNode", "XMLXIncludeEndNode"), addFinalizer = NA)
{
kids = xmlChildren(X, addFinalizer = addFinalizer)
if(length(omitNodeTypes))
kids = kids[! sapply(kids, function(x) any(inherits(x, omitNodeTypes)) )]
lapply(kids, FUN, ...)
}
xmlSApply.XMLInternalNode =
function(X, FUN, ..., omitNodeTypes = c("XMLXIncludeStartNode", "XMLXIncludeEndNode"), addFinalizer = NA)
{
kids = xmlChildren(X, addFinalizer = addFinalizer)
if(length(omitNodeTypes))
kids = kids[! sapply(kids, function(x) any(inherits(x, omitNodeTypes)) )]
sapply(kids, FUN, ...)
}
xmlSApply.XMLNodeSet =
function(X, FUN, ..., omitNodeTypes = c("XMLXIncludeStartNode", "XMLXIncludeEndNode"), addFinalizer = NA)
{
sapply(X, FUN, ...)
}
xmlApply.XMLNodeSet =
function(X, FUN, ..., omitNodeTypes = c("XMLXIncludeStartNode", "XMLXIncludeEndNode"), addFinalizer = NA)
{
lapply(X, FUN, ...)
}
getChildrenStrings =
function(node, encoding = getEncoding(node), asVector = TRUE, len = xmlSize(node),
addNames = TRUE)
{
encoding = getEncodingREnum(encoding)
.Call("R_childStringValues", node, as.integer(len), as.logical(asVector), as.integer(encoding),
as.logical(addNames), PACKAGE = "XML")
}
setMethod("xmlParent", "XMLInternalNode",
function(x, addFinalizer = NA, count = 1, ...)
{
# fix the addFinalizer to be only for the last one.
for(i in 1:count)
x = .Call("RS_XML_xmlNodeParent", x, addFinalizer, PACKAGE = "XML")
x
})
newXMLDTDNode <-
function(nodeName, externalID = character(), systemID = character(), doc = NULL, addFinalizer = NA)
{
if(length(nodeName) > 1 && missing(externalID))
externalID = nodeName[2]
if(length(nodeName) > 2 && missing(systemID))
systemID = nodeName[3]
.Call("R_newXMLDtd", doc, as.character(nodeName), as.character(externalID), as.character(systemID),
addFinalizer, PACKAGE = "XML")
}
setInternalNamespace =
function(node, ns)
{
.Call("R_xmlSetNs", node, ns, FALSE, PACKAGE = "XML") # as.logical(append))
}
addDocFinalizer =
function(doc, finalizer)
{
fun = NULL
if(is.logical(finalizer)) {
if(is.na(finalizer) || !finalizer)
return()
else
fun = NULL
} else {
fun = finalizer
if(inherits(fun, "NativeSymbolInfo"))
fun = fun$address
}
if(!is.null(fun) && !is.function(fun) && typeof(fun) != "externalptr")
stop("need an R function, address of a routine or NULL for finalizer")
.Call("R_addXMLInternalDocument_finalizer", doc, fun, PACKAGE = "XML")
}
HTML_DTDs =
c("http://www.w3.org/TR/html4/frameset.dtd",
"http://www.w3.org/TR/html4/loose.dtd",
"http://www.w3.org/TR/html4/strict.dtd",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
)
newHTMLDoc =
function(dtd = "loose", addFinalizer = TRUE, name = character(),
node = newXMLNode("html", newXMLNode("head", addFinalizer = FALSE), newXMLNode("body", addFinalizer = FALSE),
addFinalizer = FALSE))
{
if(is.na(dtd) || dtd == "")
dtd = ""
else if(tolower(dtd) %in% c("html5", "5"))
dtd = "5"
else {
i = grep(dtd, HTML_DTDs)
if(length(i)) {
if(length(i) > 1)
warning("matched multiple DTDs. Using the first")
dtd = HTML_DTDs[i[1]]
} else
dtd = ""
}
doc = newXMLDoc(dtd = dtd, isHTML = TRUE, addFinalizer = addFinalizer, node = node)
doc
}
newXMLDoc <-
#
# Creates internal C-level libxml object for representing
# an XML document/tree of nodes.
#
function(dtd = "", namespaces = NULL, addFinalizer = TRUE, name = character(), node = NULL,
isHTML = FALSE)
{
if(is(dtd, "XMLInternalNode")) {
dtdNode = dtd
dtd = character()
} else
dtdNode = NULL
ans = .Call("R_newXMLDoc", dtd, namespaces, as.logical(isHTML), PACKAGE = "XML")
class(ans) = oldClass(class(ans))
addDocFinalizer(ans, addFinalizer)
if(length(name))
docName(ans) = as.character(name)
if(length(dtdNode))
addChildren(ans, dtdNode)
if(length(node)) {
if(is.character(node))
newXMLTextNode(node, addFinalizer = FALSE, parent = ans)
else
addChildren(ans, node)
}
ans
}
XMLOptions = new.env()
getOption =
function(name, default = NULL, converter = NULL)
{
if(!exists(name, XMLOptions, inherits = FALSE))
return(base::getOption(name, default))
ans = get(name, XMLOptions)
if(is.function(converter))
converter(ans)
else
ans
}
setOption =
function(name, value)
{
prev = getOption(name)
assign(name, value, XMLOptions)
prev
}
newXMLNode <-
###XXX Note that there is another definition of this in dups.R
# Which is now elided.
# Create an internal C-level libxml node
#
#
# It is possible to use a namespace prefix that is not defined.
# This is okay as it may be defined in another node which will become
# an ancestor of this newly created one.
# XXX Have to add something to force the namespace prefix into the node
# when there is no corresponding definition for that prefix.
function(name, ..., attrs = NULL,
namespace = character(), namespaceDefinitions = character(),
doc = NULL, .children = list(...), parent = NULL,
at = NA,
cdata = FALSE,
suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE), # i.e. warn.
sibling = NULL, addFinalizer = NA,
noNamespace = length(namespace) == 0 && !missing(namespace),
fixNamespaces = c(dummy = TRUE, default = TRUE)
)
{
# determine whether we know now that there is definitely no namespace.
# make certain we have a character vector for the attributes.
if(length(attrs)) {
ids = names(attrs)
attrs = structure(as(attrs, "character"), names = ids)
# Find any attributes that are actually namespace definitions.
i = grep("^xmlns", names(attrs))
if(length(i)) {
warning("Don't specify namespace definitions via 'attrs'; use namespaceDefinitions")
namespace = c(namespace, structure(attrs[i], names = gsub("^xmlns:", "", names(attrs)[i])))
attrs = attrs[ -i]
}
} else
attrs = character()
# allow the caller to specify the node name as ns_prefix:name
# but we have to create it as name and the set the namespace.
ns = character() # the namespace prefix
name = strsplit(name, ":")[[1]]
if(length(name) == 2) {
ns = name[1]
name = name[2]
noNamespace = FALSE
}
if(is.list(parent)) {
if(length(parent) < 1 ||
!(is(parent[[1]], "XMLInternalElementNode") || is(parent[[1]], "XMLInternalDocument")))
stop("incorrect value for parent")
parent = parent[[1]]
}
# if there is no doc, but we have a parent which is an XMLInternalDocument, use that.
if(missing(doc) && !missing(parent) &&
inherits(parent, "XMLInternalDocument")) {
doc = parent
parent = NULL
}
# Get the doc from the parent node/document.
if(is.null(doc) && !is.null(parent)) {
# doc = as(parent, "XMLInternalDocument")
doc = if(inherits(parent, "XMLInternalDocument"))
parent
else
.Call("R_getXMLNodeDocument", parent, PACKAGE = "XML")
}
# create the node. Let's leave the namespace definitions and prefix till later.
# xmlSetProp() routine in R_newXMLNode() handles namespaces on the attribute names, even checking them.
node <- .Call("R_newXMLNode", as.character(name), character(), character(), doc, namespaceDefinitions,
addFinalizer, PACKAGE = "XML")
if(!is.null(sibling))
addSibling(sibling, node, after = as.logical(at))
else if(!is.null(parent))
addChildren(parent, node, at = at)
if(TRUE) { # Create the name space definitions here rather than in C code.
nsDefs = lapply(seq(along = namespaceDefinitions),
function(i)
newNamespace(node, namespaceDefinitions[[i]], names(namespaceDefinitions)[i], set = FALSE)
)
if(length(namespaceDefinitions))
names(nsDefs) = if(length(names(namespaceDefinitions))) names(namespaceDefinitions) else ""
} else
nsDefs = xmlNamespaceDefinitions(node)
# Now that the namespaces are defined, we can define the attributes which _may_ use them.
addAttributes(node, .attrs = attrs, suppressNamespaceWarning = suppressNamespaceWarning)
if(is(namespace, "XMLNamespaceRef")) {
setInternalNamespace(node, namespace)
} else if(is.na(noNamespace) || !noNamespace) {
ns = getNodeNamespace(ns, nsDefs, node, namespace, noNamespace, namespaceDefinitions, parent, suppressNamespaceWarning)
if(is.null(ns))
!.Call("R_setNamespaceFromAncestors", node, PACKAGE = "XML")
# .Call("R_getAncestorDefaultNSDef", node, TRUE, PACKAGE = "XML")
}
# Here is where we set the namespace for this node.
if(length(ns) && (inherits(ns, c("XMLNamespaceRef", "XMLNamespaceDeclaration")) || (is.character(ns) && ns != "")))
setXMLNamespace( node, ns) # should this be append = FALSE ?
# Add any children to this node.
if(length(.children)) {
if(!is.list(.children))
.children = list(.children)
addChildren(node, kids = .children, cdata = cdata, addFinalizer = addFinalizer)
}
if(any(fixNamespaces)) { # !is.null(parent)) {
xmlFixNamespaces(node, fixNamespaces)
# fixDummyNS(node, suppressNamespaceWarning)
}
node
}
xmlFixNamespaces =
function(node, fix)
{
if(length(fix) == 1)
fix = structure(rep(fix, 2), names = c("dummy", "default"))
if(length(names(fix)) == 0)
names(fix) = c("dummy", "default")
if(fix["dummy"])
xmlApply(node, function(x) .Call("R_fixDummyNS", x, TRUE, PACKAGE = "XML"))
if(fix["default"])
.Call("R_getAncestorDefaultNSDef", node, TRUE, PACKAGE = "XML")
}
FixDummyNS = 2L
FixDefaultNS = 4L
xmlNamespaceRef =
function(node)
.Call("R_getXMLNsRef", node, PACKAGE = "XML")
if(FALSE) {
# Quick check to see if the speed problem in newXMLNode above is in the extra processing
newXMLNode <-
function(name, ..., attrs = NULL,
namespace = "", namespaceDefinitions = character(),
doc = NULL, .children = list(...), parent = NULL,
at = NA,
cdata = FALSE,
suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE) # i.e. warn.
)
{
node = .Call("R_newXMLNode", name, as.character(attrs), character(), doc, character(), TRUE, PACKAGE = "XML")
if(!is.null(parent))
addChildren(parent, node, at = at)
node
}
}
findNamespaceDefinition =
#
# Search up the node hierarchy looking for a namespace
# matching that prefix.
#
function(node, namespace, error = TRUE)
{
ptr = node
while(!is.null(ptr)) {
tmp = namespaceDeclarations(ptr, TRUE)
i = match(namespace, names(tmp))
if(!is.na(i))
return(tmp[[i]])
ptr = xmlParent(ptr)
}
if(error)
stop("no matching namespace definition for prefix ", namespace)
NULL
}
setXMLNamespace =
#
# Set the specified namespace as the namespace for this
# node.
# namespace can be a prefix in which case we find it in the
# definition in this node or its ancestors.
# Otherwise, we expect a name = value character vector giving the
# prefix and URI and we create a new namespace definition.
# Alternatively, if you already have the namespace reference object
# from earlier, you can pass that in.
# Then we set the namespace on the node.
function(node, namespace, append = FALSE)
{
if(is.character(namespace) && is.null(names(namespace)))
namespace = findNamespaceDefinition(node, namespace)
else if(is.character(namespace))
namespace = newNamespace(node, namespace)
else if(!is.null(namespace) && !inherits(namespace, c("XMLNamespaceRef", "XMLNamespaceDeclaration")))
stop("Must provide a namespace definition, a prefix of existing namespace or a reference to a namespace definition")
.Call("R_xmlSetNs", node, namespace, FALSE, PACKAGE = "XML")
}
setAs("XMLNamespace", "character",
function(from)
unclass(from))
setAs("XMLNamespaceDefinition", "character",
function(from)
structure(from$uri, names = from$id))
setGeneric("xmlNamespace<-",
function(x, ..., value)
standardGeneric("xmlNamespace<-"))
setMethod("xmlNamespace<-", "XMLInternalNode",
function(x, ..., value) {
setXMLNamespace(x, value)
x
})
setGeneric("xmlNamespaces<-",
function(x, append = TRUE, set = FALSE, value)
standardGeneric("xmlNamespaces<-"))
setMethod("xmlNamespaces<-", "XMLNode",
function(x, append = TRUE, set = FALSE, value) {
if(inherits(value, "XMLNamespace"))
value = as(value, "character")
else if(is.null(names(value)))
names(value) = ""
# check for duplicates?
i = duplicated(names(value))
if(any(i)) {
warning("discarding duplicated namespace prefixes ", paste(names(value)[i], collapse = ", "))
value = value[!i]
}
if(append) {
cur = as(x$namespaceDefinitions, "character")
cur[names(value)] = value
value = cur
}
x$namespaceDefinitions = as(value, "XMLNamespaceDefinitions")
if(set)
x$namespace = names(value)
x
})
setMethod("xmlNamespaces<-", "XMLInternalNode",
function(x, append = TRUE, set = FALSE, value) {
value = as(value, "character")
if(is.null(names(value)))
names(value) = ""
# check for duplicates?
i = duplicated(names(value))
if(any(i)) {
warning("discarding duplicated namespace prefixes ", paste(names(value)[i], collapse = ", "))
value = value[!i]
}
if(append) {
# Work with existing ones
curDefs = namespaceDeclarations(x)
i = names(value) %in% names(curDefs)
if(any(i)) {
warning("discarding duplicated namespace prefixes ", paste(names(value)[i], collapse = ", "))
value = value[!i]
}
}
if(length(value) == 0)
# Should worry about the set.
return()
if(length(set) == 1 && set == TRUE && length(value) > 1)
set = c(set, rep(FALSE, length(value) - 1))
else
set = rep(set, length.out = length(value))
for(i in seq(along = value))
newXMLNamespace(x, value[i], set = set[i])
x
})
newXMLNamespace = newNamespace =
# Create a new namespace reference object.
function(node, namespace, prefix = names(namespace), set = FALSE)
{
if(is.null(namespace))
return(NULL) # XXX
ns <- .Call("R_xmlNewNs", node, namespace, as.character(prefix), PACKAGE = "XML")
if(set)
setXMLNamespace(node, ns)
ns
}
checkNodeNamespace =
#
# can only be checked after we know the parent node,
# i.e. after it has been inserted.
#
function(node, prefix = xmlNamespace(node))
{
if(length(prefix) == 0 || prefix == "")
return(TRUE)
# XXX should check that namespace is defined
# walk the parents.
okay = FALSE
p = xmlParent(node)
while(!is.null(p)) {
okay = prefix %in% names(xmlNamespaceDefinitions(p))
if(okay)
break
}
if(!okay)
stop("using an XML namespace prefix '", prefix, "' for a node that is not defined for this node or its node's ancestors")
TRUE
}
# Still to do:
# element, entity, entity_ref, notation
# And more in libxml/tree.h, e.g. the declaration nodes
#
newXMLTextNode =
#
# cdata allows the caller to specify that the text be
# wrapped in a newXMLCDataNode
function(text, parent = NULL, doc = NULL, cdata = FALSE, escapeEntities = is(text, "AsIs"),
addFinalizer = NA)
{
if(cdata)
return(newXMLCDataNode(text, parent, doc, addFinalizer = addFinalizer))
a = .Call("R_newXMLTextNode", as.character(text), doc, addFinalizer, PACKAGE = "XML")
if(escapeEntities)
setNoEnc(a)
if(!is.null(parent))
addChildren(parent, a)
a
}
newXMLPINode <-
function(name, text, parent = NULL, doc = NULL, at = NA, addFinalizer = NA)
{
a = .Call("R_newXMLPINode", doc, as.character(name), as.character(text), addFinalizer, PACKAGE = "XML")
if(!is.null(parent))
addChildren(parent, a, at = at)
a
}
newXMLCDataNode <-
function(text, parent = NULL, doc = NULL, at = NA, sep = "\n", addFinalizer = NA)
{
text = paste(as.character(text), collapse = "\n")
a = .Call("R_newXMLCDataNode", doc, text, addFinalizer, PACKAGE = "XML")
if(!is.null(parent))
addChildren(parent, a, at = at)
a
}
newXMLCommentNode <-
function(text, parent = NULL, doc = NULL, at = NA, addFinalizer = NA)
{
a = .Call("R_xmlNewComment", as.character(text), doc, addFinalizer, PACKAGE = "XML")
if(!is.null(parent))
addChildren(parent, a, at = at)
a
}
replaceNodes =
function(oldNode, newNode, ...)
{
UseMethod("replaceNodes")
}
replaceNodes.list =
function(oldNode, newNode, addFinalizer = NA, ...)
{
mapply(replaceNodes, oldNode, newNode, MoreArgs = list(addFinalizer = addFinalizer, ...))
}
replaceNodes.XMLInternalNode =
function(oldNode, newNode, addFinalizer = NA, ...)
{
oldNode = as(oldNode, "XMLInternalNode")
#XXX deal with a list of nodes.
newNode = as(newNode, "XMLInternalNode")
.Call("RS_XML_replaceXMLNode", oldNode, newNode, addFinalizer, PACKAGE = "XML")
}
#
if(FALSE) # This is vectorized for no reason
"[[<-.XMLInternalNode" =
function(x, i, j, ..., value)
{
if(!is.list(value))
value = list(value)
if(is.character(i)) {
if(length(names(x)) == 0)
k = rep(NA, length(i))
else
k = match(i, names(x))
if(any(is.na(k))) {
# create a node with that name and text
value[is.na(k)] = mapply(function(name, val)
if(is.character(val))
newXMLNode(name, val)
else
val)
}
i = k
}
replace = (i <= xmlSize(x))
if(any(replace)) {
replaceNodes(xmlChildren(x)[i[replace]], value[replace])
value = value[!replace]
i = i[!replace]
}
if(length(i))
addChildren(x, kids = value, at = i)
x
}
"[[<-.XMLInternalNode" =
function(x, i, j, ..., value)
{
if(is.character(i)) {
if(length(names(x)) == 0)
k = NA
else
k = match(i, names(x))
if(is.na(k) && is.character(value) && !inherits(value, "AsIs")) {
# create a node with that name and text
value = newXMLNode(i, value)
}
i = k
}
replace = !is.na(i) & (i <= xmlSize(x))
if(replace)
replaceNodes(xmlChildren(x)[[i]], value)
else
addChildren(x, kids = list(value), at = i)
x
}
setNoEnc =
function(node)
{
if(!is(node, "XMLInternalTextNode"))
stop("setNoEnc can only be applied to an native/internal text node, not ", paste(class(node), collapse = ", "))
.Call("R_setXMLInternalTextNode_noenc", node, PACKAGE = "XML")
}
addChildren.XMLInternalNode =
addChildren.XMLInternalDocument =
#
# XXX need to expand/recycle the at if it is given as a scalar
# taking into account if the subsequent elements are lists, etc.
#
# Basically, if the caller specifies at as a scalar
# we expand this to be the sequence starting at that value
# and having length which is the total number of nodes
# in kids. This is not just the length of kids but
# the number of nodes since some of the elements might be lists.
#
function(node, ..., kids = list(...), at = NA, cdata = FALSE, addFinalizer = NA,
fixNamespaces = c(dummy = TRUE, default = TRUE))
{
kids = unlist(kids, recursive = FALSE)
removeNodes(kids[!vapply(kids, is.character, logical(1L))])
if(length(kids) == 1 && inherits(kids[[1]], "XMLInternalNode") && is.na(at)) {
.Call("R_insertXMLNode", kids[[1]], node, -1L, FALSE, PACKAGE = "XML")
# return(node)
} else {
# if(all(is.na(at))) {
# kids = lapply(kids, as, function(x) if(is.character(x)) newXMLTextNode(x) else as(x, "XMLInternalNode"))
# .Call("R_insertXMLNodeDirectly", node, kids, PACKAGE = "XML")
# return(node)
# }
if(!is.na(at)) {
# if at is the name of a child node, find its index (first node with that name)
if(is.character(at))
at = match(at, names(node))
if(length(at) == 1)
at = seq(as.integer(at), length = sum(sapply(kids, function(x) if(is.list(x)) length(x) else 1)))
else # pad with NAs
length(at) = length(kids)
return(lapply(seq(along = kids),
function(j) {
i = kids[[j]]
if(is.character(i))
i = newXMLTextNode(i, cdata = cdata, addFinalizer = addFinalizer)
if(!inherits(i, "XMLInternalNode")) #XX is(i, "XMLInternalNode")
i = as(i, "XMLInternalNode")
if(.Call("R_isNodeChildOfAt", i, node, as.integer(at[j]), PACKAGE = "XML"))
return(i)
if(is.na(at[j]))
.Call("R_insertXMLNode", i, node, -1L, FALSE, PACKAGE = "XML")
else {
after = at[j] > 0
if(!after)
at[j] = 1
if(xmlSize(node) < at[j])
.Call("R_insertXMLNode", i, node, as.integer(NA), FALSE, PACKAGE = "XML")
else
.Call("RS_XML_xmlAddSiblingAt", node[[ at[j] ]], i, after, addFinalizer, PACKAGE = "XML") # if at = 0, then shove it in before the sibling.
}
}))
}
for(j in seq(along = kids)) {
i = kids[[j]]
if(is.list(i)) { # can't happen now since we unlist()
for(k in i)
addChildren(node, k, addFinalizer = addFinalizer)
} else {
if(is.null(i))
next
if(is.character(i))
i = newXMLTextNode(i, cdata = cdata, addFinalizer = FALSE)
if(!inherits(i, "XMLInternalNode")) {
i = as(i, "XMLInternalNode")
}
.Call("R_insertXMLNode", i, node, at[j], FALSE, PACKAGE = "XML")
ns = attr(i, "xml:namespace")
if(!is.null(ns)) {
nsdef = findNamespaceDefinition(node, ns)
if(!is.null(nsdef) && (inherits(nsdef, c("XMLNamespaceRef", "XMLNamespaceDeclaration")) || (is.character(nsdef) && nsdef != ""))) {
setXMLNamespace( i, nsdef)
attr(i, "xml:namespace") = NULL
}
}
}
}
}
if(!is(node, "XMLInternalDocument") && any(fixNamespaces))
xmlFixNamespaces(node, fixNamespaces)
node
}
addSibling =
function(node, ..., kids = list(...), after = NA)
{
UseMethod("addSibling")
}
addSibling.XMLInternalNode =
function(node, ..., kids = list(...), after = TRUE, addFinalizer = NA)
{
#XXX Why add as children?
if(FALSE && is.na(after))
addChildren(node, kids = kids, at = NA)
else {
lapply(kids,
function(x) {
.Call("RS_XML_xmlAddSiblingAt", node, x, as.logical(after), addFinalizer, PACKAGE = "XML")
})
}
}
removeNodes =
function(node, free = rep(FALSE, length(node)))
UseMethod("removeNodes")
removeNodes.default =
function(node, free = rep(FALSE, length(node)))
NULL
removeNodes.list = removeNodes.XMLNodeList =
function(node, free = rep(FALSE, length(node)))
{
if(!all(sapply(node, inherits, "XMLInternalNode"))) {
warning("removeNode only works on internal nodes at present")
return(NULL)
}
free = as.logical(free)
free = rep(free, length = length(node))
.Call("R_removeInternalNode", node, free, PACKAGE = "XML")
}
removeNodes.XMLNodeSet =
function(node, free = rep(FALSE, length(node)))
{
removeNodes.list(node, free)
}
removeNodes.XMLInternalNode =
function(node, free = rep(FALSE, length(node)))
{
node = list(node)
free = as.logical(free)
.Call("R_removeInternalNode", node, free, PACKAGE = "XML")
}
removeChildren =
function(node, ..., kids = list(...), free = FALSE)
{
UseMethod("removeChildren")
}
removeChildren.XMLNode =
#
#
function(node, ..., kids = list(...), free = FALSE)
{
kidNames = names(node)
w = sapply(kids,
function(i) {
orig = i
if(length(i) > 1)
warning("each node identifier should be a single value, i.e. a number or a name, not a vector. Ignoring ",
paste(i[-1], collapse = ", "))
if(!inherits(i, "numeric"))
i = match(i, kidNames)
if(is.na(i)) {
warning("can't find node identified by ", orig)
i = 0
}
i
})
node$children = unclass(node)$children[ - w ]
node
}
removeChildren.XMLInternalNode =
function(node, ..., kids = list(...), free = FALSE)
{
# idea is to get the actual XMLInternalNode objects
# corresponding the identifiers in the kids list.
# These are numbers, node names or node objects themselves
# This could be fooled by duplicates, e.g. kids = list(2, 2)
# or kids = list(2, "d") where "d" identifies the second node.
# We can put in stricter checks in the C code if needed.
nodes = xmlChildren(node)
nodeNames = xmlSApply(node, xmlName)
v = lapply(kids,
function(x) {
if(inherits(x, "XMLInternalNode"))
x
else if(is.character(x)) {
i = match(x, nodeNames)
nodes[[i]]
} else
nodes[[as.integer(x)]]
})
free = rep(free, length = length(v))
.Call("RS_XML_removeChildren", node, v, as.logical(free), PACKAGE = "XML")
node
}
replaceNodeWithChildren =
function(node)
{
if(!inherits(node, "XMLInternalNode"))
stop("replaceNodeWithChildren only work on internal XML/HTML nodes")
.Call("R_replaceNodeWithChildren", node, PACKAGE = "XML")
}
setGeneric("toHTML",
function(x, context = NULL) standardGeneric("toHTML"))
setMethod('toHTML', 'vector',
function(x, context = NULL) {
tb = newXMLNode("table")
if(length(names(x)) > 0)
addChildren(tb, newXMLNode("tr", .children = sapply(names(x), function(x) newXMLNode("th", x))))
addChildren(tb, newXMLNode("tr", .children = sapply(x, function(x) newXMLNode("th", format(x)))))
tb
})
setMethod('toHTML', 'matrix',
function(x, context = NULL) {
tb = newXMLNode("table")
if(length(colnames(x)) > 0)
addChildren(tb, newXMLNode("tr", .children = sapply(names(x), function(x) newXMLNode("th", x))))
rows = sapply(seq(length = nrow(x)),
function(i) {
row = newXMLNode("tr")
if(length(rownames(x)) > 0)
addChildren(row, newXMLNode("th", rownames(x)[i]))
addChildren(row, .children = sapply(x[i,], function(x) newXMLNode("th", format(x))))
row
})
addChildren(tb, rows)
tb
})
SpecialCallOperators =
c("+", "-", "*", "/", "%*%", "%in%", ":")
#XXX Not necessarily working yet! See RXMLDoc
setMethod('toHTML', 'call',
function(x, context) {
# handle special operators like +, -, :, ...
if(as.character(v[[1]]) %in% SpecialCallOperators) {
}
v = newXMLNode(x[[1]], "(")
for(i in v[-1])
addChildren(v, toHTML( i , context))
v
})
setAs("vector", "XMLInternalNode",
function(from) {
newXMLTextNode(as(from, "character"))
})
print.XMLInternalDocument =
function(x, ...)
{
cat(as(x, "character"), "\n")
}
print.XMLInternalNode =
function(x, ...)
{
cat(as(x, "character"), "\n")
}
setAs("XMLInternalNode", "character",
function(from) saveXML.XMLInternalNode(from))
setAs("XMLInternalTextNode", "character",
function(from) xmlValue(from))
checkAttrNamespaces =
function(nsDefs, .attrs, suppressNamespaceWarning)
{
ns = sapply(strsplit(names(.attrs), ":"),
function(x) if(length(x) > 1) x[1] else NA)
i = which(!is.na(ns))
m = match(ns[i], names(nsDefs))
if(any(is.na(m))) {
f = if(is.character(suppressNamespaceWarning))
get(suppressNamespaceWarning, mode = "function")
else
warning
f(paste("missing namespace definitions for prefix(es)", paste(ns[i][is.na(m)])))
}
}
setGeneric("addAttributes",
function(node, ..., .attrs = NULL, suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE), append = TRUE)
standardGeneric("addAttributes"))
setMethod("addAttributes", "XMLInternalElementNode",
function(node, ..., .attrs = NULL, suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE), append = TRUE)
{
if(missing(.attrs))
.attrs = list(...)
.attrs = structure(as.character(.attrs), names = names(.attrs))
if(length(.attrs) == 0)
return(node)
if(is.null(names(.attrs)) || any(names(.attrs) == ""))
stop("all node attributes must have a name")
if(is.character(suppressNamespaceWarning) || !suppressNamespaceWarning)
checkAttrNamespaces(getEffectiveNamespaces(node), .attrs, suppressNamespaceWarning)
if(!append)
removeAttributes(node, .all = TRUE)
.Call("RS_XML_addNodeAttributes", node, .attrs, PACKAGE = "XML")
node
})
#if(!isGeneric("xmlAttrs<-"))
setGeneric("xmlAttrs<-", function(node, append = TRUE, suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE), value)
standardGeneric("xmlAttrs<-"))
tmp =
function(node, append = TRUE, suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE), value)
{
addAttributes(node, .attrs = value, suppressNamespaceWarning = suppressNamespaceWarning, append = append)
}
setMethod("xmlAttrs<-", "XMLInternalElementNode", tmp)
setMethod("xmlAttrs<-", "XMLNode", tmp)
setMethod("addAttributes", "XMLNode",
function(node, ..., .attrs = NULL, suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE), append = TRUE) {
if(missing(.attrs))
.attrs = list(...)
.attrs = structure(as.character(.attrs), names = names(.attrs))
if(is.null(names(.attrs)) || any(names(.attrs) == ""))
stop("all node attributes must have a name")
if(is.character(suppressNamespaceWarning) || !suppressNamespaceWarning)
checkAttrNamespaces(getEffectiveNamespaces(node), .attrs, suppressNamespaceWarning)
if(append) {
i = match(names(.attrs), names(node$attributes))
if(any(!is.na(i))) {
node$attributes[i[!is.na(i)]] = .attrs[!is.na(i)]
.attrs = .attrs[is.na(i)]
}
node$attributes = c(node$attributes, .attrs)
} else
node$attributes = .attrs
node
})
setGeneric("removeAttributes", function(node, ..., .attrs = NULL, .namespace = FALSE,
.all = (length(list(...)) + length(.attrs)) == 0)
standardGeneric("removeAttributes"))
setGeneric("removeXMLNamespaces",
function(node, ..., all = FALSE, .els = unlist(list(...)))
standardGeneric("removeXMLNamespaces"))
setMethod("removeXMLNamespaces", "XMLInternalElementNode",
function(node, ..., all = FALSE, .els = unlist(list(...))) {
if(all)
.Call("RS_XML_removeAllNodeNamespaces", node, PACKAGE = "XML")
else {
if(is.character(.els))
.els = lapply(.els, function(x) x)
.Call("RS_XML_removeNodeNamespaces", node, .els, PACKAGE = "XML")
}
})
setMethod("removeAttributes", "XMLInternalElementNode",
#
# The idea here is to remove attributes by name
# We handle the case where these are a simple collection
# of character string identifiers given via the ... or as a character
# vector using, e.g., .attrs = c("a", "b")
#
# Each identifier can be of the form "name" or "ns:name" giving
# the namespace prefix. We resolve the namespace and
#
# If we are dealing with regular attributes (no namespace attributes)
# then we expect these as a character vector.
#
# The intent of the .namespace argument was originally to indicate that
# we wanted to remove the namespace definition. It appears that libxml2 does
# not support that. (And it would seem that this is a real pain as the xmlNsPtr
# objects can be shared across numerous places in a linked list, so it would
# be very difficult to remove it from one node.)
#
#
#
function(node, ..., .attrs = NULL, .namespace = FALSE,
.all = (length(list(...)) + length(.attrs)) == 0)
{
if(missing(.attrs))
.attrs = list(...)
.attrs = as.character(.attrs)
if(.all) {
if(length(list(...)) || length(.attrs))
stop(".all specified as TRUE and individual values specified via .../.attrs")
# Use the integer indices to identify the elements.
.Call("RS_XML_removeNodeAttributes", node, seq(along = xmlAttrs(node)), FALSE, PACKAGE = "XML")
return(node)
}
if(is(.namespace, "XMLNamespaceDeclaration"))
.namespace = list(.namespace)
#XXX
tmp = strsplit(.attrs, ":")
prefix = sapply(tmp, function(x) if(length(x) > 1) x[1] else "")
ids = sapply(tmp, function(x) if(length(x) == 1) x[1] else x[2])
if(any(prefix != "") && is.logical(.namespace))
.namespace = TRUE
if(is.logical(.namespace) && .namespace) {
ns = namespaceDeclarations(node, TRUE)
# need to create a list with the elements corresponding to the
# (potentially repeated) ns elements
i = match(prefix, names(ns))
ns = ns[i]
names(ns) = gsub("^.*:", "", .attrs) # or ids from above
.attrs = ns
}
.Call("RS_XML_removeNodeAttributes", node, .attrs, .namespace, PACKAGE = "XML")
node
})
setMethod("removeAttributes", "XMLNode",
function(node, ..., .attrs = NULL, .namespace = FALSE,
.all = (length(list(...)) + length(.attrs)) == 0)
{
a = node$attributes
if(missing(.attrs))
.attrs = list(...)
.attrs = as.character(.attrs)
if(.all) {
if(length(.attrs))
stop("Both individual attribute names and .all specified")
node$attributes = character()
return(node)
}
i = match(.attrs, names(a))
if(any(is.na(i)) )
warning("Can't locate attributes ", paste(.attrs[is.na(i)], collapse = ", "), "in XML node ", node$name)
a = a[is.na(i)]
node$attributes <- a
node
})
#xmlNamespaceDefinitions = # ??? added this but overrides other S3 generic.
namespaceDeclarations =
function(node, ref = FALSE, ...)
{
.Call("RS_XML_getNsList", node, as.logical(ref), PACKAGE = "XML")
}
"xmlName<-" =
function(x, value)
{
UseMethod("xmlName<-")
}
"xmlName<-.XMLNode" <-
function(x, value)
{
x$name <- value
x
}
"xmlName<-.XMLInternalElementNode" <-
function(x, value)
{
# we could handle a new namespace by accepting value as
# a character vector with a name
# e.g. c(r:array = 'http://www.r-project.org')
# Alternatively, just define the namespace on the node _before_
# changing the name.
id = names(value)
if(!is.null(id) && length( (tmp <- strsplit(id, ":")[[1]])) > 1) {
names(value) = tmp[1]
newXMLNamespaces(x, .values = as(value, "character"))
value = id
}
.Call("RS_XML_setNodeName", x, value, PACKAGE = "XML")
x
}
newXMLNamespaces =
# allow for multiple namespaces
# and also allow for "r:value"
#
# newXMLNamespaces(node, r = "http://www.r-project.org", ...)
#
function(node, ..., .values = list(...))
{
ids = names(.values)
ans = lapply(ids, function(id)
newNamespace(node, id, as.character(.values[[id]])))
names(ans) = ids
ans
}
xmlNodeMatch =
function(x, table, nomatch = NA_integer_)
{
.Call("R_matchNodesInList", x, table, as.integer(nomatch), PACKAGE = "XML")
}
setGeneric("xmlClone",
function(node, recursive = TRUE, addFinalizer = FALSE, ...)
{
oclass = class(node)
ans = standardGeneric("xmlClone")
if(!isS4(node))
class(ans) = oclass
ans
})
setMethod("xmlClone", "XMLInternalDocument",
function(node, recursive = TRUE, addFinalizer = NA, ...)
{
ans = .Call("RS_XML_clone", node, as.logical(recursive), addFinalizer, PACKAGE = "XML")
addDocFinalizer(ans, addFinalizer)
ans
})
setMethod("xmlClone", "XMLInternalNode",
function(node, recursive = TRUE, addFinalizer = FALSE, ...)
{
ans = .Call("RS_XML_clone", node, as.logical(recursive), addFinalizer, PACKAGE = "XML")
})
ensureNamespace =
#
# Idea is to make certain that the root node has definitions for the specified
# namespaces. The caller specifies the named vector of interest.
# If the URL already exists, we return the corresponding prefix.
#
#
# Returns the prefixes in the documents that correspond to the
# namespace definitions
#
function(doc, what)
{
if(is(doc, "XMLInternalDocument"))
node = xmlRoot(doc)
else
node = doc
defs = xmlNamespaceDefinitions(xmlRoot(doc), simplify = TRUE)
i = match(what, defs)
w = is.na(i)
if(any(w)) {
sapply(names(what)[w], function(id) newXMLNamespace(node, what[id], id))
names(what)[w]
} else
names(defs)[i]
}
"xmlParent<-" =
function(x, ..., value) {
addChildren(value, ..., kids = list(x))
}
setOldClass("XMLNamespaceRef")
setAs("XMLNamespaceRef", "character",
function(from) {
.Call("R_convertXMLNsRef", from, PACKAGE = "XML")
})
xmlSearchNs =
function(node, ns, asPrefix = TRUE, doc = as(node, "XMLInternalDocument"))
{
.Call("R_xmlSearchNs", doc, node, as.character(ns), as.logical(asPrefix), PACKAGE = "XML")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.