xmlSchemaParse =
function(file, asText = FALSE, xinclude = TRUE, error = xmlErrorCumulator())
{
xmlParse(file, asText = asText, isSchema = TRUE, xinclude = xinclude, error = error)
}
BOMRegExp = "(\\xEF\\xBB\\xBF|\\xFE\\xFF|\\xFF\\xFE)"
xmlTreeParse <-
#
# XML parser that reads the entire `document' tree into memory
# and then converts it to an R/S object.
# Uses the libxml from Daniel Veillard at W3.org.
#
# asText treat the value of file as XML text, not the name of a file containing
# the XML text, and parse that.
#
#
function(file, ignoreBlanks = TRUE, handlers = NULL,
replaceEntities = FALSE, asText = FALSE, trim = TRUE, validate = FALSE, getDTD = TRUE,
isURL = FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
useInternalNodes = FALSE, isSchema = FALSE,
fullNamespaceInfo = FALSE, encoding = character(),
useDotNames = length(grep("^\\.", names(handlers))) > 0,
xinclude = TRUE, addFinalizer = TRUE, error = xmlErrorCumulator(), isHTML = FALSE, options = integer(),
parentFirst = FALSE)
{
isMissingAsText = missing(asText)
if(length(file) > 1) {
file = paste(file, collapse = "\n")
if(!missing(asText) && !asText)
stop(structure(list(message = "multiple URLs passed to xmlTreeParse. If this is the content of the file, specify asText = TRUE"),
class = c("MultipleURLError", "XMLParserError", "simpleError", "error", "condition")))
asText = TRUE
}
if(missing(isURL) && !asText)
isURL <- length(grep("^(http|ftp|file)://", file, useBytes = TRUE, perl = TRUE))
if(file == "" || length(file) == 0)
stop("empty or no content specified")
if(isHTML) {
validate = FALSE
getDTD = FALSE
isSchema = FALSE
docClass = "HTMLInternalDocument"
} else
docClass = character()
checkHandlerNames(handlers, "DOM")
if(missing(fullNamespaceInfo) && inherits(handlers, "RequiresNamespaceInfo"))
fullNamespaceInfo = TRUE
oldValidate = xmlValidity()
xmlValidity(validate)
on.exit(xmlValidity(oldValidate))
# check whether we are treating the file name as
# a) the XML text itself, or b) as a URL.
# Otherwise, check if the file exists and report an error.
if(!asText && isURL == FALSE) {
if(file.exists(file) == FALSE)
if(!missing(asText) && asText == FALSE) {
e = simpleError(paste("File", file, "does not exist"))
class(e) = c("FileNotFound", class(e))
stop(e)
}
else
asText <- TRUE
}
if(asText && length(file) > 1)
file = paste(file, collapse = "\n")
old = setEntitySubstitution(replaceEntities)
on.exit(setEntitySubstitution(old), add = TRUE)
# Look for a < in the string.
if(asText && length(grep(sprintf("^%s?\\s*<", BOMRegExp), file, perl = TRUE, useBytes = TRUE)) == 0) { # !isXMLString(file) ?
if(!isHTML || (isMissingAsText && !inherits(file, "AsIs"))) {
e = simpleError(paste("XML content does not seem to be XML:", if(file.exists(file)) file else sQuote(substring(file, 100))))
class(e) = c("XMLInputError", class(e))
(if(isHTML) warning else stop)(e)
}
}
if(!is.logical(xinclude)) {
# if(is(xinclude, "numeric"))
# xinclude = bitlist(xinclude) # see bitList.R
# else
xinclude = as.logical(xinclude)
}
if(!asText && !isURL)
file = path.expand(as.character(file))
if(useInternalNodes && trim) {
prevBlanks = .Call("RS_XML_setKeepBlanksDefault", 0L, PACKAGE = "XML")
on.exit(.Call("RS_XML_setKeepBlanksDefault", prevBlanks, PACKAGE = "XML"), add = TRUE)
}
.oldErrorHandler = setXMLErrorHandler(error)
on.exit(.Call("RS_XML_setStructuredErrorHandler", .oldErrorHandler, PACKAGE = "XML"), add = TRUE)
if(length(options))
options = sum(options) #XXX coerce to parser options
ans <- .Call("RS_XML_ParseTree", as.character(file), handlers,
as.logical(ignoreBlanks), as.logical(replaceEntities),
as.logical(asText), as.logical(trim), as.logical(validate), as.logical(getDTD),
as.logical(isURL), as.logical(addAttributeNamespaces),
as.logical(useInternalNodes), as.logical(isHTML), as.logical(isSchema),
as.logical(fullNamespaceInfo), as.character(encoding), as.logical(useDotNames),
xinclude, error, addFinalizer, as.integer(options), as.logical(parentFirst), PACKAGE = "XML")
if(!missing(handlers) && length(handlers) && !as.logical(asTree))
return(handlers)
if(!isSchema && length(class(ans)))
class(ans) = c(docClass, oldClass(class(ans)))
if(inherits(ans, "XMLInternalDocument"))
addDocFinalizer(ans, addFinalizer)
else if(!getDTD && !isSchema) {
#??? is this a good idea.
class(ans) = oldClass("XMLDocumentContent")
}
ans
}
xmlNativeTreeParse = xmlInternalTreeParse = xmlTreeParse
formals(xmlNativeTreeParse)[["useInternalNodes"]] = TRUE
formals(xmlInternalTreeParse)[["useInternalNodes"]] = TRUE
xmlParse = xmlNativeTreeParse
if(FALSE) {
# Another approach is to just change the call, as below, but this is tricky
# to get evaluation of arguments, etc. right.
tmp.xmlInternalTreeParse =
function(file, ignoreBlanks = TRUE, handlers=NULL,
replaceEntities=FALSE, asText=FALSE, trim=TRUE, validate=FALSE, getDTD=TRUE,
isURL=FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
isSchema = FALSE,
fullNamespaceInfo = FALSE, encoding = character(),
useDotNames = length(grep("^\\.", names(handlers))) > 0, # will be switched to TRUE in the future.
xinclude = TRUE, addFinalizer = TRUE)
{
e = sys.call()
e[[1]] = as.name("xmlTreeParse")
e[[length(e) + 1]] = FALSE
names(e)[length(e)] = "useInternalNodes"
eval(e, parent.env())
}
# Could try adding this to the top of xmlTreeParse
# But it won't work with, e.g. lapply(fileNames, xmlInternalTreeParse)
# if(missing(useInternalNodes) && as.character(sys.call()[[1]]) == "xmlInternalTreeParse")
# useInternalNodes = FALSE
}
setGeneric("getEncoding",
function(obj, ...)
{
standardGeneric("getEncoding")
})
setMethod("getEncoding", "ANY", function(obj, ...) NA)
setMethod("getEncoding", "XMLInternalDocument",
function(obj, ...) {
.Call("R_getDocEncoding", obj, PACKAGE = "XML")
})
setMethod("getEncoding", "XMLInternalNode",
function(obj, ...) {
.Call("R_getDocEncoding", obj, PACKAGE = "XML")
})
if(FALSE) {
setMethod("getEncoding", "XMLInternalDOM",
function(obj, ...) {
getEncoding(obj)
})
}
xmlValidity =
function(val = integer(0))
{
.Call("RS_XML_getDefaultValiditySetting", as.integer(val), PACKAGE = "XML")
}
processXInclude =
function(node, flags = 0L)
UseMethod("processXInclude")
processXInclude.list =
function(node, flags = 0L)
{
lapply(node, processXInclude, flags)
}
processXInclude.XMLInternalDocument =
function(node, flags = 0L)
{
.Call("RS_XML_xmlXIncludeProcessFlags", node, as.integer(flags), PACKAGE = "XML")
}
processXInclude.XMLInternalElementNode =
function(node, flags = 0L)
{
# if(xmlName(node) != "include") # Should check name space also
# stop("can only process XInclude on include nodes")
.Call("RS_XML_xmlXIncludeProcessTreeFlags", node, as.integer(flags), PACKAGE = "XML")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.