Nothing
# Adapt this to be able to specify an XPath expression to identify a list of nodes.
setGeneric("xmlToDataFrame",
#
# Read a relatively flat, 2-level deep XML document into a data frame.
# The document is assumed to be something of the form
# <top>
# <obs>
# <var1>value</var1>
# <var2>value</var2>
# <var3>value</var3>
# </obs>
# <obs>
# <var1>value</var1>
# <var2>value</var2>
# <var3>value</var3>
# </obs>
# </top>
#
# This can handle cases where not all observations have the same
# fields.
#
# z = xmlToDataFrame("~/size.xml")
# z = xmlToDataFrame("~/size.xml", c("integer", "integer", "numeric"))
#
function(doc, colClasses = NULL, homogeneous = NA, collectNames = TRUE, nodes = list(), stringsAsFactors = FALSE)
standardGeneric("xmlToDataFrame"))
setMethod("xmlToDataFrame", "character",
# parse the XML document if it is a file name and
# not a regular XML document already.
function(doc, colClasses = NULL, homogeneous = NA, collectNames = TRUE, nodes = list(), stringsAsFactors = FALSE)
xmlToDataFrame(xmlParse(doc), colClasses, homogeneous, collectNames, stringsAsFactors = stringsAsFactors))
setMethod("xmlToDataFrame", c("XMLInternalDocument", nodes = "missing"),
function(doc, colClasses = NULL, homogeneous = NA, collectNames = TRUE, nodes = list(), stringsAsFactors = FALSE)
xmlToDataFrame(doc, colClasses, homogeneous, collectNames, nodes = xmlChildren(xmlRoot(doc)), stringsAsFactors))
tmp =
function(doc, colClasses = NULL, homogeneous = NA, collectNames = TRUE, nodes = list(), stringsAsFactors = FALSE)
{
if(length(nodes) == 0)
return(data.frame())
# Find out how many fields there.
nfields = sapply(nodes, xmlSize)
nvar = max(nfields)
if(collectNames)
varNames = unique(unlist( lapply(nodes, names) ))
else
varNames = names(nodes[[which.max(nfields)]])
if(is.na(homogeneous))
homogeneous = all(nfields == nvar) && all(sapply(nodes[-1], function(x) all(names(x) == varNames)))
if(!homogeneous)
return(fromRaggedXML2DataFrame(nodes, varNames, c(length(nfields), length(varNames)), colClasses, stringsAsFactors))
# Function to operate on each
fun = function(x) {
tmp = xmlSApply(x, xmlValue)
length(tmp) = nvar
tmp
}
# Get the individual values
vals = unlist(lapply(nodes, fun))
ans = matrix(vals, length(nfields), byrow = TRUE)
ans =
if(length(colClasses)) {
as.data.frame(lapply(seq(along = colClasses),
function(i) {
as(ans[, i], colClasses[i])
}), stringsAsFactors = stringsAsFactors)
} else
as.data.frame(ans, stringsAsFactors = stringsAsFactors)
names(ans) = varNames
ans
}
bob =
function(doc, colClasses = NULL, homogeneous = NA, collectNames = TRUE, nodes = list(), stringsAsFactors = FALSE)
xmlToDataFrame(nodes = doc, colClasses = colClasses, homogeneous = homogeneous, collectNames = collectNames, stringsAsFactors = stringsAsFactors)
setMethod("xmlToDataFrame", c(nodes = "XMLNodeSet"), tmp)
setMethod("xmlToDataFrame", c(nodes = "list"), tmp)
setOldClass("XMLInternalNodeList")
setMethod("xmlToDataFrame", c(nodes = "XMLInternalNodeList"), tmp)
setMethod("xmlToDataFrame", "XMLNodeSet", bob)
setMethod("xmlToDataFrame", "XMLInternalNodeList", bob)
setMethod("xmlToDataFrame", "list", bob)
setMethod("xmlToDataFrame", "XMLInternalElementNode",
function(doc, colClasses = NULL, homogeneous = NA, collectNames = TRUE, nodes = list(), stringsAsFactors = FALSE)
xmlToDataFrame(nodes = xmlChildren(doc), colClasses = colClasses, homogeneous = homogeneous, collectNames = collectNames, stringsAsFactors = stringsAsFactors))
fromRaggedXML2DataFrame =
#
# This reads data from the nodes of an XML document and assumes
# that they do not all have the same number or even names of fields.
# So this does extra work to match each observation to the union of
# the field names across all nodes.
#
# o = fromRaggedXML2DataFrame("size2.xml")
# o = fromRaggedXML2DataFrame("size1.xml")
#
function(nodes, varNames = unique(unlist( lapply(nodes, names) )),
dims = c(length(nodes), length(varNames)), colClasses = NULL,
stringsAsFactors = FALSE)
{
#XXX
if(is.character(nodes))
nodes = xmlChildren(xmlRoot(xmlParse(nodes)))
# create an empty data frame with as many rows and columns as needed.
ans = as.data.frame(replicate(dims[2], rep(as.character(NA), dims[1]), simplify = FALSE), stringsAsFactors = FALSE)
names(ans) = varNames
# Fill in the rows based on the names.
for(i in seq(length = dims[1]))
ans[i, names(nodes[[i]])] = xmlSApply(nodes[[i]], xmlValue)
# Convert the columns to the specified classes if specified.
# Should drop cols with NULL. Also guess those with NA.
if(length(colClasses)) {
i = ! sapply(colClasses, is.null)
ans = ans[ i ]
varNames = varNames[i]
colClasses = colClasses[ i ]
ans = as.data.frame(lapply(seq(length = ncol(ans)),
function(i) {
as(ans[, i], colClasses[[i]])
}), stringsAsFactors = stringsAsFactors)
}
names(ans) = varNames
ans
}
setGeneric("xmlAttrsToDataFrame",
function(doc, attrs = character(), omit = character(), ...)
standardGeneric("xmlAttrsToDataFrame"))
setMethod("xmlAttrsToDataFrame", "character",
function(doc, attrs = character(), omit = character(), ...)
xmlAttrsToDataFrame(xmlParse(doc), attrs, omit, ...))
setMethod("xmlAttrsToDataFrame", "AsIs",
function(doc, attrs = character(), omit = character(), ...)
xmlAttrsToDataFrame(xmlParse(doc), attrs, omit, ...))
setMethod("xmlAttrsToDataFrame", "XMLInternalElementNode",
function(doc, attrs = character(), omit = character(), ...)
xmlAttrsToDataFrame(xmlChildren(doc), attrs, omit, ...))
setMethod("xmlAttrsToDataFrame", "XMLNodeSet",
function(doc, attrs = character(), omit = character(), ...) {
xmlAttrsToDataFrame(as(doc, 'list'), attrs, omit, ...)
})
setMethod("xmlAttrsToDataFrame", "list",
function(doc, attrs = character(), omit = character(), ...) {
# assuming these are all nodes.
combineNamedVectors(lapply(doc, xmlAttrs), attrs, omit, ...)
})
setMethod("xmlAttrsToDataFrame", "XMLInternalNodeList",
function(doc, attrs = character(), omit = character(), ...) {
# assuming these are all nodes.
combineNamedVectors(lapply(doc, xmlAttrs), attrs, omit, ...)
})
inAllRecords =
function(x)
{
tt = table(unlist(lapply(x, names)))
names(tt)[ tt == length(x)]
}
allNames =
function(x)
unique( unlist(lapply(x, names)) )
combineNamedVectors =
function(els, attrs = character(), omit = character(), ...)
{
if(is.function(attrs))
attrs = attrs(els)
if(!length(attrs)) {
attrs = allNames(els)
if(length(omit))
attrs = setdiff(attrs, omit)
}
if(length(attrs) == 0) {
warning("no elements to combine across records")
return(data.frame())
}
values = lapply(els, function(x) {
structure(x[attrs], names = attrs)
})
ans = as.data.frame(do.call(rbind, values), row.names = NULL, ...)
rownames(ans) = NULL
ans
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.