Nothing
#' Run XPath query
#'
#' This function finds matches for an XPath query in a corpus.
#'
#' @param x A corpus: an [`fnames`] object, a character vector of an XML source,
#' or a document parsed with [xml2::read_xml()].
#' @param pattern An XPath query.
#' @param fun Function to be applied to the individual nodes prior
#' to returning the result.
#' @param final_fun Function to be applied to the complete list
#' of matches prior to returning the result.
#' @param namespaces A namespace as generated by [xml2::xml_ns()].
#' @param ... Additional arguments.
#'
#' @return A nodeset or the output of applying `fun` to a nodeset.
#' @export
#'
#' @examples
#' test_xml <- '
#' <p>
#' <w pos="at">The</w>
#' <w pos="nn">example</w>
#' <punct>.</punct>
#' </p>'
#'
#' find_xpath(test_xml, "//w")
#' find_xpath(test_xml, "//@pos")
#' find_xpath(test_xml, "//w[@pos='nn']")
#'
#' find_xpath(test_xml, "//w", fun = xml2::xml_text)
#' find_xpath(test_xml, "//w", fun = xml2::xml_attr, attr = "pos")
find_xpath <- function(x,
pattern,
fun = NULL,
final_fun = NULL,
namespaces = NULL,
...) {
# TODO write tutorial
if ("xml_document" %in% class(x)) {
x <- list(x)
}
res <- vector("list", length(x))
for (i in seq_along(x)) {
cur <- x[[i]]
if (is.character(cur)) {
cur <- xml2::read_xml(cur)
}
if (!("xml_document" %in% class(cur) || "xml_node" %in% class(cur))) {
stop("x must be a character vector or an xml document")
}
#res[[i]] <- cur[pattern]
if (is.null(namespaces)) {
namespaces = xml2::xml_ns(cur)
}
res[[i]] <- xml2::xml_find_all(cur, pattern,
ns = namespaces, ...)
}
res <- structure(do.call(c, res), class = "xml_nodeset")
if (!is.null(fun)) {
res <- fun(res, ...)
}
if (!is.null(final_fun)) {
res <- do.call(final_fun, list(res))
}
res
}
#' Get text from xml node
#'
#' @param node XML node as read with [xml2::read_xml()].
#' @inheritParams xml2::xml_text
#'
#' @return Character vector: The text value of the (elements of the) node,
#' concatenated with spaces in between.
#' @export
#'
#' @examples
#' test_xml <- '
#' <p>
#' <w pos="at">The</w>
#' <w pos="nn">example</w>
#' <punct>.</punct>
#' </p>'
#'
#' test_xml_parsed <- xml2::read_xml(test_xml)
#'
#' # xml2 output
#' xml2::xml_text(test_xml_parsed)
#'
#' # mclm version
#' mclm_xml_text(test_xml_parsed)
mclm_xml_text <- function(node, trim = FALSE) {
if (xml2::xml_length(node) == 0) {
xml2::xml_text(node, trim = trim)
} else {
paste(vapply(xml2::xml_contents(node),
mclm_xml_text, trim = trim, FUN.VALUE = character(1)),
collapse = " ")
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.