R/xmlEventParse.R

Defines functions xmlParserContextFunction xmlStopParser mkSubstringByBytes checkHandlerNames

Documented in xmlParserContextFunction xmlStopParser

GeneralHandlerNames =
  list(SAX =  c("text", "startElement", "endElement", "comment",
                 "startDocument", "endDocument",
                 "processingInstruction", "entityDeclaration",  "externalEntity"),
       DOM =  c("text", "startElement", "comment", "entity", "cdata",
                 "processingInstruction"))

checkHandlerNames =
function(handlers, id = "SAX")
{
  if(is.null(handlers))
    return(TRUE)

  ids = names(handlers)
  i = match(ids, GeneralHandlerNames)
  prob = any(!is.na(i))
  if(prob) {
    warning("future versions of the XML package will require names of general handler functions to be prefixed by a . to distinguish them from handlers for nodes with those names.  This _may_ affect the ", paste(names(handlers)[!is.na(i)], collapse = ", "))
  }

  if(any(w <- !sapply(handlers, is.function)))
     warning("some handlers are not functions: ", paste(names(handlers[w]), collapse = ", "))

  !prob
}

xmlEventParse <-
#
# Parses an XML file using an event parser which calls user-level functions in the
# `handlers' collection when different XML nodes are encountered in the parse stream.
#
# See also xmlParseTree()
#
function(file, handlers = xmlEventHandler(), ignoreBlanks = FALSE, addContext = TRUE,
          useTagName = TRUE, asText = FALSE, trim=TRUE, useExpat = FALSE,
          isURL=FALSE, state = NULL,
          replaceEntities = TRUE, validate = FALSE, saxVersion = 1,
          branches = NULL,  useDotNames =  length(grep("^\\.", names(handlers))) > 0,
          error = xmlErrorCumulator(), addFinalizer = NA, encoding = character())
{
  if(libxmlVersion()$major < 2 && !is.character(file))
    stop("Without libxml2, the source of the XML can only be specified as a URI.")


  i = grep("^/", names(handlers))
  if(length(i)) {
    endElementHandlers = handlers[i]
    names(endElementHandlers) = gsub("^/", "", names(endElementHandlers))
    handlers = handlers[ - i]
  } else
    endElementHandlers = list()


  checkHandlerNames(handlers, "SAX")

  if(validate)
    warning("Currently, libxml2 does support validation using SAX/event-driven parsing. It requires a DOM.")
  else {
      oldValidate = xmlValidity()
      xmlValidity(validate)
      on.exit(xmlValidity(oldValidate))
  }

  if(!any(saxVersion == c(1, 2))) {
     stop("saxVersion must be 1 or 2")
  }


  if(inherits(file, "connection")) {
    con = file
    if(!isOpen(file)) {
      open(file, "r")
      on.exit(close(con))
    }

    leftOver = ""
    file = function(len) {
       if(nchar(leftOver) > 0) {
           txt = leftOver
       } else {
            #  txt = readBin(con, "", n = len - 1L)
           txt = readLines(con, 1)
       }

       if(length(txt) == 0)
          return(txt)

       if(len < nchar(txt, "bytes")) {
           tmp = mkSubstringByBytes(txt, len)
          leftOver <<- tmp[2]  # substring(txt, len - 1)
	  txt =tmp[1] #  substring(txt, 1, len - 2)
       } else
          leftOver <<- ""


        paste(txt, "\n", sep = "")
      }

  } else if(is.function(file)) {
      # call with -1 to allow us to close the connection
      # if necessary.
    on.exit(file(-1))
  } else {
   if(!asText && missing(isURL)) {
        # check if this is a URL or regular file.
     isURL <- length(grep("http://",file)) | length(grep("ftp://",file)) | length(grep("file://",file))
   }

   if(isURL == FALSE && asText == FALSE) {
    file = path.expand(file)
    if(file.exists(file) == FALSE)
     stop(paste("File", file, "does not exist "))
   }
   file = as.character(file)
 }

 branches = as.list(branches)
 if(length(branches) > 0 && (length(names(branches)) == 0 || any(names(branches) == "")))
    stop("All branch elements must have a name!")

  old = setEntitySubstitution(replaceEntities)
  on.exit(setEntitySubstitution(old))

  if(!is.function(error))
    stop("error must be a function")

  .oldErrorHandler = setXMLErrorHandler(error)
  on.exit(.Call("RS_XML_setStructuredErrorHandler", .oldErrorHandler, PACKAGE = "XML"), add = TRUE)

 state <- .Call("RS_XML_Parse", file, handlers,  endElementHandlers,
                    as.logical(addContext), as.logical(ignoreBlanks),
                     as.logical(useTagName), as.logical(asText), as.logical(trim),
                      as.logical(useExpat), state, as.logical(replaceEntities),
                       as.logical(validate), as.integer(saxVersion), branches, as.logical(useDotNames), error,
                        addFinalizer, as.character(encoding),
                 PACKAGE = "XML")

  if(!is.null(state))
     return(state)
  else
     return(invisible(handlers))
}

mkSubstringByBytes =
function(txt, nbytes)
{
  letters = strsplit(txt, "")[[1]]
  nb = nchar(letters, "bytes")
  i = which(cumsum(nb) >= nbytes)[1] - 1
  c(paste(letters[1:i], collapse = ""), paste(letters[-(1:i)], collapse = ""))
}

xmlStopParser =
function(parser)
{
  if(!inherits(parser, "XMLParserContext"))
    stop("Need an XMLParserContext object for xmlStopParser")

  .Call("RS_XML_xmlStopParser", parser, PACKAGE = "XML")
}


xmlParserContextFunction =
function(f, class = "XMLParserContextFunction")
{
  class(f) = c(class, class(f))

  f
}

Try the XML package in your browser

Any scripts or data that you put into this service are public.

XML documentation built on Nov. 3, 2023, 1:14 a.m.