R/header.R

Defines functions findHTTPHeaderEncoding getEncoding

Documented in findHTTPHeaderEncoding

parseHTTPHeader =
  #
  # Reads the lines from the HTTP response 
  # header and converts them into a
  # name=value  collection as a character vector.
  #
  # returns a named list of the fields. Allows duplicates.
function(lines, multi = TRUE)
{
 if(length(lines) < 1)
   return(NULL)

 if(length(lines) == 1)
    lines = strsplit(lines, "\r\n")[[1]]

 if(multi) {
    i = grep("^HTTP", lines)
    status = lines[max(i)]
    lines = lines[seq(max(i), length(lines))]
 } else
    status = lines[1]

   # Get the status and if it is 100, then throw away the first 2 lines
   #  e.g. HTTP/1.1 100 Continue
   #       \r\n
 st = getStatus(status)
 if(st[["status"]] == 100) {

     # Have to worry about whether we just get a 100 and nothing else
     # and so lines[3] won't exist.
   if((length(lines) - length(grep("^[[:space:]]*$", lines))) == 1)
     return(st)
     # We seem to jump to the 3 rd line as the real status
     # and discard the first two lines. Is this the Expect.
#   status = lines[3]
#   lines = lines[-(1:2)]
 }
 
 lines = lines[-1]
 lines = gsub("\r\n$", "", lines)  # was \r\n\0$
 lines = lines[ lines != "" ] 

 if(FALSE) { # Doesn't behave. Duplicates!
   header = lines[-1]
   header <- read.dcf(textConnection(header))
 } else {
#   els <- sapply(lines, function(x) strsplit(x, ": *"))
#   header <- sapply(els, function(x) paste(x[2]) #XX what if more than 2 els.
#   names(header) <- sapply(els, function(x) x[1])
   header = structure(sub("[^:]+: (.*)", "\\1", lines),
                      names = sub("([^:]+):.*", "\\1", lines))
 }

# st = getStatus(status)
 header[["status"]] = st[["status"]]
 header[["statusMessage"]] = st[["message"]] 
 
 header
}


getStatus =
  #
  # Reads the first line of the HTTP header, e.g of the form
  #   HTTP/1.1 200 OK
  # or
  #   HTTP/1.1 404 Not Found
  # and returns
  #   list(status = 404, message = "Not Found")
function(status)
{
 els <- strsplit(status, " ")[[1]] 
 list(status = as.integer(els[2]), message = paste(els[-c(1,2)], collapse = " "))
}




getEncoding =
function(x) {
  val = gsub("Content-Type:.*;\\W*charset=", "", x)
  if(all(nchar(val) == nchar(x))) # See if there was any substitution done, i.e. a match.
    return(NA)


  switch(val, "UTF-8" =, "utf-8" = 1L, "ISO-8859-1" = 2L, -1L)
}

findHTTPHeaderEncoding =
function(str)
{
  els = strsplit(str, "\\\r\\\n")
  v = lapply(els, getEncoding)

  if(any(!is.na(v))) {
    v[[!is.na(v)]][[1]]
  } else
     -1L
}

Try the RCurl package in your browser

Any scripts or data that you put into this service are public.

RCurl documentation built on Nov. 3, 2023, 1:09 a.m.