getURLContent =
#
# Used to be
# header = basicTextGatherer()
# ans = getBinaryURL(url, headerfunction = header$update, curl = header$curl())
# processContent(ans, header$header(), .encoding)
# but now we use the dynamic reader.
#
function(url, ..., curl = getCurlHandle(.opts = .opts), .encoding = NA, binary = NA, .opts = list(...),
header = dynCurlReader(curl, binary = binary, baseURL = url, isHTTP = isHTTP, encoding = .encoding),
isHTTP = length(grep('^[[:space:]]*http', url)) > 0)
{
url = as(url, "character")
if(!missing(curl))
curlSetOpt(.opts = .opts, curl = curl)
if(is.logical(header)) {
returnHeader = header
header = dynCurlReader(curl, binary = binary, baseURL = url, isHTTP = isHTTP, encoding = .encoding, header = header)
} else
returnHeader = FALSE
if(!('headerfunction' %in% names(.opts))) {
# .opts$headerfunction = header$update
protect = missing(header)
curlSetOpt(curl = curl, .isProtected = protect,
headerfunction = header$update)
}
if(!isHTTP && !('writefunction' %in% names(.opts))) {
# If for example this is scp where there is no header
# or headerfunction will never get called. So we have to
# set the writefunction as well.
# .opts$headerfunction = header$update
protect = missing(header)
curlSetOpt(curl = curl, .isProtected = protect,
writefunction = header$update)
}
curlPerform(url = url, curl = curl, .opts = .opts)
if(isHTTP && length(header$header())) {
http.header = parseHTTPHeader(header$header())
stop.if.HTTP.error(http.header, header$value())
}
if(returnHeader)
list(header = if(is(returnHeader, "AsIs"))
header$header()
else
parseHTTPHeader(header$header()),
body = header$value())
else
header$value()
}
stop.if.HTTP.error =
function(http.header, body = NA)
{
if(length(http.header) == 0)
return(NA) # or TRUE
if( floor(as.integer(http.header[["status"]])/100) >= 4) {
klass = getHTTPErrorClass(http.header[["status"]])
err = simpleError(http.header[["statusMessage"]])
err$httpHeader = http.header
err$body = body
class(err) = c(klass, class(err))
#signalCondition(err)
stop(err)
}
TRUE
}
processContent =
#
# Figure out how to interpret the contents based on the HTTP response's header
# i.e. look at its Content-Type.
#
function(ans, header, .encoding = NA)
{
headerText = if(is.character(header)) header else header$value()
http.header = parseHTTPHeader(headerText)
stop.if.HTTP.error(http.header, ans)
content.type = getContentType(http.header)
binary = isBinaryContent(http.header, content.type)
if(!(is.na(binary) || binary)) {
ans = rawToChar(ans)
if(length(.encoding) == 0 || is.na(.encoding)) {
charset = grep("charset", content.type, value = TRUE)
if(length(charset))
.encoding = strsplit(charset, "=")[[1]][2]
}
if(length(.encoding) && !is.na(.encoding))
Encoding(ans) = .encoding
} else {
attr(ans, "Content-Type") = getContentType(http.header)
ans
}
ans
}
trim =
function(x)
{
gsub("(^[[:space:]]+|[[:space:]]+$)", "", x, perl = TRUE)
}
getContentType =
function(header, full = FALSE)
{
i = match("content-type", tolower(names(header)))
if( is.na( i ) )
return(character())
tmp = trim(strsplit(header[i[1]], "; *")[[1]])
if(!full)
return(tmp)
vals = strsplit(tmp, "=")
structure(gsub(";$", "", sapply(vals, function(x) x[length(x)])),
names = sapply(vals, function(x) if(length(x) > 1) x[1] else ""))
}
# See http://www.iana.org/assignments/media-types/
textContentTypes = c("html", "text", "xhtml", "plain", "xml", "tex", "x-latex", "css", "latex", "x-texinfo",
"sgml", "postscript", "texinfo", "ecmascript", "javascript",
"atom+xml", "json", "x-bsh", "x-sh", "x-shar","x-rtf", "base64", "x-tcl", "x-lisp", "x-java-source",
"x-fortran", "x-script.csv", "x-csh", "x-c")
if(FALSE) {
# see mime.R
guessMIMEType =
function(filename)
{
ext = getExtension(filename)
data("mimeTypeExtensions", package = "RCurl", envir = environment())
mimeTypeExtensions[ext]
}
}
isBinaryFile =
function(name, mimeType = NA)
{
# type = "application/binary"
if(is.na(mimeType))
mimeType = guessMIMEType()
isBinaryContent( , mimeType)
}
isBinaryContent =
#
# type can be given as a list intended to be separate header elements
# e.g. Content-Type, Content-Encoding, etc.
# Each can be a vector.
#
function(header, type = getContentType(header)[1],
textTypes = getOption("text.content.types", textContentTypes))
{
if(is.list(type) && length(type) > 1) {
last <- TRUE
for(i in type) {
if(length(i) && !is.na(i) && (last <- isBinaryContent(header, i, textTypes)))
return(TRUE)
}
return(last)
}
if(length(type) == 0)
return(NA)
if(is.null(textTypes))
textTypes = textContentTypes
type.els = strsplit(type, "/")[[1]]
if(type.els[1] == "text")
return(FALSE)
if(any(type.els %in% textContentTypes))
return(FALSE)
if(length(grep("\\+xml$", type.els)))
return(FALSE)
TRUE
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.