# See concurrent.xml for more details
library(RCurl)
# Download a set of URIs concurrently through a single curl multi-handle.
#
# Args:
#   uris: character vector of URLs to fetch.
#   ...: extra curl options forwarded to curlOptions() for every request.
#   multiHandle: the multi-handle the per-URI easy handles are pushed onto.
#   .perform: if TRUE, drive all downloads to completion and return the
#     document texts; if FALSE, return the configured handle and gatherers
#     so the caller can perform the downloads manually.
#
# Returns: when .perform is TRUE, a list of document contents named by URI;
#   otherwise list(multiHandle = ..., content = ...).
#
# Fix vs. original: content was indexed by the URI string itself, so
# duplicate URIs silently shared one text gatherer (their bodies were
# concatenated). We now index positionally and only name the result.
getURIs <-
function(uris, ..., multiHandle = getCurlMultiHandle(), .perform = TRUE)
{
  content <- vector("list", length(uris))
  names(content) <- uris
  for (i in seq_along(uris)) {
    curl <- getCurlHandle()
    content[[i]] <- basicTextGatherer()
    opts <- curlOptions(URL = uris[i], writefunction = content[[i]]$update, ...)
    curlSetOpt(.opts = opts, curl = curl)
    multiHandle <- push(multiHandle, curl)
  }
  if (.perform) {
    # Run all transfers to completion, then harvest each gatherer's text.
    complete(multiHandle)
    lapply(content, function(x) x$value())
  } else {
    list(multiHandle = multiHandle, content = content)
  }
}
# The documents downloaded in each of the timing comparisons below.
uris <- c(
  "http://www.omegahat.org/index.html",
  "http://www.r-project.org/src/contrib/PACKAGES.html",
  "http://developer.r-project.org/index.html",
  "http://www.slashdot.org/philosophy.xml",
  "http://fxfeeds.mozilla.org/rss20.xml",
  "http://www.nytimes.com/index.html"
)
#atimes = sapply(1:40, function(i) system.time(getURIs(uris, maxconnects = 100)))
#stimes = sapply(1:40, function(i) system.time(lapply(uris, getURI)))
# Number of replications for each download strategy.
n <- 100

# Strategy 1: serial — one getURI() call per document.
serialTimes <- replicate(n, system.time(lapply(uris, getURI)))

# Strategy 2: vectorized — one getURI() call for all documents, but sequential.
vectorizedTimes <- replicate(n, system.time(getURI(uris, async = FALSE)))

# Strategy 3: asynchronous — all documents via the multi-handle in getURIs().
performingAsyncTimes <- replicate(n, system.time({
  getURIs(uris)
}))

# Strategy 4: asynchronous, but driving the multi-handle by hand.
asyncTimes <- replicate(n, system.time({
  z <- getURIs(uris, .perform = FALSE)
  complete(z$multiHandle)
  lapply(z$content, function(x) x$value())
  # Per-document download times are also available:
  #sapply(z$multiHandle@subhandles, function(x) getCurlInfo(x)$total.time)
}))
# Stack the user/system/elapsed rows of the three timing matrices into one
# long data frame, labelled by download strategy for later comparison.
timings <-
  data.frame(
    user    = c(performingAsyncTimes[1, ], vectorizedTimes[1, ], serialTimes[1, ]),
    system  = c(performingAsyncTimes[2, ], vectorizedTimes[2, ], serialTimes[2, ]),
    elapsed = c(performingAsyncTimes[3, ], vectorizedTimes[3, ], serialTimes[3, ]),
    mode    = factor(rep(c("Asynchronous", "Vectorized", "Serial"), each = n))
  )
# (Website-embed boilerplate from the page this script was scraped from;
# commented out so the file parses as R.)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.