# This file is part of RWebData.
#
# RWebData is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# RWebData is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with RWebData. If not, see <http://www.gnu.org/licenses/>.
##' Download and transform data from a web API
##'
##' A high-level function that automates the querying and extraction of data
##' for multiple apirequest objects.
##' @usage apiDownload(x, chunksize=50, pause=0, backupfile="apiDL.list.Rdata",
##' shortnames=FALSE, progress="bar", ...)
##' @param x a list of apirequest objects, or a character vector of URLs
##' (each URL is converted with url2apirequest() before downloading)
##' @param chunksize numeric, the size (number of requests) that should be
##' processed in one batch (default is 50)
##' @param pause numeric, the number of seconds the download process should be
##' paused after each request (default is 0)
##' @param backupfile character string with the path/name of the backup-file
##' where the data should be saved batchwise during the download process.
##' @param shortnames logical, indicating whether the resulting tables (data
##' frames) should have short variable names (default is FALSE, variable names
##' contain nesting hierarchy)
##' @param progress either character "bar" (indicates progress with a
##' progress-bar) or "text" (textual indication of progress)
##' @param ... further arguments forwarded to saveDL(); currently only one
##' parameter (simplify) is passed down to the mapping algorithm: if simplify
##' is TRUE, the document tree is made simpler if possible (by removing
##' unnecessary nodes)
##' @return either one data frame or a list containing several data.frames into
##' which the tree structured web-data has been transformed.
##' @details The core of the function is based on mapiDatalight(). However, it
##' is specifically designed for the processing of various requests and a safe
##' download of the data: the requests are split up in batches and saved
##' batchwise on disk during the download process.
##' @export
##' @examples
##' \dontrun{apidata <- apiDownload(x)}
apiDownload <-
  function(x, chunksize=50, pause=0, backupfile="apiDL.list.Rdata",
           shortnames=FALSE, progress="bar", ... ) {

    # x must be a character vector (URLs) or contain only apirequest objects.
    stopifnot(is.character(x) || all(vapply(x, is.apirequest, logical(1))))
    if (is.character(x)) {
      x <- lapply(x, url2apirequest)
    }

    # to be extended/reconsidered:
    # check whether requests are all for the same API method
    # this could later on be changed into a warning and the function extended
    # to handling requests for different API methods separately
    # servers <- unique(unlist(lapply(x, FUN=function(i){i@server})))
    # if (length(servers)>1) stop("x contains apirequest-objects for more than one request-method.")

    # Batchwise download; mapiDatalight is handed over as the per-request worker.
    response.list <- saveDL(request.function=mapiDatalight,
                            request.id=x,
                            chunksize=chunksize,
                            pause=pause,
                            backupfile=backupfile,
                            progress=progress,
                            ...)

    # Remove one level of nesting to count the returned tables.
    flat.list <- unlist(response.list, recursive=FALSE)

    if (length(flat.list) == length(x)) {
      # only one df per request as response? simply rbind all dfs
      response.list <- redlist(response.list)
      if (is.list(response.list[[1]])) {
        response.list <- unlist(response.list, recursive=FALSE)
      }
      response <- dfList(response.list)
    } else {
      # several dfs per request as response? rbind dfs with same name separately
      dfnames <- names(flat.list)
      response <- list()
      for (i in unique(dfnames)) {
        response[[i]] <- dfList(flat.list[dfnames %in% i])
      }
    }

    # cosmetics...
    if (isTRUE(shortnames)) {
      # A single data frame must be renamed as a whole: lapply() on a data
      # frame would iterate over its columns and return a plain list instead.
      response <- if (is.data.frame(response)) {
        onlyLeafnames(response)
      } else {
        lapply(response, onlyLeafnames)
      }
    }

    response
  }
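# NOTE: the following two lines are leftover text from the web page this file
# was copied from; they are not R code and are kept only as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.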