##' Retrieve Data From the Reserve Bank of New Zealand Website
##'
##' Retrieves data from \url{https://www.rbnz.govt.nz/statistics} for the series
##' specified by the user. This involves downloading one or more spreadsheets
##' and reading them into R.
##'
##' @param series Character string giving the name of the data series to retrieve.
##' @param option Character string specifying which format of the data series
##' to retrieve, if necessary. For information on the available options
##' see the help file for the particular series name (for example ?B1
##' will tell you that the B1 series can be downloaded either as daily
##' or monthly data, with the former being the default). This argument
##' is unnecessary for series that only have one data format.
##' @param replaceColumnNames Logical indicating whether to change the column names of
##' the output data from the Series ID's used in the source
##' spreadsheets to more informative values based on the
##' information in the metadata. Defaults to TRUE.
##' @param fieldForColumnNames If replaceColumnNames is TRUE then this variable
##' specifies which columns of the meta data file will be
##' used to construct the column names. Defaults to
##' \dQuote{Series} but sometimes you may want to change
##' it to \dQuote{Group} or c(\dQuote{Group},
##' \dQuote{Series}). \dQuote{SeriesID} is also allowed.
##' @param reloadOnlyLatest Logical indicating whether to skip downloading files containing
##' past data (with a defined endpoint) if they already exist. For example, if a series has
##' separate spreadsheets for start-to-2015 and 2015-onwards then we don't need to repeatedly
##' download the former item; only the latter one will be updated.
##' @param cacheOnly Logical indicating that the RBNZ website should not be read and
##' the files only loaded from an existing cache on disk.
##' @param destDir File path to a directory in which to place the downloaded
##' spreadsheets. If not specified the value of getOption("RBNZ.destDir") will
##' be checked. If that is NULL too then the files will be placed into tempdir()
##' and deleted on exit.
##' @param quiet Logical to be passed to utils::download.file.
##' @param wait Numeric giving the number of seconds to wait between downloads.
##' Defaults to 60. Note that this applies to reading the series page
##' and to each individual spreadsheet download, so a series with one
##' spreadsheet would have two downloads sixty seconds apart.
##'
##' @return For most series a list containing the fields \dQuote{meta} and
##' \dQuote{data} which are data frames containing the metadata and
##' actual data, respectively. There are a few series that do not follow
##' this format, and for these the spreadsheets are just downloaded and
##' read into a list of data frames, one for each sheet, with no organising
##' or cleaning.
##'
##' @author Jasper Watson
##'
##' @examples
##'
##' ## List the available data series.
##' print(allAvailableSeries())
##'
##' \dontrun{
##'
##' ## Download exchange rate data and plot NZD vs sterling.
##' b1 <- getSeries('B1')
##' plot(b1$data$Date, b1$data$UK_pound_sterling, type = 'l')
##'
##' ## Now use monthly data (previous was daily).
##' b1_monthly <- getSeries('B1', 'monthly')
##' plot(b1_monthly$data$Date, b1_monthly$data$UK_pound_sterling,
##' type = 'l')
##'
##' }
##' @export
##'
##
getSeries <- function(series,
                      option = getDefaultOption(series),
                      replaceColumnNames = TRUE,
                      fieldForColumnNames = "Series",
                      reloadOnlyLatest = TRUE,
                      cacheOnly = FALSE,
                      destDir = NULL,
                      quiet = TRUE,
                      wait = 60
                      ) {

    ## Resolve the download directory: the explicit argument wins, then the
    ## RBNZ.destDir option, then tempdir(). deleteFiles is TRUE only for the
    ## tempdir() fallback and is handed on to readSpreadsheet().
    deleteFiles <- FALSE
    if (is.null(destDir)) {
        destDir <- getOption("RBNZ.destDir")
        if (is.null(destDir)) {
            destDir <- tempdir()
            deleteFiles <- TRUE
        }
    }

    ## Length checks come first so the %in% tests below act on scalars.
    ## The last condition is an implication: cacheOnly requires a directory.
    ## (It used to be `cacheOnly && ...`, which rejected every call made with
    ## the default cacheOnly = FALSE.)
    stopifnot(length(series) == 1,
              series %in% allAvailableSeries(),
              length(option) == 1,
              option %in% allowedOptions(series),
              fieldForColumnNames %in% c("Group", "Series", "SeriesID"),
              !cacheOnly || !is.null(destDir)
              )

    ## destDir has already been defaulted above, so this is purely defensive.
    if (reloadOnlyLatest && is.null(destDir)) {
        stop("When using reloadOnlyLatest = TRUE, you must specify a value for destDir")
    }

    ## Some series are spread across multiple files. We will download all of them
    ## and bind them together. sapply(simplify = FALSE) is used instead of lapply
    ## so that the result is always named by sub-file.
    allFiles <- sapply(seriesOptions(series)[[option]], function(x) NULL,
                       USE.NAMES = TRUE, simplify = FALSE)

    for (ii in seq_along(allFiles)) {

        subFileName <- names(allFiles)[ii]
        destfile <- path.expand(file.path(destDir, paste0(subFileName, ".xlsx")))

        ## The last file is treated as the one that is still being updated.
        isLatest <- ii == length(allFiles)

        ## Reuse the copy on disk when running from cache only, or when this
        ## is a historical file that already exists and the caller asked to
        ## reload only the latest one. Anything else is (re-)downloaded.
        useExisting <- cacheOnly ||
            (file.exists(destfile) && reloadOnlyLatest && !isLatest)

        if (useExisting) {

            if (!file.exists(destfile)) {
                ## Only reachable with cacheOnly = TRUE.
                stop("cacheOnly = TRUE but no cached file was found at ", destfile)
            }

            if (!quiet) {
                message("File already exists with no updates. Nothing to download")
            }

        } else {

            ## NOTE(review): the URL is built from series and option only, so
            ## every sub-file of a multi-file series requests the same URL.
            ## Confirm whether dataFileURL() should also receive subFileName.
            url <- dataFileURL(series, option)

            status <- try(utils::download.file(url = url,
                                               destfile = destfile,
                                               mode = "wb",
                                               quiet = quiet
                                               )
                          )

            ## download.file() returns 0 on success and non-zero on failure.
            if (inherits(status, "try-error") || status > 0) {
                stop("Could not download ", url)
            }

            ## Pause between requests to the RBNZ website.
            Sys.sleep(wait)

        }

        allFiles[[ii]] <- readSpreadsheet(destfile, series, subFileName,
                                          fieldForColumnNames, deleteFiles)

    }

    ## SDDS: the per-file results are concatenated into one list.
    if (series == "SDDS") {
        out <- allFiles[[1]]
        out <- append(out, allFiles[[2]])
        out <- append(out, allFiles[[3]])
        return(out)
    }

    ## Some spreadsheets are not in the standard format.
    ## We don't try to do anything with them.
    if (series %in% specialCases()) {
        return(allFiles[[1]])
    }

    ## For datasets that contain more than one spreadsheet (stored here as a list
    ## of data frames) we will combine them together. This applies to the data files
    ## and the metadata files.
    ## I tried doing this with Reduce(merge... but it was actually slower and the
    ## order kept getting messed up. Here the order of colnames(allData) matches
    ## the order of allMeta so I won't change it any further.
    allData <- lapply(allFiles, function(x) x[["data"]])

    if (length(allData) > 1) {

        allColumnNames <- Reduce(union, lapply(allData, colnames))

        for (ii in seq_along(allData)) {

            ## Pad columns that are absent from this file with NA so that
            ## every piece has the same columns in the same order.
            for (jj in setdiff(allColumnNames, colnames(allData[[ii]]))) {
                allData[[ii]][, jj] <- NA_real_
            }

            allData[[ii]] <- allData[[ii]][, allColumnNames, drop = FALSE]

        }

        allData <- specialRBind(allData)

    } else {
        allData <- allData[[1]]
    }

    allMeta <- lapply(allFiles, function(x) x[["meta"]])

    if (length(allMeta) > 1) {

        ## Keep the first metadata row seen for each Series Id, in order of
        ## first appearance across the files.
        keys <- lapply(allMeta, function(x) x[, "Series Id"])
        allKeys <- unique(do.call(c, keys))
        index <- match(allKeys, unlist(keys))
        allMeta <- do.call(rbind, allMeta)[index, ]

    } else {
        allMeta <- allMeta[[1]]
    }

    if (series == "B13") {
        allMeta <- allMeta[!duplicated(allMeta[, "Series Id"]), ]
    }

    ## Every data column after the first must line up one-to-one with the
    ## metadata rows. The length check prevents silent recycling in `==` and
    ## isTRUE() turns an NA comparison into a mismatch rather than an
    ## unrelated "missing value where TRUE/FALSE needed" error.
    seriesIds <- allMeta[, "Series Id"]
    dataIds <- colnames(allData)[-1]

    if (length(dataIds) != length(seriesIds) ||
        !isTRUE(all(dataIds == seriesIds))) {
        stop("SeriesID's do not match between data and meta. Something has gone wrong.")
    }

    ## The first data column arrives named "Series Id"; it is renamed to Date.
    allData <- allData[, c("Series Id", seriesIds), drop = FALSE]
    colnames(allData)[[1]] <- "Date"

    if (replaceColumnNames) {
        colnames(allData) <- transformColumnNames(allMeta, fieldForColumnNames)
    }

    out <- list(meta = allMeta, data = allData)

    rownames(out[["meta"]]) <- NULL
    rownames(out[["data"]]) <- NULL

    out

}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.