R/main.R

Defines functions make_request check_status create_query_url convert_dates get_description get_dimensions get_data get_dataflows

Documented in convert_dates get_data get_dataflows get_description get_dimensions

#' Retrieve data frame of all datasets in the ECB Statistical Data Warehouse
#'
#' @param ... Arguments passed to GET (e.g. timeout(10) to add maximum request time)
#'
#' @return A dataframe
#' @export
#'
#' @examples
#' df <- get_dataflows()
#' head(df)
get_dataflows <- function(...) {

  query_url <- "https://sdw-wsrest.ecb.europa.eu/service/dataflow"

  req <- make_request(query_url, "metadata", ...)

  res <- xml2::read_xml(httr::content(req, "text"), verbose = TRUE)

  ecb_ns <- xml2::xml_ns(res) # xml namespace

  data_flows_nodes <- xml2::xml_find_all(res, "//str:Dataflow", ecb_ns)
  name_nodes <- xml2::xml_find_all(xml2::xml_children(data_flows_nodes),
                                   "//com:Name", ecb_ns)

  flow_ref <- xml2::xml_attr(data_flows_nodes, "id")
  title <- xml2::xml_text(name_nodes)

  df <- data.frame(flow_ref, title, stringsAsFactors = FALSE)
  structure(df, class = c("tbl_df", "tbl", "data.frame"))
}

#' Retrieve data from the ECB Statistical Data Warehouse API
#'
#' @param key A character string identifying the series to be retrieved
#' @param filter A named list with additional filters (see \code{details})
#' @param ... Arguments passed to GET (e.g. timeout(10) to add maximum request time)
#'
#' @details
#' The \code{filter} option of \code{get_data()} takes a named list of key-value pairs.
#' If left blank, it returns all data for the current version.
#'
#' Available filter parameters:
#'
#' \itemize{
#' \item \code{startPeriod} & \code{endPeriod}
#'  \itemize{
#'    \item \code{YYYY} for annual data (e.g.: 2013)
#'    \item \code{YYYY-S[1-2]} for semi-annual data (e.g.: 2013-S1)
#'    \item \code{YYYY-Q[1-4]} for quarterly data (e.g.: 2013-Q1)
#'    \item \code{YYYY-MM} for monthly data (e.g.: 2013-01)
#'    \item \code{YYYY-W[01-53]} for weekly data (e.g.: 2013-W01)
#'    \item \code{YYYY-MM-DD} for daily data (e.g.: 2013-01-01)
#'    }
#' \item \code{updatedAfter}
#'  \itemize{
#'    \item A timestamp to retrieve the latest version of changed values in the database since a certain point in time
#'    \item Example: \code{filter = list(updatedAfter = 2009-05-15T14:15:00+01:00)}
#'    }
#' \item \code{firstNObservations} & \code{lastNObservations}
#'  \itemize{
#'    \item Example: \code{filter = list(firstNObservations = 12)} retrieves the first 12 observations of all specified series
#'    }
#' \item \code{detail}
#'  \itemize{
#'    \item Possible options: \code{full/dataonly/serieskeysonly/nodata}
#'    \item \code{dataonly} is the default
#'    \item Use \code{serieskeysonly} or \code{nodata} to list series that match a certain query, without returning the actual data
#'    \item An alternative to using \code{serieskeys/nodata} is the convenience function \code{get_dimensions()}, which returns a list of dataframes with dimensions and explanations (see extended example below).
#'    \item \code{full} returns both the series values and all metadata. This entails retrieving much more data than with the `dataonly` option.
#'    }
#' \item \code{includeHistory} (not currently implemented)
#'  \itemize{
#'    \item \code{false} (default) returns only version currently in production
#'    \item \code{true} returns version currently in production, as well as all previous versions
#'    }
#' }
#' See the \href{https://sdw-wsrest.ecb.europa.eu/}{SDW API} for more details.
#'
#' @return A data frame
#' @export
#'
#' @examples
#' # Get monthly data on annualized euro area headline HICP
#' hicp <- get_data("ICP.M.U2.N.000000.4.ANR")
#' head(hicp)
get_data <- function(key, filter = NULL, ...) {

  if(!"detail" %in% names(filter)) {
    filter <- c(filter, "detail" = "dataonly")
  }

  if(!filter[["detail"]] %in% c("full", "dataonly")) {
    return(get_dimensions(key))
  }

  query_url <- create_query_url(key, filter = filter)

  req <- make_request(query_url, "data", ...)

  tmp <- tempfile()
  writeLines(httr::content(req, "text", encoding = "utf-8"), tmp)

  result <- rsdmx::readSDMX(tmp, FALSE)

  unlink(tmp)

  df <- as.data.frame(result)
  df <- structure(df,
                  class = c("tbl_df", "tbl", "data.frame"),
                  names = tolower(names(df)))
  df
}

#' Retrieve dimensions of series in the ECB's SDW
#'
#' @param key A character string identifying the series to be retrieved
#' @param ... Arguments passed to GET (e.g. timeout(10) to add maximum request time)
#'
#' @return A list of data frames, one for each series retrieved
#' @export
#'
#' @examples
#' hicp_dims <- get_dimensions("ICP.M.U2.N.000000.4.ANR")
#' hicp_dims[[1]]
get_dimensions <- function(key, ...) {

  query_url <- create_query_url(key, filter = list("detail" = "nodata"))

  # Used in creating names (series_names) below
  flow_ref <- regmatches(key, regexpr("^[[:alnum:]]+", key))

  req <- make_request(query_url, "metadata", ...)

  skeys <- xml2::read_xml(httr::content(req, "text", encoding = "utf-8"),
                          verbose = TRUE)

  skeys_ns <- xml2::xml_ns(skeys) # xml namespace

  series <- xml2::xml_find_all(skeys, "//generic:Series", skeys_ns)

  series_list <- lapply(series, xml2::xml_children)

  # Concatenate dimensions to recreate series code
  series_names <- vapply(series_list, function(x) {

      attrs <- xml2::xml_attr(xml2::xml_children(x[1]), "value")
      name <- paste0(attrs, collapse = ".")
      paste(flow_ref, name, sep = ".")

    }, character(1))

  # Return list of dataframes, one for each series, with dimension-value pairs
  df_dim <- lapply(series_list, function(nodeset) {

    data.frame(dim = xml2::xml_attr(xml2::xml_children(nodeset), "id"),
               value = xml2::xml_attr(xml2::xml_children(nodeset), "value"),
               stringsAsFactors = FALSE)
  })

  names(df_dim) <- series_names
  df_dim
}

#' Get full, human-readable description of a series
#'
#' @param key A character string identifying the series to be retrieved
#'
#' @return A character vector
#' @export
#'
#' @examples
#' get_description("ICP.M.DE.N.000000+XEF000.4.ANR")
get_description <- function(key) {
  vapply(get_dimensions(key), function(x) x$value[x$dim == "TITLE_COMPL"],
         character(1))
}

#' Format date variable retrieved from the SDW into a proper date variable
#'
#' @param x A vector of dates
#'
#' @return A date-formatted vector
#' @export
#'
#' @examples
#' hicp <- get_data("ICP.M.U2.N.000000.4.ANR")
#' hicp$obstime <- convert_dates(hicp$obstime)
#' str(hicp)
convert_dates <- function(x) {

  # Annual
  if(grepl("^[0-9]{4}$", x[1])) {
    return(as.Date(paste0(x, "-01-01"), "%Y-%m-%d"))
  }

  # Monthly
  if(grepl("^[0-9]{4}-[0-9]{2}$", x[1])) {
    return(as.Date(paste0(x, "-01"), "%Y-%m-%d"))
  }

  # Monthly
  if(grepl("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", x[1])) {
    return(as.Date(x, "%Y-%m-%d"))
  }

  # Quarterly
  if(grepl("^[0-9]{4}-Q[1-4]{1}$", x[1])) {
    stopifnot(requireNamespace("zoo", quietly = TRUE))
    x <- sub("Q", "", x)
    # frac = 1 for end-of-quarter dates
    return(zoo::as.Date(zoo::as.yearqtr(x), frac = 1))
  }
  warning("Could not convert dates - format unknown.")
  x
}

create_query_url <- function(key, filter = NULL) {

  url <- "https://sdw-wsrest.ecb.europa.eu/service/data"

  # Get flow reference (= dataset abbreviation, e.g. ICP or BOP)
  flow_ref <- regmatches(key, regexpr("^[[:alnum:]]+", key))
  key_q <- regmatches(key, regexpr("^[[:alnum:]]+\\.", key),
                      invert = TRUE)[[1]][2]

  if(any(names(filter) == "")) {
    stop("All filter parameters must be named!")
  }

  if("updatedAfter" %in% names(filter)) {
    filter$updatedAfter <- curl::curl_escape(filter$updatedAfter)
  }

  # Create parameter part of query string
  names <- curl::curl_escape(names(filter))
  values <- curl::curl_escape(as.character(filter))
  query <- paste0(names, "=", values, collapse = "&")
  query <- paste0("?", query)

  query_url <- paste(url, flow_ref, key_q, query, sep = "/")
  query_url
}

check_status <- function(req) {
  if(req$status_code >= 400)
    stop("HTTP failure: ", req$status_code, "\n", httr::content(req, "text"))
}

make_request <- function(query_url, header_type, ...) {

  accept_headers <-
    c("metadata" = "application/vnd.sdmx.genericdata+xml;version=2.1",
      "data" = "application/vnd.sdmx.structurespecificdata+xml;version=2.1")

  req <- httr::GET(query_url, httr::add_headers(
    "Accept" = accept_headers[header_type],
    "Accept-Encoding" = "gzip, deflate"), ...)

  check_status(req)
  req
}
expersso/ecb documentation built on April 8, 2021, 2:03 p.m.