# R/fetch.R

# Defines functions fetch

# Documented in fetch

#' Request factor time-series hosted on remote servers.
#'
#' The function \code{fetch} returns the requested time-series represented by
#' the handle string argument, which must be matched against an internal
#' catalog. The catalog includes valid handles, their associated source url, and
#' other fields required to send a valid query.
#'
#' The string handle representing the requested time-series is matched in the
#' internal catalog, which encodes and maintains the parameters required to
#' perform each query (request) associated with a particular data set hosted on
#' a remote server.
#'
#' The function currently supports two main sources of factor time-series, the
#' Kenneth French Data Library
#' (\href{https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html}{K.
#' French Data Library}) and the Federal Reserve of St. Louis
#' (\href{https://fred.stlouisfed.org/}{FRED St Louis}). The first site relies
#' on the FTP (file transfer protocol), a standard network protocol used for the
#' transfer of computer files between a client and server. The second site is
#' built around JSON (JavaScript Object Notation), an open standard file format
#' which is language-independent.
#'
#' Requests sent to FTP-based hosting sites generally return compressed files
#' (e.g. .zip), also known as archive format. These files are stored in the
#' Archives sub-directory in native format (e.g. .zip), then uncompressed to a
#' .txt format and stored in the Uncompressed sub-directory. The uncompressed
#' files (.txt) are then read, parsed and stored in .csv format in the
#' Uncompressed sub-directory. JSON-based hosting sites return content, which
#' can be held in main memory (i.e. random-access memory) to be parsed and
#' subsequently stored in .csv. The JSON framework bypasses the archiving (.zip)
#' and decompressing (.txt) steps. In all cases (FTP, JSON), the extraction
#' process is monitored and documented in .pdf files stored in the Audit
#' sub-directory.
#'
#' All file permissions are set to read-only to prevent unintended
#' modifications. In addition, all file time stamps (birth, modification,
#' access, change) are documented in .pdf files located in the Audit
#' sub-directory (also in read-only permission mode).
#'
#' All web service requests sent to the Federal Reserve of St. Louis site
#' require an application program interface (API) key to identify requests. The
#' general documentation on the API tool kits can be consulted at
#' \href{https://fred.stlouisfed.org/docs/api/fred/}{API Toolkits}. The API key
#' is set using the api_key variable, a 32 character lower-cased alpha-numeric
#' string. The specific API key supporting this package is granted under the
#' FRED® API Terms of Use and will not be shared with other users in compliance
#' with the Terms of Use. In an effort to maintain the package integrity,
#' further details on this topic will only be provided on a need-to-know basis.
#'
#' ADDITIONAL NOTICE: Pursuant to the FRED® API Terms of Use, note that this
#' product uses the FRED® API but is not endorsed or certified by the Federal
#' Reserve Bank of St. Louis. See more details at
#' \href{https://research.stlouisfed.org/docs/api/terms_of_use.html}{Terms of
#' Use}.
#'
#' @param hdl_str A string handle, representing the requested time-series. Call
#'   \code{catalog_do(operation = "show")} to see the list of valid handle
#'   parameters. See \code{catalog_do()} documentation for additional details.
#'   The function will stop and generate an error message if an invalid handle
#'   parameter is passed as an argument.
#' @param dest_dir A string representing an existing directory, where all
#'   streams (archive files (e.g. .zip), audit files (.pdf), and uncompressed
#'   files (.txt, .csv)) will be stored. The function will stop and generate an
#'   error message if the directory does not exist. Note that three
#'   sub-directories will be created (if they do not exist): Archives, Audit and
#'   Uncompressed.
#' @return A \code{tibble} object containing time stamps (e.g. date, year-month)
#'   and the requested time-series. The \code{tibble} has three attributes:
#'   string handle (i.e. catalog handle), series frequency and series units.
#'
#' @importFrom magrittr "%>%"
#' @export

fetch <- function(hdl_str = '', dest_dir = '') {
  # Validate the handle and the destination directory, make sure the
  # Archives / Uncompressed / Audit sub-directories exist, then dispatch to the
  # source-specific fetch helper and return whatever that helper returns.
  #
  # Args:
  #   hdl_str:  single string, must equal exactly one entry of catalog$hdl.
  #   dest_dir: single string naming an existing directory.
  #
  # Stops with an error when: an argument is not a single non-NA string, the
  # handle is absent from (or duplicated in) the catalog, the directory does
  # not exist, the handle has no source prefix, or no fetch helper is available
  # for the handle.
  #
  # Messages and paths are assembled with paste0() rather than
  # stringr::str_glue(), so braces in a handle or path are never interpreted as
  # glue expressions.

  stopifnot(is.character(hdl_str), length(hdl_str) == 1L, !is.na(hdl_str))
  stopifnot(is.character(dest_dir), length(dest_dir) == 1L, !is.na(dest_dir))

  # Validate catalog entry. The handle is compared literally and in full: a
  # regular-expression match would report a valid handle as ambiguous whenever
  # it happens to be a substring of another catalog handle.
  n_match <- sum(catalog$hdl == hdl_str, na.rm = TRUE)
  if (n_match == 0L) {
    stop(paste0(hdl_str, ' is not a valid catalog entry'), call. = TRUE)
  }
  if (n_match > 1L) {
    stop(paste0(hdl_str, ' maps to multiple catalog entries'), call. = TRUE)
  }

  # Validate destination directory and create sub-directories if needed.
  dest_dir <- fs::path_tidy(path = dest_dir)
  if (!fs::dir_exists(dest_dir)) {
    stop(paste0(dest_dir, ' directory does not exist.',
                '\n (a) Create new directory',
                '\n (b) Check directory spelling'),
         call. = TRUE)
  }
  for (sub_dir in c('Archives', 'Uncompressed', 'Audit')) {
    nm_dir <- paste0(dest_dir, '/', sub_dir)
    if (!fs::dir_exists(nm_dir)) {
      fs::dir_create(path = nm_dir, recurse = TRUE)
    }
  }

  # ----------------------------------------------------------------------------
  # Dispatch mechanism: resolve the helper name, then call it.
  # ----------------------------------------------------------------------------
  # The source prefix is everything before the first underscore.
  pos_underscore <- as.integer(regexpr('_', hdl_str, fixed = TRUE))
  if (pos_underscore < 0L) {
    stop('Source prefix in <hdl_str> not followed by underscore', call. = TRUE)
  }
  src_prefix <- substr(hdl_str, 1L, pos_underscore - 1L)

  # func stays NULL when no branch below recognises the handle, so the failure
  # is reported explicitly instead of as "object 'func' not found".
  func <- NULL

  if (src_prefix == 'FF') {
    # Fama-French (K. French) source. The helper names carry two trailing
    # underscores; that is the name this dispatch has always composed.
    # NOTE(review): confirm against the helper definitions elsewhere in the
    # package.
    func <- switch(hdl_str,
                   'FF_3F_US_M' = ,
                   'FF_3F_DEV_M' = 'fetch_FF_3F__',
                   'FF_OP_US_M' = ,
                   'FF_OP_exDiv_US_M' = 'fetch_FF_OP__')
  } else if (src_prefix == 'FRED') {
    # Federal Reserve Bank of St. Louis (FRED) source.
    func <- 'fetch_fred_series_obs'
  }

  if (is.null(func)) {
    stop(paste0('No fetch function is defined for catalog entry ', hdl_str),
         call. = TRUE)
  }

  # Only accept a real function: a same-named non-function object must not
  # satisfy the check.
  if (!exists(x = func, mode = 'function')) {
    stop(paste0('Function ', func, '() does not exist'))
  }

  do.call(what = func, args = list(hdl_str, dest_dir))
}
# fognyc/factorr documentation built on Nov. 16, 2020, 8:48 p.m.