R/get_BCCh_data.R

Defines functions get_bcch_data

Documented in get_bcch_data

#' Fetch data using an .iqy file from the BCCh
#'
#' \code{get_bcch_data} parses an .iqy file generated by the Statistical Database
#'     of the Central Bank of Chile (BCCh), gets the required data from their server,
#'     and returns it as a data frame.
#'
#' @details The first line of the .iqy file is used as the URL and the second
#'     line as the "&"-separated query. The last field of the query is sent
#'     as-is (it carries the query code); every other field is a parameter
#'     whose value comes from \code{q_values} or from the console.
#'
#'     The function stops with an error if the .iqy file has fewer than two
#'     lines, if \code{q_values} does not have exactly one value per query
#'     parameter, if the server answers with an HTTP error status, or if the
#'     results table cannot be located in the response. A warning is issued
#'     when a query parameter in the .iqy file does not have the expected form.
#'
#' @param path_to_iqy the path to the .iqy file. Can be a relative path.
#' @param q_values a character vector (or something that can be coerced to it).
#'     Should contain the values for every parameter in the query, in adequate
#'     order. Parameters are almost always start and end date, but use only if
#'     you are sure of this. If missing, the program will ask the user
#'     to input these values.
#' @param ... arguments passed to \code{httr::POST}. For example, you can pass
#'     \code{httr::timeout(20)} to wait 20 seconds for the response if the default
#'     is not enough.
#'
#' @return A data frame containing the requested data. The \code{FECHA} column
#'     is converted to \code{Date} (parsed as day-month-year) and every other
#'     column to double; values that cannot be converted become \code{NA}.
#'     The data frame has the following attributes:
#'     \enumerate{
#'         \item{\code{data_def}: definition of the data requested}
#'         \item{\code{data_types}: type of data stored in each column}
#'     }
#' @seealso Visit the BCCh database at \url{https://si3.bcentral.cl/siete}.
#'
#' @examples
#'
#' \dontrun{
#' # this should ask the user for a start and an end date
#' get_bcch_data("UF_IVP_DIARIO.iqy")
#' }
#'
#' @export
get_bcch_data = function(path_to_iqy, q_values, ...){
  # read iqy file
  iqy_content = readLines(path_to_iqy,
                          warn = FALSE,
                          encoding = "latin1")
  if(length(iqy_content) < 2L){
    stop("The .iqy file must contain at least two lines (URL and query).",
         call. = FALSE)
  }

  # parse iqy file: line 1 is the URL, line 2 the "&"-separated query
  api_url = iqy_content[1]
  query_params = strsplit(iqy_content[2], "&")[[1]]
  n_fields = length(query_params)
  if(n_fields < 1L){
    stop("The query line of the .iqy file is empty.", call. = FALSE)
  }
  # the last field is the query code, a fixed "name=value" pair
  query_code = query_params[n_fields]
  query_params = query_params[-n_fields]
  # drop = FALSE keeps a matrix even when the query has a single parameter
  query_par_mat = stringr::str_match(
    query_params,
    "^([^=]+)=\\[\"(\\w+)\",\"([\\w\\s]+)\"\\]$"
  )[, -1, drop = FALSE]
  colnames(query_par_mat) = c("param_name", "param_name_2", "param_prompt")
  if(anyNA(query_par_mat[, "param_name"])){
    warning("Some query parameters in the .iqy file could not be parsed.",
            call. = FALSE)
  }
  n_params = nrow(query_par_mat)

  # check if user provided values for parameters
  if(missing(q_values)){
    cat("Values not provided. Please input a value for each parameter:\n\n")

    # read user input
    q_values = character(n_params)
    for(i in seq_along(q_values)){
      cat(paste0(query_par_mat[i, "param_prompt"], ": "))
      q_values[i] = readLines(n=1L)
      cat("\n")
    }
  } else if(length(q_values) != n_params){
    # a length mismatch would otherwise attach names to the wrong values
    stop(sprintf("`q_values` must have %d element(s), one per query parameter, not %d.",
                 n_params, length(q_values)),
         call. = FALSE)
  }

  # construct body of query for httr::POST: one named entry per parameter,
  # followed by the query code split at its first "="
  pos_eq_code = as.integer(regexpr("=", query_code))
  post_body = c(as.list(q_values),
                substr(query_code, pos_eq_code + 1, nchar(query_code)))
  names(post_body) = c(query_par_mat[, "param_name"],
                       substr(query_code, 1, pos_eq_code - 1))

  # post query and fail early on an HTTP error status
  r = httr::POST(url = api_url, body = post_body, encode = "form", ...)
  httr::stop_for_status(r)
  # parse the response once and reuse it for both extractions
  page = httr::content(r)

  results_cells = rvest::html_text(
    rvest::html_nodes(x     = page,
                      xpath = "//td[not(@colspan) and not(table)]")
  )

  # extract data definition
  data_def = rvest::html_text(
    rvest::html_nodes(x     = page,
                      xpath = "//td[@colspan]")
  )

  # trick to get # of cols: find position of blank cell below "FECHA"
  res_n_cols = match("", results_cells) - 1L
  if(is.na(res_n_cols) || res_n_cols < 1L){
    stop("Could not locate the results table in the server response.",
         call. = FALSE)
  }

  # the first 2 * res_n_cols cells are the header and the data-type rows;
  # everything after them is data (negative indexing is safe when there are
  # no data cells at all)
  data_cells = results_cells[-seq_len(2L * res_n_cols)]

  # convert to dataframe
  # NOTE(review): matrix() fills column by column, which is only right if the
  # data cells arrive in column-major order -- confirm against a live response
  # (otherwise byrow = TRUE is needed here)
  res_df = as.data.frame(
    x                = matrix(data = data_cells, ncol = res_n_cols),
    stringsAsFactors = FALSE
  )
  names(res_df) = results_cells[seq_len(res_n_cols)]
  attr(res_df, "data_def") = data_def
  # data types sit in the second row, after the blank cell under "FECHA"
  attr(res_df, "data_types") = results_cells[res_n_cols + 1L + seq_len(res_n_cols - 1L)]

  # format data
  res_df$FECHA = as.Date(res_df$FECHA, format = "%d-%m-%Y")
  # convert data to doubles, column by column so that a single data column
  # works too. warnings are due to missing data points
  if(ncol(res_df) > 1L){
    res_df[-1] = suppressWarnings(lapply(res_df[-1], as.double))
  }

  return(res_df)

}
miguelbiron/BCChqRy documentation built on Sept. 17, 2022, 3:40 a.m.