#' Fetch data using an .iqy file from the BCCh
#'
#' \code{get_bcch_data} parses an .iqy file generated by the Statistical Database
#' of the Central Bank of Chile (BCCh), gets the required data from their server,
#' and returns it as a data frame.
#'
#' The first line of the .iqy file is used as the URL of the service and the
#' second line as the query. The query is split on \code{&}: its last element
#' is sent as-is and every other element is treated as a parameter whose value
#' must be supplied, either through \code{q_values} or interactively.
#'
#' The function stops with an error when the file has fewer than two lines,
#' when a parameter of the query cannot be parsed, when the number of values
#' in \code{q_values} differs from the number of parameters, when the server
#' answers with an HTTP error status, or when no results table can be located
#' in the response.
#'
#' @param path_to_iqy the path to the .iqy file. Can be a relative path.
#' @param q_values a character vector (or something that can be coerced to it
#' with \code{as.character}). Should contain exactly one value for every
#' parameter in the query, in adequate order. Parameters are almost always
#' start and end date, but use only if you are sure of this. If missing, the
#' program will ask the user to input these values.
#' @param ... arguments passed to \code{httr::POST}. For example, you can pass
#' \code{httr::timeout(20)} to wait 20 seconds for the response if the default
#' is not enough.
#'
#' @return A data frame containing the requested data. The first column is
#' converted to \code{Date} (parsed as day-month-year) and every other column
#' to double; values that cannot be converted become \code{NA}. The data frame
#' carries the following attributes:
#' \enumerate{
#' \item \code{data_def}: definition of the data requested
#' \item \code{data_types}: type of data stored in each column
#' }
#' @seealso Visit the BCCh database at \url{https://si3.bcentral.cl/siete}.
#'
#' @examples
#'
#' \dontrun{
#' # this should ask the user for a start and an end date
#' get_bcch_data("UF_IVP_DIARIO.iqy")
#' }
#'
#' @export
get_bcch_data <- function(path_to_iqy, q_values, ...) {
  # read iqy file
  iqy_content <- readLines(path_to_iqy, warn = FALSE, encoding = "latin1")
  if (length(iqy_content) < 2L) {
    stop("The .iqy file must contain the URL on its first line and the ",
         "query on its second line.", call. = FALSE)
  }

  # parse iqy file: line 1 is the URL, line 2 the "&"-separated query
  api_url <- iqy_content[1L]
  query_params <- strsplit(iqy_content[2L], "&", fixed = TRUE)[[1L]]
  if (length(query_params) == 0L) {
    stop("The query on the second line of the .iqy file is empty.",
         call. = FALSE)
  }
  # the last element is sent verbatim; the preceding ones are the parameters
  # whose values have to be provided
  query_code <- query_params[length(query_params)]
  query_params <- query_params[-length(query_params)]
  # drop = FALSE keeps a matrix even when the query has a single parameter
  query_par_mat <- stringr::str_match(
    query_params,
    "^([^=]+)=\\[\"(\\w+)\",\"([\\w\\s]+)\"\\]$"
  )[, -1L, drop = FALSE]
  colnames(query_par_mat) <- c("param_name", "param_name_2", "param_prompt")
  if (anyNA(query_par_mat[, "param_name"])) {
    stop("Could not parse every parameter of the query in the .iqy file.",
         call. = FALSE)
  }
  n_params <- nrow(query_par_mat)

  # check if user provided values for parameters
  if (missing(q_values)) {
    cat("Values not provided. Please input a value for each parameter:\n\n")
    # read user input, one line per parameter
    q_values <- character(n_params)
    for (i in seq_along(q_values)) {
      cat(paste0(query_par_mat[i, "param_prompt"], ": "))
      answer <- readLines(n = 1L)
      if (length(answer) == 0L) {
        stop("No input received for parameter '",
             query_par_mat[i, "param_prompt"], "'.", call. = FALSE)
      }
      q_values[i] <- answer
      cat("\n")
    }
  } else {
    q_values <- as.character(q_values)
    if (length(q_values) != n_params) {
      stop("`q_values` must contain ", n_params, " value(s), one per query ",
           "parameter, but has ", length(q_values), ".", call. = FALSE)
    }
  }

  # construct body of query for httr::POST: one named entry per parameter plus
  # the trailing "name=value" element of the query, split at its first "="
  pos_eq_code <- as.integer(regexpr("=", query_code, fixed = TRUE))
  post_body <- c(
    as.list(q_values),
    substr(query_code, pos_eq_code + 1L, nchar(query_code))
  )
  names(post_body) <- c(
    query_par_mat[, "param_name"],
    substr(query_code, 1L, pos_eq_code - 1L)
  )

  # post query; fail early on an HTTP error instead of parsing an error page
  r <- httr::POST(url = api_url, body = post_body, encode = "form", ...)
  httr::stop_for_status(r)
  # parse the response once and reuse it for both extractions
  page <- httr::content(r)
  results_cells <- rvest::html_text(
    rvest::html_nodes(x = page, xpath = "//td[not(@colspan) and not(table)]")
  )
  # extract data definition
  data_def <- rvest::html_text(
    rvest::html_nodes(x = page, xpath = "//td[@colspan]")
  )

  # trick to get # of cols: find position of blank cell below "FECHA"
  res_n_cols <- match("", results_cells) - 1L
  if (is.na(res_n_cols) || res_n_cols < 1L) {
    stop("Could not locate the results table in the server response. ",
         "Check the values supplied for the query parameters.", call. = FALSE)
  }

  # convert to dataframe. The first two rows of cells hold the column names
  # and the data types; everything after them is data. Cells come in document
  # order, i.e. row by row, so the matrix has to be filled by row.
  data_cells <- results_cells[-seq_len(2L * res_n_cols)]
  res_df <- as.data.frame(
    matrix(data = data_cells, ncol = res_n_cols, byrow = TRUE),
    stringsAsFactors = FALSE
  )
  names(res_df) <- results_cells[seq_len(res_n_cols)]
  attr(res_df, "data_def") <- data_def
  # second row of cells, skipping the blank one below the date column
  attr(res_df, "data_types") <-
    results_cells[res_n_cols + 1L + seq_len(res_n_cols - 1L)]

  # format data: the first column holds the dates
  res_df[[1L]] <- as.Date(res_df[[1L]], format = "%d-%m-%Y")
  # convert data to doubles. warnings are due to missing data points
  if (res_n_cols > 1L) {
    value_cols <- seq_len(res_n_cols)[-1L]
    res_df[value_cols] <- suppressWarnings(
      lapply(res_df[value_cols], as.double)
    )
  }
  res_df
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.