Nothing
#' Collect the download links for RBA statistical tables
#'
#' Delegates the scraping to `scrape_indiv_table_list()` (once per requested
#' page) and then passes the combined result through `note_readable()`.
#' @param cur_hist "current", "historical", or "all". With "all", the
#' current and historical listings are scraped in that order and row-bound.
#' @return A tibble containing the text and URL of XLS/XLSX links, as
#' returned by `note_readable()`.
#' @keywords internal
scrape_table_list <- function(cur_hist = "all") {
  single_pages <- c("current", "historical")

  if (cur_hist %in% single_pages) {
    # One specific page was requested
    scraped <- scrape_indiv_table_list(cur_hist = cur_hist)
  } else {
    # Anything that is neither a single page nor "all" is rejected
    if (cur_hist != "all") {
      stop("cur_hist must be 'current', 'historical', or 'all'.")
    }
    scraped <- purrr::map_dfr(
      .x = single_pages,
      .f = scrape_indiv_table_list
    )
  }

  note_readable(scraped)
}
#' Scrape a list of RBA tables.
#'
#' Not intended to be called directly - called from
#' `scrape_table_list()`.
#'
#' Reads one of the two RBA listing pages, keeps the anchor nodes whose
#' markup contains "xls", and splits each link's text on its last en dash
#' into a table title and a table number.
#' @param cur_hist Either "current" or "historical"; selects which listing
#' page and CSS selector are used. Any other value is an error.
#' @return A tibble with character columns `title`, `no`, `url` and
#' `current_or_historical`. Links with no table number, and links whose text
#' mentions "Occasional Paper" or "Download", are dropped.
#' @noRd
scrape_indiv_table_list <- function(cur_hist = "current") {
  if (cur_hist == "current") {
    table_url <- "https://www.rba.gov.au/statistics/tables/"
    css_selector <- "#tables-list li a"
  } else if (cur_hist == "historical") {
    table_url <- "https://www.rba.gov.au/statistics/historical-data.html"
    css_selector <- ".width-text li a"
  } else {
    # Fail here rather than later with "object 'table_url' not found"
    stop("cur_hist must be 'current' or 'historical'.")
  }

  table_page <- safely_read_html(url = table_url)

  link_list <- rvest::html_nodes(table_page, css_selector)
  link_list <- link_list[grepl("xls", link_list, fixed = TRUE)]

  excel_links <- rvest::html_attr(link_list, "href")
  excel_text <- rvest::html_text(link_list, trim = TRUE)
  stopifnot(identical(length(excel_links), length(excel_text)))

  # U+2013 is the EN DASH; it separates the title from the table number
  endash <- "\u2013"
  # Matches only the LAST en dash in a string (one not followed by another)
  regex_string <- paste0(endash, "(?![^", endash, "]*", endash, ")")

  # Some historical tables don't have a table number; we add one
  if (cur_hist == "historical") {
    excel_text <- dplyr::case_when(
      grepl("Exchange Rates", excel_text) &
        grepl("Daily", excel_text) &
        grepl("Current", excel_text) ~
        paste0(
          excel_text, " ", endash, " ",
          "ex_daily_",
          stringr::str_sub(excel_text, -13, -12),
          "cur"
        ),
      grepl(paste0("Exchange Rates ", endash, " Daily"), excel_text) ~
        paste0(
          excel_text, " ", endash, " ",
          "ex_daily_",
          stringr::str_sub(excel_text, -10, -9),
          stringr::str_sub(excel_text, -2, -1)
        ),
      grepl(paste0("Exchange Rates ", endash, " Monthly"), excel_text) &
        grepl("current", excel_text) ~
        paste0(excel_text, " ", endash, " ", "ex_monthly_10cur"),
      grepl(paste0("Exchange Rates ", endash, " Monthly"), excel_text) &
        grepl("1969", excel_text) ~
        paste0(excel_text, " ", endash, " ", "ex_monthly_6909"),
      TRUE ~ excel_text
    )
  }

  # Prefix the site root onto relative hrefs only; an href that is already
  # absolute is kept as-is instead of being corrupted by the prefix.
  full_urls <- paste0("https://www.rba.gov.au", excel_links)
  is_absolute <- grepl("^https?://", excel_links)
  full_urls[is_absolute] <- excel_links[is_absolute]

  table_list <- dplyr::tibble(
    title = excel_text,
    url = full_urls
  )

  # `col` is a tidyselect argument, so the column is named with a string
  # (using `.data$title` here is deprecated in tidyselect >= 1.2.0).
  table_list <- table_list %>%
    tidyr::separate(
      col = "title",
      into = c("title", "no"),
      sep = regex_string,
      fill = "right"
    ) %>%
    dplyr::mutate(dplyr::across(
      c("title", "no"),
      stringr::str_trim
    ))

  # `excel_text` is the un-split link text, not a column of `table_list`;
  # separate() keeps row count and order, so it lines up row-for-row and the
  # mask can be built explicitly instead of relying on data-mask fall-through.
  keep <- !is.na(table_list$no) &
    !grepl("Occasional Paper", excel_text) &
    !grepl("Download", excel_text)
  table_list <- table_list[keep, ]

  table_list$current_or_historical <- cur_hist

  table_list
}
#' Indicate tables that can't be read by `read_rba()`
#'
#' Some of these are non-time series; others are very old and formatted in a
#' non-standard way. For now these are hard-coded into this function; in
#' future I will work on programmatically recognising readable tables.
#' @param table_list A dataframe generated by `scrape_indiv_table_list()`;
#' it must contain the columns `no` and `current_or_historical`.
#' @return `table_list` with a logical column `readable` added (or
#' overwritten): `FALSE` for the hard-coded table numbers listed below,
#' `TRUE` for every other row.
#' @noRd
#' @keywords internal
note_readable <- function(table_list) {
  required_cols <- c("no", "current_or_historical")
  missing_cols <- setdiff(required_cols, names(table_list))
  if (length(missing_cols) > 0) {
    stop(
      "table_list is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  unreadable_current <- c("E3", "E4", "E5", "E6", "E7")
  unreadable_historical <- c(
    "A3", "J1", "J2", "E4", "E5", "E6", "E7", "F16", "F17"
  )

  table_no <- table_list[["no"]]
  table_source <- table_list[["current_or_historical"]]

  # `%in%` never yields NA, so rows with a missing source or number are
  # treated as readable rather than propagating NA into the flag.
  unreadable <-
    (table_source %in% "current" & table_no %in% unreadable_current) |
    (table_source %in% "historical" & table_no %in% unreadable_historical)

  table_list$readable <- !unreadable
  table_list
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.