#' Scrape and export the dataset `esa_sme`
#'
#' @param pages_n How many pages do you want to scrape? For tests you may scrape
#' only a few pages, say 1 or 2. To update the exported dataset use "all".
#'
#' @export
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' esa_sme_scrape(1)
#' }
esa_sme_scrape <- function(pages_n = "all") {
render(
data_raw("esa_sme.Rmd"),
output_format = github_document(html_preview = FALSE),
params = list(request = TRUE, pages_n = pages_n)
)
}
esa_sme_json <- function(page) {
resp <- page %>%
esa_sme_request() %>%
req_perform()
json <- resp %>%
resp_body_json()
}
#' @examples
#' json <- esa_sme_json(page = 1)
#' esa_sme_last_page_json(json)
#' @noRd
esa_sme_last_page_json <- function(json) {
pages_chr <- json$html %>%
read_html() %>%
html_elements(".pagination") %>%
html_elements("li") %>%
html_text2()
pages_int <- suppressWarnings(as.integer(pages_chr))
max(pages_int, na.rm = TRUE)
}
#' Request one page of esa SME data
#'
#' @examples
#' req <- esa_sme_request(page = 2)
#' req
#' @noRd
esa_sme_request <- function(page) {
request(esa_base_url(), options = dissable_ssl_verifypeer()) %>%
req_url_path_append("PublicEntityDir") %>%
req_url_path_append("PublicEntityDirGridSme") %>%
req_url_query(!!!esa_sme_params(page)) %>%
req_user_agent(pastax_agent())
}
esa_sme_details_request <- function(details_id) {
request(esa_base_url(), options = dissable_ssl_verifypeer()) %>%
req_url_path_append("PublicEntityDir") %>%
req_url_path_append("PublicEntityDirPopupDetailSME") %>%
req_url_path_append(details_id) %>%
req_user_agent(pastax_agent())
}
esa_base_url <- function() {
"https://esastar-emr.sso.esa.int/"
}
dissable_ssl_verifypeer <- function() {
list(ssl_verifypeer = 0)
}
esa_sme_params <- function(page) {
list(
term = "",
isForRegister = "False",
isForEmits = "True",
"grid-page" = page
)
}
pastax_agent <- function() {
"pastax (https://github.com/2DegreesInvesting/pastax)"
}
#' Take an httr2 request object and write the response content to a json file
#' @examples
#' req <- esa_sme_request(1)
#' req %>% esa_sme_req_write(path = tempfile())
#' @noRd
esa_sme_req_write <- function(req, path) {
req %>%
req_perform() %>%
resp_body_json() %>%
write_json(path)
invisible(req)
}
esa_sme_json2html <- function(path) {
read_html(fromJSON(path)$html)
}
esa_sme_enframe <- function(html) {
tibble(
details_url = esa_sme_details_url(html),
name = esa_sme_column(html, 1),
country_of_registration = esa_sme_column(html, 2),
entity_type = esa_sme_column(html, 3),
entity_size = esa_sme_column(html, 4),
esastar_status = esa_sme_column(html, 5),
)
}
esa_sme_details_url <- function(html) {
details <- html %>%
html_elements(".gridDetails") %>%
html_attr("href")
path(esa_base_url(), details)
}
esa_sme_column <- function(html, id) {
html %>%
html_elements(esa_sme_column_css(id)) %>%
html_text2()
}
esa_sme_column_css <- function(id) {
glue(".grid-cell:nth-child({id})")
}
esa_sme_details <- function(html) {
dplyr::bind_cols(
tibble::tibble(description = enframe_description(html)),
details = enframe_info(html)
)
}
esa_sme_base_url <- function() {
esa_base_url()
}
details_url <- function(html) {
details <- html %>%
html_elements(".gridDetails") %>%
html_attr("href")
fs::path(esa_sme_base_url(), details)
}
enframe_info <- function(html) {
ids <- html %>%
html_elements("input") %>%
html_attr("id")
values <- html %>%
html_elements("input") %>%
html_attr("value")
values %>%
purrr::set_names(ids) %>%
tibble::enframe() %>%
dplyr::filter(!.data$name == "") %>%
tidyr::pivot_wider(names_from = .data$name)
}
enframe_description <- function(html) {
html %>%
html_elements(".formEditInput") %>%
html_text2() %>%
trimws() %>%
purrr::keep(nzchar)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.