# Chunk defaults for the whole document: cache chunk results, collapse source
# and output into one block, and prefix printed output with "#>".
knitr::opts_chunk$set(
  cache = TRUE,
  collapse = TRUE,
  comment = "#>"
)
Create the `esa_sme` dataset avoiding needless computation and requests:

* `cache` parameter to `FALSE` in the setup chunk.
* `request` parameter to `TRUE` in the yaml header.

library(dplyr, warn.conflicts = FALSE) library(janitor, warn.conflicts = FALSE) library(fs) library(purrr) library(usethis) library(here) devtools::load_all()
How many pages do we need to scrape?
# Any page will do here: its response is only used to find out how many pages
# there are in total.
any_page <- 2
json <- esa_sme_json(page = any_page)
last_page <- json %>% esa_sme_last_page_json()

last_page
Ensure we have a directory in which to save the response of each request.
# Directory that holds one `.json` file per page; the following chunks write
# to it and then list it with `dir_ls()`.
# NOTE(review): presumably `create_data_raw_dir()` creates the directory when
# it is missing and returns its path -- confirm against the helper.
pages_dir <- create_data_raw_dir("esa_sme", "page")
Write each response to a json file.
# Cover every page, or only the first `params$pages_n` pages when that
# parameter is anything other than "all".
pages <- seq_len(last_page)
if (params$pages_n != "all") {
  pages <- head(pages, params$pages_n)
}

# One request per page; each response goes to `<page>.json` in `pages_dir`.
requests <- map(pages, esa_sme_request)
paths <- path(pages_dir, paste0(pages, ".json"))
walk2(requests, paths, esa_sme_req_write)
Transform each .json file into a row of a data frame.
# Every file found in `pages_dir` becomes rows of the summary; `page_path`
# records which file each row came from.
jsons <- dir_ls(pages_dir)
esa_sme_summary <- map_df(
  map(jsons, esa_sme_json2html),
  esa_sme_enframe,
  .id = "page_path"
)
Tidy.
# Derive short ids from the file name (without extension) of the page path and
# of the details URL.
esa_sme_summary <- mutate(
  esa_sme_summary,
  page_id = path_ext_remove(path_file(page_path)),
  details_id = path_ext_remove(path_file(details_url))
)

esa_sme_summary
Ensure we have a directory in which to save the response of each request.
# Directory that holds one `.json` file per details page; the following chunks
# write to it and then list it with `dir_ls()`.
# NOTE(review): presumably `create_data_raw_dir()` creates the directory when
# it is missing and returns its path -- confirm against the helper.
details_dir <- create_data_raw_dir("esa_sme", "details")
Write each response to a json file.
# Request the details of every entry in the summary, or only of the first
# `params$pages_n` entries when that parameter is anything other than "all".
details_id <- path_file(esa_sme_summary$details_url)
if (params$pages_n != "all") {
  details_id <- head(details_id, params$pages_n)
}

# One request per entry; each response goes to `<details_id>.json` in
# `details_dir`.
requests <- map(details_id, esa_sme_details_request)
paths <- path(details_dir, paste0(details_id, ".json"))
walk2(requests, paths, esa_sme_req_write)
Transform each .json file into a row of a data frame.
# Every file found in `details_dir` becomes rows of the details data frame;
# `details_id` initially holds the path of the file each row came from.
jsons <- dir_ls(details_dir)
esa_sme_details <- jsons %>%
  map(esa_sme_json2html) %>%
  # Keep the wrapper: this chunk assigns a data frame to the name
  # `esa_sme_details`, so on a re-run the bare name would no longer refer to
  # the function, whereas a call still resolves to it.
  map_df(function(html) esa_sme_details(html), .id = "details_id")
Tidy.
# Reduce `details_id` from a file path to the bare file name without its
# extension, standardise the column names, and drop `name`.
esa_sme_details <- esa_sme_details %>%
  mutate(details_id = path_ext_remove(path_file(.data$details_id))) %>%
  clean_names() %>%
  # `.data$` inside `select()` is deprecated (tidyselect >= 1.2.0); a quoted
  # column name selects the same column without the warning.
  select(-"name")

esa_sme_details
Combine the summary and details datasets and tidy.
# Attach the details to each summary row, sort, move the id columns to the
# front, and drop the helper columns the ids were derived from.
# NOTE(review): `page_id` is derived from file names, so it presumably sorts
# as text ("10" before "2") -- confirm that this row order is intended.
esa_sme <- esa_sme_summary %>%
  left_join(esa_sme_details, by = "details_id") %>%
  arrange(.data$page_id, .data$name) %>%
  relocate(page_id, details_id) %>%
  # `.data$` inside `select()` is deprecated (tidyselect >= 1.2.0); quoted
  # column names select the same columns without the warning.
  select(-"page_path", -"details_url")

esa_sme
Export.
# Save `esa_sme` as package data, replacing any previously exported version.
usethis::use_data(esa_sme, overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.