# data-raw/countries_cities.R

## Code to prepare the `countries_cities` dataset

library(tidyverse)
library(rvest)
library(jsonlite)

# The country/city correspondence is described in a `.js` file. The file is
# read through the HTML parser and the text of the `/html/body/p` node is used
# as the raw content to search.
city_list_page <- read_html("https://discomap.eea.europa.eu/map/fme/E1a/country_city_list.js")
city_list_node <- html_node(city_list_page, xpath = "/html/body/p")
city_list_text <- html_text(city_list_node)

# Country codes: the two characters that directly follow `["`.
pair_iso <- str_extract_all(city_list_text, '(?<=\\[\").{2}')[[1]]

# City names: the text found between `", "` and the closing `"]`.
pair_city <- str_extract_all(city_list_text, '(?<=\", \").+(?=\"\\])')[[1]]

# One row per extracted (country code, city) pair.
cities_in_countries <- tibble(iso = pair_iso, cities = pair_city)

# Countries which have only one city do not appear in the country/city list
# (`cities_in_countries`), so the ISO code / country name pairs are taken from
# the text of the second <script> element of the export page.
raw_iso_countries <- read_html("https://discomap.eea.europa.eu/map/fme/AirQualityExport.htm") %>%
  html_nodes("script") %>%
  .[2] %>%
  html_text()

# ISO codes: the two characters that directly precede `": "`.
iso <- str_extract_all(raw_iso_countries, '.{2}(?=\":\\s\")')

# Country names: the text after `": "` up to a closing quote at the end of the
# line. The carriage return is optional so that both CRLF and LF line endings
# are accepted.
countries_name <- str_extract_all(raw_iso_countries, '(?<=\":\\s\").+(?=\"\r?\n)')

# Both extractions must yield the same number of matches; otherwise codes and
# names would be misaligned (or a single match silently recycled by tibble()).
stopifnot(length(iso[[1]]) == length(countries_name[[1]]))

countries_iso <- tibble(countries = countries_name[[1]], iso = iso[[1]])

# Attach the cities to each country through the shared `iso` column. The left
# join keeps every row of `countries_iso`; countries without a match in
# `cities_in_countries` get a missing value in `cities`.
countries_cities <- left_join(countries_iso, cities_in_countries, by = "iso")

# Store the result as package data, replacing any previously saved version.
usethis::use_data(countries_cities, overwrite = TRUE)
# vincentbagilet/europollution documentation built on May 22, 2020, 12:07 a.m.