R/ep_get_urls.R

Defines functions ep_get_urls

Documented in ep_get_urls

#' Creates a vector of the URLs for the datasets
#'
#' Sends one request to the AirQualityDownload service hosted at
#' fme.discomap.eea.europa.eu for every combination of requested pollutant,
#' city and data source, and gathers the lines returned by each request.
#'
#' @param country A string. Country ISO code or "all".
#' @param cities A string vector. Contains the names of cities desired or "all".
#' @param pollutants A string vector. Contains the codes of pollutants desired as described in the dataset pollutants_id or "all".
#' @param begin A number. Starting year for the data (an integer between 2000 and the current year, not later than \code{end}).
#' @param end A number. Ending year for the data (an integer between 2000 and the current year).
#' @return A vector of the URLs of the datasets corresponding to the desired parameter values.
#'   An error is raised when \code{begin} or \code{end} is invalid, or when no URL is returned at all.
#'   A warning is raised (and the pollutant skipped) for each pollutant code not found in pollutants_id.
#' @examples
#' ep_get_urls(
#'   country = "FR",
#'   cities = "Lyon",
#'   pollutants = "O3",
#'   begin = 2020, end = 2020
#' )
#'
#' @export
#' @importFrom stringr str_split
#' @importFrom magrittr %>%
#' @importFrom httr GET
#' @importFrom httr content
#' @importFrom purrr as_vector
#' @importFrom utils data
#' @importFrom lubridate year

ep_get_urls <- function(country = "all", cities = "all", pollutants = "all", begin = 2000, end = 2020) {
  # Upper bound follows the calendar so the function keeps working in future years
  current_year <- as.integer(format(Sys.Date(), "%Y"))

  # Scalar, short-circuiting check: a non-numeric, NA or vector input is
  # rejected before any arithmetic is attempted on it
  is_valid_year <- function(x) {
    is.numeric(x) && length(x) == 1 && !is.na(x) &&
      x == floor(x) && x >= 2000 && x <= current_year
  }

  if (!is_valid_year(begin) || !is_valid_year(end)) {
    stop("begin and end have to be integers between 2000 and ", current_year)
  }
  if (begin > end) {
    stop("begin has to be lower than or equal to end")
  }

  # Difference in syntax between europollution and API: "all" is an empty filter
  country_clean <- ifelse(country == "all", "", country)
  cities_clean <- ifelse(cities == "all", "", cities)
  pollutants_clean <- ifelse(pollutants == "all", "", pollutants)

  # Handle the two data sources (2000-2012 and 2013-today)
  if (end < 2013) {
    sources <- "Airbase"
  } else if (begin > 2012) {
    sources <- "All"
  } else {
    sources <- c("Airbase", "All")
  }

  # One list element per request; flattened once at the end instead of
  # growing a vector inside the loops
  url_chunks <- list()

  for (pollutant in pollutants_clean) {
    if (pollutant == "") {
      # "all" pollutants: keep the filter empty rather than looking up an id
      pollutant_id <- ""
    } else {
      pollutant_row <- match(pollutant, europollution::pollutants_id$code)
      if (is.na(pollutant_row)) {
        warning(paste(pollutant, "is not a valid pollutant code."))
        next
      }
      pollutant_id <- europollution::pollutants_id[pollutant_row, "id"]
    }

    for (city in cities_clean) {
      # Every source must be queried, not only the last one
      for (source in sources) {
        request_url <- paste0(
          "https://fme.discomap.eea.europa.eu/fmedatastreaming/AirQualityDownload/AQData_Extract.fmw?CountryCode=",
          country_clean, "&CityName=", city, "&Pollutant=", pollutant_id,
          "&Year_from=", begin, "&Year_to=", end,
          "&Station=&Samplingpoint=&Source=", source,
          "&Output=TEXT&UpdateDate=&TimeCoverage=Year"
        )

        response_lines <- httr::GET(request_url) %>%
          httr::content(as = "text") %>%
          stringr::str_split("\r\n") %>%
          purrr::as_vector() %>%
          utils::head(-1) # last value empty: delete

        url_chunks[[length(url_chunks) + 1]] <- response_lines
      }
    }
  }

  vector_urls <- unlist(url_chunks, use.names = FALSE)

  if (length(vector_urls) == 0) {
    stop("No data corresponds to the values desired")
  }

  vector_urls
}
vincentbagilet/europollution documentation built on May 22, 2020, 12:07 a.m.