Nothing
#' Import air quality data from European database until February 2024
#'
#' This function is a simplified version of the `saqgetr` package (see
#' <https://github.com/skgrange/saqgetr>) for accessing European air quality
#' data. As `saqgetr` was retired in February 2024, this function has also been
#' retired, but can still access European air quality data up until that
#' retirement date. Consider using the EEA Air Quality Download Service instead
#' (<https://eeadmz1-downloads-webapp.azurewebsites.net/>).
#'
#' @param site The code of the site(s).
#' @param year Year or years to import. To import a sequence of years from 1990
#' to 2000 use `year = 1990:2000`. To import several specific years use `year
#' = c(1990, 1995, 2000)` for example.
#' @param tz Not used
#' @param meta Should meta data be returned? If `TRUE` the site type, latitude
#' and longitude are returned.
#' @param to_narrow By default the returned data has a column for each
#' pollutant/variable. When `to_narrow = TRUE` the data are stacked into a
#' narrow format with a column identifying the pollutant name.
#' @param progress Show a progress bar when many sites/years are being imported?
#' Defaults to `TRUE`.
#'
#' @return a `tibble`
#' @family import functions
#' @export
#'
#' @examples
#'
#' # import data for Stuttgart Am Neckartor (S)
#' \dontrun{
#' stuttgart <- importEurope("debw118", year = 2010:2019, meta = TRUE)
#' }
#'
importEurope <- function(
site = "debw118",
year = 2018,
tz = "UTC",
meta = FALSE,
to_narrow = FALSE,
progress = TRUE
) {
# warn/error w/ deprecation
msg <-
c(
"!" = "{.fun importEurope} has been discontinued and cannot import data after February 2024.",
"i" = "Consider using the EEA Air Quality Download Service instead {.url https://eeadmz1-downloads-webapp.azurewebsites.net/}"
)
if (any(year > 2024)) {
cli::cli_abort(msg)
} else {
cli::cli_inform(msg, .frequency = "regularly", .frequency_id = "europe")
}
site <- tolower(site)
# The directory
remote_path <-
"http://aq-data.ricardo-aea.com/R_data/saqgetr/observations"
# Produce file names
file_remote <- tidyr::crossing(
site = site,
year = year
) |>
dplyr::arrange(
site,
year
) |>
dplyr::mutate(
file_remote = paste0(
remote_path,
"/",
year,
"/",
"air_quality_data_site_",
site,
"_",
year,
".csv.gz"
)
) |>
dplyr::pull(file_remote)
# Load files
if (progress) {
progress <- "Importing Air Quality Data"
}
df <- purrr::map(
file_remote,
~ get_saq_observations_worker(file = .x, tz = tz),
.progress = progress
) |>
purrr::list_rbind()
if (nrow(df) == 0L) {
warning("No data available,")
return()
}
# just hourly observations
df <- dplyr::filter(df, .data$summary == 1)
if (!to_narrow) {
df <- make_saq_observations_wider(df)
} else {
df <- dplyr::select(df, -"summary", -"process", -"validity")
}
# don't need end date
df <- dplyr::select(df, -"date_end") |>
dplyr::rename(code = "site")
if (meta) {
meta <- importMeta("europe")
df <- dplyr::left_join(df, meta, by = "code")
}
df <- dplyr::arrange(df, .data$code, .data$date)
return(df)
}
get_saq_observations_worker <- function(file, tz) {
# Read data
df <- read_saq_observations(file, tz)
if (nrow(df) == 0) {
return()
}
df <- dplyr::filter(
df,
.data$validity %in% c(1, 2, 3) | is.na(.data$validity)
)
return(df)
}
# Reading function
read_saq_observations <- function(file, tz = tz, verbose) {
# Data types
col_types <- readr::cols(
date = readr::col_character(),
date_end = readr::col_character(),
site = readr::col_character(),
variable = readr::col_character(),
process = readr::col_integer(),
summary = readr::col_integer(),
validity = readr::col_integer(),
unit = readr::col_character(),
value = readr::col_double()
)
# Create gz connection
con <- file |>
url() |>
gzcon()
df <- tryCatch(
{
# Read and parse dates, quiet supresses time zone conversion messages and
# warning supression is for when url does not exist
suppressWarnings(
readr::read_csv(con, col_types = col_types, progress = FALSE) |>
dplyr::mutate(
date = lubridate::ymd_hms(.data$date, tz = tz, quiet = TRUE),
date_end = lubridate::ymd_hms(.data$date_end, tz = tz, quiet = TRUE)
)
)
},
error = function(e) {
# Close the connection on error
close.connection(con)
dplyr::tibble()
}
)
if (nrow(df) == 0) {
warning(paste(basename(file), "is missing."))
}
return(df)
}
make_saq_observations_wider <- function(df) {
tryCatch(
{
df |>
dplyr::select(
"date",
"date_end",
"site",
"variable",
"value"
) |>
tidyr::spread("variable", "value")
},
error = function(e) {
warning(
"Duplicated date-site-variable combinations detected, observations have been removed...",
call. = FALSE
)
df |>
dplyr::select(
"date",
"date_end",
"site",
"variable",
"value"
) |>
dplyr::distinct("date", "site", "variable", .keep_all = TRUE) |>
tidyr::spread("variable", "value")
}
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.