Nothing
#' Import air quality data from European database
#'
#' This function is a simplified version of the \code{saqgetr} package (see
#' \url{https://github.com/skgrange/saqgetr}) for accessing European air quality
#' data. The function only returns valid hourly data and is meant as a fast and
#' convenient way of accessing the most common type of hourly air quality data.
#' The function works in the same way as other \code{openair} functions that
#' import air quality data that generally need a site code and year to be
#' supplied.
#'
#' The function can however return key site meta data.
#'
#' The \code{saqgetr} package is much more comprehensive and provides data at
#' other time averages e.g. daily data.
#'
#' @param site The code of the site(s).
#' @param year Year or years to import. To import a sequence of years from 1990
#' to 2000 use \code{year = 1990:2000}. To import several specific years use
#' \code{year = c(1990, 1995, 2000)} for example.
#' @param tz Not used
#' @param meta Should meta data be returned? If \code{TRUE} the site type,
#' latitude and longitude are returned.
#' @param to_narrow By default the returned data has a column for each
#' pollutant/variable. When \code{to_narrow = TRUE} the data are stacked into
#' a narrow format with a column identifying the pollutant name.
#' @param progress Show a progress bar when many sites/years are being imported?
#' Defaults to `TRUE`.
#'
#' @return a [tibble][tibble::tibble-package]
#' @family import functions
#' @export
#'
#' @examples
#'
#' # import data for Stuttgart Am Neckartor (S)
#' \dontrun{
#' stuttgart <- importEurope("debw118", year = 2010:2019, meta = TRUE)
#' }
#'
importEurope <- function(site = "debw118",
year = 2018,
tz = "UTC",
meta = FALSE,
to_narrow = FALSE,
progress = TRUE) {
site <- tolower(site)
# The directory
remote_path <-
"http://aq-data.ricardo-aea.com/R_data/saqgetr/observations"
# Produce file names
file_remote <- crossing(site = site,
year = year) %>%
arrange(site,
year) %>%
mutate(
file_remote = paste0(
remote_path,
"/",
year,
"/",
"air_quality_data_site_",
site,
"_",
year,
".csv.gz"
)
) %>%
pull(file_remote)
# Load files
if (progress)
progress <- "Importing Air Quality Data"
df <- purrr::map(file_remote,
~ get_saq_observations_worker(file = .x, tz = tz),
.progress = progress) %>%
purrr::list_rbind()
if (nrow(df) == 0L) {
warning("No data available,")
return()
}
# just hourly observations
df <- filter(df, summary == 1)
if (!to_narrow) {
df <- make_saq_observations_wider(df)
} else {
df <- select(df,-summary,-process,-validity)
}
# don't need end date
df <- select(df,-date_end) %>%
rename(code = site)
if (meta) {
meta <- importMeta("europe")
df <- left_join(df, meta, by = "code")
}
df <- arrange(df, code, date)
return(df)
}
get_saq_observations_worker <- function(file, tz) {
# Read data
df <- read_saq_observations(file, tz)
if (nrow(df) == 0) {
return()
}
df <- filter(df, validity %in% c(1, 2, 3) | is.na(validity))
return(df)
}
# Reading function
read_saq_observations <- function(file, tz = tz, verbose) {
# Data types
col_types <- cols(
date = col_character(),
date_end = col_character(),
site = col_character(),
variable = col_character(),
process = col_integer(),
summary = col_integer(),
validity = col_integer(),
unit = col_character(),
value = col_double()
)
# Create gz connection
con <- file %>%
url() %>%
gzcon()
df <- tryCatch({
# Read and parse dates, quiet supresses time zone conversion messages and
# warning supression is for when url does not exist
suppressWarnings(
readr::read_csv(con, col_types = col_types, progress = FALSE) %>%
mutate(
date = lubridate::ymd_hms(date, tz = tz, quiet = TRUE),
date_end = lubridate::ymd_hms(date_end, tz = tz, quiet = TRUE)
)
)
},
error = function(e) {
# Close the connection on error
close.connection(con)
tibble()
})
if (nrow(df) == 0) {
warning(paste(basename(file), "is missing."))
}
return(df)
}
make_saq_observations_wider <- function(df) {
tryCatch({
df %>%
select(date,
date_end,
site,
variable,
value) %>%
spread(variable, value)
},
error = function(e) {
warning(
"Duplicated date-site-variable combinations detected, observations have been removed...",
call. = FALSE
)
df %>%
select(date,
date_end,
site,
variable,
value) %>%
distinct(date,
site,
variable,
.keep_all = TRUE) %>%
spread(variable, value)
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.