Nothing
#' worker function that downloads data from a range of networks run by Ricardo
#' @noRd
readUKAQData <-
function(site = "my1",
year = 2009,
data_type = "hourly",
pollutant = "all",
hc = FALSE,
ratified = FALSE,
to_narrow = FALSE,
verbose = FALSE,
source = "aurn",
url_data,
lmam_subfolder) {
# add to path if source = "local"
if (source == "local") {
url_data <- paste0(url_data, lmam_subfolder, "/")
}
# For file name matching, needs to be exact
site <- toupper(site)
files <- paste0(site, "_", year)
# Download and load data.
thedata <-
suppressWarnings(loadData(
x = files,
verbose = verbose,
url_data = url_data,
data_type = data_type
))
# suppress warnings for now - unequal factors, harmless
if (is.null(thedata)) {
cli::cli_abort("No data to import for {.arg site} {.field {site}} and {.arg year} {.field {year}} from {.arg source} {.field {source}}.")
}
# Return if no data
if (nrow(thedata) == 0) {
return()
} ## no data
# change names
names(thedata) <- tolower(names(thedata))
# change nox as no2
id <- which(names(thedata) %in% "noxasno2")
if (length(id) == 1) names(thedata)[id] <- "nox"
# change code to character
thedata$code <- as.character(thedata$code)
# should hydrocarbons be imported?
if (!hc) {
## no hydrocarbons - therefore select conventional pollutants
theNames <- c(
"site", "code", "date", "co", "nox", "no2", "no", "o3", "so2", "pm10",
"pm2.5", "v10", "v2.5", "nv10", "nv2.5", "gr_pm10", "gr_pm2.5",
"ws", "wd", "temp"
)
thedata <- select(thedata, any_of(theNames), matches("_qc"))
} else {
thedata <- dplyr::relocate(thedata, dplyr::any_of(c("site", "code", "date")))
}
# rename "temp" to "air_temp" if appropriate
if ("temp" %in% names(thedata)) {
thedata <- rename(thedata, air_temp = temp)
}
# if particular pollutants have been selected
if (pollutant[1] != "all") {
thedata <-
dplyr::select(thedata, "date", "site", "code",
dplyr::any_of(c(pollutant, "ws", "wd", "air_temp")))
}
# make sure it is in GMT
attr(thedata$date, "tzone") <- "GMT"
# tidy data if requested
if (to_narrow) {
# variables to select or not select
the_vars <- c(
"date", "site", "code",
"latitude", "longitude", "site_type",
"ws", "wd", "air_temp"
)
thedata <-
tidyr::pivot_longer(thedata,
cols = -dplyr::any_of(the_vars),
names_to = "pollutant"
)
# clean tidied data before returning
thedata <- thedata %>%
dplyr::relocate(dplyr::any_of(the_vars)) %>%
dplyr::arrange(site, code, pollutant, date)
}
# add source to output
thedata <-
dplyr::mutate(thedata, source = source, .before = dplyr::everything())
as_tibble(thedata)
}
#' Define downloading and loading function
#' @noRd
loadData <- function(x, verbose, url_data, data_type) {
tryCatch(
{
# Build the file name
fileName <- paste0(
url_data, x,
".RData"
)
# Load the rdata object
con <- url(fileName)
load(con)
# Find appropriate extension per `data_type`
x <- switch(data_type,
hourly = x,
`15min` = paste0(x, "_15min"),
daily = paste0(x, "_daily_mean"),
`8_hour` = paste0(x, "_8hour_mean"),
`24_hour` = paste0(x, "_24hour_mean"),
daily_max_8 = paste0(x, "_daily_max_8hour")
)
# Gravimetric PM is in separate file
# These are measured daily PM measurements rather than daily mean hourly
x2 <- gsub("_daily_mean", "_daily", x)
# Reasign
dat <- get(x)
# if there are two daily data frames to combine
if (data_type == "daily" & exists(x2)) {
dat2 <- get(x2)
dat <- left_join(dat, dat2,
by = c("date", "site", "code")
)
lookup <- c(gr_pm2.5 = "GR2.5", gr_pm10 = "GR10")
dat <- dat %>%
rename(any_of(lookup))
}
# make sure class is correct for lubridate
class(dat$date) <- c("POSIXct", "POSIXt")
return(dat)
},
error = function(ex) {
# Print a message
if (verbose) {
cli::cli_warn(c("i" = "{x} does not exist - ignoring that one."))
return(NULL)
}
},
finally = {
close(con)
}
)
}
#' function to read annual or monthly files
#' @noRd
readSummaryData <-
function(files, year, source, data_type, to_narrow, meta, hc) {
fileName <- purrr::map(year, function(x) paste0(files, x, ".rds")) %>%
purrr::list_c()
thedata <- try(readRDS(url(fileName)), TRUE)
if (inherits(thedata, "try-error")) {
return()
}
names(thedata)[names(thedata) == "NOXasNO2.mean"] <- "nox"
names(thedata)[names(thedata) == "NOXasNO2.capture"] <-
"nox_capture"
names(thedata) <- tolower(names(thedata))
names(thedata) <- gsub(".mean", "", names(thedata))
names(thedata) <- gsub(".capture", "_capture", names(thedata))
if (!hc) {
thedata <- thedata %>%
select(any_of(
c(
"date",
"uka_code",
"code",
"site",
"year",
"o3",
"o3_capture",
"o3.summer_capture",
"o3.daily.max.8hour",
"o3.aot40v",
"o3.aot40f",
"somo35",
"somo35_capture",
"no",
"no_capture",
"no2",
"no2_capture",
"nox",
"nox_capture",
"so2",
"so2_capture",
"co",
"co_capture",
"pm10",
"pm10_capture",
"nv10",
"nv10_capture",
"v10",
"v10_capture",
"pm2.5",
"pm2.5_capture",
"nv2.5",
"nv2.5_capture",
"v2.5",
"v2.5_capture",
"gr10",
"gr10_capture",
"gr2.5",
"gr2.5_capture",
"gr_pm2.5",
"gr_pm10"
)
))
}
if (data_type == "monthly") {
thedata <- mutate(thedata, date = ymd(date, tz = "UTC"))
}
if (data_type == "annual") {
thedata <- rename(thedata, date = year) %>%
drop_na(date) %>%
mutate(date = ymd(paste0(date, "-01-01"), tz = "UTC"))
}
if (to_narrow) {
# make sure numbers are numbers
values <- select(thedata, !contains("capture")) %>%
select(!matches("uka_code"))
capture <-
select(thedata, contains("capture") | c(code, date, site)) %>%
select(!matches("uka_code"))
values <- pivot_longer(values,
-c(date, code, site),
values_to = "value",
names_to = "species"
)
capture <- pivot_longer(capture,
-c(date, code, site),
values_to = "data_capture",
names_to = "species"
)
capture$species <- gsub("_capture", "", capture$species)
thedata <- full_join(values, capture,
by = c("date", "code", "site", "species")
)
}
thedata <- thedata %>%
mutate(
site = as.character(site),
code = as.character(code)
) %>%
mutate(
source = source,
.before = dplyr::everything()
)
return(thedata)
}
#' Function to read DAQI data from a file
#' @noRd
readDAQI <- function(files, year, source) {
fileName <- purrr::map(year, function(x) paste0(files, x, ".rds")) %>%
purrr::list_c()
thedata <- try(readRDS(url(fileName)), TRUE)
if (inherits(thedata, "try-error")) {
return()
}
thedata <- thedata %>%
mutate(
code = as.character(code),
site = as.character(site),
pollutant = as.character(pollutant),
date = ymd(Date, tz = "GMT"),
measurement_period = as.character(measurement_period)
) %>%
select(-Date) %>%
relocate(date, .after = pollutant) %>%
mutate(
source = source,
.before = dplyr::everything()
)
return(thedata)
}
#' function to add meta data based on network and supply of aq data
#' @param source network of interest (e.g., "aurn")
#' @param aq_data imported air quality data (annual, daqi, or otherwise)
#' @noRd
add_meta <- function(source, aq_data) {
meta_data <- importMeta(source = source)
meta_data <- distinct(meta_data, source, site, .keep_all = TRUE) %>%
select(source, site, code, latitude, longitude, site_type)
aq_data <- left_join(aq_data, meta_data, by = c("source", "code", "site"))
return(aq_data)
}
#' Function to add ratified flag to hourly data
#' @noRd
add_ratified <- function(aq_data, source, to_narrow) {
meta <-
importMeta(unique(source), all = T) %>%
dplyr::filter(
code %in% aq_data$code,
!variable %in% c(
"V10", "NV10", "V2.5", "NV2.5",
"ws", "wd", "temp"
)
) %>%
dplyr::select(source, code, variable, ratified_to) %>%
dplyr::mutate(variable = tolower(variable))
if (to_narrow) {
meta <-
dplyr::rename(meta, "pollutant" = variable, "qc" = ratified_to) %>%
dplyr::filter(pollutant %in% tolower(aq_data$pollutant))
aq_data <-
aq_data %>%
dplyr::left_join(meta, by = dplyr::join_by(source, code, pollutant)) %>%
dplyr::mutate(qc = date <= .data$qc)
return(aq_data)
}
meta <-
meta %>%
dplyr::filter(variable %in% names(aq_data)) %>%
tidyr::pivot_wider(
names_from = variable,
values_from = ratified_to,
names_glue = "{variable}_qc"
)
aq_data <-
aq_data %>%
dplyr::left_join(meta, by = dplyr::join_by(source, code)) %>%
dplyr::mutate(dplyr::across(dplyr::contains("_qc"), function(x) {
date <= x
}))
return(aq_data)
}
#' Function to filter annual/DAQI stats using
#' @param site,pollutant,to_narrow Inherits from parent function
#' @noRd
filter_site_pollutant <- function(aq_data, site, pollutant, to_narrow, data_type) {
# if site isn't missing, filter by sites
if (any(site != "all")) {
aq_data <- aq_data[tolower(aq_data$code) %in% tolower(site), ]
}
# if pollutant isn't "all", filter pollutants
if (any(pollutant != "all")) {
polls <- paste(c("source", "uka_code", "code", "site", "date", "pollutant", pollutant), collapse = "|")
if (data_type != "daqi") {
if (to_narrow) {
aq_data <- aq_data[grepl(polls, aq_data$species, ignore.case = TRUE), ]
} else {
aq_data <- aq_data[grepl(polls, names(aq_data), ignore.case = TRUE)]
}
} else {
aq_data <- aq_data[grepl(polls, aq_data$pollutant, ignore.case = TRUE), ]
}
}
# return output
return(aq_data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.