#' Create the path elements for AEMET API
#'
#' Path vectors for AEMET API to use with httr::GET
#'
#' @param api_options Option list as generated by \code{\link{aemet_options}}
#'
#' @noRd
.create_aemet_path <- function(api_options) {
# we need the resolution to create the corresponding path
resolution <- api_options$resolution
# we need to transform the dates to the specific character format the AEMET path expects.
# We use a lubridate stamp function for that:
aemet_stamp <- lubridate::stamp("2020-12-25T00:00:00UTC", orders = "YOmdHMS", quiet = TRUE)
# current day
if (resolution == 'current_day') {
return(c('opendata', 'api', 'observacion', 'convencional', 'todas'))
}
# daily
if (resolution == 'daily') {
return(
c(
'opendata', 'api', 'valores', 'climatologicos', 'diarios', 'datos',
'fechaini', aemet_stamp(api_options$start_date),
'fechafin', aemet_stamp(api_options$end_date),
'todasestaciones'
)
)
}
# monthly
# monthly API does not work for now
if (resolution %in% c('monthly', 'yearly')) {
# stop if stations is NULL. For the monthly/yearly API, one and only one station must be provided
if (length(api_options$stations) < 1) {
cli::cli_abort(c(
"AEMET API for monthly/yearly aggregated values needs one station to be provided"
))
}
# issue a warning if more than one station is provided
if (length(api_options$stations) > 1) {
cli::cli_warn(c(
"AEMET API for monthly/yearly aggregated values only accepts one station per query.\n",
"Only the first station provided ({.val {api_options$stations[1]}}) will be used."
))
}
return(
c(
'opendata', 'api', 'valores', 'climatologicos', 'mensualesanuales', 'datos',
'anioini', lubridate::year(api_options$start_date), 'aniofin', lubridate::year(api_options$end_date),
'estacion', api_options$stations[1]
)
)
}
# not recognised resolution
cli::cli_abort(c(
"{.arg {api_options$resolution}} is not a valid temporal resolution for AEMET. Please see aemet_options help for more information"
))
}
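# Illustrative sketch (a hypothetical helper, not used by the package): how
# .create_aemet_path() can be exercised with a plain options list containing the
# fields the function actually reads. In the package this list comes from
# aemet_options().
.example_aemet_daily_path <- function() {
  daily_options <- list(
    resolution = 'daily',
    start_date = as.Date('2020-04-01'),
    end_date = as.Date('2020-04-30'),
    stations = NULL
  )
  # returns the path vector used later by httr::GET, with the dates stamped into
  # the 'fechaini'/'fechafin' slots in the AEMET "YYYY-MM-DDTHH:MM:SSUTC" format
  .create_aemet_path(daily_options)
}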
#' Check status and errors for AEMET
#'
#' Check status and errors for AEMET
#'
#' In the AEMET API we have to do a lot of GETs, checking the status of each response before
#' parsing it to avoid errors (html responses instead of json due to limits, among other things).
#' Why? Because the AEMET API does not return data and metadata directly, but a link to a transitory
#' page with the metadata and another link to a transitory page with the data. This means that for
#' a normal daily query the process is the following:
#' \itemize{
#' \item{1. GET to the correct daily path}
#' \item{2. Check statuses and response}
#' \item{3. GET to the data link}
#' \item{4. Check statuses and response}
#' \item{5. GET to the metadata link}
#' \item{6. Check statuses and response}
#' \item{7. GET to the stations info, to get coords, internally using .get_info helper}
#' \item{8. Check statuses and response}
#' \item{9. GET to the stations info data link}
#' \item{10. Check statuses and response}
#' \item{11. GET to the stations info metadata link}
#' \item{12. Check statuses and response}
#' }
#' This makes 6 calls, each of which has to check the statuses, because checking only the
#' main call does not avoid errors due to limits in the subsequent calls to data or metadata.
#' That means repeating the status-checking code 6 times, and changing lines in 6 places
#' when trying to debug or change something.
#'
#' @section Rationale:
#' So, the rationale is the following: a function accepting \code{...} that will be passed to
#' httr::GET, checking the statuses internally. If there is an error, it returns the error code and
#' the message. In the calling function, if the error returned is the API limit, we wait 60 seconds
#' and retry; for any other error, we stop with the corresponding message.
#'
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Arguments for httr::GET
#'
#' @noRd
.check_status_aemet <- function(...) {
# GET step
api_response <- safe_api_access(type = 'rest', ...)
# api_response <- httr::GET(...)
response_status <- httr::status_code(api_response)
# and now the status checks
if (response_status == 404) {
res <- list(
status = 'Error',
code = response_status,
message = glue::glue(
"Unable to connect to AEMET API at {api_response$url}: {httr::http_status(api_response)$message}"
)
)
return(res)
}
if (response_status == 401) {
res <- list(
status = 'Error',
code = response_status,
message = glue::glue("Invalid API Key: {httr::http_status(api_response)$message}")
)
return(res)
}
if (response_status == 429) {
res <- list(
status = 'Error',
code = response_status,
message = glue::glue(
"API request limit reached: {httr::http_status(api_response)$message}"
)
)
return(res)
}
# the content check
if (stringr::str_detect(httr::http_type(api_response), "html")) {
# oops, an html was returned instead of a json or plain text.
html_text <- httr::content(api_response, 'text')
# this can be an html with the infamous hidden 429 error, so we check that
if (stringr::str_detect(html_text, "429 Too Many Requests")) {
res <- list(
status = 'Error',
code = 429,
message = "API request limit reached, taking a cooldown of 60 seconds to reset."
)
} else {
res <- list(
status = 'Error',
code = response_status,
message = glue::glue("AEMET API returned an error: {html_text}")
)
}
return(res)
}
# Now, finally, we are able to access the response content (because now we are sure it is json)
response_content <- jsonlite::fromJSON(httr::content(api_response, as = 'text', encoding = 'ISO-8859-15'))
# the last check: even with a 200 (ok) code, the response can contain no data
if (!rlang::is_null(response_content$estado) && response_content$estado != 200) {
res <- list(
status = 'Error',
code = response_content$estado,
message = glue::glue("AEMET API returned no data: {response_content$descripcion}")
)
return(res)
}
# If we reach here, it is because everything went well
res <- list(
status = 'OK',
code = response_status,
message = "Data received",
content = response_content
)
return(res)
}
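# 429 responses are handled by .manage_429_errors() (defined elsewhere in the
# package), which implements the "wait 60 seconds and retry" logic described in
# the rationale above. A minimal sketch of that pattern, assuming `fun` is the
# calling helper (e.g. .get_data_aemet) and takes api_options as its only
# argument; an illustration, not the package's actual implementation:
.example_manage_429 <- function(status_check, api_options, fun) {
  cli::cli_inform(c(i = status_check$message))
  # AEMET limits are per minute, so a 60 second cooldown is usually enough
  Sys.sleep(60)
  fun(api_options)
}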
#' Get info for the aemet stations
#'
#' Get info for the aemet stations
#'
#' @noRd
.get_info_aemet <- function(api_options) {
# path
path_resolution <- c(
'opendata', 'api', 'valores', 'climatologicos', 'inventarioestaciones', 'todasestaciones'
)
# create httr config to be applied only on Linux, due to the Ubuntu 20.04 update to SECLEVEL=2
config_httr_aemet <- switch(
Sys.info()["sysname"],
'Linux' = httr::config(ssl_cipher_list = 'DEFAULT@SECLEVEL=1'),
httr::config()
)
# Status check ------------------------------------------------------------------------------------------
# now we need to check the status of the response (general status) and the status of AEMET (specific
# query status). They can differ: the AEMET API can be reached successfully (200) while the response
# is empty due to errors in the dates or stations (404), or simply because the api key is incorrect.
# This is done with the .check_status_aemet helper, which returns a list with the status and, on
# success, the content already parsed
api_status_check <- .check_status_aemet(
"https://opendata.aemet.es",
httr::add_headers(api_key = api_options$api_key),
path = path_resolution,
httr::user_agent('https://github.com/emf-creaf/meteospain'),
config = config_httr_aemet
)
if (api_status_check$status != 'OK') {
# if api request limit reached, do a recursive call to the function after 60 seconds
if (api_status_check$code == 429) {
return(.manage_429_errors(api_status_check, api_options, .get_info_aemet))
} else {
cli::cli_abort(c(
x = api_status_check$code,
i = api_status_check$message
))
}
}
response_content <- api_status_check$content
# Response data and metadata ----------------------------------------------------------------------------
# Now, as stated in the .check_status_aemet rationale, we need to access data (in this case we don't need
# metadata)
stations_info_check <- .check_status_aemet(
response_content$datos,
httr::user_agent('https://github.com/emf-creaf/meteospain'),
config = config_httr_aemet
)
if (stations_info_check$status != 'OK') {
# if api request limit reached, do a recursive call to the function after 60 seconds
if (stations_info_check$code == 429) {
return(.manage_429_errors(stations_info_check, api_options, .get_info_aemet))
} else {
cli::cli_abort(c(
x = stations_info_check$code,
i = stations_info_check$message
))
}
}
# Data transformation ----------------------------------------------------------------------------------
# We can finally take the station info data frame and do the necessary transformations
stations_info_check$content |>
dplyr::as_tibble() |>
# add service name, to identify the data if joining with other services
dplyr::mutate(service = 'aemet') |>
dplyr::select(
"service", station_id = "indicativo", station_name = "nombre",
station_province = "provincia", altitude = "altitud", latitude = "latitud",
longitude = "longitud"
) |>
# latitude and longitude come as strings with the cardinal letter. We need to transform them to
# numeric, negative when S or W.
dplyr::mutate(
altitude = as.numeric(stringr::str_replace_all(.data$altitude, ',', '.')),
altitude = units::set_units(.data$altitude, "m"),
latitude = .aemet_coords_generator(.data$latitude),
longitude = .aemet_coords_generator(.data$longitude)
) |>
sf::st_as_sf(coords = c('longitude', 'latitude'), crs = 4326)
}
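# The coordinate conversion above is done by .aemet_coords_generator() (defined
# elsewhere in the package). A rough sketch of that kind of conversion, assuming
# the AEMET inventory encodes coordinates as degrees-minutes-seconds plus a
# cardinal letter (e.g. "394924N"); an illustration, not the package's actual
# implementation:
.example_coords_to_decimal <- function(coords) {
  cardinal <- stringr::str_sub(coords, -1)
  degrees <- as.numeric(stringr::str_sub(coords, 1, 2))
  minutes <- as.numeric(stringr::str_sub(coords, 3, 4))
  seconds <- as.numeric(stringr::str_sub(coords, 5, 6))
  decimal <- degrees + minutes / 60 + seconds / 3600
  # south and west become negative
  dplyr::if_else(cardinal %in% c('S', 'W'), -decimal, decimal)
}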
#' Get data from AEMET
#'
#' Get data from AEMET service
#'
#' For current_day and daily, there is no need to supply the station IDs in the query,
#' as the returned data is not that big. So, if stations are provided, we can filter them afterwards,
#' once the data is retrieved. This also has the advantage of using only one query, reducing the
#' probability of reaching the API limit (per minute or total).
#'
#' @param api_options Option list as generated by \code{\link{aemet_options}}
#'
#' @noRd
.get_data_aemet <- function(api_options) {
# All necessary things for the GET ----------------------------------------------------------------------
# create api path
path_resolution <- .create_aemet_path(api_options)
# create httr config to be applied only on Linux, due to the Ubuntu 20.04 update to SECLEVEL=2
config_httr_aemet <- switch(
Sys.info()["sysname"],
'Linux' = httr::config(ssl_cipher_list = 'DEFAULT@SECLEVEL=1'),
httr::config()
)
# GET and Status check ------------------------------------------------------------------------------------------
# now we need to check the status of the response (general status) and the status of AEMET (specific
# query status). They can differ: the AEMET API can be reached successfully (200) while the response
# is empty due to errors in the dates or stations (404), or simply because the api key is incorrect.
# This is done with the .check_status_aemet helper, which returns a list with the status and, on
# success, the content already parsed
api_status_check <- .check_status_aemet(
"https://opendata.aemet.es",
httr::add_headers(api_key = api_options$api_key),
path = path_resolution,
httr::user_agent('https://github.com/emf-creaf/meteospain'),
config = config_httr_aemet
)
if (api_status_check$status != 'OK') {
# if api request limit reached, do a recursive call to the function after 60 seconds
if (api_status_check$code == 429) {
return(.manage_429_errors(api_status_check, api_options, .get_data_aemet))
} else {
cli::cli_abort(c(
x = api_status_check$code,
i = api_status_check$message
))
}
}
response_content <- api_status_check$content
# Response data and metadata ----------------------------------------------------------------------------
# Now, as stated in the .check_status_aemet rationale, we need to access data and metadata
stations_data_check <- .check_status_aemet(
response_content$datos,
httr::user_agent('https://github.com/emf-creaf/meteospain'),
config = config_httr_aemet
)
if (stations_data_check$status != 'OK') {
# if api request limit reached, do a recursive call to the function after 60 seconds
if (stations_data_check$code == 429) {
return(.manage_429_errors(stations_data_check, api_options, .get_data_aemet))
} else {
cli::cli_abort(c(
x = stations_data_check$code,
i = stations_data_check$message
))
}
}
stations_metadata_check <- .check_status_aemet(
response_content$metadatos,
httr::user_agent('https://github.com/emf-creaf/meteospain'),
config = config_httr_aemet
)
if (stations_metadata_check$status != 'OK') {
# if api request limit reached, do a recursive call to the function after 60 seconds
if (stations_metadata_check$code == 429) {
return(.manage_429_errors(stations_metadata_check, api_options, .get_data_aemet))
} else {
cli::cli_abort(c(
x = stations_metadata_check$code,
i = stations_metadata_check$message
))
}
}
# We also need the stations info
stations_info <- .get_info_aemet(api_options)
# Filter expression for stations ------------------------------------------------------------------------
# In case stations were supplied, we also need to filter them
filter_expression <- TRUE
# update the filter if stations were supplied, but not for monthly/yearly. There, only one
# station can be used, so the filtering is unnecessary
if (!rlang::is_null(api_options$stations)) {
filter_expression <- switch(
api_options$resolution,
'current_day' = rlang::expr(.data$idema %in% api_options$stations),
'daily' = rlang::expr(.data$indicativo %in% api_options$stations),
'monthly' = TRUE,
'yearly' = TRUE
)
}
# Resolution specific carpentry -------------------------------------------------------------------------
# Now, current day and daily have differences, in the names of the variables and also
# in the need to join the stations data to offer coords. We can branch the code with ifs, repeating the
# common steps in the data carpentry or we can create the specific functions and have only one common pipe.
resolution_specific_carpentry <- switch(
api_options$resolution,
'current_day' = .aemet_current_day_carpentry,
'daily' = .aemet_daily_carpentry,
'monthly' = .aemet_monthly_yearly_carpentry,
'yearly' = .aemet_monthly_yearly_carpentry
)
# Data transformation -----------------------------------------------------------------------------------
res <- stations_data_check$content |>
dplyr::as_tibble() |>
# remove unwanted stations
dplyr::filter(!! filter_expression) |>
# apply the resolution-specific transformations
resolution_specific_carpentry(stations_info, resolution = api_options$resolution) |>
# arrange data
dplyr::arrange(.data$timestamp, .data$station_id) |>
# reorder variables to be consistent among all services
relocate_vars() |>
# ensure we have an sf
sf::st_as_sf()
# Check if any stations were returned -------------------------------------------------------------------
if ((!is.null(api_options$stations)) && nrow(res) < 1) {
cli::cli_abort(c(
x = "Station(s) provided have no data for the dates selected.",
"Available stations with data for the actual query are:",
glue::glue_collapse(
c(unique(stations_data_check$content$indicativo), unique(stations_data_check$content$idema)),
sep = ', ', last = ' and '
)
))
}
# Copyright message -------------------------------------------------------------------------------------
cli::cli_inform(c(
i = copyright_style(stations_metadata_check$content$copyright),
legal_note_style(stations_metadata_check$content$notaLegal)
))
# Return ------------------------------------------------------------------------------------------------
return(res)
}
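# Usage sketch (hypothetical, not part of the package API): a daily query
# restricted to two placeholder station codes, assuming a valid AEMET API key.
# The fields of the options list match what .get_data_aemet() reads; in the
# package this list is built by aemet_options().
.example_get_daily_aemet <- function(api_key) {
  api_options <- list(
    resolution = 'daily',
    start_date = as.Date('2020-04-01'),
    end_date = as.Date('2020-04-30'),
    api_key = api_key,
    stations = c('0149X', '0201D')
  )
  # returns an sf tibble with one row per station and day, or aborts with the
  # AEMET error message if the query fails
  .get_data_aemet(api_options)
}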
# resolution_specific_carpentry -------------------------------------------------------------------------
.aemet_current_day_carpentry <- function(data, stations_info, ...) {
data |>
dplyr::select(dplyr::any_of(c(
timestamp = "fint", station_id = "idema", station_name = "ubi",
altitude = "alt",
temperature = "ta",
min_temperature = "tamin",
max_temperature = "tamax",
relative_humidity = "hr",
precipitation = "prec",
wind_speed = "vv",
wind_direction = "dv",
insolation = "inso",
longitude = "lon", latitude = "lat"
))) |>
# create any missing variables
.create_missing_vars(
var_names = c(
"temperature", "min_temperature", "max_temperature", "relative_humidity",
"precipitation", "wind_speed", "wind_direction", "insolation"
)
) |>
# units
dplyr::mutate(
service = 'aemet',
timestamp = lubridate::as_datetime(.data$timestamp),
altitude = units::set_units(.data$altitude, "m"),
temperature = units::set_units(.data$temperature, "degree_C"),
min_temperature = units::set_units(.data$min_temperature, "degree_C"),
max_temperature = units::set_units(.data$max_temperature, "degree_C"),
relative_humidity = units::set_units(.data$relative_humidity, "%"),
precipitation = units::set_units(.data$precipitation, "L/m^2"),
wind_speed = units::set_units(.data$wind_speed, "m/s"),
wind_direction = units::set_units(.data$wind_direction, "degree"),
insolation = units::set_units(.data$insolation, "hours")
) |>
dplyr::left_join(stations_info, by = c('service', 'station_id', 'station_name', 'altitude')) |>
sf::st_as_sf(coords = c('longitude', 'latitude'), crs = 4326)
}
.aemet_daily_carpentry <- function(data, stations_info, ...) {
data |>
dplyr::select(dplyr::any_of(c(
timestamp = "fecha",
station_id = "indicativo", station_name = "nombre", station_province = "provincia",
mean_temperature = "tmed",
min_temperature = "tmin",
max_temperature = "tmax",
precipitation = "prec",
mean_wind_speed = "velmedia",
# wind_direction = "dir",
insolation = "sol"
))) |>
# create any missing variables
.create_missing_vars(
var_names = c(
"mean_temperature", "min_temperature", "max_temperature",
"precipitation", "mean_wind_speed", "insolation"
)
) |>
# variables are characters, with "," as decimal point, so....
dplyr::mutate(
service = 'aemet',
timestamp = lubridate::as_datetime(.data$timestamp),
mean_temperature = as.numeric(stringr::str_replace_all(.data$mean_temperature, ',', '.')),
min_temperature = as.numeric(stringr::str_replace_all(.data$min_temperature, ',', '.')),
max_temperature = as.numeric(stringr::str_replace_all(.data$max_temperature, ',', '.')),
precipitation = suppressWarnings(as.numeric(stringr::str_replace_all(.data$precipitation, ',', '.'))),
mean_wind_speed = as.numeric(stringr::str_replace_all(.data$mean_wind_speed, ',', '.')),
# wind_direction = as.numeric(stringr::str_replace_all(.data$wind_direction, ',', '.')),
insolation = as.numeric(stringr::str_replace_all(.data$insolation, ',', '.')),
# and set the units also
mean_temperature = units::set_units(.data$mean_temperature, "degree_C"),
min_temperature = units::set_units(.data$min_temperature, "degree_C"),
max_temperature = units::set_units(.data$max_temperature, "degree_C"),
precipitation = units::set_units(.data$precipitation, "L/m^2"),
mean_wind_speed = units::set_units(.data$mean_wind_speed, "m/s"),
# wind_direction = units::set_units(.data$wind_direction, degree),
insolation = units::set_units(.data$insolation, "h")
) |>
dplyr::left_join(stations_info, by = c('service', 'station_id', 'station_name', 'station_province'))
}
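# AEMET returns numeric values as strings with a decimal comma ("12,4"), hence
# the stringr::str_replace_all() calls above. A tiny sketch of that conversion
# (illustration only): non-numeric flags, such as "Ip" used by AEMET for
# inappreciable precipitation, become NA, which is why precipitation is wrapped
# in suppressWarnings() above.
.example_decimal_comma <- function(x = c("12,4", "0,0", "Ip")) {
  suppressWarnings(as.numeric(stringr::str_replace_all(x, ',', '.')))
}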
.aemet_monthly_yearly_carpentry <- function(data, stations_info, resolution) {
# resolution-dependent negate argument
negate_filter <- FALSE
if (resolution == "monthly") {
negate_filter <- TRUE
}
# data carpentry
data |>
dplyr::select(dplyr::any_of(c(
timestamp = "fecha",
station_id = "indicativo",
# temperatures
mean_temperature = "tm_mes",
mean_min_temperature = "tm_min",
mean_max_temperature = "tm_max",
# rh
mean_relative_humidity = "hr",
# precipitation
total_precipitation = "p_mes",
days_precipitation = "np_001",
# wind
mean_wind_speed = "w_med",
# radiation
mean_insolation = "inso",
mean_global_radiation = "glo"
))) |>
# remove yearly or monthly values, depending on resolution
dplyr::filter(
stringr::str_detect(.data$timestamp, "-13", negate = negate_filter)
) |>
# remove any "-13" for yearly values (if monthly, this step dont do anything), for
# the timestamp parsing to work
dplyr::mutate(
timestamp = stringr::str_remove(.data$timestamp, "-13")
) |>
# create any missing variables
.create_missing_vars(
var_names = c(
"mean_temperature", "mean_min_temperature", "mean_max_temperature",
"mean_relative_humidity", "total_precipitation", "days_precipitation",
"mean_wind_speed", "mean_insolation", "mean_global_radiation"
)
) |>
# timestamp has to be parsed, "ym" for monthly values, "y" for yearly, and
# variables are characters, with "," as decimal point, so....
dplyr::mutate(
service = 'aemet',
timestamp = lubridate::parse_date_time(.data$timestamp, orders = c("ym", "y")),
mean_temperature = as.numeric(stringr::str_replace_all(.data$mean_temperature, ',', '.')),
mean_min_temperature = as.numeric(stringr::str_replace_all(.data$mean_min_temperature, ',', '.')),
mean_max_temperature = as.numeric(stringr::str_replace_all(.data$mean_max_temperature, ',', '.')),
total_precipitation = suppressWarnings(as.numeric(stringr::str_replace_all(.data$total_precipitation, ',', '.'))),
mean_wind_speed = as.numeric(stringr::str_replace_all(.data$mean_wind_speed, ',', '.')),
mean_relative_humidity = as.numeric(stringr::str_replace_all(.data$mean_relative_humidity, ',', '.')),
days_precipitation = as.numeric(stringr::str_replace_all(.data$days_precipitation, ',', '.')),
mean_insolation = as.numeric(stringr::str_replace_all(.data$mean_insolation, ',', '.')),
# global radiation comes in tens of kJ/m2, so we multiply by 10 to be able to set the units to kJ/m2 later
mean_global_radiation = 10*as.numeric(stringr::str_replace_all(.data$mean_global_radiation, ',', '.')),
# and set the units also
mean_temperature = units::set_units(.data$mean_temperature, "degree_C"),
mean_min_temperature = units::set_units(.data$mean_min_temperature, "degree_C"),
mean_max_temperature = units::set_units(.data$mean_max_temperature, "degree_C"),
total_precipitation = units::set_units(.data$total_precipitation, "L/m^2"),
mean_wind_speed = units::set_units(.data$mean_wind_speed, "km/h"),
mean_relative_humidity = units::set_units(.data$mean_relative_humidity, "%"),
days_precipitation = units::set_units(.data$days_precipitation, "days"),
mean_insolation = units::set_units(.data$mean_insolation, "hours"),
mean_global_radiation = units::set_units(.data$mean_global_radiation, "kJ/m^2")
) |>
dplyr::left_join(stations_info, by = c('service', 'station_id'))
}
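# The monthly/yearly endpoint mixes monthly rows (e.g. "2020-6") with a yearly
# summary row tagged with month 13 (e.g. "2020-13"). A toy sketch of the
# filtering done above (illustration only, assuming that timestamp format):
.example_monthly_yearly_split <- function(timestamps = c('2020-1', '2020-6', '2020-13')) {
  list(
    monthly = timestamps[stringr::str_detect(timestamps, "-13", negate = TRUE)],
    yearly = stringr::str_remove(
      timestamps[stringr::str_detect(timestamps, "-13")], "-13"
    )
  )
}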