R/ria_helpers.R

Defines functions .ria_daily_mutate_quos .ria_monthly_mutate_quos .ria_daily_select_quos .ria_monthly_select_quos .get_data_ria .get_info_ria .create_ria_path .get_provinces_ria .check_status_ria

#' httr::GET, and internally checking statuses. If error, return the error code and the message.
#' In the main function, if the error returned is API limit, wait 60 seconds, if other, stop
#' and give the correct message.
#'
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Arguments for httr::GET
#'
#' @noRd
.check_status_ria <- function(...) {

  # GET step
  api_response <- safe_api_access(type = 'rest', ...)
  response_status <- httr::status_code(api_response)

  # and now the status checks
  # 400 Bad Request: Parameters are not correct, message contains further info
  # 404 Not found: Not found
  # 500 Internal Error Server: Internal error, message contains further info
  # 503 Service Unavailable
  if (response_status %in% c(400, 404, 500, 503)) {
    res <- list(
      status = 'Error',
      code = response_status,
      message = glue::glue(
        "Unable to obtain data from RIA API:\n",
        "{httr::http_status(api_response)$message}\n",
        "{rawToChar(api_response$content)}"
      ),
      station_url = api_response$url
    )
    return(res)
  }

  # If we reach here, is because everything went well
  response_content <- jsonlite::fromJSON(httr::content(api_response, as = 'text', encoding = 'UTF-8'))
  res <- list(
    status = 'OK',
    code = response_status,
    message = "Data received",
    content = response_content,
    station_url = api_response$url
  )

  return(res)
}

#' Get province metadata
#'
#' provinces metadata
#'
#' @param api_options Option list as generated by \link{\code{ria_options}}
#' @noRd
.get_provinces_ria <- function(api_options) {

  # path
  path_resolution <- c('agriculturaypesca', 'ifapa', 'riaws', 'provincias')


  # get and status check ----------------------------------------------------------------------------------
  api_status_check <- .check_status_ria(
    'https://www.juntadeandalucia.es',
    path = path_resolution,
    httr::user_agent('https://github.com/emf-creaf/meteospain')
  )

  if (api_status_check$status != 'OK') {
    cli::cli_abort(c(
      x = api_status_check$code,
      i = api_status_check$message
    ))
  }

  response_content <- api_status_check$content |>
    dplyr::as_tibble()
  return(response_content)
}

#' Create the path elements for RIA API
#'
#' Path vectors for RIA API to use with httr::GET
#'
#' @section Stations
#' In this case as RIA is capped to one station per query, so we need to loop by stations provided, or, if
#' NULL, by all the stations available.
#'
#' @param api_options Option list as generated by \link{\code{ria_options}}
#'
#' @noRd
.create_ria_path <- function(api_options) {

  # we need the resolution to create the corresponding path
  resolution <- api_options$resolution

  ria_stamp <- lubridate::stamp("2001-12-25", orders = "Ymd0", quiet = TRUE)

  month_and_years <- dplyr::tibble(
    year = lubridate::year(seq(api_options$start_date, api_options$end_date, 'months')),
    month = lubridate::month(seq(api_options$start_date, api_options$end_date, 'months'))
  ) |>
    dplyr::group_by(.data$year) |>
    dplyr::mutate(min_month = min(.data$month), max_month = max(.data$month)) |>
    dplyr::select(-"month") |>
    dplyr::distinct() |>
    as.list()

  provinces_and_stations <- stringr::str_split(api_options$stations, '-', n = 2, simplify = TRUE)

  # now the path vectors for the resolutions
  paths_resolution <- switch(
    resolution,
    # for daily and monthly, stations are paths.
    'daily' = purrr::map2(
      provinces_and_stations[,1], provinces_and_stations[,2],
      function(province, station) {
        c(
          'agriculturaypesca', 'ifapa', 'riaws', 'datosdiarios', 'forceEt0', province, station,
          ria_stamp(api_options$start_date), ria_stamp(api_options$end_date)
        )
      }
    ),
    'monthly' = purrr::flatten(purrr::map2(
      provinces_and_stations[,1], provinces_and_stations[,2],
      function(province, station) {
        province_station_path <-
          c('agriculturaypesca', 'ifapa', 'riaws', 'datosmensuales', province, station)
        purrr::pmap(
          month_and_years,
          function(year, min_month, max_month) {
            c(province_station_path, year, min_month, max_month)
          }
        )
      }
    )),
    list()
  )

  # not recognised resolution
  if (length(paths_resolution) < 1) {
    cli::cli_abort(c(
      "{.arg {resolution}} is not a valid temporal resolution for ria.\nPlease see ria_options help for more information."
    ))
  }

  return(paths_resolution)
}

#' Get info for the ria stations
#'
#' Get info for the ria stations
#'
#' @noRd

.get_info_ria <- function(api_options) {

  # GET parts needed --------------------------------------------------------------------------------------
  # path
  path_resolution <- c('agriculturaypesca', 'ifapa', 'riaws', 'estaciones')

  # Status check ------------------------------------------------------------------------------------------
  api_status_check <- .check_status_ria(
    'https://www.juntadeandalucia.es',
    path = path_resolution,
    httr::user_agent('https://github.com/emf-creaf/meteospain')
  )

  if (api_status_check$status != 'OK') {
    cli::cli_abort(c(
      x = api_status_check$code,
      i = api_status_check$message
    ))
  }

  # Data --------------------------------------------------------------------------------------------------
  # ria returns a data frame, but some variables are data frames themselves. We need to work on that
  response_content <- api_status_check$content

  province_df <- response_content[['provincia']] |>
    dplyr::rename(station_province = "nombre", province_id = "id")

  response_content |>
    dplyr::as_tibble() |>
    # add service name, to identify the data if joining with other services
    dplyr::mutate(service = 'ria') |>
    dplyr::select(-"provincia") |>
    dplyr::bind_cols(province_df) |>
    dplyr::select(
      "service", station_id = "codigoEstacion", station_name = "nombre",
      "station_province", "province_id",
      altitude = "altitud", "longitud", "latitud", under_plastic = "bajoplastico"
    ) |>
    dplyr::distinct() |>
    dplyr::mutate(
      station_id = as.character(glue::glue("{province_id}-{station_id}")),
      altitude = units::set_units(.data$altitude, 'm'),
      latitud = .parse_coords_dmsh(.data$latitud),
      longitud = .parse_coords_dmsh(.data$longitud),
    ) |>
    sf::st_as_sf(coords = c('longitud', 'latitud'), crs = 4326)

}

#' Get data from RIA
#'
#' Get data from RIA service
#'
#' For all resolutions, if no stations are provided all stations will be retrieved
#'
#' @param api_options Option list as generated by \link{\code{ria_options}}
#'
#' @noRd
.get_data_ria <- function(api_options) {

  # All necessary things for the GET ----------------------------------------------------------------------
  # stations_info and update api_options
  # we need the stations id and their province
  stations_info <- .get_info_ria(api_options)

  if (is.null(api_options$stations)) {
    api_options$stations <- stations_info[['station_id']]
  }
  # create api paths
  paths_resolution <- .create_ria_path(api_options)

  # GET and Status check ----------------------------------------------------------------------------------
  # Here the things are a little convoluted. ria, for returning all stations only allows one variable
  # and one day. This means that for all variables, we need to loop around all paths (variables) needed,
  # checking statuses and retrieving data if everything is ok.
  api_statuses <- paths_resolution |>
    purrr::map(
      \(path) {
        .check_status_ria(
          "https://www.juntadeandalucia.es",
          path = path,
          httr::user_agent('https://github.com/emf-creaf/meteospain')
        )
      }
    )

  ria_statuses <- purrr::map_depth(api_statuses, 1, 'status') |>
    purrr::flatten_chr()
  ria_codes <- purrr::map_depth(api_statuses, 1, 'code') |>
    purrr::flatten_dbl()
  ria_messages <- purrr::map_depth(api_statuses, 1, 'message') |>
    purrr::flatten_chr()
  ria_urls <- purrr::map_depth(api_statuses, 1, 'station_url') |>
    purrr::flatten_chr()

  messages_to_show <- ria_messages[which(ria_codes != 200)] |> unique()
  stations_with_problems <- ria_urls[which(ria_codes != 200)] |>
    unique() |>
    purrr::map_chr(.f = .ria_url2station) |>
    sort()

  if (all(ria_statuses != 'OK')) {
    cli::cli_abort(c(
      messages_to_show
    ))
  }

  if (any(ria_statuses != 'OK')) {
    cli::cli_inform(c(
      w = copyright_style("Some stations didn't return data for some dates:"),
      stations_with_problems
    ))
  }

  # Resolution specific carpentry -------------------------------------------------------------------------
  # Now, instant/hourly and daily/monthly/yearly differs in the unnest step, as the column names are called
  # differently. It also differs in the select step as in the latter group there is no repetition of column
  # names after the unnest step.
  resolution_specific_select_quos <- switch(
    api_options$resolution,
    'daily' = .ria_daily_select_quos,
    'monthly' = .ria_monthly_select_quos
  )

  resolution_specific_mutate_quos <- switch(
    api_options$resolution,
    'daily' = .ria_daily_mutate_quos,
    'monthly' = .ria_monthly_mutate_quos
  )

  # Data transformation -----------------------------------------------------------------------------------

  res <- purrr::map_depth(api_statuses, 1, 'content') |>
    purrr::set_names(ria_urls) |>
    purrr::discard(is.null) |>
    purrr::imap(
      \(.x, .y) {dplyr::mutate(.x, station_id = .ria_url2station(.y))}
    ) |>
    purrr::list_rbind() |>
    dplyr::select(
      !!! resolution_specific_select_quos(), "station_id",
      mean_temperature = "tempMedia", min_temperature = "tempMin", max_temperature = "tempMax",
      mean_relative_humidity = "humedadMedia", min_relative_humidity = "humedadMin",
      max_relative_humidity = "humedadMax",
      mean_wind_speed = "velViento", mean_wind_direction = "dirViento",
      precipitation = "precipitacion",
      solar_radiation = "radiacion"
    ) |>
    dplyr::mutate(
      !!! resolution_specific_mutate_quos(),
      mean_temperature = units::set_units(.data$mean_temperature, "degree_C"),
      min_temperature = units::set_units(.data$min_temperature, "degree_C"),
      max_temperature = units::set_units(.data$max_temperature, "degree_C"),
      mean_relative_humidity = units::set_units(.data$mean_relative_humidity, "%"),
      min_relative_humidity = units::set_units(.data$min_relative_humidity, "%"),
      max_relative_humidity = units::set_units(.data$max_relative_humidity, "%"),
      mean_wind_speed = units::set_units(.data$mean_wind_speed, 'm/s'),
      mean_wind_direction = units::set_units(.data$mean_wind_direction, 'degree'),
      precipitation = units::set_units(.data$precipitation, "L/m^2"),
      solar_radiation = units::set_units(.data$solar_radiation, "MJ/d/m^2"),
      timestamp = lubridate::as_datetime(.data$timestamp),
      station_id = as.character(.data$station_id)
    ) |>
    dplyr::left_join(stations_info, by = 'station_id') |>
    dplyr::select(!dplyr::any_of(c('month', 'year', 'province_id'))) |>
    # reorder variables to be consistent among all services
    relocate_vars() |>
    # ensure we have an sf
    sf::st_as_sf()

  # Copyright message -------------------------------------------------------------------------------------
  cli::cli_inform(c(
    i = copyright_style("Data provided by Red de Informaci\u00F3n Agroclim\u00E1tica de Andaluc\u00EDa (RIA)"),
    legal_note_style("https://www.juntadeandalucia.es/agriculturaypesca/ifapa/riaweb/web/")
  ))

  return(res)
}



# resolution specific carpentry -------------------------------------------------------------------------

.ria_monthly_select_quos <- function() {
  return(rlang::quos(year = "anyo", month = "mes"))
}

.ria_daily_select_quos <- function() {
  return(rlang::quos(timestamp = "fecha"))
}

.ria_monthly_mutate_quos <- function() {
  return(rlang::quos(timestamp = as.Date(glue::glue("{year}-{month}-01"))))
}

.ria_daily_mutate_quos <- function() {
  return(rlang::quos())
}

Try the meteospain package in your browser

Any scripts or data that you put into this service are public.

meteospain documentation built on May 29, 2024, 1:59 a.m.