R/meteogalicia_helpers.R

Defines functions .info_table_checker .meteogalicia_monthly_carpentry .meteogalicia_daily_carpentry .meteogalicia_current_day_carpentry .meteogalicia_instant_carpentry .meteogalicia_monthly_unnesting .meteogalicia_daily_unnesting .meteogalicia_current_day_unnesting .meteogalicia_instant_unnesting .get_data_meteogalicia .get_info_meteogalicia .create_meteogalicia_query .create_meteogalicia_path

#' Create the path elements for MeteoGalicia API
#'
#' Path vectors for MeteoGalicia API to use with httr::GET
#'
#' @param api_options Option list as generated by \link{\code{meteogalicia_options}}
#'
#' @noRd
.create_meteogalicia_path <- function(api_options) {

  # we need the resolution to create the corresponding path
  resolution <- api_options$resolution

  temp_path <- switch(
    resolution,
    "instant" = c('mgrss', 'observacion', 'ultimos10minEstacionsMeteo.action'),
    "current_day" = c('mgrss', 'observacion', 'ultimosHorariosEstacions.action'),
    "daily" = c('mgrss', 'observacion', 'datosDiariosEstacionsMeteo.action'),
    "monthly" = c('mgrss', 'observacion', 'datosMensuaisEstacionsMeteo.action'),
    FALSE
  )

  # not recognised resolution, we abort
  if (isFALSE(temp_path)) {
    cli::cli_abort(c(
      "{.arg {api_options$resolution}} is not a valid temporal resolution for MeteoGalicia. Please see meteogalicia_options help for more information"
    ))
  }

  return(temp_path)
}

#' Create the query element for MeteoGalicia API
#'
#' Query string for MeteoGalicia API to use with httr::GET
#'
#' MeteoGalicia allows to include queries in the link, to select dates, stations and hours in current. This
#' is more flexible than the paths in AEMET and allows to retrieve the stations desired directly in an easy
#' way.
#'
#' @param api_options Option list as generated by \link{\code{meteogalicia_options}}
#'
#' @noRd
.create_meteogalicia_query <- function(api_options) {

  # In case of dates supplied and in the corresponding resolutions, we need to transform the dates to the
  # character string specific format (dd-mm-yyyy) for the meteogalicia query
  # We will use a stamp function:
  meteogalicia_stamp <- lubridate::stamp("25/12/2001", orders = "d0mY", quiet = TRUE)

  # the first thing is the stations, as it is the common part for any resolution
  stations_query_string <- glue::glue("idEst={glue::glue_collapse(api_options$stations, sep = ',')}")

  # dates also can be done, and used if needed
  dates_query_string <- glue::glue(
    "dataIni={meteogalicia_stamp(api_options$start_date)}&dataFin={meteogalicia_stamp(api_options$end_date)}"
  )

  # now the specifics for each resolution:
  #   - instant, nothing, only the stations if any
  #   - current day, stations if any and numHoras=24
  #   - daily, stations if any, start date and end date
  #   - monthly, stations if any, start date and end date

  if (api_options$resolution == 'instant') {
    res <- .empty_string_to_null(stations_query_string)
  }
  if (api_options$resolution == 'current_day') {
    if (rlang::is_null(api_options$stations)) {
      res <- "numHoras=24"
    } else {
      res <- glue::glue("{stations_query_string}&numHoras=24")
    }
  }
  if (api_options$resolution %in% c('daily', 'monthly')) {
    if (rlang::is_null(api_options$stations)) {
      res <- dates_query_string
    } else {
      res <- glue::glue("{stations_query_string}&{dates_query_string}")
    }
  }

  return(res)
}


#' Get info for the meteogalicia stations
#'
#' Get info for the meteogalicia stations
#'
#' @noRd

.get_info_meteogalicia <- function() {

  # GET ---------------------------------------------------------------------------------------------------
  # path
  path_resolution <- c(
    'mgrss', 'observacion', 'listaEstacionsMeteo.action'
  )
  # api response
  api_response <- safe_api_access(
    type = 'rest',
    "https://servizos.meteogalicia.gal",
    config = list(http_version = 2),
    path = path_resolution,
    httr::user_agent('https://github.com/emf-creaf/meteospain')
  )

  # Status check ------------------------------------------------------------------------------------------
  if (api_response$status_code != 200) {
    cli::cli_abort(c(
      "Unable to connect to meteogalicia API at {.url {api_response$url}}"
    ))
  }


  # Data --------------------------------------------------------------------------------------------------
  response_content <- jsonlite::fromJSON(httr::content(api_response, as = 'text'))

  # Meteogalicia returns a list, with one element called listaEstacionsMeteo, that is parsed directly to
  # a data.frame with all the info. We work with that.
  response_content$listaEstacionsMeteo |>
    dplyr::as_tibble() |>
    dplyr::mutate(service = 'meteogalicia') |>
    .info_table_checker() |>
    dplyr::select(
      "service", station_id = "idEstacion", station_name = "estacion", station_province = "provincia",
      "altitude", "lat", "lon"
    ) |>
    dplyr::mutate(
      station_id = as.character(.data$station_id),
      altitude = units::set_units(.data$altitude, "m")
    ) |>
    sf::st_as_sf(coords = c('lon', 'lat'), crs = 4326)

}

#' Get data from MeteoGalicia
#'
#' Get data from MeteoGalicia service
#'
#' MeteoGalicia API, based on RSS, is very well organized. It allows to supply dates and stations as
#' query parameters, which gives a lot of flexibility and makes my life easier. All my love to MeteoGalicia!!
#'
#' @param api_options Option list as generated by \link{\code{meteogalicia_options}}
#'
#' @noRd
.get_data_meteogalicia <- function(api_options) {


  # GET ---------------------------------------------------------------------------------------------------
  # api path
  path_resolution <- .create_meteogalicia_path(api_options)
  # get api query
  query_resolution <- .create_meteogalicia_query(api_options)
  # get the api response
  api_response <- safe_api_access(
    type = 'rest',
    "https://servizos.meteogalicia.gal",
    config = list(http_version = 2),
    path = path_resolution,
    query = query_resolution,
    httr::user_agent('https://github.com/emf-creaf/meteospain')
  )


  # Status check ------------------------------------------------------------------------------------------
  # bad stations return code 500
  if (api_response$status_code %in% c(500L, 404L)) {
    cli::cli_abort(c(
      "MeteoGalicia API returned an error:",
      stringr::str_remove_all(
        httr::content(api_response, 'text'),
        '<.*?>|\\t|\\n|<!DOCTYPE((.|\n|\r)*?)(\"|])>'
      ),
      i = 'This usually happens when unknown station ids are supplied.'
    ))
  }
  # check any other codes besides 200
  if (api_response$status_code != 200) {
    cli::cli_abort(c(
      "Unable to connect to meteogalicia API at {.url {api_response$url}}"
    ))
  }
  # Check when html with error is returned (bad stations)
  # LEGACY, bad stations now are reported with error 500 in the new meteogalicia API
  if (httr::http_type(api_response) != "application/json") {
    cli::cli_abort(c(
      "MeteoGalicia API returned an error:",
      stringr::str_remove_all(
        httr::content(api_response, 'text'),
        '<.*?>|\\t|\\n|<!DOCTYPE((.|\n|\r)*?)(\"|])>'
      ),
      i = 'This usually happens when unknown station ids are supplied.'
    ))
  }
  # response content
  response_content <- jsonlite::fromJSON(httr::content(api_response, as = 'text'))
  # Check when empty lists are returned (bad dates)
  if (length(response_content[[1]]) < 1) {
    cli::cli_abort(c(
      "MeteoGalicia API returned no data:\n",
      i = "This usually happens when there is no data for the dates supplied."
    ))
  }

  # Resolution specific carpentry -------------------------------------------------------------------------
  # Now, resolutions have differences, in the component names of the list returned and also in variables
  # returned. So we create specific functions for each resolution and use a common pipe (see aemet.helpers
  # for a more complete rationale)
  resolution_specific_unnesting <- switch(
    api_options$resolution,
    'instant' = .meteogalicia_instant_unnesting,
    'current_day' = .meteogalicia_current_day_unnesting,
    'daily' = .meteogalicia_daily_unnesting,
    'monthly' = .meteogalicia_monthly_unnesting
  )
  resolution_specific_carpentry <- switch(
    api_options$resolution,
    'instant' = .meteogalicia_instant_carpentry,
    'current_day' = .meteogalicia_current_day_carpentry,
    'daily' = .meteogalicia_daily_carpentry,
    'monthly' = .meteogalicia_monthly_carpentry
  )
  resolution_specific_joinvars <- c('service', 'station_id', 'station_name')
  if (api_options$resolution %in% c('daily', 'monthly')) {
    resolution_specific_joinvars <- c(resolution_specific_joinvars, 'station_province')
  }

  # Data transformation -----------------------------------------------------------------------------------
  res <-
    resolution_specific_unnesting(response_content) |>
    # final unnest, common to all resolutions
    unnest_safe("listaMedidas") |>
    # remove the non valid data (0 == no validated data, 3 = wrong data, 9 = data not registered)
    dplyr::filter(!.data$lnCodigoValidacion %in% c(0, 3, 9)) |>
    # remove unwanted variables
    dplyr::select(-"lnCodigoValidacion", -"nomeParametro", -"unidade") |>
    # now, some stations can have errors in the sense of duplicated precipitation values.
    # We get the first record
    tidyr::pivot_wider(
      names_from = "codigoParametro", values_from = "valor", values_fn = dplyr::first
    ) |>
    # resolution-specific transformations
    resolution_specific_carpentry() |>
    dplyr::arrange(.data$timestamp, .data$station_id) |>
    dplyr::left_join(.get_info_meteogalicia(), by = resolution_specific_joinvars) |>
    # reorder variables to be consistent among all services
    relocate_vars() |>
    sf::st_as_sf()

  # Copyright message -------------------------------------------------------------------------------------
  cli::cli_inform(c(
    i = copyright_style("A informaci\u00F3n divulgada a trav\u00E9s deste servidor ofr\u00E9cese gratuitamente aos cidad\u00E1ns para que poida ser"),
    copyright_style("utilizada libremente por eles, co \u00FAnico compromiso de mencionar expresamente a MeteoGalicia e \u00E1"),
    copyright_style("Conseller\u00EDa de Medio Ambiente, Territorio e Vivenda da Xunta de Galicia como fonte da mesma cada vez"),
    copyright_style("que as utilice para os usos distintos do particular e privado."),
    legal_note_style("https://www.meteogalicia.gal/web/informacion/notaIndex.action")
  ))

  return(res)
}


# resolution_specific_unnesting --------------------------------------------------------------------------
.meteogalicia_instant_unnesting <- function(response_content) {
  return(response_content$listUltimos10min)
}

.meteogalicia_current_day_unnesting <- function(response_content) {
  res <- response_content$listHorarios |>
    unnest_safe("listaInstantes")

  return(res)
}

.meteogalicia_daily_unnesting <- function(response_content) {
  res <- response_content$listDatosDiarios |>
    unnest_safe("listaEstacions")

  return(res)
}

.meteogalicia_monthly_unnesting <- function(response_content) {
  res <- response_content$listDatosMensuais |>
    unnest_safe("listaEstacions")

  return(res)
}


# resolution_specific_carpentry -------------------------------------------------------------------------

.meteogalicia_instant_carpentry <- function(data) {
  data |>
    # When querying stations, it can happen that some stations lack some variables, making the further
    # select step to fail. We create missing variables and populate them with NAs to avoid this error
    .create_missing_vars(
      var_names = c(
        'TA_AVG_1.5m', 'DV_AVG_2m', 'VV_AVG_2m', 'HR_AVG_1.5m',
        'PP_SUM_1.5m', 'HSOL_SUM_1.5m', 'RS_AVG_1.5m'
      )
    ) |>
    dplyr::select(
      timestamp = "instanteLecturaUTC", station_id = "idEstacion", station_name = "estacion",
      temperature = "TA_AVG_1.5m",
      wind_direction = "DV_AVG_2m",
      wind_speed = "VV_AVG_2m",
      relative_humidity = "HR_AVG_1.5m",
      precipitation = "PP_SUM_1.5m",
      insolation = "HSOL_SUM_1.5m"
      # global_solar_radiation = "RS_AVG_1.5m"
    ) |>
    dplyr::mutate(
      timestamp = lubridate::as_datetime(.data$timestamp),
      service = 'meteogalicia',
      station_id = as.character(.data$station_id),
      temperature = units::set_units(.data$temperature, "degree_C"),
      wind_direction = units::set_units(.data$wind_direction, "degree"),
      wind_speed = units::set_units(.data$wind_speed, "m/s"),
      relative_humidity = units::set_units(.data$relative_humidity, "%"),
      precipitation = units::set_units(.data$precipitation, "L/m^2"),
      insolation = units::set_units(.data$insolation, "h")
      # global_solar_radiation = units::set_units(
      #   units::set_units(.data$global_solar_radiation, "J/s/m^2") * insolation, 'MJ/m^2'
      # )
    )
}
.meteogalicia_current_day_carpentry <- function(data) {
  data |>
    # When querying stations, it can happen that some stations lack some variables, making the further
    # select step to fail. We create missing variables and populate them with NAs to avoid this error
    .create_missing_vars(
      var_names = c(
        'TA_AVG_1.5m', 'TA_MIN_1.5m', 'TA_MAX_1.5m', 'DV_AVG_2m', 'VV_AVG_2m',
        'HR_AVG_1.5m', 'PP_SUM_1.5m', 'HSOL_SUM_1.5m'
      )
    ) |>
    dplyr::select(
      timestamp = "instanteLecturaUTC", station_id = "idEstacion", station_name = "estacion",
      temperature = "TA_AVG_1.5m",
      min_temperature = "TA_MIN_1.5m",
      max_temperature = "TA_MAX_1.5m",
      wind_direction = "DV_AVG_2m",
      wind_speed = "VV_AVG_2m",
      relative_humidity = "HR_AVG_1.5m",
      precipitation = "PP_SUM_1.5m",
      insolation = "HSOL_SUM_1.5m"
    ) |>
    dplyr::mutate(
      timestamp = lubridate::as_datetime(.data$timestamp),
      service = 'meteogalicia',
      station_id = as.character(.data$station_id),
      temperature = units::set_units(.data$temperature, "degree_C"),
      min_temperature = units::set_units(.data$min_temperature, "degree_C"),
      max_temperature = units::set_units(.data$max_temperature, "degree_C"),
      wind_direction = units::set_units(.data$wind_direction, "degree"),
      wind_speed = units::set_units(.data$wind_speed, "m/s"),
      relative_humidity = units::set_units(.data$relative_humidity, "%"),
      precipitation = units::set_units(.data$precipitation, "L/m^2"),
      insolation = units::set_units(.data$insolation, "h")
    )
}
.meteogalicia_daily_carpentry <- function(data) {
  data |>
    # When querying stations, it can happen that some stations lack some variables, making the further
    # select step to fail. We create missing variables and populate them with NAs to avoid this error
    .create_missing_vars(
      var_names = c(
        'TA_AVG_1.5m', 'TA_MIN_1.5m', 'TA_MAX_1.5m', 'DV_AVG_2m', 'VV_AVG_2m',
        'HR_AVG_1.5m', 'HR_MIN_1.5m', 'HR_MAX_1.5m', 'PP_SUM_1.5m', 'HSOL_SUM_1.5m'
      )
    ) |>
    dplyr::select(
      timestamp = "data",
      station_id = "idEstacion", station_name = "estacion", station_province = "provincia",
      mean_temperature = "TA_AVG_1.5m",
      min_temperature = "TA_MIN_1.5m",
      max_temperature = "TA_MAX_1.5m",
      mean_wind_direction = "DV_AVG_2m",
      mean_wind_speed = "VV_AVG_2m",
      mean_relative_humidity = "HR_AVG_1.5m",
      min_relative_humidity = "HR_MIN_1.5m",
      max_relative_humidity = "HR_MAX_1.5m",
      precipitation = "PP_SUM_1.5m",
      insolation = "HSOL_SUM_1.5m"
    ) |>
    dplyr::mutate(
      timestamp = lubridate::as_datetime(.data$timestamp),
      service = 'meteogalicia',
      station_id = as.character(.data$station_id),
      mean_temperature = units::set_units(.data$mean_temperature, "degree_C"),
      min_temperature = units::set_units(.data$min_temperature, "degree_C"),
      max_temperature = units::set_units(.data$max_temperature, "degree_C"),
      mean_wind_direction = units::set_units(.data$mean_wind_direction, "degree"),
      mean_wind_speed = units::set_units(.data$mean_wind_speed, "m/s"),
      mean_relative_humidity = units::set_units(.data$mean_relative_humidity, "%"),
      min_relative_humidity = units::set_units(.data$min_relative_humidity, "%"),
      max_relative_humidity = units::set_units(.data$max_relative_humidity, "%"),
      precipitation = units::set_units(.data$precipitation, "L/m^2"),
      insolation = units::set_units(.data$insolation, "h")
    )
}
.meteogalicia_monthly_carpentry <- function(data) {
  data |>
    # When querying stations, it can happen that some stations lack some variables, making the further
    # select step to fail. We create missing variables and populate them with NAs to avoid this error
    .create_missing_vars(
      var_names = c(
        'TA_AVG_1.5m', 'TA_MIN_1.5m', 'TA_MAX_1.5m', 'VV_AVG_2m',
        'HR_AVG_1.5m', 'PP_SUM_1.5m', 'HSOL_SUM_1.5m'
      )
    ) |>
    dplyr::select(
      timestamp = "data",
      station_id = "idEstacion", station_name = "estacion", station_province = "provincia",
      mean_temperature = "TA_AVG_1.5m",
      min_temperature = "TA_MIN_1.5m",
      max_temperature = "TA_MAX_1.5m",
      mean_wind_speed = "VV_AVG_2m",
      mean_relative_humidity = "HR_AVG_1.5m",
      precipitation = "PP_SUM_1.5m",
      insolation = "HSOL_SUM_1.5m"
    ) |>
    dplyr::mutate(
      timestamp = lubridate::as_datetime(.data$timestamp),
      service = 'meteogalicia',
      station_id = as.character(.data$station_id),
      mean_temperature = units::set_units(.data$mean_temperature, "degree_C"),
      min_temperature = units::set_units(.data$min_temperature, "degree_C"),
      max_temperature = units::set_units(.data$max_temperature, "degree_C"),
      mean_wind_speed = units::set_units(.data$mean_wind_speed, "m/s"),
      mean_relative_humidity = units::set_units(.data$mean_relative_humidity, "%"),
      precipitation = units::set_units(.data$precipitation, "L/m^2"),
      insolation = units::set_units(.data$insolation, "h")
    )
}


# info table checker ------------------------------------------------------------------------------------

.info_table_checker <- function(data) {

  mandatory_names <- c("service", "idEstacion", "estacion", "provincia", "altitude", "lat", "lon")
  names_ok <- mandatory_names %in% names(data)

  if (!all(names_ok)) {
    cli::cli_abort(c(
      x = "Oops, something went wrong and some info about stations is missing:",
      mandatory_names[names_ok]
    ))
  }

  return(data)
}

Try the meteospain package in your browser

Any scripts or data that you put into this service are public.

meteospain documentation built on May 29, 2024, 1:59 a.m.