R/meteocat_helpers.R

Defines functions .meteocat_long_carpentry .meteocat_short_carpentry .get_data_meteocat .get_info_meteocat .create_meteocat_query .create_meteocat_path .get_variables_meteocat .get_quota_meteocat .check_status_meteocat

#' Check status and errors for MeteoCat
#'
#' Check status and erros for MeteoCat
#'
#' In the MeteoCat API errors are correctly raised and can be checked straightforward.
#'
#' @section Rationale:
#' So, the rationale is the following, a function accepting \code{...} that will be passed to
#' httr::GET, and internally checking statuses. If error, return the error code and the message.
#' In the main function, if the error returned is API limit, wait 60 seconds, if other, stop
#' and give the correct message.
#'
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Arguments for httr::GET
#'
#' @noRd
.check_status_meteocat <- function(...) {

  # GET step
  api_response <- safe_api_access(type = 'rest', ...)
  response_status <- httr::status_code(api_response)

  # and now the status checks
  # 400 Bad Request: Parameters are not correct, message contains further info
  # 403 Forbidden: If message indicates Forbidden, bad api key, if the message indicates Missing authentication
  #   token means that the resource does not exists
  # 429 Too Many Request: Quota limit or request per second limit passed
  # 500 Internal Error Server: Internal error, message contains further info
  if (response_status == 400) {
    res <- list(
      status = 'Error',
      code = response_status,
      message = glue::glue(
        "Unable to obtain data from MeteoCat API:\n",
        "{httr::http_status(api_response)$message}\n",
        "{jsonlite::fromJSON(httr::content(api_response, as = 'text', encoding = 'UTF-8'))$message}"
      )
    )
    return(res)
  }

  if (response_status == 403) {
    response_message <- httr::http_status(api_response)$message

    if (stringr::str_detect(response_message, 'Forbidden')) {
      res <- list(
        status = 'Error',
        code = response_status,
        message = glue::glue(
          "Invalid API Key: {response_message}"
        )
      )
      return(res)
    } else {
      res <- list(
        status = 'Error',
        code = response_status,
        message = glue::glue(
          "Resource does not exist: {response_message}"
        )
      )
    }
  }

  if (response_status == 429) {
    res <- list(
      status = 'Error',
      code = response_status,
      message = httr::http_status(api_response)$message
    )
  }

  if (response_status == 500) {
    res <- list(
      status = 'Error',
      code = response_status,
      message = glue::glue(
        "Unable to obtain data from MeteoCat API:\n",
        "{httr::http_status(api_response)$message}\n",
        "{jsonlite::fromJSON(httr::content(api_response, as = 'text', encoding = 'UTF-8'))$message}"
      )
    )
    return(res)
  }

  # If we reach here, is because everything went well
  response_content <- jsonlite::fromJSON(httr::content(api_response, as = 'text', encoding = 'UTF-8'))
  res <- list(
    status = 'OK',
    code = response_status,
    message = "Data received",
    content = response_content
  )

  return(res)
}

#' Get quota
#'
#' User quota
#'
#' @param api_options Option list as generated by \link{\code{meteocat_options}}
#' @noRd
.get_quota_meteocat <- function(api_options) {

  # path
  path_quota <- c('quotes', 'v1', 'consum-actual')

  api_status_check <- .check_status_meteocat(
    'https://api.meteo.cat',
    httr::add_headers(`x-api-key` = api_options$api_key),
    path = path_quota,
    httr::user_agent('https://github.com/emf-creaf/meteospain')
  )

  if (api_status_check$status != 'OK') {
    # if api request limit reached, do a recursive call to the function after 60 seconds
    if (api_status_check$code == 429) {
      return(.manage_429_errors(api_status_check, api_options, .get_quota_meteocat))
    } else {
      cli::cli_abort(c(
        x = api_status_check$code,
        i = api_status_check$message
      ))
    }
  }

  response_content <- api_status_check$content$plans |>
    dplyr::as_tibble()
  return(response_content)
}

#' Get variables metadata
#'
#' Variables metadata
#'
#' @param api_options Option list as generated by \link{\code{meteocat_options}}
#' @noRd
.get_variables_meteocat <- function(api_options) {

  # path
  path_resolution <- switch(
    api_options$resolution,
    'instant' = c('xema', 'v1', 'variables', 'mesurades', 'metadades'),
    'hourly' = c('xema', 'v1', 'variables', 'mesurades', 'metadades'),
    'daily' = c('xema', 'v1', 'variables', 'estadistics', 'diaris', 'metadades'),
    'monthly' = c('xema', 'v1', 'variables', 'estadistics', 'mensuals', 'metadades'),
    'yearly' = c('xema', 'v1', 'variables', 'estadistics', 'anuals', 'metadades')
  )


  # get and status check ----------------------------------------------------------------------------------
  api_status_check <- .check_status_meteocat(
    'https://api.meteo.cat',
    httr::add_headers(`x-api-key` = api_options$api_key),
    path = path_resolution,
    httr::user_agent('https://github.com/emf-creaf/meteospain')
  )

  if (api_status_check$status != 'OK') {
    # if api request limit reached, do a recursive call to the function after 60 seconds
    if (api_status_check$code == 429) {
      return(.manage_429_errors(api_status_check, api_options, .get_variables_meteocat))
    }
  }

  response_content <- api_status_check$content |>
    dplyr::as_tibble()
  return(response_content)
}

#' Create the path elements for MeteoCat API
#'
#' Path vectors for MeteoCat API to use with httr::GET
#'
#' @section Dates
#' In this case as MeteoCat is capped to one date: one day for hourly, one month for daily, one year for
#' monthly and all years for yearly (no date needed). So we only use the start_date
#'
#' @section Variables
#' MeteoCat API only return one variable for all stations, so we need to iterate the desired variables. This
#' means that \code{.create_meteocat_path} should return a vector of paths for which iterate the get function.
#' Desired variables are:
#' \itemize{
#'   \item{instant & hourly: 32 (temp), 33 (humidity), 35 (precip), 36 (rad), 46 (windspeed), 47 (winddir)}
#'   \item{daily: 1000:1002 (temp), 1100:1102 (humidity), 1300 (precip), 1400 (rad), 1505 (windspeed), 1511(winddir)}
#'   \item{monthly: 2000:2004 (temp), 2100:2104 (humidity), 2300 (precip), 2400 (rad), 2505 (windspeed), 2511(winddir)}
#'   \item{yearly: 3000:3004 (temp), 3100:3104 (humidity), 3300 (precip), 3400 (rad), 3505 (windspeed), 3511(winddir)}
#' }
#'
#' @param api_options Option list as generated by \link{\code{meteocat_options}}
#'
#' @noRd
.create_meteocat_path <- function(api_options) {

  # we need the resolution to create the corresponding path
  resolution <- api_options$resolution

  # depending on resolution, the variables list is different
  variables_list <- switch(
    api_options$resolution,
    'instant' = c(32, 33, 35, 36, 46, 47),
    'hourly' = c(32, 33, 35, 36, 46, 47),
    'daily' = c(1000:1002, 1100:1102, 1300, 1400, 1505, 1511),
    'monthly' = c(2000:2004, 2100:2104, 2300, 2400, 2505, 2511),
    'yearly' = c(3000:3004, 3100:3104, 3300, 3400, 3505, 3511)
  )

  # now the path vectors for the resolutions
  paths_resolution <- switch(
    api_options$resolution,
    'instant' = purrr::map(
      variables_list,
      function(variable) { c('xema', 'v1', 'variables', 'mesurades', variable, 'ultimes') }
    ),
    'hourly' = purrr::map(
      variables_list,
      function(variable) {
        c(
          'xema', 'v1', 'variables', 'mesurades', variable, lubridate::year(api_options$start_date),
          format(api_options$start_date,"%m"), format(api_options$start_date,"%d")
        )
      }
    ),
    # for daily and monthly, dates are query parameters not path ones.
    'daily' = purrr::map(
      variables_list,
      function(variable) { c('xema', 'v1', 'variables', 'estadistics', 'diaris', variable) }
    ),
    'monthly' = purrr::map(
      variables_list,
      function(variable) { c('xema', 'v1', 'variables', 'estadistics', 'mensuals', variable) }
    ),
    'yearly' = purrr::map(
      variables_list,
      function(variable) { c('xema', 'v1', 'variables', 'estadistics', 'anuals', variable) }
    ),
    list()
  )

  # not recognised resolution
  if (length(paths_resolution) < 1) {
    cli::cli_abort(c(
      "{.arg {api_options$resolution}} is not a valid temporal resolution for MeteoCat.\nPlease see meteocat_options help for more information"
    ))
  }

  return(paths_resolution)
}

#' Create the query element for MeteoCat API
#'
#' Query string for MeteoCat API to use with httr::GET
#'
#' MeteoCat needs dates for daily and monthly resolutions as query parameters (broken down in year and month).
#'
#' @param api_options Option list as generated by \link{\code{meteocat_options}}
#'
#' @noRd
.create_meteocat_query <- function(api_options) {

  # dates supplied must be broken down and stored in year and month values to create the query parameters
  year_query_par <- glue::glue("any={lubridate::year(api_options$start_date)}")
  month_query_par <- glue::glue("&mes={format(api_options$start_date, '%m')}")

  dates_query_string <- switch(
    api_options$resolution,
    'daily' = glue::glue(year_query_par, month_query_par),
    'monthly' = year_query_par,
    character(0)
  )

  return(dates_query_string)
}

#' Get info for the meteocat stations
#'
#' Get info for the meteocat stations
#'
#' @noRd

.get_info_meteocat <- function(api_options) {

  # GET parts needed --------------------------------------------------------------------------------------
  # path
  path_resolution <- c('xema', 'v1', 'estacions', 'metadades')

  # Status check ------------------------------------------------------------------------------------------
  api_status_check <- .check_status_meteocat(
    "https://api.meteo.cat",
    httr::add_headers(`x-api-key` = api_options$api_key),
    path = path_resolution,
    httr::user_agent('https://github.com/emf-creaf/meteospain')
  )

  if (api_status_check$status != 'OK') {
    # if api request limit reached, do a recursive call to the function after 60 seconds
    if (api_status_check$code == 429) {
      return(.manage_429_errors(api_status_check, api_options, .get_info_meteocat))
    } else {
      cli::cli_abort(c(
        x = api_status_check$code,
        i = api_status_check$message
      ))
    }
  }

  # Data --------------------------------------------------------------------------------------------------
  # Meteocat returns a data frame, but some variables are data frames themselves. We need to work on that
  response_content <- api_status_check$content

  coords_df <- response_content[['coordenades']]
  province_df <- response_content[['provincia']]['nom'] |>
    dplyr::rename(station_province = "nom")

  response_content |>
    dplyr::as_tibble() |>
    # add service name, to identify the data if joining with other services
    dplyr::mutate(service = 'meteocat') |>
    dplyr::select(
      !dplyr::any_of(c(
        'coordenades', 'municipi', 'comarca', 'provincia',
        'xarxa', 'estats', 'tipus', 'emplacament'
      ))
    ) |>
    dplyr::bind_cols(coords_df, province_df) |>
    dplyr::select(
      "service", station_id = "codi", station_name = "nom", "station_province",
      altitude = "altitud", "longitud", "latitud"
    ) |>
    dplyr::distinct() |>
    dplyr::mutate(
      altitude = units::set_units(.data$altitude, 'm')
    ) |>
    sf::st_as_sf(coords = c('longitud', 'latitud'), crs = 4326)

}

#' Get data from MeteoCat
#'
#' Get data from MeteoCat service
#'
#' For all resolutions, there is no need of supply the stations_id in the query,
#' as the data is not so big. So, in case of stations provided, we can filter later, after getting
#' the data. This also has the advantage of using only one query, reducing the probability of reaching
#' the API limit per minute or total.
#'
#' @param api_options Option list as generated by \link{\code{meteocat_options}}
#'
#' @noRd
.get_data_meteocat <- function(api_options) {

  # All necessary things for the GET ----------------------------------------------------------------------
  # create api paths
  paths_resolution <- .create_meteocat_path(api_options)
  # create query if needed (only use it when length is > 0)
  query_resolution <- .create_meteocat_query(api_options)
  if (length(query_resolution) < 1) {
    query_resolution <- NULL
  }

  # GET and Status check ----------------------------------------------------------------------------------
  # Here the things are a little convoluted. MeteoCat, for returning all stations only allows one variable
  # and one day. This means that for all variables, we need to loop around all paths (variables) needed,
  # checking statuses and retrieving data if everything is ok.
  api_statuses <- paths_resolution |>
    purrr::map(
      \(path) {
        .check_status_meteocat(
          "https://api.meteo.cat",
          httr::add_headers(`x-api-key` = api_options$api_key),
          path = path,
          query = query_resolution,
          httr::user_agent('https://github.com/emf-creaf/meteospain')
        )
      }
    )

  variables_statuses <- purrr::map_depth(api_statuses, 1, 'status') |>
    purrr::flatten_chr()
  variables_codes <- purrr::map_depth(api_statuses, 1, 'code') |>
    purrr::flatten_dbl()
  variables_messages <- purrr::map_depth(api_statuses, 1, 'message') |>
    purrr::flatten_chr()

  if (any(variables_statuses != 'OK')) {
    if (any(variables_codes == 429)) {
      messages_to_show <- variables_messages[which(variables_codes == 429)] |> unique()
      return(.manage_429_errors(list(code = 429, message = messages_to_show[1]), api_options, .get_data_meteocat))
    } else {
      messages_to_show <- variables_messages[which(variables_codes != 200)] |> unique()
      cli::cli_abort(c(messages_to_show))
    }
  }

  # Filter expression for stations ------------------------------------------------------------------------
  # In case stations were supplied, we need also to filter them
  filter_expression <- TRUE
  # update filter if there is stations supplied
  if (!rlang::is_null(api_options$stations)) {
    filter_expression <- rlang::expr(.data$station_id %in% api_options$stations)
  }

  # Resolution specific carpentry -------------------------------------------------------------------------
  # Now, instant/hourly and daily/monthly/yearly differs in the unnest step, as the column names are called
  # differently. It also differs in the select step as in the latter group there is no repetition of column
  # names after the unnest step.
  resolution_specific_unnest <- .meteocat_short_carpentry
  if (api_options$resolution %in% c('daily', 'monthly', 'yearly')) {
    resolution_specific_unnest <- .meteocat_long_carpentry
  }

  # Stations info for getting coords ----------------------------------------------------------------------
  stations_info <- .get_info_meteocat(api_options)

  # Data transformation -----------------------------------------------------------------------------------
  response_trasformed <- purrr::map_depth(api_statuses, 1, 'content') |>
    # resolution specific unnesting of raw data
    resolution_specific_unnest() |>
    # transform variable codes to standard names
    dplyr::mutate(variable_name = .meteocat_var_codes_2_names(.data$variable_code)) |>
    # for daily, monthly and yearly, sometimes there are duplicated rows, remove them
    dplyr::distinct() |>
    # each variable in its own column
    tidyr::pivot_wider(
      id_cols = -"variable_code",
      names_from = "variable_name", values_from = "valor"
    ) |>
    # set service, date and units
    dplyr::mutate(
      service = 'meteocat',
      timestamp = lubridate::parse_date_time(.data$timestamp, orders = c('ymdHMS', 'Ymz'), truncated = 5),
      dplyr::across(dplyr::contains('temperature'), ~ units::set_units(.x, 'degree_C')),
      dplyr::across(dplyr::contains('humidity'), ~ units::set_units(.x, '%')),
      dplyr::across(dplyr::contains('precipitation'), ~ units::set_units(.x, 'L/m^2')),
      dplyr::across(dplyr::contains('radiation'), ~ units::set_units(.x, 'MJ/m^2')),
      dplyr::across(dplyr::contains('speed'), ~ units::set_units(.x, 'm/s')),
      dplyr::across(dplyr::contains('direction'), ~ units::set_units(.x, 'degree')),
    )

  res <- response_trasformed |>
    # remove unwanted stations
    dplyr::filter(!! filter_expression) |>
    # join stations_info
    dplyr::left_join(stations_info, by = c('service', 'station_id')) |>
    # arrange data
    dplyr::arrange(.data$timestamp, .data$station_id) |>
    # reorder variables to be consistent among all services
    relocate_vars() |>
    # ensure we have an sf
    sf::st_as_sf()

  # Check if any stations were returned -------------------------------------------------------------------
  if ((!is.null(api_options$stations)) & nrow(res) < 1) {
    cli::cli_abort(c(
      "Station(s) provided have no data for the dates selected.",
      "Available stations with data for the actual query are:",
      glue::glue_collapse(unique(response_trasformed$station_id), sep = ', ', last = ' and ')
    ))
  }

  # Copyright message -------------------------------------------------------------------------------------
  cli::cli_inform(c(
    i = copyright_style("Data provided by meteo.cat \u00A9 Servei Meteorol\u00F2gic de Catalunya"),
    legal_note_style("https://www.meteo.cat/wpweb/avis-legal/#info")
  ))

  return(res)
}



# resolution_specific_carpentry -------------------------------------------------------------------------

.meteocat_short_carpentry <- function(data) {
  data |>
    purrr::map(function(variable_data) {
      unnest_safe(
        variable_data, cols = "variables",
        # names_repair = 'universal'
        names_repair = ~ vctrs::vec_as_names(.x, repair = 'universal', quiet = TRUE)
      ) |>
        unnest_safe(cols = "lectures", names_repair = 'universal')
    }) |>
    purrr::list_rbind() |>
    dplyr::select(
      timestamp = "data", station_id = "codi...1", variable_code = "codi...2", "valor"
    )
}

.meteocat_long_carpentry <- function(data) {
  data |>
    purrr::map(function(variable_data) {
      unnest_safe(variable_data, cols = "valors", names_repair = 'universal')
    }) |>
    purrr::list_rbind() |>
    dplyr::select(
      timestamp = "data", station_id = "codiEstacio", variable_code = "codiVariable",
      "valor"
    )
}

Try the meteospain package in your browser

Any scripts or data that you put into this service are public.

meteospain documentation built on May 29, 2024, 1:59 a.m.