R/webservice_functions.R

Defines functions realtime_ws token_ws

Documented in realtime_ws token_ws

# Copyright 2017 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.



#' Request a token from the Environment and Climate Change Canada [EXPERIMENTAL]
#' @param username Supplied by ECCC. Defaults to NULL which will use WS_USRNM from .Renviron file
#' @param password Supplied by ECCC. Defaults to NULL which will use WS_PWD from .Renviron file
#' Request a token from the ECCC web service using the POST method. This token expires after 10 minutes.
#' You can only have 5 tokens out at once.
#'
#' @details The \code{username} and \code{password} should be treated carefully and should never be entered directly into an r script or console.
#' Rather these credentials should be stored in your \code{.Renviron} file. The .Renviron file can edited using \code{file.edit("~/.Renviron")}.
#' In that file, which is only stored locally and is only available to you, you can assign your \code{username} to WS_USRNM and \code{password}
#' to WS_PWD and then simply issue \code{token_ws}. See \code{?realtime_ws} for examples.
#'
#' @return The token as a string that should be supplied the \code{realtime_ws} function.
#'
#'
#' @family realtime functions
#' @export
#'


token_ws <- function(username = NULL, password = NULL) {

  ## Fail if the environmental variables aren't set
  if(!nzchar(Sys.getenv("WS_USRNM"))){
    stop("No environment variable for username assigned to WS_USRNM in .Renviron file", call. = TRUE)
  }
  if(!nzchar(Sys.getenv("WS_PWD"))){
    stop("No environment variable for password assigned to WS_PWD in .Renviron file", call. = TRUE)
  }


  if(is.null(username) | is.null(password)){
    username <- Sys.getenv("WS_USRNM")
    password <- Sys.getenv("WS_PWD")
  }

  login <- list(
    username = username,
    password = password
  )
  r <- httr::POST("https://wateroffice.ec.gc.ca/services/auth", body = login,
                  httr::user_agent(paste0("tidyhydat.ws v", utils::packageVersion("tidyhydat.ws"))))

  time_token <- Sys.time()

  ## A workaround that pauses for the connection
  #Sys.sleep(2)

  ## If the POST request was not a successful, print out why.
  ## Possibly could provide errors as per web service guidelines
  if (httr::status_code(r) == 422) {
    stop("422 Unprocessable Entity: Username and/or password are missing or are formatted incorrectly.")
  }

  if (httr::status_code(r) == 403) {
    stop("403 Forbidden: the web service is denying your request. Try any of the following options:
         -Ensure your credentials are set in your .Renviron file
         -Ensure you are not currently using all 5 tokens
         -Wait a few minutes and try again
         ")
  }

  ## Catch all error for anything not covered above.
  httr::stop_for_status(r)

  message(paste0("This token will expire at ", format(time_token + 10 * 60, "%H:%M:%S")))

  ## Extract token from POST
  token <- httr::content(r, "text", encoding = "UTF-8")

  attr(token, 'time') <- time_token

  return(token)
  }

#' Download realtime data from the ECCC web service
#'
#' Function to actually retrieve data from ECCC web service. Before using this function,
#' a token from \code{token_ws()} is needed. \code{realtime_ws} will let you know if the token has expired.
#' The maximum number of days that can be queried depends on other parameters being requested.
#' If one station is requested, 18 months of data can be requested. If you continually receiving
#' errors when invoking this function, reduce the number of observations (via station_number,
#' parameters or dates) being requested.
#'
#' @param station_number Water Survey of Canada station number.
#' @param parameters parameter ID. Can take multiple entries. Parameter is a numeric code. See \code{param_id}
#' for some options though undocumented parameters may be implemented. Defaults to Water level provisional, Secondary water level,
#' Tertiary water level, Discharge Provisional, Discharge, sensor, Water temperature, Secondary water temperature, Accumulated precipitation
#' @param start_date Accepts either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS.
#' If only start date is supplied (i.e. YYYY-MM-DD) values are returned from the start of that day.
#' Defaults to 30 days before current date. Time is supplied in UTC.
#' @param end_date Accepts either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS.
#' If only a date is supplied (i.e. YYYY-MM-DD) values are returned from the end of that day.
#' Defaults to current date. Time is supplied in UTC.
#' @param token generated by \code{token_ws()}
#'
#'
#' @format A tibble with 6 variables:
#' \describe{
#'   \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number}
#'   \item{Date}{Observation date and time. Formatted as a POSIXct class as UTC for consistency.}
#'   \item{Name_En}{Code name in English}
#'   \item{Value}{Value of the measurement.}
#'   \item{Unit}{Value units}
#'   \item{Grade}{future use}
#'   \item{Symbol}{future use}
#'   \item{Approval}{future use}
#'   \item{Parameter}{Numeric parameter code}
#'   \item{Code}{Letter parameter code}
#' }
#'
#' @examples
#' \dontrun{
#' token_out <- token_ws()
#'
#' ws_08 <- realtime_ws(station_number = c("08NL071","08NM174"),
#'                          parameters = c(47, 5),
#'                          token = token_out)
#'
#' fivedays <- realtime_ws(station_number = c("08NL071","08NM174"),
#'                          parameters = c(47, 5),
#'                          end_date = Sys.Date(), #today
#'                          start_date = Sys.Date() - 5, #five days ago
#'                          token = token_out)
#' }
#' @family realtime functions
#' @export


realtime_ws <- function(station_number, parameters = NULL,
                        start_date = Sys.Date() - 30, end_date = Sys.Date(), token) {
  #if (length(station_number) >= 300) {
  #  stop("Only 300 stations are supported for one request. If more stations are required,
  #       a separate request should be issued to include the excess stations. This second request can
  #       be issued on the same token if it isn't required.")
  #}

  if (is.null(parameters)) parameters <- c(46, 16, 52, 47, 8, 5, 41, 18)

  if (any(!parameters %in% param_id$Parameter)) {
    stop(paste0(paste0(parameters[!parameters %in% tidyhydat.ws::param_id$Parameter], collapse = ","),
                " are invalid parameters. Check param_id for a list of valid options."), call. = FALSE)
  }

  if (!is.numeric(parameters)) stop("parameters should be a number", call. = FALSE)


  ## Check to see if the token is expired
  time_token <- attr(token, 'time')
  if(format(time_token + 10 * 60) < Sys.time()){
    stop("Your token has expired. Retrieve a new one using token_ws()")
  }

  if(nchar(as.character(start_date)) == 10 | nchar(as.character(end_date)) == 10){
    start_date <- paste0(start_date, " 00:00:00")
    end_date <- paste0(end_date, " 23:59:59")
  }

  if(!grepl('[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]', start_date)) {
    stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE)
    }

  if(!grepl('[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]', end_date)) {
    stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE)
  }


  if(!is.null(start_date) & !is.null(end_date)){
    if (lubridate::ymd_hms(end_date) < lubridate::ymd_hms(start_date)) {
      stop("start_date is after end_date. Try swapping values.", call. = FALSE)
    }
  }



  ## Check date is in the right format
  if (is.na(as.Date(start_date, format = "%Y-%m-%d")) | is.na(as.Date(end_date, format = "%Y-%m-%d"))) {
    stop("Invalid date format. Dates need to be in YYYY-MM-DD format")
  }

  #if (as.Date(end_date) - as.Date(start_date) > 60) {
  #  stop("The time period of data being requested should not exceed 2 months.
  #       If more data is required, then a separate request should be issued to include a different time period.")
  #}
  ## English parameter names

  ## Is it a valid parameter name?

  ## Is it a valid Station name?


  ## Build link for GET
  baseurl <- "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline?"
  station_string <- paste0("stations[]=", station_number, collapse = "&")
  parameters_string <- paste0("parameters[]=", parameters, collapse = "&")
  date_string <- paste0("start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19),
         "&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19))
  token_string <- paste0("token=", token[1])

  ## paste them all together
  url_for_GET <- paste0(
    baseurl,
    station_string, "&",
    parameters_string, "&",
    date_string, "&",
    token_string
  )

  ## Get data
  get_ws <- httr::GET(url_for_GET, httr::user_agent(paste0("tidyhydat.ws v", utils::packageVersion("tidyhydat.ws"))))

  ## Give webservice some time
  Sys.sleep(1)

  if (httr::status_code(get_ws) == 403) {
    stop("403 Forbidden: the web service is denying your request. Try any of the following options:
         -Ensure you are not currently using all 5 tokens
         -Wait a few minutes and try again
         -Copy the token_ws call and paste it directly into the console
         -Try using realtime_ws if you only need water quantity data
         ")
  }

  ## Check the GET status
  httr::stop_for_status(get_ws)

  if (httr::headers(get_ws)$`content-type` != "text/csv; charset=utf-8") {
    stop("GET response is not a csv file")
  }

  ## Turn it into a tibble and specify correct column classes
  csv_df <- httr::content(
    get_ws,
    type = "text/csv",
    encoding = "UTF-8",
    col_types = "cTidcci")

  ## Check here to see if csv_df has any data in it
  if (nrow(csv_df) == 0) {
    stop("No data exists for this station query")
  }

  ## Rename columns to reflect tidyhydat naming
  colnames(csv_df) <- c("STATION_NUMBER","Date","Parameter","Value","Grade","Symbol","Approval")

  csv_df <- dplyr::left_join(
    csv_df,
    dplyr::select(tidyhydat.ws::param_id, -.data$Name_Fr),
    by = c("Parameter")
  )
  csv_df <- dplyr::select(csv_df, .data$STATION_NUMBER, .data$Date, .data$Name_En, .data$Value, .data$Unit,
                          .data$Grade, .data$Symbol, .data$Approval, .data$Parameter, .data$Code)

  ## What stations were missed?
  differ <- setdiff(unique(station_number), unique(csv_df$STATION_NUMBER))
  if (length(differ) != 0) {
    if (length(differ) <= 10) {
      message("The following station(s) were not retrieved: ", paste0(differ, sep = " "))
      message("Check station number for typos or if it is a valid station in the network")
    }
    else {
      message("More than 10 stations from the initial query were not returned. Ensure realtime and active status are correctly specified.")
    }
  } else {
    message("All station successfully retrieved")
  }

  p_differ <- setdiff(unique(parameters), unique(csv_df$Parameter))
  if (length(p_differ) != 0) {
      message("The following valid parameter(s) were not retrieved for at least one station you requested: ", paste0(p_differ, sep = " "))
  } else {
    message("All parameters successfully retrieved")
  }


  ## Return it
  csv_df

  ## Need to output a warning to see if any stations weren't retrieved
  }
bcgov/tidyhydat.ws documentation built on June 2, 2025, 5:06 a.m.