R/rfca_naefs_forecast.R

Defines functions rfca_naefs_forecast

Documented in rfca_naefs_forecast

#' rfca_naefs_forecast
#'
#' @description Retrieves NAEFS forecast for specified location.
#' @param forecast_location The name of the location to retrieve forecast for.
#' See `rfca_naefs_locations()` for valid options
#' @param forecast_date The date of the forecast in yyyy-mm-dd format. Defaults to today.
#' Archived forecasts are available for the past 30 days.
#' @param forecast_time Forecast validity start. Either "00" (0 zulu) or "12" (12 zulu)
#' @param forecast_forecast_parameter (Optional) A specific forecast_parameter to retrieve from forecast.
#' Defaults to all forecast_parameters.
#' See `rfca_naefs_forecast_parameters()` for valid options.
#' @return Data frame containing
#' \item{Timestamp}{The timestamp for the corresponding forecast values}
#' \item{Values}{Forecast values}
#' \item{forecast_parameter}{The forecast_parameter corresponding with value}
#' \item{Units}{The units for corresponding forecast_parameter and value}
#' \item{Model_Id}{A number specifying the model in the ensemble associated with the value}
#' @export

rfca_naefs_forecast <- function(forecast_location, forecast_date, forecast_time, forecast_parameter){

  # Default to today
  if(missing(forecast_date)){
    forecast_date <- Sys.Date()
  }else{
    # Make sure date is valid
    forecast_date <- ymd(forecast_date)
    if(is.na(forecast_date)){
      stop(
        paste0(
          "Invalid forecast_date format. ",
          "forecast_date should be yyyy-mm-dd format."
        )
      )
    }
  }

  forecast_date_str <- format(forecast_date, "%Y%m%d")

  # forecast_parameters available for querying
  available_params <- c(
    "APCP-SFC", "MSLP", "TCDC",
    "HGT-500HPA", "LAYER-1000-500HPA",
    "WIND-SFC", "WDIR-SFC", "WIND-200HPA",
    "RELH-SFC", "TMP-SFC"
  )

  # Check for location
  if(missing(forecast_location)){
    stop(
      "Please specify a forecast_location"
    )
  }else{
    location_meta <- suppressMessages(rfca_naefs_locations())
    location_meta <- dplyr::filter(location_meta, City == forecast_location)

    if(nrow(location_meta) == 0){
      stop(
        paste0(
          "Invalid location specified. ",
          "See output of rfca_naefs_locations() ",
          "for valid forecast_location(s)"
        )
      )
    }else{
      location_path <- location_meta$File_path[[1]]
    }

  }


  # Check that date falls within archive range
  if (lubridate::ymd(forecast_date) - Sys.Date() > 30) {
    stop(
      paste0(
        "Forecasts only archived for 30 days.",
        "The oldest forecast currently available is for ",
        Sys.Date() - 30
      )
    )
  }

  # Check that date isn't in future
  if (lubridate::ymd(forecast_date) > Sys.Date()) {
    stop(
      "forecast_date is greater than current date"
    )
  }

  # Check that forecast_time is valid
  if (!forecast_time %in% c("00", "12")) {
    stop(
      "forecast_time should be '00' or '12'"
    )
  }

  # Default to all forecast_parameters if not specified
  if (missing(forecast_parameter)) {
    message(
      "No forecast_parameter specified, returning results for all forecast_parameters"
    )
    forecast_parameter <- available_params
  } else {
    # Check to make sure forecast_parameters exist
    if (!length(forecast_parameter) == sum(forecast_parameter %in% available_params)) {
      stop(
        paste0(
          "One or more invalid forecast_parameters specified. ",
          "See output of forecast_parameter FUNCTION for valid forecast_parameters."
        )
      )
    }
  }

  base_url <- "http://dd.weatheroffice.gc.ca/ensemble/naefs/xml/"

  # Build URLs for downloading zipped XMLs
  forecast_parameter_url <- lapply(1:length(forecast_parameter), function(x){
    paste0(
      base_url, forecast_date_str,
      "/", forecast_time, "/",
      forecast_parameter[[x]], "/raw/",
      forecast_date_str, forecast_time,
      "_GEPS-NAEFS-RAW_",
      location_path, "_", forecast_parameter[[x]],
      "_000-384.xml.bz2"
    )
  })

  forecast_parameter_url <- unlist(forecast_parameter_url)
  names(forecast_parameter_url) <- forecast_parameter

  # Iterate through forecast_parameter URLs
  forecast_parameter_dat <- lapply(1:length(forecast_parameter_url), function(x){
    # Create temp file and download zipped XML
    temp_file <- tempfile()
    download.file(forecast_parameter_url[[x]], temp_file)

    # Unzip and read XML
    raw_dat <- xml2::read_xml(bzfile(temp_file))

    # Grab units
    header <- xml2::xml_child(
      xml2::xml_children(raw_dat)[[1]],
      "forecast_element"
    )

    units <- xml2::xml_attr(header, "unit_english")

    # Iterate through children (skipping header)
    child_dat <- lapply(2:length(xml2::xml_children(raw_dat)), function(c){
      current_child <- xml2::xml_children(raw_dat)[[c]]
      dat <- data.frame(
        Timestamp = xml2::xml_attr(current_child, "valid_time"),
        Values = xml2::xml_text(xml2::xml_children(current_child)),
        forecast_parameter = names(forecast_parameter_url)[[x]],
        Units = units,
        stringsAsFactors = FALSE
      )

      dat <- dplyr::mutate(
        dat,
        Model_Id = seq(1, length(dat$Values)),
        Timestamp = lubridate::ymd_h(Timestamp),
        Values = as.numeric(Values)
      )
    })

    child_dat <- do.call(rbind, child_dat)
  })

  # Bind rows
  forecast_parameter_dat <- do.call(rbind, forecast_parameter_dat)

  return(forecast_parameter_dat)

}
rywhale/rforecastca documentation built on May 4, 2019, 7:38 a.m.