#' @keywords internal
"_PACKAGE"
#' Retrieve data from NEWA at Cornell University
#'
#' Retrieve weather data from the Network for Environment and Weather
#' Applications (NEWA) at Cornell University.
#'
#' @importFrom utils write.csv
#'
#' @param client \code{RSelenium} client.
#' @param year Numeric value with the year.
#' @param month Numeric value with the month.
#' @param station String with the station abbreviation. Check
#' \url{http://newa.cornell.edu/index.php?page=station-pages} for a list of
#' stations.
#' @param base Base URL
#' (default: \url{http://newa.nrcc.cornell.edu/newaLister}).
#' @param interval String with the data interval (default: \code{"hly"},
#' hourly).
#' @param sleep Numeric value with the number of seconds to wait for the page
#' to load the results (default: 6 seconds).
#' @param table_id String with the unique HTML ID assigned to the table
#' containing the data (default: \code{"#dtable"}).
#' @param path String with path to location where CSV files should be stored
#' (default: \code{getwd()}).
#' @param save_file Boolean flag indicating whether the output should be
#' saved as a CSV file (default: \code{TRUE}).
#'
#' @return Data frame with the data retrieved from the server, returned
#' invisibly.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Create RSelenium session
#' rD <- RSelenium::rsDriver(browser = "firefox", port = 4544L, verbose = FALSE)
#' # Retrieve data for the Geneva (Bejo) station on 2020/12
#' scrappy::newa_nrcc(rD$client, 2020, 12, "gbe")
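#' # Retrieve the same data without saving a CSV file, using the documented
#' # save_file argument
#' df <- scrappy::newa_nrcc(rD$client, 2020, 12, "gbe", save_file = FALSE)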
#' # Stop server
#' rD$server$stop()
#' }
newa_nrcc <- function(client,
                      year,
                      month,
                      station,
                      base = "http://newa.nrcc.cornell.edu/newaLister",
                      interval = "hly",
                      sleep = 6,
                      table_id = "#dtable",
                      path = getwd(),
                      save_file = TRUE) {
  # Navigate to website
  client$navigate(paste(base, interval, station, year, month, sep = "/"))
  # Wait for the website to load
  Sys.sleep(sleep)
  # Create output filename
  file <- paste0(station, "-", interval, "-", year, "-", month, ".csv")
  file <- file.path(path, file)
  # Build the CSS selector for the data table (e.g. "table#dtable")
  node <- paste0("table", table_id)
  # Parse the page source and extract the data table
  out <- client$getPageSource()[[1]] %>%
    xml2::read_html() %>% # parse HTML
    rvest::html_nodes(node) %>% # extract table node
    rvest::html_table(fill = TRUE)
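  # html_table() returns a list holding a single data frame; keep its column
  # names (plus "Station") so they can be restored after as.data.frame()
  # sanitizes them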
  original_names <- c(names(out[[1]]), "Station")
  out <- as.data.frame(out)
  out$station <- station
  colnames(out) <- original_names
  if (save_file) {
    write.csv(out, file, row.names = FALSE)
  }
  invisible(out)
}
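
# A minimal sketch, not part of the package API: `newa_nrcc_range` is a
# hypothetical helper showing how newa_nrcc() could be called over several
# months and the results combined. It assumes an active RSelenium client, as
# in the example above.
newa_nrcc_range <- function(client, year, months, station, ...) {
  # Retrieve each month without writing intermediate CSV files
  monthly <- lapply(months, function(m) {
    newa_nrcc(client, year, m, station, save_file = FALSE, ...)
  })
  # Row-bind the monthly data frames into a single data frame
  do.call(rbind, monthly)
}
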
#' Retrieve data from NEWA v3.0 at Cornell University
#'
#' Retrieve weather data from the Network for Environment and Weather
#' Applications (NEWA) version 3.0 at Cornell University.
#'
#' @param year Numeric value with the start year.
#' @param month Numeric value with the start month.
#' @param day Numeric value with the start day.
#' @param hour Numeric value with the start hour.
#' @param station String with the station abbreviation. Check
#' \code{scrappy::newa3_stations} for a list of stations and abbreviations.
#' @param base Base URL
#' (default: \url{https://hrly.nrcc.cornell.edu/stnHrly}).
#'
#' @return List of tibbles with \code{hourly}, \code{daily},
#' \code{hourly_forecast}, and \code{daily_forecast} data.
#' @export
#'
#' @examples
#' results <- scrappy::newa_nrcc3(2021, 12, 1, 0, "gbe")
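#' # The returned list contains four tibbles:
#' # results$hourly, results$daily,
#' # results$hourly_forecast, results$daily_forecast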
newa_nrcc3 <- function(year,
                       month,
                       day,
                       hour,
                       station,
                       base = "https://hrly.nrcc.cornell.edu/stnHrly") {
  # Local helper: recursively left-pad a value with zeros until it is
  # `digits` characters wide
  pad_zeros <- function(val, digits = 2) {
    if (nchar(val) == digits) {
      return(val)
    }
    return(pad_zeros(paste0("0", val), digits))
  }
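  # Assemble the JSON request body by hand; e.g. for
  # newa_nrcc3(2021, 12, 1, 0, "gbe") the body is:
  # {"sid": "gbe newa", "sdate": "2021120100", "edate": "now"}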
  body <- paste0(
    "{\n",
    "\"sid\": \"", station, " newa\",\n",
    "\"sdate\": \"",
    pad_zeros(year, 4),
    pad_zeros(month, 2),
    pad_zeros(day, 2),
    pad_zeros(hour, 2),
    "\",\n",
    "\"edate\": \"now\"\n}\n\n"
  )
  tryCatch(
    {
      request <- httr::POST(base, body = body)
      contents <- httr::content(request)
      if (typeof(contents) == "character" && grepl(
        "Invalid Request|sid",
        contents
      )) {
        stop("'", station, "' is not a valid station code", call. = FALSE)
      }
      contents <- contents %>%
        jsonlite::fromJSON()
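      # The parsed response stores each table as a data matrix plus a
      # parallel vector of column names (hrlyData/hrlyFields,
      # dlyData/dlyFields, and so on)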
      # Extract hourly data
      hrlyData <- contents$hrlyData %>%
        magrittr::set_colnames(contents$hrlyFields) %>%
        tibble::as_tibble()
      # Extract daily data
      dlyData <- contents$dlyData %>%
        magrittr::set_colnames(contents$dlyFields) %>%
        tibble::as_tibble()
      # Extract hourly forecast data
      fcstData <- contents$fcstData %>%
        magrittr::set_colnames(contents$fcstFields) %>%
        tibble::as_tibble()
      # Extract daily forecast data
      dlyFcstData <- contents$dlyFcstData %>%
        magrittr::set_colnames(contents$dlyFcstFields) %>%
        tibble::as_tibble()
      return(list(
        hourly = hrlyData,
        daily = dlyData,
        hourly_forecast = fcstData,
        daily_forecast = dlyFcstData
      ))
    },
    error = function(e) {
      warning(e)
    }
  )
}