# Set the knitr chunk option `echo = TRUE` as the default for every chunk
# (i.e. show each chunk's source code in the rendered document).
knitr::opts_chunk$set(echo = TRUE)
Create all possible combinations of station-year-month:
# Build one row per station-year-month combination. The `downloaded` flag
# starts as FALSE for every row and is flipped once a combination has been
# retrieved.
stations <- c("acc", "kalb", "alb")
years <- 2015:2020
months <- 1:12

# Rows are ordered by station, then year, then month. The repetition counts
# are spelled out instead of relying on data.frame() recycling the shorter
# columns.
combinations <- data.frame(
  station = rep(stations, each = length(years) * length(months)),
  year = rep(rep(years, each = length(months)), times = length(stations)),
  month = rep(months, times = length(stations) * length(years)),
  downloaded = FALSE
)
# Render the first ten combinations as a formatted table
knitr::kable(utils::head(combinations, n = 10))
Create RSelenium session
# Create RSelenium session (Firefox, port 4548). The returned object is used
# below through `rD$client` (the browser) and `rD$server` (stopped when the
# work is done).
rD <- RSelenium::rsDriver(browser = "firefox", port = 4548L, verbose = FALSE)
Retrieve data
# Path to the output directory
outputdir <- getwd()

# Create progress bar (one tick per station-year-month combination)
pb <- progress::progress_bar$new(
  format = "(:current/:total) [:bar] :percent",
  total = nrow(combinations),
  clear = FALSE,
  width = 80
)

# Download every pending combination. A failure in one combination is
# reported and the loop moves on to the next one; `downloaded` stays FALSE
# for the failed row, so re-running this chunk retries only what is missing.
# The outer tryCatch() guarantees that the Selenium server is stopped even if
# the loop is aborted.
tryCatch(
  {
    for (i in seq_len(nrow(combinations))) {
      tryCatch(
        {
          # Check if the data has been downloaded
          if (!combinations$downloaded[i]) {
            # Call scrappy
            scrappy::newa_nrcc(
              client = rD$client,
              year = combinations$year[i],
              month = combinations$month[i],
              station = combinations$station[i],
              path = outputdir
            )
            combinations$downloaded[i] <- TRUE
          }
        },
        error = function(e) {
          message("Error processing combination with index ", i)
          # Emit a plain warning carrying the error text; passing the error
          # condition object itself to warning() would re-signal it to any
          # enclosing error handler.
          warning(conditionMessage(e), call. = FALSE)
        }
      )
      # Update progress bar
      pb$tick()
    }
  },
  # Stop server
  finally = rD$server$stop()
)
Post-processing: combine individual CSV files by station name:
# Post-processing: merge the individual CSV files of each station into a
# single `<station>.csv` file (written to the working directory) and tag
# every row with the station's state.
stations <- c("acc", "kalb", "alb")
states <- c("NY", "NY", "NY")

# `outputdir` is defined by the download step; fall back to the working
# directory when that step has not been run in the current session.
if (!exists("outputdir")) {
  outputdir <- getwd()
}

# List the files in `directory` that belong to `station`.
#
# @param station Station code used as the file-name pattern.
# @param all_stations Codes of every station being processed.
# @param directory Directory holding the downloaded CSV files.
# @return Character vector of full paths (possibly empty).
find_station_files <- function(station, all_stations, directory) {
  files <- list.files(directory, pattern = station, full.names = TRUE)

  # A code that is a substring of another one ("alb" inside "kalb") would
  # otherwise pick up the other station's files as well.
  longer_codes <- setdiff(
    all_stations[grepl(station, all_stations, fixed = TRUE)],
    station
  )
  for (code in longer_codes) {
    files <- files[!grepl(code, basename(files), fixed = TRUE)]
  }

  # Skip the combined file written by a previous run, so its rows are not
  # appended to themselves.
  files[basename(files) != paste0(station, ".csv")]
}

for (s in seq_along(stations)) {
  st <- stations[s]
  message("Processing: ", st)
  files <- find_station_files(st, stations, outputdir)
  if (length(files) > 0) {
    message(length(files), " files found...")
    # Read every file first and bind once, instead of growing the data frame
    # with rbind() on each iteration.
    combined_data <- do.call(rbind, lapply(files, read.csv))
    combined_data$State <- states[s]
    write.csv(combined_data, paste0(st, ".csv"), row.names = FALSE)
  }
}
List the station names:
# Import pipe
`%>%` <- scrappy::`%>%`

# Create RSelenium session
rD <- RSelenium::rsDriver(browser = "firefox", port = 4548L, verbose = FALSE)

# Fetch the HTML of the station listing page. The server is stopped as soon
# as the page source has been retrieved, and also when navigation fails, so
# the port is never left occupied.
page_source <- tryCatch(
  {
    rD$client$navigate("http://newa.cornell.edu/index.php?page=station-pages")
    # Presumably gives the page time to finish rendering before it is read
    Sys.sleep(5)
    rD$client$getPageSource()[[1]]
  },
  # Stop server
  finally = rD$server$stop()
)

# Keep only the nodes matching the CSS selector `table.table`
aux <- page_source %>%
  xml2::read_html() %>% # parse HTML
  rvest::html_nodes("table.table")

# Station codes: whatever follows the last `=` in each link's `href`
station_code <- aux %>%
  rvest::html_nodes("a") %>%
  rvest::html_attr("href") %>%
  gsub(pattern = ".*=", replacement = "")

# Station table: split the first column at the comma into `name` and `state`
stations <- aux %>%
  rvest::html_table(header = TRUE) %>%
  as.data.frame() %>%
  tidyr::separate(col = 1, into = c("name", "state"), sep = ",")
stations$code <- station_code
# Render the first ten stations as a formatted table
knitr::kable(utils::head(stations, n = 10))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.