# data-raw/wind.R

## Code to prepare the `wind` dataset
library(tidyverse)
library(s2)
library(rnoaa)
library(progress)
# Row of the `ozmaps::abs_ste` boundaries whose NAME is "Victoria".
vic_map <- filter(ozmaps::abs_ste, NAME == "Victoria")

# ISD station inventory, narrowed to stations that
#   * sit inside the 112-155 longitude / -45 to -9 latitude box,
#   * fall within the Victoria polygon, and
#   * have a record whose end date is in 2020 or later.
vic_stations <- isd_stations() %>%
  filter(
    between(lon, 112, 155),
    between(lat, -45, -9),
    s2_within(s2_lnglat(lon, lat), vic_map)
  ) %>%
  # `begin` and `end` are parsed as yyyymmdd values into Date columns
  mutate(
    across(c(begin, end), ~ as.Date(as.character(.x), format = "%Y%m%d"))
  ) %>%
  filter(
    lubridate::year(end) >= 2020,
    # these two stations cause problems when downloading
    !usaf %in% c("948560", "948731")
  )

# Visual check: draw the Victoria outline and overlay the selected stations.
cubble::plot_map(vic_map) +
  geom_point(
    mapping = aes(x = lon, y = lat),
    data = vic_stations
  )

# Years of ISD data to download for every station.
year_vec <- c(2019, 2020)

# Progress bar for the download step: one tick per (station, year) request.
# The total is derived from `year_vec` so it stays correct if years are
# added or removed.
pb <- progress_bar$new(
  total = nrow(vic_stations) * length(year_vec),
  format = "downloading [:bar] :percent eta: :eta")

# Download the ISD records of one station for every year in `years` and
# row-bind them into a single data frame. Advances the progress bar `pb`
# once per request.
download_station <- function(usaf, wban, years = year_vec) {
  map_dfr(years, function(yr) {
    pb$tick()
    isd(usaf = usaf, wban = wban, year = yr)
  })
}

# The first run may take a while because every station-year is downloaded.
# Each station row gets its observations stored in the list-column `ts`.
raw <- vic_stations %>%
  rowwise() %>%
  mutate(ts = list(download_station(usaf, wban)))

# Flatten the per-station downloads into one long table of wind observations.
#
# ISD does not use NA for missing wind values: the direction angle is coded
# 999 and the speed rate 9999 when missing (see the ISD format document).
# These sentinels are mapped to NA *before* the unit conversion, otherwise a
# missing speed would turn into a bogus 3599.64 km/h reading.
# NOTE(review): 999 also marks a "variable" direction when wind_code is "V";
# it is treated as NA here as well -- confirm this is acceptable downstream.
clean <- raw %>%
  unnest(ts) %>%
  select(usaf, wban, station_name, lat, lon, elev_m,
         date, time, contains("wind")) %>%
  mutate(usaf = as.numeric(usaf),
         station_name = as.factor(stringr::str_to_lower(station_name)),
         # date (yyyymmdd) + time (HHMM); as_datetime() parses as UTC by default
         time = lubridate::as_datetime(paste0(date, time), format = "%Y%m%d%H%M"),
         wind_direction = na_if(as.numeric(wind_direction), 999),
         # speed rate is stored in tenths of m/s; convert to kilometer per hour
         wind_speed = na_if(as.numeric(wind_speed), 9999) / 10 * 3.6) %>%
  # helper columns are no longer needed; the OC1 identifier is only present
  # when gust data was downloaded, so drop it only if it exists
  select(-any_of("OC1_wind_gust_observation_identifier"), -date, -wban,
         -wind_direction_quality, -wind_speed_quality, -wind_code) %>%
  rename(direction = wind_direction,
         speed = wind_speed,
         name = station_name,
         elev = elev_m)

library(cubble)

# A few stations have fewer than 1300 observations while most have more
# than 11000; stations at or below this count are dropped.
min_obs <- 2000

# Build the cubble: one row per station (keyed by `usaf`), observations
# indexed by `time`, located by (`lon`, `lat`).
wind_all <- as_cubble(clean, key = usaf, index = time, coord = c(lon, lat))

# `n` is the number of rows in each station's `ts`; it is only a temporary
# helper for the filter and is removed again afterwards.
wind_counted <- mutate(wind_all, n = nrow(ts))
wind <- wind_counted %>%
  filter(n > min_obs) %>%
  select(-n)

# Save `wind` as package data, replacing any existing copy.
usethis::use_data(wind, overwrite = TRUE)
# huizezhang-sherry/weatherdata documentation built on June 15, 2022, 6:40 p.m.