data-raw/historical.R

## code to prepare `DATASET` dataset goes here
library(rnoaa)
library(tidyverse)
library(cubble)
all_stations <- ghcnd_stations() %>%
  filter(str_starts(id, "ASN")) %>%
  filter(last_year >= 2020) %>%
  mutate(wmo_id = as.numeric(wmo_id),
         name = str_to_lower(name)) %>%
  select(-state, -gsn_flag) %>%
  select(id, longitude, latitude, elevation, name, wmo_id, element, first_year, last_year) %>%
  rename(long = longitude, lat = latitude, elev = elevation)
usethis::use_data(all_stations, overwrite = TRUE)

########################################################
cand <- all_stations %>% filter(element == "PRCP", first_year < 1970, !is.na(wmo_id))

raw_prcp <- cand %>%
  rowwise() %>%
  mutate(ts = list(meteo_pull_monitors(
    monitors = id, var = "PRCP",
    date_min = glue::glue("{first_year}-01-01"),
    date_max = glue::glue("{last_year}-12-31")
    ) %>%
      select(-id)
    )
  )

prcp_clean <- raw_prcp %>%
  select(-element) %>%
  unnest(ts) %>%
  filter(!is.na(prcp)) %>%
  tsibble::as_tsibble(key = id, index = date) %>%
  tsibble::fill_gaps() %>%
  tidyr::fill(long, lat, elev, name, wmo_id, first_year, last_year)

fullmonth_prcp <- prcp_clean %>%
  as_tibble() %>%
  mutate(ym = tsibble::yearmonth(date)) %>%
  nest(ts = c(date, prcp)) %>%
  rowwise() %>%
  filter(nrow(ts) >= 28)

historical_prcp <- fullmonth_prcp %>%
  unnest(ts) %>%
  ungroup() %>%
  group_by(id, long, lat, elev, name, wmo_id, first_year, last_year, ym) %>%
  summarise(prcp = sum(prcp, na.rm = TRUE)) %>%
  as_cubble(index = ym, key = id, coords = c(long, lat))

usethis::use_data(historical_prcp, overwrite = TRUE, compress = "gzip")

########################################################
cand <- all_stations %>%
  filter(element == "TMAX", first_year < 1970, !is.na(wmo_id))

raw_tmax <- cand %>%
  rowwise() %>%
  mutate(ts = list(meteo_pull_monitors(
    monitors = id, var = "TMAX",
    date_min = glue::glue("{first_year}-01-01"),
    date_max = glue::glue("{last_year}-12-31")
    ) %>%
      select(-id)
    )
  )

tmax_clean <- raw_tmax %>%
  select(-element) %>%
  unnest(ts) %>%
  mutate(tmax = tmax/10) %>%
  tsibble::as_tsibble(key = id, index = date) %>%
  tsibble::fill_gaps() %>%
  tidyr::fill(long, lat, elev, name, wmo_id, first_year, last_year)

historical_tmax <- tmax_clean %>%
  as_cubble(index = date, key = id, coords = c(long, lat)) %>%
  add_missing_prct(tmax) %>%
  filter(tmax_missing < 0.3) # based on the distribution of prcp_missing on the histogram

usethis::use_data(historical_tmax, overwrite = TRUE,  compress = "gzip")
huizezhang-sherry/weatherdata documentation built on June 15, 2022, 6:40 p.m.