# data-raw/us_weather.r

library(tidyverse)
library(readr)
library(lubridate)

# Daily weather records read from data-raw/lcd-sod-18-19.csv, tagged as the
# "18_19" winter group.
#
# The first line of the file is skipped (presumably the original header row --
# confirm against the CSV) and replaced by the 26 snake_case names below.
# Derived columns added afterwards:
#   month, month_numeric, year, day  calendar parts of `date`
#   winter_group                     constant label for this file
#   location                         city name looked up from the station id
#   fog, mist, drizzle, rain, snow   "Yes"/"No" flags from `weather_occurances`
us_weather_1819 <- read_csv(
  "data-raw/lcd-sod-18-19.csv",
  col_names = c(
    "station", "date", "report_type", "backup_name",
    "dewpoint_avg", "drybulbtemp_avg", "relativehumidity_avg",
    "sealevelpressure_avg", "stationpressure_avg", "wetbulbtemp_avg",
    "windspeed_avg", "cooling_degree_days",
    "departure_from_normal_temperature", "heating_degree_days",
    "drybulbtemp_max", "drybulbtemp_min",
    "peak_wind_direction", "peak_wind_speed",
    "precipitation", "snow_depth", "snowfall",
    "wind_direction", "wind_speed",
    "weather_occurances",
    "sunrise", "sunset"
  ),
  skip = 1
) %>%
  # Calendar components of the observation date, plus the winter label.
  mutate(
    month = month(date, label = TRUE),
    month_numeric = month(date),
    year = year(date),
    day = day(date),
    winter_group = "18_19"
  ) %>%
  # Map station ids to city names. Any id not listed is labelled
  # "Buffalo, NY"; a missing id stays missing.
  mutate(
    location = case_when(
      is.na(station) ~ NA_character_,
      station == 72546214937 ~ "Iowa City, IA",
      station == 72530094846 ~ "Chicago, IL",
      station == 72606014764 ~ "Portland, ME",
      station == 72509014739 ~ "Boston, MA",
      station == 72537094847 ~ "Detroit, MI",
      station == 72745014913 ~ "Duluth, MN",
      station == 72658014922 ~ "Minneapolis, MN",
      TRUE ~ "Buffalo, NY"
    )
  ) %>%
  # Flag each weather type by searching the occurrence string for its code.
  # grepl() returns FALSE for NA input, so missing strings become "No".
  mutate(
    fog = if_else(grepl("FG", weather_occurances), "Yes", "No"),
    mist = if_else(grepl("BR", weather_occurances), "Yes", "No"),
    drizzle = if_else(grepl("DZ", weather_occurances), "Yes", "No"),
    rain = if_else(grepl("RA", weather_occurances), "Yes", "No"),
    snow = if_else(grepl("SN", weather_occurances), "Yes", "No")
  )

# Daily weather records read from data-raw/lcd-sod-19-20.csv, tagged as the
# "19_20" winter group.
#
# The first line of the file is skipped (presumably the original header row --
# confirm against the CSV) and replaced by the 26 snake_case names below.
# Derived columns added afterwards:
#   month, month_numeric, year, day  calendar parts of `date`
#   winter_group                     constant label for this file
#   location                         city name looked up from the station id
#   fog, mist, drizzle, rain, snow   "Yes"/"No" flags from `weather_occurances`
# Several temperature / degree-day columns carry a trailing "s" on some values
# (presumably a data-quality flag -- TODO confirm against the data source's
# documentation); the flag is removed and the columns are converted to numeric.
us_weather_1920 <- read_csv(
  "data-raw/lcd-sod-19-20.csv",
  col_names = c(
    "station", "date", "report_type", "backup_name",
    "dewpoint_avg", "drybulbtemp_avg", "relativehumidity_avg",
    "sealevelpressure_avg", "stationpressure_avg", "wetbulbtemp_avg",
    "windspeed_avg", "cooling_degree_days",
    "departure_from_normal_temperature", "heating_degree_days",
    "drybulbtemp_max", "drybulbtemp_min",
    "peak_wind_direction", "peak_wind_speed",
    "precipitation", "snow_depth", "snowfall",
    "wind_direction", "wind_speed",
    "weather_occurances",
    "sunrise", "sunset"
  ),
  skip = 1
) %>%
  # Calendar components of the observation date, plus the winter label.
  mutate(
    month = month(date, label = TRUE),
    month_numeric = month(date),
    year = year(date),
    day = day(date),
    winter_group = "19_20"
  ) %>%
  # Map station ids to city names. Any id not listed is labelled
  # "Buffalo, NY"; a missing id stays missing.
  mutate(
    location = case_when(
      is.na(station) ~ NA_character_,
      station == 72546214937 ~ "Iowa City, IA",
      station == 72530094846 ~ "Chicago, IL",
      station == 72606014764 ~ "Portland, ME",
      station == 72509014739 ~ "Boston, MA",
      station == 72537094847 ~ "Detroit, MI",
      station == 72745014913 ~ "Duluth, MN",
      station == 72658014922 ~ "Minneapolis, MN",
      TRUE ~ "Buffalo, NY"
    )
  ) %>%
  # Flag each weather type by searching the occurrence string for its code.
  # grepl() returns FALSE for NA input, so missing strings become "No".
  mutate(
    fog = if_else(grepl("FG", weather_occurances), "Yes", "No"),
    mist = if_else(grepl("BR", weather_occurances), "Yes", "No"),
    drizzle = if_else(grepl("DZ", weather_occurances), "Yes", "No"),
    rain = if_else(grepl("RA", weather_occurances), "Yes", "No"),
    snow = if_else(grepl("SN", weather_occurances), "Yes", "No")
  ) %>%
  # Drop the trailing "s" flag and convert to numeric. cooling_degree_days
  # uses the same rule as its siblings so that any flagged value (not only
  # "0s") is parsed instead of being coerced to NA.
  mutate(
    drybulbtemp_avg = as.numeric(gsub("s$", "", drybulbtemp_avg)),
    cooling_degree_days = as.numeric(gsub("s$", "", cooling_degree_days)),
    departure_from_normal_temperature = as.numeric(
      gsub("s$", "", departure_from_normal_temperature)
    ),
    heating_degree_days = as.numeric(gsub("s$", "", heating_degree_days)),
    drybulbtemp_max = as.numeric(gsub("s$", "", drybulbtemp_max)),
    drybulbtemp_min = as.numeric(gsub("s$", "", drybulbtemp_min))
  )


# Stack both winters into one table, finish the numeric clean-up that applies
# to every row, and write the result to data/us_weather.rda.
#
# Clean-up rules:
#   * A trailing "s" on a value is removed before numeric conversion
#     (presumably a data-quality flag -- TODO confirm against the data source).
#   * "T" entries (presumably "trace" amounts -- TODO confirm) are replaced by
#     small positive placeholders: 0.005 for precipitation and snowfall,
#     0.1 for snow depth.
#   * snow_depth and snowfall get the same "s" stripping as precipitation, so
#     flagged values such as "Ts" or "1.2s" are parsed rather than turned
#     into NA.
#   * month is re-levelled so the ordering runs October through September.
# report_type and backup_name are dropped from the saved data.
us_weather <- bind_rows(us_weather_1819, us_weather_1920) %>%
  mutate(
    peak_wind_direction = as.numeric(gsub("s$", "", peak_wind_direction)),
    peak_wind_speed = as.numeric(gsub("s$", "", peak_wind_speed)),
    precipitation = gsub("s$", "", precipitation),
    precipitation = as.numeric(
      ifelse(precipitation == "T", 0.005, precipitation)
    ),
    snow_depth = gsub("s$", "", snow_depth),
    snow_depth = as.numeric(ifelse(snow_depth == "T", 0.1, snow_depth)),
    snowfall = gsub("s$", "", snowfall),
    snowfall = as.numeric(ifelse(snowfall == "T", 0.005, snowfall)),
    month = ordered(
      month,
      levels = c(
        "Oct", "Nov", "Dec",
        "Jan", "Feb", "Mar",
        "Apr", "May", "Jun",
        "Jul", "Aug", "Sep"
      )
    )
  ) %>%
  select(-report_type, -backup_name)

save(us_weather, file = "data/us_weather.rda")
# lebebr01/statthink documentation built on Feb. 13, 2024, 12:59 p.m.