library("dplyr")
library("DBI")
library("readr")
library("rlang")
library("pipeR")
library("feather")
library("ggplot2")
load("~/Dropbox/thesis/code/pubPriCmp/data/buildings.rda")
load("~/Dropbox/thesis/code/pubPriCmp/data/ghcnd_data_full.rda")
load("~/Dropbox/thesis/code/pubPriCmp/data/buildingData.rda")
buildingLatlng = buildingData %>%
dplyr::select(-`Electric_(kBtu)`, -`Gas_(kBtu)`) %>%
dplyr::group_by(`Name`) %>%
slice(1) %>%
ungroup() %>%
{.}
## #### ## ## ## ## ## ## ## ## ##
## a slow version of downloading weather data building by building (with retry
## when stations has NA or no data) start
## #### ## ## ## ## ## ## ## ## ##
duration = "years"
## all_times = seq(as.Date("2011-01-01"), as.Date("2014-01-01"), duration)
## start_times = all_times[1:(length(all_times) - 1)]
## end_times = all_times[2:length(all_times)]
start_times = seq(as.Date("2011-01-01"), as.Date("2013-01-01"), duration)
end_times = seq(as.Date("2011-12-31"), as.Date("2013-12-31"), duration)
## change this to TMIN or TMAX
## v = "TMIN"
v = "TMAX"
## only search regarding the variables interested
ghcnd_data_var = ghcnd_data_full %>%
dplyr::filter(`element` == v) %>%
{.}
devtools::load_all("~/Dropbox/gsa_2017/get.noaa.weather")
## fixme: add error handling for
## Error in curl::curl_fetch_memory(url, handle = handle) :
## Recv failure: Operation timed out
for (j in 1:3) {
## for (j in 1:length(start_times)) {
date_min = start_times[j]
date_max = end_times[j]
bad = NULL
if (file.exists(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/noaa/bad_%s_%s.csv", v, date_min))) {
bad_acc = readr::read_csv(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/noaa/bad_%s_%s.csv", v, date_min))$bad
} else {
bad_acc = NULL
}
good_ghcnd <- ghcnd_data_var %>%
dplyr::filter(!(`id` %in% bad_acc))
start = 1
counter = start
for (b in buildings[start:length(buildings)]) {
print(b)
## this part only downloads data
if (!file.exists(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_%s/%s_station_distance_%s_%s.csv",
v, b, v, date_min))) {
print(sprintf("downloading %s --------------%s -----------", counter, b))
b_loc = buildingLatlng %>%
dplyr::filter(`Name`==b) %>%
{.}
print(b_loc)
acc <- NULL
good_ghcnd <- good_ghcnd %>%
dplyr::filter(!(`id` %in% bad)) %>%
{.}
print(sprintf("size of search space: %s", nrow(good_ghcnd)))
result = get_nearby_ghcnd_stations_one_loc(lat_lon_df=b_loc, id_col_name="Name", ghcnd_data=good_ghcnd, v=v,
radius=100, limit=3,
date_min=date_min, date_max=date_max,
year=NULL)
print("final result---------")
print(result$df)
bad <- result$bad
bad_acc <- c(bad_acc, bad)
data.frame(bad=bad_acc) %>%
readr::write_csv(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/noaa/bad_%s_%s.csv", v, date_min))
result$df %>%
readr::write_csv(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_%s/%s_station_distance_%s_%s.csv",
v, b, v, date_min))
## when weather file is already downloaded
} else if (!file.exists(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_%s/%s_%s_%s.csv",
v, b, v, date_min))){
print("compile weather for building")
print(sprintf("%s --------------%s -----------", counter, b))
df <- readr::read_csv(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_%s/%s_station_distance_%s_%s.csv", v, b, v, date_min))
weatheri = compile_weather_ghcnd_main(building=b, station_df=df,
date_min=date_min, date_max=date_max,
var=v,
format_fun=get.noaa.weather::format_noaa_temperature)
print(weatheri)
weatheri %>%
readr::write_csv(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_%s/%s_%s_%s.csv", v, b, v, date_min))
## when weather output is compiled
} else {
print(sprintf("%s file exists for %s", v, b))
}
counter = counter + 1
}
## acc %>%
## readr::write_csv(sprintf("%s_%s.csv", b, v))
}
## #### ## ## ## ## ## ## ## ## ##
## a slow version of downloading weather data end
## #### ## ## ## ## ## ## ## ## ##
## compile individual building's weather data to one data frame
for (b in buildings) {
## for (b in buildings[start:length(buildings)]) {
print(b)
if (file.exists(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_AVGMINMAX/%s.feather", b))) {
print("building AVGMINNMAX already created -----")
next
}
acc = NULL
for (v in c("TMIN", "TMAX")) {
for (j in 1:3) {
date_min = start_times[j]
filename = sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_%s/%s_%s_%s.csv", v, b, v,
date_min)
if (file.exists(filename)) {
dfvar <- readr::read_csv(filename, col_types=cols()) %>%
dplyr::mutate(`varname`=v) %>%
tibble::as_data_frame() %>%
{.}
acc <- rbind(acc, dfvar)
} else {
print(sprintf("file not exist %s_%s_%s.csv", b, v, date_min))
}
}
}
acc %>%
dplyr::arrange(`date`, `varname`) %>%
dplyr::group_by(`date`, `varname`) %>%
slice(n()) %>%
dplyr::ungroup() %>%
tidyr::spread(varname, weighted) %>%
dplyr::mutate(`AVGMINMAX` = (`TMIN` + `TMAX`) / 2) %>%
dplyr::mutate(`HDD` = ifelse(`AVGMINMAX` <= 65, 65 - `AVGMINMAX`, 0)) %>%
dplyr::mutate(`CDD` = ifelse(`AVGMINMAX` > 65, `AVGMINMAX` - 65, 0)) %>%
feather::write_feather(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_AVGMINMAX/%s.feather", b))
## readr::write_csv(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_AVGMINMAX/%s.csv", b))
}
## compile individual building's weather station distance data to one data frame
acc_distance = NULL
for (b in buildings) {
## for (b in buildings[start:length(buildings)]) {
print(b)
for (v in c("TMIN", "TMAX")) {
for (j in 1:3) {
date_min = start_times[j]
filename = sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_%s/%s_station_distance_%s_%s.csv",
v, b, v, date_min)
if (file.exists(filename)) {
dfvar <- readr::read_csv(filename, col_types=cols()) %>%
dplyr::mutate(`varname`=v) %>%
dplyr::mutate(`Name`=b) %>%
dplyr::mutate(`start_time`=date_min) %>%
tibble::as_data_frame() %>%
{.}
acc_distance <- rbind(acc_distance, dfvar)
} else {
print(sprintf("file not exist %s_%s_%s.csv", b, v, date_min))
}
}
}
}
acc_distance %>% feather::write_feather("~/Dropbox/thesis/code/pubPriCmp/data-raw/station_distance.feather")
## combine all buildings' weather data into one file
acc = NULL
for (b in buildings) {
print(b)
df <- feather::read_feather(sprintf("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_AVGMINMAX/%s.feather", b)) %>%
dplyr::mutate(`Name`=b) %>%
{.}
acc <- rbind(acc, df)
}
acc %>% feather::write_feather("~/Dropbox/thesis/code/pubPriCmp/data-raw/all_weather.feather")
## produce table with columns of AVGMINMAX bins
lowerbound = min(acc$AVGMINMAX)
upperbound = max(acc$AVGMINMAX)
breaks = c(lowerbound, seq(10, 90, by=10), upperbound)
break_labels = c("<10", sprintf("[%s-%s)", seq(10, 80, by=10), seq(20, 90, by=10)), ">90")
avgminmax_bin = acc %>%
dplyr::select(`date`, `Name`, `AVGMINMAX`) %>%
dplyr::mutate(`value_label`=dplyr::case_when(`AVGMINMAX` < 10 ~ "<10",
`AVGMINMAX` < 20 ~ break_labels[2],
`AVGMINMAX` < 30 ~ break_labels[3],
`AVGMINMAX` < 40 ~ break_labels[4],
`AVGMINMAX` < 50 ~ break_labels[5],
`AVGMINMAX` < 60 ~ break_labels[6],
`AVGMINMAX` < 70 ~ break_labels[7],
`AVGMINMAX` < 80 ~ break_labels[8],
`AVGMINMAX` < 90 ~ break_labels[9],
TRUE ~ break_labels[10]
)) %>%
dplyr::mutate(`value_label`=factor(`value_label`, levels=break_labels)) %>%
dplyr::mutate(`year`=format(date, "%Y")) %>%
dplyr::mutate(`month`=format(date, "%m")) %>%
dplyr::select(-`date`) %>%
dplyr::group_by(`Name`, `year`, `month`, `value_label`) %>%
dplyr::summarise(`value_count`=n()) %>%
dplyr::ungroup() %>%
tidyr::spread(key=`value_label`, value=`value_count`) %>%
dplyr::mutate_all(funs(ifelse(is.na(.), 0L, .))) %>%
{.}
avgminmax_bin %>%
feather::write_feather("~/Dropbox/thesis/code/pubPriCmp/data-raw/avgminmax_bin.feather")
## compile monthly degree day
hddcdd = acc %>%
dplyr::select(`date`, `Name`, `HDD`, `CDD`) %>%
dplyr::mutate(`year`=format(date, "%Y")) %>%
dplyr::mutate(`month`=format(date, "%m")) %>%
dplyr::select(-`date`) %>%
dplyr::group_by(`Name`, `year`, `month`) %>%
dplyr::summarise(`HDD`=sum(`HDD`), `CDD`=sum(`CDD`)) %>%
dplyr::ungroup() %>%
{.}
hddcdd %>%
feather::write_feather("~/Dropbox/thesis/code/pubPriCmp/data-raw/hddcdd.feather")
building_weather = avgminmax_bin %>%
dplyr::left_join(hddcdd, by=c("Name", "year", "month")) %>%
{.}
## get all temperature bin and building weather variables
building_weather %>%
feather::write_feather("~/Dropbox/thesis/code/pubPriCmp/data-raw/building_weather.feather")
## #### ## ## ## ## ## ## ## ## ##
## This section is for downloading for a set of buildings, haven't finished/tested, neglect for now, START
## #### ## ## ## ## ## ## ## ## ##
## retry for all buildings
v = "TMIN"
## only search regarding the variables interested
ghcnd_data_var = ghcnd_data_full %>%
dplyr::filter(`element` == v) %>%
{.}
devtools::load_all("~/Dropbox/gsa_2017/get.noaa.weather")
acc <- NULL
startClock = Sys.time()
for (j in 1:1) {
## for (j in 1:length(start_times)) {
date_min = start_times[j]
date_max = end_times[j]
result = get_nearby_ghcnd_stations_all_loc(lat_lon_df=buildingLatlng,
id_col_name="Name",
ghcnd_data=ghcnd_data_full, v=v,
radius=100, limit=5,
date_min=date_min,
date_max=date_max, year=NULL, testing=TRUE)
## result %>%
## readr::write_csv(sprintf("%s_station_distance_%s_%s.csv", b, v, date_min))
## weatheri = compile_weather_ghcnd_main(building=b, station_df=result,
## date_min=date_min, date_max=date_max,
## var=v,
## format_fun=get.noaa.weather::format_noaa_temperature)
## print(weatheri)
## acc <- rbind(acc, weatheri)
}
print("Takes time: %s min", Sys.time() - startClock)
## acc %>%
## readr::write_csv(sprintf("%s_%s.csv", b, v))
## #### ## ## ## ## ## ## ## ## ##
## This section is for downloading for a set of buildings, haven't finished/tested, neglect for now, END
## #### ## ## ## ## ## ## ## ## ##
## get the average of daily mean per month
files = list.files("building_AVGMINMAX")
acc = NULL
for (f in files) {
print(f)
building = gsub(".feather", "", f)
df <- feather::read_feather(paste0("building_AVGMINMAX/", f)) %>%
dplyr::mutate(`year`=format(`date`, "%Y"),
`month`=format(`date`, "%m")) %>%
dplyr::group_by(`year`, `month`) %>%
dplyr::summarise(`mean_avgminmax`=mean(`AVGMINMAX`)) %>%
dplyr::ungroup() %>%
dplyr::mutate(`Name`=building) %>%
{.}
acc = rbind(acc, df)
}
acc %>%
feather::write_feather("building_mean_avgminmax.feather")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.