knitr::opts_chunk$set(echo = TRUE)
#Map lines to ttld & Name data("njt_features") line_info <- data.frame(Line = unique(njt_features$Line)) line_ttld <- read.table("external_data/line_model_map.txt", header = TRUE) main_info <- inner_join(line_info, line_ttld, by = "Line") %>% mutate(Line = gsub(" ","_",Line)) write.csv(main_info, file = "../shinyapp/main_info.csv", row.names = FALSE)
pred_df <- main_info %>% mutate(Current_Date = Sys.time(), dep_hour = hour(Current_Date), dep_mon = month(Current_Date, label = TRUE), dep_wday = wday(Current_Date, label = TRUE))
max_delay = 24*60 ttld_file <- read.csv("http://52.88.4.39/ttld_log.txt") pred_df <- ttld_file %>% select(ttld_Line = Line, Scheduled_Start_Time) %>% right_join(pred_df) %>% mutate(ttl_line = as.numeric(difftime(Current_Date, Scheduled_Start_Time, units = "mins"))) %>% mutate(ttl_line = ifelse(is.na(ttl_line)|ttl_line > max_delay, max_delay, ttl_line))
# hourly_newark <- "http://api.wunderground.com/api/547710b840da62b4/conditions/q/NJ/Newark.json" hourly_newark <- "http://forecast.weather.gov/MapClick.php?lat=40.7242&lon=-74.1726&FcstType=json" w_get <- GET(hourly_newark) w_text <- content(w_get, as = "text") %>% fromJSON() pred_df$Temp_F <- as.numeric(w_text$currentobservation$Temp) pred_df$Visibility <- as.double(w_text$currentobservation$Visibility) pred_df$WindSpeed <- as.numeric(w_text$currentobservation$Winds)
predict_prob <- inner_join(pred_df, modelfit_byline, by = "Line") %>% mutate(Line_Name = Full.Name) %>% group_by(Line_Name) %>% do(data.frame(p=predict(.$fit, ., type = "prob")[[1]][[1]][1]))
w_get_hourly <- GET("http://api.wunderground.com/api/547710b840da62b4/hourly/q/NJ/Newark.json") w_text_hourly <- content(w_get_hourly, as = "text") %>% fromJSON() hourly_forecast <- w_text_hourly$hourly_forecast %>% flatten %>% transmute(WindSpeed = wspd.english, Temp_F = temp.english, Date_Time = paste(FCTTIME.hour_padded, FCTTIME.min, FCTTIME.mon_padded, FCTTIME.mday_padded, FCTTIME.year)) %>% mutate(Date_Time = as.POSIXct(strptime(Date_Time, format = "%H %M %m %d %Y")), Visibility = pred_df$Visibility[1], dep_hour = hour(Date_Time), dep_mon = month(Date_Time, label = TRUE), dep_wday = wday(Date_Time, label = TRUE)) %>% slice(1:12)
data("njt_features") njt_features <- njt_features %>% ungroup %>% mutate(is_delayed = factor(is_delayed, levels = c(TRUE,FALSE), labels = c("Yes","No")), Line = gsub(" ","_",Line))
cpt_base <- njt_features %>% count(Line, is_delayed) %>% spread(is_delayed, n) %>% transmute(Line, base_pct = Yes/(No+Yes)*100) %>% arrange(desc(base_pct))
cpt_wday <- njt_features %>% count(Line, is_delayed, dep_wday) %>% spread(is_delayed, n) %>% mutate(Line, dep_wday, pct = Yes/(No+Yes)*100) %>% inner_join(cpt_base, by = "Line") %>% transmute(Line, feature = "dep_wday", feature_val = dep_wday, pct, p_fold = log2(pct/base_pct)) cpt_mon <- njt_features %>% count(Line, is_delayed, dep_mon) %>% spread(is_delayed, n) %>% mutate(Line, dep_mon, pct = Yes/(No+Yes)*100) %>% inner_join(cpt_base, by = "Line") %>% transmute(Line, feature = "dep_mon", feature_val = dep_mon, pct, p_fold = log2(pct/base_pct)) cpt_hour <- njt_features %>% count(Line, is_delayed, dep_hour) %>% spread(is_delayed, n) %>% mutate(Line, dep_hour, pct = Yes/(No+Yes)*100) %>% inner_join(cpt_base, by = "Line") %>% transmute(Line, feature = "dep_hour", feature_val = as.factor(dep_hour), pct, p_fold = log2(pct/base_pct))
breaks_WindSpeed = quantile(njt_features$WindSpeed, probs= seq(0,1,.25)) cpt_WindSpeed <- njt_features %>% group_by(Line) %>% mutate(bin = cut(WindSpeed, breaks_WindSpeed , include.lowest = TRUE)) %>% count(Line, is_delayed, bin) %>% spread(is_delayed, n) %>% mutate(Line, bin, pct = Yes/(No+Yes)*100) %>% inner_join(cpt_base, by = "Line") %>% transmute(Line, feature = "WindSpeed_break", feature_val = as.factor(bin), pct, p_fold = log2(pct/base_pct)) breaks_Temp_F = c(min(njt_features$Temp_F),seq(33,126,20), max(njt_features$Temp_F)) cpt_Temp_F <- njt_features %>% group_by(Line) %>% mutate(bin = cut(Temp_F, breaks_Temp_F , include.lowest = TRUE)) %>% count(Line, is_delayed, bin) %>% spread(is_delayed, n) %>% mutate(Line, bin, pct = Yes/(No+Yes)*100) %>% inner_join(cpt_base, by = "Line") %>% transmute(Line, feature = "Temp_F_break", feature_val = as.factor(bin), pct, p_fold = log2(pct/base_pct)) breaks_Visibility = c(seq(1,45,10),max(njt_features$Visibility)) cpt_Visibility <- njt_features %>% group_by(Line) %>% mutate(bin = cut(Visibility, breaks_Visibility , include.lowest = TRUE)) %>% count(Line, is_delayed, bin) %>% spread(is_delayed, n) %>% mutate(Line, bin, pct = Yes/(No+Yes)*100) %>% inner_join(cpt_base, by = "Line") %>% transmute(Line, feature = "Visibility_break", feature_val = as.factor(bin), pct, p_fold = log2(pct/base_pct))
breaks_ttl_line = c(seq(0,180,30),max(njt_features$ttl_line)) cpt_ttl_line <- njt_features %>% group_by(Line) %>% mutate(bin = cut(ttl_line, breaks_ttl_line , include.lowest = TRUE)) %>% count(Line, is_delayed, bin) %>% spread(is_delayed, n) %>% mutate(Line, bin, pct = Yes/(No+Yes)*100) %>% inner_join(cpt_base, by = "Line") %>% transmute(Line, feature = "ttl_line_break", feature_val = as.factor(bin), pct, p_fold = log2(pct/base_pct))
cpt_complete <- do.call(rbind,list(cpt_wday, cpt_mon, cpt_hour, cpt_Visibility, cpt_Temp_F, cpt_WindSpeed, cpt_ttl_line)) # save(cpt_complete, breaks_WindSpeed, breaks_ttl_line, breaks_Visibility, breaks_Temp_F, file="shinyapp/cpt_complete.rda")
pred_cpt <- pred_df %>% mutate( ttl_line_break = cut(ttl_line, breaks_ttl_line , include.lowest = TRUE), Temp_F_break = cut(Temp_F, breaks_Temp_F , include.lowest = TRUE), WindSpeed_break = cut(WindSpeed, breaks_WindSpeed , include.lowest = TRUE), Visibility_break = cut(Visibility, breaks_Visibility , include.lowest = TRUE) ) %>% select(Line, dep_hour:Visibility_break) %>% gather(feature, feature_val, -Line, -ttl_line:-WindSpeed) %>% inner_join(cpt_complete) %>% mutate(feature = factor(feature, levels = c("ttl_line", "Temp_F", "WindSpeed", "Visibility", "dep_hour", "dep_wday", "dep_mon"), labels = c("Time To Last Delay", "Current Temperature", "Current Wind Speed", "Current Visibility", "Time of Day", "Day of the Week", "Current Month") ))
pred_cpt %>% ggplot(aes(feature,p_fold, fill = p_fold)) + geom_bar(stat = "identity") + facet_wrap(~Line, ncol = 3) + coord_flip() + theme_minimal() + scale_fill_gradient2(low = "blue", mid = "black", high = "red", midpoint = 0) + ylim(-1,1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.