# Packages used throughout the notebook.
library(dplyr)
library(njtPredict)
library(lubridate)
library(ggplot2)
# library(broom)
library(tidyr)
library(knitr)
# devtools::load_all()

# Chunk defaults applied to every code chunk of the notebook.
opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  prompt = FALSE,
  warning = FALSE,
  cache = TRUE,
  fig.width = 14,
  fig.height = 8
)
# Load the njt_features dataset and recode is_delayed (TRUE/FALSE) as a
# factor labelled "Yes"/"No", with "Yes" as the first level.
data("njt_features")
njt_features <- mutate(
  njt_features,
  is_delayed = factor(
    is_delayed,
    levels = c(TRUE, FALSE),
    labels = c("Yes", "No")
  )
)
The purpose of this notebook is to characterize & explore the njt data and weather data.
# Table of Yes/No delay counts and delay percentage per line, sorted with the
# highest delay percentage first.
njt_features %>%
  count(Line, is_delayed) %>%
  # fill = 0: a line with no rows for one outcome gets a count of 0 rather
  # than NA, so pct_delays stays defined (0 or 100) for that line.
  spread(is_delayed, n, fill = 0) %>%
  mutate(pct_delays = Yes / (No + Yes) * 100) %>%
  arrange(desc(pct_delays)) %>%
  kable()
Not surprisingly, Corridor has the most delays.
# To easily adjust all graphs, standardize the plotting via one function.
#
# Draws a bar chart of pct_delays against the column named by x_var, with one
# facet per Line (five facets per row, independent axes per facet).
#
# df:    data frame containing Line, pct_delays and the x_var column.
# x_var: column name (string) mapped to the x axis.
plotPctFacet <- function(df, x_var) {
  # NOTE: aes_string() is soft-deprecated in ggplot2 >= 3.0.0; kept here so
  # that x_var keeps accepting whatever strings existing callers pass.
  ggplot(df, aes_string(x_var, "pct_delays")) +
    geom_bar(stat = "identity") +
    # `scales` spelled out in full instead of relying on partial matching.
    facet_wrap(~Line, scales = "free", ncol = 5) +
    theme_classic()
}
# Percentage of delayed trains per line and departure weekday.
njt_features %>%
  count(Line, dep_wday, is_delayed) %>%
  # fill = 0: a line/weekday with no rows for one outcome gets a count of 0
  # rather than NA, so pct_delays stays defined for it.
  spread(is_delayed, n, fill = 0) %>%
  mutate(pct_delays = Yes / (No + Yes) * 100) %>%
  plotPctFacet(x_var = "dep_wday")
# Percentage of delayed trains per line and departure month.
njt_features %>%
  count(Line, dep_mon, is_delayed) %>%
  # fill = 0: a line/month with no rows for one outcome gets a count of 0
  # rather than NA, so pct_delays stays defined for it.
  spread(is_delayed, n, fill = 0) %>%
  mutate(pct_delays = Yes / (No + Yes) * 100) %>%
  plotPctFacet(x_var = "dep_mon")
# Percentage of delayed trains per line and departure hour.
njt_features %>%
  count(Line, dep_hour, is_delayed) %>%
  # fill = 0: a line/hour with no rows for one outcome gets a count of 0
  # rather than NA, so pct_delays stays defined for it.
  spread(is_delayed, n, fill = 0) %>%
  mutate(pct_delays = Yes / (No + Yes) * 100) %>%
  plotPctFacet(x_var = "dep_hour")
# Look-back window (minutes, per the plot label) used to flag a previous delay.
previous_delay_window <- 90
# Plot, per line, the percentage of delayed trains split by the `prdelay`
# flag (whether a previous delay fell inside the look-back window).
#
# df:     data frame with Line, prdelay and is_delayed ("Yes"/"No") columns;
#         rows with a missing prdelay are dropped.
# window: look-back window in minutes, used only for the x-axis label;
#         defaults to the global previous_delay_window.
plotPrDelayFacet <- function(df, window = previous_delay_window) {
  df %>%
    filter(!is.na(prdelay)) %>%
    count(Line, prdelay, is_delayed) %>%
    # fill = 0: a line/prdelay group with no rows for one outcome gets a
    # count of 0 rather than NA, so pct_delays stays defined for it.
    spread(is_delayed, n, fill = 0) %>%
    mutate(pct_delays = Yes / (No + Yes) * 100) %>%
    ggplot(aes(prdelay, pct_delays)) +
    geom_bar(stat = "identity", position = "dodge") +
    # `scales` spelled out in full instead of relying on partial matching.
    facet_wrap(~Line, scales = "free_y", ncol = 5) +
    theme_classic() +
    xlab(paste("Delay Occurred within the last", window, "minutes")) +
    ylab("% of all Trains")
}
# Previous-delay flag derived from ttl_line, compared against the window.
plotPrDelayFacet(
  mutate(njt_features, prdelay = ttl_line <= previous_delay_window)
)
# Previous-delay flag derived from ttl_dep_line, compared against the window.
plotPrDelayFacet(
  mutate(njt_features, prdelay = ttl_dep_line <= previous_delay_window)
)
# Previous-delay flag derived from ttl_dep_arv_line, compared against the
# window.
plotPrDelayFacet(
  mutate(njt_features, prdelay = ttl_dep_arv_line <= previous_delay_window)
)
TODO
Need to propagate Reasons (MVP++)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.