library(dplyr) library(knitr) opts_chunk$set(echo = TRUE, message = FALSE, prompt = FALSE, warning = FALSE, cache = TRUE)
library(caret) data("rf_fit") variableimprtance <- varImp(rf_fit)$importance %>% fix_data_frame() variableimprtance %>% arrange(desc(Overall)) %>% slice(1:10) %>% # Fix: Manually specify levels to make sure labels are always correct mutate(term = factor(term, levels = term, labels = c("Time Since Last Delay", "Temperature", "Wind Speed", "1AM", "Visibility", "NE Corridor Line", "4AM", "Coast Line", "Departure: Tuesday", "Departure: Wednesday"))) %>% mutate(term = factor(term, levels = rev(levels(term)))) %>% ggplot(aes(term, Overall)) + geom_bar(stat = 'identity', fill = "orange3", width = .7) + theme_classic() + coord_flip() + xlab("") + ylab("Variable Importance %") + # scale_x_discrete(expand = c(0, 0)) + #Fix the 0 offset scale_y_continuous(expand = c(0, 0)) + theme(axis.text=element_text(size=18), axis.title = element_text(size=18))
library(pROC) data("training_data") model_predictions <- predict(rf_fit, testing,type = "prob" ) roc_test <- testing roc_test$pred_prob <- model_predictions$Yes roc_obj <- roc(roc_test$Class,roc_test$pred_prob, plot = FALSE) # Obtain values from object to plot using ggplot roc_df <- data.frame(Sensitivity = roc_obj$sensitivities, Specificity = roc_obj$specificities) roc_df %>% ggplot(aes(1-Specificity, Sensitivity)) + geom_line(color = "red2", cex = 1) + geom_abline(intercept = 0, slope = 1, color = "black", linetype = 2) + theme_classic() + scale_x_continuous(expand = c(0, 0)) + #Fix the 0 offset scale_y_continuous(expand = c(0, 0)) + xlab("False Positive Rate") + ylab("True Positive Rate")
data("njt_features") njt_features <- njt_features %>% mutate(is_delayed = factor(is_delayed, levels = c(TRUE,FALSE), labels = c("Yes","No"))) feat_ex <- njt_features %>% filter(Line == "CORRIDOR") bywday <- feat_ex %>% count(Line, dep_wday, is_delayed) %>% spread(is_delayed, n) %>% transmute(pct_delays = Yes/(No+Yes)*100, feat = dep_wday, type = "Day of Week") byhour <- feat_ex %>% count(dep_hour, is_delayed) %>% spread(is_delayed, n) %>% transmute(pct_delays = Yes/(No+Yes)*100, feat = as.character(dep_hour), type = "Hour of Day") timetolast <- feat_ex %>% group_by(Line) %>% mutate(bin = cut(ttl_line, breaks = c(seq(0,120,30), max(feat_ex$ttl_line)), include.lowest = TRUE, labels = c("<30", "30-60", "60-90", "90-120", ">120")) ) %>% count(bin, is_delayed) %>% spread(is_delayed, n) %>% transmute(pct_delays = Yes/(No+Yes)*100, feat = bin, type = "Time Since\nLast Delay") bytemp <- feat_ex %>% group_by(Line) %>% mutate(bin = cut(Temp_F, breaks = c(0, 32,60, 80, 100, 150), include.lowest = TRUE, labels = c("<32", "32-60", "60-80", "80-100", ">100"))) %>% count(bin, is_delayed) %>% spread(is_delayed,n) %>% transmute(pct_delays = Yes/(No+Yes)*100, feat = bin, type = "Temperature") bywind <- feat_ex %>% group_by(Line) %>% mutate(bin = cut(WindSpeed, breaks = c(0,15,30,60), include.lowest = TRUE)) %>% # labels = c("<15mph", "15-30", "30-45",">45mph"))) %>% count(bin, is_delayed) %>% spread(is_delayed,n) %>% transmute(pct_delays = Yes/(No+Yes)*100, feat = bin, type = "Temperature") # Baseline probability of delay baseline <- feat_ex %>% count(is_delayed, Line) %>% spread(is_delayed, n) %>% mutate(pct_delays = Yes/ (No + Yes) * 100) %>% .$pct_delays rbind(timetolast, byhour, bytemp, bywday) %>% ggplot(aes(feat, pct_delays)) + geom_bar(stat = "identity") + facet_wrap(~type, scale = "free") + geom_hline(yintercept=baseline, color = "red", size = 1, linetype = 2) + theme_minimal() + xlab("") + ylab("% of Trains Delayed")
data(list = list("glmnet_fit","rf_fit","knn_fit", "gbm_fit", "logit_f")) resamps <- resamples(list( "Logistic Regression" = logit_fit, "Regularized Regression" = glmnet_fit, "Random Forest" = rf_fit, "K-Nearest Neighbors" = knn_fit)) # "Gradient Boosting Machine" = gbm_fit)) diff(resamps) %>% summary bwplot(resamps, layout = c(3, 1))
cutoff_df <- readRDS(file = "data/delayed_cutoffcompare.rds") cutoff_df %>% ggplot(aes(factor(method), value)) + geom_boxplot() + facet_wrap(~metric) + theme_bw() + xlab("Delay Cutoff (Minutes)") + ylab("")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.