knitr::opts_chunk$set(echo = TRUE)
library(readr) library(dplyr) library(stringr) df <- read.csv('raw_data/coffee_compare.csv') coffee <- df %>% select(DBA, reinspections, checks, violations, score, inspections, BORO, SCORE) coffee$DBA = ifelse(str_detect(coffee$DBA, "DUNKIN"), 'DD', 'Starbucks') coffee %>% select(BORO, DBA) %>% group_by(BORO) %>% table()
library(ggthemes) library(ggplot2) library(plotly) coffee_new <- coffee %>% group_by(DBA, BORO) %>% summarize(Value = n()) pc <- ggplot(coffee_new, aes(fill=DBA, y=Value, x=BORO)) + geom_bar(position="dodge", stat="identity", width = 0.5) + xlab('Neighborhood') + ylab('Health Violations') + labs(caption = 'Data Source: DOHMH', fill = 'Brands') + ggtitle('Health Violations of Coffee Brands by Neighborhoods') + theme_bw() + scale_fill_manual(values = c("#f09a56", "#87dc97")) + theme(plot.title = element_text(size=12, face="bold", hjust = 0.5), legend.text = element_text(size=8), legend.title = element_text(size=8)) pc
pc1 <- ggplot(coffee, aes(x = BORO, y = violations, color = DBA)) + geom_point(alpha = 0.5) + xlab('Neighborhood') + ylab('Health Violations') + labs(caption = 'DOHMH') + ggtitle('Health Violations of Coffee Brands by Neighborhoods') + theme_bw() + theme(plot.title = element_text(size=12, face="bold", hjust = 0.5), legend.text = element_text(size=8), legend.title = element_text(size=8)) + scale_color_manual(values = c("DD" = "#f09a56", 'Starbucks' = "#87dc97")) pc1
coffee_new <- coffee %>% group_by(DBA, BORO) %>% summarize(total_vio = sum(violations, na.rm = T)) pc11 <- ggplot(coffee_new, aes(fill=DBA, y=total_vio, x=BORO)) + geom_bar(position="dodge", stat="identity", width = 0.5) + xlab('Neighborhood') + ylab('Health Violations') + labs(caption = 'Data Source: DOHMH', fill = 'Brands') + ggtitle('Health Violations of Coffee Brands by Neighborhoods') + theme_bw() + scale_fill_manual(values = c("#f09a56", "#87dc97")) + theme(plot.title = element_text(size=12, face="bold", hjust = 0.5), legend.text = element_text(size=8), legend.title = element_text(size=8)) pc11
pc2 <- ggplot(coffee, aes(x = BORO, y = SCORE, color = DBA)) + geom_point(alpha = 0.5) + xlab('Neighborhood') + ylab('Total Score for a Particular Inspection') + labs(caption = 'DOHMH') + ggtitle('Score of Coffee Brands by Neighborhoods') + theme_bw() + theme(plot.title = element_text(size=12, face="bold", hjust = 0.5), legend.text = element_text(size=8), legend.title = element_text(size=8)) + scale_color_manual(values = c("DD" = "#f09a56", 'Starbucks' = "#87dc97")) pc2
coffee_new <- coffee %>% group_by(DBA, BORO) %>% summarize(total_score = sum(SCORE, na.rm = T)) pc11 <- ggplot(coffee_new, aes(fill=DBA, y=total_score, x=BORO)) + geom_bar(position="dodge", stat="identity", width = 0.5) + xlab('Neighborhood') + ylab('Total Score for a Particular Inspection') + labs(caption = 'Data Source: DOHMH', fill = 'Brands') + ggtitle('Inspection Score of Coffee Brands by Neighborhoods') + theme_bw() + scale_fill_manual(values = c("#f09a56", "#87dc97")) + theme(plot.title = element_text(size=12, face="bold", hjust = 0.5), legend.text = element_text(size=8), legend.title = element_text(size=8)) pc11
library(jtools) coffee_lm <- coffee %>% group_by(DBA, BORO) %>% summarize(total_num = n(), total_vio = sum(violations, na.rm = T), total_score = sum(SCORE, na.rm = T)) coffee_lm$DBA = ifelse(coffee_lm$DBA == "DD", 1, 0) coffee_sb <- coffee_lm %>% filter(DBA == 1) coffee_dd <- coffee_lm %>% filter(DBA == 0)
lm <- lm(total_num ~ total_vio + total_score, data = coffee_sb) summary(lm) summ(lm)
lm <- lm(total_num ~ total_vio + total_score, data = coffee_dd) summary(lm) summ(lm)
lm1 <- lm(total_vio ~ total_num, data = coffee_sb) summary(lm1) summ(lm)
lm1 <- lm(total_vio ~ total_num, data = coffee_dd) summary(lm1) summ(lm1)
lm2 <- lm(total_vio ~ total_num*DBA, data = coffee_lm) summary(lm2) summ(lm2)
lm2 <- lm(total_score ~ total_num*DBA, data = coffee_lm) summary(lm2) summ(lm2)
library(caret) coffee$DBA = ifelse(coffee$DBA == "DD", 0, 1) coffee$DBA <- factor(coffee$DBA, labels = c("Starbucks", "DD"), levels = 1:0) set.seed(12345) in_train <- createDataPartition(y = coffee$DBA, p = 0.8, list = FALSE) training <- coffee[ in_train, ] testing <- coffee[-in_train, ]
logit <- glm(DBA ~ checks + violations + score + BORO, data = training, family = binomial(link = "logit")) y_hat_logit <- predict(logit, newdata = testing, type = "response") z_logit <- factor(y_hat_logit > 0.5, levels = c(TRUE, FALSE), labels = c("Starbucks", "DD")) confusionMatrix(z_logit, reference = testing$DBA)
LDA <- train(DBA ~ checks + violations + score + BORO, data = training, method = "lda", reProcess = c("center", "scale")) z <- predict(LDA, newdata = testing) confusionMatrix(z, testing$DBA)
QDA <- train(DBA ~ checks + violations + score + BORO, data = training, method = "qda", preProcess = c("center", "scale")) z_QDA <- predict(QDA, newdata = testing) confusionMatrix(z_QDA, reference = testing$DBA)
ctrl_PLSDA <- trainControl(method = "repeatedcv", repeats = 2, classProbs = TRUE, summaryFunction = twoClassSummary) PLSDA_grid <- expand.grid(.ncomp = 1:2) PLSDA <- train(DBA ~ checks + violations + score + BORO, data = training, method = "pls", preProcess = c("center", "scale"), metric = "ROC", trControl = ctrl_PLSDA, tuneGrid = PLSDA_grid) z_PLSDA <- predict(PLSDA, newdata = testing) confusionMatrix(z_PLSDA, reference = testing$DBA)
ctrl_NSC <- trainControl(method = "repeatedcv", repeats = 3, classProbs = TRUE, summaryFunction = twoClassSummary) grid_NSC <- data.frame(.threshold = 0:25) NSC <- train(DBA ~ checks + violations + score + BORO, data = training, method = "pam", preProcess = c("center", "scale"), metric = "ROC", trControl = ctrl_NSC, tuneGrid = grid_NSC) z_NSC <- predict(NSC, newdata = testing) confusionMatrix(z_NSC, reference = testing$DBA)
gbmControl = trainControl(method="cv", number=5, returnResamp = "all") gbm = train(DBA ~ checks + violations + score + BORO, data=training, method="gbm", distribution="bernoulli", trControl=gbmControl, verbose=F, tuneGrid=data.frame(.n.trees=seq(100, 1000, by = 100), .shrinkage=c(0.01, 0.1), .interaction.depth=1:2, .n.minobsinnode=1:5), na.action = na.omit) y_hat_gbm <- predict(gbm, newdata = testing, na.action = na.pass) confusionMatrix(y_hat_gbm, reference = testing$DBA)
library(randomForest) rf<- randomForest(DBA ~ checks + violations + score + BORO, data=training, importance = TRUE, na.action = na.omit) y_hat_rf <- predict(rf, newdata = testing, type = "response", na.action = na.pass) confusionMatrix(y_hat_rf, reference = testing$DBA)
nnetGrid <- expand.grid(.decay = c(0, 0.01, .1), .size = c(1:10)) ctrl_nn <- trainControl(method = "cv", number = 10) nn <- train(DBA ~ checks + violations + score + as.factor(BORO), data = training, method = "nnet", trControl = ctrl_nn, tuneGrid = nnetGrid, preProcess = c("center", "scale"), trace = FALSE) y_hat_nn <- predict(nn, newdata = testing) confusionMatrix(y_hat_nn, reference = testing$DBA)
ctrl_mars <- trainControl(method = "cv", number = 10) marsGrid <- expand.grid(.degree = 1:3, .nprune = 1:10) mars <- train(DBA ~ checks + violations + score + as.factor(BORO), data = training, method = "earth", trControl = ctrl_mars, tuneGrid = marsGrid) y_hat_mars <- predict(mars, newdata = testing) confusionMatrix(y_hat_mars, reference = testing$DBA)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.