library(flexdashboard) library(shiny) library(tidyverse) library(stringr) library(ggthemes) source('../model.R')
## Load & Score Test Data Set new_data <- read.table( cloudml::gs_data("gs://rstudio-cloudml-demo-ml/census/data/adult.test"), col.names = CSV_COLUMNS, header = FALSE, sep = ",", stringsAsFactors = FALSE ) # Clean Up Data new_data$fnlwgt <- NULL label <- new_data[[LABEL_COLUMN]] new_data[[LABEL_COLUMN]] <- NULL # generate predictions predictions <- cloudml::local_predict("../jobs/local", new_data) # flatten predictions new_data[[LABEL_COLUMN]] <- label new_data$score <- predictions$predictions %>% map_dbl(~ .x$probabilities[2])
Predict whether income exceeds \$50K/yr based on census data. Data extraction was done by Barry Becker from the 1994 Census database. Prediction task is to determine whether a person makes over 50K a year. See the data source and description for more information.
sliderInput("cutoff", "Probability Cutoff", min = 0, max = 1, value = 0.5)
valueBoxOutput("accuracy")
tableOutput("conf")
labelled_data <- reactive({ new_data$Label <- ifelse(new_data$score > input$cutoff, " >50K.", " <=50K.") new_data }) output$accuracy <- renderValueBox({ accuracy <- paste0(round(sum(labelled_data()$Label == labelled_data()$income_bracket) / nrow(labelled_data()), 2)*100, "%") valueBox(accuracy, caption = "accuracy", color = "primary", icon = "fa-check-circle") }) output$conf <- renderTable(rownames = TRUE, digits = 0, { conf_matrix <- matrix(data = rep(0,4), nrow = 2, ncol = 2) conf_matrix[1,1] <- sum(labelled_data()$Label == " <=50K." & labelled_data()$income_bracket == " <=50K." ) conf_matrix[1,2] <- sum(labelled_data()$Label == " >50K." & labelled_data()$income_bracket == " <=50K." ) conf_matrix[2,1] <- sum(labelled_data()$Label == " <=50K." & labelled_data()$income_bracket == " >50K." ) conf_matrix[2,2] <- sum(labelled_data()$Label == " >50K." & labelled_data()$income_bracket == " >50K." ) colnames(conf_matrix) <- c("Predicted <=50K", ">50K") rownames(conf_matrix) <- c("Actual <=50K", ">50K") conf_matrix })
plotOutput("gender")
output$gender <- renderPlot({ plotDiscrete("gender") })
plotOutput("age")
output$age <- renderPlot({ plotContinuous("age") })
plotOutput("relationship")
output$relationship <- renderPlot({ plotDiscrete("relationship") })
plotOutput("marital_status")
output$marital_status <- renderPlot({ plotDiscrete("marital_status") })
plotOutput("race")
output$race <- renderPlot({ plotDiscrete("race") })
plotOutput("native_country")
output$native_country <- renderPlot({ plotDiscrete("native_country") })
plotOutput("hours_per_week")
output$hours_per_week <- renderPlot({ plotContinuous("hours_per_week") })
plotOutput("occupation")
output$occupation <- renderPlot({ plotDiscrete("occupation") })
plotOutput("workclass")
output$workclass <- renderPlot({ plotDiscrete("workclass") })
plotOutput("education")
output$education <- renderPlot({ plotDiscrete("education") })
plotOutput("education_num")
output$education_num <- renderPlot({ plotContinuous("education_num") })
plotContinuous <- function(variable = "hours_per_week") { lab <- str_replace_all(variable, "_", " ") %>% str_to_title() ggplot(labelled_data()) + geom_density(aes_string(x = variable)) + labs( title = lab, fill = "" ) + scale_y_continuous(label = function(x){paste0(x*100, "%")}) + theme_fivethirtyeight() + scale_fill_fivethirtyeight() + facet_wrap(~Label) } plotDiscrete <- function(variable = "gender") { lab <- str_replace_all(variable, "_", " ") %>% str_to_title() labelled_data() %>% mutate_(var = variable) %>% group_by(Label, var) %>% summarise(n = n()) %>% mutate(prop = n / sum(n)) %>% ggplot() + geom_col(aes(x = reorder(var, prop), y = prop), position = 'dodge') + labs( title = lab, fill = "" ) + scale_y_continuous(label = function(x){paste0(x*100, "%")}) + theme_fivethirtyeight() + theme(axis.text.x = element_text(angle = 90, size = 14)) + scale_fill_fivethirtyeight() + facet_wrap(~Label) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.