library(learnr) library(shiny) options(dplyr.summarise.inform = FALSE) library(tidyverse) library(tidymodels) data("ames", package = "modeldata") knitr::opts_chunk$set(echo = FALSE, eval=TRUE) set.seed(1) ames <- ames %>% mutate(Sale_Price = Sale_Price / 1000) %>% select(Lot_Area, Bsmt_Unf_SF, Sale_Price) %>% mutate(across(everything(), as.numeric)) ames_split <- rsample::initial_split(ames, prop = 1/10) training_set <- rsample::training(ames_split) test_set <- rsample::testing(ames_split) min_intercept <- 0 max_intercept <- 250
We'll start by looking at the simplest possible model, one that predicts a constant sale price for every home. This is analogous to saying something like "Homes around here generally go for around $150k".
We'll start by using a small subset of the home sale dataset.
training_set
summary(training_set)
Adjust the value of "intercept" to make your line give the best prediction.
sliderInput("intercept", "Intercept", min = min_intercept, max = max_intercept, value = 125) plotOutput("plot_intercept_train") tableOutput("error_intercept_train")
plotModel <- function(dataset, intercept, coef_lot_area) { ggplot(dataset, aes(x = Lot_Area, y = Sale_Price)) + geom_point() + geom_abline(intercept = intercept, slope = coef_lot_area) } errorTable <- function(dataset, intercept, coef_lot_area) { dataset %>% mutate(prediction = intercept + Lot_Area * coef_lot_area, residual = Sale_Price - prediction) %>% summarize( mean_error = mean(residual), mean_absolute_error = mean(abs(residual)), max_abs_error = max(abs(residual)), mean_squared_error = mean(residual * residual), rmse = sqrt(mean_squared_error) ) } output$plot_intercept_train <- renderPlot({ plotModel(training_set, input$intercept, 0) }) output$plot_intercept_test <- renderPlot({ plotModel(test_set, input$intercept, 0) }) output$plot_lotarea_train <- renderPlot({ plotModel(training_set, input$intercept2, input$coef_lot_area)}) output$plot_lotarea_test <- renderPlot({ plotModel(test_set, input$intercept2, input$coef_lot_area) }) output$error_intercept_train <- renderTable({ errorTable(training_set, input$intercept, 0) }) output$error_intercept_test <- renderTable({ errorTable(test_set, input$intercept, 0) }) output$error_lotarea_train <- renderTable({ errorTable(training_set, input$intercept2, input$coef_lot_area) }) output$error_lotarea_test <- renderTable({ errorTable(test_set, input$intercept2, input$coef_lot_area) })
question("We get the lowest Mean Absolute Error when we set the intercept to the ___ of the Sale_Price column (see summary table above)", answer("minimum"), answer("mean"), answer("exactly halfway between minimum and maximum"), answer("median", correct = TRUE), answer("maximum"), incorrect = "Try changing the intercept to each of these options and see what error you get!", allow_retry = TRUE)
question("We get the lowest Mean Squared Error when we set the intercept to the ___ of the Sale_Price column (see summary table above)", answer("minimum"), answer("mean", correct = TRUE), answer("exactly halfway between minimum and maximum"), answer("median"), answer("maximum"), incorrect = "Try changing the intercept to each of these options and see what error you get!", allow_retry = TRUE)
Here's the rest of the data, which we're calling the test set. It has r nrow(test_set)
observations.
summary(test_set)
plotOutput("plot_intercept_test") tableOutput("error_intercept_test")
Start with "intercept" at the same value as your best value above. Now, adjust both "intercept" and "coef_lot_area" to minimize error on the training set.
sliderInput("intercept2", "Intercept", min = min_intercept, max = max_intercept, value = 125) sliderInput("coef_lot_area", "Lot_Area", min = -.01, max = .01, value = 0, step = .0001) plotOutput("plot_lotarea_train") tableOutput("error_lotarea_train")
Consider these questions:
Now let's look at the test set:
plotOutput("plot_lotarea_test") tableOutput("error_lotarea_test")
Write code here to add a prediction
and residual
column to test_set
.
Make sure that you use the correct sign for residual.
intercept = 125 coef_lot_area <- .1 test_set_augmented <- test_set # Your code here
intercept = 125 coef_lot_area <- .1 test_set_augmented <- test_set %>% mutate(predicted = intercept + coef_lot_area * Lot_Area, residual = Sale_Price - predicted)
Now, write code that uses test_set_augmented
to compute the
mean error (bias), mean absolute error, and mean squared error. Use summarize
.
test_set_augmented %>% summarize( mean_error = 0, # your code here mean_absolute_error = 0, # etc. mean_squared_error = 0 )
test_set_augmented %>% summarize( mean_error = mean(residual), mean_absolute_error = mean(abs(residual)), mean_squared_error = mean(residual * residual) )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.