```{=html}

```r
# load packages-----------------------------------------------------------------
library(learnr)
library(gradethis)
library(tidyverse)
library(tidymodels)
library(palmerpenguins)

# set options for exercises and checking ---------------------------------------
# exercise.timelimit caps how long a single exercise may run (presumably in
# seconds, per the learnr documentation -- confirm).
# exercise.checker routes every exercise check through gradethis.
# NOTE(review): newer gradethis releases appear to favour gradethis_setup()
# over passing grade_learnr directly -- confirm against the installed version.
tutorial_options(
  exercise.timelimit = 60, 
  exercise.checker = gradethis::grade_learnr
  )

# hide non-exercise code chunks ------------------------------------------------
# echo = FALSE becomes the default chunk option, so chunk source is not echoed
# into the rendered document unless a chunk overrides it.
knitr::opts_chunk$set(echo = FALSE)
# Bind the `penguins` data (provided by palmerpenguins) to a name in the
# current environment before extending it with new columns.
penguins <- penguins

# Unit conversions, done once: grams -> kilograms (x 0.001) and
# millimetres -> centimetres (x 0.1).
penguins <- penguins |>
  mutate(
    body_mass_kg = 0.001 * body_mass_g,
    flipper_length_cm = 0.1 * flipper_length_mm,
    bill_length_cm = 0.1 * bill_length_mm,
    bill_depth_cm = 0.1 * bill_depth_mm
  )

# One simple linear regression of body mass (kg) per explanatory variable.
# Each model is fitted exactly once; refitting on the same data would only
# reproduce the same object.
mass_flip_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ flipper_length_cm, data = penguins)

mass_spec_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ species, data = penguins)

mass_len_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ bill_length_cm, data = penguins)

mass_dep_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ bill_depth_cm, data = penguins)

# Grid of flipper lengths to predict over: 17 cm to 24 cm in 0.1 cm steps.
newflip <- data.frame(flipper_length_cm = seq(17, 24, by = 0.1))

# Interval estimates are taken from the underlying lm object (`$fit`) of the
# flipper-length model; each call returns the columns fit, lwr and upr.
conf_int <- predict(mass_flip_fit$fit, newflip, interval = "confidence")
pred_int <- predict(mass_flip_fit$fit, newflip, interval = "prediction")

# Combine the grid with both sets of intervals. data.frame() de-duplicates the
# repeated column names, so the prediction-interval columns become
# fit.1, lwr.1 and upr.1.
int_dat <- data.frame(newflip, conf_int, pred_int)

# Base scatter plot of body mass against flipper length, coloured by species.
# `int_dat` is the default data so that interval layers can be added later;
# the points themselves are drawn from `penguins`.
plot_1 <- ggplot(data = int_dat, aes(x = flipper_length_cm)) +
  geom_point(data = penguins, 
             aes(x = flipper_length_cm, y = body_mass_kg, colour = species)) +
  scale_colour_manual(values = c("orange", "purple", "cyan4")) +
  labs(x = "Flipper length (cm)", y = "Body mass (kg)")

Introduction

knitr::include_graphics("images/penguinsallisonhorst.png")

Data were collected and made available by Dr. Kristen Gorman and the Palmer Station, Antarctica LTER, a member of the Long Term Ecological Research Network.

The data are distributed in the palmerpenguins package.

Learning goals

Packages

We'll use the tidyverse and tidymodels packages for this analysis and the palmerpenguins package for the data. Run the following code to load these packages.

library(tidyverse)
library(tidymodels)
library(palmerpenguins)
library(tidyverse)
library(tidymodels)
library(palmerpenguins)
# Success message names all three packages loaded by the graded code.
grade_this_code("The tidyverse, tidymodels and palmerpenguins packages are now loaded!")

Data

# Embed the image stored at images/bill-measurement.png.
knitr::include_graphics("images/bill-measurement.png")

The codebook for the dataset is as follows:

+---------------------+-------------------------------------------+
| Name                | Description                               |
+:====================+:==========================================+
| species             | Penguin species                           |
+---------------------+-------------------------------------------+
| island              | Island where penguins are observed        |
+---------------------+-------------------------------------------+
| bill_length_mm      | Length of bill measured in millimetres    |
+---------------------+-------------------------------------------+
| bill_depth_mm       | Depth of bill measured in millimetres     |
+---------------------+-------------------------------------------+
| flipper_length_mm   | Length of flipper measured in millimetres |
+---------------------+-------------------------------------------+
| body_mass_g         | Penguin's body mass measured in grams     |
+---------------------+-------------------------------------------+
| sex                 | Sex of the penguin                        |
+---------------------+-------------------------------------------+
| year                | Year of data collection                   |
+---------------------+-------------------------------------------+

Penguin measurements

Body mass

The body masses of the penguins are measured and recorded in grams in the body_mass_g variable.

In the following code block, use mutate() to add the new variable body_mass_kg that records body mass in kilograms.

Note that 1 gram = 0.001 kilogram.

penguins <- penguins |> 
  ___ 
Use the 'mutate()' function!
penguins <- penguins |>
  mutate(___) 
penguins <- penguins |>
  mutate(body_mass_kg = ___) 
penguins <- penguins |>
  mutate(body_mass_kg = 0.001 * body_mass_g)
grade_this_code("The variable has been added.")

The penguins' flipper lengths, bill lengths and bill depths are measured in millimetres. Similar to what you did for body mass, introduce new variables flipper_length_cm, bill_length_cm and bill_depth_cm that record these variables in centimetres.

Note that 1 millimetre = 0.1 centimetre.

Flipper length

Using the mutate() function, create the variable flipper_length_cm.

penguins <- penguins |>
  ___ 
penguins <- penguins |>
  mutate(___)
penguins <- penguins |>
  mutate(flipper_length_cm = ___)
penguins <- penguins |>
  mutate(flipper_length_cm = 0.1 * flipper_length_mm)
grade_this_code("The new variable has been added.")

Bill length

Create the variable bill_length_cm.

penguins <- penguins |>
  ___ 
Refer to the previous exercise, it's pretty similar!
penguins <- penguins |>
  mutate(bill_length_cm = ___)
penguins <- penguins |>
  mutate(bill_length_cm = 0.1 * bill_length_mm)
# Feedback shown when the submission matches the bill-length solution.
grade_this_code("The new variable has been created.")

Bill depth

Create the variable bill_depth_cm.

penguins <- penguins |>
  ___ 
Look at previous exercises for help.
# Hint: assign the result back to `penguins`, as in the exercise template
# and the solution.
penguins <- penguins |>
  mutate(bill_depth_cm = ___)
penguins <- penguins |>
  mutate(bill_depth_cm = 0.1 * bill_depth_mm)
grade_this_code("Good work creating the variable!")

Let's see how our dataset looks now:

glimpse(penguins)

Patterns in visualisations

Create a scatter plot of the penguins' body mass against their flipper length. Using scale_colour_manual(), represent each species with the following colours:

+----------------+-------------+
| Species        | Colour      |
+:===============+:============+
| Adelie         | "orange"    |
+----------------+-------------+
| Chinstrap      | "purple"    |
+----------------+-------------+
| Gentoo         | "cyan4"     |
+----------------+-------------+

ggplot() +
  geom____(data = ___, 
           aes(x = ___, ___ = ___, ___ = ___))+
  ___()
ggplot() +
  geom_point(data = ___, 
           aes(x = ___, ___ = ___, ___ = ___))+
  scale_colour_manual(values = c(___,___,___))+
  labs(x = "Flipper length (cm)", y = "Body mass (kg)")
ggplot() +
  geom_point(data = penguins, 
           aes(x = ___, y = ___, colour = ___))+
  scale_colour_manual(values = c("orange",___,___))+
  labs(x = "Flipper length (cm)", y = "Body mass (kg)")
ggplot() +
  geom_point(data = penguins, 
           aes(x = flipper_length_cm, y = ___, colour = species))+
  scale_colour_manual(values = c("orange",___,___))+
  labs(x = "Flipper length (cm)", y = "Body mass (kg)")
ggplot() +
  geom_point(data = penguins, 
           aes(x = flipper_length_cm, y = body_mass_kg, colour = species))+
  scale_colour_manual(values = c("orange", "purple", "cyan4"))+
   labs(x = "Flipper length (cm)", y = "Body mass (kg)") 
grade_this_code("That's one nice looking plot!")

Based on the plot you created, answer the following question:

question("Which of these statements are true? Select all that apply",
  answer("There is a positive association between penguins' flipper length and body mass across each of the three species.", correct = TRUE),
  answer("There is a negative association between penguins' flipper length and body mass across each of the three species.", message = "As flipper length increases, does body mass decrease or increase?"),
  answer("The Gentoo penguins in general have longer flippers compared to the other species.", correct = TRUE),
  answer("The longest flipper length belongs to an Adelie penguin.", message = "Make sure to associate each species with the correct colour."),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Modelling

Assumptions

We shall first construct a linear model that predicts the body mass of a penguin based on its flipper length.

The simple linear regression model can be expressed with the following equation:

For $i = 1,2,...,n$, $$Y_i=\beta_0+\beta_1x_i+\epsilon_i$$ where for the $i$th observation $Y_i$ represents the response variable (body mass), $x_i$ the explanatory variable (flipper length), $\beta_0$ the intercept, $\beta_1$ the slope and $\epsilon_i$ the error term.

Before we start modelling, answer the following questions:

question("Which of these assumptions are made for the simple linear regression model? Select two.",
  answer("The body mass of penguins are independent and identically distributed random variables.", correct = TRUE),
  answer("The penguins' flipper lengths are independent random variables.", message = "The penguins' flipper lengths are values that we have observed to predict our dependant variable, in this case they aren't random variables."),
  answer("The relationship between the body mass and the mean flipper lengths is linear."),
  answer("The relationship between the mean body mass and the flipper lengths is linear", correct = TRUE),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)
question("Which of these assumptions are made for the simple linear regression model? Select two.",
  answer("The error (residual) terms have equal variance.", correct = TRUE),
  answer("The variance of the error (residual) terms are different for each value of the dependent variable."),
  answer("The error (residual) terms are not independent."),
  answer("The error (residual) terms are independent random variables.", correct = TRUE),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Body mass vs flipper length

Now, using the tidymodels package, we can fit a linear model that predicts body mass based on flipper length.

Linear regression using ordinary least squares (OLS) is implemented by using the linear_reg() and set_engine() functions, whereby we set the engine to "lm". We then use the fit() function to specify the formula which takes the form of y \~ x, where y and x are the response and explanatory variables respectively.

The tidy() function can be used to view the attributes of the model.

mass_flip_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ flipper_length_cm, data = penguins)
tidy(mass_flip_fit)
mass_flip_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ flipper_length_cm, data = penguins)
tidy(mass_flip_fit)
# Grade the flipper-length model by the first coefficient (the intercept row)
# of the submitted tidy() output. pass()/fail() end grading as soon as one is
# reached, so the checks below are tried in order.
grade_this({
  first_estimate <- .result$estimate[1]

  # Expected model: the intercept lies between -6 and -5.
  if (identical(floor(first_estimate), -6)) {
    pass("You have written the model correctly")
  }

  # Each remaining value is matched to a specific wrong model.
  if (identical(floor(first_estimate), 13)) {
    fail("Did you maybe try to predict the flipper length instead of body mass? Remember, lm(y~x,data).")
  }
  if (identical(round(first_estimate, digits = 1), 0.4)) {
    fail("Did you maybe try to predict the body mass from the bill length?")
  }
  if (identical(floor(first_estimate), 7)) {
    fail("Did you maybe try to predict the body mass from the bill depth?")
  }

  # Anything else gets the generic feedback.
  fail("Not quite. Look at the hints for help!")
})

Use the code chunk below to obtain the $R^2$ value of the model.

glance(___)$___
glance(___)$r.squared
glance(mass_flip_fit)$r.squared
grade_this_code("Your solution is correct!")

Based on your findings, answer the following questions:

question("Which interpretations are correct?",
  answer("The model has the intercept at approximately -5.78kg, which means that on average, a penguin with flipper length of 0 millimetre has a mass of -5.78 kg. Clearly, such an estimate is implausible due to extrapolation beyond observed range of flipper lengths.",
    correct = TRUE
  ),
  answer("The model has the intercept at approximately 0.497cm, which means that for a penguin with flipper length of 0 centimetre, we can expect it to weigh, on average, 0.497kg"),
  answer("The slope of the model is approximately 0.497, which means that, for each additional centimetre increase in flipper length, we can expect the mass of the penguin to increase by 0.497kg.",
    correct = TRUE
  ),
  answer("The slope of the model is approximately -5.78, which means that, for each additional millimitre increase in flipper length, we can expect the mass of the penguin to decrease by 5.78kg."),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)
question("What does the R-squared mean in this context?",
  answer("75.9% of the variability in body mass of the penguins is explained by flipper length.",
    correct = TRUE
  ),
  answer("The model predicts the body mass of penguins 75.9% of the time."),
  answer("On average, the body mass of penguins predicted by the model differs from the actual mass by 75.9%."),
  answer("75.9% of the variability in body mass of the penguins is not explained by flipper length."),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Body mass vs species

Unlike flipper length, species is a categorical variable. Similar to what we did for flipper length, fit a linear model predicting body mass from species.

mass_spec_fit <- ___(___)
___(___)
Look at the previous question for help!
mass_spec_fit <- linear_reg() |>
  set_engine("___") |>
  fit(___ ~ ___, data = ___)
tidy(___)
mass_spec_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ ___, data = ___)
tidy(mass_spec_fit)
mass_spec_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ species, data = penguins)
tidy(mass_spec_fit)
# Grade the species model by the first coefficient (the intercept row) of the
# submitted tidy() output. pass()/fail() end grading as soon as one is
# reached, so the checks below are tried in order.
grade_this({
  first_estimate <- .result$estimate[1]

  # Expected model: the intercept rounds to 3.70.
  if (identical(round(first_estimate, 2), 3.70)) {
    pass("You have written the model correctly")
  }

  # Known wrong models. Both must fail: previously the flipper-length case
  # called pass(), which marked an incorrect model as correct.
  if (identical(round(first_estimate, 2), 3.86)) {
    fail("Oops, did you maybe try to predict the body mass from sex?")
  }
  if (identical(floor(first_estimate), -6)) {
    fail("Did you maybe try to predict the body mass from the flipper length?")
  }

  # Anything else gets the generic feedback.
  fail("Not quite. Look at the hints for help!")
})

Use the code chunk below to obtain the $R^2$ value of the model.


How did you do it for mass_flip_fit?
glance(___)$___
glance(mass_spec_fit)$r.squared
grade_this_code("Your solution is correct!")

Based on your findings, answer the following question.

question("Which of the following statements are correct? Select all that apply.",
  answer("The intercept for Chinstrap penguins is higher compared to Adelie penguins", 
         correct = TRUE),
  answer("The intercept for Gentoo penguins is lower than for Adelie penguins.",
         message = "Look at the regression output for a hint!"),
  answer("The average body mass of Gentoo penguins is approximately 1.38kg.",
         message = "Looking at the regression output, speciesGentoo is a dummy variable that takes on value 1 if penguin is Gentoo and 0 otherwise, and the species Adelie is taken as the reference category."),
  answer("The average body mass of Adelie penguins is approximately 3.7kg.",
         correct = TRUE),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)
question("Which of the following statements are correct? Select all that apply.",
  answer("About 67% of the variability in body mass is explained by species.", correct = TRUE),
  answer("The model is able to predict the correct body mass 67% of the time."),
  answer("The difference in the mean body mass between Adelie and Gentoo penguins is 1.38kg.", correct = TRUE),
  answer("The difference in the mean body mass between Chinstrap and Gentoo penguins is 0.0324g.",
         message = "In the regression output, both speciesChinstrap and speciesGentoo are dummy variables, this leaves the species Adelie as the reference category."),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Body mass vs bill length/depth

In each code chunk below, fit another two linear models predicting body mass from bill length and bill depth respectively.

Predicting body mass from bill length:

mass_len_fit <- ___(___)
___(___)
Look at the previous exercises for help!
mass_len_fit <- linear_reg() |>
  set_engine("___") |>
  fit(___ ~ ___, data = ___)
tidy(mass_len_fit)
mass_len_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(___ ~ bill_length_cm, data = ___)
tidy(mass_len_fit)
mass_len_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ bill_length_cm, data = penguins)
tidy(mass_len_fit)
# Grade the bill-length model using the first two coefficients of the
# submitted tidy() output. pass()/fail() end grading as soon as one is
# reached, so the checks below are tried in order.
grade_this({
  first_estimate <- .result$estimate[1]
  second_estimate <- .result$estimate[2]

  # Expected model: the second coefficient rounds to 0.9.
  if (identical(round(second_estimate, digits = 1), 0.9)) {
    pass("You have written the model correctly")
  }

  # First-coefficient values matched to specific wrong models.
  if (identical(floor(first_estimate), 7)) {
    fail("Did you maybe try to predict the body mass from the bill depth?")
  }
  if (identical(floor(first_estimate), -6)) {
    fail("Did you maybe try to predict the body mass from the flipper length?")
  }

  # Anything else gets the generic feedback.
  fail("Not quite. Look at the hints for help!")
})

Predicting body mass from bill depth:

mass_dep_fit <- ___
___
Look at previous questions for help!
mass_dep_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(___ ~ ___, data = ___)
tidy(mass_dep_fit)
mass_dep_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ ___, data = ___)
tidy(mass_dep_fit)
mass_dep_fit <- linear_reg() |>
  set_engine("lm") |>
  fit(body_mass_kg ~ bill_depth_cm, data = penguins)
tidy(mass_dep_fit)
# Grade the bill-depth model using the first two coefficients of the
# submitted tidy() output. pass()/fail() end grading as soon as one is
# reached, so the checks below are tried in order.
grade_this({
  first_estimate <- .result$estimate[1]
  second_estimate <- .result$estimate[2]

  # Expected model: the second coefficient lies between -2 and -1.
  if (identical(floor(second_estimate), -2)) {
    pass("You have written the model correctly")
  }

  # First-coefficient values matched to specific wrong models.
  if (identical(round(first_estimate, digits = 1), 0.4)) {
    fail("Did you maybe try to predict the body mass from the bill length?")
  }
  if (identical(floor(first_estimate), -6)) {
    fail("Did you maybe try to predict the body mass from the flipper length?")
  }

  # Anything else gets the generic feedback.
  fail("Not quite. Look at the hints for help!")
})

Record the $R^2$ values of each model

#R squared for mass_len_fit
glance(___)$___
glance(___)$r.squared
glance(mass_len_fit)$r.squared
# Compare the submission with the solution code, using grade_this_code() like
# the other code checks in this tutorial.
grade_this_code("Brilliant! Your solution is spot on!")
#R squared for mass_dep_fit
glance(___)$___
glance(___)$r.squared
glance(mass_dep_fit)$r.squared
# Compare the submission with the solution code, using grade_this_code() like
# the other code checks in this tutorial.
grade_this_code("Brilliant! Your solution is spot on!")

Using these findings, answer the following:

question("Which is the better explanatory variable of penguins body mass?",
  answer("Bill length is the better predictor, since the R-squared is slightly higher.", 
         message = "Determine the R-squared values again to find the correct answer!"),
  answer("Bill Depth is the better predictor, since the R-squared is slightly lower.",
         message = "Determine the R-squared values again to find the correct answer!"
  ),
  answer("Flipper length is the better predictor, since the R-squared is higher.",
         correct = TRUE
  ),
  answer("Species is a better predictor, because the R-squared is higher.",
         message = "Determine the R-squared values again to find the correct answer!"
  ),
  answer("Flipper length is the worst predictor, since the R-squared is higher.",
         message = "To pick the better model, we want a higher R-squared value."
  ),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Making predictions

Using the best model, let's try estimating a penguin's body mass given a certain measurement.

Given a flipper length measurement of 17.3cm, what is the estimated mean body mass? Compute using the formula of best fit in the code chunk below. Hint: slope = 0.497, intercept = -5.781


___*17.3_____
What are the values of the slope and the intercept for the best model?
0.497*17.3 - 5.781
# Grade the hand-computed prediction: the submitted value, rounded to three
# decimals, must equal 2.817.
grade_this({
  rounded_result <- round(.result, digits = 3)

  if (identical(rounded_result, 2.817)) {
    pass("You got it! Using the best model, the predicted body mass is approximately 2.82kg when flipper length is 17.3cm.")
  }

  # Any other value gets the generic feedback.
  fail("Make sure you are using the correct intercept and slope values from the best model.")
})

Using the same method as before, predict the body mass for a flipper length measurement of 10cm.


___*10_____
0.497*10 - 5.781
# Grade the hand-computed prediction: the submitted value, rounded to three
# decimals, must equal -0.811.
grade_this({
  rounded_result <- round(.result, digits = 3)

  if (identical(rounded_result, -0.811)) {
    pass("Your solution is correct. The predicted body mass is a negative value in this case.")
  }

  # Any other value gets the generic feedback.
  fail("Make sure you are using the correct intercept and slope values from the best model.")
})

Did you get a negative value? Why do you think this is the case?

textInput("prediction", NULL)

Prediction made simple

Instead of manually computing the predictions, we can simply use the augment() function from the broom package.

We need to store the predictor in a data frame before inputting it as the argument in the augment() function.

flip_len <- data.frame(flipper_length_cm = 17.3)
augment(mass_flip_fit$fit, newdata = flip_len)
flip_len <- data.frame(flipper_length_cm = 17.3)
augment(mass_flip_fit$fit, newdata = flip_len)
grade_this_code("Now you know the trick! ")

Using the augment() function, we can predict the body masses for a sequence of measurements, 17cm - 24cm with an increment of 0.1cm.

Store the range of measurements in a data frame, newflip.

newflip <- data.frame(flipper_length_cm = seq(17, 24, by = 0.1))
newflip <- data.frame(flipper_length_cm = seq(17, 24, by = 0.1))
grade_this_code("Great! The values have been stored!")

Now, predict using the augment() function:

___(___)
How did you do it for a single observation? It's pretty similar.
augment(___, ___ )
augment(mass_flip_fit$fit, ___ )
augment(mass_flip_fit$fit, newdata = newflip)
grade_this_code("Good job predicting the body masses!")

Confidence & prediction Intervals

In addition to our analyses, we shall take a look at confidence intervals and prediction intervals.

Using the predict() function, we can compute both the confidence intervals and the prediction intervals. Note that the default intervals are at the 95% confidence level.

conf_int <- predict(mass_flip_fit$fit, newflip, interval = "confidence")
head(conf_int)
conf_int <- predict(mass_flip_fit$fit, newflip, interval = "confidence")
head(conf_int)
grade_this_code("You have computed the confidence intervals.")

Here we see the first six rows of conf_int, where fit gives us the predicted body mass, lwr and upr gives us the lower and upper bounds of the confidence interval respectively.

Similarly, by copying the code above and replacing conf_int with pred_int as well as setting the argument interval to "prediction", compute the prediction intervals.


pred_int <- predict(___, ___, interval = ___)
head(pred_int)
pred_int <- predict(mass_flip_fit$fit, newflip, interval = "prediction")
head(pred_int)
grade_this_code("You have computed the prediction intervals.")

Now answer the following quiz:

question("Which of the following statements are correct? Select all that apply.",
  answer("The prediction intervals are wider than the confidence intervals because there is greater uncertainty when you predict an individual value rather than the mean value for body mass.",
    correct = TRUE
  ),
  answer("The confidence intervals are wider than the prediction intervals because there is greater uncertainty when you predict the mean value rather than an individual value for body mass.",
    message = "width = upper bound - lower bound"
  ),
  answer("A prediction interval for body mass is a confidence interval for a future observation of body mass given the explanatory variable flipper length.",
    correct = TRUE
  ),
  answer("The expected value of the body mass will lie within the confidence interval with probability of 95%",
    message = "The expected value of body mass is fixed, it either lies within the confidence interval or not."
  ),
  answer("Out of all the confidence interval computed for the fitted value, we expect 95% of the confidence intervals to contain the expected value of the body mass.", correct = TRUE),
  correct = "Correct!",
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Final plot

Let's visualise our fitted line, prediction and confidence intervals on the scatterplot we created previously.

Before we create the plot, we will need to create a data frame that contains columns for flipper lengths, the predicted body mass as well as the upper and lower bounds for the confidence/prediction intervals.

int_dat <- data.frame(newflip, conf_int, pred_int)
int_dat <- data.frame(newflip, conf_int, pred_int)
grade_this_code("The data frame has been created.")

You can view the first 6 rows of the newly created data frame using the function head.

head(int_dat)

The columns fit and fit.1 record the predicted body masses. The columns lwr and upr give the lower and upper limits of the confidence intervals, while the columns lwr.1 and upr.1 contain the lower and upper limits of the prediction intervals.

Let's store the previously created scatterplot in plot_1.

plot_1 <- ggplot(data = int_dat, aes(x = flipper_length_cm)) +
  geom_point(data = penguins, 
           aes(x = flipper_length_cm, y = body_mass_kg, colour = species))+
  scale_colour_manual(values = c("orange", "purple", "cyan4"))+
  labs(x = "Flipper length (cm)", y = "Body mass (kg)")
plot_1 <- ggplot(data = int_dat, aes(x = flipper_length_cm))+
  geom_point(data = penguins, aes(x = flipper_length_cm, y = body_mass_kg,colour = species))+
  scale_colour_manual(values = c("orange","purple", "cyan4"))+
  labs(x = "Flipper length (cm)", y = "Body mass (kg)")
grade_this_code("Let's get plotting!")

Now in the code chunk below, fill in the missing values to produce the plot. Use 0.25 for the alpha value.

plot_1 +
  geom_line(aes(y = ___) )+
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = ___ )+
  geom_ribbon(aes(___), ___)
plot_1 + 
  geom_line(aes(y = ___) )+
  geom_ribbon( aes( ymin = lwr, ymax = upr), alpha = 0.25)+
  geom_ribbon(aes( ymin = ___, ymax = ___), alpha = ___)
plot_1 +
  geom_line(aes(y = fit) )+
  geom_ribbon( aes( ymin = lwr, ymax = upr), alpha = 0.25)+
  geom_ribbon(aes( ymin = lwr.1, ymax = ____), alpha = ___)
plot_1 + 
  geom_line(aes(y = fit) )+
  geom_ribbon( aes( ymin = lwr, ymax = upr), alpha = 0.25)+
  geom_ribbon(aes( ymin = lwr.1, ymax = upr.1), alpha = 0.25)
grade_this_code("Your solution is correct!")

Wrap up

We hope you've enjoyed this chance to practice your modelling skills.

That's it! You've finished the last tutorial, congratulations!



rstudio-education/dsbox documentation built on Oct. 22, 2023, 12:20 a.m.