# load packages ----------------------------------------------------------------

library(learnr)
library(gradethis)
library(tidyverse)
library(dsbox)

# set options for exercises and checking ---------------------------------------

gradethis_setup()


# hide non-exercise code chunks ------------------------------------------------

knitr::opts_chunk$set(echo = FALSE)

# data prep --------------------------------------------------------------------

# add the age_group column that the questions below refer to
lego_sales <- mutate(
  lego_sales,
  age_group = case_when(
    age <= 18            ~ "18 and under",
    between(age, 19, 25) ~ "19 - 25",
    between(age, 26, 35) ~ "26 - 35",
    between(age, 36, 50) ~ "36 - 50",
    age >= 51            ~ "51 and over"
  )
)

Introduction

knitr::include_graphics("images/james-pond-riEYPSKxoTw-unsplash.jpg")

In this tutorial we'll do some data gymnastics to refresh and review what we learned over the past few weeks.

Learning goals

Packages

In this assignment we will work with the tidyverse package, as well as the dsbox package for the data, so let's go ahead and load them!

___(tidyverse)
___(dsbox)
library(tidyverse)
library(dsbox)
grade_this_code("The tidyverse and dsbox packages you'll use in this analysis are now loaded!")

Data

We have data from LEGO sales in 2018 for a sample of customers who bought LEGO sets in the United States. The dataset is available as part of the dsbox package, and it's called lego_sales.

The codebook for the dataset is as follows:

| Name           | Description                            |
|:---------------|:---------------------------------------|
| first_name     | First name of customer                 |
| last_name      | Last name of customer                  |
| age            | Age of customer                        |
| phone_number   | Phone number of customer               |
| set_id         | Set ID of LEGO set purchased           |
| number         | Item number of LEGO set purchased      |
| theme          | Theme of LEGO set purchased            |
| subtheme       | Sub theme of LEGO set purchased        |
| year           | Year of purchase                       |
| name           | Name of LEGO set purchased             |
| pieces         | Number of LEGO pieces in set purchased |
| us_price       | Price of set purchase in US Dollars    |
| image_url      | Image URL of LEGO set purchased        |
| quantity       | Quantity of LEGO set(s) purchased      |

Counting frequencies

Most common names

What are the three most common first names of purchasers? Use the code chunk below to find out!

lego_sales |>
  ___(___)
lego_sales |>
  count(___, sort = TRUE)
lego_sales |>
  count(first_name, sort = TRUE)
grade_this({
  # Text of the first cell (row 1, column 1) of the submitted result; every
  # branch below keys off this single value.
  top_entry <- as.character(.result[1, 1])
  top_entry_is <- function(expected) identical(top_entry, expected)

  if (top_entry_is("Jackson")) {
    pass("You have successfully counted names and sorted the counts in descending order.")
  }
  if (top_entry_is("Aaron")) {
    fail("Did you forget to sort the counts in descending order?")
  }
  if (top_entry_is("Adoria")) {
    fail("Did you accidentally sort the counts in ascending order?")
  }
  # Both of these leading values get the same last-name feedback.
  if (top_entry_is("Ackah Yensu") || top_entry_is("Brown")) {
    fail("Did you count last names instead of first names?")
  }

  # Nothing recognised: fall back to the generic hint prompt.
  fail("Not quite. Take a peek at the hint!")
})

Now, based on your findings, answer the following question.

# Follow-up quiz on the name counts: retries are allowed and the answer order
# is randomised.
question(
  text = "What are the three most common names?",
  answer("Michael, Jackson, Jacob"),
  answer("Jackson, Jacob, Joseph", correct = TRUE),
  answer("Audrey, Kaitlyn, Joshua"),
  answer("Jacob, Jackson, Audrey"),
  answer("Kaitlyn, Jacob, Lucas"),
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Most common themes

What are the four most common themes of LEGO sets purchased? Use the code chunk below to find out!

lego_sales |>
  ___(___)
Look at the previous question for help!
lego_sales |>
  count(theme, sort = TRUE)
grade_this({
  # Text of the first cell (row 1, column 1) of the submitted result.
  first_theme <- as.character(.result[1, 1])

  if (identical(first_theme, "Star Wars")) {
    pass("You have counted themes and sorted the counts correctly.")
  }
  # This branch used to compare against "Advanced Models " (note the trailing
  # space), which an untrimmed "Advanced Models" can never equal. Trimming the
  # submitted value lets the hint fire whether or not the data carries that
  # stray space. NOTE(review): confirm the exact theme spelling in lego_sales.
  if (identical(trimws(first_theme), "Advanced Models")) {
    fail("Did you forget to sort the counts in descending order?")
  }
  if (identical(first_theme, "Classic")) {
    fail("Did you accidentally sort the counts in ascending order?")
  }
  if (identical(first_theme, "Adventure Camp")) {
    fail("Did you count subthemes instead of themes?")
  }
  # A top count of 172 (row 1, column 2) is also treated as a subtheme count.
  if (identical(as.numeric(.result[1, 2]), 172)) {
    fail("Did you count subthemes instead of themes?")
  }

  # Nothing recognised: fall back to the generic hint prompt.
  fail("Not quite. Take a peek at the hint!")
})

Now, based on your findings, answer the following question.

# Follow-up quiz on the theme counts: retries are allowed and the answer order
# is randomised.
question(
  text = "What are the four most commonly purchased themes?",
  answer("Gear, Duplo, Ninjago, Star Wars"),
  answer("Star Wars, Nexo Knights, Gear, City"),
  answer("Star Wars, Gear, Mixels, Bionicle"),
  answer("Gear, Star Wars, Nexo Knights, Mixels", correct = TRUE),
  answer("Nexo Knights, Gear, Duplo, Friends"),
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Most common subtheme

Among the most common theme of LEGO sets purchased, what is the most common subtheme?

lego_sales |>
  ___(___) |>
  ___(___)
lego_sales |>
  filter(___ == ___) |>
  ___(___)
lego_sales |>
  filter(theme == ___) |>
  count(___)

```{r most-common-subtheme-hint-3}
Look at the previous question to see what the most common theme is!
```

```r
lego_sales |>
  filter(theme == "Star Wars") |>
  count(subtheme, sort = TRUE)
grade_this({
  # Text of the first cell (row 1, column 1) of the submitted result.
  leading_subtheme <- as.character(.result[1, 1])
  leads_with <- function(label) identical(leading_subtheme, label)

  if (leads_with("The Force Awakens")) {
    pass("You have successfully counted subthemes and sorted the counts in descending order.")
  }
  if (leads_with("Battlefront")) {
    fail("Did you forget to sort the counts in descending order?")
  }
  if (leads_with("Ultimate Collector Series")) {
    fail("Did you accidentally sort the counts in ascending order?")
  }
  if (leads_with("Star Wars")) {
    fail("Did you count themes instead of subthemes?")
  }

  # Nothing recognised: fall back to the generic hint prompt.
  fail("Not quite. Take a peek at the hint!")
})

Now, based on your findings, answer the following question.

# Follow-up quiz on the subtheme counts: retries are allowed and the answer
# order is randomised.
question(
  text = "What is the most common subtheme among the most common theme?",
  answer("Buildable Figures"),
  answer("Episode V"),
  answer("The Force Awakens", correct = TRUE),
  answer("MicroFighters"),
  answer("Battlefront"),
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Discretizing variables

Grouping by age

Create a new variable called age_group and group the ages of customers into the following categories: "18 and under", "19 - 25", "26 - 35", "36 - 50", "51 and over".

```{r create-age-group, exercise = TRUE}
lego_sales <- ___ |>
  ___(age_group = ___(
    ___
  ))
```

```{r create-age-group-hint-1}
Try using mutate() and case_when()!
```

```{r create-age-group-hint-2}
Do you need some help with logical operators? Remember

"18 and under" : age <= 18
"19 - 25" : age >= 19 & age <= 25

etc.
```

```r
lego_sales <- lego_sales |>
  mutate(age_group = case_when(
    age <= 18             ~ "18 and under",
    ___
  ))
```

```{r create-age-group-solution}
lego_sales <- lego_sales |>
  mutate(age_group = case_when(
    age <= 18             ~ "18 and under",
    age >= 19 & age <= 25 ~ "19 - 25",
    age >= 26 & age <= 35 ~ "26 - 35",
    age >= 36 & age <= 50 ~ "36 - 50",
    age >= 51             ~ "51 and over"
  ))
```

```r
grade_this({
  # Tabulate the submitted age_group column and collapse the counts into one
  # comma-separated string, so the whole tally is checked in one comparison.
  group_sizes <- as.numeric(table(.result[, "age_group"]))
  size_signature <- paste(group_sizes, collapse = ", ")

  if (!identical(size_signature, "30, 129, 183, 216, 62")) {
    fail("Not quite. Review the hints.")
  }
  pass("You have grouped the ages according to the instructions!")
})

Most common age group

Count the number of customers in each age group and display the counts in descending order, from most common to least common age group.

Which age group is the most common? Write code in the chunk below to figure it out!

```r
lego_sales |>
  ___(___)
```

````r
lego_sales |>
  count(age_group, sort = TRUE)
grade_this({
  # Text of the first cell (row 1, column 1) of the submitted result.
  top_group <- as.character(.result[1, 1])
  top_group_is <- function(expected) identical(top_group, expected)

  if (top_group_is("36 - 50")) {
    pass("You have successfully determined the most common age group.")
  }
  if (top_group_is("18 and under")) {
    fail("Did you forget to arrange the counts in descending order?")
  }
  # A bare age in the first cell gets the individual-ages feedback.
  if (top_group_is("33") || top_group_is("16")) {
    fail("Did you count individual ages instead of age groups?")
  }

  # Nothing recognised: point the student at count().
  fail("Not quite. Try using the count function!")
})

Now, based on your findings, answer the following question:

# Follow-up quiz on the age group counts: retries are allowed and the answer
# order is randomised.
question(
  text = "Which age group is the most common in the dataset?",
  answer("18 and under"),
  answer("19 - 25"),
  answer("26 - 35"),
  answer("36 - 50", correct = TRUE),
  answer("51 and over"),
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Grouped summaries

Quantity of purchases by age

Which age group has purchased the highest number of LEGO sets?

```{r sales-by-group, exercise = TRUE}
lego_sales |>
  ___(___) |>
  ___(
    total_n = ___(___)
  ) |>
  arrange(___)
```

```{r sales-by-group-hint-1}
lego_sales |>
  group_by(___) |>
  summarise(
    total_n = sum(___)
  ) |>
  arrange(___)
```

```{r sales-by-group-hint-2}
You need to consider the quantity of purchases.
```

```{r sales-by-group-hint-3}
lego_sales |>
  group_by(___) |>
  summarise(
    total_n = sum(quantity)
  ) |>
  arrange(desc(___))
```

```{r sales-by-group-solution}
lego_sales |>
  group_by(age_group) |>
  summarise(
    total_n = sum(quantity)
  ) |>
  arrange(desc(total_n))
```

```r
grade_this({
  # Text of the first cell (row 1, column 1) of the submitted result.
  top_group <- as.character(.result[1, 1])
  top_group_is <- function(expected) identical(top_group, expected)

  if (top_group_is("36 - 50")) {
    pass("Your solution is correct!")
  }
  if (top_group_is("18 and under")) {
    fail("Did you forget to sort the counts in descending order?")
  }
  # A bare age in the first cell gets the individual-ages feedback.
  if (top_group_is("33") || top_group_is("16")) {
    fail("Did you count individual ages instead of age groups?")
  }

  # Nothing recognised: fall back to the generic hint prompt.
  fail("Not quite. Take a peek at the hint!")
})

Now, based on your findings, answer the following question:

# Follow-up quiz on total quantity purchased per age group. Retries are
# allowed, and the answer order is now randomised like the tutorial's other
# questions; this was the only question that omitted random_answer_order.
question("Which age group has purchased the highest number of LEGO sets?",
    answer("18 and under"),
    answer("19 - 25"),
    answer("26 - 35"),
    answer("36 - 50",
           correct = TRUE),
    answer("51 and over"),
    allow_retry = TRUE,
    random_answer_order = TRUE
  )

Most generous age group

Which age group has spent the most money on LEGO sets?

lego_sales |>
  ___(
    amount_spent = ___ * ___
  ) |>
  ___(___) |>
  ___(
    total_spent = ___(___)
  ) |>
  arrange(___)

You will need to consider the quantity of purchases as well as the price of LEGO sets.

```r
lego_sales |>
  mutate(
    amount_spent = ___ * ___
  ) |>
  group_by(___) |>
  summarise(
    total_spent = ___(___)
  ) |>
  arrange(desc(___))
lego_sales |>
  mutate(
    amount_spent = us_price * quantity
  ) |>
  group_by(age_group) |>
  summarise(
    total_spent = sum(___)
  ) |>
  arrange(desc(___))
lego_sales |>
  mutate(
    amount_spent = us_price * quantity
  ) |>
  group_by(age_group) |>
  summarise(
    total_spent = sum(amount_spent)
  ) |>
  arrange(desc(total_spent))
grade_this({
  # Text of the first cell (row 1, column 1) of the submitted result.
  top_spender <- as.character(.result[1, 1])
  top_spender_is <- function(expected) identical(top_spender, expected)

  if (top_spender_is("36 - 50")) {
    pass("Your solution is correct!")
  }
  if (top_spender_is("18 and under")) {
    fail("Did you forget to sort the counts in descending order?")
  }
  # A bare age in the first cell gets the individual-ages feedback.
  if (top_spender_is("57")) {
    fail("Did you count individual ages instead of age groups?")
  }

  # Nothing recognised: fall back to the generic hint prompt.
  fail("Not quite. Take a peek at the hint!")
})

Now, based on your findings, answer the following question:

# Follow-up quiz on total spending per age group: retries are allowed and the
# answer order is randomised.
question(
  text = "Which age group has spent the most money on LEGO sets?",
  answer("18 and under"),
  answer("19 - 25"),
  answer("26 - 35"),
  answer("36 - 50", correct = TRUE),
  answer("51 and over"),
  allow_retry = TRUE,
  random_answer_order = TRUE
)

Wrap up

Congratulations, you have completed tutorial 4! We hope you enjoyed reviewing what we've learned so far!



rstudio-education/dsbox documentation built on Oct. 22, 2023, 12:20 a.m.