library(learnr)
library(gradethis)

# Document-wide knitr chunk options.
knitr::opts_chunk$set(exercise.warn_invisible = FALSE, echo = FALSE)

# learnr exercise defaults; setting grade_learnr as the exercise checker
# enables code checking.
tutorial_options(
  exercise.reveal_solution = TRUE,
  exercise.lines = 8,
  exercise.checker = grade_learnr
)

Challenge 1

1a

First start by trying to summarise a single column, bill_length_mm by calculating its mean.

# Starter code: fill in each `_` blank.
penguins %>% 
  summarise(_(_, na.rm = _))
# Completed code: mean of bill_length_mm with missing values removed.
penguins %>% 
  summarise(mean(bill_length_mm, na.rm = TRUE))
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
'Did you remember to place the function first, then the column name inside the function?'

1b

Add a column with the standard deviation of bill length.

# Starter code: fill in the `_` blanks with the standard deviation summary.
# The last argument carries no trailing comma, so a filled-in starter has the
# same argument list as the completed code.
penguins %>% 
  summarise(
    mean(bill_length_mm, na.rm = TRUE),
    _(_, na.rm = _)
  )
# Completed code: mean and standard deviation of bill_length_mm, both with
# missing values removed.
penguins %>% 
  summarise(
    mean(bill_length_mm, na.rm = TRUE),
    sd(bill_length_mm, na.rm = TRUE)
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
The function for standard deviation is `sd()`

1c

Now add the same two metrics for bill depth and give the columns clear names.

# Starter code: complete the `bill_length__` column names and fill in the
# `_` / `__` blanks for the bill depth summaries.
penguins %>% 
  summarise(
    bill_length__ = mean(bill_length_mm, na.rm = TRUE),
    bill_length__ = sd(bill_length_mm, na.rm = TRUE),
    _ = mean(__, na.rm = TRUE),

    _ = sd(__, na.rm = TRUE)
  )
# Completed code: named mean and sd columns for bill length and bill depth,
# all computed with missing values removed.
penguins %>% 
  summarise(
    bill_length_mean = mean(bill_length_mm, na.rm = TRUE),
    bill_length_sd = sd(bill_length_mm, na.rm = TRUE),
    bill_depth_mean = mean(bill_depth_mm, na.rm = TRUE),
    bill_depth_sd = sd(bill_depth_mm, na.rm = TRUE)
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
The function for standard deviation is `sd`

Challenge 2

2a

Get the mean of all columns containing an underscore ("_")

# Starter code: supply the column selector and the expression after `~`.
penguins %>% 
  summarise(across(__, 
                     .fns = ~ __))
# Completed code: mean (missing values removed) of every column whose name
# contains "_".
penguins %>% 
  summarise(across(contains("_"), 
                   .fns = ~ mean(.x, na.rm = TRUE)))
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
Remember to use the tidy selectors like "ends_with", "contains", and "starts_with".
Use the `mean` function.
Remember to use the internal `.x`, and `na.rm = TRUE` in the mean function.

2b

Get the mean, minimum and maximum of all columns containing an underscore ("_")

# Starter code: the mean is given; fill in the `__` blanks for the two
# remaining summaries.
penguins %>% 
  summarise(across(contains("_"), 
                   .fns = list(
                     ~ mean(.x, na.rm = TRUE),
                     ~ __(__, na.rm = TRUE),
                     ~ __(__, na.rm = TRUE)
                   ))
  )
# Completed code: mean, min and max (missing values removed) of every column
# whose name contains "_"; the list of functions is unnamed.
penguins %>% 
  summarise(across(contains("_"), 
                   .fns = list(
                     ~ mean(.x, na.rm = TRUE),
                     ~ min(.x, na.rm = TRUE),
                     ~ max(.x, na.rm = TRUE)
                   ))
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
Remember the `min` and `max` functions.

2c

Do the same as the above, but now make sure the columns are given nicer names by naming the functions in the list.

# Starter code: give each list entry a name (`_`) and fill in the `__` blanks.
penguins %>% 
  summarise(across(contains("_"), 
                   .fns = list(
                     _ = ~ mean(.x, na.rm = TRUE),
                      _ = ~ __(__, na.rm = TRUE),
                      _ = ~ __(__, na.rm = TRUE)
                   ))
  )
# Completed code: same summaries as before, with the list entries named
# mean, min and max.
penguins %>% 
  summarise(across(contains("_"), 
                   .fns = list(
                     mean = ~ mean(.x, na.rm = TRUE),
                     min = ~ min(.x, na.rm = TRUE),
                     max = ~ max(.x, na.rm = TRUE)
                   ))
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)

2d

Simplify the code to not use the tilde (~) but assign na.rm = TRUE to all functions at once.

# Starter code: pass the bare functions in the list and supply the shared
# argument once in the final `__ = __` blank.
penguins %>% 
  summarise(across(contains("_"), 
                   .fns = list(mean = __,
                               min = __,
                               max = __), 
                   __ = __)
  )
# Completed code: uses the same contains("_") selector as the starter code, so
# a correctly filled-in starter matches it; na.rm = TRUE is given once to
# across() instead of inside each function.
penguins %>% 
  summarise(across(contains("_"), 
                   .fns = list(mean = mean,
                               min = min,
                               max = max), 
                   na.rm = TRUE)
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
The expectation here is to name the output with the exact same name as the function
be sure to use all small letters here

Challenge 3

3a

Group the data by species and get the mean of the bill_length_mm column

# Starter code: fill in the grouping column and the `_` blanks of the summary.
penguins %>% 
  group_by(__) %>% 
  summarise(_(_, na.rm = _))
# Completed code: mean of bill_length_mm (missing values removed) for each
# species.
penguins %>% 
  group_by(species) %>% 
  summarise(mean(bill_length_mm, na.rm = TRUE))
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
Did you remember to place the function first, then the column name inside the function?

3b

Group the data by island instead, and take the summary of the bill length and bill depth columns

# Starter code: fill in the grouping column, the summarised columns and the
# na.rm values.
penguins %>% 
  group_by(_) %>% 
  summarise(bill_length_mm = mean(__, na.rm = _),
            bill_depth_mm = mean(__, na.rm = _))
# Completed code: per-island means of bill length and bill depth, stored under
# the original column names.
penguins %>% 
  group_by(island) %>% 
  summarise(bill_length_mm = mean(bill_length_mm, na.rm = TRUE),
            bill_depth_mm = mean(bill_depth_mm, na.rm = TRUE))
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
Make sure the correct column names go to the correct summary

3c

Group the data by island AND species, what happened?

# Starter code: fill in the two grouping columns; the summaries are given.
penguins %>% 
  group_by(__, __) %>% 
  summarise(
    bill_length_mm = mean(bill_length_mm, na.rm = TRUE),
    bill_depth_mm = mean(bill_depth_mm, na.rm = TRUE)
  )
# Completed code: the same two means, now for each island/species combination.
penguins %>% 
  group_by(island, species) %>% 
  summarise(
    bill_length_mm = mean(bill_length_mm, na.rm = TRUE),
    bill_depth_mm = mean(bill_depth_mm, na.rm = TRUE)
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
Make sure the correct column names go to the correct summary

Challenge 4

4a

Get the descriptive statistics (mean, sd, min and max) for all the columns ending with "mm", grouped by species

# Starter code: fill in the column selector and the three remaining
# name = function pairs; na.rm = TRUE is already supplied once to across().
penguins %>% 
  summarise(across(__, .fns = list(mean = mean,
                                   _ = _,
                                   _ = _,
                                   _ = _), 
                     na.rm = TRUE)
  )
# Completed code: groups by species, then computes mean, sd, min and max
# (missing values removed) for every column ending in "mm".
penguins %>% 
  group_by(species) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE)
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
The expectation here is to name the output with the exact same name as the function
be sure to use all small letters here

4b

Now add a count of how many observations there are of each species and place that in the column named n.

# Starter code: the grouped summary is given; fill in the `__` blanks so that
# column n holds the number of observations in each species.
penguins %>% 
  group_by(species) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE),
            n = __(__)
  )
# Completed code: the count is added as a separate summarise() argument after
# the across() call; length(species) is the number of rows in each group.
penguins %>% 
  group_by(species) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE),
            n = length(species)
  )
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)
Add the new summary variable after closing the `across` parentheses.

4c

Try grouping by more variables, like sex or island, or add more summary variables.

# Starting point for free experimentation (no check follows this code):
# per-species mean, sd, min and max of the columns ending in "mm", plus the
# number of rows per species in n.
penguins %>% 
  group_by(species) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE),
            n = length(species)
  )

Challenge 5

5a

Pivot longer a summary grouped by species, island and sex for the columns ending with "mm", with the metrics mean, standard deviation, minimum and maximum values. When you pivot the data, split the column names up into 4 parts describing the part, measurement, unit and statistic used.

# Starter code: the grouped summary is given; replace `___` with the pivoting
# step.
penguins %>% 
  group_by(species, island, sex) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE)
  ) %>% 
  ___
# Completed code: every column containing "mm" is pivoted to long form, and
# each column name is split at "_" into part, measurement, unit and stat.
penguins %>% 
  group_by(species, island, sex) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE)
  ) %>% 
  pivot_longer(contains("mm"),
               names_to = c("part", "measurement", "unit", "stat"),
               names_sep = "_") 
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)

5b

Add to the code you just wrote and pivot the data wider again by the statistic column.

# Starter code: summary (now including the group size n) and the long pivot
# are given; replace `___` with the step that pivots wider again.
penguins %>% 
  group_by(species, island, sex) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE),
            n = length(species)
  ) %>% 
  pivot_longer(contains("mm"),
               names_to = c("part", "measurement", "unit", "stat"),
               names_sep = "_") %>% 
  ___
# Completed code: after the long pivot, the stat column is spread back out so
# that each statistic gets its own column, filled from the value column.
penguins %>% 
  group_by(species, island, sex) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE),
            n = length(species)
  ) %>% 
  pivot_longer(contains("mm"),
               names_to = c("part", "measurement", "unit", "stat"),
               names_sep = "_") %>% 
  pivot_wider(names_from = stat, 
              values_from  = value)
# Check: praise message on a correct answer, encouragement on an incorrect one.
grade_code(
  correct = random_praise(),
  incorrect = random_encouragement()
)

5c

Try different statistics, grouping variables and pivoting in different ways. What works, and what does not? Learning what you can and cannot do is often the result of trial and error.

# Starting point for free experimentation (no check follows this code):
# grouped summary with group size n, pivoted long by splitting the column
# names at "_", then pivoted wide again with one column per statistic.
penguins %>% 
  group_by(species, island, sex) %>% 
  summarise(across(ends_with("mm"), 
                   .fns = list(mean = mean,
                               sd = sd,
                               min = min,
                               max = max), 
                   na.rm = TRUE),
            n = length(species)
  ) %>% 
  pivot_longer(contains("mm"),
               names_to = c("part", "measurement", "unit", "stat"),
               names_sep = "_") %>% 
  pivot_wider(names_from = stat, 
              values_from  = value)


Athanasiamo/swc.tidyverse documentation built on Dec. 17, 2021, 9:48 a.m.