# Global knitr chunk defaults: show code, hide warnings and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(learnr)
library(tidyverse)
library(gapminder)
# learnr exercise settings: 60-second time limit per exercise, runs of three
# or more underscores are treated as blanks, and exercises are pre-evaluated.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
tutorial_options(
  exercise.timelimit = 60,
  exercise.blanks = "___+",
  exercise.eval = TRUE
)

Recap (key concepts so far)

Data 'wrangling'

Intro to the tidyverse {.smaller}

Pipes

We often want to apply multiple functions ('verbs') to our data in a chain.

%>% (the 'pipe') is a special R operator for chaining functions together; it is loaded as part of the tidyverse

An example of pipes

# Very hard to read: nested calls have to be read from the inside out
bop(scoop(hop(foo_foo, through = forest), up = field_mice), on = head)

# Creating unnecessary 'temporary' variables: every step needs its own name
foo_foo_1 <- hop(foo_foo, through = forest)
foo_foo_2 <- scoop(foo_foo_1, up = field_mice)
foo_foo_3 <- bop(foo_foo_2, on = head)

Using pipes makes your code easy to read and understand as a series of verbs

# Read top to bottom: each verb receives the result of the previous step
foo_foo %>%
  hop(through = forest) %>%
  scoop(up = field_mice) %>%
  bop(on = head)

Assigning the results of a chain to a new variable

# Placeholder names: the final result of the chain is stored in `result`
result <- input_data %>% function_1 %>% function_2

This also works

# Right-hand assignment: `->` stores the result of the chain in `result`
# (style guides generally prefer `<-` at the start of the chain)
input_data %>% function_1 %>% function_2 -> result 

The pipe feeds the first argument of the next function

# A small character vector to pipe from
x <- c("a", "b", "c")
x %>% c("d")
# Same as c(x, "d"): x becomes the first argument

If you want the piped input to feed a different argument, you can use a dot (.) as a placeholder:

x %>% c("d", .)
# Same as c("d", x): the dot marks where x is inserted

Why use pipes

Data wrangling verbs

Key data wrangling verbs

dplyr package

Gapminder package

# Load the tidyverse (provides dplyr and the %>% pipe used below)
library(tidyverse)

# Uncomment to install the gapminder data package if it is not installed yet
# install.packages('gapminder')
library(gapminder)
# Preview the first rows of the gapminder data set
head(gapminder)

Filter

# Keep only the rows observed in 2007
gapminder %>%
  filter(year == 2007)

Logical operators for filtering

# Comma-separated conditions must all hold (logical AND);
# then draw 4 of the matching rows at random
gapminder %>%
  filter(
    year == 2002,
    continent == "Asia"
  ) %>%
  sample_n(size = 4)

Logical operators for filtering

# `|` is logical OR: keep rows from 2002 or from Asia (or both);
# then draw 4 of the matching rows at random
gapminder %>%
  filter(
    year == 2002 | continent == "Asia"
  ) %>%
  sample_n(size = 4)

Logical operators for filtering

# %in% tests membership in a set of values; both conditions must hold
gapminder %>%
  filter(
    country %in% c("Argentina", "Belgium", "Mexico"),
    year %in% c(1987, 1992)
  )

Select

# Keep only the three named columns, then show the first 4 rows
gapminder %>%
  select(
    country,
    year,
    lifeExp
  ) %>%
  head(n = 4)

Handling improper column names

# Backticks let you refer to column names that are not valid R names
# (here one starting with a digit and one containing spaces);
# `df` is a placeholder data frame
df %>% select(`1999`, `badly named variable`)

Rename

# rename(new_name = old_name); then show the first 3 rows
gapminder %>%
  rename(
    lifeExpectancy = lifeExp,
    population = pop
  ) %>%
  head(n = 3)

Arrange

# Sort rows by year (ascending), then show the first 4 rows
gapminder %>%
  arrange(year) %>%
  head(n = 4)

Arrange

# Sort by year first, breaking ties by lifeExp; then show the first 4 rows
gapminder %>%
  arrange(
    year,
    lifeExp
  ) %>%
  head(n = 4)

Desc

# Rows after 2000, sorted by country in descending order; first 4 rows shown
gapminder %>%
  filter(year > 2000) %>%
  arrange(
    desc(country)
  ) %>%
  head(n = 4)

Mutate

# Add a new column `just_one` holding the constant 1 in every row
gapminder %>%
  mutate(
    just_one = 1
  ) %>%
  head(n = 4)

Mutate

# Add a new column computed from two existing columns
gapminder %>%
  mutate(
    gdp = pop * gdpPercap
  ) %>%
  head(n = 4)

Mutate

# Reusing an existing column name overwrites it: pop is divided by one million
gapminder %>%
  mutate(
    pop = pop / 1e6
  ) %>%
  head(n = 4)

Mutate and ifelse {.smaller}

# ifelse(test, yes, no): returns `yes` where the test is TRUE, otherwise `no`
x <- 10
ifelse(
  test = x > 9,
  yes = "x is greater than 9",
  no = "x is not greater than 9"
)

Allows you to use mutate in a 'condition-dependent' way

# Double gdpPercap for rows before 1980 and keep it unchanged otherwise,
# then draw 5 rows at random
gapminder %>%
  mutate(
    adjusted_gdp = ifelse(year < 1980, gdpPercap * 2, gdpPercap)
  ) %>%
  sample_n(size = 5)

Summarize

# Collapse the 1997 rows into one row: maximum and standard deviation of lifeExp
gapminder %>%
  filter(year == 1997) %>%
  summarize(
    max_exp = max(lifeExp),
    sd_exp = sd(lifeExp)
  )

Group-by

# Same summary as before, but computed separately for each continent:
# one output row per continent for 1997
gapminder %>%
  filter(year == 1997) %>%
  group_by(continent) %>%
  summarize(
    max_exp = max(lifeExp),
    sd_exp = sd(lifeExp)
  )

Group-by {.smaller}

# One summary row per continent-year combination: row count, maximum and
# standard deviation of lifeExp
gapminder %>%
  group_by(continent, year) %>%
  summarize(
    num_rows = n(),
    max_exp = max(lifeExp),
    sd_exp = sd(lifeExp)
  ) %>%
  # summarize() only drops the last grouping level (year), so the result is
  # still grouped by continent; ungroup so later steps are not silently grouped
  ungroup() %>%
  head(4)

Recap

Cheat sheet (google 'dplyr cheat sheet')

Additional resources



AshirBorah/cp_bootcamp_r_tutorials documentation built on May 16, 2024, 3:24 p.m.