# Set knitr chunk defaults for the whole document: show the code (`echo`),
# reformat it (`tidy`, configured through `tidy.opts`) and enable caching.
knitr::opts_chunk$set(
  echo = TRUE,
  tidy = TRUE,
  cache = 1,
  tidy.opts = list(blank = FALSE, width.cutoff = 60)
)

Exercise set 1

For Tasks 1--4 use the following data

library(tidyverse)
# Preview the data as a tibble. `as_tibble()` replaces `tbl_df()`, which
# dplyr has deprecated.
mpg %>% as_tibble()

Task 1

# Scatter plot of `hwy` against `displ`, with points coloured by `class`.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = class)) +
  geom_point()

Task 2

# Red points with a smoother drawn through them.
mpg %>%
  ggplot(aes(x = displ, y = hwy)) +
  geom_point(colour = "red") +
  geom_smooth()

Task 3

# Colour by `drv`; because colour is set in the plot-level aes(), the linear
# fit (drawn without its confidence band) is computed per colour group.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

Task 4

# Wrong: `cyl` is mapped to colour as-is.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = cyl)) +
  geom_point() +
  geom_smooth()
# Right: wrap `cyl` in factor() so colour becomes a discrete mapping.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_point() +
  geom_smooth()

Exercise set 2

For Task 5, use the following data

library(tidyverse)
# Preview the data as a tibble. `as_tibble()` replaces `tbl_df()`, which
# dplyr has deprecated.
mpg %>% as_tibble()

Task 5

# Starting point: points plus one linear fit per `factor(cyl)` colour group.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Finished plot: the same layers on log10 axes with explicit breaks, one
# panel per `year` with relabelled strips, and custom axis/legend/title text.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_log10(breaks = 2:7) +
  scale_y_log10(breaks = c(20, 30, 40)) +
  facet_wrap(
    ~year,
    labeller = as_labeller(
      c("1999" = "Model year 1999", "2008" = "Model year 2008")
    )
  ) +
  labs(
    x = "Displacement",
    y = "MPG, highway",
    colour = "Cylinders",
    title = "Fuel economy and engine size"
  )

Exercise set 3

For tasks 6--11, use the following data:

library(tidyverse)
data(mpg, package = 'ggplot2')
# Preview the data as a tibble. `as_tibble()` replaces `tbl_df()`, which
# dplyr has deprecated.
mpg %>% as_tibble()

Task 6

# Keep eight columns of `mpg`, store the result as `mpg2`, and print it.
mpg2 <- select(mpg, manufacturer, model, displ, year, cyl, trans, cty, hwy)
mpg2
mpg2 <- mpg %>% [ your code from the previous step ]

Task 7

# Add squared displacement and displacement per cylinder (rounded to two
# decimals), store the result as `mpg3`, and print it.
mpg3 <- mutate(
  mpg2,
  displ2 = displ^2,
  vol_per_cyl = round(displ / cyl, 2)
)
mpg3
mpg3 <- mpg2 %>% [ your code from the previous step ]

Task 8

# All rows of `mpg3`, largest `vol_per_cyl` first.
mpg3 %>%
  arrange(desc(vol_per_cyl))

# The same ordering restricted to rows whose manufacturer is "chevrolet".
mpg3 %>%
  filter(manufacturer == "chevrolet") %>%
  arrange(desc(vol_per_cyl))

# Largest `vol_per_cyl` for each manufacturer/year pair.
# summarise() only removes the last grouping level, so ungroup() explicitly;
# otherwise `mpg4` stays grouped by `manufacturer` and that grouping leaks
# into every later step that builds on it.
mpg4 <- mpg3 %>%
  group_by(manufacturer, year) %>%
  summarise(max_vol_per_cyl = max(vol_per_cyl)) %>%
  ungroup()
mpg4
mpg4 <- mpg3 %>% [ your code from the previous step ]

Task 9

# Spread the `year` values into columns filled with `max_vol_per_cyl`,
# store the wide table as `mpg5`, and print it.
mpg5 <- spread(mpg4, key = year, value = max_vol_per_cyl)
mpg5
mpg5 <- mpg4 %>% [ your code from the previous step ]

Task 10

# Add the difference between the `2008` and `1999` columns as `change`,
# store the result as `mpg6`, and print it.
mpg6 <- mutate(mpg5, change = `2008` - `1999`)
mpg6
mpg6 <- mpg5 %>% [ your code from the previous step ]

Task 11

# Give the year columns descriptive names, stack every column except
# `manufacturer` into variable/value pairs, and show a plain data frame.
mpg6 %>%
  rename(max_vpc_1999 = `1999`, max_vpc_2008 = `2008`) %>%
  gather(variable, value, -manufacturer) %>%
  as.data.frame()

Exercise set 4

# Install nycflights13 only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(tidyverse)
library(nycflights13)
# Preview the three nycflights13 tables as tibbles. `as_tibble()` replaces
# `tbl_df()`, which dplyr has deprecated.
flights %>% as_tibble()
airlines %>% as_tibble()
weather %>% as_tibble()

Task 12

# Trim both tables to the columns needed for the joins and later tasks.
flights2 <- flights %>%
  select(origin, year, month, day, hour, sched_dep_time, dep_delay, carrier)
weather2 <- weather %>%
  select(origin, year, month, day, hour, precip, wind_speed, visib)

# Join keys are spelled out instead of relying on dplyr's natural join, which
# silently uses every shared column name and would change meaning if either
# table gained a column. These are exactly the columns the tables share.
flights2 %>%
  inner_join(airlines, by = "carrier")
flights2 %>%
  left_join(weather2, by = c("origin", "year", "month", "day", "hour"))

Task 13

# Extremes of each weather measure: lowest precipitation and wind speed,
# highest visibility.
summarise(
  weather2,
  min_precip = min(precip, na.rm = TRUE),
  min_wind = min(wind_speed, na.rm = TRUE),
  max_visib = max(visib, na.rm = TRUE)
)

# Flights matched to their hourly weather, keeping only rows with zero
# precipitation, zero wind speed and visibility of 10; stored and printed.
good_weather_delays <- flights2 %>%
  inner_join(weather2, by = c("origin", "year", "month", "day", "hour")) %>%
  filter(precip == 0, wind_speed == 0, visib == 10)
good_weather_delays

# Mean departure delay per carrier, largest first, with the airline name
# attached; stored and printed.
avg_good_weather_delays <- good_weather_delays %>%
  group_by(carrier) %>%
  summarise(dep_delay = mean(dep_delay, na.rm = TRUE)) %>%
  arrange(desc(dep_delay)) %>%
  inner_join(airlines, by = "carrier")
avg_good_weather_delays
avg_good_weather_delays <- [ your code from the previous step ]

Task 14

# Carrier -> airline-name lookup whose factor levels follow the delay ranking.
# `avg_good_weather_delays` is sorted with the largest mean delay first, so
# reversing the names puts that carrier last in the levels, i.e. at the top
# of the flipped axis.
# The earlier `factor(-row_number(), labels = name)` was wrong: factor() sorts
# the levels as -n, ..., -1 and applies `labels` in that sorted order, so
# row i received the airline name belonging to row n + 1 - i.
ranked_airline_labels <-
  avg_good_weather_delays %>%
  transmute(carrier, name = factor(name, levels = rev(name)))

# Mean departure delay (with standard-error range) per airline.
# The join key and the summary function are stated explicitly; `mean_se` is
# what stat_summary() falls back to when no function is supplied.
good_weather_delays %>%
  inner_join(ranked_airline_labels, by = "carrier") %>%
  ggplot(aes(x = name, y = dep_delay)) +
  stat_summary(fun.data = mean_se) +
  coord_flip() +
  labs(
    x = "",
    y = "Average departure delay",
    title = "Departure delays under ideal weather conditions\nNYC airports, 2013"
  )

Solutions

library(tidyverse)
library(nycflights13)

## Task 1
# Colour by `class` so the solution reproduces the Task 1 plot shown in
# Exercise set 1 (this line previously coloured by `trans`).
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point()

## Task 2
# Red points with a smoother drawn through them.
mpg %>%
  ggplot(aes(x = displ, y = hwy)) +
  geom_point(colour = "red") +
  geom_smooth()

## Task 3
# Colour by `drv`, with a linear fit per colour group and no confidence band.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

## Task 4
# factor(cyl) makes colour a discrete mapping.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_point() +
  geom_smooth()

## Task 5
# Per-`factor(cyl)` linear fits on log10 axes with explicit breaks, one panel
# per `year` with relabelled strips, and custom axis/legend/title text.
mpg %>%
  ggplot(aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_log10(breaks = 2:7) +
  scale_y_log10(breaks = c(20, 30, 40)) +
  facet_wrap(
    ~year,
    labeller = as_labeller(
      c("1999" = "Model year 1999", "2008" = "Model year 2008")
    )
  ) +
  labs(
    x = "Displacement",
    y = "MPG, highway",
    colour = "Cylinders",
    title = "Fuel economy and engine size"
  )

## Task 6
# Keep eight columns of `mpg`.
mpg2 <- select(mpg, manufacturer, model, displ, year, cyl, trans, cty, hwy)

## Task 7
# Add squared displacement and displacement per cylinder (two decimals).
mpg3 <- mutate(
  mpg2,
  displ2 = displ^2,
  vol_per_cyl = round(displ / cyl, 2)
)

## Task 8
# All rows of `mpg3`, largest `vol_per_cyl` first.
mpg3 %>%
  arrange(desc(vol_per_cyl))

# The same ordering restricted to rows whose manufacturer is "chevrolet".
mpg3 %>%
  filter(manufacturer == "chevrolet") %>%
  arrange(desc(vol_per_cyl))

# Largest `vol_per_cyl` for each manufacturer/year pair.
# summarise() only removes the last grouping level, so ungroup() explicitly;
# otherwise `mpg4` stays grouped by `manufacturer` and that grouping leaks
# into every later step that builds on it.
mpg4 <- mpg3 %>%
  group_by(manufacturer, year) %>%
  summarise(max_vol_per_cyl = max(vol_per_cyl)) %>%
  ungroup()

## Task 9
# Spread the `year` values into columns filled with `max_vol_per_cyl`.
mpg5 <- spread(mpg4, key = year, value = max_vol_per_cyl)

## Task 10
# Difference between the `2008` and `1999` columns.
mpg6 <- mutate(mpg5, change = `2008` - `1999`)

## Task 11
# Rename the year columns, stack everything except `manufacturer` into
# variable/value pairs, and show a plain data frame.
mpg6 %>%
  rename(max_vpc_1999 = `1999`, max_vpc_2008 = `2008`) %>%
  gather(variable, value, -manufacturer) %>%
  as.data.frame()

## Task 12
# Trim both tables to the columns needed for the joins and later tasks.
flights2 <- flights %>%
  select(origin, year, month, day, hour, sched_dep_time, dep_delay, carrier)
weather2 <- weather %>%
  select(origin, year, month, day, hour, precip, wind_speed, visib)

# Join keys are spelled out instead of relying on dplyr's natural join, which
# silently uses every shared column name. These are exactly the columns the
# tables share.
flights2 %>%
  inner_join(airlines, by = "carrier")
flights2 %>%
  left_join(weather2, by = c("origin", "year", "month", "day", "hour"))

## Task 13
# Extremes of each weather measure: lowest precipitation and wind speed,
# highest visibility.
weather2 %>%
  summarise(
    min_precip = min(precip, na.rm = TRUE),
    min_wind = min(wind_speed, na.rm = TRUE),
    max_visib = max(visib, na.rm = TRUE)
  )

# Flights matched to their hourly weather, keeping only rows with zero
# precipitation, zero wind speed and visibility of 10. The join keys are
# explicit (they are the columns the two tables share) rather than inferred.
good_weather_delays <- flights2 %>%
  inner_join(weather2, by = c("origin", "year", "month", "day", "hour")) %>%
  filter(precip == 0 & wind_speed == 0 & visib == 10)

# Mean departure delay per carrier, largest first, with the airline name
# attached through an explicit `carrier` key.
avg_good_weather_delays <- good_weather_delays %>%
  group_by(carrier) %>%
  summarise(dep_delay = mean(dep_delay, na.rm = TRUE)) %>%
  arrange(desc(dep_delay)) %>%
  inner_join(airlines, by = "carrier")

## Task 14
# Carrier -> airline-name lookup whose factor levels follow the delay ranking.
# `avg_good_weather_delays` is sorted with the largest mean delay first, so
# reversing the names puts that carrier last in the levels, i.e. at the top
# of the flipped axis.
# The earlier `factor(-row_number(), labels = name)` was wrong: factor() sorts
# the levels as -n, ..., -1 and applies `labels` in that sorted order, so
# row i received the airline name belonging to row n + 1 - i.
ranked_airline_labels <- avg_good_weather_delays %>%
  transmute(carrier, name = factor(name, levels = rev(name)))

# Mean departure delay (with standard-error range) per airline.
# The join key and the summary function are stated explicitly; `mean_se` is
# what stat_summary() falls back to when no function is supplied.
good_weather_delays %>%
  inner_join(ranked_airline_labels, by = "carrier") %>%
  ggplot(aes(x = name, y = dep_delay)) +
  stat_summary(fun.data = mean_se) +
  coord_flip() +
  labs(
    x = "",
    y = "Average departure delay",
    title = "Departure delays under ideal weather conditions\nNYC airports, 2013"
  )


jasonmtroos/rook documentation built on May 24, 2020, 3:16 p.m.