library(tidyverse)
library(edr)

2.1. Programming Basics and Package Installation

Listing 2.1. Assigning a single number to a_number.

# Bind the numeric value 5 to the name `a_number`
a_number <- 5

# Evaluating a bare name at the console prints the value it holds
a_number

Listing 2.2. Assigning a vector of three numbers to three_numbers.

# c() combines its arguments into a single vector; this one has three numbers
three_numbers <- c(1, 2, 3)

# Print the whole vector
three_numbers

Listing 2.3. Assigning a character string to one_word.

# Character strings are written between double quotes
one_word <- "hello"

# Print the stored string
one_word

Listing 2.4. Replacing the one_word variable with a different value.

# Assigning to a name that already exists overwrites its earlier value
one_word <- "hi"

# Printing now shows the replacement value
one_word

2.2. Using dplyr to Transform Data

Listing 2.5. Loading the tidyverse and edr packages.

library(tidyverse)
library(edr)

Listing 2.6. The sw dataset can be printed to the console by using sw.

# Evaluating the dataset's name prints it to the console.
# NOTE(review): `sw` is not defined in this file; presumably it is supplied by
# the edr package loaded above — confirm.
sw

Listing 2.7. The filter() function, and filtering by a single value in the species column of the sw dataset.

# Keep only the rows of `sw` whose `species` value is exactly "Droid"
filter(sw, species == "Droid")

Listing 2.8. Filtering to only keep rows where the height column has values greater than 220.

# Keep only the rows where `height` exceeds 220 (a strict comparison, so a
# height of exactly 220 is excluded)
filter(sw, height > 220)

Listing 2.9. Filtering characters by either height above 210, or, mass above 120.

# `|` is the element-wise OR: a row is kept when either condition holds
filter(sw, height > 210 | mass > 120)

Listing 2.10. Get all Droid and Human characters (method 1 with a [expr | expr] construction).

# Two equality tests on the same column joined with `|`: a row is kept when
# `species` is "Droid" or when it is "Human"
filter(sw, species == "Droid" | species == "Human")

Listing 2.11. Get all Droid and Human characters (method 2 with [colname %in% c(...)] construction).

# `%in%` tests membership in a set of values, replacing a chain of `==`
# comparisons joined by `|`
filter(sw, species %in% c("Droid", "Human"))

Listing 2.12. Arranging characters by increasing height.

# Sort the rows by `height`; arrange() orders ascending unless desc() is used
arrange(sw, height)

Listing 2.13. Arranging characters by hair color and then by gender.

# Sort by `hair_color` first; `gender` only breaks ties within a hair color
arrange(sw, hair_color, gender)

Listing 2.14. Arranging by descending hair_color, then by ascending gender.

# desc() reverses the order for `hair_color` only; `gender` is still ascending
arrange(sw, desc(hair_color), gender)

Listing 2.15. Selecting the name, gender, and species columns.

# Keep only the three named columns, in the order they are listed
select(sw, name, gender, species)

Listing 2.16. Selecting the gender, species, and name columns (in that order).

# The output columns follow the order given in the call:
# `gender`, `species`, then `name`
select(sw, gender, species, name)

Listing 2.17. Selecting the name column and any additional columns ending with "s".

# ends_with("s") is a selection helper that picks columns by a name suffix
select(sw, name, ends_with("s"))

Listing 2.18. Selecting the name column and any additional columns containing a "_" character.

# contains("_") picks every column whose name includes an underscore
select(sw, name, contains("_"))

Listing 2.19. Selecting three specific columns and then everything() else after that.

# everything() selects all remaining columns, so no column is dropped; the
# three named columns are simply moved to the front
select(sw, name, homeworld, gender, everything())

Listing 2.20. Create the sw_small table containing just three columns.

# Store a narrower copy of `sw` holding only the three columns needed for the
# BMI calculations in the listings that follow
sw_small <- select(sw, name, height, mass)
# Print the new table
sw_small

Listing 2.21. Add the bmi column to sw_small using the mutate() function.

# Add a `bmi` column computed as mass / height^2, with `height` divided by 100
# before squaring (presumably converting centimetres to metres, with `mass` in
# kilograms — confirm against the dataset documentation).
# The result is only printed; `sw_small` itself is not reassigned.
mutate(sw_small, bmi = mass / (height / 100)^2)

Listing 2.22. Add the bmi column as before and then create bmi_rnd.

# mutate() can create several columns in one call, and a later expression may
# refer to a column created earlier in the same call
mutate(
  sw_small,
  bmi = mass / (height / 100)^2,
  # `bmi_rnd` is `bmi` rounded to zero decimal places
  bmi_rnd = round(bmi, 0)
)

Listing 2.23. Grouping by the species variable and summarizing to get the mean mass.

# Create grouped data: the table is grouped by `species`
by_species <- group_by(sw, species)

# Create a data summary that gets the mean mass by `species`; no `na.rm` is
# given here, so any group containing a missing `mass` gets NA as its mean
summarize(by_species, avg_mass = mean(mass))

Listing 2.24. A slight modification to the summary expression to deal with missing (NA) values.

# `na.rm = TRUE` makes mean() skip missing values instead of returning NA;
# this reuses the `by_species` grouping created in the previous listing
summarize(by_species, avg_mass = mean(mass, na.rm = TRUE))

Listing 2.25. Grouping by two columns and obtaining a summary with two new columns.

# Create grouped data; grouping by `species` and `gender`
grouping <- group_by(sw, species, gender)

# Summarize to get average mass and average height for each species/gender
# combination. `.groups = "drop_last"` spells out the default regrouping: the
# result stays grouped by `species` only (the last grouping level, `gender`,
# is dropped), and summarize() no longer emits its informational message
# about the grouping of the output.
summarize(
  grouping,
  avg_mass = mean(mass, na.rm = TRUE),
  avg_height = mean(height, na.rm = TRUE),
  .groups = "drop_last"
)

Listing 2.26. Bringing it all together with multiple dplyr functions and assigning each step to a different object.

# Get average BMI values for humans across the different worlds

# Step 1: keep only the human characters
humans <- filter(sw, species == "Human")

# Step 2: add a `bmi` column
humans_bmi <- mutate(humans, bmi = mass / (height / 100)^2)

# Step 3: group the rows by `homeworld`
humans_bmi_by_homeworld <- group_by(humans_bmi, homeworld)

# Step 4: average `bmi` within each homeworld, ignoring missing values
humans_avg_bmi_by_homeworld <- 
  summarize(humans_bmi_by_homeworld, avg_bmi = mean(bmi, na.rm = TRUE))

# Step 5: sort so that the highest average BMI comes first
humans_avg_bmi_by_homeworld_sorted <-
  arrange(humans_avg_bmi_by_homeworld, desc(avg_bmi))

# Print the final table
humans_avg_bmi_by_homeworld_sorted

Listing 2.27. Bringing it all together with multiple dplyr functions: this time using the pipe operator.

# The filter -> mutate -> group_by -> summarize -> arrange sequence written as
# one chain: `%>%` passes the result on its left as the first argument of the
# call on its right, so no intermediate objects are needed
bmi_sorted <-
  sw %>%
  filter(species == "Human") %>%
  mutate(bmi = mass / (height / 100)^2) %>%
  group_by(homeworld) %>%
  summarize(avg_bmi = mean(bmi, na.rm = TRUE)) %>%
  arrange(desc(avg_bmi))

# Print the final table
bmi_sorted

Listing 2.28. Using pseudocode to demonstrate the readability of the code in the previous listing.

# Get a table of average BMI values for humans across
# the different worlds in a single, piped expression;
# The following is written as pseudocode
# (don’t run it, just read it)

# {assigning to bmi_sorted} <-
#   {start with the sw table} %{then}%
#   {keep only the rows where species is “Human”} %{then}%
#   {create a new column called bmi using a calculation} %{then}%
#   {create a group for each unique value in homeworld} %{then}%
#   {for each group create a summary table (one col: avg_bmi)} %{then}%
#   {order the rows by decreasing avg_bmi value}
# 
# {view bmi_sorted}

2.3. Creating our own Tabular Data

Listing 2.29. Using tibble() with equal-length vectors to make a tibble.

# Build a two-column tibble from vectors of equal length (four values each):
# `a` holds numbers and `b` holds character strings
tibble( 
  a = c(3, 5, 2, 6),
  b = c("a", "b", "g", "b")
)

Listing 2.30. Using tibble() with two vectors: one of length 1 and the other of length 4.

# `a` is a single value while `b` has four; tibble() repeats a length-1 input
# so that it fills every row
tibble(
  a = 3,
  b = c("a", "b", "g", "b")
)

Listing 2.31. Using tibble() with two vectors that contain NA values.

# NA marks a missing value and can sit inside a vector of any type: here once
# in the numeric column `a` and once in the character column `b`
tibble(
  a = c(3, 5, 2, NA),
  b = c("a", NA, "g", "b")
)

Listing 2.32. Using a single-length vector with an NA value in tibble().

# A bare `NA` is a logical constant in R, so the single value given for `a`
# yields a logical column of missing values
tibble(
  a = NA,
  b = c("a", "b", "g", "b")
)

Listing 2.33. Using a single-length vector with a character-typed NA value (NA_character_) in tibble().

# `NA_character_` is the character-typed missing value, so column `a` is a
# character column rather than a logical one
tibble(
  a = NA_character_,
  b = c("a", "b", "g", "b")
)

Listing 2.34. Creating a tibble using the tribble() function.

# tribble() lays the table out row by row: the `~name` entries on the first
# line name the columns and each following line supplies one row of values.
# NOTE(review): the trailing comma after the last row relies on tribble()
# accepting an empty final argument — confirm with the tibble version in use.
tribble(
  ~a, ~b,
  3,  "a",
  5,  "b",
  2,  "g",
  6,  "b",
)


rich-iannone/rwr documentation built on Jan. 22, 2021, 7:51 p.m.