In colinbousige/tutor: Interactive tutorials for learning R

# rmarkdown::run("vectors.Rmd")
library(learnr)
library(tidyverse)
gradethis::gradethis_setup()

Exercise 1

Consider two vectors x and y such as:

# Exercise data. `along.with` is written out in full instead of relying on
# partial matching of the abbreviated `along` argument name.
x <- 1:5
y <- seq(0, 4, along.with = x)

Without typing it into Rstudio, what are the values of x, y, and x*y?

# Same vectors as in the question above. The full argument name
# `along.with` is used so the call does not depend on partial matching.
x <- 1:5
y <- seq(0, 4, along.with = x)

Consider two vectors a and b such as:

a <- c(1,5,4,3,6)
b <- c(3,5,2,1,9)

Without typing it into RStudio, what is the value of `a<=b`?

a <- c(1, 5, 4, 3, 6)
b <- c(3, 5, 2, 1, 9)
___

x <- c(1:12, NA, 5:2)

Without typing it into RStudio, what is the value of `length(x)`?

x <- ___
___

a <- 12:5

Without typing it into RStudio, what is the value of `is.numeric(a)`?

a <- 12:5
is.numeric(a)

Consider two vectors x and y such as:

x <- 12:4
y <- c(0, 1, 2, 0, 1, 2, 0, 1, 2)

Without typing it into RStudio, what is the value of `which(!is.finite(x/y))`?

x <- 12:4
y <- c(0, 1, 2, 0, 1, 2, 0, 1, 2)
___

x <- c('blue', 'red', 'green', 'yellow')

Without typing it into RStudio, what is the value of `is.character(x)`?

x <- ___
___

x <- c('blue', 10, 'green', 20)

Without typing it into RStudio, what is the value of `is.character(x)`?

x <- ___
___

Assign value 5 to the variable x.

Is there a difference between `1:x-1` and `1:(x-1)`?

Explain.

___
___
___

Generate the sequence `9, 18, 27, 36, 45, 54, 63, 72, 81, 90` in 4 different ways.

c(___)
seq(___)
___:___
___

# Equivalent ways to build the multiples of 9 from 9 to 90.
# Explicit enumeration:
c(9, 18, 27, 36, 45, 54, 63, 72, 81, 90)
# Fixed step between consecutive values:
seq(9, 90, by = 9)
# Fixed number of values; `length.out` is spelled out in full rather than
# relying on partial matching of `length`:
seq(9, 90, length.out = 10)
# Scale the sequence 1..10:
seq(1, 10, 1) * 9
# `:` has higher precedence than `*`, so this is (1:10) * 9:
1:10 * 9

# Multiple-choice quiz on vector indexing. Each question() lists its candidate
# answers; the one flagged `correct = TRUE` is the expected choice and
# `incorrect` supplies the message passed for wrong choices.
quiz(
# Positive index vector: selects elements 2 and 3.
question("If `x <- c(\"w\", \"h\", \"f\", \"g\", \"k\")`, what will be the output for `x[c(2,3)]`?",
  answer("\"h\", \"f\"", correct = TRUE),
  answer("\"h\""),
  answer("\"f\""),
  incorrect = "Incorrect!"
),
# Repeated index: position 4 is requested twice, so it appears twice.
question("Without typing it into Rstudio, if `x <- c(\"w\", \"h\", \"f\", \"g\", \"k\")`, what will be the third value in the index vector operation `x[c(2, 4, 4)]`?",
  answer("\"h\""),
  answer("NA"),
  answer("\"g\"", correct = TRUE),
  incorrect = "Incorrect!"
),
# Negative index: drops element 2, leaving "w", "f", "g", "k".
question("Without typing it into Rstudio, if `x <- c(\"w\", \"h\", \"f\", \"g\", \"k\")`, what will be the fourth value in the index vector operation `x[-2]`",
  answer("\"h\""),
  answer("\"g\""),
  answer("\"k\"", correct = TRUE),
  incorrect = "Incorrect!"
)
)

Let `a <- c(2, 4, 6, 8)` and `b <- c(TRUE, FALSE, TRUE, FALSE)`.

Without typing it into RStudio, what will be the output of the R expression `max(a[b])`?

Exercise 2

We have the following x vector: x <- c("10K", "100K", "200K", "500K", "1000K"). Get rid of the "K" character and turn this into a numerical vector.

x <- c("10K", "100K", "200K", "500K", "1000K")

# Use gsub() and as.numeric()

x <- as.numeric(gsub("K", "", x))

grade_code()

Exercise 3

We have the following times vector: times <- c("010_min", "100_sec", "200_sec", "050_min"). We want a numerical vector containing times in seconds only.

Using substr(), create 2 vectors times_values and times_units containing the numbers and the units from times.

times <- c("010_min", "100_sec", "200_sec", "050_min")

times_values <- substr(string, start, stop)
times_units  <- ___

times_values <- as.numeric(substr(times, 1, 3))
times_units  <- substr(times, 5, 7)

grade_code()

You could do the same using strsplit() and unlist()

times <- c("010_min", "100_sec", "200_sec", "050_min")

# strsplit() returns a list with one character vector per input string;
# unlist() flattens that list into a single character vector.
strsplit("test", "e")
unlist(strsplit("test", "e"))

# The flattened split alternates value, unit, value, unit, ... A logical mask
# is recycled along the vector, so this selects every other element for any
# number of entries (no hard-coded 1:8, no comparison of a remainder to
# TRUE/FALSE).
times_values <- as.numeric(unlist(strsplit(times, "_"))[c(TRUE, FALSE)])
times_units  <- unlist(strsplit(times, "_"))[c(FALSE, TRUE)]

grade_code()

Now, using ifelse(test, yes, no), create the times_sec vector containing the numerical values of time all converted to seconds.

# Setup for the ifelse() exercise: split each "<value>_<unit>" string.
times <- c("010_min", "100_sec", "200_sec", "050_min")
# The flattened split alternates value, unit, value, unit, ... The recycled
# logical masks pick the odd and even positions for any length of `times`,
# instead of indexing a hard-coded 1:8.
times_values <- as.numeric(unlist(strsplit(times, "_"))[c(TRUE, FALSE)])
times_units <- unlist(strsplit(times, "_"))[c(FALSE, TRUE)]

times_sec <- ifelse(test, yes, no)

times_sec <- ifelse(times_units == "min", times_values*60, times_values)
times_sec

grade_code()

Finally, tidyr contains the separate() function, which is very useful for this kind of thing. However, separate() takes a table as its first argument, not a vector:

times <- c("010_min", "100_sec", "200_sec", "050_min")

tibble(times) %>% 
    separate(input_column, output_columns, other_options)

tibble(times) %>% 
    separate(times, c("values", "units"), convert = TRUE)

grade_code()

Exercise 4

Let's say we have this population data for these French cities for the two years 1962 and 2012:

# City names, in the same order as the two population vectors below.
cities <- c(
    "Angers", "Bordeaux", "Brest", "Dijon", "Grenoble",
    "Le Havre", "Le Mans", "Lille", "Lyon", "Marseille",
    "Montpellier", "Nantes", "Nice", "Paris", "Reims",
    "Rennes", "Saint-Etienne", "Strasbourg", "Toulon", "Toulouse"
)
# Population of each city in 1962.
pop_1962 <- c(
    115273, 278403, 136104, 135694, 156707,
    187845, 132181, 239955, 535746, 778071,
    118864, 240048, 292958, 2790091, 134856,
    151948, 210311, 228971, 161797, 323724
)
# Population of each city in 2012.
pop_2012 <- c(
    149017, 241287, 139676, 152071, 158346,
    173142, 143599, 228652, 496343, 852516,
    268456, 291604, 343629, 2240621, 181893,
    209860, 171483, 274394, 164899, 453317
)
# One row per city, then reshaped so that each year is a row and each city
# is a column.
pop_by_city <- tibble::tibble(cities, "1962" = pop_1962, "2012" = pop_2012)
pop_long <- pivot_longer(
    pop_by_city,
    cols = -cities, names_to = "year", values_to = "pop"
)
df <- pivot_wider(pop_long, names_from = "cities", values_from = "pop")
# The table is too wide for one display, so it is shown in two parts.
knitr::kable(df[, 1:10])
knitr::kable(df[, 11:ncol(df)])

Create a cities vector containing all the cities listed above:

cities <- c(___)

cities <- c(
    "Angers", "Bordeaux", "Brest", "Dijon", "Grenoble", "Le Havre",
    "Le Mans", "Lille", "Lyon", "Marseille", "Montpellier", "Nantes",
    "Nice", "Paris", "Reims", "Rennes", "Saint-Etienne", "Strasbourg",
    "Toulon", "Toulouse"
)

grade_code()

Create two vectors, pop_1962 and pop_2012, containing the population of each city in these years. Print the 2 vectors.

pop_1962 <- c(___)
pop_2012 <- c(___)

pop_1962 <- c(
    115273, 278403, 136104, 135694, 156707, 187845, 132181, 239955,
    535746, 778071, 118864, 240048, 292958, 2790091, 134856, 151948,
    210311, 228971, 161797, 323724
)
pop_2012 <- c(
    149017, 241287, 139676, 152071, 158346, 173142, 143599, 228652,
    496343, 852516, 268456, 291604, 343629, 2240621, 181893, 209860,
    171483, 274394, 164899, 453317
)

grade_code()

Use `names()` to name the values of pop_1962 and pop_2012. Print the 2 vectors again. Is there any change?

names(pop_1962) <- ___
names(pop_2012) <- ___

names(pop_1962) <- cities
names(pop_2012) <- cities
pop_1962
pop_2012

grade_code()

# City names, in the same order as the two population vectors below.
cities <- c(
    "Angers", "Bordeaux", "Brest", "Dijon", "Grenoble",
    "Le Havre", "Le Mans", "Lille", "Lyon", "Marseille",
    "Montpellier", "Nantes", "Nice", "Paris", "Reims",
    "Rennes", "Saint-Etienne", "Strasbourg", "Toulon", "Toulouse"
)
# Population of each city in 1962.
pop_1962 <- c(
    115273, 278403, 136104, 135694, 156707,
    187845, 132181, 239955, 535746, 778071,
    118864, 240048, 292958, 2790091, 134856,
    151948, 210311, 228971, 161797, 323724
)
# Population of each city in 2012.
pop_2012 <- c(
    149017, 241287, 139676, 152071, 158346,
    173142, 143599, 228652, 496343, 852516,
    268456, 291604, 343629, 2240621, 181893,
    209860, 171483, 274394, 164899, 453317
)
# Label every population value with its city so it can be looked up by name.
names(pop_1962) <- cities
names(pop_2012) <- cities

What are the cities with more than 200000 people in 1962? Save the list of these cities into a vector named cities200k. For these, how many residents were there in 2012?

cities200k <- ___

cities200k <- cities[pop_1962 > 200000]
cities200k
pop_2012[cities200k]

grade_code()

What is the population evolution of Montpellier and Nantes?

pop_2012 - pop_1962

pop_2012["Montpellier"] - pop_1962["Montpellier"]
pop_2012["Nantes"] - pop_1962["Nantes"]

grade_code()

Create a pop_diff vector to store population change between 1962 and 2012

pop_diff <- ___

pop_diff <- pop_2012 - pop_1962

grade_code()

# City names, in the same order as the two population vectors below.
cities <- c(
    "Angers", "Bordeaux", "Brest", "Dijon",
    "Grenoble", "Le Havre", "Le Mans", "Lille",
    "Lyon", "Marseille", "Montpellier", "Nantes",
    "Nice", "Paris", "Reims", "Rennes",
    "Saint-Etienne", "Strasbourg", "Toulon", "Toulouse"
)
# Population of each city in 1962.
pop_1962 <- c(
    115273, 278403, 136104, 135694,
    156707, 187845, 132181, 239955,
    535746, 778071, 118864, 240048,
    292958, 2790091, 134856, 151948,
    210311, 228971, 161797, 323724
)
# Population of each city in 2012.
pop_2012 <- c(
    149017, 241287, 139676, 152071,
    158346, 173142, 143599, 228652,
    496343, 852516, 268456, 291604,
    343629, 2240621, 181893, 209860,
    171483, 274394, 164899, 453317
)
# Label every population value with its city so it can be looked up by name.
names(pop_1962) <- cities
names(pop_2012) <- cities
# Element-wise change per city between the two years (negative = decline).
pop_diff <- pop_2012 - pop_1962

Print cities with a negative change

pop_diff

cities[pop_diff < 0]

grade_code()

Print cities which broke the 300000 people barrier between 1962 and 2012

pop_2012 > 300000

cities[pop_2012 > 300000 & pop_1962 < 300000]

grade_code()

Compute the total change in population of the 10 largest cities (as of 1962) between 1962 and 2012.

order(pop_1962)
ten_largest <- cities[___]
sum(___)

ten_largest <- cities[order(pop_1962, decreasing = TRUE)[1:10]]
sum(pop_2012[ten_largest] - pop_1962[ten_largest])

grade_code()

Compute the population mean for year 1962

mean(pop_1962)

grade_code()

Compute the population mean of Paris over these two years

mean(___)

mean(c(pop_1962['Paris'], pop_2012['Paris']))

grade_code()

Sort the cities by decreasing order of population for 1962

sort(pop_1962, decreasing = TRUE)

grade_code()

Exercise 5

Let's create a named vector containing the ages of the students in the class, with each value named after the student's first name.

# Ages keyed by first name; add one `name = age` entry per student.
students <- c(
    John = 35,
    Jane = 36
)
# Display the named vector.
students

Then (do it in Rstudio this time):

Compute the average age and its standard deviation
Compute the median age
What is the maximum, minimum and range of the ages in the class?
What are all the student names in the class?
Print the sorted ages by increasing and decreasing order
Print the ages sorted by alphabetically ordered names (increasing and decreasing)
Show a histogram of the age distribution using `hist()` and play with the `breaks` parameter to modify the histogram
Show a boxplot of the age distribution using `boxplot()`