# rmarkdown::run("vectors.Rmd")
library(learnr)
library(tidyverse)
gradethis::gradethis_setup()

Exercise 1

Consider two vectors x and y such as:

# x counts from 1 to 5; y goes from 0 to 4 in as many steps as x has elements.
# `along.with` is written in full: the short form `along` only works through
# partial argument matching.
x <- 1:5
y <- seq(0, 4, along.with = x)

Without typing it into RStudio, what are the values of x, y, and x*y?

# Starting code for the exercise: x counts from 1 to 5, y spans 0 to 4 with
# one value per element of x (`along.with` spelled out, no partial matching).
x <- 1:5
y <- seq(0, 4, along.with = x)

Consider two vectors a and b such as:

# Two numeric vectors of five elements each.
a <- c(1, 5, 4, 3, 6)
b <- c(3, 5, 2, 1, 9)

Without typing it into RStudio, what is the value of `a<=b`{.R}?

# Same two vectors as in the question; the `___` line is presumably the blank
# where the comparison expression is to be typed.
a <- c(1, 5, 4, 3, 6)
b <- c(3, 5, 2, 1, 9)
___

If

# Concatenates the range 1:12, a single NA, and the descending range 5:2.
x <- c(1:12, NA, 5:2)

Without typing it into RStudio, what is the value of `length(x)`{.R}?

x <- ___
___

If

# Descending range from 12 down to 5.
a <- 12:5

Without typing it into RStudio, what is the value of `is.numeric(a)`{.R}?

# Same vector as in the question, followed by the type test being asked about.
a <- 12:5
is.numeric(a)

Consider two vectors x and y such as:

# x runs from 12 down to 4; y repeats 0, 1, 2 three times. Both have 9 elements.
x <- 12:4
y <- c(0, 1, 2, 0, 1, 2, 0, 1, 2)

Without typing it into RStudio, what is the value of `which(!is.finite(x/y))`{.R}?

# Same x and y as in the question; the `___` line is presumably the blank for
# the expression to evaluate.
x <- 12:4
y <- c(0, 1, 2, 0, 1, 2, 0, 1, 2)
___

If

# Four colour names stored as strings.
x <- c("blue", "red", "green", "yellow")

Without typing it into RStudio, what is the value of `is.character(x)`{.R}?

x <- ___
___

If

# Strings and numbers combined in a single call to c().
x <- c("blue", 10, "green", 20)

Without typing it into RStudio, what is the value of `is.character(x)`{.R}?

x <- ___
___

Assign value 5 to the variable x.

Is there a difference between `1:x-1`{.R} and `1:(x-1)`{.R}?

Explain.

___
___
___

Generate the sequence `9, 18, 27, 36, 45, 54, 63, 72, 81, 90`{.R} in 4 different ways.

c(___)
seq(___)
___:___
___
# Equivalent ways to build the multiples of 9 from 9 to 90.
# Written out element by element:
c(9, 18, 27, 36, 45, 54, 63, 72, 81, 90)
# From 9 to 90 in steps of 9:
seq(9, 90, by = 9)
# From 9 to 90 in 10 evenly spaced values; `length.out` is spelled out rather
# than relying on partial matching of `length`.
seq(9, 90, length.out = 10)
# Scale the sequence 1..10 by 9 (`:` binds tighter than `*`).
seq(1, 10, 1) * 9
1:10 * 9

# Multiple-choice quiz on indexing a character vector with positive,
# repeated, and negative indices. Every question reuses the same vector `x`.
quiz(
  # Positive indices: elements at positions 2 and 3.
  question(
    "If `x <- c(\"w\", \"h\", \"f\", \"g\", \"k\")`, what will be the output for `x[c(2,3)]`?",
    answer("\"h\", \"f\"", correct = TRUE),
    answer("\"h\""),
    answer("\"f\""),
    incorrect = "Incorrect!"
  ),
  # Repeated index: position 4 is requested twice.
  question(
    "Without typing it into Rstudio, if `x <- c(\"w\", \"h\", \"f\", \"g\", \"k\")`, what will be the third value in the index vector operation `x[c(2, 4, 4)]`?",
    answer("\"h\""),
    answer("NA"),
    answer("\"g\"", correct = TRUE),
    incorrect = "Incorrect!"
  ),
  # Negative index: everything except position 2.
  question(
    "Without typing it into Rstudio, if `x <- c(\"w\", \"h\", \"f\", \"g\", \"k\")`, what will be the fourth value in the index vector operation `x[-2]`",
    answer("\"h\""),
    answer("\"g\""),
    answer("\"k\"", correct = TRUE),
    incorrect = "Incorrect!"
  )
)

Let `a <- c(2, 4, 6, 8)`{.R} and `b <- c(TRUE, FALSE, TRUE, FALSE)`{.R}.

Without typing it into RStudio, what will be the output for the R expression `max(a[b])`{.R}?


Exercise 2

We have the following x vector: x <- c("10K", "100K", "200K", "500K", "1000K"). Get rid of the "K" character and turn this into a numerical vector.

# Character vector of amounts, each ending with the letter "K".
x <- c("10K", "100K", "200K", "500K", "1000K")
x
# Use gsub() and as.numeric()
# gsub() replaces every "K" with the empty string; as.numeric() then converts
# the remaining digit strings to numbers.
x <- as.numeric(gsub("K", "", x))
grade_code()

Exercise 3

We have the following times vector: times <- c("010_min", "100_sec", "200_sec", "050_min"). We want a numerical vector containing times in seconds only.

Using substr(), create 2 vectors times_values and times_units containing the numbers and the units from times.

# Each element is a 3-digit zero-padded number, an underscore, and a 3-letter unit.
times <- c("010_min", "100_sec", "200_sec", "050_min")
# Call shape of substr(): the string, then the first and last character positions.
times_values <- substr(string, start, stop)
times_units  <- ___
# Characters 1 to 3 hold the number, converted from text to numeric.
times_values <- as.numeric(substr(times, 1, 3))
# Characters 5 to 7 hold the unit; character 4 is the "_" separator.
times_units  <- substr(times, 5, 7)
grade_code()

You could do the same using strsplit() and unlist()

times <- c("010_min", "100_sec", "200_sec", "050_min")
# strsplit() returns a list holding one character vector of pieces per input string.
strsplit("test", "e")
# unlist() flattens that list into a single character vector.
unlist(strsplit("test", "e"))
# Splitting on "_" and flattening gives value, unit, value, unit, ...
# `1:8 %% 2 == TRUE` is TRUE at odd positions (the values).
# NOTE(review): 1:8 hard-codes 2 * length(times) and must change if `times` grows.
times_values <- as.numeric(unlist(strsplit(times, "_"))[1:8 %% 2 == TRUE])
# `1:8 %% 2 == FALSE` is TRUE at even positions (the units).
times_units  <- unlist(strsplit(times, "_"))[1:8%%2==FALSE]
grade_code()

Now, using ifelse(test, yes, no), create the times_sec vector containing the numerical values of time all converted to seconds.

times <- c("010_min", "100_sec", "200_sec", "050_min")
# Odd positions of the split-and-flattened vector are the numbers ...
times_values <- as.numeric(unlist(strsplit(times, "_"))[1:8 %% 2 == TRUE])
# ... and even positions are the units.
times_units <- unlist(strsplit(times, "_"))[1:8 %% 2 == FALSE]
# Call shape of ifelse(): a logical test, the value where TRUE, the value where FALSE.
times_sec <- ifelse(test, yes, no)
# Values whose unit is "min" are multiplied by 60; the others are kept unchanged.
times_sec <- ifelse(times_units == "min", times_values*60, times_values)
times_sec
grade_code()

Finally, tidyr contains the separate() function, which is very useful for this kind of thing. However, separate() takes a table as its first argument, not a vector:

times <- c("010_min", "100_sec", "200_sec", "050_min")
# Wrap the vector in a one-column tibble so it can be piped into separate().
# The argument names below are placeholders showing the call shape.
tibble(times) %>% 
    separate(input_column, output_columns, other_options)
# Split column `times` into two new columns, `values` and `units`;
# convert = TRUE asks separate() to convert the types of the new columns.
tibble(times) %>% 
    separate(times, c("values", "units"), convert = TRUE)
grade_code()

Exercise 4

Let's say we have this population data for these French cities for the two years 1962 and 2012:

# Population of 20 French cities in 1962 and 2012, held in three parallel
# vectors that share the same city order.
cities <- c(
  "Angers", "Bordeaux", "Brest", "Dijon", "Grenoble",
  "Le Havre", "Le Mans", "Lille", "Lyon", "Marseille",
  "Montpellier", "Nantes", "Nice", "Paris", "Reims",
  "Rennes", "Saint-Etienne", "Strasbourg", "Toulon", "Toulouse"
)
pop_1962 <- c(
  115273, 278403, 136104, 135694, 156707,
  187845, 132181, 239955, 535746, 778071,
  118864, 240048, 292958, 2790091, 134856,
  151948, 210311, 228971, 161797, 323724
)
pop_2012 <- c(
  149017, 241287, 139676, 152071, 158346,
  173142, 143599, 228652, 496343, 852516,
  268456, 291604, 343629, 2240621, 181893,
  209860, 171483, 274394, 164899, 453317
)
# Reshape long, then wide, so that each year becomes a row and each city a column.
df <- tibble::tibble(cities, "1962" = pop_1962, "2012" = pop_2012) %>%
  pivot_longer(cols = -cities, names_to = "year", values_to = "pop") %>%
  pivot_wider(names_from = "cities", values_from = "pop")
# Show the table in two pieces: the first 10 columns, then the remaining ones.
knitr::kable(df[, seq_len(10)])
knitr::kable(df[, seq(11, ncol(df))])

Create a cities vector containing all the cities listed above:

cities <- c(___)
# Character vector holding the 20 city names.
cities <- c(
    "Angers", "Bordeaux", "Brest", "Dijon", "Grenoble", "Le Havre",
    "Le Mans", "Lille", "Lyon", "Marseille", "Montpellier", "Nantes",
    "Nice", "Paris", "Reims", "Rennes", "Saint-Etienne", "Strasbourg",
    "Toulon", "Toulouse"
)
grade_code()

Create two vectors, pop_1962 and pop_2012, containing the population of each city in those years. Print the 2 vectors.

pop_1962 <- c(___)
pop_2012 <- c(___)
# 1962 populations, one value per city, in the same order as `cities`.
pop_1962 <- c(
    115273, 278403, 136104, 135694, 156707, 187845, 132181, 239955,
    535746, 778071, 118864, 240048, 292958, 2790091, 134856, 151948,
    210311, 228971, 161797, 323724
)
# 2012 populations, in the same city order.
pop_2012 <- c(
    149017, 241287, 139676, 152071, 158346, 173142, 143599, 228652,
    496343, 852516, 268456, 291604, 343629, 2240621, 181893, 209860,
    171483, 274394, 164899, 453317
)
grade_code()

Use `names()`{.R} to name the values of pop_1962 and pop_2012. Print the 2 vectors again. Is there any change?

names(pop_1962) <- ___
names(pop_2012) <- ___
# Label each population with its city so values can be looked up by name.
names(pop_1962) <- cities
names(pop_2012) <- cities
# Print both vectors again, now that they carry names.
pop_1962
pop_2012
grade_code()
# Rebuild the three data vectors and name both population vectors by city.
cities <- c(
  "Angers", "Bordeaux", "Brest", "Dijon", "Grenoble",
  "Le Havre", "Le Mans", "Lille", "Lyon", "Marseille",
  "Montpellier", "Nantes", "Nice", "Paris", "Reims",
  "Rennes", "Saint-Etienne", "Strasbourg", "Toulon", "Toulouse"
)
pop_1962 <- c(
  115273, 278403, 136104, 135694, 156707,
  187845, 132181, 239955, 535746, 778071,
  118864, 240048, 292958, 2790091, 134856,
  151948, 210311, 228971, 161797, 323724
)
pop_2012 <- c(
  149017, 241287, 139676, 152071, 158346,
  173142, 143599, 228652, 496343, 852516,
  268456, 291604, 343629, 2240621, 181893,
  209860, 171483, 274394, 164899, 453317
)
# Same names on both vectors, assigned one vector at a time.
names(pop_1962) <- cities
names(pop_2012) <- cities

What are the cities with more than 200000 people in 1962? Save the list of these cities into a vector named cities200k. For these, how many residents were there in 2012?

cities200k <- ___
# A logical mask on the 1962 populations selects the matching city names.
cities200k <- cities[pop_1962 > 200000]
cities200k
# Index the 2012 vector by name to get the 2012 populations of those cities.
pop_2012[cities200k]
grade_code()

What is the population evolution of Montpellier and Nantes?

# Element-wise difference between the two years, for every city.
pop_2012 - pop_1962
# The same difference for a single city, looked up by name.
pop_2012["Montpellier"] - pop_1962["Montpellier"]
pop_2012["Nantes"] - pop_1962["Nantes"]
grade_code()

Create a pop_diff vector to store population change between 1962 and 2012

pop_diff <- ___
# Per-city change: 2012 population minus 1962 population.
pop_diff <- pop_2012 - pop_1962
grade_code()

# Rebuild the data vectors, name the populations by city, and compute the
# per-city change between the two years.
cities <- c(
  "Angers", "Bordeaux", "Brest", "Dijon", "Grenoble",
  "Le Havre", "Le Mans", "Lille", "Lyon", "Marseille",
  "Montpellier", "Nantes", "Nice", "Paris", "Reims",
  "Rennes", "Saint-Etienne", "Strasbourg", "Toulon", "Toulouse"
)
pop_1962 <- c(
  115273, 278403, 136104, 135694, 156707,
  187845, 132181, 239955, 535746, 778071,
  118864, 240048, 292958, 2790091, 134856,
  151948, 210311, 228971, 161797, 323724
)
pop_2012 <- c(
  149017, 241287, 139676, 152071, 158346,
  173142, 143599, 228652, 496343, 852516,
  268456, 291604, 343629, 2240621, 181893,
  209860, 171483, 274394, 164899, 453317
)
# Same names on both vectors, assigned one vector at a time.
names(pop_1962) <- cities
names(pop_2012) <- cities
# 2012 minus 1962, element by element; the result keeps the city names.
pop_diff <- pop_2012 - pop_1962

Print cities with a negative change

# Show every per-city change first.
pop_diff
# Keep only the names of cities whose change is below zero.
cities[pop_diff < 0]
grade_code()

Print cities which broke the 300000 people barrier between 1962 and 2012

# Logical vector: TRUE for cities above 300000 inhabitants in 2012.
pop_2012 > 300000
# Cities above 300000 in 2012 that were below 300000 in 1962.
cities[pop_2012 > 300000 & pop_1962 < 300000]
grade_code()

Compute the total change in population of the 10 largest cities (as of 1962) between 1962 and 2012.

# order() returns the positions that would sort the vector (ascending by default).
order(pop_1962)
ten_largest <- cities[___]
sum(___)
# Positions of the 10 largest 1962 populations, mapped to their city names.
ten_largest <- cities[order(pop_1962, decreasing = TRUE)[1:10]]
# Total of the per-city 2012 minus 1962 differences over those 10 cities.
sum(pop_2012[ten_largest] - pop_1962[ten_largest])
grade_code()

Compute the population mean for year 1962


# Arithmetic mean of the 1962 populations over all cities.
mean(pop_1962)
grade_code()

Compute the population mean of Paris over these two years

mean(___)
# Combine the two Paris values (looked up by name) and average them.
mean(c(pop_1962['Paris'], pop_2012['Paris']))
grade_code()

Sort the cities by decreasing order of population for 1962


# Sort the 1962 populations from largest to smallest.
sort(pop_1962, decreasing = TRUE)
grade_code()

Exercise 5

Let's create a named vector containing age of the students in the class, the names of each value being the first name of the students.

# Named numeric vector: each name is a student's first name, each value an age.
students <- c(John = 35, Jane = 36)
students

Then (do it in Rstudio this time):



colinbousige/tutor documentation built on Jan. 29, 2023, 7:35 p.m.