library(learnr)
library(gradethis)
library(data.table)
library(tibble)
# Do not echo chunk source code in the rendered tutorial.
knitr::opts_chunk$set(echo = FALSE)

# Feedback defaults passed to gradethis: praise on success, encouragement on
# failure, no automatic hints and no automatic code feedback.
gradethis_setup(
  pass.praise = TRUE,
  fail.hint = FALSE,
  fail.encourage = TRUE,
  maybe_code_feedback = FALSE
)

Data import - content quiz

# Content quiz on data import: five single-choice questions, each with
# exactly one answer flagged `correct = TRUE`.
quiz(
  # Q1: reading csv files.
  question(
    "Which function from which package should you use to import `csv`-files?",
    answer("`data.table::fread()`", correct = TRUE),
    answer("`data.table::fwrite()`"),
    answer("`dplyr::fread()`"),
    answer("`readr::read_csv()`"),
    answer("`tidyverse::fread()`"),
    answer("`read.csv()`")
  ),
  # Q2: writing csv files.
  question(
    "Which function should you use to save `csv`-files?",
    answer("`data.table::fread()`"),
    answer("`data.table::fwrite()`", correct = TRUE),
    answer("`saveRDS()`"),
    answer("`save()`"),
    answer("`tidyverse::fwrite()`"),
    answer("`data.table::write_csv()`")
  ),
  # Q3: single-object vs. multi-object storage formats.
  question(
    "What is the difference between `.Rds`-files and `.Rdata`-files?",
    answer(paste("`.Rds`-files contain single R objects,",
                 "`.Rdata`-files may contain several"),
           correct = TRUE),
    answer(paste("`.Rdata`-files contain single R objects,",
                 "`.Rds`-files may contain several")),
    answer(paste("`.Rdata`-files can be loaded without additional packages,",
                 "`.Rds`-files require the package `tidyverse`.")),
    answer(paste("`.Rdata`-files are faster to read and write, but the",
                 "compression level of `.Rds`-files is much better."))
  ),
  # Q4: return behaviour of the two readers.
  # NOTE(review): `load()` invisibly returns the names of the objects it
  # restored, so "does not return anything" is a simplification.
  question(paste0(
    "To read `.Rds`-files you use `readRDS()`, to read `.Rdata`-files you use `load()`. ",
    "In what sense do these functions differ in terms of their behavior?"),
    answer("`readRDS()` returns an object, `load()` does not return anything.",
           correct = TRUE),
    answer("`load()` returns an object, `readRDS()` does not return anything."),
    answer("Both behave the same way."),
    answer(paste("`load()` returns objects of type `data.frame`,",
                 "the objects of `readRDS()` retain their original type."))
  ),
  # Q5: class of the object produced by fread().
  question(
    "What is the default type of the objects returned by `data.table::fread()`?",
    answer("`data.table`", correct = TRUE),
    answer("`data.frame`"),
    answer("`tibble`"),
    answer("`table`"),
    answer("Depends on the input file.")
  )
)

Data import - coding exercises

For the following exercises, assume that your project has the following directory structure with all data sets being located directly within the folder data, and you are working from within the script myscript.R:

Exercise6
│   Exercise6.Rproj    
│
├───data
│   │   growth_data_1.csv
│   │   product_complexity.csv
│   │   unemp_pop.csv
└───R
    │   myscript.R

Attention: Due to the way these exercises were created you cannot use the function here(), as you would usually do. Thus, use relative paths from the project main directory, i.e. the paths you would normally pass to the function here().

Importing csv files I

Import the file growth_data_1.csv, assign to the resulting object the name growth_data, transform it into a tibble, and call the object by its name. The file growth_data_1.csv looks like this:

country;Growth;year
Germany;-4.56961671737398;2020
Germany;1.05550824713283;2019
Germany;1.08602451412267;2018
Germany;2.68023111405891;2017
Germany;2.22999986782015;2016
Germany;1.49193152760772;2015

Hint: Make sure you set the decimal sign and the column separator explicitly!


growth_data <- ____(
  ____ = ____, 
  ____ = ____, 
  ____ = ____)
growth_data <- ____(growth_data)
growth_data
growth_data <- data.table::____(
  ____ = ____, 
  ____ = ____, 
  ____ = ____)
growth_data <- ____(growth_data)
growth_data
growth_data <- data.table::fread(
  ____ = ____, 
  ____ = ____, 
  ____ = ____)
growth_data <- ____(growth_data)
growth_data
growth_data <- data.table::fread(
  ____ = "data/growth_data_1.csv", 
  ____ = ____, 
  ____ = ____)
growth_data <- ____(growth_data)
growth_data
growth_data <- data.table::fread(
  ____ = "data/growth_data_1.csv", 
  sep = ____, 
  ____ = ____)
growth_data <- ____(growth_data)
growth_data
growth_data <- data.table::fread(
  ____ = "data/growth_data_1.csv", 
  sep = ____, 
  dec = ____)
growth_data <- ____(growth_data)
growth_data
growth_data <- data.table::fread(
  ____ = "data/growth_data_1.csv", 
  sep = ____, 
  dec = ____)
growth_data <- tibble::____(growth_data)
growth_data
growth_data <- data.table::fread(
  ____ = "data/growth_data_1.csv", 
  sep = ____, 
  dec = ____)
growth_data <- tibble::as_tibble(growth_data)
growth_data
growth_data <- data.table::fread(
  file = "data/growth_data_1.csv", 
  sep = ____, 
  dec = ____)
growth_data <- tibble::as_tibble(growth_data)
growth_data
growth_data <- data.table::fread(
  file = "data/growth_data_1.csv", 
  sep = ____, 
  dec = ____)
growth_data <- tibble::as_tibble(growth_data)
growth_data
growth_data <- data.table::fread(
  file = "data/growth_data_1.csv", 
  sep = ";", 
  dec = ____)
growth_data <- tibble::as_tibble(growth_data)
growth_data
# Read the file: columns are separated by ";" and "." is the decimal sign.
growth_data <- data.table::fread(
  file = "data/growth_data_1.csv", 
  sep = ";", 
  dec = ".")
# Turn the imported table into a tibble.
growth_data <- tibble::as_tibble(growth_data)
# Calling the object by its name makes it the result of the chunk.
growth_data
# Reference object: the grader for this exercise compares the learner's result
# against `growth_data` with identical().
growth_data <- data.table::fread(
  file = "data/growth_data_1.csv",
  sep = ";",
  dec = "."
)
growth_data <- tibble::as_tibble(growth_data)

# Grades the `growth_data` import exercise: a series of targeted failure
# checks, then a pass only if the result is identical to the reference
# `growth_data`; anything else falls through to a generic failure.
grade_this({
  # TRUE when the submitted code text matches the regular expression.
  code_matches <- function(pattern) {
    stringr::str_detect(.user_code, pattern)
  }

  # Feedback texts, one per targeted check.
  msg_uses_here <- paste0(
    "Your code should not use the function `here::here()`, although ",
    "you should usually use this function. But for the sake of this ",
    "exercise simply use the relative paths you would usually have ",
    "passed to `here::here()`."
  )
  msg_not_tibble <- paste0(
    "Your solution is not a tibble. Did you forget to return it? Or ",
    "did you forget to transform the imported data into a `tibble`? ",
    "The package `tibble` contains a function with which you can ",
    "transform data frames into a `tibble`!"
  )
  msg_no_fread <- paste0(
    "You need to use the function `fread()` from the package ",
    "`data.table` to import the data!"
  )
  msg_no_sep <- paste0(
    "You forgot to set the column separator explicitly. Check out ",
    "the possible arguments to `data.table::fread()`!"
  )
  msg_no_dec <- paste0(
    "You forgot to set the decimal sign explicitly. Check out ",
    "the possible arguments to `data.table::fread()`!"
  )

  # The first matching check determines the feedback.
  fail_if(code_matches("here\\("), message = msg_uses_here)
  fail_if(!tibble::is_tibble(.result), message = msg_not_tibble)
  fail_if(!code_matches("fread"), message = msg_no_fread)
  fail_if(!code_matches("sep"), message = msg_no_sep)
  fail_if(!code_matches("dec"), message = msg_no_dec)

  pass_if(identical(.result, growth_data))
  fail()
})

Importing csv files II

Import the file product_complexity.csv. Assign to the resulting object the name product_complexity, transform it into a tibble, and call the object by its name. The file product_complexity.csv looks like this:

commoditycode.product_complexity.exp_share
0101.0,0642426217529412.4,64
0102.-0,492542902941177.3,7
0103.0,510823855941177.4,082
0104.-1,12472420411765.0,916
0105.-0,173124057611765.1,331
0106.-1,37125134705882.0,766
0201.-0,103990797158824.25,346
0202.-0,989652876470588.5,429
0203.0,858757858235294.23,61
0204.-0,891279994705882.5,466

Hint: Do not forget to set the decimal sign and the column separator explicitly!


# In this file "." separates the columns and "," is the decimal sign.
# colClasses reads the first column as character and the other two as double;
# presumably this keeps the leading zeros of codes such as 0101.
product_complexity <- data.table::fread(
  file = "data/product_complexity.csv",
  sep = ".", dec = ",",
  colClasses = c("character", rep("double", 2))
  )
# Turn the imported table into a tibble.
product_complexity <- tibble::as_tibble(product_complexity)
# Calling the object by its name makes it the result of the chunk.
product_complexity
# Reference object: the grader for this exercise compares the learner's result
# against `product_complexity` with identical().
product_complexity <- data.table::fread(
  file = "data/product_complexity.csv",
  sep = ".",
  dec = ",",
  colClasses = c("character", rep("double", 2))
)
product_complexity <- tibble::as_tibble(product_complexity)

# Grades the `product_complexity` import exercise.
# Order of checks: code-text checks (no here(), fread/sep/dec present), the
# result must be a tibble, then one type check per column. The submission
# passes only if the result is identical to the reference `product_complexity`.
# `{...}` inside a message is a glue-style placeholder that reports the type
# actually found in the learner's result.
grade_this({
  fail_if(stringr::str_detect(.user_code, "here\\("),
            message = paste0(
              "Your code should not use the function `here::here()`, although ",
              "you should usually use this function. But for the sake of this ",
              "exercise simply use the relative paths you would usually have ",
              "passed to `here::here()`."
              )
          )
  fail_if(!tibble::is_tibble(.result),
            message = paste0(
              "Your solution is not a tibble. Did you forget to return it? Or ",
              "did you forget to transform the imported data into a `tibble`? ",
              "The package `tibble` contains a function with which you can ",
              "transform data frames into a `tibble`!"
              )
          )
  fail_if(!stringr::str_detect(.user_code, "fread"),
            message = paste0(
              "You need to use the function `fread()` from the package ",
              "`data.table` to import the data!"
              )
          )
  fail_if(!stringr::str_detect(.user_code, "sep"),
            message = paste0(
              "You forgot to set the column separator explicitly. Check out ",
              "the possible arguments to `data.table::fread()`!"
              )
          )
  fail_if(!stringr::str_detect(.user_code, "dec"),
            message = paste0(
              "You forgot to set the decimal sign explicitly. Check out ",
              "the possible arguments to `data.table::fread()`!"
              )
          )
  # Column type checks.
  fail_if(!is.character(.result[["commoditycode"]]),
            message = paste0(
              "The column `commoditycode` should be of type `character`, ",
              "otherwise the columns are not read correctly. In your solution ",
              "it is of type {typeof(.result[['commoditycode']])}. You should ",
              "set the column types via the correct argument in `fread()`!"
              )
          )
  fail_if(!is.double(.result[["product_complexity"]]),
            message = paste0(
              "The column `product_complexity` should be of type `double`, ",
              "otherwise the columns are not read correctly. In your solution ",
              "it is of type {typeof(.result[['product_complexity']])}. You should ",
              "set the column types via the correct argument in `fread()`!"
              )
          )
  fail_if(!is.double(.result[["exp_share"]]),
            message = paste0(
              "The column `exp_share` should be of type `double`, ",
              "otherwise the columns are not read correctly. In your solution ",
              "it is of type {typeof(.result[['exp_share']])}. You should ",
              "set the column types via the correct argument in `fread()`!"
              )
          )
  # Final verdict: exact match with the reference object, otherwise fail.
  pass_if(identical(.result, product_complexity))
  fail()
})

Import subsets of files

Import the file unemp_pop.csv, but only the columns country, year, and labor_force, and only the first two rows. Also, make sure that the data set is saved as a tibble, and that the variable labor_force has the type double, and the variable year has the type integer.

The file unemp_pop.csv looks like this:

country,year,population,labor_force
Germany,2015,81686611,42654597
Germany,2016,82348669,43551192
Germany,2017,82657002,43789062
Germany,2018,82905782,43884544
Germany,2019,83092962,44351163
Germany,2020,83240525,44120268

unemp_pop <- ____(
  ____ = ____,
  ____ = ____, 
  ____ = ____,
  ____ = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  ____ = ____,
  ____ = ____, 
  ____ = ____,
  ____ = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = ____,
  ____ = ____, 
  ____ = ____,
  ____ = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = ____,
  sep = ____, 
  ____ = ____,
  ____ = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = ____,
  sep = ____, 
  select = ____,
  ____ = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = ____,
  sep = ____, 
  select = ____,
  nrows = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = "data/unemp_pop.csv",
  sep = ____, 
  select = ____,
  nrows = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = "data/unemp_pop.csv",
  sep = ",", 
  select = c(____=____, ____=____, ____=____),
  nrows = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = "data/unemp_pop.csv",
  sep = ",", 
  select = c("country"="character", ____=____, ____=____),
  nrows = ____)
unemp_pop <- ____(unemp_pop)
unemp_pop
unemp_pop <- data.table::fread(
  file = "data/unemp_pop.csv",
  sep = ",", 
  select = c("country"="character", ____=____, ____=____),
  nrows = 2)
unemp_pop <- tibble::as_tibble(unemp_pop)
unemp_pop
# `select` names the columns to keep together with the type each should get;
# `nrows = 2` restricts the import to the first two rows.
unemp_pop <- data.table::fread(
  file = "data/unemp_pop.csv",
  sep = ",", 
  select = c("country"="character", "year"="integer", "labor_force"="double"),
  nrows = 2)
# Turn the imported table into a tibble.
unemp_pop <- tibble::as_tibble(unemp_pop)
# Calling the object by its name makes it the result of the chunk.
unemp_pop
# Reference object: the grader for this exercise compares the learner's result
# (column names and full content) against `unemp_pop`.
unemp_pop <- data.table::fread(
  file = "data/unemp_pop.csv",
  sep = ",",
  select = c("country"="character", "year"="integer", "labor_force"="double"),
  nrows = 2
)
unemp_pop <- tibble::as_tibble(unemp_pop)

# Grades the `unemp_pop` subset-import exercise.
# Order of checks: no here(), result is a tibble, fread() is used, the column
# set matches the reference, column types, number of rows. The submission
# passes only if the result is identical to the reference `unemp_pop`.
# `{...}` inside a message is a glue-style placeholder that reports what was
# actually found in the learner's result.
grade_this({
  fail_if(stringr::str_detect(.user_code, "here\\("),
            message = paste0(
              "Your code should not use the function `here::here()`, although ",
              "you should usually use this function. But for the sake of this ",
              "exercise simply use the relative paths you would usually have ",
              "passed to `here::here()`."
              )
          )
  fail_if(!tibble::is_tibble(.result),
            message = paste0(
              "Your solution is not a tibble. Did you forget to return it? Or ",
              "did you forget to transform the imported data into a `tibble`? ",
              "The package `tibble` contains a function with which you can ",
              "transform data frames into a `tibble`!"
              )
          )
  fail_if(!stringr::str_detect(.user_code, "fread"),
            message = paste0(
              "You need to use the function `fread()` from the package ",
              "`data.table` to import the data!"
              )
          )
  # Check the column set before the column types: for a missing column
  # `.result[["..."]]` is NULL, and the type checks below would then report a
  # confusing "of type NULL" instead of pointing at the missing column.
  fail_if(!setequal(names(.result), names(unemp_pop)),
          message = paste0(
            "You should only keep the columns `country`, `year`, and ",
            "`labor_force`, but the tibble you created contains the columns ",
            "{paste(names(.result), collapse=', ')}. There are different ",
            "possibilities. Check out the help function for `fread()` or read ",
            "the tutorial on data import."
            )
          )
  fail_if(!is.double(.result[["labor_force"]]),
            message = paste0(
              "The column `labor_force` should be of type `double`. ",
              "In your solution, however, ",
              "it is of type {typeof(.result[['labor_force']])}. You should ",
              "set the column types via the correct argument in `fread()`!"
              )
          )
  fail_if(!is.integer(.result[["year"]]),
            message = paste0(
              "The column `year` should be of type `integer`. ",
              "In your solution, however, ",
              "it is of type {typeof(.result[['year']])}. You should ",
              "set the column types via the correct argument in `fread()`!"
              )
          )
  fail_if(nrow(.result) != 2,
          message = paste0(
            "You should only keep the first two rows of the data. ",
            "Check out the help function for `fread()` or read ",
            "the tutorial on data import to find out how to do it. Or search ",
            "the web;)"
            )
          )
  # Final verdict: exact match with the reference object, otherwise fail.
  pass_if(identical(.result, unemp_pop))
  fail()
})


graebnerc/DataScienceExercises documentation built on April 11, 2025, 7:57 p.m.