# please do not alter this code chunk knitr::opts_chunk$set(echo = TRUE, message = FALSE, error = TRUE) library(tidyverse) library(reprores) # install the class package reprores to access built-in data # devtools::install_github("psyteachr/reprores-v2) # or download data from the website # https://psyteachr.github.io/reprores/data/data.zip
List the datasets in dplyr
.
# List every dataset that ships with the dplyr package.
utils::data(package = "dplyr")
Load the built-in dataset starwars
and use glimpse()
to see an overview.
# Load the built-in starwars dataset into the workspace ...
data("starwars")

# ... then print a compact overview of it.
dplyr::glimpse(starwars)
Convert the built-in base R mtcars
dataset to a tibble (you will need to find the function for this; it isn't in the chapter), and store it in the object mt
.
# as_tibble() converts the base R mtcars data frame to a tibble.
mt <- tibble::as_tibble(mtcars)
Using the data directory created by reprores::getdata()
(or download the zip file), read "disgust_scores.csv" into a table.
# Read the disgust scores using the default (guessed) column specification.
disgust <- readr::read_csv(file = "data/disgust_scores.csv")
Override the default column specifications to skip the id
column.
# Column specification that drops the id column; columns not named here
# are left to read_csv() to handle.
my_cols <- readr::cols(
  id = readr::col_skip()
)

# Re-read the same file with the override applied.
disgust_skip <- readr::read_csv(
  file = "data/disgust_scores.csv",
  col_types = my_cols
)
How many rows and columns are in the disgust
dataset?
## dim() returns a vector c(rows, cols)
dimensions <- dim(disgust)
disgust_rows <- dimensions[[1]]
disgust_cols <- dimensions[[2]]

## nrow() returns the number of rows
disgust_rows <- nrow(disgust)

## ncol() returns the number of columns
disgust_cols <- ncol(disgust)
Load the data in "data/stroop.csv" as stroop1
and "data/stroop.xlsx" as stroop2
.
# The same data stored in two formats: CSV via readr, Excel via readxl.
stroop1 <- readr::read_csv(file = "data/stroop.csv")
stroop2 <- readxl::read_xlsx(path = "data/stroop.xlsx")
Use glimpse()
to figure out the difference between the two data tables and fix the problem.
# the difference is the data type of rt:
# it is double in stroop1 and character in stroop2
glimpse(stroop1)
glimpse(stroop2)

# missing values use the characters "NA",
# so define the NA value when importing
stroop2b <- readxl::read_xlsx(path = "data/stroop.xlsx", na = "NA")
Create a tibble with the columns name
, age
, and country
of origin for 2 people you know.
# you can do this with the tibble function (column by column)
people <- tibble(
  name = c("Lisa", "Robbie"),
  age = c(43, 12),
  country = c("US", "UK")
)

# also note:
# you can type this in row by row, rather than column by column,
# using the 'tribble' function
people <- tribble(
  ~name,    ~age, ~country,
  "Lisa",   43,   "US",
  "Robbie", 12,   "UK"
)
Create a tibble that has the structure of the table below, using the minimum typing possible. (Hint: rep()
). Store it in the variable my_tbl
.
ID | A | B | C
--|-----|-----|---
1 | A1 | B1 | C1
2 | A1 | B2 | C1
3 | A1 | B1 | C1
4 | A1 | B2 | C1
5 | A2 | B1 | C1
6 | A2 | B2 | C1
7 | A2 | B1 | C1
8 | A2 | B2 | C1
# Build the 8-row table with rep() instead of typing every cell:
#   A - each level repeated 4 times in a row (A1 x4, then A2 x4)
#   B - the pair B1, B2 cycled 4 times
#   C - a single value, recycled by tibble() to every row
my_tbl <- tibble(
  ID = seq_len(8),
  A = rep(c("A1", "A2"), each = 4),
  B = rep(c("B1", "B2"), times = 4),
  C = "C1"
)
Set the following objects to the number 1 with the indicated data type:
one_int
(integer)
one_dbl
(double)
one_chr
# The number 1 stored as three different data types:
# one_int (integer), one_dbl (double) and one_chr (character).
one_int <- 1L   # the L suffix makes the literal an integer
one_dbl <- 1    # a plain numeric literal is a double
one_chr <- "1"  # quotes make it a character string
Set the objects T_log
, T_chr
, T_int
and T_dbl
to logical, character, integer and double values that will all be equal to TRUE
.
# Four values of different types that each compare equal to TRUE.
T_log <- TRUE    # logical
T_chr <- "TRUE"  # character
T_int <- 1L      # integer
T_dbl <- 1       # double
Check your answers with this code:
# these should all evaluate to TRUE
tests <- list(
  # each value compares equal to TRUE ...
  T_log_is_TRUE = T_log == TRUE,
  T_chr_is_TRUE = T_chr == TRUE,
  T_int_is_TRUE = T_int == TRUE,
  T_dbl_is_TRUE = T_dbl == TRUE,
  # ... and each has the requested data type
  T_log_is_log = is.logical(T_log),
  T_chr_is_chr = is.character(T_chr),
  T_int_is_int = is.integer(T_int),
  T_dbl_is_dbl = is.double(T_dbl)
)

str(tests) # this shows a condensed version of the list
Create a vector of the numbers 3, 6, and 9.
# The numbers 3, 6 and 9, built as the first three multiples of 3.
threes <- 3 * (1:3)
The built-in vector letters
contains the letters of the English alphabet. Use an indexing vector of integers to extract the letters that spell 'cat'.
# Positions of "c", "a" and "t" in the alphabet. The L suffix makes these
# true integers, as the exercise asks; bare 3, 1, 20 would be doubles.
cat <- letters[c(3L, 1L, 20L)]
The function colors()
returns all of the color names that R is aware of. What is the length of the vector returned by this function? (Use code to find the answer.)
# Number of colour names R knows about: the length of the colors() vector.
col_length <- length(grDevices::colors())
Create a named list called col_types
where the name is each column in the built-in dataset table1
and the value is the column data type (e.g., "double", "character", "integer", "logical").
# you can do this manually
# NOTE(review): these hard-coded types are assumed to match the installed
# copy of table1 - confirm with typeof(); the value is recomputed below.
col_types <- list(
  country = "character",
  year = "integer",
  cases = "integer",
  population = "integer"
)

# or with coding: look up each column's type, then attach the column names
col_types <- setNames(
  list(
    typeof(table1[[1]]),
    typeof(table1[[2]]),
    typeof(table1[[3]]),
    typeof(table1[[4]])
  ),
  names(table1)
)

# here is a shortcut to do it all in one step
# lapply applies the function (FUN) to each item in the list (X)
col_types <- lapply(X = table1, FUN = typeof)
Set the object x
to the integers 1 to 100. Use vectorised operations to set y
to x
squared. Use plot(x, y)
to visualise the relationship between these two numbers.
# x is the integers 1 to 100, as the exercise specifies
# (the previous -100:100 also included zero and the negative integers).
x <- 1:100

# Vectorised operation: every element of x is squared in one step.
y <- x^2

# Visualise the relationship between x and its square.
plot(x, y)
Set t
to the numbers 0 to 100 in increments of 0.1. Set x
to the sine of t
and y
to the cosine of t
(you will need to find the functions for sine and cosine). Plot x
against y
.
# t runs from 0 to 100 in increments of 0.1
t <- seq(from = 0, to = 100, by = 0.1)

# sin() and cos() are vectorised, so they apply to every element of t
x <- sin(t)
y <- cos(t)

# plot the sine values against the cosine values
plot(x, y)
The function call runif(n, min, max)
will draw n
numbers from a uniform distribution from min
to max
. If you set n
to 10000, min
to 0 and max
to 1, this simulates the p-values that you would get from 10000 experiments where the null hypothesis is true. Create the following objects:
pvals
: 10000 simulated p-values using runif()
is_sig
: a logical vector that is TRUE
if the corresponding element of pvals
is less than .05, FALSE
otherwise
sig_vals
: a vector of just the significant p-values
prop_sig
# ensures you get the same random numbers each time you run this code chunk
set.seed(8675309)

# pvals: 10000 simulated p-values drawn with runif()
pvals <- runif(n = 10000, min = 0, max = 1)

# is_sig: TRUE if the corresponding element of pvals is less than .05
is_sig <- pvals < .05

# sig_vals: a vector of just the significant p-values
sig_vals <- pvals[is_sig]

# prop_sig: the proportion of those p-values that were significant
prop_sig <- length(sig_vals) / length(pvals)

# alternatively:
prop_sig <- mean(is_sig)
prop_sig <- mean(pvals < .05)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.