# please do not alter this code chunk
knitr::opts_chunk$set(echo = TRUE,
                      message = FALSE, 
                      error = TRUE)

library(tidyverse)
library(broom)

Edit the code chunks below and knit the document. You can pipe your objects to glimpse() or print() to display them.

Basic Iteration Functions

Question 1

Set the vector v1 equal to the following: 11, 13, 15, 17, 19, ..., 99, 101 (use a function; don't just type all the numbers).

v1 <- seq(11, 101, by = 2) %>% print()

Question 2

Set the vector v2 equal to the following: "A" "A" "B" "B" "C" "C" "D" "D" "E" "E" (note the letters are all uppercase).

v2 <- rep(LETTERS[1:5], each = 2) %>% print()

Question 3

Set the vector v3 equal to the words "dog" 10 times, "cat" 9 times, "fish" 6 times, and "ferret" 1 time.

pets <- c("dog", "cat", "fish", "ferret")
pet_n <- c(10, 9, 6, 1)
v3 <- rep(pets, times = pet_n) %>% print()

map and apply functions

Question 4a

Use apply() or map() functions to create a list of 11 vectors of 100 numbers sampled from 11 random normal distributions with means of 0 to 1.0 (in steps of 0.1) and SDs of 1. Assign this list to the object samples. Set the seed to 321 before you generate the random numbers to ensure reproducibility.

set.seed(321)
mu <- seq(0, 1, 0.1)
samples <- map(mu, rnorm, n = 100)

# alternatively
samples <- lapply(mu, rnorm, n = 100)

Question 4b

Use apply() or map() functions to create a vector of the sample means from the list samples in the previous question.

sample_means <- map_dbl(samples, mean)

## alternatively

sample_means <- sapply(samples, mean) %>% print()

Custom functions

Question 5a

Write a function called my_add that adds two numbers (x and y) together and returns the results.

my_add <- function(x, y) {
  x+y
}

Question 5b

Create a vector testing your function my_add. Every item in the vector should evaluate to TRUE if your function is working correctly.

my_add_test <- c(
  my_add(1, 2) == 3,
  my_add(10, 20) == 30,
  my_add(-1, -3) == -4,
  my_add(0.2, 0.334) == 0.534
) %>% print()

Error handling

Question 6

Copy the function my_add above and add an error message that returns "x and y must be numbers" if x or y are not both numbers.

my_add <- function(x, y) {
  if (!is.numeric(x) | !is.numeric(y)) stop("x and y must be numbers")
  x+y
}

Building up a custom function

Question 7

Create a tibble called dat that contains 20 rows and three columns: id (integers 101 through 120), pre and post (both 20-item vectors of random numbers from a normal distribution with mean = 0 and sd = 1). Set seed to 90210 to ensure reproducible values.

set.seed(90210)

dat <- tibble(
  id = 101:120,
  pre = rnorm(20),
  post = rnorm(20)
) %>% print()

Question 8

Run a two-tailed, paired-samples t-test comparing pre and post. (check the help for t.test)

t <- t.test(dat$post, dat$pre, paired = TRUE) %>% print()

Question 9

Use broom::tidy to save the results of the t-test in question 8 in a table called stats.

stats <- t.test(dat$post, dat$pre, paired = TRUE) %>%
  broom::tidy() %>%
  print()

Question 10

Create a function called report_t that takes a data table as an argument and returns the result of a two-tailed, paired-samples t-test between the columns pre and post in the following format:

"The mean increase from pre-test to post-test was #.###: t(#) = #.###, p = 0.###, 95% CI = [#.###, #.###]."

Hint: look at the function paste0() (simpler) or sprintf() (complicated but more powerful).

NB: Make sure all numbers are reported to three decimal places (except degrees of freedom).

report_t <- function(data) {
  stats <- t.test(data$post, data$pre, paired = TRUE) %>%
    broom::tidy()

  diff <- pull(stats, estimate) %>% round(3) 
  t <- pull(stats, statistic) %>% round(3)
  p <- pull(stats, p.value) %>% round(3)
  df <- pull(stats, parameter)
  ci1 <- pull(stats, conf.low) %>% round(3)
  ci2 <- pull(stats, conf.high) %>% round(3)

  paste0("The mean increase from pre-test to post-test was ", diff, 
         ": t(", df, ") = ", t, 
         ", p = ", p, 
         ", 95% CI = [", ci1, ", ", ci2, "].")
}
# glue works well in a pipeline
# but doesn't format digits as nicely as sprintf (below), 
# so we have to round the values before constructing the text
report_t <- function(data) {
  t.test(data$post, data$pre, paired = TRUE) %>%
    broom::tidy() %>%
    mutate(across(.cols = where(is.numeric), 
                  .fns = round, 
                  digits = 3)) %>%
    mutate(
      text = glue::glue("The mean increase from pre-test to post-test was {estimate}: t({parameter}) = {statistic}, p = {p.value}, 95% CI = [{conf.low}, {conf.high}].")) %>%
    pull(text)
}
# sprintf() is a complicated function, but can be easier to use in long text strings with a lot of things to replace
# it also formats values with leading or trailing zeroes

report_t <- function(data) {
  stats <- t.test(data$post, data$pre, paired = TRUE) %>%
    broom::tidy() %>%
    # make sure to round values
    mutate(across(.cols = where(is.numeric), 
                  .fns = round, 
                  digits = 3))

  sprintf("The mean increase from pre-test to post-test was %.3f: t(%.0f) = %.3f, p = %.3f, 95%% CI = [%.3f, %.3f].",
          pull(stats, estimate), 
          pull(stats, parameter),
          pull(stats, statistic),
          pull(stats, p.value),
          pull(stats, conf.low),
          pull(stats, conf.high)
  )
}

Question 11

Use inline R to include the results of report_t() on the dat data table in a paragraph below.

r report_t(dat)

Functions and GLM

Question 12

Write a function to simulate data with the form.

$Y_i = \beta_0 + \beta_1 X_i + e_i$

The function should take arguments for the number of observations to return (n), the intercept (b0), the effect (b1), the mean and SD of the predictor variable X (X_mu and X_sd), and the SD of the residual error (err_sd). The function should return a tibble with n rows and the columns id, X and Y.

sim_lm_data <- function(n = 100, b0 = 0, b1 = 0, 
                        X_mu = 0, X_sd = 1, err_sd = 1) {
  tibble(
    id = 1:n,
    X = rnorm(n, X_mu, X_sd),
    err = rnorm(n, 0, err_sd),
    Y = b0 + b1*X + err
  ) %>%
    select(id, X, Y)
}

dat12 <- sim_lm_data(n = 10) %>% print() # do not edit

Question 13

Use the function from Question 12 to generate a data table with 10000 subjects, an intercept of 80, an effect of X of 0.5, where X has a mean of 0 and SD of 1, and residual error SD of 2.

dat13 <- sim_lm_data(n = 10000, b0 = 80, b1 = 0.5, 
                   X_mu = 0, X_sd = 1, err_sd = 2)

Analyse the data with lm(). Find where the analysis summary estimates the values of b0 and b1. What happens if you change the simulation values?

mod13 <- lm(Y ~ X, data = dat13)

summary(mod13) # print summary

Question 14

Use the function from Question 6 to calculate power by simulation for the effect of X on Y in a design with 50 subjects, an intercept of 80, an effect of X of 0.5, where X has a mean of 0 and SD of 1, residual error SD of 2, and alpha of 0.05.

Hint: use broom::tidy() to get the p-value for the effect of X.

# ... lets you include any arguments to send to sim_lm_data()
sim_lm_power <- function(...) {
  dat <- sim_lm_data(...)
  lm(Y~X, dat) %>% 
    broom::tidy() %>%
    filter(term == "X") %>%
    pull(p.value)
}

p_values <- replicate(1000, sim_lm_power(n = 50, b0 = 80, b1 = 0.5, X_mu = 0, X_sd = 1, err_sd = 2))

power <- mean(p_values < .05)

power # print the value

Question 15

Calculate power (i.e., the false positive rate) for the effect of X on Y in a design with 50 subjects where there is no effect and alpha is 0.05.

p_values <- replicate(1000, sim_lm_power(n = 50, b1 = 0))

false_pos <- mean(p_values < .05)

false_pos # print the value

Make a histogram of the p-values from the simulation above. Use geom_histogram with binwidth=0.05 and boundary=0. What kind of distribution is this?

ggplot(mapping = aes(x = p_values)) +
  geom_histogram(color = "black", fill = "white", binwidth = .05, boundary = 0)


PsyTeachR/reprores-v2 documentation built on Sept. 26, 2022, 10:06 a.m.