# Set the global knitr chunk option `echo = TRUE` (knitr's option for
# including each chunk's source code in the rendered document).
knitr::opts_chunk$set(echo = TRUE)

Simple example of sampling with replacement

library(SDS100)


# a small example vector to resample from
my_sample <- c(3, 1, 4, 1, 5, 9)

# draw 6 values from the vector with replacement, so any value may be
# picked more than once (or not at all)
boot_sample <- sample(my_sample, size = 6, replace = TRUE)

$\\$

Simple fake data bootstrap example

# Bootstrap confidence interval for a mean, using a small made-up sample

# the observed sample (10 values)
my_sample <- c(21, 29, 25, 19, 24, 22, 25, 26, 25, 29)

# observed statistic: the sample mean (x-bar)
obs_mean <- mean(my_sample)

# bootstrap distribution: each evaluation of the braced expression draws
# 10 values from the sample with replacement and returns their mean
# (do_it(10000) is presumably what repeats the expression 10,000 times)
bootstrap_dist <- do_it(10000) * {
  curr_boot <- sample(my_sample, size = 10, replace = TRUE)
  mean(curr_boot)
}

# histogram of the bootstrap statistics, requesting about 100 bins
hist(bootstrap_dist, breaks = 100)

# bootstrap standard error: standard deviation of the bootstrap distribution
SE_boot <- sd(bootstrap_dist)

# approximate 95% confidence interval: x-bar +/- 2 standard errors
CI_lower <- obs_mean - 2 * SE_boot
CI_upper <- obs_mean + 2 * SE_boot

c(CI_lower, CI_upper)

$\\$

Let's try the Gettysburg address example

# Load the Gettysburg population data from the working directory.
# If the file is missing, it can be fetched first by uncommenting:
# SDS100::download_data("gettysburg.Rda")
load("gettysburg.Rda")

# population values used below (presumably the number of letters per word)
word_lengths <- gettysburg$num_letters

# sample(data_vec, n) returns a sample of length n; here n = 10
# (no `replace` argument, so sample()'s default of no replacement applies)
curr_sample <- sample(word_lengths, size = 10)

# observed statistic: the sample mean (x-bar)
obs_mean <- mean(curr_sample)



# Let's create a bootstrap distribution in R: resample the observed sample
# with replacement and record the mean of each resample.
# Each resample is stored under its own name (curr_boot) so the observed
# sample (curr_sample) is never overwritten; every bootstrap resample must
# be drawn from the original sample, not from a previous resample.
boot_dist <- do_it(10000)  *  {

  curr_boot <- sample(curr_sample, 10, replace = TRUE)
  mean(curr_boot)

}



# visualize the bootstrap distribution
hist(boot_dist, 100)


# get the bootstrap standard error SE* as the standard deviation of this
# example's bootstrap distribution (boot_dist)
SE_boot <- sd(boot_dist)


# approximate 95% confidence interval: x-bar +/- 2 * SE*
CI_lower <- obs_mean - 2 * SE_boot
CI_upper <- obs_mean + 2 * SE_boot

c(CI_lower, CI_upper)


# Is the actual parameter value mu in the confidence interval?
# Compare the population mean printed here with the interval printed above.
mean(word_lengths)


emeyers/SDS100 documentation built on April 28, 2024, 5:07 p.m.