# Show the source code of every chunk in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

# Course package (presumably the source of download_data() -- confirm).
library(SDS100)

# download the data
# NOTE: these calls must be on their own lines; when they trail the
# "# download the data" comment on a single line they are never executed.
download_data("Gettysburg_sample_ave_word_lengths.rda")
download_data("gettysburg.Rda")
$\\$
Let's create a histogram of the lengths of the 268 words in the Gettysburg address.
# Population view: distribution of word lengths over the whole Gettysburg
# address, with the population mean (mu) marked.

# load the actual gettysburg address words and compute mu
load("gettysburg.Rda")

# get the actual number of letters in the Gettysburg address (population)
real_num_letters <- gettysburg$num_letters

# calculate the average of the population of word lengths (mu)
mean_num_letters <- mean(real_num_letters)

# create a histogram of the word lengths
hist(
  real_num_letters,
  xlab = "Number of letters",
  main = "Number of letters in words"
)

# plot the average word length (mu) as a blue line
# (blue is the colour used for mu in the later plots as well)
abline(v = mean_num_letters, col = "blue")
$\\$
Let's create a histogram of the average word lengths $\bar{x}$ from the samples of n = 10 words that students drew from the Gettysburg address.
# Class view: distribution of the students' sample means (x-bar), with both
# the population mean (mu) and the mean of the x-bar's marked.

# load the class average word lengths and compute the mean E[x-bar]
load("Gettysburg_sample_ave_word_lengths.rda")

# get a vector of each student's average word length x-bar
average_word_lengths <- word_lengths$Ave_word_lengths

# take the average of the x-bar's E[x-bar]
mean_class_average_word_len <- mean(average_word_lengths)

# create a histogram of the class average word lengths
hist(
  average_word_lengths,
  breaks = 30,
  xlab = "Average word length of 10 words",
  main = "Histogram of average of 10 words"
)

# add line for mu (mean_num_letters is computed from the full address earlier)
abline(v = mean_num_letters, col = "blue")

# add a line for E[x-bar]
abline(v = mean_class_average_word_len, col = "red")
$\\$
Let's use R to randomly (uniformly) sample 10 words from the Gettysburg address and calculate the average $\bar{X}$ of 10 randomly sampled words.
You can also explore this using this app: https://ethan-meyers-test.shinyapps.io/gettysburg_sampling_distribution/
# Simulated view: repeatedly draw 10 random words from the population and
# record each sample mean, then compare the resulting distribution with mu
# and with the class's E[x-bar].
library(SDS100)

# you can ignore this code for now
# we will explain how this works soon...
# (each of the 10000 repetitions samples 10 word lengths and takes their mean)
sampling_distribution <- do_it(10000) * {
  random_sample <- sample(real_num_letters, 10)
  mean(random_sample)
}

# get the mean of the sampling distribution
# (variable name kept as originally spelled in case later code refers to it)
mean_sampling_distributon <- mean(sampling_distribution)

# create a histogram of the randomly sampled average word lengths
hist(
  sampling_distribution,
  breaks = 30,
  xlab = "Average word length of 10 words",
  main = "Histogram of average of 10 words"
)

# add line at mu
abline(v = mean_num_letters, col = "blue")

# add a line at E[x-bar]
abline(v = mean_class_average_word_len, col = "red")

# add a line at the mean of the sampling distribution
abline(v = mean_sampling_distributon, col = "green")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.