# Set the knitr chunk option `echo` to TRUE so that each chunk's source code
# is shown alongside its output in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
# Attach the SDS100 package.
# NOTE(review): presumably this is where do_it(), shuffle(), pnull() and
# get_MAD_stat() used later in the file come from -- confirm.
library(SDS100)
$\\$
$\\$
# Permutation test for a correlation: movie budget vs. domestic revenue ----
#
# The statements below were originally collapsed onto a single line, which
# turned everything after the first `#` into one long comment. They are
# restored here to one statement per line so the analysis actually runs.

# NOTE(review): `bechdel` is expected to be provided by this package -- confirm.
library(fivethirtyeight)

# Step 1: State the hypotheses
#   H0: rho = 0
#   HA: rho > 0

# Step 2: Get the data, visualize it, and compute the statistic of interest
# na.omit() drops every row that has an NA in *any* column, not only in the
# two columns analysed below.
bechdel2 <- na.omit(bechdel)
budget <- bechdel2$budget_2013
revenue <- bechdel2$domgross_2013

# visualize the data
plot(budget, revenue, xlab = "Budget ($)", ylab = "Revenue ($)")

# calculate the observed correlation
# (the outer parentheses make R print the assigned value)
(obs_stat <- cor(budget, revenue))

# Step 3: Create the null distribution
# NOTE(review): presumably shuffle() randomly permutes `revenue`, breaking its
# pairing with `budget`, and do_it(10000) repeats the braced expression
# 10000 times -- confirm against the SDS100 documentation.
null_dist <- do_it(10000) * {
  cor(budget, shuffle(revenue))
}

# visualize the null distribution
# The x-axis is widened to include obs_stat; with the default limits the red
# reference line is not drawn whenever the observed statistic lies beyond the
# range of the null distribution. `finite = TRUE` mirrors hist(), which
# ignores non-finite values.
hist(
  null_dist,
  breaks = 100,
  xlim = range(null_dist, obs_stat, finite = TRUE)
)
abline(v = obs_stat, col = "red")

# Step 4: Get the p-value
# NOTE(review): with lower.tail = FALSE this presumably returns the
# upper-tail proportion of null_dist relative to obs_stat, matching the
# one-sided HA: rho > 0 -- confirm.
pnull(obs_stat, null_dist, lower.tail = FALSE)

# Step 5: Decision?
$\\$
$\\$
# Permutation test comparing mean college cost across locale types ----
#
# The statements below were originally collapsed onto a single line that began
# with `#`, so the entire chunk was one comment and nothing ran. They are
# restored here to one statement per line.

# Step 1: State the hypotheses
#   H0: mu_city = mu_town = mu_rural = mu_suburb
#   HA: mu_i != mu_j for at least one pair of locations

# Step 2: Get the data, visualize it, and compute the statistic of interest

# load the data (requires network access)
college <- read.csv(
  "https://www.lock5stat.com/datasets3e/CollegeScores4yr.csv"
)
college <- na.omit(college)  # delete rows with missing data in any column

cost <- college$Cost
locale <- college$Locale

# how many colleges are in each type of location?
table(locale)

# visualize the data - does there appear to be a difference?
boxplot(cost ~ locale)

# calculate the MAD statistic
# usage: get_MAD_stat(data_vector, grouping_vector)
# (the outer parentheses make R print the assigned value)
(obs_stat <- get_MAD_stat(cost, locale))

# Step 3: Create the null distribution
# NOTE(review): presumably shuffle() randomly permutes the locale labels and
# do_it(10000) repeats the braced expression 10000 times -- confirm against
# the SDS100 documentation.
null_dist <- do_it(10000) * {
  shuffled_location <- shuffle(locale)
  get_MAD_stat(cost, shuffled_location)
}

# visualize the null distribution
# The x-axis is widened to include obs_stat; with the default limits the red
# reference line is not drawn whenever the observed statistic lies beyond the
# range of the null distribution. `finite = TRUE` mirrors hist(), which
# ignores non-finite values.
hist(
  null_dist,
  breaks = 100,
  xlim = range(null_dist, obs_stat, finite = TRUE)
)
abline(v = obs_stat, col = "red")

# Step 4: Get the p-value
# NOTE(review): with lower.tail = FALSE this presumably returns the
# upper-tail proportion of null_dist relative to obs_stat -- confirm.
pnull(obs_stat, null_dist, lower.tail = FALSE)

# Step 5: Decision?
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.