# Document setup: set the knitr default chunk option `echo = TRUE` so that
# the source of each code chunk is shown in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
# Load the course package. NOTE(review): presumably this is what provides
# do_it(), shuffle() and pnull() used further down -- confirm.
library(SDS100)
$\\$
Step 1
$H_0: \beta = 0$ $H_A: \beta > 0$
# Hypothesis test for the slope of a simple linear regression of revenue on
# budget, run two ways: with a randomization (shuffle) null distribution and
# with R's built-in parametric summary.

# load the library with the data
library(fivethirtyeight)

# remove missing values
bechdel <- na.omit(bechdel)

# extract variables of interest (both scaled down by a factor of 10^6)
budget <- bechdel$budget_2013 / 10^6
revenue <- bechdel$domgross_2013 / 10^6

# observed statistic: the second coefficient of the fit, i.e. the slope on
# budget (the first coefficient is the intercept)
lm_fit <- lm(revenue ~ budget)
obs_stat <- coef(lm_fit)[2]

# step 3: using randomization methods
# NOTE(review): do_it() and shuffle() are not defined in this file; presumably
# they come from SDS100 and repeat the braced expression 10000 times with
# budget permuted on each repetition -- confirm.
null_dist <- do_it(10000) * {
  lm_shuff <- lm(revenue ~ shuffle(budget))
  coef(lm_shuff)[2]
}

# visualize the null distribution: where is the observed stat
hist(null_dist, breaks = 100)

# step 4
# upper-tail probability, matching the one-sided alternative beta > 0
pnull(obs_stat, null_dist, lower.tail = FALSE)

# step 5

# using parametric methods with R's built-in functions
summary(lm_fit)
$\\$
# Exercise placeholder: confidence intervals for the regression coefficient.

# could do the bootstrap to get a CI for beta0 using SDS100::resample_pairs()

# visualize the bootstrap distribution

# use the quantile function to create confidence intervals

# using parametric methods to create confidence intervals using the confint() function
$\\$
Try to predict a movie's revenue (domgross_2013) from: (1) the year, (2) whether it passed the Bechdel test, and (3) the movie's budget.
# Data preparation for the multiple regression exercise: keep only the
# response (domgross_2013) and the predictor columns named in the prompt.

# load the library with the data
library(fivethirtyeight)

# remove missing values
bechdel <- na.omit(bechdel)

columns_to_use <- c("year", "binary", "budget_2013", "domgross_2013")
bechdel2 <- bechdel[, columns_to_use]

# fit a multiple regression model

# print regression coefficients, standard errors, etc.
$\\$
We can run a parametric test for correlation using the following t-statistic:
$t ~=~ \frac{r\sqrt{n - 2}}{\sqrt{1 - r^2}}$
Let's examine again if there is a correlation between budget and revenue on the Bechdel data.
$\\$
Step 1:
# Parametric test for a correlation between budget and revenue.
# Relies on `bechdel` having been loaded and cleaned by an earlier chunk.

# extract variables of interest (both scaled down by a factor of 10^6)
# NOTE(review): the earlier slope analysis in this file reads budget_2013 and
# domgross_2013, while this chunk reads budget and domgross -- confirm which
# pair of columns is intended here.
budget <- bechdel$budget / 10^6
revenue <- bechdel$domgross / 10^6

# Step 2 - calculate the t-statistic

# Steps 3-5:

# check with the cor.test() function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.