# Document setup: set the knitr chunk option `echo = TRUE` as the default
# for every chunk in this document.
knitr::opts_chunk$set(echo = TRUE)
# Attach the course package. NOTE(review): do_it(), rflip_count() and pnull(),
# used further down, are presumably provided by SDS100 -- confirm.
library(SDS100)

Bechdel test bootstrap CI

$\\$

One more bootstrap confidence interval for the mean gross revenue of movies.

# Bootstrap confidence interval for the mean gross revenue of movies
library(fivethirtyeight)

# View(bechdel)
# pull the domgross_2013 column and drop its missing values
gross <- na.omit(bechdel$domgross_2013)

# sample size: how many revenue values remain after dropping NAs
n_gross <- length(gross)

# x-bar: the observed sample mean (stored, then printed)
xbar_gross <- mean(gross)
xbar_gross

# bootstrap distribution: repeat 10000 times -- resample the data with
# replacement (same size as the original sample) and record the mean
boot_dist <- do_it(10000) * {
  boot_sample <- sample(gross, size = n_gross, replace = TRUE)
  mean(boot_sample)
}

# histogram of the bootstrap means
hist(boot_dist, breaks = 100)

# SE*: the standard deviation of the bootstrap distribution (stored, then
# printed)
SE_boot <- sd(boot_dist)
SE_boot

# confidence interval endpoints: x-bar minus/plus two bootstrap standard errors
CI_lower <- xbar_gross - 2 * SE_boot
CI_upper <- xbar_gross + 2 * SE_boot

# show the interval as (lower, upper)
print(c(CI_lower, CI_upper))

Questions?

  1. How do we interpret the results?
  2. Do we believe these results?
  3. Are there any concerns about the analysis?
     a. Is the Bechdel data really a simple random sample from all movies?
     b. Might the revenue of movies change over time (even after adjusting for inflation)?
     c. In general, what is the population we are trying to do inference on?

$\\$

Testing the Bechdel test

# Step 1: state the null and alternative hypotheses
#   H0: pi = 0.5
#   HA: pi < 0.5

# Step 2: compute the observed statistic, p-hat = 803 out of 1794
# (stored, then printed)
bechdel_phat <- 803 / 1794
bechdel_phat

# Step 3: create the null distribution -- 10000 simulated proportions, each
# from rflip_count(1794, 0.5) divided by 1794
# NOTE(review): rflip_count() presumably returns the number of successes in
# 1794 flips with success probability 0.5 -- confirm against the SDS100 docs
null_dist <- do_it(10000) * {
  rflip_count(1794, 0.5) / 1794
}

# plot the null distribution and mark the observed statistic with a red line
hist(null_dist, breaks = 30)
abline(v = bechdel_phat, col = "red")

# Step 4: calculate the p-value; the lower tail is used because HA is
# pi < 0.5
pnull(bechdel_phat, null_dist, lower.tail = TRUE)

# Step 5: decision?

$\\$

Hypothesis testing with Joy

# Step 1: state the null and alternative hypotheses
#   H0: pi = 0.5
#   HA: pi > 0.5

# Step 2: compute the observed statistic, p-hat = 11 out of 12
# (stored, then printed)
joy_phat <- 11 / 12
joy_phat

# Step 3: create the null distribution -- 10000 simulated proportions, each
# from rflip_count(12, 0.5) divided by 12
# NOTE(review): rflip_count() presumably returns the number of successes in
# 12 flips with success probability 0.5 -- confirm against the SDS100 docs
null_dist <- do_it(10000) * {
  rflip_count(12, 0.5) / 12
}

# plot the null distribution and mark the observed statistic with a red line
hist(null_dist, breaks = 100)
abline(v = joy_phat, col = "red")

# Step 4: calculate the p-value; the upper tail is used because HA is
# pi > 0.5
# NOTE(review): with only 13 possible values the null distribution is very
# discrete -- confirm that pnull(..., lower.tail = FALSE) counts simulated
# values equal to joy_phat as part of the tail
pnull(joy_phat, null_dist, lower.tail = FALSE)

# Step 5: decision?


emeyers/SDS100 documentation built on April 28, 2024, 5:07 p.m.