# Set the default knitr chunk option echo = TRUE so that each chunk's source
# code is displayed in the knitted document
knitr::opts_chunk$set(echo = TRUE)
# Attach the SDS100 package. NOTE(review): the helpers named in comments later
# in this file (download_data, get_MAD_stat) presumably come from it -- confirm
library(SDS100)

Do private colleges have the same graduation rates as public colleges?

$\\$

# State the null and alternative hypotheses in symbols and words: 

# In words: 
#   Null hypothesis is: 
#   Alternative hypothesis is:


# In symbols:
#  H0: 
#  HA: 

# significance level:  






# step 2: observed stat

# Load the college data from the Lock5 website
college <- read.csv(
  file = "https://www.lock5stat.com/datasets3e/CollegeScores4yr.csv"
)

# Split the rows by the value of the Control column.
# which() skips rows where the comparison is NA, just as subset() does.
public <- college[which(college$Control == "Public"), , drop = FALSE]
private <- college[which(college$Control == "Private"), , drop = FALSE]

# CompRate values for each group (used for the graduation-rate question)
public_CompRate <- public[["CompRate"]]
private_CompRate <- private[["CompRate"]]


# What's a good way to visualize the data for our question of interest? 





# calculate the statistic of interest






# step 3:










# visualize the null distribution 









# step 4:







# step 5: Conclusion? 

$\\$

Do movies that pass the Bechdel test have the same average budgets as movies that fail?

$\\$

# step 1:

# In words: 

# Null hypothesis is: 
# Alternative hypothesis is:



# In symbols:


# significance level: 




# step 2: observed stat



library(fivethirtyeight)

# Copy the package's bechdel data set into a workspace variable
bechdel <- bechdel


# One data frame per value of the binary column.
# which() skips rows where the comparison is NA, just as subset() does.
passed <- bechdel[which(bechdel$binary == "PASS"), , drop = FALSE]
failed <- bechdel[which(bechdel$binary == "FAIL"), , drop = FALSE]

# budget_2013 values for each group
passed_budget <- passed[["budget_2013"]]
failed_budget <- failed[["budget_2013"]]


# What's a good way to visualize the data for our question of interest? 





# Observed statistic: mean of passed_budget minus mean of failed_budget,
# with missing values ignored in both means
mean_passed <- mean(passed_budget, na.rm = TRUE)
mean_failed <- mean(failed_budget, na.rm = TRUE)
obs_stat <- mean_passed - mean_failed




# step 3:












# visualize the null distribution 









# step 5: Conclusion? 

$\\$

Is there a significant correlation between movie budget and movie revenue?

# Step 1: 







# Step 2: 

# Get the data, visualize it, and compute the statistic of interest

# Keep the movies that have both values needed for the correlation.
# The completeness check is limited to the two columns used below:
# na.omit(bechdel) would also drop any movie with an NA in some other,
# unrelated column, shrinking the sample for no reason.
bechdel2 <- bechdel[
  complete.cases(bechdel[, c("budget_2013", "domgross_2013")]),
]

# Paired vectors: budget[i] and revenue[i] come from the same movie
budget <- bechdel2$budget_2013
revenue <- bechdel2$domgross_2013



# visualize the data






# calculate the observed correlation 






# Create the null distribution






# visualize the null distribution 





# 4. Get the p-value






# 5. Decision? 

$\\$

Does the price of the college differ depending on whether the college is in a City, Town, Rural, or Suburb location?

$\\$

# Step 1: 





# Step 2: 

# Get the data, visualize it, and compute the statistic of interest

# load the data..
# download_data("CollegeScores4yr.csv")
college <- read.csv("https://www.lock5stat.com/datasets3e/CollegeScores4yr.csv")

# Delete rows with missing data, checking only the two columns this analysis
# uses. na.omit(college) would also discard every college that has an NA in
# any other column, shrinking the sample for no reason.
college <- college[complete.cases(college[, c("Cost", "Locale")]), ]

# how many colleges are in each type of location? 

cost <- college$Cost
locale <- college$Locale




# visualize the data - does there appear to be a difference? 




# calculate the MAD statistic
# get_MAD_stat(data_vector, grouping_vector)





# Create the null distribution






# visualize the null distribution 






# 4. Get the p-value




# 5. Decision? 


emeyers/SDS100 documentation built on April 28, 2024, 5:07 p.m.