# Set the knitr chunk option `echo` to TRUE as the default for later chunks.
knitr::opts_chunk$set(echo = TRUE)
# Download the three data files used below. Each file name passed here is
# later read back from a relative path with read.table() / read.csv().

SDS100::download_data("stereograms.txt")
SDS100::download_data("freshman-15.txt")
SDS100::download_data("zodiac.csv")
# Attach SDS100. The download calls above are namespace-qualified, so they
# do not depend on this line having run first.
library(SDS100)

$\\$

Hypothesis tests for two means: Stereograms

step 1:

# step 2a: 

# SDS100::download_data("stereograms.txt")
# Read the stereogram data; the first row of the file holds the column names.
stereograms <- read.table(file = "stereograms.txt", header = TRUE)

# Fusion times for each group code. which() keeps only rows where the
# comparison is TRUE, so rows with a missing group are dropped.
no_visual <- stereograms$fusion_time[which(stereograms$group == "NV")]
visual <- stereograms$fusion_time[which(stereograms$group == "VV")]



# visualize the data






# step 2b - calculate the t-statistic










# step 3: visualize null distribution









# step 4: calculate the p-value








# step 5: make a decision





# can also run t.test()

$\\$

Paired t-test: The freshman 15

The data is from: https://dasl.datadescription.com/datafile/freshman-15/

# SDS100::download_data("freshman-15.txt")
# Read the freshman-15 data; the first row of the file holds the column names.
freshman <- read.table(file = "freshman-15.txt", header = TRUE)

# Extract the two weight columns as vectors.
initial_weight <- freshman[["Initial.Weight"]]
final_weight <- freshman[["Terminal.Weight"]]


# Let's define: mu_diff  =  mu_final - mu_initial  


# 1. State the null and alternative hypotheses







# calculate the weight difference for each participant






# 2a. visualize the data




# 2a. stripchart and boxplot 





# 2b. calculate the observed t-statistic







# 3. plot the null distribution 







# 4.  p-value






# 5. conclusion!




# try the t.test() function 




# confidence interval on the weight gain...

$\\$

Inference on more than 2 proportions

Let's run a chi-square goodness-of-fit test to see whether equal numbers of CEOs are born under each astrological sign.

Step 1:

#SDS100::download_data("zodiac.csv")


# Read the zodiac table; read.csv() treats the first row as the header by
# default.
zodiac <- read.csv(file = "zodiac.csv")


# 2a: visualize the data

# Extract the Births column as a vector.
births <- zodiac[["Births"]]





# step 2b. calculate the observed statistic






# step 3: visualize the null





# step 4: p-value




# 5. make a decision


# sanity check using built in R functions

$\\$

Inference on more than 2 proportions using resampling methods

Can we run the test using resampling methods?

Step 1:

# step 2:  calculate the observed statistic





# step 3: create the null distribution 






# view the null distribution






# how does this compare with the parametric p-value?

$\\$

Example 2: Alameda county jury selection

# Total number of people selected.
total_selected <- 1453

# Observed counts per category: each observed proportion scaled by the total.
# The bare name on the second line prints the vector.
obs_counts <- c(0.26, 0.08, 0.08, 0.54, 0.04) * total_selected
obs_counts

# Proportions expected in each category, in the same order as obs_counts.
expected_props <- c(0.15, 0.18, 0.12, 0.54, 0.01)


emeyers/SDS100 documentation built on April 28, 2024, 5:07 p.m.