In emeyers/SDS100: Tools for the class Introductory Statistics

# make echo = TRUE the default option for the code chunks in this document
# (knitr's echo option controls whether a chunk's source code is shown in the output)
knitr::opts_chunk$set(echo = TRUE)

$\\$

Bechdel analyses: histograms, means, and medians

$\\$

# attach the package that supplies the Bechdel movie data set
library("fivethirtyeight")

# load the bechdel data frame into the workspace
data("bechdel")

# keep just the domgross_2013 column
# (presumably domestic gross revenue in 2013 dollars -- confirm in ?bechdel)
domgross <- bechdel[["domgross_2013"]]



# histogram of the revenues with a title and both axes labeled;
# breaks = 30 asks for roughly 30 bins (hist treats a single number as a suggestion)
hist(
  domgross,
  breaks = 30,
  main = " Movies gross domestic revenue",
  xlab = "Gross domestic revenue ($)",
  ylab = "Frequency"
)



# compute the mean and the median of the revenues, ignoring missing values,
# and display each one
mean_gross <- mean(domgross, na.rm = TRUE)
mean_gross
median_gross <- median(domgross, na.rm = TRUE)
median_gross


# add vertical lines to the histogram at the mean (red) and the median (blue)
abline(v = c(mean_gross, median_gross), col = c("red", "blue"))

$\\$

Bechdel analyses: z-scores

We can calculate z-scores using the formula: $z_i = \frac{x_i - \bar{x}}{s}$

$\\$

# gross revenue of Star Wars: the single observation to standardize
star_wars_gross <- 1771682790



# z-score for Star Wars: its distance from the mean revenue, measured in
# standard deviations (missing revenues are ignored in both statistics)

sd_gross <- sd(domgross, na.rm = TRUE)
mean_gross <- mean(domgross, na.rm = TRUE)

zscore_starwas <- (star_wars_gross - mean_gross) / sd_gross

zscore_starwas   # display the result (author's note: a z-score of about 13!)

$\\$

Bechdel analyses: quantiles

$\\$

# 25th and 75th percentiles of the movie revenues (missing values dropped)
quantile(domgross, probs = c(0.25, 0.75), na.rm = TRUE)


# five number summary of the revenues
# (na.rm = TRUE is already fivenum's default; it is only spelled out here)
fivenum(domgross, na.rm = TRUE)

$\\$

DOW analyses: comparison to normal distribution

# read the DOW data from DowPrices.csv (path is relative to the working directory)
# NOTE(review): the commented-out call below presumably downloads that file first -- confirm

# SDS100::download_data("DowPrices.csv")
dow <- read.csv(file = "DowPrices.csv")

# pull out the PercentChange column (the daily percent change)
percent_change <- dow$PercentChange


# if the data were normally distributed, about 95% of it would be expected
# within two standard deviations of the mean; compute and show that range
mean_dow <- mean(percent_change)
mean_dow

sd_dow <- sd(percent_change)
sd_dow

normal_95 <- mean_dow + c(-2, 2) * sd_dow
normal_95



# the 2.5% and 97.5% quantiles actually observed in the data, for comparison
quantile(percent_change, probs = c(0.025, 0.975))


# histogram of the daily percent changes: roughly 200 bins requested,
# with the x-axis restricted to the range -10 to 10
hist(
  dow$PercentChange,
  xlim = c(-10, 10),
  breaks = 200,
  xlab = "Daily percent change",
  main = "DOW daily % change"
)



# five number summary of the daily percent changes
# (na.rm = TRUE is already fivenum's default; it is only spelled out here)
fivenum(percent_change, na.rm = TRUE)