Nothing
## ----nomessages, echo = FALSE-------------------------------------------------
# set some default options for chunks
knitr::opts_chunk$set(
warning = FALSE, # avoid warnings and messages in the output
message = FALSE,
collapse = TRUE, # collapse all output into a single block
tidy = FALSE, # don't tidy our code-- assume we do it ourselves
fig.height = 5,
fig.width = 7
)
options(digits=4) # number of digits to display in output; can override with chunk option R.options=list(digits=)
set.seed(1234) # reproducibility
## -----------------------------------------------------------------------------
B = 1e3
u = pnorm(rnorm(B))
## -----------------------------------------------------------------------------
u_true = 1:B / (B+1) # almost true uniform quantiles (slight bias in tails)
plot(u_true, sort(u), pch = 19, cex = 0.25,
main = "Uniform Q-Q Plot",
xlab = "Theoretical Quantiles",
ylab = "Observed Quantiles")
lines(u_true, u_true, lty = 2, col = "red")
## -----------------------------------------------------------------------------
n = 100
u_obs = sort(unlist(replicate(B, pnorm(max(rnorm(n)))^100)))
plot(u_true, u_obs, pch = 19, cex = 0.25,
main = "Uniform Q-Q Plot for Maximum of 100 Normals",
xlab = "Theoretical Quantiles",
ylab = "Observed Quantiles")
lines(u_true,u_true, lty = 2, col = "red")
## ----message = FALSE, warning = FALSE-----------------------------------------
# software
library(Lahman)
library(tidyverse)
#install.packages("devtools")
#library(devtools)
#devtools::install_github(repo = "DEck13/fullhouse")
#library(fullhouse)
# version numbers
packageVersion("Lahman")
packageVersion("tidyverse")
#packageVersion("fullhouse")
# data preprocessing
foo = Batting %>%
# get years 1911 and 1997
filter(yearID %in% c(1911, 1997)) %>%
# compute batting average
mutate(BA = round(H/AB, 3)) %>%
# get player names
left_join(People %>% mutate(name = paste(nameFirst, nameLast, sep = " "))) %>%
# subset columns
select(name, yearID, AB, BA) %>%
# only consider full-time players
filter(AB >= 320)
## -----------------------------------------------------------------------------
max_BA = foo %>%
summarise(name = name[which(BA == max(BA))], max_BA = max(BA), .by = yearID)
max_BA
## -----------------------------------------------------------------------------
means_sds = foo %>%
summarise(mean_BA = mean(BA), sd_BA = sd(BA), .by = yearID)
means_sds
## -----------------------------------------------------------------------------
foo %>%
ggplot() +
aes(x = BA, color = factor(yearID)) +
geom_density() +
theme_minimal() +
labs(color = "yearID")
## -----------------------------------------------------------------------------
foo %>%
summarise(name = name[which(scale(BA) == max(scale(BA)))],
Z_max = max(scale(BA)),
.by = yearID)
## ----message = FALSE----------------------------------------------------------
# talent pool size (from fullhouse package)
N_1911 = 1694866
N_1997 = 9669798
# number of full-time players
ns = foo %>% summarise(n = n(), .by = yearID)
n_1911 = ns %>% filter(yearID == 1911) %>% pull(n)
n_1997 = ns %>% filter(yearID == 1997) %>% pull(n)
## -----------------------------------------------------------------------------
# means and standard deviations for BA from 1911
means_sds_1911 = means_sds %>%
filter(yearID == 1911) %>% select(mean_BA, sd_BA)
# means and standard deviations for BA from 1997
means_sds_1997 = means_sds %>%
filter(yearID == 1997) %>% select(mean_BA, sd_BA)
## -----------------------------------------------------------------------------
# max BA from 1911
max_BA_1911 = max_BA %>%
filter(yearID == 1911) %>% pull(max_BA)
# max BA from 1997
max_BA_1997 = max_BA %>%
filter(yearID == 1997) %>% pull(max_BA)
# 1911 Ty Cobb's extreme batting average percentile
F_max_1911 =
pnorm(max_BA_1911,
mean = means_sds_1911$mean_BA,
sd = means_sds_1911$sd_BA)^n_1911
F_max_1911
# 1997 Tony Gwynn's extreme batting average percentile
F_max_1997 =
pnorm(max_BA_1997,
mean = means_sds_1997$mean_BA,
sd = means_sds_1997$sd_BA)^n_1997
F_max_1997
## -----------------------------------------------------------------------------
# aptitude for 1911 Ty Cobb
qnorm(F_max_1911^(1/N_1911))
# aptitude for 1997 Tony Gwynn
qnorm(F_max_1997^(1/N_1997))
## -----------------------------------------------------------------------------
# input for 1997 Tony Gwynn
sprintf("%.10f", F_max_1997^(1/N_1997))
# input for 1911 Ty Cobb
sprintf("%.10f", F_max_1911^(1/N_1911))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.