Nothing
## ----nomessages, echo = FALSE-------------------------------------------------
# set some default options for chunks
knitr::opts_chunk$set(
warning = FALSE, # avoid warnings and messages in the output
message = FALSE,
collapse = TRUE, # collapse all output into a single block
tidy = FALSE, # don't tidy our code-- assume we do it ourselves
fig.height = 5,
fig.width = 5
)
options(digits=4) # number of digits to display in output; can override with chunk option R.options=list(digits=)
par(mar=c(3,3,1,1)+.1)
set.seed(1234) # reproducibility
## ----load-packages, echo=FALSE------------------------------------------------
library(Lahman) # Load additional packages here
library(ggplot2)
library(dplyr)
## -----------------------------------------------------------------------------
data("Salaries", package="Lahman")
str(Salaries)
sample_n(Salaries, 10)
## ----Teams-names--------------------------------------------------------------
data("Teams", package="Lahman")
dim(Teams)
names(Teams)
## ----Teams-filter-------------------------------------------------------------
Teams <- Teams %>%
select(yearID, lgID, teamID, name, divID, Rank, WSWin, attendance) %>%
filter(yearID >= 1985) %>%
mutate(teamID = droplevels(teamID))
sample_n(Teams, 10)
## ----SeriesPost-names---------------------------------------------------------
data("SeriesPost", package="Lahman")
names(SeriesPost)
## -----------------------------------------------------------------------------
table(SeriesPost$round)
## ----filter-WS----------------------------------------------------------------
WS <- SeriesPost %>%
filter(yearID >= 1985 & round == "WS") %>%
select(-ties, -round) %>%
mutate(lgIDloser = droplevels(lgIDloser),
lgIDwinner = droplevels(lgIDwinner))
dim(WS)
sample_n(WS, 6)
## -----------------------------------------------------------------------------
table(Salaries$yearID)
## -----------------------------------------------------------------------------
range(Salaries$salary)
## -----------------------------------------------------------------------------
Salaries %>%
group_by(yearID) %>%
summarise(min=min(salary),
max=max(salary))
## -----------------------------------------------------------------------------
which(Salaries$salary==0)
## ----which-zero---------------------------------------------------------------
Salaries[which(Salaries$salary==0),]
## ----filter-salaries----------------------------------------------------------
Salaries <- Salaries %>%
filter(salary !=0)
## ----summarise-salaries-------------------------------------------------------
payroll <- Salaries %>%
group_by(teamID, yearID) %>%
summarise(payroll = sum(salary)/1000000)
head(payroll)
## ----merge-teams--------------------------------------------------------------
payroll <- merge(payroll, Teams[,c("yearID", "teamID","name", "WSWin")],
by=c("yearID", "teamID"))
sample_n(payroll, 10)
## ----eval=FALSE---------------------------------------------------------------
# left_join(payroll, Teams[,c("yearID", "teamID","name", "WSWin")],
# by=c("yearID", "teamID")) %>%
# sample_n(10)
## ----make-WSWin-factor--------------------------------------------------------
payroll <- payroll %>%
mutate(WSWin = factor(WSWin))
## -----------------------------------------------------------------------------
table(payroll$WSWin, useNA="ifany")
## ----payroll-boxplot, fig.width=7---------------------------------------------
boxplot(payroll ~ yearID, data=payroll, ylab="Payroll ($ millions)")
## ----payroll-Boxplot, fig.width=7---------------------------------------------
out <- car::Boxplot(payroll ~ yearID, data=payroll,
id=list(n=1,
labels=as.character(payroll$teamID)),
ylab="Payroll ($ millions)")
## -----------------------------------------------------------------------------
table(out)
## -----------------------------------------------------------------------------
inflation = c(1, 1.02, 1.06, 1.10, 1.15, 1.21,
1.27, 1.30, 1.34, 1.38, 1.42, 1.46, 1.49, 1.51, 1.55, 1.60,
1.65, 1.67, 1.71, 1.76, 1.82, 1.87, 1.93, 2.00, 1.99, 2.03,
2.09, 2.13, 2.16, 2.20, 2.20 )
inflation.df <- data.frame(year=1985:2015, inflation)
# plot inflation rate
ggplot(inflation.df, aes(y=inflation, x=year)) +
geom_point() +
geom_line() +
geom_smooth(method="lm")
## -----------------------------------------------------------------------------
infl.lm <- lm(inflation ~ year, data=inflation.df)
(coefs <- coef(infl.lm))
## -----------------------------------------------------------------------------
payroll <- payroll %>%
mutate(payrollStd = payroll / (coefs[1] + coefs[2] * yearID))
## ----payroll-Boxplot2, fig.width=7--------------------------------------------
car::Boxplot(payrollStd ~ yearID, data=payroll,
id = list(labels=as.character(payroll$teamID)),
ylab="Payroll (1985-adjusted $ millions)")
## ----payroll-winners-plot, fig.width=7----------------------------------------
Cols <- ifelse(payroll$WSWin=='Y', "red", gray(.7, alpha=0.5))
with(payroll, {
plot(payrollStd ~ jitter(yearID, 0.5),
ylab = "Payroll (inflation-adjusted $ millions)",
ylim = c(5,125), log = "y",
xlab = "Year",
pch = 19, cex = 0.8, col = Cols)
})
with(payroll[payroll$WSWin == 'Y',],
text(y = payrollStd, x = yearID, labels = teamID, pos = 3, cex = 0.8) )
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.