inst/doc/payroll.R

## ----nomessages, echo = FALSE-------------------------------------------------
# set some default options for chunks
knitr::opts_chunk$set(
  warning = FALSE,   # avoid warnings and messages in the output
  message = FALSE,
  collapse = TRUE,   # collapse all output into a single block
  tidy = FALSE,      # don't tidy our code-- assume we do it ourselves
  fig.height = 5,
  fig.width = 5
)
options(digits=4)    # number of digits to display in output; can override with chunk option R.options=list(digits=)
par(mar=c(3,3,1,1)+.1)

set.seed(1234)       # reproducibility

## ----load-packages, echo=FALSE------------------------------------------------
library(Lahman)   # Load additional packages here 
library(ggplot2)
library(dplyr)

## -----------------------------------------------------------------------------
data("Salaries", package="Lahman")
str(Salaries)
sample_n(Salaries, 10)

## ----Teams-names--------------------------------------------------------------
data("Teams", package="Lahman")
dim(Teams)
names(Teams)

## ----Teams-filter-------------------------------------------------------------
Teams <- Teams %>%
  select(yearID, lgID, teamID, name, divID, Rank, WSWin, attendance) %>%
  filter(yearID >= 1985) %>%
  mutate(teamID = droplevels(teamID))

sample_n(Teams, 10)

## ----SeriesPost-names---------------------------------------------------------
data("SeriesPost", package="Lahman")
names(SeriesPost)

## -----------------------------------------------------------------------------
table(SeriesPost$round)

## ----filter-WS----------------------------------------------------------------
WS <- SeriesPost %>%
  filter(yearID >= 1985 & round == "WS") %>%
  select(-ties, -round) %>%
  mutate(lgIDloser = droplevels(lgIDloser),
         lgIDwinner = droplevels(lgIDwinner))
dim(WS)
sample_n(WS, 6)

## -----------------------------------------------------------------------------
table(Salaries$yearID)

## -----------------------------------------------------------------------------
range(Salaries$salary)

## -----------------------------------------------------------------------------
Salaries %>% 
  group_by(yearID) %>% 
  summarise(min=min(salary),
            max=max(salary))

## -----------------------------------------------------------------------------
which(Salaries$salary==0)

## ----which-zero---------------------------------------------------------------
Salaries[which(Salaries$salary==0),]

## ----filter-salaries----------------------------------------------------------
Salaries <- Salaries %>%
    filter(salary !=0)

## ----summarise-salaries-------------------------------------------------------
payroll <- Salaries %>%
                group_by(teamID, yearID) %>%
                summarise(payroll = sum(salary)/1000000)
head(payroll)

## ----merge-teams--------------------------------------------------------------
payroll <- merge(payroll, Teams[,c("yearID", "teamID","name", "WSWin")], 
                 by=c("yearID", "teamID")) 
sample_n(payroll, 10)

## ----eval=FALSE---------------------------------------------------------------
#  left_join(payroll, Teams[,c("yearID", "teamID","name", "WSWin")],
#            by=c("yearID", "teamID")) %>%
#    sample_n(10)

## ----make-WSWin-factor--------------------------------------------------------
payroll <- payroll %>%
  mutate(WSWin = factor(WSWin))

## -----------------------------------------------------------------------------
table(payroll$WSWin, useNA="ifany")

## ----payroll-boxplot, fig.width=7---------------------------------------------
boxplot(payroll ~ yearID, data=payroll, ylab="Payroll ($ millions)")

## ---- payroll-Boxplot, fig.width=7--------------------------------------------
out <- car::Boxplot(payroll ~ yearID, data=payroll,
             id=list(n=1, 
                     labels=as.character(payroll$teamID)), 
             ylab="Payroll ($ millions)")

## -----------------------------------------------------------------------------
table(out)

## -----------------------------------------------------------------------------
inflation = c(1,    1.02, 1.06, 1.10, 1.15, 1.21, 
              1.27, 1.30, 1.34, 1.38, 1.42, 1.46, 1.49, 1.51, 1.55, 1.60,
              1.65, 1.67, 1.71, 1.76, 1.82, 1.87, 1.93, 2.00, 1.99, 2.03,
              2.09, 2.13, 2.16, 2.20, 2.20 )

inflation.df <- data.frame(year=1985:2015, inflation)

# plot inflation rate
ggplot(inflation.df, aes(y=inflation, x=year)) +
  geom_point() +
  geom_line() +
  geom_smooth(method="lm")
  

## -----------------------------------------------------------------------------
infl.lm <- lm(inflation ~ year, data=inflation.df)
(coefs <- coef(infl.lm))

## -----------------------------------------------------------------------------
payroll <- payroll %>%
  mutate(payrollStd = payroll / (coefs[1] + coefs[2] * yearID))

## ---- payroll-Boxplot2, fig.width=7-------------------------------------------
car::Boxplot(payrollStd ~ yearID, data=payroll,
             id = list(labels=as.character(payroll$teamID)), 
             ylab="Payroll (1985-adjusted $ millions)")

## ---- payroll-winners-plot, fig.width=7---------------------------------------
Cols <- ifelse(payroll$WSWin=='Y', "red", gray(.7, alpha=0.5))
with(payroll, {
  plot(payrollStd ~ jitter(yearID, 0.5), 
       ylab = "Payroll (inflation-adjusted $ millions)",
       ylim = c(5,125), log = "y",
       xlab = "Year",
       pch = 19, cex = 0.8, col = Cols)
})
with(payroll[payroll$WSWin == 'Y',],
     text(y = payrollStd, x = yearID, labels = teamID, pos = 3, cex = 0.8) )

Try the Lahman package in your browser

Any scripts or data that you put into this service are public.

Lahman documentation built on May 4, 2023, 9:11 a.m.