#' Get seasonal averages for batting stats
#'
#' @param data A data frame containing batting stats. Should be similar to
#' Lahman::Batting
#' @return A data frame containing league averages for batting stats
#' @export
get_seasonal_averages_batting <- function(data) {
data %>%
dplyr::select(-stint, -lgID) %>%
tidyr::gather(key, value, -playerID, -yearID, -teamID, -PA) %>%
dplyr::mutate(value=as.numeric(value)) %>% group_by(key, yearID) %>%
dplyr::summarise(lgAv=sum(value, na.rm=TRUE)/sum(PA, na.rm=TRUE)) %>%
dplyr::ungroup() %>%
dplyr::select(yearID, key, lgAv) %>%
tidyr::spread(key, lgAv)
}
#' Get stats for batters
#'
#' Gets stats for batters. Pulls data from Lahman:::battingStats(),
#' aggregates over stints and appends primary position and age.
#'
#' @param PrimaryPosition A data frame containing primary position. If not provided,
#' it will be generated by the function.
#' @return A data frame containing batting stats, primary position, and age. Batting stats are aggregated over
#' all stints.
#' @seealso \code{\link{get_primary_pos}} \code{\link{combine_batter_stints}}
#' @export
get_batting_stats <- function(PrimaryPosition=NULL) {
if (is.null(PrimaryPosition)) {
PrimaryPosition <- get_primary_pos()
}
BattingStats <- combine_batter_stints(Lahman::battingStats()) %>%
mutate(X1B = H - X2B - X3B - HR) %>%
merge(PrimaryPosition %>%
select(playerID, yearID, POS),
by=c("playerID", "yearID"), all.x=TRUE)
BattingStats[which(is.na(BattingStats$POS)),]$POS <- 'DH'
BattingStats$Age <- get_age(BattingStats)
BattingStats
}
#' Combine batter stints
#'
#' Combine batting stats over all the stints for a given player and season.
#'
#' @param data A data frame containing batting stats. Should be similar to Batting from the Lahman package.
#' @seealso \code{\link{get_batting_stats}}
#' @return A data frame containing batting stats aggregated over all stints for a given player and season.
combine_batter_stints <- function(data) {
columns_to_sum <- c("G","PA","AB",
"H","X2B","X3B","HR",
"R","RBI"
,"SB","CS","BB","SO","IBB","HBP","SH","SF","GIDP")
grouped_data <- sum_stints(data, columns_to_sum) %>%
dplyr::mutate(OB=OBP*(PA-SH),
BIP=AB-SO-HR+SF,
HOBIP=H-HR,
OBP=sum(OB)/sum(PA-SH),
SLG=sum(TB)/sum(AB),
BABIP=sum(HOBIP)/sum(BIP))
grouped_data %>% dplyr::ungroup() %>% dplyr::filter(stint==1)
}
#' Apply marcels for batters
#'
#' @param data A data frame with batting stats, including seasonal averages.
#'
#' @param metric A string given the name of the metric to compute projections for, e.g. 'HR'
#'
#' @param age_adjustment_fun A callable to make the age adjustment
#'
#' @param metric_weights An array with the weights to give to the projected stats for the previous seasons.
#' The ith elemnt is the weight for the season i years previous. The default is the c(5, 4, 3).
#'
#' @param playing_time_weights An array with the weights to be used for projecting playing time
#'
#' @return A data frame containg Marcel projections for 'metric'. The projection is given the generic
#' name 'proj_value'.
#' @examples
#' a <- get_batting_stats() %>% filter(yearID>=2001, yearID<=2003) %>% filter(POS!="P")
#' b <- tbl_df(append_previous_years(a,
#' get_seasonal_averages_batting,
#' previous_years = 3))
#' mcl = apply_marcel_batting(b, "HR", age_adjustment)
#' mcl %>% filter(projectedYearID==2004, playerID=='beltrca01')
#'
#' playerID yearID projectedYearID age_adj x_metric x_pa x_av proj_pa
#' 1 beltrca01 2003 2004 1.006 318 7938 0.02867422 573.2
#' num denom proj_rate_raw proj_rate proj_value
#' 1 352.4091 9138 0.03856523 0.03879662 22.23822
#' @seealso \code{\link{apply_marcel_pitching}},
#' \code{\link{get_team_projected_batting}},
#' \code{\link{export_marcels}}
#' @export
apply_marcel_batting <- function(data, metric, age_adjustment_fun,
metric_weights=c(5,4,3),
playing_time_weights=c(0.5, 0.1, 0)
) {
sw <- sum(metric_weights)
x_metric <- 0
x_pa <- 0
x_av_num <- 0
x_av_denom <- 0
proj_pa <- 200
pebble <- 1e-6
metric_target_num <- 0
metric_target_denom <- 0
for (idx in seq_along(metric_weights)) {
metric_key <- sprintf('%s.%d', metric, idx)
metric_av_key <- paste0(metric_key, ".SA")
pa_key <- sprintf('%s.%d', "PA", idx)
pa <- na.zero(data[[pa_key]])
sa_value <- na.zero(data[[metric_av_key]])
x_metric <- x_metric + na.zero(data[[metric_key]]) * metric_weights[idx]
x_pa <- x_pa + pa * metric_weights[idx]
x_av_num <- x_av_num + (sa_value * metric_weights[idx] * (pa + pebble))
x_av_denom <- x_av_denom + (metric_weights[idx] * (pa + pebble))
proj_pa <- proj_pa + playing_time_weights[[idx]] * pa
metric_target_num <- metric_target_num + (metric_weights[idx] * sa_value)
metric_target_denom <- metric_target_denom + metric_weights[idx]
}
data$age_adj <- sapply(data$Age+1, age_adjustment_fun)
x_av <- x_av_num / x_av_denom
metric_target <- metric_target_num / metric_target_denom
data.frame(playerID=data$playerID,
yearID=data$yearID,
projectedYearID=data$yearID+1,
age_adj=data$age_adj,
x_metric=x_metric,
x_pa=x_pa,
x_av=x_av,
proj_pa=proj_pa,
metric_target=metric_target) %>%
mutate(num=x_av*100*sw+x_metric,
denom=x_pa+100*sw,
proj_rate_raw=num/denom,
proj_rate=age_adj*proj_rate_raw,
proj_value=proj_pa*proj_rate) %>%
group_by(yearID) %>%
mutate(metric_agg = sum(proj_value)/sum(proj_pa),
proj_value_floating=proj_value,
metric_multiplier = ifelse(metric_agg>0, metric_target/metric_agg, 1),
proj_value=proj_value_floating*metric_multiplier) %>%
ungroup()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.