R/marcelBatting.R

Defines functions get_seasonal_averages_batting get_batting_stats combine_batter_stints apply_marcel_batting

Documented in apply_marcel_batting combine_batter_stints get_batting_stats get_seasonal_averages_batting

#' Get seasonal averages for batting stats
#' 
#' @param data A data frame containing batting stats. Should be similar to 
#' Lahman::Batting
#' @return A data frame containing league averages for batting stats
#' @export
get_seasonal_averages_batting <- function(data) {
  
  data %>%
    dplyr::select(-stint, -lgID) %>%
    tidyr::gather(key, value, -playerID, -yearID, -teamID, -PA) %>%
    dplyr::mutate(value=as.numeric(value)) %>% group_by(key, yearID) %>%
    dplyr::summarise(lgAv=sum(value, na.rm=TRUE)/sum(PA, na.rm=TRUE)) %>%
    dplyr::ungroup() %>% 
    dplyr::select(yearID, key, lgAv) %>% 
    tidyr::spread(key, lgAv)
}

#' Get stats for batters
#' 
#' Gets stats for batters. Pulls data from Lahman:::battingStats(), 
#' aggregates over stints and appends primary position and age.
#' 
#' @param PrimaryPosition A data frame containing primary position. If not provided, 
#' it will be generated by the function. 
#' @return A data frame containing batting stats, primary position, and age. Batting stats are aggregated over
#' all stints.
#' @seealso \code{\link{get_primary_pos}} \code{\link{combine_batter_stints}}
#' @export
get_batting_stats <- function(PrimaryPosition=NULL) {
  if (is.null(PrimaryPosition)) {
    PrimaryPosition <- get_primary_pos()
  }
  
  BattingStats <- combine_batter_stints(Lahman::battingStats()) %>% 
    mutate(X1B = H - X2B - X3B - HR) %>% 
    merge(PrimaryPosition %>%
            select(playerID, yearID, POS),
          by=c("playerID", "yearID"), all.x=TRUE)
  BattingStats[which(is.na(BattingStats$POS)),]$POS <- 'DH'
  BattingStats$Age <- get_age(BattingStats)
  BattingStats
}

#' Combine batter stints
#' 
#' Combine batting stats over all the stints for a given player and season.
#' 
#' @param data A data frame containing batting stats. Should be similar to Batting from the Lahman package. 
#' @seealso \code{\link{get_batting_stats}}
#' @return A data frame containing batting stats aggregated over all stints for a given player and season.
combine_batter_stints <- function(data) {
  
  columns_to_sum <- c("G","PA","AB",
                      "H","X2B","X3B","HR",
                      "R","RBI"
                      ,"SB","CS","BB","SO","IBB","HBP","SH","SF","GIDP")
  grouped_data <- sum_stints(data, columns_to_sum) %>% 
    dplyr::mutate(OB=OBP*(PA-SH), 
                  BIP=AB-SO-HR+SF, 
                  HOBIP=H-HR, 
                  OBP=sum(OB)/sum(PA-SH), 
                  SLG=sum(TB)/sum(AB),
                  BABIP=sum(HOBIP)/sum(BIP))
  
  grouped_data %>% dplyr::ungroup() %>% dplyr::filter(stint==1)
}

#' Apply marcels for batters
#' 
#' @param data A data frame with batting stats, including seasonal averages.
#' 
#' @param metric A string given the name of the metric to compute projections for, e.g. 'HR'
#' 
#' @param age_adjustment_fun A callable to make the age adjustment
#' 
#' @param metric_weights An array with the weights to give to the projected stats for the previous seasons. 
#' The ith elemnt is the weight for the season i years previous. The default is the c(5, 4, 3).
#' 
#' @param playing_time_weights An array with the weights to be used for projecting playing time
#' 
#' @return A data frame containg Marcel projections for 'metric'. The projection is given the generic
#' name 'proj_value'.
#' @examples
#' a <- get_batting_stats() %>% filter(yearID>=2001, yearID<=2003) %>% filter(POS!="P")
#' b <- tbl_df(append_previous_years(a, 
#'                                   get_seasonal_averages_batting, 
#'                                   previous_years = 3))
#' mcl = apply_marcel_batting(b, "HR", age_adjustment)
#' mcl %>% filter(projectedYearID==2004, playerID=='beltrca01')
#' 
#'    playerID yearID projectedYearID age_adj x_metric x_pa       x_av proj_pa
#' 1 beltrca01   2003            2004   1.006      318 7938 0.02867422   573.2
#' num denom proj_rate_raw  proj_rate proj_value
#' 1 352.4091  9138    0.03856523 0.03879662   22.23822
#' @seealso \code{\link{apply_marcel_pitching}}, 
#' \code{\link{get_team_projected_batting}}, 
#' \code{\link{export_marcels}}
#' @export
apply_marcel_batting <- function(data, metric, age_adjustment_fun,
                         metric_weights=c(5,4,3),
                         playing_time_weights=c(0.5, 0.1, 0)
                         ) {
  sw <- sum(metric_weights)
 
  x_metric <- 0
  x_pa <- 0
  x_av_num <- 0
  x_av_denom <- 0
  proj_pa <- 200
  pebble <- 1e-6
  metric_target_num <- 0
  metric_target_denom <- 0
  
  for (idx in seq_along(metric_weights)) {
    metric_key <- sprintf('%s.%d', metric, idx)
    metric_av_key <- paste0(metric_key, ".SA")
    pa_key <- sprintf('%s.%d', "PA", idx)
    pa <- na.zero(data[[pa_key]])
    sa_value <- na.zero(data[[metric_av_key]])
    
    x_metric <- x_metric + na.zero(data[[metric_key]]) * metric_weights[idx]
    x_pa <- x_pa + pa * metric_weights[idx]
    x_av_num <- x_av_num + (sa_value * metric_weights[idx] * (pa + pebble))
    x_av_denom <- x_av_denom + (metric_weights[idx] * (pa + pebble))
    proj_pa <- proj_pa + playing_time_weights[[idx]] * pa
    metric_target_num <- metric_target_num + (metric_weights[idx] * sa_value)
    metric_target_denom <- metric_target_denom + metric_weights[idx]
    }
  data$age_adj <- sapply(data$Age+1, age_adjustment_fun)
  
  x_av <- x_av_num / x_av_denom
  metric_target <- metric_target_num / metric_target_denom
  data.frame(playerID=data$playerID,
             yearID=data$yearID,
             projectedYearID=data$yearID+1,
             age_adj=data$age_adj,
             x_metric=x_metric,
             x_pa=x_pa, 
             x_av=x_av, 
             proj_pa=proj_pa,
             metric_target=metric_target) %>%
    mutate(num=x_av*100*sw+x_metric, 
           denom=x_pa+100*sw,
           proj_rate_raw=num/denom,
           proj_rate=age_adj*proj_rate_raw,
           proj_value=proj_pa*proj_rate) %>% 
    group_by(yearID) %>% 
    mutate(metric_agg = sum(proj_value)/sum(proj_pa),
           proj_value_floating=proj_value,
           metric_multiplier = ifelse(metric_agg>0, metric_target/metric_agg, 1),
           proj_value=proj_value_floating*metric_multiplier) %>% 
    ungroup()

}
bdilday/marcelR documentation built on March 7, 2020, 11:02 a.m.