R/marcelPitching.R

Defines functions get_seasonal_averages_pitching get_pitching_stats combine_pitcher_stints apply_marcel_pitching

Documented in apply_marcel_pitching combine_pitcher_stints get_pitching_stats get_seasonal_averages_pitching

#' Get seasonal averages for pitching stats
#' 
#' @param data A data frame containing pitching stats. Should be similar to 
#' Lahman::Pitching
#' @return A data frame containing league averages for pitching stats
#' @export
get_seasonal_averages_pitching <- function(data) {
  
  data %>%
    dplyr::select(-stint, -lgID) %>%
    tidyr::gather(key, value, -playerID, -yearID, -teamID, -IPouts, -BFP) %>%
    dplyr::mutate(value=as.numeric(value)) %>% group_by(key, yearID) %>%
    dplyr::summarise(lgAv=sum(value, na.rm=TRUE)/sum(IPouts, na.rm=TRUE)) %>%
    dplyr::ungroup() %>% 
    dplyr::select(yearID, key, lgAv) %>% 
    tidyr::spread(key, lgAv)
}

#' Get stats for pitchers
#' 
#' Gets stats for pitchers. Pulls data from Lahman::Pitching, 
#' aggregates over stints and appends primary position and age.
#' 
#' @param PrimaryPosition A data frame containing primary position. If not provided, 
#' it will be generated by the function. 
#' @return A data frame containing pitching stats, primary position, and age. pitching stats are aggregated over
#' all stints.
#' @seealso \code{\link{get_primary_pos}} \code{\link{combine_pitcher_stints}}
#' @export
get_pitching_stats <- function(PrimaryPosition=NULL) {
  if (is.null(PrimaryPosition)) {
    PrimaryPosition <- get_primary_pos()
  }
  
  PitchingStats <- combine_pitcher_stints(Lahman::Pitching) %>% 
    merge(PrimaryPosition %>%
            select(playerID, yearID, POS),
          by=c("playerID", "yearID"))
  
  PitchingStats$Age <- get_age(PitchingStats)
  PitchingStats %>% mutate(PA=BFP)
}

#' Combine pitcher stints
#' 
#' Combine pitching stats over all the stints for a given player and season.
#' 
#' @param data A data frame containing pitching stats. Should be similar to Pitching from the Lahman package. 
#' @seealso \code{\link{get_pitching_stats}}
#' @return A data frame conating pitching stats aggregated over all stints for a given player and season.
combine_pitcher_stints <- function(data) {
  
  columns_to_sum <- c("W", "L" ,"G", "GS","CG",
                      "SHO","SV","IPouts","H",
                      "ER","HR", "BB",
                      "SO","IBB","WP","HBP",
                      "BK","BFP","GF","R","SH", "SF", "GIDP")
  
  grouped_data <- sum_stints(data, columns_to_sum) %>% 
    dplyr::mutate(ERA=sum(ER*27)/sum(IPouts),
                  RA9=sum(R*27)/sum(IPouts),
                  BAOpp=sum(H)/sum(BFP-BB-HBP-SH-SF),
                  BABIP=sum(H-HR)/sum(BFP-BB-HBP-SO),
                  OBPOpp=sum(H+BB+HBP)/sum(BFP-SH), 
                  uFIP=sum(13*HR + 3*(BB+HBP) - 2*SO)/sum(IPouts/3)
    )
  grouped_data %>% dplyr::ungroup() %>% dplyr::filter(stint==1)
}

#' Apply marcels for pitchers
#' 
#' @param data A data frame with pitching stats, including seasonal averages.
#' 
#' @param metric A string given the name of the metric to compute projections for, e.g. 'HR'
#' 
#' @param age_adjustment_fun A callable to make the age adjustment
#' 
#' @param metric_weights An array with the weights to give to the projected stats for the previous seasons. 
#' The ith elemnt is the weight for the season i years previous. The default is the c(5, 4, 3).
#' 
#' @param playing_time_weights An array with the weights to be used for projecting playing time
#' 
#' @return A data frame containg Marcel projections for 'metric'. The projection is given the generic
#' name 'proj_value'.
#' 
#' @seealso \code{\link{apply_marcel_pitching}}, 
#' \code{\link{get_team_projected_batting}}, 
#' \code{\link{export_marcels}}
#' @export
apply_marcel_pitching <- function(data, metric, age_adjustment_fun,
                                  metric_weights=c(3,2,1),
                                  playing_time_weights=c(0.5, 0.1, 0)) {
  
  sw <- sum(metric_weights)
  x_pt <- 0
  x_metric <- 0
  
  x_lgav_num <- 0
  x_lgav_denom <- 0
  proj_pt <- 75 + data$GS/data$G * 105
  pebble <- 1e-6
  metric_target_num <- 0
  metric_target_denom <- 0
  
  for (idx in seq_along(metric_weights)) {
    metric_key <- sprintf('%s.%d', metric, idx)
    metric_av_key <- paste0(metric_key, ".SA")
    pt_key <- sprintf('%s.%d', "IPouts", idx)
    playing_time <- na.zero(data[[pt_key]])
    sa_value <- na.zero(data[[metric_av_key]])
    
    x_pt <- x_pt + playing_time * metric_weights[idx]
    x_metric <- x_metric + na.zero(data[[metric_key]]) * metric_weights[idx]
    x_lgav_num <- x_lgav_num + (sa_value * metric_weights[idx] * (playing_time + pebble))
    x_lgav_denom <- x_lgav_denom + (metric_weights[idx] * (playing_time+ pebble))
    proj_pt <- proj_pt + playing_time_weights[[idx]] * playing_time
    metric_target_num <- metric_target_num + (metric_weights[idx] * sa_value)
    metric_target_denom <- metric_target_denom + metric_weights[idx]
    
  }
  
  data$age_adj <- sapply(data$Age+1, age_adjustment_fun)
  x_lgav <- x_lgav_num / x_lgav_denom
  metric_target <- metric_target_num / metric_target_denom
  
  # 134 IPouts * 6 = 804 IPouts ~ 800 IPouts
  data.frame(playerID=data$playerID,
             yearID=data$yearID,
             projectedYearID=data$yearID+1,
             age_adj=data$age_adj,
             x_metric=x_metric,
             x_pt=x_pt, 
             x_lgav=x_lgav, 
             proj_pt=proj_pt,
             metric_target=metric_target) %>%
    mutate(num=x_lgav*134*sw+x_metric, 
           denom=x_pt+134*sw,
           proj_rate_raw=num/denom,
           proj_rate=age_adj*proj_rate_raw,
           proj_value=proj_pt*proj_rate) %>%
    group_by(yearID) %>% 
    mutate(metric_agg = sum(proj_value)/sum(proj_pt),
           proj_value_floating=proj_value,
           metric_multiplier = ifelse(metric_agg>0, metric_target/metric_agg, 1),
           proj_value=proj_value_floating*metric_multiplier) %>% 
    ungroup()
  
}
bdilday/marcelR documentation built on March 7, 2020, 11:02 a.m.