#' zscore
#'
#' @param proj_list named ('h', 'p') list of data frames, ie output of get_steamer
#' @param hit_pitch 'h' or 'p'
#' @param stat_direction vector that flags if the stat is
#' reverse-framed (lower = better). default behavior is
#' to read from user_settings.
#' @param is_rate vector that flags if the state is a rate stat.
#' default behavior is to read from user_settings.
#' @param limit_player_pool zscores relative to all players, or relative size of drafted
#' player pool?
#'
#' @return list of data frames with stat zscores
#' @export
zscore <- function(
proj_list,
hit_pitch,
stat_direction = user_settings %>%
magrittr::extract2(paste0(hit_pitch, '_higher_better')),
is_rate = user_settings %>%
magrittr::extract2(paste0(hit_pitch, '_rate')),
limit_player_pool = TRUE
) {
this_df <- proj_list %>% magrittr::extract2(hit_pitch)
this_stats <- user_settings %>% magrittr::extract2(hit_pitch)
zscored <- list()
zscored[['mlbid']] <- this_df$mlbid
for (i in seq_along(this_stats)) {
stat <- this_stats[i]
stat_dir <- ifelse(stat_direction[i], 1, -1)
stat_rate <- is_rate[i]
for_zscoring <- data.frame(
mlbid = this_df$mlbid,
stat = this_df %>%
magrittr::extract(stat) %>%
unname(),
stringsAsFactors = FALSE
)
#convert rate to count
if (stat_rate & hit_pitch == 'h') {
for_zscoring$stat <- this_df$ab * for_zscoring$stat
}
#this was mostly a bad idea because it penalizes SP with high IP.
#era and whip are currently set to FALSE. rethink this? see issue 47
if (stat_rate & hit_pitch == 'p') {
for_zscoring$stat <- this_df$ip * for_zscoring$stat
}
zscore_df <- for_zscoring %>%
dplyr::mutate(
zscore = (stat - mean(stat)) / sd(stat)
)
#handle missing stats
zscore_df$zscore <- ifelse(is.nan(zscore_df$zscore), 0, zscore_df$zscore)
zscore_df$zscore <- ifelse(is.na(zscore_df$zscore), 0, zscore_df$zscore)
zscore_df$zscore <- zscore_df$zscore * stat_dir
zscored[[paste(stat, 'zscore', sep = '_')]] <- zscore_df$zscore
}
#sum
zscore_cols <- zscored[names(zscored) != 'mlbid']
zscored[['zscore_sum']] <- rowSums(
dplyr::bind_rows(zscore_cols)
)
out <- dplyr::bind_rows(zscored)
if (limit_player_pool) {
total_drafted <- user_settings$league_size * user_settings$roster_size
roster_pct <- ifelse(
hit_pitch == 'h', user_settings$h_roster_pct, 1 - user_settings$h_roster_pct
)
pool_limit <- ceiling(total_drafted * roster_pct)
#cut by the top N
zscored <- dplyr::bind_rows(zscored) %>%
dplyr::mutate(
ranking = rank(desc(zscore_sum), ties.method = 'first')
)
#go back to orig stats
master_zscoring <- this_df %>%
left_join(zscored[, c('mlbid', 'ranking')], by = 'mlbid')
#limit to top N players, then loop over each stat and zscore
top_n <- master_zscoring %>%
dplyr::filter(
ranking <= pool_limit
)
#lists to hold stats
zscored_limit <- list()
zscored_limit[['mlbid']] <- this_df$mlbid
for (i in seq_along(this_stats)) {
stat <- this_stats[i]
stat_dir <- ifelse(stat_direction[i], 1, -1)
stat_rate <- is_rate[i]
top_n$stat <- top_n %>% magrittr::extract(stat)
for_zscoring <- data.frame(
mlbid = this_df$mlbid,
stat = this_df %>% magrittr::extract(stat) %>% unname(),
stringsAsFactors = FALSE
)
#convert rate to count
if (stat_rate & hit_pitch == 'h') {
top_n$stat <- top_n$ab * top_n$stat
for_zscoring$stat <- this_df$ab * for_zscoring$stat
}
#see issue 47
if (stat_rate & hit_pitch == 'p') {
top_n$stat <- top_n$ip * top_n$stat
for_zscoring$stat <- this_df$ip * for_zscoring$stat
}
top_n_stat <- top_n$stat %>% unname() %>% unlist()
stat_zscored <- for_zscoring %>%
dplyr::mutate(
zscore = (stat - mean(top_n_stat)) / sd(top_n_stat)
)
#handle missing stats
stat_zscored$zscore <- ifelse(
is.nan(stat_zscored$zscore), 0, stat_zscored$zscore
)
stat_zscored$zscore <- ifelse(
is.na(stat_zscored$zscore), 0, stat_zscored$zscore
)
stat_zscored$zscore <- stat_zscored$zscore * stat_dir
zscored_limit[[paste(stat, 'zscore', sep = '_')]] <- stat_zscored$zscore
}
#sum
zscored_limit[['zscore_sum']] <- rowSums(
zscored_limit[names(zscored_limit) != 'mlbid'] %>% dplyr::bind_rows()
)
out <- dplyr::bind_rows(zscored_limit)
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.