We need a list of players' names, dates of birth (DOBs), and MLB IDs for matching.

We also need a general player lookup for optimal_auction that can be used to select players. It should be a superset containing every player in all the projection systems (and arguably every prospect who might ever be drafted).

library(magrittr)

First read in Tanner's amazing ID map:

# Google Sheets URL of the player ID map.
id_map_url <- 'https://docs.google.com/spreadsheets/d/1JgczhD5VDQ1EiXqVG-blttZcVwbZd5_Ne_mefUGwJnk'

#worked
# Same three steps as a pipe chain, spelled out as named intermediates:
# look the sheet up by URL, read it through the list feed, then clean the
# column names with janitor.
id_map_sheet <- googlesheets::gs_url(id_map_url)
id_map_raw <- googlesheets::gs_read_listfeed(id_map_sheet)
id_map <- janitor::clean_names(id_map_raw)

head(id_map)

For all of the name columns, we want to trim whitespace:

# Columns of id_map that hold a player's name (one column per source/site).
name_cols <- c(
  "playername", "fangraphsname", "mlbname", "cbsname", "nfbcname",
  "espnname", "kfflname", "yahooname", "mstrbllname", "fantprosname",
  "fanduelname", "draftkingsname")

# Fail early, before any column is modified, if the sheet no longer carries
# one of the expected name columns.
missing_name_cols <- setdiff(name_cols, names(id_map))
if (length(missing_name_cols) > 0) {
  stop(
    "id_map is missing name column(s): ",
    paste(missing_name_cols, collapse = ", "),
    call. = FALSE
  )
}

# Trim every name column in place. `[[` extracts the column as a plain vector
# for tibbles and base data frames alike, so trim_whitespace() always receives
# the whole column (the same way it is applied to `pos` in this script).
# The previous `lapply(id_map[, i], ...)` form only worked when `id_map[, i]`
# stayed a one-column data frame, i.e. only for tibbles.
for (i in name_cols) {
  id_map[[i]] <- trim_whitespace(id_map[[i]])
}

Read in PECOTA and attach the relevant columns to the data frame:

# Both PECOTA sheets are read from the same workbook.
pecota_path <- file.path('..', 'paid_projections', 'pecota_2018.xls')

# Sheet 2 -> pecota_h: clean the column names and keep only rows that have
# an mlbcode.
pecota_h <- dplyr::filter(
  janitor::clean_names(readxl::read_excel(path = pecota_path, sheet = 2)),
  !is.na(mlbcode)
)

# Full name is "firstname lastname"; pos is whitespace-trimmed.
pecota_h$playername <- paste(pecota_h$firstname, pecota_h$lastname)
pecota_h$pos <- trim_whitespace(pecota_h$pos)

# Sheet 3 -> pecota_p: same cleaning and mlbcode filter.
pecota_p <- dplyr::filter(
  janitor::clean_names(readxl::read_excel(path = pecota_path, sheet = 3)),
  !is.na(mlbcode)
)

pecota_p$playername <- paste(pecota_p$firstname, pecota_p$lastname)
# pos is derived from gs (presumably games started -- confirm): more than one
# is labelled 'SP', anything else 'RP'.
pecota_p$pos <- ifelse(pecota_p$gs > 1, 'SP', 'RP')

peek(pecota_h)
peek(pecota_p)

Pick the relevant columns:

# Identifying columns shared by both PECOTA tables.
id_cols <- c('bpid', 'born', 'mlbcode', 'playername', 'pos')

# Keep just those columns from each table, then stack the two.
pecota_h_slim <- pecota_h[id_cols]
pecota_p_slim <- pecota_p[id_cols]
pecota_slim <- rbind(pecota_h_slim, pecota_p_slim)

# Rename mlbcode to mlbid so the column name matches the one used in id_map.
names(pecota_slim) <- replace(
  names(pecota_slim),
  names(pecota_slim) == 'mlbcode',
  'mlbid'
)

Manually add in some names here, and do some cleanup:

#manual additions...
#id_map[id_map$playername == 'Hyun-soo Kim', 'mlbid'] <- 547957
#id_map[id_map$playername == 'Socrates Brito', 'mlbid'] <- 593647
#id_map[id_map$playername == 'Tyler Goeddel', 'mlbid'] <- 595963
#id_map[id_map$playername == 'Aledmys Diaz', 'mlbid'] <- 649557

# Normalise pos in one pass: upper-case, trim whitespace, then apply clean_OF().
id_map$pos <- id_map$pos %>%
  toupper() %>%
  trim_whitespace() %>%
  clean_OF()

An anti-join gets us everyone in PECOTA who is NOT already in id_map:

# Only id_map rows that actually have an mlbid take part in the comparison.
id_map_with_mlbid <- id_map[!is.na(id_map$mlbid), ]

# PECOTA rows whose mlbid has no match among those id_map rows.
missing <- dplyr::anti_join(pecota_slim, id_map_with_mlbid, by = 'mlbid')

peek(missing)

# Append the unmatched PECOTA rows to the ID map.
id_map <- dplyr::bind_rows(id_map, missing)

Then save the id map as an .rda file so it can be used by the package:

# Write the id_map object to ../data/id_map.rda.
save(list = 'id_map', file = file.path('..', 'data', 'id_map.rda'))

errata / not run

How to quickly make a minimal documentation file:

# One "\item{<column>}{<column>}" entry per id_map column.
foo <- paste0('\\item{', names(id_map), '}', '{', names(id_map), '}')

# Print each entry on its own line, prefixed with the roxygen marker "#' ".
cat(paste0('#\' ', foo, '\n'), sep = '')


almartin82/projprep documentation built on May 10, 2019, 9:57 a.m.