We need a list of player names, dates of birth (DOBs), and MLB IDs for matching.
We also need to have a general player lookup for optimal_auction that can be used to select players. That should be a superset, containing every player in all the projection systems (and arguably all the prospects who might ever be drafted?)
library(magrittr)
First read in Tanner's amazing ID map:
# Google Sheets URL of the player ID map.
id_map_url <- 'https://docs.google.com/spreadsheets/d/1JgczhD5VDQ1EiXqVG-blttZcVwbZd5_Ne_mefUGwJnk'

# worked
# Register the sheet from its URL, read it through the list feed, and
# normalise the column names with janitor::clean_names().
id_map <- googlesheets::gs_url(id_map_url) %>%
  googlesheets::gs_read_listfeed() %>%
  janitor::clean_names()

# Print the first rows as a quick visual check of what was read.
head(id_map)
For all of the name columns, we want to trim whitespace:
# Columns of id_map that hold a player name, one per source.
name_cols <- c(
  "playername", "fangraphsname", "mlbname", "cbsname", "nfbcname",
  "espnname", "kfflname", "yahooname", "mstrbllname", "fantprosname",
  "fanduelname", "draftkingsname"
)

# Trim whitespace in every name column. List-style subsetting
# (`id_map[name_cols]`) always yields a list of whole columns, for tibbles and
# plain data.frames alike, so trim_whitespace() receives each column as one
# vector. The previous `id_map[, i]` form only behaved this way for tibbles;
# on a data.frame it drops to a vector and lapply() would run per element.
# NOTE(review): assumes trim_whitespace() is vectorised over a character
# vector, which is how it is called elsewhere in this file -- confirm.
id_map[name_cols] <- lapply(id_map[name_cols], trim_whitespace)
Read in PECOTA and attach the relevant columns to the data frame
# Read one sheet of the PECOTA workbook.
#
# Cleans the column names with janitor::clean_names(), drops rows that have no
# `mlbcode`, and adds a `playername` column built from `firstname` and
# `lastname`.
#
# @param sheet Sheet index (or name) passed through to readxl::read_excel().
# @return The cleaned sheet with the extra `playername` column.
read_pecota_sheet <- function(sheet) {
  pecota <- readxl::read_excel(
    path = file.path('..', 'paid_projections', 'pecota_2018.xls'),
    sheet = sheet
  ) %>%
    janitor::clean_names() %>%
    dplyr::filter(!is.na(mlbcode))

  pecota$playername <- paste(pecota$firstname, pecota$lastname)
  pecota
}

# Sheet 2: the position comes from the sheet itself, with whitespace trimmed.
pecota_h <- read_pecota_sheet(2)
pecota_h$pos <- trim_whitespace(pecota_h$pos)

# Sheet 3: the position is derived from `gs`; more than one means 'SP',
# otherwise 'RP' (NA when `gs` is NA).
pecota_p <- read_pecota_sheet(3)
pecota_p$pos <- ifelse(pecota_p$gs > 1, 'SP', 'RP')

peek(pecota_h)
peek(pecota_p)
Pick the relevant columns:
# Identifier columns kept from both PECOTA tables.
id_cols <- c('bpid', 'born', 'mlbcode', 'playername', 'pos')

# Reduce each table to the identifier columns, then stack them.
pecota_h_slim <- pecota_h[, id_cols]
pecota_p_slim <- pecota_p[, id_cols]
pecota_slim <- rbind(pecota_h_slim, pecota_p_slim)

# Rename `mlbcode` to `mlbid`, the key used for the join against id_map.
names(pecota_slim) <- replace(
  names(pecota_slim),
  names(pecota_slim) == 'mlbcode',
  'mlbid'
)
Manually add in some names here, and do some cleanup:
# manual additions...
# id_map[id_map$playername == 'Hyun-soo Kim', 'mlbid'] <- 547957
# id_map[id_map$playername == 'Socrates Brito', 'mlbid'] <- 593647
# id_map[id_map$playername == 'Tyler Goeddel', 'mlbid'] <- 595963
# id_map[id_map$playername == 'Aledmys Diaz', 'mlbid'] <- 649557

# Normalise the position column in three steps: upper-case it, trim
# whitespace, then pass it through clean_OF().
id_map$pos <- id_map$pos %>%
  toupper() %>%
  trim_whitespace() %>%
  clean_OF()
An anti-join gets us everyone in the PECOTA data who is NOT in id_map:
# PECOTA rows whose mlbid has no match among the id_map rows that carry a
# non-missing mlbid.
missing <- dplyr::anti_join(
  pecota_slim,
  dplyr::filter(id_map, !is.na(mlbid)),
  by = 'mlbid'
)
peek(missing)

# Append the unmatched PECOTA rows to the ID map.
id_map <- dplyr::bind_rows(id_map, missing)
Then save the id map as an .rda file so it can be used by the package:
# Write the `id_map` object to ../data/id_map.rda.
save(
  list = 'id_map',
  file = file.path('..', 'data', 'id_map.rda')
)
How to quickly generate a minimal documentation skeleton (one `\item` entry per column of id_map):
# One "\item{<column>}{<column>}" entry per column of id_map.
foo <- paste0('\\item{', names(id_map), '}', '{', names(id_map), '}')

# Print each entry on its own line, prefixed with "#' ".
writeLines(paste0('#\' ', foo))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.