R/stats_StatCrew.R

Defines functions cleanBatting_StatCrew cleanPitching_StatCrew cleanStats_StatCrew fetchStatCrew getStats_StatCrew joinStatCrew

# Fetch the StatCrew tables at `statsURL`, clean them, and join the result
# onto `player_df`.
#
# Arguments:
#   statsURL  - handed to fetchStatCrew() as its `url` argument.
#   player_df - roster data handed to joinStatCrew().
#   ...       - forwarded unchanged to fetchStatCrew().
#
# Returns the value produced by joinStatCrew() for `player_df` and the
# cleaned tables.
getStats_StatCrew <- function(statsURL, player_df, ...) {
  raw_tables <- fetchStatCrew(statsURL, ...)
  cleaned_tables <- cleanStats_StatCrew(raw_tables)

  joinStatCrew(player_df, cleaned_tables)
}


# Read a StatCrew page and pull out its batting and pitching tables.
#
# Arguments:
#   url    - a single, non-empty character string passed to
#            xml2::read_html().
#   header - passed to rvest::html_table() as `header`.
#   fill   - passed to rvest::html_table() as `fill`.
#
# Returns a named list of two tibbles: `batting`, built from the first
# matching table on the page, and `pitching`, built from the second.
#
# Stops with an error when `url` is not a single, non-missing, non-empty
# string. Warns when the page has fewer than two matching tables.
fetchStatCrew <- function(url, header = TRUE, fill = TRUE) {
  if (!is.character(url) || length(url) != 1L || is.na(url) || !nzchar(url)) {
    stop("`url` must be a single, non-empty character string.", call. = FALSE)
  }

  page <- xml2::read_html(url)

  # Only <table> elements that carry a `cellpadding` attribute are considered.
  nodes <- rvest::html_nodes(page, "table[cellpadding]")
  tables <- rvest::html_table(nodes, header = header, fill = fill)

  # Tables are picked by position (1 = batting, 2 = pitching). The extraction
  # below is unchanged when one is missing; warn so the caller is not left
  # with a silently incomplete result.
  if (length(tables) < 2L) {
    warning(
      "Expected at least 2 tables matching 'table[cellpadding]' but found ",
      length(tables), ".",
      call. = FALSE
    )
  }

  # `tables[index]` keeps the list wrapper; flattening one level turns the
  # table into a plain list of columns before it is converted to a tibble.
  as_stat_tibble <- function(index) {
    dplyr::as_tibble(unlist(tables[index], recursive = FALSE))
  }

  list(batting = as_stat_tibble(1L), pitching = as_stat_tibble(2L))
}




# cleanStats_StatCrew ----

# Normalise player names in every StatCrew table, then clean each table.
#
# Arguments:
#   tableList - a list with `batting` and `pitching` tables, each holding a
#               `Player` column.
#
# Returns a list with elements `batting` (from cleanBatting_StatCrew()) and
# `pitching` (from cleanPitching_StatCrew()).
cleanStats_StatCrew <- function(tableList) {
  # Matches "LAST, First". The hyphen is placed last in each character class
  # so it is a literal "-". Written as " -'" it forms a range from space to
  # apostrophe, which excludes "-" and splits hyphenated surnames mid-name.
  name_pattern <- "([A-Z '-]+), ([[:alpha:]\\. '-]+)"

  # Rewrites "LAST, First" as "First LAST".
  reorder_name <- function(x) gsub(name_pattern, "\\2 \\1", x)

  # The name format is detected on the batting table only; when it is found
  # the rewrite is applied to every table in the list.
  if (any(grepl(name_pattern, tableList$batting$Player))) {
    tableList <- purrr::map(
      tableList,
      function(tbl) dplyr::mutate(tbl, Player = reorder_name(Player))
    )
  }

  # Lower-case names only after reordering: the pattern above relies on the
  # upper-case surname.
  tableList <- purrr::map(
    tableList,
    function(tbl) dplyr::mutate(tbl, Player = tolower(Player))
  )

  list(
    batting = cleanBatting_StatCrew(tableList$batting),
    pitching = cleanPitching_StatCrew(tableList$pitching)
  )
}



# cleanBatting_StatCrew ----

# Clean a StatCrew batting table.
#
# Arguments:
#   table - batting table with an "sb-att" column and a player column whose
#           upper-cased name is PLAYER.
#
# Returns the table reduced to PLAYER plus the columns named in
# `battingStats`, each stat column suffixed with "_BattingSeason".
# `battingStats` is not defined in this file; the original comment points to
# broadcastR:::battingStats.
cleanBatting_StatCrew <- function(table) {
  # Split the combined "sb-att" column into numeric `sb` and `att`.
  split_steals <- tidyr::separate(
    table,
    "sb-att",
    into = c("sb", "att"),
    sep = "-",
    convert = TRUE
  )

  # Caught stealing is attempts minus successful steals.
  with_cs <- dplyr::mutate(split_steals, cs = att - sb)

  # Upper-case every column name, then keep PLAYER and whichever of the
  # wanted stats are actually present.
  upper_cased <- dplyr::rename_all(with_cs, toupper)
  wanted <- dplyr::select(
    upper_cased,
    PLAYER,
    base::intersect(battingStats, names(upper_cased))
  )

  # Drop incomplete rows as well as the totals and opposition rows.
  players_only <- dplyr::filter(
    wanted,
    !is.na(AB),
    !grepl("--|totals?|opponents?", PLAYER)
  )

  # Tag every stat column (everything except PLAYER) as a batting stat.
  dplyr::rename_at(
    players_only,
    dplyr::vars(-PLAYER),
    function(stat) paste0(stat, "_BattingSeason")
  )
}





# Clean a StatCrew pitching table.
#
# Arguments:
#   table - pitching table whose upper-cased column names include PLAYER,
#           ERA, "W-L", "APP-GS" and SHO.
#
# Returns the table reduced to PLAYER plus the columns named in
# `pitchingStats` (not defined in this file), each stat column suffixed with
# "_PitchingSeason". IP_PitchingSeason is returned as text with at least one
# decimal place and no padding.
cleanPitching_StatCrew <- function(table) {
  pitching <- dplyr::rename_all(table, toupper) %>%
    # Drop incomplete rows as well as the totals and opposition rows.
    dplyr::filter(
      !is.na(ERA),
      !grepl("--|totals?|opponents?", PLAYER)
    ) %>%
    tidyr::separate("W-L", into = c("Win", "Loss"), sep = "-") %>%
    tidyr::separate("APP-GS", into = c("G", "GS"), sep = "-") %>%
    # Take CG shutouts only, not combined ones: keep the leading digits.
    dplyr::mutate(SHO = stringr::str_extract(SHO, "^\\d+")) %>%
    dplyr::select(PLAYER, base::intersect(pitchingStats, names(.))) %>%
    dplyr::rename_at(dplyr::vars(-PLAYER), ~ paste0(., "_PitchingSeason"))

  # format() pads every element to a common width, so shorter values come
  # back with surrounding spaces (e.g. " 5.0" next to "10.1"); trim them.
  dplyr::mutate(
    pitching,
    IP_PitchingSeason = trimws(format(IP_PitchingSeason, nsmall = 1))
  )
}




# Attach cleaned batting and pitching stats to a roster.
#
# Arguments:
#   player_df - roster data with a `Name` column plus columns that title-case
#               to First, Last and Number.
#   tableList - list with `batting` and `pitching` tables, each keyed by a
#               PLAYER column.
#
# Returns `player_df` with title-cased column names, First/Last renamed to
# FirstName/LastName, the stats left-joined on lower-cased name, and rows
# ordered by numeric Number.
joinStatCrew <- function(player_df, tableList) {
  # Names are matched case-insensitively: lower-case both sides of the join.
  player_df$Name <- tolower(player_df$Name)
  stat_tables <- purrr::map(
    tableList,
    function(tbl) dplyr::mutate(tbl, PLAYER = tolower(PLAYER))
  )

  roster <- dplyr::rename_all(player_df, stringr::str_to_title)
  roster <- dplyr::rename(roster, FirstName = First, LastName = Last)
  # Pre-sort on the raw Number so ties in the numeric sort below keep this
  # order.
  roster <- dplyr::arrange(roster, Number)

  join_key <- c("Name" = "PLAYER")
  with_batting <- dplyr::left_join(roster, stat_tables$batting, by = join_key)
  with_pitching <- dplyr::left_join(
    with_batting,
    stat_tables$pitching,
    by = join_key
  )

  dplyr::arrange(with_pitching, as.numeric(Number))
}
jrogol/broadcastR documentation built on March 10, 2021, 4:20 p.m.