# R/prep_shot_chart_data.R

  #' @title Prep Shot Chart Data
  #' @description Gets data ready for the shot chart viz. Downloads the league
  #'   shot-zone summary, fetches the player's shots with
  #'   `get_shot_chart_data()`, bins them with `calculate_hex_coordinates()`,
  #'   and joins everything on the three shot-zone columns.
  #' @param player Player name, passed on to `get_shot_chart_data()`.
  #' @param year Season identifier. Passed on to `get_shot_chart_data()` and
  #'   matched against the `year` column of the league summary.
  #' @param binwidths Forwarded to `calculate_hex_coordinates()` as `binwidths`.
  #' @param min_hex_size Lower bound of `hex_scale`; a hexagon with zero
  #'   attempts is scaled to this value and the busiest hexagon is scaled to 1.
  #' @param pct_diff_lims Length-two vector `c(lower, upper)`; `pct_diff` is
  #'   clamped to this range to produce `pct_diff_lim`.
  #' @return The joined hexagon data with the added columns `diff`, `pct_diff`,
  #'   `hex_scale`, `scaled_x`, `scaled_y`, `pct_diff_lim` and `bins`.
  #' @keywords NBA stat.nba.com
  #' @importFrom magrittr %>%
  #' @export prep_hex_data
  #' @export prep_hex_data_team_def
  #' @examples
  #' prep_hex_data('Russell Westbrook', '2018-19', c(10,10))

prep_hex_data <- function(player, year, binwidths, min_hex_size=0.3,
                                 pct_diff_lims = c(-0.25,0.25)) {

  zone_cols <- c('shot_zone_range', 'shot_zone_area','shot_zone_basic')

  # Inside filter() a bare `year` refers to the CSV column, so `year == year`
  # would compare the column with itself and keep every season. `!!year`
  # injects the value of the function argument instead.
  shot_zone_league_df <- readr::read_csv("https://raw.githubusercontent.com/emilykuehler/basketballstatsR/master/data-raw/league-shot-chart-summary.csv") %>%
    dplyr::filter(year == !!year)

  cat('got league data for',year)

  shot_df <- get_shot_chart_data(player, year)

  cat('got player shot data for',player)

  hex_coord_df <- calculate_hex_coordinates(shot_df, binwidths = binwidths)

  # Per-zone makes, attempts and shooting percentage for this player.
  zone_summary_df <- shot_df %>%
    dplyr::group_by(shot_zone_range, shot_zone_area, shot_zone_basic) %>%
    dplyr::summarise(shots_made = sum(shot_made_flag),
                     shots_attempted = sum(shot_attempted_flag),
                     zone_pct = shots_made / shots_attempted) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(player = shot_df$player_name[1])

  # Denominator of the log scaling; constant across hexagons, so compute once.
  log_max_attempts <- log(max(hex_coord_df$hex_attempts)+1)

  hex_data_df <- hex_coord_df %>%
    dplyr::inner_join(shot_zone_league_df, by=zone_cols) %>%
    dplyr::inner_join(zone_summary_df, by=zone_cols) %>%
    dplyr::mutate(diff = zone_pct - league_pct,
                  pct_diff = diff * 100,
                  # Log-scaled size in [min_hex_size, 1] based on attempts.
                  hex_scale = min_hex_size + (1-min_hex_size)*log(hex_attempts + 1) / log_max_attempts,
                  # Shrink each vertex towards the hexagon centre by hex_scale.
                  scaled_x = center_x + hex_scale * (x - center_x),
                  scaled_y = center_y + hex_scale * (y - center_y),
                  # NOTE(review): the limits are applied to `pct_diff`
                  # (percentage points), but the defaults of +/-0.25 look like
                  # they were meant for the fractional `diff` -- confirm units.
                  pct_diff_lim = pmin(pmax(pct_diff, pct_diff_lims[1]), pct_diff_lims[2]),
                  bins = cut(pct_diff, breaks = c(-100,-6,-3,3,6,100),
                             labels = c('<-6%', '(-6%,-3%)', '(-3%,3%)','(3%,6%)','>6%')))

  hex_data_df

}

# Build hexagon-level shot chart data from an already-fetched shot data frame
# (presumably a team's defensive shots, going by the function name -- confirm
# against callers).
#
# Arguments:
#   shot_df       shot-level data; must carry the three shot-zone columns plus
#                 `shot_made_flag` and `shot_attempted_flag`.
#   season        value matched against the `year` column of the league summary.
#   binwidths     forwarded to calculate_hex_coordinates().
#   min_hex_size  lower bound of `hex_scale`.
#   pct_diff_lims accepted for signature parity but not used in this function;
#                 no `pct_diff_lim` column is produced here.
#
# Returns the joined hexagon data with `diff`, `pct_diff`, `hex_scale`,
# `scaled_x`, `scaled_y` and `bins` added.
prep_hex_data_team_def <- function(shot_df, season, binwidths, min_hex_size=0.3,
                          pct_diff_lims = c(-0.25,0.25)) {

  zone_keys <- c('shot_zone_range', 'shot_zone_area','shot_zone_basic')

  # League-wide zone summary, restricted to the requested season.
  league_zones <- readr::read_csv("https://raw.githubusercontent.com/emilykuehler/basketballstatsR/master/data-raw/league-shot-chart-summary.csv")
  league_zones <- dplyr::filter(league_zones, year==season)

  cat('got league data for',season)

  hexes <- calculate_hex_coordinates(shot_df, binwidths = binwidths)

  # Makes, attempts and percentage per shot zone for the supplied shots.
  shots_by_zone <- dplyr::group_by(shot_df, shot_zone_range, shot_zone_area, shot_zone_basic)
  team_zones <- dplyr::summarise(shots_by_zone,
                                 tm_shots_made = sum(shot_made_flag),
                                 tm_shots_attempted = sum(shot_attempted_flag),
                                 tm_zone_pct = tm_shots_made / tm_shots_attempted)

  # Attach league and team zone figures to every hexagon row.
  joined <- dplyr::inner_join(hexes, league_zones, by=zone_keys)
  joined <- dplyr::inner_join(joined, team_zones, by=zone_keys)

  bin_breaks <- c(-100,-6,-3,3,6,100)
  bin_labels <- c('<-6%', '(-6%,-3%)', '(-3%,3%)','(3%,6%)','>6%')

  # Shared denominator of the log scaling (taken over all hexagons pre-join).
  log_max_attempts <- log(max(hexes$hex_attempts)+1)

  dplyr::mutate(
    joined,
    diff = tm_zone_pct - league_pct,
    pct_diff = diff * 100,
    # Log-scaled size: min_hex_size at zero attempts, 1 at the busiest hexagon.
    hex_scale = min_hex_size + (1-min_hex_size)*log(hex_attempts + 1) / log_max_attempts,
    # Pull each vertex towards the hexagon centre by hex_scale.
    scaled_x = center_x + hex_scale * (x - center_x),
    scaled_y = center_y + hex_scale * (y - center_y),
    bins = cut(pct_diff, breaks = bin_breaks, labels = bin_labels)
  )

}
# emilykuehler/basketballstatsR documentation built on May 31, 2019, 10:01 a.m.