R/calculate_hex_coordinates.R

#' @title Calculate HexBin Coordinates
#' @description Calculates Hexbin coordinates for shot chart
#' @keywords NBA stat.nba.com
#' @param shot_detail_df dataframe containing shot locations
#' @param binwidths a vector of binwidths for x and y axes
#' @import ggplot2
#' @importFrom magrittr %>%
#' @export calculate_hex_coordinates
#' @examples
#' calculate_hex_coordinates(player_df)


hex_bounds <- hex_bounds <- function(x, binwidth) {
  c(
    plyr::round_any(min(x), binwidth, floor) - 1e-6,
    plyr::round_any(max(x), binwidth, ceiling) + 1e-6
  )
}

calculate_hex_coordinates <- function(shot_detail_df, binwidths = c(1,1)) {

  #get inputs for hexbin function
  x_bounds <-  hex_bounds(shot_detail_df$loc_x, binwidths[1])
  x_bins <-  diff(x_bounds) / binwidths[1]
  y_bounds <-  hex_bounds(shot_detail_df$loc_y, binwidths[2])
  y_bins <-  diff(y_bounds) / binwidths[2]

  #get hexbin object
  hex_bin <-  hexbin::hexbin(
    x = shot_detail_df$loc_x,
    y = shot_detail_df$loc_y,
    xbins = x_bins,
    xbnds = x_bounds,
    ybnds = y_bounds,
    shape = y_bins / x_bins,
    IDs = TRUE
  )

  #add hex id to to shot chart
  shot_detail_df <- shot_detail_df %>%
    dplyr::mutate(hex_id = hex_bin@cID)

  #group by id
  #TO DO: add more stats
  hex_bin_summary <-  shot_detail_df %>%
    dplyr::group_by(hex_id) %>%
    dplyr::summarise(
      hex_attempts = n(),
      hex_pct = mean(shot_made_flag)
    )

  hex_bin_zone_summary <- shot_detail_df %>%
    dplyr::group_by(hex_id, shot_zone_range, shot_zone_area, shot_zone_basic) %>%
    dplyr::summarise(attempts = n()) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(hex_id, desc(attempts)) %>%
    dplyr::group_by(hex_id) %>%
    dplyr::top_n(1, attempts) %>%
    dplyr::select(hex_id, shot_zone_range, shot_zone_area, shot_zone_basic)

  hex_bin_summary <- dplyr::inner_join(hex_bin_summary, hex_bin_zone_summary, by = "hex_id")

  #from hexbin package: https://github.com/edzer/hexbin
  sx <-  hex_bin@xbins / diff(hex_bin@xbnds)
  sy <-  (hex_bin@xbins * hex_bin@shape) / diff(hex_bin@ybnds)
  dx <-  1 / (2 * sx)
  dy <-  1 / (2 * sqrt(3) * sy)
  origin_coords <-  hexbin::hexcoords(dx, dy)

  hex_centers <-  hexbin::hcell2xy(hex_bin)

  hex_coords <- dplyr::bind_rows(lapply(1:hex_bin@ncells, function(i) {
    tibble::tibble(
      x = origin_coords$x + hex_centers$x[i],
      y = origin_coords$y + hex_centers$y[i],
      center_x = hex_centers$x[i],
      center_y = hex_centers$y[i],
      hex_id = hex_bin@cell[i]
    )
  }))

  final_df <- dplyr::inner_join(hex_coords, hex_bin_summary, by = "hex_id")

  return(final_df)

}
emilykuehler/basketballstatsR documentation built on May 31, 2019, 10:01 a.m.