R/calculate_xg.R

Defines functions calculate_xg

Documented in calculate_xg

#' Calculate hockeyR expected goals (xG)
#'
#' @description Uses the hockeyR expected goals model to calculate xG for any pbp data frame generated by hockeyR
#'
#' @param pbp A play-by-play data frame, previously returned by hockeyR::scrape_game
#'
#' @return The original supplied play-by-play data with a column for expected goals appended
#' @export
#'
#' @examples
#' \dontrun{
#' pbp <- load_pbp(2022) %>% dplyr::select(-xg)
#' pbp_preds <- calculate_xg(pbp)
#' }
calculate_xg <- function(pbp){

  # get model features
  model_data <- prepare_xg_data(pbp)

  # make 5v5 predictions
  preds_5v5 <- stats::predict(
    xg_model_5v5,
    xgboost::xgb.DMatrix(
      data = model_data %>%
        dplyr::filter(strength_state == "5v5") %>%
        dplyr::select(dplyr::all_of(xg_model_5v5$feature_names)) %>%
        data.matrix(),
      label = model_data %>%
        dplyr::filter(strength_state == "5v5") %>%
        dplyr::select(goal) %>%
        data.matrix()
    )
  ) %>%
    dplyr::as_tibble() %>%
    dplyr::rename(xg = value) %>%
    dplyr::bind_cols(
      dplyr::select(
        dplyr::filter(model_data, strength_state == "5v5"),
        event_id)
    )

  # make ST predictions
  preds_st <- stats::predict(
    xg_model_st,
    xgboost::xgb.DMatrix(
      data = model_data %>%
        dplyr::filter(strength_state != "5v5") %>%
        dplyr::select(dplyr::all_of(xg_model_st$feature_names)) %>%
        data.matrix(),
      label = model_data %>%
        dplyr::filter(strength_state != "5v5") %>%
        dplyr::select(goal) %>%
        data.matrix()
    )
  ) %>%
    dplyr::as_tibble() %>%
    dplyr::rename(xg = value) %>%
    dplyr::bind_cols(
      dplyr::select(
        dplyr::filter(model_data, strength_state != "5v5"),
        event_id)
    )

  # combine
  preds <- dplyr::bind_rows(preds_5v5, preds_st) %>%
    # attach xg column to original pbp data
    dplyr::right_join(pbp, by = "event_id") %>%
    # fix penalty shots
    dplyr::mutate(xg = ifelse(
      secondary_type != "Penalty Shot" | is.na(secondary_type), xg, xg_model_ps
    )) %>%
    dplyr::arrange(event_id)

  return(preds)
}

Try the hockeyR package in your browser

Any scripts or data that you put into this service are public.

hockeyR documentation built on Oct. 12, 2022, 5:07 p.m.