Nothing
#' Scrape game play-by-play
#'
#' @param game_id Game ID to scrape
#' @description Scrapes play-by-play data for a specified game ID.
#'
#' @return A tibble containing event-based play-by-play data for an individual
#' NHL game. The resulting data will have columns for:
#' \describe{
#' \item{xg}{Numeric expected goal value for unblocked shot events}
#' \item{event}{String defining the event}
#' \item{event_type}{String with alternate event definition; in all caps}
#' \item{secondary_type}{String defining secondary event type}
#' \item{event_team}{String defining the primary team involved in the event}
#' \item{event_team_type}{String indicator of event team type: home or away}
#' \item{description}{String detailed description of event}
#' \item{period}{Integer value of the game period}
#' \item{period_seconds}{Numeric value of the seconds into the period of the event}
#' \item{period_seconds_remaining}{Numeric value of the seconds remaining in the period}
#' \item{game_seconds}{Numeric value of the seconds into the game of the event}
#' \item{game_seconds_remaining}{Numeric value of the seconds remaining in the game; negative for overtime periods}
#' \item{home_score}{Integer value of the home team score after the event}
#' \item{away_score}{Integer value of the away team score after the event}
#' \item{event_player_1_name}{String name of the primary event player}
#' \item{event_player_1_type}{String indicator for the role of event_player_1}
#' \item{event_player_2_name}{String name of the secondary event player}
#' \item{event_player_2_type}{String indicator for the role of event_player_2}
#' \item{event_player_3_name}{String name of the tertiary event player}
#' \item{event_player_3_type}{String indicator for the role of event_player_3}
#' \item{event_goalie_name}{String name of the goalie involved in the event}
#' \item{strength_code}{String indicator for game strength: EV, SH, or PP}
#' \item{strength}{String name for game strength: Even, Shorthanded, or Power Play}
#' \item{strength_state}{String name for detailed game strength in the form of '(event team skaters)v(opponent skaters)'}
#' \item{penalty_minutes}{Integer value of the penalty minutes on penalty events}
#' \item{penalty_severity}{String name for penalty severity: Minor or Major}
#' \item{num_on}{Integer value of the number of skaters substituted on during a shift change event}
#' \item{players_on}{String of player names substituted on during a shift change event}
#' \item{num_off}{Integer value of the number of skaters substituted off during a shift change event}
#' \item{players_off}{String of player names substituted off during a shift change event}
#' \item{extra_attacker}{Logical indicator of whether or not the event team had their goalie pulled}
#' \item{x}{Numeric x-coordinate of event in feet, with origin at center ice}
#' \item{y}{Numeric y-coordinate of event in feet, with origin at center ice}
#' \item{x_fixed}{Numeric transformed x-coordinate of event in feet, where the home team always shoots to the right, away team to the left}
#' \item{y_fixed}{Numeric transformed y-coordinate of event in feet, where the home team always shoots to the right, away team to the left}
#' \item{shot_distance}{Numeric distance (in feet) to center of net for unblocked shot events}
#' \item{shot_angle}{Numeric angle (in degrees) to center of net for unlocked shot events}
#' \item{home_skaters}{Numeric value for number of skaters on the ice for the home team, excluding the goalie}
#' \item{away_skaters}{Numeric value for number of skaters on the ice for the away team, excluding the goalie}
#' \item{home_on_1}{String name of home team player on ice}
#' \item{home_on_2}{String name of home team player on ice}
#' \item{home_on_3}{String name of home team player on ice}
#' \item{home_on_4}{String name of home team player on ice}
#' \item{home_on_5}{String name of home team player on ice}
#' \item{home_on_6}{String name of home team player on ice}
#' \item{home_on_7}{String name of home team player on ice}
#' \item{away_on_1}{String name of away team player on ice}
#' \item{away_on_2}{String name of away team player on ice}
#' \item{away_on_3}{String name of away team player on ice}
#' \item{away_on_4}{String name of away team player on ice}
#' \item{away_on_5}{String name of away team player on ice}
#' \item{away_on_6}{String name of away team player on ice}
#' \item{away_on_7}{String name of away team player on ice}
#' \item{home_goalie}{String name of home goalie on ice}
#' \item{away_goalie}{String name of away goalie on ice}
#' \item{game_id}{Integer value of assigned game ID}
#' \item{event_idx}{Numeric index for event}
#' \item{event_id}{Numeric id for event -- more specified than event_idx}
#' \item{event_player_1_id}{Integer value of the player ID for the primary event player}
#' \item{event_player_1_link}{String value of the NHL.com player link for the primary event player}
#' \item{event_player_1_season_total}{Integer value for the total events for the primary event player this season}
#' \item{event_player_2_id}{Integer value of the player ID for the secondary event player}
#' \item{event_player_2_link}{String value of the NHL.com player link for the secondary event player}
#' \item{event_player_2_season_total}{Integer value for the total events for the secondary event player this season}
#' \item{event_player_3_id}{Integer value of the player ID for the tertiary event player}
#' \item{event_player_3_link}{String value of the NHL.com player link for the tertiary event player}
#' \item{event_player_3_season_total}{Integer value for the total events for the tertiary event player this season}
#' \item{event_goalie_id}{Integer value of the player ID for the event goalie}
#' \item{event_goalie_link}{String value of the NHL.com player link for the event goalie}
#' \item{event_goalie_type}{String indicator for the role of the event_goalie}
#' \item{game_winning_goal}{Logical indicator of whether or not the goal scored was the game-winning goal}
#' \item{empty_net}{Logical indicator of whether or not the goal scored was on an empty net}
#' \item{period_type}{String name of period type: REGULAR, OVERTIME, or SHOOTOUT}
#' \item{ordinal_num}{String name of the ordinal period: 1st, 2nd, 3rd, 4th...}
#' \item{period_time}{String value of the time into the period of the event}
#' \item{period_time_remaining}{String value of the time remaining in the period}
#' \item{date_time}{String value of the real-world timestamp of the event}
#' \item{event_team_id}{Integer value of the NHL ID of \code{event_team}}
#' \item{event_team_link}{String value of the NHL.com team link for the \code{event_team}}
#' \item{event_team_abbr}{String value of the 3-letter NHL abbreviation for the \code{event_team}}
#' \item{home_final}{Integer value of the final score for the home team}
#' \item{away_final}{Integer value of the final score for the away team}
#' \item{season}{String value of the official NHL season}
#' \item{season_type}{String indicator of season type: R, or P}
#' \item{game_date}{Date of game}
#' \item{game_start}{Date time of start of game in US/Eastern time zone}
#' \item{game_end}{Date time of end of game in US/Eastern time zone}
#' \item{game_length}{Period value of length of game, in hours:minutes}
#' \item{game_state}{String indicator of state of game}
#' \item{detailed_state}{String indicator of detailed game state}
#' \item{venue_id}{Integer value of the NHL ID for the venue}
#' \item{venue_name}{String name of the game venue}
#' \item{venue_link}{String value of the NHL.com link for the venue}
#' \item{home_name}{String name of the home team}
#' \item{home_abbreviation}{String value of the 3-letter NHL abbreviation of the home team}
#' \item{home_division_name}{String value of the name of the NHL division of the home team}
#' \item{home_conference_name}{String value of the name of the NHL conference of the home team}
#' \item{home_id}{Integer value of the NHL ID of the home team}
#' \item{away_name}{String name of the away team}
#' \item{away_abbreviation}{String value of the 3-letter NHL abbreviation of the away team}
#' \item{away_division_name}{String value of the name of the NHL division of the away team}
#' \item{away_conference_name}{String value of the name of the NHL conference of the away team}
#' \item{away_id}{Integer value of the NHL ID of the away team}
#'}
#'
#' @export
#'
#' @examples
#' \dontrun{
#' pbp <- scrape_game(2020020420)
#' }
scrape_game <- function(game_id){
# get game url
url <- glue::glue("http://statsapi.web.nhl.com/api/v1/game/{game_id}/feed/live")
# get raw json pbp data
site <- tryCatch(
jsonlite::read_json(url),
warning = function(cond){
message(paste0("There was a problem with game ID ",game_id,"\n\n",cond))
return(NULL)
},
error = function(cond){
message(paste0("There was a problem with game ID ",game_id,"\n\n",cond))
return(NULL)
}
)
if(is.null(site)){
stop(paste("Could not get play-by-play for game ID",game_id))
}
game_info <- get_game_info(game_id)
rosters <- get_game_rosters(game_id)
corsi_events <- c("MISSED_SHOT","SHOT","GOAL","BLOCKED_SHOT")
fenwick_events <- c("MISSED_SHOT","SHOT","GOAL")
# unnest game plays
plays <- site$liveData$plays$allPlays %>%
dplyr::tibble() %>%
tidyr::unnest_wider(1) %>%
dplyr::select(-players) %>%
tidyr::unnest_wider(result)
if("strength" %in% names(plays)){
plays <- plays %>%
tidyr::unnest_wider(strength) %>%
dplyr::rename(strength_code = code, strength = name)
}
plays <- plays %>%
tidyr::unnest_wider(about) %>%
tidyr::unnest_wider(goals) %>%
dplyr::rename(home_score = home, away_score = away)
# catch error for seasons that don't include shot locations
# (all seasons prior to 2010-11)
if(!is.null(suppressWarnings(plays$coordinates))){
plays <- plays %>%
tidyr::unnest_wider(coordinates)
}
plays <- plays %>%
tidyr::unnest_wider(team)
# fixed issue with canadian v american all-stars with no team tricode
# create our own
if("triCode" %in% names(plays)){
plays <- plays %>%
dplyr::rename(
event_team = name,
event_team_id = id,
event_team_link = link,
event_team_abbr = triCode
)
} else {
plays <- plays %>%
dplyr::rename(
event_team = name,
event_team_id = id,
event_team_link = link
) %>%
dplyr::mutate(
event_team_abbr = ifelse(
event_team == game_info$home_name,
game_info$home_abbreviation,
game_info$away_abbreviation
)
)
}
plays <- plays %>%
janitor::clean_names() %>%
dplyr::mutate(
# clean up the times
period_seconds = lubridate::period_to_seconds(lubridate::ms(period_time)),
game_seconds = period_seconds + (1200 * (period-1)),
period_seconds_remaining = lubridate::period_to_seconds(lubridate::ms(period_time_remaining)),
game_seconds_remaining = ifelse(
period < 4,
((3-period) * 1200) + period_seconds_remaining,
0 - period_seconds
),
home_final = dplyr::last(home_score),
away_final = dplyr::last(away_score)
) %>%
dplyr::rename(event_type = event_type_id)
# add event players
players <- site$liveData$plays$allPlays %>%
dplyr::tibble() %>%
tidyr::unnest_wider(1) %>%
dplyr::select(players) %>%
tidyr::unnest_wider(players, names_sep = "_")
players <- purrr::map_dfc(
.x = 1:length(players),
~fix_player_columns(players, .x)
)
# combine it all
pbp <- dplyr::bind_cols(plays, players, game_info) %>%
dplyr::select(1:5,dplyr::all_of(names(players)),tidyselect::everything()) %>%
dplyr::filter(
# discard redundant rows
event_type %not_in% c("PERIOD_READY","PERIOD_OFFICIAL","PERIOD_START","GAME_OFFICIAL")
)
# create dummy secondary_type column for preseason/all-star games without one
if("secondary_type" %not_in% names(pbp)){
pbp <- pbp %>%
dplyr::mutate(secondary_type = NA)
}
# create dummy penalty column for preseason/all-star games without one
if("penalty_severity" %not_in% names(pbp)){
pbp <- pbp %>%
dplyr::mutate(penalty_severity = NA)
}
# swap blocked shot event players so
# shooter is player_1 & blocker is player_2
if("BLOCKED_SHOT" %in% pbp$event_type){
pbp_blocks <- pbp %>%
dplyr::filter(event_type == "BLOCKED_SHOT") %>%
dplyr::mutate(
# swap event team to match shooting team instead of blocking team
event_team = ifelse(event_team == home_name, away_name, home_name),
event_team_abbr = ifelse(event_team == home_name, home_abbreviation, away_abbreviation),
event_team_id = ifelse(event_team == home_name, as.integer(home_id), as.integer(away_id)),
event_team_link = glue::glue("/api/v1/teams/{event_team_id}"),
blocker_info = glue::glue(
"{event_player_1_id},{event_player_1_name},{event_player_1_link},{event_player_1_type}"
),
event_player_1_id = event_player_2_id,
event_player_1_name = event_player_2_name,
event_player_1_link = event_player_2_link,
event_player_1_type = event_player_2_type
) %>%
tidyr::separate(
blocker_info,
into = c("event_player_2_id","event_player_2_name",
"event_player_2_link","event_player_2_type"),
sep = ",", remove = TRUE
) %>%
dplyr::mutate(event_player_2_id = as.integer(event_player_2_id))
pbp <- pbp %>%
dplyr::filter(event_type != "BLOCKED_SHOT") %>%
dplyr::bind_rows(pbp_blocks) %>%
dplyr::arrange(event_idx)
}
# replace player_4 with goalie
# though sometimes there is no player_4
# ie if only one goal was scored and it's unassisted,
# there will only be two event players
if("event_player_4_name" %in% names(pbp)){
pbp <- pbp %>%
dplyr::mutate(
event_goalie_id = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_id,
event_player_3_type == "Goalie" ~ event_player_3_id,
event_player_4_type == "Goalie" ~ event_player_4_id
),
event_goalie_name = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_name,
event_player_3_type == "Goalie" ~ event_player_3_name,
event_player_4_type == "Goalie" ~ event_player_4_name
),
event_goalie_link = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_link,
event_player_3_type == "Goalie" ~ event_player_3_link,
event_player_4_type == "Goalie" ~ event_player_4_link
),
event_goalie_type = dplyr::case_when(
event_player_2_type == "Goalie" ~ "Goalie",
event_player_3_type == "Goalie" ~ "Goalie",
event_player_4_type == "Goalie" ~ "Goalie"
)
)
} else if("event_player_3_name" %in% names(pbp) & "event_player_4_name" %not_in% names(pbp)){
pbp <- pbp %>%
dplyr::mutate(
event_goalie_id = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_id,
event_player_3_type == "Goalie" ~ event_player_3_id
),
event_goalie_name = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_name,
event_player_3_type == "Goalie" ~ event_player_3_name
),
event_goalie_link = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_link,
event_player_3_type == "Goalie" ~ event_player_3_link
),
event_goalie_type = dplyr::case_when(
event_player_2_type == "Goalie" ~ "Goalie",
event_player_3_type == "Goalie" ~ "Goalie"
)
)
} else if("event_player_3_name" %not_in% names(pbp)){
pbp <- pbp %>%
dplyr::mutate(
event_goalie_id = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_id
),
event_goalie_name = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_name
),
event_goalie_link = dplyr::case_when(
event_player_2_type == "Goalie" ~ event_player_2_link
),
event_goalie_type = dplyr::case_when(
event_player_2_type == "Goalie" ~ "Goalie"
)
)
}
# add shift events
shifts <- get_game_shifts(game_id)
# if shift data exists
if(!is.null(shifts)) {
# MODIFIED FROM THE SCRAPER BY EVOLVING HOCKEY
# WHO MODIFIED ORIGINAL CODE BY MANNY PERRY
# combine shift events with pbp events
pbp_full <- pbp %>%
dplyr::bind_rows(shifts) %>%
dplyr::mutate(
# arrange all events so shift changes are in the proper spot
priority =
1 * (event_type %in% c("TAKEAWAY", "GIVEAWAY", "MISSED_SHOT", "HIT", "SHOT", "BLOCKED_SHOT") & !(period == 5 & season_type == "R")) +
2 * (event_type == "GOAL" & !(period == 5 & season_type == "R")) +
3 * (event_type == "STOP" & !(period == 5 & season_type == "R")) +
4 * (event_type == "PENALTY" & !(period == 5 & season_type == "R")) +
5 * (event_type == "CHANGE" & !(period == 5 & season_type == "R")) +
6 * (event_type == "PERIOD_END" & !(period == 5 & season_type == "R")) +
7 * (event_type == "GAME_END" & !(period == 5 & season_type == "R")) +
8 * (event_type == "FACEOFF" & !(period == 5 & season_type == "R"))
) %>%
dplyr::arrange(period,game_seconds, priority) %>%
dplyr::mutate(
home_index = as.numeric(cumsum(event_type == "CHANGE" &
event_team == unique(pbp$home_name))),
away_index = as.numeric(cumsum(event_type == "CHANGE" &
event_team == unique(pbp$away_name))),
) %>%
dplyr::select(-priority)
# construct a matrix of player names as columns & event row as rows
home_skaters <- NULL
for(i in 1:nrow(rosters)){
player <- rosters$player_name[i]
skaters_i <- dplyr::tibble(
on_ice = cumsum(
1 * stringr::str_detect(
dplyr::filter(pbp_full,
event_type == "CHANGE" &
event_team == unique(pbp$home_name))$players_on,
player) -
1 * stringr::str_detect(
dplyr::filter(pbp_full,
event_type == "CHANGE" &
event_team == unique(pbp$home_name))$players_off,
player)
)
)
suppressMessages({
home_skaters <- dplyr::bind_cols(home_skaters, skaters_i)
})
rm(skaters_i, player)
}
colnames(home_skaters) <- rosters$player_name
home_skaters <- data.frame(home_skaters)
# transform into matrix of only players on the ice
on_home <- which(home_skaters == 1, arr.ind = TRUE) %>%
data.frame() %>%
dplyr::group_by(row) %>%
dplyr::summarize(
home_on_1 = colnames(home_skaters)[unique(col)[1]],
home_on_2 = colnames(home_skaters)[unique(col)[2]],
home_on_3 = colnames(home_skaters)[unique(col)[3]],
home_on_4 = colnames(home_skaters)[unique(col)[4]],
home_on_5 = colnames(home_skaters)[unique(col)[5]],
home_on_6 = colnames(home_skaters)[unique(col)[6]],
home_on_7 = colnames(home_skaters)[unique(col)[7]]
)
away_skaters <- NULL
for(i in 1:nrow(rosters)){
player <- rosters$player_name[i]
skaters_i <- dplyr::tibble(
on_ice = cumsum(
1 * stringr::str_detect(
dplyr::filter(pbp_full,
event_type == "CHANGE" &
event_team == unique(pbp$away_name))$players_on,
player) -
1 * stringr::str_detect(
dplyr::filter(pbp_full,
event_type == "CHANGE" &
event_team == unique(pbp$away_name))$players_off,
player)
)
)
suppressMessages({
away_skaters <- dplyr::bind_cols(away_skaters, skaters_i)
})
rm(skaters_i, player)
}
colnames(away_skaters) <- rosters$player_name
away_skaters <- data.frame(away_skaters)
# transform into matrix of only players on the ice
on_away <- which(away_skaters == 1, arr.ind = TRUE) %>%
data.frame() %>%
dplyr::group_by(row) %>%
dplyr::summarize(
away_on_1 = colnames(away_skaters)[unique(col)[1]],
away_on_2 = colnames(away_skaters)[unique(col)[2]],
away_on_3 = colnames(away_skaters)[unique(col)[3]],
away_on_4 = colnames(away_skaters)[unique(col)[4]],
away_on_5 = colnames(away_skaters)[unique(col)[5]],
away_on_6 = colnames(away_skaters)[unique(col)[6]],
away_on_7 = colnames(away_skaters)[unique(col)[7]]
)
# define goalies
goalies <- rosters %>%
dplyr::filter(position == "G") %>%
dplyr::mutate(
player_name = stringr::str_replace(player_name, " ", ".")
) %>%
dplyr::mutate(
player_name = stringr::str_replace(player_name, "-", ".")
) %>%
dplyr::pull(player_name)
non_plays <- c("GAME_SCHEDULED","PERIOD_END","GAME_END")
pbp_full <- pbp_full %>%
dplyr::left_join(on_home, by = c("home_index" = "row")) %>%
dplyr::left_join(on_away, by = c("away_index" = "row")) %>%
# adding game info to shift change events and moving info to the end
dplyr::select(!dplyr::all_of(names(game_info))) %>%
dplyr::bind_cols(game_info) %>%
dplyr::mutate(
# create goalie on-ice columns
home_goalie = dplyr::case_when(
home_on_1 %in% goalies ~ home_on_1,
home_on_2 %in% goalies ~ home_on_2,
home_on_3 %in% goalies ~ home_on_3,
home_on_4 %in% goalies ~ home_on_4,
home_on_5 %in% goalies ~ home_on_5,
home_on_6 %in% goalies ~ home_on_6,
home_on_7 %in% goalies ~ home_on_7
),
away_goalie = dplyr::case_when(
away_on_1 %in% goalies ~ away_on_1,
away_on_2 %in% goalies ~ away_on_2,
away_on_3 %in% goalies ~ away_on_3,
away_on_4 %in% goalies ~ away_on_4,
away_on_5 %in% goalies ~ away_on_5,
away_on_6 %in% goalies ~ away_on_6,
away_on_7 %in% goalies ~ away_on_7
),
# include only skaters in on-ice columns
# rosters are ordered such that goalies should always be
# last on-ice skater
home_on_1 = ifelse(home_on_1 == home_goalie & !is.na(home_goalie), NA, home_on_1),
home_on_2 = ifelse(home_on_2 == home_goalie & !is.na(home_goalie), NA, home_on_2),
home_on_3 = ifelse(home_on_3 == home_goalie & !is.na(home_goalie), NA, home_on_3),
home_on_4 = ifelse(home_on_4 == home_goalie & !is.na(home_goalie), NA, home_on_4),
home_on_5 = ifelse(home_on_5 == home_goalie & !is.na(home_goalie), NA, home_on_5),
home_on_6 = ifelse(home_on_6 == home_goalie & !is.na(home_goalie), NA, home_on_6),
home_on_7 = ifelse(home_on_7 == home_goalie & !is.na(home_goalie), NA, home_on_7),
away_on_1 = ifelse(away_on_1 == away_goalie & !is.na(away_goalie), NA, away_on_1),
away_on_2 = ifelse(away_on_2 == away_goalie & !is.na(away_goalie), NA, away_on_2),
away_on_3 = ifelse(away_on_3 == away_goalie & !is.na(away_goalie), NA, away_on_3),
away_on_4 = ifelse(away_on_4 == away_goalie & !is.na(away_goalie), NA, away_on_4),
away_on_5 = ifelse(away_on_5 == away_goalie & !is.na(away_goalie), NA, away_on_5),
away_on_6 = ifelse(away_on_6 == away_goalie & !is.na(away_goalie), NA, away_on_6),
away_on_7 = ifelse(away_on_7 == away_goalie & !is.na(away_goalie), NA, away_on_7),
# create strength states
home_skaters =
1 * (!is.na(home_on_1)) + 1 * (!is.na(home_on_2)) +
1 * (!is.na(home_on_3)) + 1 * (!is.na(home_on_4)) +
1 * (!is.na(home_on_5)) + 1 * (!is.na(home_on_6)) +
1 * (!is.na(home_on_7)),
away_skaters =
1 * (!is.na(away_on_1)) + 1 * (!is.na(away_on_2)) +
1 * (!is.na(away_on_3)) + 1 * (!is.na(away_on_4)) +
1 * (!is.na(away_on_5)) + 1 * (!is.na(away_on_6)) +
1 * (!is.na(away_on_7)),
strength_state = dplyr::case_when(
event_team == home_name ~ glue::glue("{home_skaters}v{away_skaters}"),
event_team == away_name ~ glue::glue("{away_skaters}v{home_skaters}"),
TRUE ~ glue::glue("{home_skaters}v{away_skaters}")
),
strength_code = dplyr::case_when(
home_skaters == away_skaters ~ "EV",
(home_skaters < away_skaters & event_team == home_name) |
(away_skaters < home_skaters & event_team == away_name) ~ "SH",
(home_skaters < away_skaters & event_team == away_name) |
(away_skaters < home_skaters & event_team == home_name) ~ "PP"
),
# fixing the change events at start and end of periods
strength_code = ifelse(
event_type %in% non_plays |
dplyr::lead(event_type) %in% non_plays |
dplyr::lead(dplyr::lead(event_type)) %in% non_plays |
dplyr::lag(event_type) %in% non_plays |
dplyr::lag(dplyr::lag(event_type)) %in% non_plays,
NA, strength_code
),
strength = dplyr::case_when(
strength_code == "EV" ~ "Even",
strength_code == "SH" ~ "Shorthanded",
strength_code == "PP" ~ "Power Play"
),
extra_attacker = ifelse(
((event_team == home_name & is.na(home_goalie)) |
(event_team == away_name & is.na(away_goalie))) &
event_type %not_in% non_plays & event_type != "CHANGE", TRUE, FALSE
)
)
pbp_full <- pbp_full %>%
dplyr::mutate(
event_idx = dplyr::row_number()-1,
home_final = dplyr::last(home_score),
away_final = dplyr::last(away_score),
period_seconds_remaining = dplyr::case_when(
(season_type != "P" & period < 4) | season_type == "P" ~ 1200 - period_seconds,
season_type != "P" & period == 4 ~ 300 - period_seconds,
TRUE ~ period_seconds_remaining
),
description = ifelse(
event_type == "CHANGE",
glue::glue("ON: {players_on}; OFF: {players_off}"),
description
),
secondary_type = ifelse(
event_type == "CHANGE",
"On the fly",
secondary_type
),
secondary_type = ifelse(
event_type == "CHANGE" &
(dplyr::lead(event_type) == "FACEOFF" |
dplyr::lag(event_type) == "STOP" |
dplyr::lag(event_type) == "GOAL" |
dplyr::lead(event_type) == "PERIOD_END" |
dplyr::lag(event_type) == "PERIOD_END" |
dplyr::lead(dplyr::lead(event_type)) == "PERIOD_END" |
dplyr::lag(dplyr::lag(event_type)) == "PERIOD_END"),
"Line change",
secondary_type
)
) %>%
# remove unnecessary columns
dplyr::select(-event_id,-home_index,-away_index,-event_code)
# add fixed x & y coordinates so home team shoots right, away shoots left
pbp_full <- pbp_full %>%
dplyr::group_by(event_team, period, game_id) %>%
# find median x shot coordinate to tell us which side teams are shooting on
dplyr::mutate(med_x = stats::median(x[event_type %in% fenwick_events], na.rm = TRUE)) %>%
dplyr::ungroup() %>%
dplyr::mutate(
x_fixed = dplyr::case_when(
event_team == home_name & med_x > 0 ~ x,
event_team == home_name & med_x < 0 ~ 0 - x,
event_team == away_name & med_x > 0 ~ 0 - x,
event_team == away_name & med_x < 0 ~ x
),
y_fixed = dplyr::case_when(
event_team == home_name & med_x > 0 ~ y,
event_team == home_name & med_x < 0 ~ 0 - y,
event_team == away_name & med_x > 0 ~ 0 - y,
event_team == away_name & med_x < 0 ~ y
),
# add shot distance/angle
shot_distance = dplyr::case_when(
event_team == home_name & event_type %in% fenwick_events ~
round(abs(sqrt((x_fixed - 89)^2 + (y_fixed)^2)),1),
event_team == away_name & event_type %in% fenwick_events ~
round(abs(sqrt((x_fixed - (-89))^2 + (y_fixed)^2)),1)
),
shot_angle = dplyr::case_when(
event_team == home_name & event_type %in% fenwick_events ~
round(abs(atan((0-y_fixed) / (89-x_fixed)) * (180 / pi)),1),
event_team == away_name & event_type %in% fenwick_events ~
round(abs(atan((0-y_fixed) / (-89-x_fixed)) * (180 / pi)),1)
),
# fix behind the net angles
shot_angle = ifelse(
(event_team == home_name & x_fixed > 89) |
(event_team == away_name & x_fixed < -89),
180 - shot_angle,
shot_angle
),
event_team_type = dplyr::case_when(
event_team == home_name ~ "home",
event_team == away_name ~ "away"
)
) %>%
dplyr::select(-med_x)
# reorder the columns
if("event_player_3_name" %in% names(pbp_full)){
pbp_full <- pbp_full %>%
# change event player names to match on-ice player name conventions
dplyr::mutate_at(c("event_player_1_name","event_player_2_name",
"event_player_3_name","event_goalie_name"),
~stringr::str_replace_all(.x, c(" " = ".", "-" = "."))) %>%
dplyr::select(
event_type, event, secondary_type, event_team, event_team_type,
description, period, period_seconds, period_seconds_remaining,
game_seconds, game_seconds_remaining, home_score, away_score,
event_player_1_name, event_player_1_type, event_player_2_name,
event_player_2_type, event_player_3_name, event_player_3_type,
event_goalie_name, strength_state, strength_code:event_idx,
num_on, players_on, num_off, players_off, extra_attacker,
x, y, x_fixed, y_fixed, shot_distance, shot_angle,
home_skaters, away_skaters, home_on_1:away_on_7,
home_goalie, away_goalie, game_id, event_idx,
tidyselect::everything()
)
} else {
pbp_full <- pbp_full %>%
dplyr::mutate_at(c("event_player_1_name","event_player_2_name",
"event_goalie_name"),
~stringr::str_replace_all(.x, c(" " = ".", "-" = "."))) %>%
dplyr::select(
event_type, event, secondary_type, event_team, event_team_type,
description, period, period_seconds, period_seconds_remaining,
game_seconds, game_seconds_remaining, home_score, away_score,
event_player_1_name, event_player_1_type, event_player_2_name,
event_player_2_type,
event_goalie_name, strength_state, strength_code:event_idx,
num_on, players_on, num_off, players_off, extra_attacker,
x, y, x_fixed, y_fixed, shot_distance, shot_angle,
home_skaters, away_skaters, home_on_1:away_on_7,
home_goalie, away_goalie, game_id, event_idx,
tidyselect::everything()
)
}
# fill in current score for shift events
# first assign score of 0 to start of game to fix issue with
# game ID 2018020965, which is missing the "GAME_SCHEDULED" event
pbp_full$home_score[1] <- 0
pbp_full$away_score[1] <- 0
pbp_full$home_score <- zoo::na.locf(pbp_full$home_score)
pbp_full$away_score <- zoo::na.locf(pbp_full$away_score)
# fix period type for shift events
pbp_full <- pbp_full %>%
dplyr::mutate(
period_type = dplyr::case_when(
period < 4 ~ "REGULAR",
season_type %in% c("R","PR") & period == 4 ~ "OVERTIME",
season_type %in% c("R","PR") & period == 5 ~ "SHOOTOUT",
season_type == "P" & period > 3 ~ "OVERTIME"
),
ordinal_num = dplyr::case_when(
period == 1 ~ glue::glue("{period}st"),
period == 2 ~ glue::glue("{period}nd"),
period == 3 ~ glue::glue("{period}rd"),
TRUE ~ glue::glue("{period}th")
)
)
} else if(is.null(shifts) & "x" %in% names(pbp)){
# no shift data available but shot location available
# ie preseason
pbp_full <- pbp %>%
dplyr::select(-event_id,-event_code) %>%
# add fixed x & y coordinates so home team shoots right, away shoots left
dplyr::group_by(event_team, period, game_id) %>%
# find median x shot coordinate to tell us which side teams are shooting on
dplyr::mutate(med_x = stats::median(x[event_type %in% fenwick_events], na.rm = TRUE)) %>%
dplyr::ungroup() %>%
dplyr::mutate(
x_fixed = dplyr::case_when(
event_team == home_name & med_x > 0 ~ x,
event_team == home_name & med_x < 0 ~ 0 - x,
event_team == away_name & med_x > 0 ~ 0 - x,
event_team == away_name & med_x < 0 ~ x
),
y_fixed = dplyr::case_when(
event_team == home_name & med_x > 0 ~ y,
event_team == home_name & med_x < 0 ~ 0 - y,
event_team == away_name & med_x > 0 ~ 0 - y,
event_team == away_name & med_x < 0 ~ y
),
# add shot distance/angle
shot_distance = dplyr::case_when(
event_team == home_name & event_type %in% fenwick_events ~
round(abs(sqrt((x_fixed - 89)^2 + (y_fixed)^2)),1),
event_team == away_name & event_type %in% fenwick_events ~
round(abs(sqrt((x_fixed - (-89))^2 + (y_fixed)^2)),1)
),
shot_angle = dplyr::case_when(
event_team == home_name & event_type %in% fenwick_events ~
round(abs(atan((0-y_fixed) / (89-x_fixed)) * (180 / pi)),1),
event_team == away_name & event_type %in% fenwick_events ~
round(abs(atan((0-y_fixed) / (-89-x_fixed)) * (180 / pi)),1)
),
# fix behind the net angles
shot_angle = ifelse(
(event_team == home_name & x_fixed > 89) |
(event_team == away_name & x_fixed < -89),
180 - shot_angle,
shot_angle
),
event_team_type = dplyr::case_when(
event_team == home_name ~ "home",
event_team == away_name ~ "away"
)
) %>%
dplyr::select(-med_x)
if("event_player_3_name" %in% names(pbp_full)) {
pbp_full <- pbp_full %>%
dplyr::mutate_at(
c("event_player_1_name","event_player_2_name",
"event_player_3_name","event_goalie_name"),
~stringr::str_replace_all(.x, c(" " = ".", "-" = "."))
) %>%
dplyr::select(
event_type, event, secondary_type, event_team, event_team_type,
description, period, period_seconds, period_seconds_remaining,
game_seconds, game_seconds_remaining, home_score, away_score,
event_player_1_name, event_player_1_type, event_player_2_name,
event_player_2_type, event_player_3_name, event_player_3_type,
event_goalie_name, penalty_severity:strength, x, y, x_fixed, y_fixed,
shot_distance, shot_angle,
game_id, event_idx,
tidyselect::everything()
)
} else {
pbp_full <- pbp_full %>%
dplyr::mutate_at(
c("event_player_1_name","event_player_2_name",
"event_goalie_name"),
~stringr::str_replace_all(.x, c(" " = ".", "-" = "."))
) %>%
dplyr::select(
event_type, event, secondary_type, event_team, event_team_type,
description, period, period_seconds, period_seconds_remaining,
game_seconds, game_seconds_remaining, home_score, away_score,
event_player_1_name, event_player_1_type, event_player_2_name,
event_player_2_type,
event_goalie_name, penalty_severity:strength, x, y, x_fixed, y_fixed,
shot_distance, shot_angle,
game_id, event_idx,
tidyselect::everything()
)
}
} else {
# no shift data or shot location
if("event_player_3_name" %in% names(pbp)){
pbp_full <- pbp %>%
dplyr::mutate_at(
c("event_player_1_name","event_player_2_name",
"event_player_3_name","event_goalie_name"),
~stringr::str_replace_all(.x, c(" " = ".", "-" = "."))
) %>%
dplyr::select(-event_id,-event_code) %>%
dplyr::mutate(
event_team_type = dplyr::case_when(
event_team == home_name ~ "home",
event_team == away_name ~ "away"
)
) %>%
dplyr::select(
event_type, event, secondary_type, event_team, event_team_type,
description, period, period_time, period_time_remaining,
event_player_1_name, event_player_1_type, event_player_2_name,
event_player_2_type, event_player_3_name, event_player_3_type,
event_goalie_name, game_id, event_idx, tidyselect::everything()
)
} else {
pbp_full <- pbp %>%
dplyr::mutate_at(
c("event_player_1_name","event_player_2_name",
"event_goalie_name"),
~stringr::str_replace_all(.x, c(" " = ".", "-" = "."))
) %>%
dplyr::select(-event_id,-event_code) %>%
dplyr::mutate(
event_team_type = dplyr::case_when(
event_team == home_name ~ "home",
event_team == away_name ~ "away"
)
) %>%
dplyr::select(
event_type, event, secondary_type, event_team, event_team_type,
description, period, period_time, period_time_remaining,
event_player_1_name, event_player_1_type, event_player_2_name,
event_player_2_type,
event_goalie_name, game_id, event_idx, tidyselect::everything()
)
}
}
# add event_id
pbp_full <- pbp_full %>%
dplyr::mutate(
event_idx = stringr::str_pad(event_idx, width = 4, side = "left", pad = 0),
event_id = as.numeric(paste0(game_id,event_idx)),
secondary_type = ifelse(
stringr::str_detect(dplyr::lead(description), "PS -") &
event_type %in% c("SHOT","MISSED_SHOT","GOAL"),
"Penalty Shot", secondary_type
)
)
# add xg
# depends on strenght state, needs shift data to be there
if("home_skaters" %in% names(pbp_full)){
pbp_full <- calculate_xg(pbp_full)
}
return(pbp_full)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.