#################### NHL HTML ####################
get_nhl_html_events <- function(game_id){
season <-
paste0(
stringr::str_sub(game_id, start = 1, end = 4),
as.numeric(stringr::str_sub(game_id, start = 1, end = 4)) + 1
)
season_game <-
stringr::str_sub(game_id, start = -6, end = -1)
raw_html <- NULL
iattempts <- 5
base_url <- paste0("http://www.nhl.com/scores/htmlreports/", season, "/PL", season_game, ".HTM")
while (! is.character(raw_html) & iattempts > 0){
raw_html <-
tryCatch(
xml2::read_html(base_url),
error = function(e) {NULL}
)
iattempts <- iattempts -1
}
if(length(raw_html) == 0) {
warning("HTML Game Data Not Available")
return(NULL)
} else {
return(raw_html)
}
}
parse_nhl_html_events <- function(raw_html_data, game_id = NULL){
event_col_names <-
c(
"event_id",
"period",
"strength",
"time_elapsed",
"event_type",
"description",
"away_on_ice",
"home_on_ice"
)
raw_html_odds <-
raw_html_data %>%
rvest::html_elements(".oddColor") %>%
rvest::html_children() %>%
rvest::html_text() %>%
matrix(ncol = 8, byrow = TRUE, dimnames = list(NULL, event_col_names)) %>%
tibble::as_tibble(.name_repair = "unique")
raw_html_evens <-
raw_html_data %>%
rvest::html_elements(".evenColor") %>%
rvest::html_children() %>%
rvest::html_text() %>%
matrix(ncol = 8, byrow = TRUE, dimnames = list(NULL, event_col_names)) %>%
tibble::as_tibble(.name_repair = "unique")
pbp_table <-
dplyr::bind_rows(raw_html_odds, raw_html_evens) %>%
dplyr::mutate(
time_elapsed_period =
ifelse(
nchar(stringr::str_extract(time_elapsed, "^[0-9]{1,2}:[0-9]{2}")) == 4,
paste0("0", stringr::str_extract(time_elapsed, "^[0-9]{1,2}:[0-9]{2}")),
stringr::str_extract(time_elapsed, "^[0-9]{1,2}:[0-9]{2}")
),
time_remaining_period =
ifelse(
nchar(stringr::str_extract(time_elapsed, "(?<=:[0-9]{2})[0-9]{1,2}:[0-9]{2}$")) == 4,
paste0("0", stringr::str_extract(time_elapsed, "(?<=:[0-9]{2})[0-9]{1,2}:[0-9]{2}$")),
stringr::str_extract(time_elapsed, "(?<=:[0-9]{2})[0-9]{1,2}:[0-9]{2}$")
),
home_on_ice = stringr::str_replace_all(stringr::str_trim(home_on_ice), "\\s+", "|"),
away_on_ice = stringr::str_replace_all(stringr::str_trim(away_on_ice), "\\s+", "|")
) %>%
dplyr::select(!time_elapsed) %>%
dplyr::arrange(as.numeric(event_id))
if (! is.null(game_id)){
season <-
paste0(
stringr::str_sub(game_id, start = 1, end = 4),
as.numeric(stringr::str_sub(game_id, start = 1, end = 4)) + 1
)
season_game <-
stringr::str_sub(game_id, start = -6, end = -1)
pbp_table <-
pbp_table %>%
dplyr::mutate(
game_id = game_id,
season = season,
season_game = season_game,
.before = event_id
)
}
if(nrow(pbp_table) == 0){
warning("HTML Game Data Not Available")
return(NULL)
}
return(pbp_table)
}
#################### NHL API ####################
get_nhl_api_events <- function(game_id){
raw_feed <- NULL
iattempts <- 5
feed_url <-
paste0("https://statsapi.web.nhl.com/api/v1/game/",
game_id,
"/feed/live")
while(! is.character(raw_feed) & iattempts > 0){
raw_feed <-
tryCatch(
jsonlite::fromJSON(feed_url),
error = function(e) {NULL}
)
iattempts <- iattempts - 1
}
if(length(raw_feed$liveData$plays$allPlays) == 0){
warning("No NHL Game Data Returned")
return(NULL)
} else {
return(raw_feed)
}
}
parse_nhl_api_events <- function(api_events_feed){
event_conversion <-
tibble::tribble(
~event_name, ~event_type,
"GAME_SCHEDULED", "SCHED",
"PERIOD_READY", "PRDY",
"PERIOD_START", "PSTR",
"FACEOFF", "FAC",
"MISSED_SHOT", "MISS",
"SHOT", "SHOT",
"STOP", "STOP",
"GIVEAWAY", "GIVE",
"HIT", "HIT",
"PENALTY", "PENL",
"BLOCKED_SHOT", "BLOCK",
"TAKEAWAY", "TAKE",
"GOAL", "GOAL",
"PERIOD_END", "PEND",
"PERIOD_OFFICIAL", "POFF",
"GAME_END", "GEND",
"GAME_OFFICIAL", "GOFF",
"SHOOTOUT_COMPLETE", "SOC"
)
all_api_plays <- api_events_feed$liveData$plays$allPlays
flat_events <-
all_api_plays %>%
jsonlite::flatten() %>%
dplyr::as_tibble() %>%
tidyr::unnest_wider(players) %>%
tidyr::unnest_wider(player) %>%
tidyr::unnest_wider(id, names_sep = "_", names_repair = "unique") %>%
tidyr::unnest_wider(playerType, names_sep = "_", names_repair = "unique") %>%
tidyr::unnest_wider(fullName, names_sep = "_", names_repair = "unique") %>%
tidyr::unnest_wider(link, names_sep = "_", names_repair = "unique")
if(flat_events$seasonTotal %>% length() > 0){
flat_events <-
flat_events %>%
tidyr::unnest_wider(seasonTotal, names_sep = "_", names_repair = "unique")
}
flat_events <-
flat_events %>%
dplyr::mutate(event_id = dplyr::row_number(),
game_id = api_events_feed$gamePk,
season =
paste0(
stringr::str_sub(api_events_feed$gamePk, start = 1, end = 4),
as.numeric(stringr::str_sub(api_events_feed$gamePk, start = 1, end = 4)) + 1
),
season_game =
stringr::str_sub(api_events_feed$gamePk, start = -6, end = -1)
) %>%
dplyr::select(
dplyr::any_of("game_id"),
dplyr::any_of("season"),
dplyr::any_of("season_game"),
dplyr::any_of("event_id"),
event_index_id = dplyr::any_of("about.eventIdx"),
nhl_event_id = dplyr::any_of("about.eventId"),
event_at = dplyr::any_of("about.dateTime"),
period_num = dplyr::any_of("about.period"),
time_elapsed_period = dplyr::any_of("about.periodTime"),
time_remaining_period = dplyr::any_of("about.periodTimeRemaining"),
event_name = dplyr::any_of("result.eventTypeId"),
event_description = dplyr::any_of("result.description"),
event_detail = dplyr::any_of("result.secondaryType"),
goal_strength = dplyr::any_of("result.strength.code"),
is_game_winning_goal = dplyr::any_of("result.gameWinningGoal"),
is_empty_net_goal = dplyr::any_of("result.emptyNet"),
penalty_severity = dplyr::any_of("result.penaltySeverity"),
penalty_minutes = dplyr::any_of("result.penaltyMinutes"),
home_score = dplyr::any_of("about.goals.home"),
away_score = dplyr::any_of("about.goals.away"),
coordinates_x = dplyr::any_of("coordinates.x"),
coordinates_y = dplyr::any_of("coordinates.y"),
dplyr::starts_with("id_"),
dplyr::starts_with("fullName_"),
dplyr::starts_with("playerType_"),
team_id = team.id,
team_name = team.name,
team_code = team.triCode
) %>%
dplyr::left_join(x = ., y = event_conversion, by = c("event_name" = "event_name")) %>%
dplyr::relocate(event_type, .after = event_name) %>%
dplyr::rename_with(.fn = stringr::str_replace, .cols = everything(), "id_", "player_id_") %>%
dplyr::rename_with(.fn = stringr::str_replace, .cols = everything(), "fullName", "player_name") %>%
dplyr::rename_with(.fn = stringr::str_replace, .cols = everything(), "playerType", "player_type") %>%
dplyr::arrange(event_at)
flat_events$period_num <- as.character(flat_events$period_num)
if(nrow(flat_events) == 0){
warning("No ESPN data available")
return(NULL)
}
return(flat_events)
}
#################### ESPN ####################
get_nhl_espn_data <- function(espn_game_id){
base_URL <-
paste0("https://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&rand=0&gameId=",
espn_game_id)
raw_page <- NULL
iattempts <- 5
while(! is.character(raw_page) & iattempts > 0){
raw_page <-
tryCatch(
RCurl::getURL(
url = base_URL
),
error = function(e) {NULL}
)
iattempts = iattempts - 1
}
if(length(raw_page) == 0){
warning("No ESPN Game Data Returned")
return(NULL)
} else {
converted_page <-
tryCatch(
xml2::read_xml(raw_page),
error = function(e) {NULL}
)
if (is.null(converted_page)){
warning("ESPN XML is Malformed. Exporting entire page as character")
converted_page <-
raw_page
}
}
return(converted_page)
}
parse_nhl_espn_events <- function(raw_espn_data, nhl_game_id = NULL){
espn_plays_columns <-
c(
"coordinates_x", # X1
"coordinates_y", # X2
"espn_type", # X3
"time", # X4
"period_num", # X5
"espn_plays_6", # X6
"espn_plays_7", # X7
"espn_plays_8", # X8
"event_description", # X9
"espn_plays_10", # X10
"espn_plays_11", # X11
"espn_plays_12", # X12
"espn_plays_13", # X13
"espn_plays_14", # X14
"espn_team_id", # X15
"espn_plays_16", # X16
"espn_plays_17", # X17
"espn_plays_18", # X18
"espn_plays_19", # X19
"espn_plays_20" # X20
)
if (is.character(raw_espn_data)){
play_ids <-
raw_espn_data %>%
stringr::str_extract_all("(?<=Play id=\")[0-9]+(?=\">)") %>%
unlist() %>%
tibble::tibble(espn_game_id = .)
play_data <-
raw_espn_data %>%
as.character() %>%
stringr::str_extract_all("<!\\[CDATA\\[.+?>") %>%
sapply(stringr::str_remove,"<!\\[CDATA\\[", simplify = TRUE) %>%
sapply(stringr::str_remove, "\\]{2}>", simplify = TRUE) %>%
dplyr::as_tibble(.name_repair = unique)
parsed_plays <-
play_data %>%
dplyr::filter(!stringr::str_detect(play_data$value, "([0-9]{1,2}:[0-9]{2} PM|[0-9]{1,2}:[0-9]{2} AM|http://)")) %>%
tidyr::separate(col = value, into = espn_plays_columns, sep = "~") %>%
suppressMessages()
final_events <- dplyr::bind_cols(play_ids, parsed_plays) %>% suppressMessages()
} else {
espn_plays <-
raw_espn_data %>%
xml2::xml_child(search = "Plays") %>%
xml2::xml_text()
if (espn_plays == "") {
warning("No Play Data Included in Raw Data")
return(NULL)
} else {
play_ids <-
raw_espn_data %>%
xml2::xml_child(search = "Plays") %>%
xml2::xml_children() %>%
xml2::xml_attr(attr = "id") %>%
tibble::as_tibble_col(column_name = "espn_play_id") %>%
dplyr::mutate(
espn_game_id = stringr::str_sub(espn_play_id, start = 1, end = 9),
.before = espn_play_id
)
parsed_plays <-
raw_espn_data %>%
xml2::xml_child(search = "Plays") %>%
XML::xmlParse() %>%
XML::xpathSApply("//text()", XML::xmlValue) %>%
tibble::as_tibble(.name_repair = "unique") %>%
tidyr::separate(col = value, into = espn_plays_columns, sep = "~", extra = "warn") %>%
suppressMessages()
final_events <- dplyr::bind_cols(play_ids, parsed_plays) %>% suppressMessages()
}
}
if (! is.null(nhl_game_id)){
final_events <-
final_events %>%
dplyr::mutate(
game_id = nhl_game_id,
season =
paste0(
stringr::str_sub(game_id, start = 1, end = 4),
as.numeric(stringr::str_sub(game_id, start = 1, end = 4)) + 1
),
season_game = stringr::str_sub(game_id, start = -6, end = -1),
.before = coordinates_x
)
}
if(nrow(final_events) == 0){
warning("ESPN play data not available")
return(NULL)
}
return(final_events)
}
parse_nhl_espn_rosters <- function(raw_espn_data){
espn_skater_columns <-
c(
"espn_player_id", # X1
"espn_roster_2", # X2
"player_name", # X3
"player_number", # X4
"espn_team_id", # X5
"player_position", # X6
"player_picture_url", # X7
"espn_roster_8", # X8
"espn_roster_9", # X9
"espn_roster_10", # X10
"espn_roster_11", # X11
"espn_roster_12", # X12
"espn_roster_13", # X13
"espn_roster_14", # X14
"espn_roster_15", # X15
"espn_roster_16", # X16
"espn_roster_17", # X17
"espn_roster_18", # X18
"espn_roster_19", # X19
"espn_roster_20", # X20
"espn_roster_21", # X21
"espn_roster_22", # X22
"espn_roster_23", # X23
"espn_roster_24", # X24
"espn_roster_25", # X25
"espn_roster_26", # X26
"espn_roster_27" # X27
)
espn_goalie_columns <-
c(
"espn_player_id", # X1
"espn_roster_2", # X2
"player_name", # X3
"player_number", # X4
"espn_team_id", # X5,
"player_position", # X6
"player_picture_url", # X7
"espn_roster_8", # X8
"espn_roster_9", # X9
"espn_roster_10", # X10
"espn_roster_11", # X11
"espn_roster_12", # X12
"espn_roster_13", # X13
"espn_roster_14", # X14
"espn_roster_15", # X15
"espn_roster_16", # X16
"espn_roster_17" # X17
)
if (is.character(raw_espn_data)){
parsed_data <-
raw_espn_data %>%
as.character() %>%
stringr::str_extract_all("<!\\[CDATA\\[.+?>") %>%
sapply(stringr::str_remove,"<!\\[CDATA\\[", simplify = TRUE) %>%
sapply(stringr::str_remove, "\\]{2}>", simplify = TRUE) %>%
dplyr::as_tibble(.name_repair = unique)
parsed_roster <-
parsed_data %>%
dplyr::filter(stringr::str_detect(parsed_data$value, "http://")) %>%
tidyr::separate(col = value, into = espn_skater_columns, sep = "~", extra = "warn", fill = "right") %>%
dplyr::mutate(player_position = stringr::str_trim(.$player_position))
} else {
parsed_roster <-
raw_espn_data %>%
xml2::xml_child(search = "Players") %>%
XML::xmlParse() %>%
XML::xpathSApply("//text()", XML::xmlValue) %>%
tibble::as_tibble(.name_repair = "unique") %>%
tidyr::separate(col = value, into = espn_skater_columns, sep = "~", extra = "warn", fill = "right") %>%
dplyr::mutate(player_position = stringr::str_trim(.$player_position))
}
skaters <-
parsed_roster %>%
dplyr::filter(player_position != "G")
goalies <-
parsed_roster %>%
dplyr::filter(player_position == "G") %>%
dplyr::select(1:17)
colnames(goalies) <- espn_goalie_columns
return(list(skaters, goalies))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.