#' This function extracts a list of all game ids for a given date range from the NHL.com API
#'
#' @param start_date the start date of the desired interval
#' @param end_date the end date of the desired interval
#' @export
get_nhl_schedule <- function(start_date = lubridate::today(), end_date = start_date, add_espn_ids = FALSE, verbose = NULL){
json_schedule <- NULL
iattempts <- 5
## Attempt to grab the schedule from the NHL API
while (is.null(json_schedule) & iattempts > 0){
schedule_url <-
paste0("https://statsapi.web.nhl.com/api/v1/schedule?startDate=",
start_date,
"&endDate=",
end_date)
json_schedule <-
tryCatch(
jsonlite::fromJSON(schedule_url),
error = function(e) {NULL}
)
iattempts <- iattempts - 1
}
# Check if there is data available
if (length(json_schedule$dates$games) == 0) {
warning("No NHL Schedule Data Returned")
return(NULL)
}
else {
final_schedule <-
dplyr::bind_rows(json_schedule$dates) %>%
tibble::as_tibble(.name_repair = unique) %>%
tidyr::unnest(games) %>%
jsonlite::flatten() %>%
dplyr::select(
game_id = gamePk,
season,
feed_api = link,
game_type = gameType,
game_date = date,
game_start = gameDate,
game_status = status.abstractGameState,
coded_game_state = status.codedGameState,
game_status_detail = status.detailedState,
status_code = status.statusCode,
start_is_tbd = status.startTimeTBD,
home_team_id = dplyr::any_of("teams.home.team.id"),
home_team = dplyr::any_of("teams.home.team.name"),
home_team_api = dplyr::any_of("teams.home.team.link"),
home_team_score = dplyr::any_of("teams.home.score"),
home_team_wins = dplyr::any_of("teams.home.leagueRecord.wins"),
home_team_losses = dplyr::any_of("teams.home.leagueRecord.losses"),
home_team_ot_losses = dplyr::any_of("teams.home.leagueRecord.ot"),
home_team_record_type = dplyr::any_of("teams.home.leagueRecord.type"),
away_team_id = dplyr::any_of("teams.away.team.id"),
away_team = dplyr::any_of("teams.away.team.name"),
away_team_api = dplyr::any_of("teams.away.team.link"),
away_team_score = dplyr::any_of("teams.away.score"),
away_team_wins = dplyr::any_of("teams.away.leagueRecord.wins"),
away_team_losses = dplyr::any_of("teams.away.leagueRecord.losses"),
away_team_ot_losses = dplyr::any_of("teams.away.leagueRecord.ot"),
away_team_record_type = dplyr::any_of("teams.away.leagueRecord.type"),
venue_id = dplyr::any_of("venue.id"),
venue_name = dplyr::any_of("venue.name"),
venue_api = dplyr::any_of("venue.link"),
content_api = dplyr::any_of("content.link")) %>%
dplyr::mutate(game_date = as.Date(game_date))
if(add_espn_ids == TRUE){
raw_espn_games <- get_espn_schedule_dates(unique(final_schedule$game_date), verbose = verbose)
team_ids <- get_nhl_api_teams()[c("team_id", "team_name")]
espn_games <-
raw_espn_games %>%
dplyr::left_join(team_ids, by = c("home" = "team_name")) %>%
dplyr::rename(home_team_id = team_id) %>%
dplyr::left_join(team_ids, by = c("away" = "team_name")) %>%
dplyr::rename(away_team_id = team_id) %>%
dplyr::select(
game_date,
espn_game_id = game_id,
home_team_id,
away_team_id
) %>%
dplyr::mutate(game_date = as.Date(x = game_date, format = "%Y%m%d"))
espn_home_games <-
espn_games %>%
dplyr::select(
game_date,
espn_game_id,
home_team_id
)
espn_away_games <-
espn_games %>%
dplyr::select(
game_date,
espn_game_id,
away_team_id
)
final_schedule <-
final_schedule %>%
dplyr::left_join(espn_games, by = c("game_date", "home_team_id", "away_team_id")) %>%
dplyr::rename(combined_espn_id = espn_game_id) %>%
dplyr::left_join(espn_home_games, by = c("game_date", "home_team_id")) %>%
dplyr::rename(home_espn_id = espn_game_id) %>%
dplyr::left_join(espn_away_games, by = c("game_date", "away_team_id")) %>%
dplyr::rename(away_espn_id = espn_game_id) %>%
dplyr::mutate(espn_game_id = dplyr::coalesce(combined_espn_id, home_espn_id, away_espn_id), .after = game_id) %>%
dplyr::select(-one_of("combined_espn_id", "home_espn_id", "away_espn_id"))
}
return(final_schedule)
}
}
get_espn_game_id <- function(game_id) {
all_schedules <- NULL
game_list <-
game_id %>%
tibble::as_tibble_col(column_name = "game_id")
year_list <-
stringr::str_extract_all(game_id, "^[0-9]{4}", simplify = TRUE) %>%
unique() %>%
tibble::as_tibble_col(column_name = "year") %>%
dplyr::mutate(schedule_year = paste0(year, as.numeric(year) + 1))
for(iSeason in year_list$schedule_year){
raw_json <- NULL
iattempts <- 5
while (is.null(raw_json) & iattempts > 0){
schedule_url <-
paste0("https://statsapi.web.nhl.com/api/v1/schedule?season=",
iSeason
)
raw_json <-
tryCatch(
jsonlite::fromJSON(schedule_url),
error = function(e) {NULL}
)
iattempts <- iattempts - 1
}
if (! is.null(raw_json) ) {
parsed_json <-
dplyr::bind_rows(raw_json$dates) %>%
tibble::as_tibble(.name_repair = unique) %>%
tidyr::unnest(games) %>%
jsonlite::flatten() %>%
dplyr::select(
game_id = gamePk,
game_date = date,
home_team_id = teams.home.team.id,
away_team_id = teams.away.team.id) %>%
dplyr::mutate(game_date = as.Date(x = game_date, format = "%Y-%m-%d"))
} else {
parsed_json <- NULL
}
all_schedules <-
dplyr::bind_rows(
all_schedules,
parsed_json
)
}
appended_games <-
dplyr::left_join(x = game_list, y = all_schedules, by = "game_id")
date_list <-
appended_games$game_date %>%
unique()
raw_espn_games <- get_espn_schedule_dates(date_list)
team_ids <- get_nhl_api_teams()[c("team_id", "team_name")]
espn_games <-
raw_espn_games %>%
dplyr::left_join(team_ids, by = c("home" = "team_name")) %>%
dplyr::rename(home_team_id = team_id) %>%
dplyr::left_join(team_ids, by = c("away" = "team_name")) %>%
dplyr::rename(away_team_id = team_id) %>%
dplyr::select(
game_date,
espn_game_id = game_id,
home_team_id,
away_team_id
) %>%
dplyr::mutate(game_date = as.Date(x = game_date, format = "%Y%m%d"))
espn_home_games <-
espn_games %>%
dplyr::select(
game_date,
espn_game_id,
home_team_id
)
espn_away_games <-
espn_games %>%
dplyr::select(
game_date,
espn_game_id,
away_team_id
)
appended_games <-
appended_games %>%
dplyr::left_join(espn_games, by = c("game_date", "home_team_id", "away_team_id")) %>%
dplyr::rename(combined_espn_id = espn_game_id) %>%
dplyr::left_join(espn_home_games, by = c("game_date", "home_team_id")) %>%
dplyr::rename(home_espn_id = espn_game_id) %>%
dplyr::left_join(espn_away_games, by = c("game_date", "away_team_id")) %>%
dplyr::rename(away_espn_id = espn_game_id) %>%
dplyr::mutate(espn_game_id = dplyr::coalesce(combined_espn_id, home_espn_id, away_espn_id), .after = game_id) %>%
dplyr::select(game_id, espn_game_id)
return(appended_games)
}
get_espn_schedule <- function(start_date = lubridate::today(), end_date = start_date, verbose = NULL) {
if(is.null(verbose)){
verbose_lower <- "na"
} else {
verbose_lower <- tolower(verbose)
}
date_list <-
seq(from = as.Date(start_date), to = as.Date(end_date), by = 1) %>%
stringr::str_remove_all("-|\\s|/")
raw_schedule <- NULL
espn_schedule <- NULL
iattempts <- 5
for (iDate in date_list){
if(verbose_lower %in% c("detail")){
cat("Scraping ESPN Schedule for ", iDate, "\n", sep = "")
}
base_url <-
paste0("https://www.espn.com/nhl/scoreboard?date=", iDate)
while (is.null(raw_schedule) & iattempts > 0){
raw_schedule <-
tryCatch(
xml2::read_html(base_url),
error = function(e) {NULL}
)
iattempts <- iattempts - 1
}
if (is.null(raw_schedule)) {
warning("No NHL Schedule Data Returned")
} else {
espn_game_ids <-
raw_schedule %>%
rvest::html_nodes(".Scoreboard") %>%
rvest::html_attr("id")
espn_home_teams <-
raw_schedule %>%
rvest::html_nodes(".ScoreboardScoreCell__Item--home") %>%
rvest::html_nodes(".ScoreCell__TeamName") %>%
rvest::html_text2()
espn_away_teams <-
raw_schedule %>%
rvest::html_nodes(".ScoreboardScoreCell__Item--away") %>%
rvest::html_nodes(".ScoreCell__TeamName") %>%
rvest::html_text2()
espn_date <-
tibble::tibble(
game_id = espn_game_ids,
home = espn_home_teams,
away = espn_away_teams) %>%
dplyr::mutate(game_date = iDate, .before = game_id)
espn_schedule <-
dplyr::bind_rows(
espn_schedule,
espn_date
)
base_url <- NULL
raw_schedule <- NULL
iattempts <- 5
}
}
return(espn_schedule)
}
get_espn_schedule_dates <- function(dates, verbose = NULL) {
if(is.null(verbose)){
verbose_lower <- "na"
} else {
verbose_lower <- tolower(verbose)
}
date_list <-
dates %>%
stringr::str_remove_all("-|\\s|/")
raw_schedule <- NULL
espn_schedule <- NULL
iattempts <- 5
for (iDate in date_list){
if(verbose_lower %in% c("detail")){
cat("Scraping ESPN Schedule for ", iDate, "\n", sep = "")
}
base_url <-
paste0("https://www.espn.com/nhl/scoreboard?date=", iDate)
while (is.null(raw_schedule) & iattempts > 0){
raw_schedule <-
tryCatch(
xml2::read_html(base_url),
error = function(e) {NULL}
)
iattempts <- iattempts - 1
}
if (is.null(raw_schedule)) {
warning("No NHL Schedule Data Returned")
} else {
espn_game_ids <-
raw_schedule %>%
rvest::html_nodes(".Scoreboard") %>%
rvest::html_attr("id")
espn_home_teams <-
raw_schedule %>%
rvest::html_nodes(".ScoreboardScoreCell__Item--home") %>%
rvest::html_nodes(".ScoreCell__TeamName") %>%
rvest::html_text2()
espn_away_teams <-
raw_schedule %>%
rvest::html_nodes(".ScoreboardScoreCell__Item--away") %>%
rvest::html_nodes(".ScoreCell__TeamName") %>%
rvest::html_text2()
espn_date <-
tibble::tibble(
game_id = espn_game_ids,
home = espn_home_teams,
away = espn_away_teams) %>%
dplyr::mutate(game_date = iDate, .before = game_id)
espn_schedule <-
dplyr::bind_rows(
espn_schedule,
espn_date
)
base_url <- NULL
raw_schedule <- NULL
iattempts <- 5
}
}
return(espn_schedule)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.