R/scrape_full_nhl_game.r

Defines functions full_nhl_scrape get_game_pbp

#' Scrape play-by-play data for one or more NHL games
#'
#' For each game in `game_id`, fetches and parses the NHL HTML event report
#' and the NHL API event feed, and, when the game has a non-missing ESPN id,
#' the ESPN event data. Parsed results from every game are row-bound per
#' source with `dplyr::bind_rows()`.
#'
#' @param game_id Vector of NHL game ids to scrape.
#' @param espn_game_id Optional vector of ESPN game ids, matched by position
#'   with `game_id`. When `NULL`, the pairing is obtained from
#'   `get_espn_game_id(game_id)`.
#' @param verbose `NULL` for silent operation, or `"summary"` / `"detail"`
#'   (case-insensitive) to print progress and timings to the console.
#'
#' @return An unnamed list of length three holding, in order, the combined
#'   HTML events, API events and ESPN events. An element is `NULL` when no
#'   game yielded parsed data for that source (including when `game_id` is
#'   empty).
#' @export
get_game_pbp <- function(game_id, espn_game_id = NULL, verbose = NULL) {

  start_time <- Sys.time()

  # Time elapsed since `since`, formatted for console output.
  elapsed <- function(since) {
    format(difftime(Sys.time(), since, units = "auto"), digits = 2, nsmall = 1)
  }

  if (is.null(verbose)) {
    verbose_lower <- "na"
  } else {
    verbose_lower <- tolower(verbose)
  }

  game_count <- length(game_id)

  # Nothing to scrape: return the same shape an all-NULL run would produce
  # instead of looking up ESPN ids for, or indexing into, an empty set.
  if (game_count == 0) {
    return(list(NULL, NULL, NULL))
  }

  if (verbose_lower %in% c("summary", "detail")) {
    cat("Start Time: ", format(start_time, usetz = TRUE), "\n", sep = "")
  }

  final_html <- NULL
  final_api <- NULL
  final_espn <- NULL

  if (is.null(espn_game_id)) {

    if (verbose_lower == "detail") {
      espn_id_start <- Sys.time()
      cat("ESPN Ids Not Included. Adding Now... ")
    }

    # NOTE(review): the loop below reads column 1 as the NHL id and column 2
    # as the ESPN id, one row per game -- confirm get_espn_game_id() returns
    # that layout.
    game_list <- get_espn_game_id(game_id)

    if (verbose_lower == "detail") {
      cat(elapsed(espn_id_start), "\n")
    }

  } else {
    game_list <-
      tibble::tibble(
        game_id = game_id,
        espn_game_id = espn_game_id
      )
  }

  # seq_len() rather than 1:n so the loop can never run over c(1, 0).
  for (iGame in seq_len(game_count)) {

    nhl_game <- game_list[[iGame, 1]]
    espn_game <- game_list[[iGame, 2]]

    if (verbose_lower == "detail") {
      cat(strrep("-", 50), "\n")
    }

    if (verbose_lower %in% c("summary", "detail")) {
      cat("Scraping Game ", nhl_game, " (", iGame, " of ", game_count, ")...", sep = "")
    }

    # ---- NHL HTML report ----
    html_start <- Sys.time()

    raw_html <- get_nhl_html_events(nhl_game)

    if (!is.null(raw_html)) {

      parsed_html <- parse_nhl_html_events(raw_html, game_id = nhl_game)

      if (!is.null(parsed_html)) {
        final_html <-
          dplyr::bind_rows(
            final_html,
            parsed_html %>%
              dplyr::mutate(espn_game_id = espn_game, .before = event_id)
          )
      }

      parsed_html <- NULL
    }

    if (verbose_lower == "detail") {
      cat("\nHTML: ", elapsed(html_start), sep = "")
    }

    # ---- NHL API feed ----
    api_start <- Sys.time()

    raw_api <- get_nhl_api_events(nhl_game)

    if (!is.null(raw_api)) {

      parsed_api <- parse_nhl_api_events(raw_api)

      if (!is.null(parsed_api)) {
        final_api <-
          dplyr::bind_rows(
            final_api,
            parsed_api %>%
              dplyr::mutate(espn_game_id = espn_game, .before = event_id)
          )
      }

      parsed_api <- NULL
    }

    if (verbose_lower == "detail") {
      cat(", API: ", elapsed(api_start), sep = "")
    }

    # ---- ESPN data (only when an ESPN id is known for this game) ----
    espn_start <- Sys.time()

    if (!is.na(espn_game)) {
      raw_espn <- get_nhl_espn_data(espn_game)

      if (!is.null(raw_espn)) {
        final_espn <-
          dplyr::bind_rows(
            final_espn,
            parse_nhl_espn_events(raw_espn, nhl_game_id = nhl_game)
          )
      }
    }

    # Whole-game timing is measured from the start of the HTML step.
    game_duration <- elapsed(html_start)

    if (verbose_lower == "detail") {
      cat(", ESPN: ", elapsed(espn_start), ", Total: ", game_duration, "\n", sep = "")
    } else if (verbose_lower == "summary") {
      cat(" ", game_duration, "\n", sep = "")
    }

    # Drop references to this game's raw payloads before the next iteration.
    raw_html <- NULL
    raw_api <- NULL
    raw_espn <- NULL
  }

  if (verbose_lower %in% c("summary", "detail")) {
    cat(strrep("-", 50), "\nTotal Scrape Time: ", elapsed(start_time), "\n", sep = "")
  }

  list(final_html, final_api, final_espn)
}

#' Scrape play-by-play data for every NHL game in a date range
#'
#' Retrieves the schedule between `start_date` and `end_date` with
#' `get_nhl_schedule()` (requesting ESPN ids), keeps games whose `game_type`
#' is `"PR"`, `"R"` or `"P"`, and passes their ids to `get_game_pbp()`.
#'
#' @param start_date First date to scrape. Defaults to yesterday
#'   (`Sys.Date() - 1`).
#' @param end_date Last date to scrape. Defaults to `start_date`.
#' @param verbose `NULL` for silent operation, or `"summary"` / `"detail"`
#'   (case-insensitive) to print progress and timings. The value is forwarded
#'   unchanged to `get_nhl_schedule()` and lower-cased for `get_game_pbp()`.
#'
#' @return The list returned by `get_game_pbp()` for the selected games, or
#'   `NULL` (with a warning) when the schedule contains no games to scrape.
#' @export
full_nhl_scrape <- function(start_date = Sys.Date() - 1, end_date = start_date, verbose = NULL) {

  start_time <- Sys.time()

  if (is.null(verbose)) {
    verbose_lower <- "na"
  } else {
    verbose_lower <- tolower(verbose)
  }

  # "summary" keeps the cursor on the same line so the schedule duration can
  # be appended after the call; "detail" ends the line immediately.
  if (verbose_lower == "summary") {
    cat("Start Time: ", format(start_time, usetz = TRUE), "\nScraping Schedule... ", sep = "")
  } else if (verbose_lower == "detail") {
    cat("Start Time: ", format(start_time, usetz = TRUE), "\nScraping Schedule...\n", sep = "")
  }

  raw_schedule <-
    get_nhl_schedule(
      start_date = start_date,
      end_date = end_date,
      add_espn_ids = TRUE,
      verbose = verbose
    )

  if (verbose_lower == "summary") {
    schedule_duration <-
      format(
        difftime(Sys.time(), start_time, units = "auto"),
        digits = 2,
        nsmall = 1
      )

    cat(schedule_duration, "\n", sep = "")
  }

  # `||` short-circuits, so nrow() is never evaluated on a NULL schedule. The
  # elementwise `|` produced a zero-length condition and made `if` error.
  if (is.null(raw_schedule) || nrow(raw_schedule) == 0) {
    warning("No Games Returned for Selected Dates")
    return(NULL)
  }

  # NOTE(review): "PR" / "R" / "P" are presumably pre-season, regular season
  # and playoffs -- confirm against get_nhl_schedule()'s game_type values.
  scrape_games <-
    raw_schedule %>%
    dplyr::filter(game_type %in% c("PR", "R", "P"))

  # The schedule can be non-empty yet contain none of the wanted game types;
  # treat that the same as an empty schedule rather than scraping zero games.
  if (nrow(scrape_games) == 0) {
    warning("No Games Returned for Selected Dates")
    return(NULL)
  }

  get_game_pbp(
    game_id = scrape_games$game_id,
    espn_game_id = scrape_games$espn_game_id,
    verbose = verbose_lower
  )
}
EFastner/icescrapR documentation built on Jan. 15, 2022, 1:11 p.m.