R/game_summary.r

Defines functions point_total game_count OT_winners shootout_winners game_endings get_team_results get_game_results

Documented in get_game_results get_team_results

#'Summarize Game Results
#'
#'Utilize NHL RTSS game data to summarize game results. \cr
#'\cr
#'Use get_game_results() to summarize by game ID \cr
#'Use get_team_results() to summarize by team
#'
#'@param raw_data A data frame containing NHL RTSS data
#'@param full_season if TRUE will add columns to summarize total number of games played as well as total points earned
#'
#'@author Eric Fastner (eric.fastner@@gmail.com)
#'@export
get_game_results <- function(raw_data, full_season = FALSE) {
  #DESCRIPTION - take raw PbP data and summarize each game by score, games played, points earned, and cumulative points
  #ARGUMENTS - expects raw PbP data frame

  #Identify the ending line of each game passed
  end_games <- game_endings(raw_data)

  #Select only columns needed and add columns for the number of points earned by each team, game totals, point totals, etc
  game_results <-
    end_games %>%
    dplyr::arrange(game_date) %>%
    dplyr::select(season, session, game_date, game_id, game_period, home_team, away_team, home_score, away_score) %>%
    dplyr::mutate(
      home_points = ifelse(home_score > away_score, 2, ifelse(game_period == 3, 0, 1)),
      away_points = ifelse(away_score > home_score, 2, ifelse(game_period == 3, 0, 1)))


  #Removes any dublicate lines that may show up, most frequently in games that have two different game ending times for some reason
  game_results <- unique(game_results)

  if (full_season == TRUE){

    #Add columns for points and game counts
    game_results <-
      game_results %>%
      dplyr::mutate(home_game_num = NA,
                    home_point_total = NA,
                    away_game_num = NA,
                    away_point_total = NA)

    #Loop through each line to apply game and point count functions
    for (i in 1:nrow(game_results)) {
      game_results[i, "home_game_num"] <-
        game_count(data_set = game_results,
                   gdate = game_results[i, "game_date"],
                   team = as.character(game_results[i, "home_team"]))

      game_results[i, "home_point_total"] <-
        if(game_results[i, "session"] == "R"){
          point_total(data_set = game_results,
                      gdate = game_results[i, "game_date"],
                      team = as.character(game_results[i, "home_team"]))
        } else {
          NA
        }

      game_results[i, "away_game_num"] <-
        game_count(data_set = game_results,
                   gdate = game_results[i, "game_date"],
                   team = as.character(game_results[i, "away_team"]))

      game_results[i, "away_point_total"] <-
        if(game_results[i, "session"] == "R"){
          point_total(data_set = game_results,
                      gdate = game_results[i, "game_date"],
                      team = as.character(game_results[i, "away_team"]))
        } else {
          NA
        }
    }
  }
  return(game_results)
}

#'@rdname get_game_results
#'@export
get_team_results <- function(raw_data, full_season = FALSE) {
  #DESCRIPTION - Takes a summary file generated by get_game_results() and summarizes for each team
  #ARGUMENTS - game_results expects a raw pbp dataframe

  if(full_season == TRUE){
    game_results <-
      get_game_results(raw_data, TRUE)
  } else{
    game_results <-
      get_game_results(raw_data, FALSE)
  }

  home_list <-
    dplyr::rename(game_results,
                  team = home_team,
                  opp = away_team,
                  team_score = home_score,
                  opp_score = away_score,
                  team_points = home_points,
                  opp_points = away_points) %>%
    dplyr::mutate(side = "home",
                  result = ifelse(team_score > opp_score, "W", ifelse(game_period == 3, "L", "OTL")))

  away_list <-
    dplyr::rename(game_results,
                  team = away_team,
                  opp = home_team,
                  team_score = away_score,
                  opp_score = home_score,
                  team_points = away_points,
                  opp_points = home_points) %>%
    dplyr::mutate(side = "away",
                  result = ifelse(team_score > opp_score, "W", ifelse(game_period == 3, "L", "OTL")))

  if(full_season == TRUE){
    home_list <-
      home_list %>%
      dplyr::mutate(team_game = home_game_num,
             opp_game = away_game_num,
             team_point_total = home_point_total,
             opp_point_total = away_point_total)

    away_list <-
      away_list %>%
      dplyr::mutate(team_game = away_game_num,
             opp_game = home_game_num,
             team_point_total = away_point_total,
             opp_point_total = home_point_total)
  }

  team_list <- rbind(home_list, away_list)

  return(team_list)
}

game_endings <- function(data_set) {
  #DESCRIPTION - Identifies the final ending of each game in a data set
  #ARGUMENTS - data_set = a raw PBP data frame, regular season only

  #Attempt to grab the final score of each game based on the period that it ends in
  game_endings <-
    data_set %>%
    dplyr::filter((data_set$event_type == "PEND" & data_set$game_period == 3 & data_set$home_score != data_set$away_score) |
             (data_set$event_type == "GOAL" & data_set$game_period == 4) |
             (data_set$event_type == "SOC" & data_set$game_period == 5 ))

  #Identifies if a game ended in OT or a shootout and calculates the final scores
  for (i in 1:nrow(game_endings)) {
    if (game_endings[i, "game_period"] == 5) {
      so_result <- shootout_winners(data_set, game_endings[i, "game_id"])

      ifelse(so_result == "home",
             game_endings[i, "home_score"] <- as.integer(game_endings[i, "home_score"]) + 1,
             game_endings[i, "away_score"] <- as.integer(game_endings[i, "away_score"]) + 1)

    } else if (game_endings[i, "game_period"] == 4 & (game_endings[i, "home_score"] == game_endings[i, "away_score"])) {
      ot_result <- OT_winners(data_set, game_endings[i, "game_id"])
      game_endings[i, "home_score"] <- ot_result[[1]]
      game_endings[i, "away_score"] <- ot_result[[2]]
    }
  }
  return(game_endings)
}

shootout_winners <- function(data_set, gameID) {
  #DESCRIPTION - totals the number of goals by the home team and total by the away team to find the game winner
  #ARGUMENTS - data_set = a raw PBP file, gameID = a specified game_id

  home_goals <- nrow(dplyr::filter(data_set, game_id == as.integer(gameID) & event_type == "GOAL" & event_team == as.character(home_team)))
  away_goals <- nrow(dplyr::filter(data_set, game_id == as.integer(gameID) & event_type == "GOAL" & event_team == as.character(away_team)))

  return(ifelse(home_goals > away_goals, "home", "away"))
}

OT_winners <- function(data_set, gameID) {
  #DESCRIPTION - finds the max home score and the max away score in an OT period. Sometimes necessary as the score state data from the raw PBP does not always behave as expected
  #ARGUMENTS - data_set = a raw PBP file, gameID = a specified game_id

  home_score <- max(dplyr::filter(data_set, game_id == as.integer(gameID))$home_score)
  away_score <- max(dplyr::filter(data_set, game_id == as.integer(gameID))$away_score)

  return(c(home_score, away_score))
}

game_count <- function(data_set, gdate, team) {
  #DESCRIPTION - Grabs the cumulative game count for a given team, used in fun.game_result_summary
  #ARGUMENTS - data_set = a list of game results created by fun.game_endings, gdate = a given date, team = a given team

  #Uses sum to build an array of games up to a given data that includes a set team
  game_count <-
    sum(data_set$game_date <= gdate &
          (data_set$home_team == team | data_set$away_team == team))

  return(game_count)
}

point_total <- function(data_set, gdate, team) {
  #DESCRIPTION - Calculates a team's total points earned through a given date
  #ARGUMENTS - data_set = a list of game results created by fun.game_endings, gdate = a given date, team = a given team

  point_count <-
    sum((data_set$game_date <= gdate) *
          (((data_set$home_team == team) * (data_set$home_points)) +
             ((data_set$away_team == team) * (data_set$away_points))))

  return(point_count)
}
EFastner/icescrapR documentation built on Jan. 15, 2022, 1:11 p.m.