R/all_functions.R

Defines functions convert_ids convert_numeric event_angle distance_from_net get_roster_info get_team_info get_game_info complete_game_scrape get_play_by_play get_id_schedule get_id_date game_summary get_player_summary

Documented in get_id_date get_id_schedule get_play_by_play get_player_summary

convert_ids <- function(column, player_df){
  #via Matt Barlowe
  column <- player_df[match(column, player_df$id, nomatch = column),
                      c('Player')]
}
convert_numeric <- function(column){
  as.numeric(as.character(column))
}
event_angle <- function(x, y) {
  #via Emmanuel Perry
  return(abs(atan(y/(89 - abs(x)))*(180/pi)))
}
distance_from_net <- function(x, y) {
  # via Emmanuel Perry
  return(sqrt((89 - abs(x))^2 + y^2))
}
bindingFunction <- dplyr::bind_rows
get_roster_info <- function(game_id = NA, roster_json = NA){
  if(!is.na(game_id)) {
    roster_json <- rjson::fromJSON(file =  paste("https://www.nwhl.zone/game/get_play_by_plays?id=", game_id, sep = ''))$roster_player
  }

  roster_data <- lapply(roster_json, unlist)
  roster_data <- lapply(roster_data, FUN = function(x){ data.frame(t(x), stringsAsFactors = F) })
  roster_df <- do.call("bindingFunction", roster_data)
  Sys.sleep(4)
  return(roster_df)
}
get_team_info <- function(game_id = NA, team_json = NA){
  if(!is.na(game_id)) {
    team_json <- rjson::fromJSON(file = paste("https://www.nwhl.zone/game/get_play_by_plays?id=", game_id, sep = ''))$team_instance
  }

  team_data <- lapply(team_json, unlist)
  columns <- names(team_data[[1]])
  roster_df <- data.frame(matrix(unlist(team_data),
                                 byrow = T,
                                 nrow = length(team_data)),
                          stringsAsFactors = F)
  colnames(roster_df) <- columns
  Sys.sleep(4)
  return(roster_df)
}
get_game_info <- function(game_id = NA, game_json = NA){
  if(!is.na(game_id)) {
    game_json <- rjson::fromJSON(file = paste("https://www.nwhl.zone/game/get_play_by_plays?id=", game_id, sep = ''))$game
  }
  game_vect <- unlist(game_json)
  game_df <- data.frame(t(game_vect), stringsAsFactors = F)
  Sys.sleep(4)
  return(game_df)
}
complete_game_scrape <- function(game_id){
  game_id <- as.character(game_id) #handle if user enters it as numeric
  print(game_id)
  # Gets json file
  game_url <- paste("https://www.nwhl.zone/game/get_play_by_plays?id=", game_id, sep = '')
  pbp_json <- rjson::fromJSON(file = game_url)

  #Gets extraneous data from other functions
  game_data <- get_game_info(game_json = pbp_json$game)
  team_data <- get_team_info(team_json = pbp_json$team_instance)
  roster_data <- get_roster_info(roster_json = pbp_json$roster_player)

  #Formats other data
  team_ids <- dplyr::select(team_data, id, abbrev)

  players <- roster_data %>%
    dplyr::select(id, first_name, last_name) %>%
    dplyr::mutate(Player = paste(first_name,last_name)) %>%
    dplyr::select(-first_name, -last_name)

  #extracts play by play
  plays <- pbp_json$plays

  #return na if play by play not found
  if(length(plays) == 0){
    print(paste("NO PLAY BY PLAY DATA AVAILABLE FOR GAMEID",game_id))
    return(NA)
    }

  #This essentially converts fromJSON list to a dataframe
  plays_reduced <- lapply(plays, unlist)
  play_data <- lapply(plays_reduced, FUN = function(x){ data.frame(t(x), stringsAsFactors = F) })
  play_uncleaned <- do.call("bindingFunction", play_data)

  if(!"play_summary.x_coord" %in% colnames(play_uncleaned)){
    play_uncleaned$play_summary.x_coord <- NA
    play_uncleaned$play_summary.y_coord <- NA
    play_uncleaned$play_summary.loser_id <- NA
  }

  #This prepares everything
  play_prep <- play_uncleaned %>%
    #Selects relevant columns
    dplyr::select(game_id, play_index, clock_time, min, sec, time_interval,  play_actions.subseason_id, created_at,
           home_team_score, away_team_score, team_id,
           play_type,play_summary.x_coord, play_summary.y_coord,
           primary_player_id,
           play_summary.loser_id, play_summary.goalie_id,
           play_actions.away_team_goalie, play_actions.home_team_goalie) %>%
    #note: ifelse statements are to create NA column if the column wasn't found
    # common reasons:
        #shutout (no goals scored so no assists or plus/minus)
        #no penalties so no penalty data
        #no empty net goals, etc.
    dplyr::mutate(play_summary.assist_1_id = ifelse(rep("play_summary.assist_1_id" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                             play_uncleaned$play_summary.assist_1_id, NA),
           play_summary.assist_2_id = ifelse(rep("play_summary.assist_2_id" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                             play_uncleaned$play_summary.assist_2_id, NA),
           play_summary.served_by_id = ifelse(rep("play_summary.served_by_id" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                              play_uncleaned$play_summary.served_by_id, NA),
           play_summary.penalty_type = ifelse(rep("play_summary.penalty_type" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                              play_uncleaned$play_summary.penalty_type, NA),
           play_summary.infraction_type = ifelse(rep("play_summary.infraction_type" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                                 play_uncleaned$play_summary.infraction_type, NA),
           play_summary.penalty_minutes = dplyr::case_when(play_summary.penalty_type == "Minor" ~ 2,
                                                    play_summary.penalty_type == "Major" ~ 4,
                                                    play_summary.penalty_type == "1" ~ 2,
                                                    TRUE ~ 0),
           #PLAYER 1 is faceoff win, goal scorer/shot taker, shot blocker, penalty taker, turnover doer
           event_player_1 = primary_player_id,
           #PLAYER 2 is faceoff loss, assist1, penalty server, goalie on shots on goal
           event_player_2 = dplyr::case_when(play_type == "Faceoff" ~ ifelse(!is.na(play_summary.loser_id),play_summary.loser_id, NA_character_),
                                      play_type == "Shot" ~ ifelse(!is.na(play_summary.goalie_id),play_summary.goalie_id, NA_character_),
                                      play_type == "Penalty" ~ ifelse(!is.na(play_summary.served_by_id),play_summary.served_by_id,NA_character_),
                                      play_type == "Goal" ~ ifelse(!is.na(play_summary.assist_1_id),play_summary.assist_1_id,NA_character_),
                                      TRUE ~ NA_character_
                                      ),
           #PLAYER 3 is secondary assist
           event_player_3 = play_summary.assist_2_id,
           #As of now the only detail is penalty info
           event_detail = ifelse(play_type == "Penalty",
                                 paste( play_summary.penalty_minutes, play_summary.infraction_type, play_summary.penalty_type),
                                 NA),
           #create background columns
           home_team = game_data$home_team, #these are currently ids
           away_team = game_data$away_team,
           game_date = created_at,
           game_seconds = 1200*(convert_numeric(time_interval)-1) + (20-convert_numeric(min)) *60 -convert_numeric(sec),
           #same as before, this deals with columns not found
           minus_player_1 = ifelse(rep("play_actions.minus_player_1" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                   play_uncleaned$play_actions.minus_player_1, NA),
           minus_player_2 = ifelse(rep("play_actions.minus_player_2" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                   play_uncleaned$play_actions.minus_player_2, NA),
           minus_player_3 = ifelse(rep("play_actions.minus_player_3" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                   play_uncleaned$play_actions.minus_player_3, NA),
           minus_player_4 = ifelse(rep("play_actions.minus_player_4" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                   play_uncleaned$play_actions.minus_player_4, NA),
           minus_player_5 = ifelse(rep("play_actions.minus_player_5" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                   play_uncleaned$play_actions.minus_player_5, NA),
           minus_player_6 = ifelse(rep("play_actions.minus_player_6" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                   play_uncleaned$play_actions.minus_player_6, NA),
           plus_player_1 = ifelse(rep("play_actions.plus_player_1" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                  play_uncleaned$play_actions.plus_player_1, NA),
           plus_player_2 = ifelse(rep("play_actions.plus_player_2" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                  play_uncleaned$play_actions.plus_player_2, NA),
           plus_player_3 = ifelse(rep("play_actions.plus_player_3" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                  play_uncleaned$play_actions.plus_player_3, NA),
           plus_player_4 = ifelse(rep("play_actions.plus_player_4" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                  play_uncleaned$play_actions.plus_player_4, NA),
           plus_player_5 = ifelse(rep("play_actions.plus_player_5" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                  play_uncleaned$play_actions.plus_player_5, NA),
           plus_player_6 = ifelse(rep("play_actions.plus_player_6" %in% colnames(play_uncleaned), nrow(play_uncleaned)),
                                  play_uncleaned$play_actions.plus_player_6, NA)
           )  %>%
    #Adds interval for events, ensures shots before faceoffs
    dplyr::arrange(game_seconds, dplyr::desc((play_type %in% c("Goal","Shot"))*1)) %>%
    dplyr::mutate(event_interval = ifelse(!is.na(dplyr::lag(game_seconds)), game_seconds - dplyr::lag(game_seconds), 0)) %>%
    #Remove columns not needed any more
    dplyr::select(-play_summary.loser_id, -play_summary.assist_1_id, -play_summary.goalie_id, -play_summary.served_by_id,
           -play_summary.assist_2_id, - play_summary.penalty_type, -play_summary.infraction_type) %>%
    #converts team and season ids to values
    dplyr::left_join(team_ids, by = c("home_team"="id")) %>%
    dplyr::left_join(team_ids, by = c("away_team"="id")) %>%
    dplyr::left_join(team_ids, by = c("team_id"="id")) %>%
    dplyr::left_join(seasons, by = c("play_actions.subseason_id" = "id")) %>%
    dplyr::select(-home_team, -away_team, -team_id, -play_actions.subseason_id, -created_at, -clock_time) %>%
    #cleaning column names
    dplyr::rename(home_team = abbrev.x,
           away_team = abbrev.y,
           event_team = abbrev,
           period = time_interval,
           event_type = play_type,
           x_coord = play_summary.x_coord,
           y_coord = play_summary.y_coord,
           away_goalie = play_actions.away_team_goalie,
           home_goalie = play_actions.home_team_goalie,
           penalty_length = play_summary.penalty_minutes
           ) %>%
    #Added in coordinate cleaning
    dplyr::mutate(
      #Converts to (-100,100), (-42.5,42.5) coordinate system
      x_coord = convert_numeric(x_coord)*1.98 - 99,
      y_coord = convert_numeric(y_coord)*0.85 - 42.5,
      #Converts shots to proper side of ice (note blocked shots are oriented to which team took it)
      x_coord_2 = ifelse(((event_team == home_team & x_coord > 0) | (event_team == away_team & x_coord < 0)) & event_type %in% c("Shot","Goal"), -x_coord,x_coord),
      y_coord_2 = ifelse(((event_team == home_team & x_coord > 0) | (event_team == away_team & x_coord < 0)) & event_type %in% c("Shot","Goal"), -y_coord,y_coord),
      x_coord_2 = ifelse(((event_team == home_team & x_coord < 0) | (event_team == away_team & x_coord > 0)) & event_type == "BlockedShot", -x_coord,x_coord),
      y_coord_2 = ifelse(((event_team == home_team & x_coord < 0) | (event_team == away_team & x_coord > 0)) & event_type == "BlockedShot", -y_coord,y_coord),
      #Pushes all events to one side of the ice
      x_coord_1 = ifelse(x_coord < 0, - x_coord, x_coord),
      y_coord_1 = ifelse(x_coord < 0, -y_coord, y_coord),
      event_angle = event_angle(x_coord, y_coord),
      event_distance = distance_from_net(x_coord, y_coord)
    ) %>%
    #putting everything in a logical order
    dplyr::select(Season, game_id, game_date, home_team, away_team,
           play_index, period, min, sec, game_seconds, event_interval,
           event_type, event_detail, x_coord, y_coord, event_team,
           event_player_1, event_player_2, event_player_3,
           event_angle,event_distance,x_coord_2:y_coord_1,
           home_goalie, away_goalie, plus_player_1:plus_player_5,plus_player_6,
           minus_player_1:minus_player_5,minus_player_6,
           penalty_length
           ) %>%
    dplyr::mutate(Season = substr(Season,1,4))

  #Converts source date in GMT into home team time zone then updates game_date colum with converted time
  #Set tz = to neame for the home team time zone in the local OS, ie. tz = "America/Indianapolis" each system is different
  if(any(play_prep$home_team == "MIN")) {
    play_prep$game_date <- as.POSIXct(as.integer(play_uncleaned$created_at_integer), origin = "1970-01-01", tz= "America/Chicago")
    play_prep$game_date <- as.character(play_prep$game_date)
    play_prep$game_date <- gsub(" .*","", play_prep$game_date)
  } else {
    play_prep$game_date <- as.POSIXct(as.integer(play_uncleaned$created_at_integer), origin = "1970-01-01", tz= "America/Indianapolis")
    play_prep$game_date <- as.character(play_prep$game_date)
    play_prep$game_date <- gsub(" .*","", play_prep$game_date)
  }

  #Create Score Columns
  play_prep$home_score <- 0
  play_prep$away_score <- 0

  #gets running count for scores
  play_prep$home_score <- 1*(play_prep$home_team == play_prep$event_team)*(play_prep$event_type == "Goal")
  play_prep$home_score[is.na(play_prep$home_score)] <- 0
  play_prep$home_score <- cumsum(play_prep$home_score)
  play_prep$away_score <- 1*(play_prep$away_team == play_prep$event_team)*(play_prep$event_type == "Goal")
  play_prep$away_score[is.na(play_prep$away_score)] <- 0
  play_prep$away_score <- cumsum(play_prep$away_score)

  #Converts to numeric columns
  play_prep$penalty_length <- convert_numeric(play_prep$penalty_length)
  play_prep$period <- convert_numeric(play_prep$period)

  #Gets home skater strength----

  #This takes events that could impact strength
  home_state_changes <- play_prep %>%
    dplyr::filter((event_type == "Goal" & event_team == away_team) |
             (event_type == "Penalty" & event_team == home_team)) %>%
    dplyr::select(event_type,game_seconds,penalty_length) %>%
    dplyr::mutate(event_type = ifelse(event_type == "Penalty",1,2),
           prev.event = dplyr::lag(event_type),
           prev.time = dplyr::lag(game_seconds),
           prev.length = dplyr::lag(penalty_length))

  #Ok this determines if a player should be added or subtracted from the home team count
  home_pen_mat <- apply(home_state_changes,
                        1,
                         FUN = function(x) {
      #Creates a -1 for duration of penalty and 0s surrounding it
      if(x[1] == 1 & x[2]+x[3]*60 < (max(play_prep$period)*1200-1)){
          c( rep( 0, length( 0:x[2] )),
          rep( -1, x[3]*60),
          rep(0, length((x[2]+x[3]*60 + 1):(max(play_prep$period)*1200-1)))
          )
        #Creates a -1 for duration of penalty and 0s before (for end of game penalties)
        } else if(x[1] == 1 & x[2]+x[3]*60 >= (max(play_prep$period)*1200-1)) {
          c( rep( 0, length( 0:x[2] )),
             rep(-1, max(play_prep$period)*1200-1-x[2] )
          )
        #Creates a +1 from time power play goal is scored to end of penalty to handle skater coming back on
        } else if( x[1] == 2 & (x[2] %in% ifelse(!is.na(x[5]) & !is.na(x[6]) & x[2] != x[5], x[5]:(x[5]+x[6]*60),-1 )) ) {
          c( rep( 0, length( 0:(x[2]) )),
            rep( 1, length( (x[2]+1):(x[6]*60-(x[2]-x[5])))),
            rep(0, length((x[6]*60-(x[2]-x[5])):(max(play_prep$period)*1200-1)))
          )
        # Creates all zeros if event doesnt effect strength
        } else {
          rep(0, length(0:(max(play_prep$period)*1200-1)))
        }
      })

  #creates vector for skaters
  home_skaters <- 5 + apply(home_pen_mat, 1, sum)

  #Gets away skater strength----

  #**See above comments for explanation

  away_state_changes <- play_prep %>%
    dplyr::filter((event_type == "Goal" & event_team == home_team) |
             (event_type == "Penalty" & event_team == away_team)) %>%
    dplyr::select(event_type,game_seconds,penalty_length) %>%
    dplyr::mutate(event_type = ifelse(event_type == "Penalty",1,2),
           prev.event = dplyr::lag(event_type),
           prev.time = dplyr::lag(game_seconds),
           prev.length = dplyr::lag(penalty_length))

  away_pen_mat <- apply(away_state_changes,
                        1,
                        FUN = function(x) {
                          if(x[1] == 1 & x[2]+x[3]*60 < (max(play_prep$period)*1200-1)){
                            c( rep( 0, length( 0:x[2] )),
                               rep( -1, x[3]*60),
                               rep(0, length((x[2]+x[3]*60 + 1):(max(play_prep$period)*1200-1)))
                            )
                          } else if(x[1] == 1 & x[2]+x[3]*60 >= (max(play_prep$period)*1200-1)) {
                            c( rep( 0, length( 0:x[2] )),
                               rep(-1, max(play_prep$period)*1200-1-x[2] )
                            )
                          } else if( x[1] == 2 & x[2] %in% ifelse(!is.na(x[5]) & !is.na(x[6]) & x[2] != x[5], x[5]:(x[5]+x[6]*60),-1 ) ) {
                            c( rep( 0, length( 0:(x[2]) )),
                               rep( 1, length( (x[2]+1):(x[6]*60-(x[2]-x[5])))),
                               rep(0, length((x[6]*60-(x[2]-x[5])+1):(max(play_prep$period)*1200-1)))
                            )
                          } else {
                            rep(0, length(0:(max(play_prep$period)*1200-1)))
                          }
                        })

  away_skaters <- 5 + apply(away_pen_mat, 1, sum)

  #----

  #Adds skater strength to pbp data
  play_df <- play_prep %>%
    dplyr::left_join(data.frame(game_seconds = 0:(max(play_prep$period)*1200-1),
              home_skaters = home_skaters,
              away_skaters = away_skaters),
              by = "game_seconds")

  #deals with extra skater for pulled goalie
  play_df$home_skaters <- ifelse(play_df$home_goalie == "", play_df$home_skaters+1, play_df$home_skaters)
  play_df$away_skaters <- ifelse(play_df$away_goalie == "", play_df$away_skaters+1, play_df$away_skaters)
  #deals with more than 2 penalties
  play_df$home_skaters <- ifelse(play_df$home_skaters < 3,3, play_df$home_skaters)
  play_df$away_skaters <- ifelse(play_df$away_skaters < 3,3, play_df$away_skaters)

  #Converts player ids to player names
  play_df[,c(17:19,26:39)] <-
    play_df[,c(17:19,26:39)] %>% sapply(convert_ids, player_df = players)

  #final sorting of columns and removing unnecessary rows
  play_df <- play_df %>%
    dplyr::select(-penalty_length) %>%
    dplyr::select(Season, game_id:away_team, home_score, away_score, play_index:event_type,
           event_team:event_player_3, event_detail:y_coord,
           home_skaters, away_skaters, event_angle:minus_player_6) %>%
    dplyr::filter(event_type != '')

  #Deals with weird date issues
  date <- play_df$game_date[1]
  play_df$game_date <- date

  message(paste("Finished",first(play_df$game_id)))
  Sys.sleep(4)
  return(play_df)
}

#' Play by Play Scraper
#'
#' This function returns a data frame of the play by play in a tidy format
#' @param game_ids character or numeric vector of unique game ids
#' @keywords play by play
#' @export
#' @import dplyr
#' @examples
#' get_play_by_play(c("22207010","22207013"))
get_play_by_play <- function(game_ids) {
  game_list <- lapply(game_ids, complete_game_scrape)
  game_list <- game_list[which(!is.na(game_list))]
  game_data <- do.call("bindingFunction", game_list)
  return(game_data)
}

#' Schedule ID Scraper
#'
#' This function returns a vector of game ids given a prespecified team and season
#' @param Season takes in the season as year1year2 (yyyyyyyy), Defaults to 20182019
#' @param teams takes a specific team abbreviation, such as "BUF". Defaults to all teams
#' @keywords schedule
#' @export
#' @import dplyr
#' @examples
#' get_id_schedule("20182019", "BUF")
get_id_schedule <- function(Season = "20182019", teams = NA){
  all_teams <- c("BOS", "BUF","CTW","MET", "MIN")
  if(is.na(teams)){
    teams <- all_teams
  }
  if(is.na(Season)){
    print("Please Enter Season")
    return(NA)
  }
  Season <- as.character(Season)

  #team ids change every season
  the1819ids <- c(3629910:3629913, 3629918)
  the1718ids <- 2840761:2840764
  the1617ids <- c(2150454, 2150455, 2150457, 2150459)
  the1516ids <- 2150711:2150714

  #Gets relevant team ids
  team_ids <- if(Season == "20152016") {
    the1516ids[match(teams,all_teams)]
  } else if(Season == "20162017") {
    the1617ids[match(teams,all_teams)]
  } else if(Season == "20172018"){
    the1718ids[match(teams,all_teams)]
  } else{
    the1819ids[match(teams,all_teams)]
  }

  #Gets season id
  seasonid <- as.character(seasons$id[which(seasons$Season == Season)])

  #Creates urls for each team schedule page
  team_urls <- paste("https://www.nwhl.zone/schedule/team_instance/",
                     team_ids[which(!is.na(team_ids))],
                     "?subseason=",
                     seasonid,
                     sep = "")

  # Creates empty vector
  all_games <- c()
  #Iterates through each team
  for(i in team_urls){
    #Pulls urls for each game
    html <- paste(readLines(i), collapse="\n")
    game_ids <- stringr::str_extract_all(html,"(?<=[/])\\d{8}(?=[?])")
    #adds to vector
    all_games <- c(all_games,game_ids[[1]])
    Sys.sleep(4)
  }
  #since duplicate games
  all_games <- unique(all_games)

  return(all_games)
}

#' Date ID Scraper
#'
#' This function returns a vector of game ids given a specific date
#' @param Season takes in the season as year1year2 (yyyyyyyy)
#' @param Year takes in a specific year formatted as yyyy
#' @param Month takes in a specifc month formatted as mm
#' @param Day takes in a specific day formatted as dd
#' @keywords date
#' @import dplyr
#' @export
#' @examples
#' get_id_date(Season = "20182019", Year = "2018, Month = "12", Day = "09")
get_id_date <- function(Season = NA, Year = NA, Month = NA, Day = NA){
  #Gets season id
  seasonid <- as.character(seasons$id[which(seasons$Season == Season)])
  leagueid <- as.character(seasons$League[which(seasons$Season == Season)])

  # Gets daily schedule
  url <- paste("https://www.nwhl.zone/schedule/day/league_instance/",
      leagueid,
      "/",
      Year,
      "/",
      Month,
      "/",
      Day,
      "?subseason=", seasonid, sep = "")

  #Iterates through each team
  html <- paste(readLines(url), collapse="\n")
  game_ids <- stringr::str_extract_all(html,"(?<=[/])\\d{8}(?=[?])")
  Sys.sleep(4)
  return(game_ids[[1]])
}

game_summary <- function(pbp_df){
  # Pass in play by play dataframe for individual game and returns player summaries for game
  # Thanks to @EvolvingWild for providing their NHL game summary as a baseline
  season <- dplyr::first(pbp_df$Season)
  game_id <- dplyr::first(pbp_df$game_id)
  date <- dplyr::first(pbp_df$game_date)
  home <- dplyr::first(pbp_df$home_team)
  away <- dplyr::first(pbp_df$away_team)

  print(game_id)
  team_ids <- dplyr::select(get_team_info(game_id = game_id),
                     roster_id,
                     abbrev)

  roster <- get_roster_info(game_id = game_id) %>%
    dplyr::filter(roster_type == "player") %>%
    dplyr::select(first_name, last_name, position, roster_id) %>%
    dplyr::mutate(Player = paste(first_name,last_name)) %>%
    dplyr::select(-first_name, -last_name) %>%
    dplyr::left_join(team_ids, by = c("roster_id")) %>%
    dplyr::select(-roster_id)

  pbp_player_1 <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team, event_player_1, event_team) %>%
    dplyr::summarise(G = sum(event_type == "Goal"),
              SOG = sum(event_type == "Shot" | event_type == "Goal"),
              FOW = sum(event_type == "Faceoff"),
              Blk = sum(event_type == "BlockedShot"),
              TO = sum(event_type == "Turnover"),
              PEN = sum(event_type == "Penalty"),
              PIM = sum(convert_numeric(substr(event_detail,1,1)), na.rm = T),
              PPG = sum(event_type == "Goal" & ((event_team == home_team & away_skaters < 5) | (event_team == away_team & home_skaters < 5))),
              SHG = sum(event_type == "Goal" & ((event_team == home_team & home_skaters < 5) | (event_team == away_team & away_skaters < 5)))
              ) %>%
    dplyr::rename(Player = event_player_1)

  pbp_player_2 <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team, event_player_2, event_team) %>%
    dplyr::summarise(A1 = sum(event_type == "Goal"),
              PPA1 = sum(event_type == "Goal" & ((event_team == home_team & away_skaters < 5) | (event_team == away_team & home_skaters < 5))),
              SHA1 = sum(event_type == "Goal" & ((event_team == home_team & home_skaters < 5) | (event_team == away_team & away_skaters < 5)))
    ) %>%
    dplyr::filter(!(A1 == 0 & PPA1 == 0 & SHA1 == 0)) %>%
    dplyr::rename(Player = event_player_2)

  pbp_player_flipped <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team, event_player_2, event_team) %>%
    dplyr::summarise(FOL = sum(event_type == "Faceoff"),
              SV = sum(event_type == "Shot")) %>%
    dplyr::filter(FOL > 0 | SV > 0) %>%
    dplyr::mutate(event_team = ifelse(event_team == home_team, away_team,home_team)) %>%
    dplyr::rename(Player = event_player_2)

  pbp_player_3 <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team, event_player_3, event_team) %>%
    dplyr::summarise(A2 = sum(event_type == "Goal"),
              PPA2 = sum(event_type == "Goal" & ((event_team == home_team & away_skaters < 5) | (event_team == away_team & home_skaters < 5))),
              SHA2 = sum(event_type == "Goal" & ((event_team == home_team & home_skaters < 5) | (event_team == away_team & away_skaters < 5)))
    ) %>%
    dplyr::rename(Player = event_player_3)

  pbp_h_goalie <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team, event_team, home_goalie) %>%
    dplyr::summarise(GA = sum(event_type == "Goal" & event_team == away_team)) %>%
    dplyr::rename(Player = home_goalie) %>%
    dplyr::ungroup() %>%
    dplyr::filter(away_team == event_team) %>%
    dplyr::mutate(event_team = home_team)

  pbp_a_goalie <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team, event_team, away_goalie) %>%
    dplyr::summarise(GA = sum(event_type == "Goal" & event_team == home_team)) %>%
    dplyr::rename(Player = away_goalie) %>%
    dplyr::ungroup() %>%
    dplyr::filter(home_team == event_team) %>%
    dplyr::mutate(event_team = away_team)


  pbp_plus_1 <- pbp_df %>% dplyr::select(event_team, plus_player_1) %>% dplyr::rename(Player = plus_player_1)
  pbp_plus_2 <- pbp_df %>% dplyr::select(event_team, plus_player_2) %>% dplyr::rename(Player = plus_player_2)
  pbp_plus_3 <- pbp_df %>% dplyr::select(event_team, plus_player_3) %>% dplyr::rename(Player = plus_player_3)
  pbp_plus_4 <- pbp_df %>% dplyr::select(event_team, plus_player_4) %>% dplyr::rename(Player = plus_player_4)
  pbp_plus_5 <- pbp_df %>% dplyr::select(event_team, plus_player_5) %>% dplyr::rename(Player = plus_player_5)
  pbp_plus_6 <- pbp_df %>% dplyr::select(event_team, plus_player_6) %>% dplyr::rename(Player = plus_player_6)
  pbp_plus <- do.call("rbind",list(pbp_plus_1,pbp_plus_2,pbp_plus_3,pbp_plus_4,pbp_plus_5, pbp_plus_6)) %>%
    dplyr::group_by(event_team,Player) %>%
    dplyr::summarise(Plus = dplyr::n()) %>%
    dplyr::filter(!is.na(Player))

  pbp_minus_1 <- pbp_df %>% dplyr::mutate(event_team = ifelse(event_team == home_team, away_team, home_team)) %>%
    dplyr::select(event_team, minus_player_1) %>% dplyr::rename(Player = minus_player_1)
  pbp_minus_2 <- pbp_df %>% dplyr::mutate(event_team = ifelse(event_team == home_team, away_team, home_team)) %>%
    dplyr::select(event_team, minus_player_2) %>% dplyr::rename(Player = minus_player_2)
  pbp_minus_3 <- pbp_df %>% dplyr::mutate(event_team = ifelse(event_team == home_team, away_team, home_team)) %>%
    dplyr::select(event_team, minus_player_3) %>% dplyr::rename(Player = minus_player_3)
  pbp_minus_4 <- pbp_df %>% dplyr::mutate(event_team = ifelse(event_team == home_team, away_team, home_team)) %>%
    dplyr::select(event_team, minus_player_4) %>% dplyr::rename(Player = minus_player_4)
  pbp_minus_5 <- pbp_df %>% dplyr::mutate(event_team = ifelse(event_team == home_team, away_team, home_team)) %>%
    dplyr::select(event_team, minus_player_5) %>% dplyr::rename(Player = minus_player_5)
  pbp_minus_6 <- pbp_df %>% dplyr::mutate(event_team = ifelse(event_team == home_team, away_team, home_team)) %>%
    dplyr::select(event_team, minus_player_6) %>% dplyr::rename(Player = minus_player_6)
  pbp_minus <- do.call("rbind",list(pbp_minus_1,pbp_minus_2,pbp_minus_3,pbp_minus_4,pbp_minus_5, pbp_minus_6)) %>%
    dplyr::group_by(event_team,Player) %>%
    dplyr::summarise(Minus = n()) %>%
    dplyr::filter(!is.na(Player))

  player_data <- pbp_player_1 %>%
    dplyr::bind_rows(pbp_player_2) %>%
    dplyr::bind_rows(pbp_player_3) %>%
    dplyr::bind_rows(pbp_player_flipped) %>%
    dplyr::bind_rows(pbp_h_goalie) %>%
    dplyr::bind_rows(pbp_a_goalie) %>%
    dplyr::bind_rows(pbp_plus) %>%
    dplyr::bind_rows(pbp_minus) %>%
    dplyr::left_join(roster, by = c("Player","event_team"="abbrev")) %>%
    dplyr::rename(Team = event_team) %>%
    dplyr::filter(!is.na(Player))

  player_data <- player_data %>%
    dplyr::group_by(Player,Team, position) %>%
    dplyr::summarise_if(is.numeric, sum, na.rm = T)

  player_data$Season <- season
  player_data$game_id <- game_id
  player_data$game_date <- date
  player_data$home_team <- home
  player_data$away_team <- away

  player_data <- dplyr::mutate(player_data,
                        GS = ifelse(position == "G",
                                            0.14*SV - GA,
                                            G + 0.64*(A1+A2) + 0.11*SOG + 0.12*(FOW-FOL) - 0.17*(PIM/2)),
                        PTS = G+A1+A2,
                        PrPTS = G+A1,
                        GF. = ifelse((Plus+Minus) != 0 ,Plus/(Plus+Minus),NA_integer_)) %>%
    dplyr::rename(eGF = Plus, eGA = Minus)

  player_data <- dplyr::select(player_data,
                        Season, Player,Team,position,game_id:away_team,
                        G,A1,A2,PTS,PrPTS,GS,SOG,eGF,eGA,GF.,PPG,PPA1,PPA2,SHG,SHA1,SHA2,FOW,FOL,PIM,Blk,TO,SV,GA)

  return(player_data)
}

#' Player Summary Function
#'
#' Given a tidy play by play file from get_play_by_play function, summarizes player stats at a game level
#' @param pbp_df a dataframe consisting of play by play data from get_play_by_play function
#' @export
#' @import dplyr
#' @keywords player
get_player_summary <- function(pbp_df){
  player_games <- pbp_df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(game_id = as.character(game_id)) %>%
    dplyr::group_by(game_id) %>%
    dplyr::do(data.frame(game_summary(.)))
  return(player_games)
}

game_team_summary <- function(pbp_df){
  season <- dplyr::first(pbp_df$Season)
  game_id <- dplyr::first(pbp_df$game_id)
  date <- dplyr::first(pbp_df$game_date)
  home <- dplyr::first(pbp_df$home_team)
  away <- dplyr::first(pbp_df$away_team)

  print(game_id)

  pbp_stats_home <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team) %>%
    dplyr::summarise(GF = sum(event_type == "Goal" & event_team == home_team),
              GA = sum(event_type == "Goal" & event_team == away_team),
              SF = sum(event_type == "Shot" & event_team == home_team),
              SA = sum(event_type == "Shot" & event_team == away_team),
              SF_5v5 = sum(event_type == "Shot" & event_team == home_team & home_skaters == 5 & 5 == away_skaters),
              SA_5v5 = sum(event_type == "Shot" & event_team == away_team & home_skaters == 5 & 5 == away_skaters),
              BlkF = sum(event_type == "BlockedShot" & event_team == home_team),
              BlkA = sum(event_type == "BlockedShot" & event_team == away_team),
              GF_5v5 = sum(event_type == "Goal" & event_team == home_team & home_skaters == 5 & 5 == away_skaters),
              GA_5v5 = sum(event_type == "Goal" & event_team == away_team & home_skaters == 5 & 5 == away_skaters),
              GF_pp = sum(event_type == "Goal" & event_team == home_team & home_skaters > away_skaters),
              GA_pp = sum(event_type == "Goal" & event_team == away_team & home_skaters > away_skaters),
              GF_sh = sum(event_type == "Goal" & event_team == home_team & home_skaters < away_skaters),
              GA_sh = sum(event_type == "Goal" & event_team == away_team & home_skaters < away_skaters),
              FOW = sum(event_type == "Faceoff" & event_team == home_team),
              FOL = sum(event_type == "Faceoff" & event_team == away_team),
              periods = max(period)
    ) %>%
    dplyr::mutate('Sh' = round(GF/SF,2),
           'Sv' = round(1 - GA/SA,2),
           PDO = Sh + Sv,
           SF. = round(SF/(SF+SA),2),
           SF_5v5. = round(SF_5v5/(SF_5v5+SA_5v5),2),
           Team = home_team) %>%
    dplyr::rename('Sh%' = Sh,
           'Sv%' = Sv,
           'SF%' = SF.,
           'SF_5v5%' = SF_5v5.) %>%
    dplyr::select(Season:away_team, Team, GF:SA, 'SF%', SF_5v5, SA_5v5, 'SF_5v5%', BlkF:PDO)

  pbp_stats_away <- pbp_df %>%
    dplyr::group_by(Season, game_id, game_date, home_team, away_team) %>%
    dplyr::summarise(GF = sum(event_type == "Goal" & event_team == away_team),
              GA = sum(event_type == "Goal" & event_team == home_team),
              SF = sum(event_type == "Shot" & event_team == away_team),
              SA = sum(event_type == "Shot" & event_team == home_team),
              SF_5v5 = sum(event_type == "Shot" & event_team == away_team & home_skaters == 5 & 5 == away_skaters),
              SA_5v5 = sum(event_type == "Shot" & event_team == home_team & home_skaters == 5 & 5 == away_skaters),
              BlkF = sum(event_type == "BlockedShot" & event_team == away_team),
              BlkA = sum(event_type == "BlockedShot" & event_team == home_team),
              GF_5v5 = sum(event_type == "Goal" & event_team == away_team & home_skaters == 5 & 5 == away_skaters),
              GA_5v5 = sum(event_type == "Goal" & event_team == home_team & home_skaters == 5 & 5 == away_skaters),
              GF_pp = sum(event_type == "Goal" & event_team == away_team & home_skaters > away_skaters),
              GA_pp = sum(event_type == "Goal" & event_team == home_team & home_skaters > away_skaters),
              GF_sh = sum(event_type == "Goal" & event_team == away_team & home_skaters < away_skaters),
              GA_sh = sum(event_type == "Goal" & event_team == home_team & home_skaters < away_skaters),
              FOW = sum(event_type == "Faceoff" & event_team == away_team),
              FOL = sum(event_type == "Faceoff" & event_team == home_team),
              periods = max(period)
    ) %>%
    dplyr::mutate('Sh' = round(GF/SF,2),
           'Sv' = round(1 - GA/SA,2),
           PDO = Sh + Sv,
           SF. = round(SF/(SF+SA),2),
           SF_5v5. = round(SF_5v5/(SF_5v5+SA_5v5),2),
           Team = away_team) %>%
    dplyr::rename('Sh%' = Sh,
           'Sv%' = Sv,
           'SF%' = SF.,
           'SF_5v5%' = SF_5v5.) %>%
    dplyr::select(Season:away_team, Team, GF:SA, 'SF%', SF_5v5, SA_5v5, 'SF_5v5%', BlkF:PDO)

  pbp_team <- dplyr::bind_rows(pbp_stats_home, pbp_stats_away)
  return(pbp_team)
}

#' Team Summary Function
#'
#' Given a tidy play by play file from get_play_by_play function, summarizes team stats at a game level
#' @param pbp_df a dataframe consisting of play by play data from get_play_by_play function
#' @export
#' @import dplyr
#' @keywords team
get_team_summary <- function(pbp_df){
  team_games <- pbp_df %>%
    dplyr::ungroup() %>%
    dplyr::mutate(game_id = as.character(game_id)) %>%
    dplyr::group_by(game_id) %>%
    dplyr::do(data.frame(game_team_summary(.)))
  return(team_games)
}
jflancer/nwhlR documentation built on Aug. 16, 2019, 10:10 a.m.