##################################################################
### Play-by-play, Drive Summary, and Simple Box Score Function ###
# Authors: Maksim Horowitz, Ron Yurko #
# Code Style Guide: Google R Format #
##################################################################
# Play-by Play Function
#' Parsed Descriptive Play-by-Play Dataset for a Single Game
#' @description This function intakes the JSON play-by-play data of a single
#' game and parses the play description column into individual variables
#' allowing the user to segment the game in a variety of different ways for
#' model building and analysis. WARNING: This function is deprecated and will be removed from the package
#' after the 2018-19 season.
#' @param GameID (character or numeric) A 10 digit game ID associated with a
#' given NFL game.
#' @details Through list manipulation using the do.call and rbind functions
#' a 13 column dataframe with basic information populates directly from the NFL
#' JSON API. These columns include the following:
#' \itemize{
#' \item{"Drive"} - Drive number
#' \item{"sp"} - Whether the play resulted in a score (any kind of score)
#' \item{"qtr"} - Quarter of Game
#' \item{"down"} - Down of the given play
#' \item{"time"} - Time at start of play
#' \item{"yrdln"} - Between 0 and 50
#' \item{"ydstogo"} - Yards to go for a first down
#' \item{"ydsnet"} - Total yards gained on a given drive
#' \item{"posteam"} - The team on offense
#' \item{"AirYards"} - Number of yards the ball was thrown in the air for both
#' complete and incomplete pass attempts (negative means behind line of scrimmage)
#' \item{"YardsAfterCatch"} - Number of yards receiver gained after catch
#' \item{"QBHit"} - Binary: 1 if the QB was knocked to the ground else 0
#' \item{"desc"} - A detailed description of what occured during the play
#' }
#'
#' Through string manipulation and parsing of the description column using
#' base R and stringR, columns were added to the original dataframe allowing
#' the user to have a detailed breakdown of the events of each play. Also
#' provided are calculations for the expected points and win probability
#' for each play using models built entirely on nflscrapR data.
#' The added variables are specified below:
#' \itemize{
#' \item{"Date"} - Date of game
#' \item{"GameID"} - The ID of the specified game
#' \item{"play_id"} - Play id within a game
#' \item{"TimeSecs"} - Time remaining in game in seconds
#' \item{"PlayTimeDiff"} - The time difference between plays in seconds
#' \item{"DefensiveTeam"} - The defensive team on the play (for punts the
#' receiving team is on defense, for kickoffs the receiving team is on offense)
#' \item{"TimeUnder"} - Minutes remaining in half
#' \item{"SideofField"} - The side of the field that the line of scrimmage
#' is on
#' \item{yrdline100} - Distance to opponents endzone, ranges from 1-99
#' situation
#' \item{GoalToGo} - Binary: 1 if the play is in a goal down situation else 0
#' \item{"FirstDown"} - Binary: 1if the play resulted in a first down conversion
#' else 0
#' \item{"PlayAttempted"} - A variable used to count the number of plays in a
#' game (should always be equal to 1)
#' \item{"Yards.Gained"} - Amount of yards gained on the play
#' \item{"Touchdown"} - Binary: 1 if the play resulted in a TD else 0
#' \item{"ExPointResult"} - Result of the extra-point: Made, Missed, Blocked,
#' Aborted
#' \item{"TwoPointConv"} - Result of two-point conversion: Success or Failure
#' \item{"DefTwoPoint"} - Result of defensive two-point conversion: Success or Failure
#' \item{"Safety"} - Binary: 1 if safety was recorded else 0
#' \item{"Onsidekick"} - Binary: 1 if the kickoff was an onside kick
#' \item{"PuntResult} - Result of punt: Clean or Blocked
#' \item{"PlayType"} - The type of play that occured, potential values are:
#' \itemize{
#' \item{Kickoff, Punt}
#' \item{Pass, Sack, Run}
#' \item{Field Goal, Extra Point}
#' \item{Quarter End, Two Minute Warning, Half End, End of Game}
#' \item{No Play, QB Kneel, Spike, Timeout}
#' }
#' \item{"Passer"} - The passer on the play if it was a pass play
#' \item{"Passer_ID"} - NFL GSIS player ID for the passer
#' \item{"PassAttempt"} - Binary: 1 if a pass was attempted else 0
#' \item{"PassOutcome"} - Pass Result: Complete or Incomplete Pass
#' \item{"PassLength"} - Categorical variable indicating the length of the pass:
#' Short or Deep
#' \item{"PassLocation"} - Location of the pass: left, middle, right
#' \item{"InterceptionThrown"} - Binary: 1 if an interception else 0
#' \item{"Interceptor"} - The player who intercepted the ball
#' \item{"Rusher"} - The runner on the play if it was a running play
#' \item{"Rusher_ID"} - NFL GSIS player ID for the rusher
#' \item{"RushAttempt"} - Binary: 1 if a run was attempted else 0
#' \item{"RunLocation"} - Location of the run: left, middle, right
#' \item{"RunGap"} - Gap of the run: guard, tackle, end
#' \item{"Receiver"} - The targeted receiver of a play
#' \item{"Receiver_ID"} - NFL GSIS player ID for the receiver
#' \item{"Reception"} - Binary: 1 if a reception was recorded else 0
#' \item{"ReturnResult"} - Result of a punt, kickoff, interception, or
#' fumble return: Fair Catch, Touchback, Touchdown
#' \item{"Returner"} - The punt or kickoff returner
#' \item{"BlockingPlayer"} - The player who blocked the extra point,
#' field goal, or punt
#' \item{"Tackler1"} - The primary tackler on the play
#' \item{"Tackler2"} - The secondary tackler on the play
#' \item{"FieldGoalResult"} - Outcome of a fieldgoal: No Good, Good, Blocked
#' \item{"FieldGoalDistance"} - Field goal length in yards
#' \item{"Fumble"} - Binary: 1 if a fumble occured else no
#' \item{"RecFumbTeam"} - Team that recovered the fumble
#' \item{"RecFumbPlayer"} - Player that recovered the fumble
#' \item{"Sack"} - Binary: 1 if a sack was recorded else 0
#' \item{"Challenge.Replay"} - Binary: 1 if play was reviewed by the replay official
#' else 0
#' \item{"ChalReplayResult"} - Result of the replay review: Upheld or Reversed
#' \item{"Accepted.Penalty"} - Binary: 1 if penalty was accepted else 0
#' \item{"PenalizedTeam"} - The penalized team on the play
#' \item{"PenaltyType"} - Type of penalty on the play, alues include:
#' \itemize{
#' \item{Unnecessary Roughness, Roughing the Passer}
#' \item{Illegal Formation, Defensive Offside}
#' \item{Delay of Game, False Start, Illegal Shift}
#' \item{Illegal Block Above the Waist, Personal Foul}
#' \item{Unnecessary Roughness, Illegal Blindside Block}
#' \item{Defensive Pass Interference, Offensive Pass Interference}
#' \item{Fair Catch Interference, Unsportsmanlike Conduct}
#' \item{Running Into the Kicker, Illegal Kick}
#' \item{Illegal Contact, Defensive Holding}
#' \item{Illegal Motion, Low Block}
#' \item{Illegal Substitution, Neutral Zone Infraction}
#' \item{Ineligible Downfield Pass, Roughing the Passer}
#' \item{Illegal Use of Hands, Defensive Delay of Game}
#' \item{Defensive 12 On-field, Offensive Offside}
#' \item{Tripping, Taunting, Chop Block}
#' \item{Interference with Opportunity to Catch, Illegal Touch Pass}
#' \item{Illegal Touch Kick, Offside on Free Kick}
#' \item{Intentional Grounding, Horse Collar}
#' \item{Illegal Forward Pass, Player Out of Bounds on Punt}
#' \item{Clipping, Roughing the Kicker, Ineligible Downfield Kick}
#' \item{Offensive 12 On-field, Disqualification}
#' }
#' \item{"PenalizedPlayer"} - The penalized player
#' \item{"Penalty.Yards"} - The number of yards that the penalty resulted in
#' \item{"PosTeamScore"} - The score of the possession team (offensive team)
#' \item{"DefTeamScore"} - The score of the defensive team
#' \item{"ScoreDiff"} - The difference in score between the offensive and
#' defensive teams (offensive.score - def.score)
#' \item{"AbsScoreDiff"} - Absolute value of the score differential
#' \item{"HomeTeam"} - The home team
#' \item{"AwayTeam"} - The away team
#' \item{"Timeout_Indicator"} - Binary: 1 if a timeout was charge else 0
#' \item{"Timeout_Team"} - Team charged with penalty (None if no timeout)
#' \item{"posteam_timeouts_pre"} - Number of timeouts remaining for possession
#' team at the start of the play
#' \item{"HomeTimeouts_Remaining_Pre"} - Number of timeouts remaining for home
#' team at the start of the play
#' \item{"AwayTimeouts_Remaining_Pre"} - Number of timeouts remaining for away
#' team at the start of the play
#' \item{"HomeTimeouts_Remaining_Post"} - Number of timeouts remaining for home
#' team at the end of the play (handles loss of timeout from lost challenge)
#' \item{"AwayTimeouts_Remaining_Post"} - Number of timeouts remaining for away
#' team at the end of the play (handles loss of timeout from lost challenge)
#' \item{"No_Score_Prob"} - Probability of no score occurring within the half
#' \item{"Opp_Field_Goal_Prob"} - Probability of the defensive team scoring a
#' field goal next
#' \item{"Opp_Safety_Prob"} - Probability of the defensive team scoring a
#' safety next
#' \item{"Opp_Touchdown_Prob"} - Probability of the defensive team scoring a
#' touchdown next
#' \item{"Field_Goal_Prob"} - Probability of the possession team scoring a
#' field goal next
#' \item{"Safety_Prob"} - Probability of the possession team scoring a safety
#' next
#' \item{"Touchdown_Prob"} - Probability of the possession team scoring a
#' touchdown next
#' \item{"ExPoint_Prob"} - Probability of the possession team making the PAT
#' \item{"TwoPoint_Prob"} - Probability of the possession team converting
#' the two-point conversion
#' \item{"ExpPts"} - The expected points for the possession team at the
#' start of the play
#' \item{"EPA"} - Expected points added with respect to the possession
#' team considering the result of the play
#' \item{"airEPA"} - Expected points added from air yards
#' \item{"yacEPA"} - Expected points added from yards after catch
#' \item{"Home_WP_pre"} - The win probability for the home team at the start
#' of the play
#' \item{"Away_WP_pre"} - The win probability for the away team at the start
#' of the play
#' \item{"Home_WP_post"} - The win probability for the home team at the
#' end of the play
#' \item{"Away_WP_post"} - The win probability for the away team at the
#' end of the play
#' \item{"Win_Prob"} - The win probability added for team with possession
#' \item{"WPA"} - The win probability added with respect to the
#' possession team
#' \item{"airWPA"} - Win probability added from air yards
#' \item{"yacWPA"} - Win probability added from yards after catch
#'
#' }
#'
#' @return A dataframe with 99 columns specifying various statistics and
#' outcomes associated with each play of the specified NFL game.
#' @examples
#' # Parsed play-by-play of the final game in the 2015 NFL season
#'
#' # Save the gameID into a variable
#' nfl2015.finalregseasongame.gameID <- "2016010310"
#'
#' # Input the variable into the function to output the desired dataframe
#' finalgame2015.pbp <- game_play_by_play(nfl2015.finalregseasongame.gameID)
#'
#' # Subset the dataframe based on passing plays
#' subset(finalgame2015.pbp, PlayType == "Pass")
#' @export
game_play_by_play <- function(GameID) {
# Google R stlye format
#########################
#########################
# Converting JSON data
# Converting GameID into URL string
urlstring <- proper_jsonurl_formatting(GameID)
nfl.json <- tryCatch(RJSONIO::fromJSON(RCurl::getURL(urlstring)),
error=function(cond) {
message("Connection to API disrupted, please re-run code. If multiple failures, then there is no data available for this game yet.")
message(paste("Here is the url:", urlstring))
message("Here's the original error message:")
message(cond)
# Choose a return value in case of error
return(NA)
})
number.drives <- length(nfl.json[[1]]$drives) - 1
# For now hard coded a fix for the drive in the Patriots vs. TB game
# that has an empty drive 3 list:
if (GameID == 2013092206){
PBP <- lapply(c(1, 2, 4:number.drives),
function(x) cbind("Drive"=x,data.frame(do.call(rbind,nfl.json[[1]]$drives[[x]]$plays))[,c(1:9)])) %>%
dplyr::bind_rows()
} else {
PBP <- lapply(c(1:number.drives),
function(x) cbind("Drive"=x,data.frame(do.call(rbind,nfl.json[[1]]$drives[[x]]$plays))[,c(1:9)])) %>%
dplyr::bind_rows()
}
# Search through the 'players' lists for each play (the 'plays' list within
# each drive) to find the air yards, yards after catch, and if the QB was hit
# Define the get_play_stats function which takes in a game's drive list and
# returns a dataframe with a row for each play and columns for the AirYards,
# YardsAfterCatch, and QBHit indicator column:
get_play_stats <- function(drive_plays_list){
result <- lapply(c(1:length(drive_plays_list$plays)),
function(x) suppressWarnings(find_extra_stats(drive_plays_list$plays[[x]]))) %>%
dplyr::bind_rows() %>%
# Include a column for the play_id
cbind(data.frame(play_id = names(drive_plays_list$plays)))
return(result)
}
# Define a function that takes in the players part of a play
# and returns a dataframe of the stats with IDs:
return_play_stat_df <- function(players_list){
# Check the number of players in the play,
# if empty return "None":
if (length(players_list$players) == 0){
result <- as.data.frame(list("statId" = "None"))
} else{
# Return the dataframe
result <- lapply(c(1:length(players_list$players)),
function(x) as.data.frame(do.call(rbind,players_list$players[[x]]))) %>% dplyr::bind_rows()
# Get the Player IDs:
player_ids <- names(players_list$players)
# For each player, get the number of rows for them and repeat their ID that many times:
player_ids_vector <- as.vector(unlist(sapply(c(1:length(player_ids)),
function(x) rep(player_ids[x],length(players_list$players[[x]])))))
result$PlayerID <- player_ids_vector
}
return(result)
}
# Define a fuction that returns as a dataframe the AirYards, YardsAfterCatch, and QBHit:
find_extra_stats <- function(play_list){
play_stat_df <- return_play_stat_df(play_list)
# If play_stat_df equals None, then return NA for each:
if (play_stat_df$statId == "None"){
result <- as.data.frame(list("AirYards" = 0,
"YardsAfterCatch" = 0,
"QBHit" = 0,
"Timeout_Indicator" = 0,
"Timeout_Team" = as.character("None"),
"Passer_ID" = as.character("None"),
"Rusher_ID" = as.character("None"),
"Receiver_ID" = as.character("None")))
} else{
# Find AirYards:
if (111 %in% play_stat_df$statId){
airyards <- play_stat_df$yards[which(play_stat_df$statId == 111)]
# Check to see if it's a list
if (typeof(airyards) == "list"){
airyards <- unlist(airyards)
}
# Only the first one
airyards <- airyards[1]
# If NULL then make it 0:
airyards <- ifelse(is.null(airyards),0,airyards)
} else if (112 %in% play_stat_df$statId){
airyards <- play_stat_df$yards[which(play_stat_df$statId == 112)]
# Check to see if it's a list
if (typeof(airyards) == "list"){
airyards <- unlist(airyards)
}
# Only the first one
airyards <- airyards[1]
# If NULL then make it 0:
airyards <- ifelse(is.null(airyards), 0, airyards)
} else {
airyards <- 0
}
# YardsAfterCatch:
if (113 %in% play_stat_df$statId){
yac <- play_stat_df$yards[which(play_stat_df$statId == 113)]
# Check to see if it's a list
if (typeof(yac) == "list"){
yac <- unlist(yac)
}
# Only the first one
yac <- yac[1]
# If NULL then make it 0:
yac <- ifelse(is.null(yac), 0, yac)
} else {
yac <- 0
}
# QBHit:
if (110 %in% play_stat_df$statId){
qbhit <- 1
} else {
qbhit <- 0
}
# Timeout:
if (68 %in% play_stat_df$statId){
to <- 1
to_team <- as.character(play_stat_df$clubcode[which(play_stat_df$statId == 68)])
# Only the first one
to_team <- to_team[1]
} else {
to <- 0
to_team <- as.character("None")
}
# Passer ID:
if (any(c(14, 15, 16, 111, 112, 20) %in% play_stat_df$statId)){
passer_id <- as.character(play_stat_df$PlayerID[which(play_stat_df$statId %in% c(14, 15, 16, 111, 112, 20))])
# Only the first one
passer_id <- passer_id[1]
} else {
passer_id <- "None"
}
# Rusher ID:
if (any(c(10, 11, 12, 13) %in% play_stat_df$statId)){
rusher_id <- as.character(play_stat_df$PlayerID[which(play_stat_df$statId %in% c(10, 11, 12, 13))])
# Only the first one
rusher_id <- rusher_id[1]
} else {
rusher_id <- "None"
}
# Receiver ID:
if (any(c(21, 22, 113, 115) %in% play_stat_df$statId)){
receiver_id <- as.character(play_stat_df$PlayerID[which(play_stat_df$statId %in% c(21, 22, 113, 115))])
# Only the first one
receiver_id <- receiver_id[1]
} else {
receiver_id <- "None"
}
# Return as a dataframe:
result <- as.data.frame(list("AirYards" = airyards,
"YardsAfterCatch" = yac,
"QBHit" = qbhit,
"Timeout_Indicator" = to,
"Timeout_Team" = to_team,
"Passer_ID" = passer_id,
"Rusher_ID" = rusher_id,
"Receiver_ID" = receiver_id))
}
colnames(result) <- c("AirYards","YardsAfterCatch","QBHit","Timeout_Indicator","Timeout_Team",
"Passer_ID","Rusher_ID","Receiver_ID")
result$Timeout_Team <- as.character(result$Timeout_Team)
result$Passer_ID <- as.character(result$Passer_ID)
result$Rusher_ID <- as.character(result$Rusher_ID)
result$Receiver_ID <- as.character(result$Receiver_ID)
return(result)
}
# Generate the dataframe with the extra stats:
# Catch the situation with GameID == 2013092206 and drive == 3
if (GameID == 2013092206) {
pbp_extrastats <- lapply(c(1,2,4:number.drives),
function(x) suppressWarnings(get_play_stats(nfl.json[[1]]$drives[[x]]))) %>%
dplyr::bind_rows()
} else {
pbp_extrastats <- lapply(c(1:number.drives),
function(x) suppressWarnings(get_play_stats(nfl.json[[1]]$drives[[x]]))) %>%
dplyr::bind_rows()
}
# Add to the PBP dataset:
PBP <- cbind(PBP, pbp_extrastats)
# Adjusting Possession Team
PBP$posteam <- ifelse(PBP$posteam == "NULL", dplyr::lag(PBP$posteam),
PBP$posteam)
# Fixing Possession team for Kick-Offs
kickoff.index <- which(sapply(PBP$desc, regexpr,
pattern =
"kicks") != -1)
pos.teams <- unlist(unique(PBP$posteam))[1:2]
correct.kickoff.pos <- ifelse(PBP$posteam[kickoff.index] == pos.teams[1],
pos.teams[2], pos.teams[1])
PBP[kickoff.index, "posteam"] <- correct.kickoff.pos
# Yard Line Information
# In the earlier seasons when there was a dead ball (i.e. timeout)
# the yardline info was left blank or NULL. Also if the ball was at midfield then
# there was no team associated so I had to add a space to make the strsplit
# work
yline.info.1 <- ifelse(PBP$yrdln == "50", "MID 50", PBP$yrdln)
yline.info.1 <- ifelse(nchar(PBP$yrdln) == 0 |
PBP$yrdln == "NULL", dplyr::lag(PBP$yrdln),
yline.info.1)
yline.info <- sapply(yline.info.1, strsplit, split = " ")
PBP$SideofField <- sapply(yline.info, FUN = function(x) x[1])
PBP$yrdln <- as.numeric(sapply(yline.info, FUN = function(x) x[2]))
# Yard Line on 100 yards Scale: Distance from Opponent Endzone
PBP$yrdline100 <- ifelse(PBP$SideofField == PBP$posteam | PBP$yrdln == 50,
100 - PBP$yrdln, PBP$yrdln )
# Game Date
date.step1 <- stringr::str_extract(urlstring, pattern = "/[0-9]{10}/")
date.step2 <- stringr::str_extract(date.step1, pattern = "[0-9]{8}")
year <- substr(date.step2, start = 1, stop = 4)
month <- substr(date.step2, start = 5, stop = 6)
day <- substr(date.step2, start = nchar(date.step2)-1,
stop = nchar(date.step2))
date <- as.Date(paste(month, day, year, sep = "/"), format = "%m/%d/%Y")
PBP$Date <- date
PBP$GameID <- stringr::str_extract(date.step1, pattern = "[0-9]{10}")
# Adding Zero time to Quarter End
quarter.end <- which(sapply(PBP$desc, regexpr,
pattern = "END QUARTER|END GAME") != -1)
PBP$time[quarter.end] <- "00:00"
# Time in Seconds
qtr.timeinsecs <- lubridate::period_to_seconds(lubridate::ms(PBP$time))
# Quarter 1
qtr.timeinsecs[which(PBP$qtr == 1)] <- qtr.timeinsecs[
which(PBP$qtr == 1)] + (900*3)
# Quarter 2
qtr.timeinsecs[which(PBP$qtr == 2)] <- qtr.timeinsecs[
which(PBP$qtr == 2)] + (900*2)
# Quarter 3
qtr.timeinsecs[which(PBP$qtr == 3)] <- qtr.timeinsecs[
which(PBP$qtr == 3)] + 900
# Overtime
qtr.timeinsecs[which(PBP$qtr == 5)] <- qtr.timeinsecs[
which(PBP$qtr == 5)] - 900
PBP$TimeSecs <- qtr.timeinsecs
# Time Difference (in seconds)
plays.time.diff <- abs(c(0, diff(qtr.timeinsecs)))
PBP$PlayTimeDiff <- plays.time.diff
## Challenge or Replay Review ##
# Binary
PBP$Challenge.Replay <- 0
replay.offic <- grep(PBP$desc, pattern = "Replay Official reviewed")
challenged <- grep(PBP$desc, pattern = "challenge")
PBP$Challenge.Replay[c(replay.offic, challenged)] <- 1
# Results
PBP$ChalReplayResult <- NA
upheld.play <- grep(PBP$desc, pattern = "the play was Upheld")
reversed.play <- grep(PBP$desc, pattern = "the play was REVERSED")
PBP$ChalReplayResult[upheld.play] <- "Upheld"
PBP$ChalReplayResult[reversed.play] <- "Reversed"
######################################
# Picking Apart the Description Column
######################################
# Yards Gained
yards.step1 <- sapply(PBP$desc, stringr::str_extract,
pattern = "for (-)?([0-9]{1,2})?")
PBP$Yards.Gained <- as.numeric( ifelse( grepl(x = yards.step1,
pattern = "(-)?([0-9]{1,2})"),
stringr::str_extract(yards.step1,
"(-)?([0-9]{1,2})"),
"0")
)
# Two Point Conversion
PBP$TwoPointConv <- NA
two.point.result.ind <- which(sapply(PBP$desc, regexpr,
pattern =
"TWO-POINT CONVERSION ATTEMPT") != -1)
two.point.result2 <- stringr::str_extract_all(PBP$desc[two.point.result.ind],
pattern = "ATTEMPT FAILS|SUCCEEDS")
two.point.result.final1 <- unlist(lapply(two.point.result2, tail, 1))
two.point.result.final2 <- ifelse(two.point.result.final1 == "ATTEMPT FAILS",
"Failure", "Success")
if (length(two.point.result.final2) != 0) {
PBP$TwoPointConv[two.point.result.ind] <- two.point.result.final2
}
# Penalty - Binary Column
PBP$Accepted.Penalty <- NA
penalty.play <- sapply(PBP$desc, stringr::str_extract, pattern = "PENALTY")
PBP$Accepted.Penalty <- ifelse(!is.na(penalty.play), 1, 0)
# Penalized Team
penalized.team.s1 <- sapply(PBP$desc, stringr::str_extract,
"PENALTY on [A-Z]{2,3}")
PBP$PenalizedTeam <- stringr::str_extract(penalized.team.s1,
"[A-Z]{2,3}$")
PBP$PenalizedTeam <- ifelse(PBP$PenalizedTeam == "BLT",
"BAL",
ifelse(PBP$PenalizedTeam == "JAX",
"JAC",
ifelse(PBP$PenalizedTeam == "HST",
"HOU",
ifelse(PBP$PenalizedTeam == "SL",
"STL",
ifelse(PBP$PenalizedTeam == "ARZ",
"ARI",
PBP$PenalizedTeam)))))
# Penalty - What was the penalty?
penalty.type.s1 <- sapply(PBP$desc, stringr::str_extract,
pattern ="PENALTY(.){5,25},.+, [0-9] yard(s)")
penalty.type.s2 <- stringr::str_extract(pattern = ",.+,", penalty.type.s1)
penalty.type.final <- stringr::str_sub(penalty.type.s2, 3, -2)
PBP$PenaltyType <- penalty.type.final
# Penalized Player
PBP$PenalizedPlayer <- NA
penalized.player.int <- sapply(PBP$desc[ which(PBP$Accepted.Penalty == 1) ],
stringr::str_extract,
pattern =
"[A-Z]{2,3}-[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
penalized.player2 <- stringr::str_extract(penalized.player.int,
pattern =
"[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
PBP$PenalizedPlayer[PBP$Accepted.Penalty == 1] <- penalized.player2
# Penalty Yards
PBP$Penalty.Yards <- NA
penalty.yards.step1 <- sapply(PBP$desc, stringr::str_extract,
pattern = ", [0-9]{1,2} yard(s?), enforced")
PBP$Penalty.Yards <- ifelse(!is.na(penalty.yards.step1),
as.numeric(stringr::str_extract(
penalty.yards.step1,
"[0-9]{1,2}")
), 0)
# Modifying Down Column
PBP$down <- unlist(PBP$down)
PBP$down[which(PBP$down == 0)] <- NA
# Defenseive Team Column
PBP$DefensiveTeam <- NA
teams.step1 <- stringr::str_extract(unlist(unique(PBP$posteam)), "[A-Z]{2,3}")
teams <- teams.step1[which(!is.na(teams.step1))]
Team1 <- teams[1]
Team2 <- teams[2]
PBP$DefensiveTeam[which(PBP$posteam == Team1)] <- Team2
PBP$DefensiveTeam[which(PBP$posteam == Team2)] <- Team1
### Type of Play Initialized ###
PBP$PlayType <- NA
## Passer ##
passer.step1 <- sapply(PBP$desc, stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? pass")
PBP$Passer <- stringr::str_extract(passer.step1,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
## Receiver ##
receiver.step1 <- sapply(PBP$desc, stringr::str_extract,
pattern =
"pass ((incomplete )?[a-z]{4,5} [a-z]{4,6} )?to [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
PBP$Receiver <- stringr::str_extract(receiver.step1,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
## Tacklers ##
tacklers.step1 <- sapply(PBP$desc, stringr::str_extract,
pattern = "(yard(s?)|no gain) \\([A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?(;|,)?( )?([A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?)?\\)\\.")
# Identifying the tacklers on the play (either one or two)
tacklers1 <- stringr::str_extract(tacklers.step1,
pattern =
"\\([A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
tacklers1 <- stringr::str_extract(tacklers1,
pattern =
"[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Pulling out tacklers names
tacklers2 <- stringr::str_extract(tacklers.step1,
pattern =
"(;|,)( )[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
tacklers2 <- stringr::str_extract(tacklers2,
pattern =
"[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
PBP$Tackler1 <- tacklers1
PBP$Tackler2 <- tacklers2
# Player who blocks punt or field goal or extra point
player.blocking1 <- stringr::str_extract(PBP$desc,
pattern = "BLOCKED by [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
player.blocking2 <- stringr::str_extract(player.blocking1,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
PBP$BlockingPlayer <- player.blocking2
# Timeouts
## Needs to be before Field Goal
timeouts <- which(sapply(PBP$desc, regexpr,
pattern = "[A-z]imeout( #[1-5] by)?") != -1)
PBP$PlayType[timeouts] <- "Timeout"
# Field Goal
fieldgoal <- which(sapply(PBP$desc, regexpr,
pattern = "field goal") != -1)
fieldgoal.null <- which(sapply(PBP$desc, regexpr,
pattern = "field goal(.)+NULLIFIED") != -1)
fieldgoal.rev <- which(sapply(PBP$desc, regexpr,
pattern = "field goal(.)+REVERSED") != -1)
fieldgoal <- setdiff(fieldgoal, c(fieldgoal.null, fieldgoal.rev))
missed.fg <- which(sapply(PBP$desc, regexpr,
pattern = "field goal is No Good") != -1)
blocked.fg <- which(sapply(PBP$desc, regexpr,
pattern = "field goal is BLOCKED") != -1)
made.fg <- which(sapply(PBP$desc, regexpr,
pattern = "field goal is GOOD") != -1)
PBP$PlayType[fieldgoal] <- "Field Goal"
# Field Goal Distance
fieldgoaldist.prelim <- sapply(PBP$desc[fieldgoal], stringr::str_extract,
pattern = "[0-9]{1,2} yard field goal")
fieldgoaldist <- sapply(fieldgoaldist.prelim, stringr::str_extract,
pattern = "[0-9]{1,2}")
PBP$FieldGoalDistance <- NA
PBP$FieldGoalDistance[fieldgoal] <- fieldgoaldist
# Field Goal Result
PBP$FieldGoalResult <- NA
PBP$FieldGoalResult[missed.fg] <- "No Good"
PBP$FieldGoalResult[blocked.fg] <- "Blocked"
PBP$FieldGoalResult[made.fg] <- "Good"
# Pass Plays
PBP$PassOutcome <- NA
pass.play <- which(sapply( PBP$desc, regexpr, pattern = "pass( )") != -1)
incomplete.pass.play <- which(sapply(PBP$desc, regexpr,
pattern =
"(pass incomplete)|INTERCEPTED|(is incomplete)") != -1)
PBP$PlayType[pass.play] <- "Pass"
# Pass Outcome
PBP$PassOutcome[incomplete.pass.play] <- "Incomplete Pass"
PBP$PassOutcome[ setdiff(pass.play, incomplete.pass.play) ] <- "Complete"
# Pass Length
PBP$PassLength <- NA
short.pass <- which(sapply(PBP$desc, regexpr,
pattern = "pass (incomplete )?short") != -1)
deep.pass <- which(sapply(PBP$desc, regexpr,
pattern = "pass (incomplete )?deep") != -1)
PBP$PassLength[short.pass] <- "Short"
PBP$PassLength[deep.pass] <- "Deep"
# Pass Location
PBP$PassLocation <- NA
pass.left <- which(sapply(PBP$desc, regexpr,
pattern = "(short|deep) left") != -1)
pass.reft <- which(sapply(PBP$desc, regexpr,
pattern = "(short|deep) right") != -1)
pass.middle <- which(sapply(PBP$desc, regexpr,
pattern = "(short|deep) middle") != -1)
PBP$PassLocation[pass.left] <- "left"
PBP$PassLocation[pass.reft] <- "right"
PBP$PassLocation[pass.middle] <- "middle"
# Pass Attempt
PBP$PassAttempt <- NA
PBP$PassAttempt <- ifelse( sapply(PBP$desc, grepl,
pattern = "pass"), 1, 0)
# Reception Made
PBP$Reception <- 0
PBP$Reception[setdiff(pass.play,incomplete.pass.play)] <- 1
# Interception Thrown
PBP$InterceptionThrown <- ifelse(
sapply(PBP$desc, grepl,
pattern = "INTERCEPTED"), 1, 0
)
# Punt
punt.play <- which(sapply(PBP$desc, regexpr, pattern = "punt(s)?") != -1)
PBP$PlayType[punt.play] <- "Punt"
# Punt Result
PBP$PuntResult <- NA
blocked.punts <- grep(PBP$desc[punt.play], pattern = "BLOCKED")
PBP$PuntResult[punt.play] <- "Clean"
PBP$PuntResult[punt.play][blocked.punts] <- "Blocked"
# Extra Point
extrapoint.good <- which(sapply(PBP$desc, regexpr,
pattern = "extra point is GOOD") != -1)
extrapoint.nogood <- which(sapply(PBP$desc, regexpr,
pattern = "(extra point is No Good)") != -1)
extrapoint.blocked <- which(sapply(PBP$desc, regexpr,
pattern = "(extra point is Blocked)") != -1)
extrapoint.aborted <- which(sapply(PBP$desc, regexpr,
pattern = "(extra point is Aborted)") != -1)
PBP$PlayType[c(extrapoint.good,
extrapoint.nogood,
extrapoint.blocked,
extrapoint.aborted)] <- "Extra Point"
# Extra Point Result
PBP$ExPointResult <- NA
PBP$ExPointResult[extrapoint.good] <- "Made"
PBP$ExPointResult[extrapoint.nogood] <- "Missed"
PBP$ExPointResult[extrapoint.blocked] <- "Blocked"
PBP$ExPointResult[extrapoint.aborted] <- "Aborted"
# Touchdown Play
touchdown.step1 <- grep(PBP$desc, pattern = "TOUCHDOWN")
nullified <- grep(PBP$desc[touchdown.step1], pattern = "NULLIFIED")
reversed <- grep(PBP$desc[touchdown.step1], pattern = "(REVERSED)$")
if (length(c(nullified, reversed)) > 0) {
touchdown.step1 <- touchdown.step1[-c(nullified, reversed)]
}
PBP$Touchdown <- 0
PBP$Touchdown[touchdown.step1] <- 1
# Defensive 2-pt conversion
def.twopt.suc <- which(sapply(PBP$desc, regexpr,
pattern = "DEFENSIVE TWO-POINT ATTEMPT\\. (.){1,70}\\. ATTEMPT SUCCEEDS") != -1)
def.twopt.fail <- which(sapply(PBP$desc, regexpr,
pattern = "DEFENSIVE TWO-POINT ATTEMPT\\. (.){1,70}\\. ATTEMPT FAILS") != -1)
PBP$DefTwoPoint <- NA
PBP$DefTwoPoint[def.twopt.suc] <- "Success"
PBP$DefTwoPoint[def.twopt.fail] <- "Failure"
all.2pts <- intersect(c(def.twopt.suc, def.twopt.fail), two.point.result.ind)
PBP$TwoPointConv[all.2pts] <- "Failure"
# Fumbles
PBP$Fumble <- 0
fumble.index1 <- which(sapply(PBP$desc, regexpr, pattern = "FUMBLE") != -1)
fumble.overruled <- which(sapply(PBP$desc[fumble.index1],
regexpr,
pattern = "(NULLIFIED)|(Reversed)") != -1)
fumble.index <- setdiff(fumble.index1, fumble.overruled)
PBP$Fumble[fumble.index] <- 1
# Quarter End
end.quarter <- which(sapply(PBP$desc, regexpr,
pattern = "(END QUARTER)|(End of quarter)") != -1)
PBP$PlayType[end.quarter] <- "Quarter End"
# Half End
end.half <- which(sapply(PBP$desc, regexpr,
pattern = "End of half") != -1)
PBP$PlayType[end.half] <- "Half End"
# 2 Minute Warning
two.minute.warning <- which(sapply(PBP$desc, regexpr,
pattern = "Two-Minute Warning") != -1)
PBP$PlayType[two.minute.warning] <- "Two Minute Warning"
# Sack
sack.plays <- which(sapply(PBP$desc, regexpr, pattern = "sacked") != -1)
PBP$PlayType[sack.plays] <- "Sack"
# Sack- Binary
PBP$Sack <- 0
PBP$Sack[sack.plays] <- 1
# Tacklers on a sack play
sackers <- sapply(PBP$desc[which(PBP$Sack == 1 & is.na(PBP$Tackler1))], stringr::str_extract,
pattern = "(split by [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? and [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?\\))|(\\([A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?, [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?\\))|\\([A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?\\)|Sack by [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Sack Player 1
sackplayer1 <- stringr::str_extract(sackers,
pattern =
"[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Sack Player 2
sackplayer2 <- stringr::str_extract(sackers,
pattern =
"[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?\\)")
sackplayer2 <- stringr::str_extract(sackplayer2,
pattern =
"[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Adding them to the Tacklers Column
# Tackler 2 must go first so that second conditional remains true
PBP$Tackler2[which(PBP$Sack == 1 & is.na(PBP$Tackler1))] <- sackplayer2
PBP$Tackler1[which(PBP$Sack == 1 & is.na(PBP$Tackler1))] <- sackplayer1
# No Play
no.play <- which(sapply(PBP$desc, regexpr,
pattern = "(No Play)|(\\(Kick formation\\) PENALTY)") != -1)#|((\\(Kick formation\\) )?PENALTY)") != -1)
no.play <- which(sapply(PBP$desc, regexpr,
pattern = "(No Play)|(\\(Kick formation\\) PENALTY)") != -1)#|((\\(Kick formation\\) )?PENALTY)") != -1)
def.penalty.noplay <- which(PBP$PenalizedTeam[no.play] != PBP$posteam[no.play])
PBP$PlayType[no.play] <- "No Play"
# Safety - Binary
safety.plays <- which(sapply(PBP$desc, regexpr, pattern = "SAFETY") != -1)
PBP$Safety <- 0
PBP$Safety[safety.plays] <- 1
# QB Kneel
qb.kneel <- which(sapply(PBP$desc, regexpr, pattern = "kneels") != -1)
PBP$PlayType[qb.kneel] <- "QB Kneel"
# Kick Off
kickoff <- which(sapply(PBP$desc, regexpr,
pattern = "(kick(s)? [0-9]{2,3})|((O|o)nside)") != -1)
PBP$PlayType[kickoff] <- "Kickoff"
# Onside Kick
onside <- sapply(PBP$desc, regexpr, pattern = "(O|o)nside( (K|k)ick)?")
PBP$Onsidekick <- ifelse(onside != -1, 1, 0)
# Spike
spike.play <- which(sapply(PBP$desc, regexpr, pattern = "spiked") != -1)
PBP$PlayType[spike.play] <- "Spike"
# End of Game
end.game <- which(sapply(PBP$desc, regexpr,
pattern = "(END GAME)|(End of game)") != -1)
PBP$PlayType[end.game] <- "End of Game"
# First Down
PBP$FirstDown <- 0
first.downplays <- which(PBP$down == 1)
first.downs <- first.downplays-1
PBP$FirstDown[first.downs] <- ifelse(PBP$down[first.downs] ==0, NA, 1)
# Running Play
running.play <- which(is.na(PBP$PlayType))
PBP$PlayType[running.play] <- "Run"
# Fix plays that are run plays but were challenged and lost, so a timeout was
# lost
running.play2 <- which(sapply(PBP$desc[which(PBP$PlayType == "Timeout")],
regexpr,
pattern = "(((left|right) (tackle|end|guard))|(up the middle)) (for|to)") != -1)
PBP[which(PBP$PlayType == "Timeout"),"PlayType"][running.play2] <- "Run"
PBP$RushAttempt <- ifelse(PBP$PlayType == "Run", 1, 0)
##################
### SOLVE: Look at timeout play and search for "(left|right) (tack|end|guard) for..."
# Run Direction
PBP$RunLocation <- NA
run.left <- which(sapply(PBP[which(PBP$PlayType == "Run"),"desc"], regexpr,
pattern = "left") != -1)
run.right <- which(sapply(PBP[which(PBP$PlayType == "Run"),"desc"], regexpr,
pattern = "right") != -1)
run.middle <- which(sapply(PBP[which(PBP$PlayType == "Run"),"desc"], regexpr,
pattern = "middle") != -1)
PBP[c(running.play, running.play2),"RunLocation"][run.left] <- "left"
PBP[c(running.play, running.play2),"RunLocation"][run.right] <- "right"
PBP[c(running.play, running.play2),"RunLocation"][run.middle] <- "middle"
# Run Gap
PBP$RunGap <- NA
run.guard <- which(sapply(PBP[which(PBP$PlayType == "Run"),"desc"], regexpr,
pattern = "guard") != -1)
run.tackle <- which(sapply(PBP[which(PBP$PlayType == "Run"),"desc"], regexpr,
pattern = "tackle") != -1)
run.end <- which(sapply(PBP[which(PBP$PlayType == "Run"),"desc"], regexpr,
pattern = "end") != -1)
PBP[c(running.play, running.play2),"RunGap"][run.guard] <- "guard"
PBP[c(running.play, running.play2),"RunGap"][run.tackle] <- "tackle"
PBP[c(running.play, running.play2),"RunGap"][run.end] <- "end"
# Rusher
rusherStep1 <- sapply(PBP[which(PBP$PlayType == "Run"),"desc"],
stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Catch the Aborted phrase:
rusherStep1 <- ifelse(stringr::str_detect(rusherStep1," Aborted"),
gsub(" Aborted","",rusherStep1),rusherStep1)
PBP[c(running.play, running.play2),"Rusher"] <- rusherStep1
# Changing to correctly reflect the rusher
elidgiblePlays <- grep(PBP[which( (PBP$PlayType == "Run" & PBP$Accepted.Penalty == 0) | (PBP$PlayType == "Run" & PBP$Accepted.Penalty == 1 &PBP$posteam != PBP$PenalizedTeam)),"desc"],
pattern = " (report(ed|s)?)?( in )?as eligible(.){15,}")
if (length(elidgiblePlays) > 0) {
rusherStep2 <- sapply(PBP[which( (PBP$PlayType == "Run" & PBP$Accepted.Penalty == 0) | (PBP$PlayType == "Run" & PBP$Accepted.Penalty == 1 &PBP$posteam != PBP$PenalizedTeam)),"desc"],
stringr::str_extract,
pattern = "(as eligible( receiver(s)?| for [A-Z]{2,3})?(\\.|,)?( ){1,2}(Direct snap to [A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?\\.( ){1,2})?[A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?)|(as eligible( receiver)?(\\.)?(.)+(\\.)? (\\(Shotgun\\) )?[A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?)|(\\(((Run formation)|([0-9]{1,2}:[0-9]{1,2}))\\) [A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? (.){10,80}report(s|ed) as eligible(.){11,})")
rusherStep3 <- sapply(rusherStep2,
stringr::str_extract,
pattern = "([A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?)$|\\(((Run formation)|([0-9]{1,2}:[0-9]{1,2})|(Shotgun))\\) [A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
rusherStep3.1 <- sapply(rusherStep3,
stringr::str_extract,
pattern = "([A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?)$")
#([A-z]{1,4}\\.( )?[A-Z](')?[A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?)$)
rusherStepfinal <- rusherStep3.1[!is.na(rusherStep3.1)]
PBP$Rusher[which(PBP$PlayType == "Run")][elidgiblePlays] <- rusherStepfinal
}
## Punt and Kick Return Outcome ##
# Punt Outcome
punt.tds <- which(sapply(PBP$desc[punt.play], regexpr,
pattern = "TOUCHDOWN") != -1)
punt.tds.null <- which(sapply(PBP$desc[punt.play], regexpr,
pattern = "NULLIFIED") != -1)
punt.tds.rev <- which(sapply(PBP$desc[punt.play], regexpr,
pattern = "REVERSED") != -1)
punt.tds <- setdiff(punt.tds, c(punt.tds.null, punt.tds.rev))
punts.touchbacks <- which(sapply(PBP$desc[punt.play], regexpr,
pattern = "Touchback") != -1)
punts.faircatch <- which(sapply(PBP$desc[punt.play], regexpr,
pattern = "fair catch") != -1)
# Kickoff Outcome
kick.tds <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "TOUCHDOWN") != -1)
kick.tds.null <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "NULLIFIED") != -1)
kick.tds.rev <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "REVERSED") != -1)
kick.tds <- setdiff(kick.tds, c(kick.tds.null, kick.tds.rev))
kick.tds.null <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "NULLIFIED") != -1)
kick.tds.rev <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "REVERSED") != -1)
kick.tds <- setdiff(kick.tds, c(kick.tds.null, kick.tds.rev))
kick.touchbacks <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "Touchback") != -1)
kick.faircatch <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "fair catch") != -1)
kick.kneels <- which(sapply(PBP$desc[kickoff], regexpr,
pattern = "kneel(s)?") != -1)
# Interception Outcome
intercept.td <- which(sapply(PBP$desc[which(PBP$InterceptionThrown == 1)],
regexpr, pattern = "TOUCHDOWN") != -1)
intercept.td.null <- which(sapply(PBP$desc[which(PBP$InterceptionThrown == 1)]
, regexpr, pattern = "NULLIFIED") != -1)
intercept.td.rev <- which(sapply(PBP$desc[which(PBP$InterceptionThrown == 1)],
regexpr, pattern = "REVERSED") != -1)
intercept.td <- setdiff(intercept.td, c(intercept.td.null, intercept.td.rev))
# Fumble Outcome
fumble.td <- which(sapply(PBP$desc[fumble.index],
regexpr, pattern = "TOUCHDOWN") != -1)
# May be able to remove bottom two lines
fumble.td.null <- which(sapply(PBP$desc[fumble.index],
regexpr, pattern = "NULLIFIED") != -1)
fumble.td.rev <- which(sapply(PBP$desc[fumble.index],
regexpr, pattern = "REVERSED") != -1)
fumble.td <- setdiff(fumble.td, c(fumble.td.null, fumble.td.rev))
PBP$ReturnResult <- NA
PBP$ReturnResult[punt.play][punt.tds] <- "Touchdown"
PBP$ReturnResult[punt.play][punts.touchbacks] <- "Touchback"
PBP$ReturnResult[punt.play][punts.faircatch] <- "Fair Catch"
PBP$ReturnResult[kickoff][kick.tds] <- "Touchdown"
PBP$ReturnResult[kickoff][kick.touchbacks] <- "Touchback"
PBP$ReturnResult[kickoff][c(kick.faircatch,kick.kneels)] <- "Fair Catch"
PBP$ReturnResult[which(PBP$InterceptionThrown == 1)][intercept.td] <- "Touchdown"
PBP$ReturnResult[fumble.index][fumble.td] <- "Touchdown"
## Returner ##
# Punt Returner
# Fair Catches
punt.returner1 <- sapply(PBP$desc[punt.play], stringr::str_extract,
pattern = "by [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?\\.$")
punt.returner2 <- sapply(punt.returner1, stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Touchdowns or Returns
punt.returner3 <- sapply(PBP$desc[punt.play], stringr::str_extract,
pattern = "(\\. [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? to [A-Z]{2,3} [0-9]{1,2})|\\. [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? for [0-9]{1,2} yard(s)")
punt.returner4 <- sapply(punt.returner3, stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Kickoff Returner
# Fair Catches
kickret1 <- sapply(PBP$desc[kickoff], stringr::str_extract,
pattern = "by [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?\\.$")
kickret2 <- sapply(kickret1, stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# Touchdowns or Returns
kickret3 <- sapply(PBP$desc[kickoff], stringr::str_extract,
pattern = "(\\. [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? to [A-Z]{2,3} [0-9]{1,2})|(\\. [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? for [0-9]{1,2} yard(s))|(\\. [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)? pushed)")
kickret4 <- sapply(kickret3, stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
# All Returners
all.returners <- rep(NA, time = nrow(PBP))
all.returners[kickoff][which(!is.na(kickret2))] <- kickret2[which(!is.na(kickret2))]
all.returners[kickoff][which(!is.na(kickret4))] <- kickret4[which(!is.na(kickret4))]
all.returners[punt.play][which(!is.na(punt.returner2))] <- punt.returner2[which(!is.na(punt.returner2))]
all.returners[punt.play][which(!is.na(punt.returner4))] <- punt.returner4[which(!is.na(punt.returner4))]
PBP$Returner <- all.returners
# Interceptor
interceptor1 <- sapply(PBP$desc[which(PBP$InterceptionThrown == 1)],
stringr::str_extract,
pattern = "INTERCEPTED by [A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
interceptor2 <- sapply(interceptor1, stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
PBP$Interceptor <- NA
PBP$Interceptor[which(PBP$InterceptionThrown == 1)] <- interceptor2
# Fumbler Recovery Team and Player
recover.step1 <- sapply(PBP$desc[fumble.index], stringr::str_extract,
pattern = "[A-Z]{2,3}-[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
recover.team <- sapply(recover.step1, stringr::str_extract,
pattern = "[A-Z]{2,3}")
recover.player <- sapply(recover.step1, stringr::str_extract,
pattern = "[A-z]{1,4}\\.( )?[A-Z][A-z]{1,20}(('|-| )?[A-Z][a-z]{1,14})?('o)?( (S|J)r\\.)?")
PBP$RecFumbTeam <- NA
PBP$RecFumbTeam[fumble.index] <- ifelse(recover.team == "BLT",
"BAL",
ifelse(recover.team == "JAX",
"JAC",
ifelse(recover.team == "HST",
"HOU",
ifelse(recover.team == "SL",
"STL",
recover.team))))
PBP$RecFumbPlayer <- NA
PBP$RecFumbPlayer[fumble.index] <- recover.player
# The next few variables are counting variables
# Used to help set up model for predictions
# Plays
PBP$PlayAttempted <- 1
time.format <- as.POSIXct(paste("00:",
unlist(PBP$time),
sep = ""), format = "%H:%M:%S")
# For the rows when "end of half occurs", need to lag the time
# for the previous play. Run multiple times for back to back plays with
# NA time
time.format[which(is.na(time.format))] <- time.format[which(is.na(time.format))-1]
time.format[which(is.na(time.format))] <- time.format[which(is.na(time.format))-1]
time.format[which(is.na(time.format))] <- time.format[which(is.na(time.format))-1]
# Time Under
PBP$TimeUnder <- substr(lubridate::ceiling_date(time.format, "minute"),
15, 16)
PBP$TimeUnder <- as.numeric(as.character(PBP$TimeUnder))
# Calculating Score of Game for Possesion team and Defensive Team
team.home.score <- rep(0, times = nrow(PBP))
team.away.score <- rep(0, times = nrow(PBP))
away.team.name <- nfl.json[[1]]$away$abbr
home.team.name <- nfl.json[[1]]$home$abbr
## Away Team ##
# Regular offensive passing, rushing
team.away.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == away.team.name
& !PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& !PBP$PlayType %>% dplyr::lag() %in% "Kickoff"
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Give points for Kickoff TDs
team.away.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& PBP$PlayType %>% dplyr::lag() %in% "Kickoff",
PBP$sp %>% dplyr::lag() == 1)] <- 6
# Give points for Punt Return TDs
team.away.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == home.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& PBP$PlayType %>% dplyr::lag() %in% "Punt"
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Give points for Interceptions
team.away.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == home.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& !is.na(PBP$Interceptor %>% dplyr::lag())
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Make sure to give away team points for fumble ret for TD
team.away.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == home.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& !PBP$PlayType %>% dplyr::lag() %in% "Kickoff"
& PBP$RecFumbTeam %>% dplyr::lag() == away.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Fumble and the team that fumbled recovers and scores a TD
team.away.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& PBP$RecFumbTeam %>% dplyr::lag() == away.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Points for two point conversion
team.away.score[which(PBP$TwoPointConv %>% dplyr::lag() == "Success"
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 2
# Points for safeties
team.away.score[which(PBP$Safety %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == home.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 2
# Points for made extra point
team.away.score[which(PBP$ExPointResult %>% dplyr::lag() == "Made"
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 1
# Points for made field goal
team.away.score[which(PBP$FieldGoalResult %>% dplyr::lag() == "Good"
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 3
team.away.score <- cumsum(team.away.score)
away.team.pos <- which(PBP$posteam == away.team.name)
away.team.def <- which(PBP$DefensiveTeam == away.team.name)
## Home Team ##
# Regular offensive passing, rushing
team.home.score[PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == home.team.name
& !PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& !PBP$PlayType %>% dplyr::lag() %in% "Kickoff"
& PBP$sp %>% dplyr::lag() == 1] <- 6
# Give points for Kickoffs
team.home.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == home.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& PBP$PlayType %>% dplyr::lag() %in% "Kickoff"
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Give points for Punts
team.home.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& PBP$PlayType %>% dplyr::lag() %in% "Punt"
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Give points for Interceptions
team.home.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$ReturnResult %>% dplyr::lag() %in% "Touchdown"
& !is.na(PBP$Interceptor %>% dplyr::lag() )
& PBP$sp %>% dplyr::lag() == 1)] <- 6
team.home.score[which(PBP$Touchdown %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$ReturnResult %in% "Touchdown"
& !PBP$PlayType %>% dplyr::lag() %in% "Kickoff"
& PBP$RecFumbTeam %>% dplyr::lag() == home.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 6
# Fumble and the team that fumbled recovered and scored a TD
team.home.score[which(PBP$Touchdown == 1
& PBP$posteam == home.team.name
& PBP$ReturnResult %in% "Touchdown"
& PBP$RecFumbTeam == home.team.name
& PBP$sp == 1)] <- 6
# Points for two point conversion
team.home.score[which(PBP$TwoPointConv %>% dplyr::lag() == "Success"
& PBP$posteam %>% dplyr::lag() == home.team.name)] <- 2
# Points for safeties
team.home.score[which(PBP$Safety %>% dplyr::lag() == 1
& PBP$posteam %>% dplyr::lag() == away.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 2
# Points for made extra point
team.home.score[which(PBP$ExPointResult %>% dplyr::lag() == "Made"
& PBP$posteam %>% dplyr::lag() == home.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 1
# Points for made field goal
team.home.score[which(PBP$FieldGoalResult %>% dplyr::lag() == "Good"
& PBP$posteam %>% dplyr::lag() == home.team.name
& PBP$sp %>% dplyr::lag() == 1)] <- 3
team.home.score <- cumsum(team.home.score)
home.team.pos <- which(PBP$posteam == home.team.name)
home.team.def <- which(PBP$DefensiveTeam == home.team.name)
## Possesion and Defensive Team Scores
PBP$PosTeamScore <- NA
PBP$DefTeamScore <- NA
### Inputting Scores
PBP$PosTeamScore[home.team.pos] <- team.home.score[home.team.pos]
PBP$PosTeamScore[away.team.pos] <- team.away.score[away.team.pos]
PBP$DefTeamScore[home.team.def] <- team.home.score[home.team.def]
PBP$DefTeamScore[away.team.def] <- team.away.score[away.team.def]
PBP$DefTeamScore[1] <- 0
PBP$PosTeamScore[1] <- 0
PBP$DefTeamScore <- ifelse(is.na(PBP$DefTeamScore), lag(PBP$DefTeamScore),
PBP$DefTeamScore)
PBP$PosTeamScore <- ifelse(is.na(PBP$PosTeamScore), lag(PBP$PosTeamScore),
PBP$PosTeamScore)
# Score Differential #
PBP$ScoreDiff <- PBP$PosTeamScore - PBP$DefTeamScore
##############################################################################
### Allows All Finals Plays of Game to Have a Score Difference ###
PBP$ScoreDiff <- ifelse(PBP$PlayType == "End of Game", dplyr::lag(PBP$ScoreDiff)
, PBP$ScoreDiff)
##############################################################################
# Abs Score Differential #
PBP$AbsScoreDiff <- abs(PBP$PosTeamScore - PBP$DefTeamScore)
# Goal to Go
PBP$GoalToGo <- ifelse(PBP$SideofField != PBP$posteam &
((PBP$ydstogo == PBP$yrdline100) |
(PBP$ydstogo <= 1 & PBP$yrdline100 == 1)),
1,0)
##################
## Unlisting Listed Columns
PBP$sp <- unlist(PBP$sp)
PBP$qtr <- unlist(PBP$qtr)
PBP$time <- unlist(PBP$time)
PBP$ydstogo <- unlist(PBP$ydstogo)
PBP$ydsnet <- unlist(PBP$ydsnet)
PBP$posteam <- unlist(PBP$posteam)
PBP$desc <- unlist(PBP$desc)
PBP$FieldGoalDistance <- unlist(PBP$FieldGoalDistance)
PBP$Interceptor <- unlist(PBP$Interceptor)
PBP$RecFumbPlayer <- unlist(PBP$RecFumbPlayer)
### Adding Posteam for final play of game ###
### Needed for win probability purposes #####
PBP$posteam <- ifelse(PBP$PlayType == "End of Game", dplyr::lag(PBP$posteam),
PBP$posteam)
## Same for Defensive Team ##
PBP$DefensiveTeam <- ifelse(PBP$PlayType == "End of Game",
dplyr::lag(PBP$DefensiveTeam),
PBP$DefensiveTeam)
## Home Team ##
PBP$HomeTeam <- nfl.json[[1]]$home$abbr
## Away Team
PBP$AwayTeam <- nfl.json[[1]]$away$abbr
## Half Indicator
PBP$Half_Ind <- with(PBP,ifelse(qtr %in% c(1,2),"Half1",
ifelse(qtr %in% c(3,4),"Half2","Overtime")))
## Keep track of timeouts remaining
PBP$HomeTimeouts_Remaining_Pre <- vector(length = nrow(PBP))
PBP$AwayTimeouts_Remaining_Pre <- vector(length = nrow(PBP))
PBP$HomeTimeouts_Remaining_Post <- vector(length = nrow(PBP))
PBP$AwayTimeouts_Remaining_Post <- vector(length = nrow(PBP))
for (i in 1:nrow(PBP)){
# if its the first play in the game set all timeouts remaining
# columns to be set to 3, or if the it's the first play in
# the second half
if (i == 1){
PBP$HomeTimeouts_Remaining_Pre[i] <- 3
PBP$AwayTimeouts_Remaining_Pre[i] <- 3
PBP$HomeTimeouts_Remaining_Post[i] <- 3
PBP$AwayTimeouts_Remaining_Post[i] <- 3
} else if (PBP$Half_Ind[i-1] == "Half1" & PBP$Half_Ind[i] == "Half2"){
PBP$HomeTimeouts_Remaining_Pre[i] <- 3
PBP$AwayTimeouts_Remaining_Pre[i] <- 3
PBP$HomeTimeouts_Remaining_Post[i] <- 3
PBP$AwayTimeouts_Remaining_Post[i] <- 3
} else if (PBP$Half_Ind[i-1] == "Half2" & PBP$Half_Ind[i] == "Overtime") {
# if it's the start of overtime then go to 2 timeouts
PBP$HomeTimeouts_Remaining_Pre[i] <- 2
PBP$AwayTimeouts_Remaining_Pre[i] <- 2
PBP$HomeTimeouts_Remaining_Post[i] <- 2
PBP$AwayTimeouts_Remaining_Post[i] <- 2
} else {
# the _Pre for columns are the _Post of the previous row
PBP$HomeTimeouts_Remaining_Pre[i] <- PBP$HomeTimeouts_Remaining_Post[i-1]
PBP$AwayTimeouts_Remaining_Pre[i] <- PBP$AwayTimeouts_Remaining_Post[i-1]
PBP$HomeTimeouts_Remaining_Post[i] <- PBP$HomeTimeouts_Remaining_Post[i-1]
PBP$AwayTimeouts_Remaining_Post[i] <- PBP$AwayTimeouts_Remaining_Post[i-1]
}
# If Timeout_Indicator == 1 then subtract 1 from which team
# called the timeout:
if (PBP$Timeout_Indicator[i] == 1){
if (PBP$Timeout_Team[i] == PBP$HomeTeam[i]){
PBP$HomeTimeouts_Remaining_Post[i] <- PBP$HomeTimeouts_Remaining_Pre[i] - 1
} else{
PBP$AwayTimeouts_Remaining_Post[i] <- PBP$AwayTimeouts_Remaining_Pre[i] - 1
}
}
}
# Create a column, posteam_timeouts_rem:
PBP$posteam_timeouts_pre <- ifelse(PBP$posteam == PBP$HomeTeam,
PBP$HomeTimeouts_Remaining_Pre,
PBP$AwayTimeouts_Remaining_Pre)
# Fixing error row in 2010 game:
if (GameID == 2010112107){
err_play_index <- which(PBP$qtr == 2 & PBP$down == 2 &
PBP$TimeSecs == 1862)
PBP[err_play_index,"Passer"] <- "D.Brees"
PBP[err_play_index,"PassAttempt"] <- 1
PBP[err_play_index,"PassOutcome"] <- "Complete"
PBP[err_play_index,"PassLength"] <- "Deep"
PBP[err_play_index,"PassLocation"] <- "left"
PBP[err_play_index,"PassLength"] <- 20
PBP[err_play_index,"Yards.Gained"] <- 20
PBP[err_play_index,"Touchdown"] <- 1
}
## Adding in Expected Points ##
PBP <- expected_points(PBP)
## Adding in Win Probability ##
PBP <- win_probability(PBP)
##############################################################################
## Adding Row for End of Game if not in the data ##
# if (PBP$desc[nrow(PBP)] %in% c("(END GAME)", "(End of game)")) {
#
# PBP[nrow(PBP)+1,] <- PBP[nrow(PBP),]
#
# PBP$GameID[nrow(PBP)] <- PBP$GameID[nrow(PBP)-1]
# PBP$Date[nrow(PBP)] <- PBP$Date[nrow(PBP)-1]
# PBP$time[nrow(PBP)] <- "00:00"
# PBP$TimeUnder[nrow(PBP)] <- 0
# PBP$TimeSecs[nrow(PBP)] <- 0
# PBP$desc[nrow(PBP)] <- "End of Game"
# PBP$PlayType[nrow(PBP)] <- "End of Game"
# }
##############################################################################
## Final OutPut ##
PBP[,c("Date", "GameID", "play_id", "Drive", "qtr", "down", "time", "TimeUnder",
"TimeSecs", "PlayTimeDiff", "SideofField", "yrdln", "yrdline100",
"ydstogo", "ydsnet", "GoalToGo", "FirstDown",
"posteam", "DefensiveTeam", "desc", "PlayAttempted", "Yards.Gained",
"sp", "Touchdown", "ExPointResult", "TwoPointConv", "DefTwoPoint",
"Safety", "Onsidekick", "PuntResult", "PlayType", "Passer","Passer_ID", "PassAttempt", "PassOutcome",
"PassLength","AirYards","YardsAfterCatch","QBHit", "PassLocation", "InterceptionThrown", "Interceptor",
"Rusher","Rusher_ID", "RushAttempt", "RunLocation", "RunGap", "Receiver","Receiver_ID",
"Reception", "ReturnResult", "Returner", "BlockingPlayer","Tackler1", "Tackler2",
"FieldGoalResult", "FieldGoalDistance",
"Fumble", "RecFumbTeam", "RecFumbPlayer", "Sack", "Challenge.Replay",
"ChalReplayResult", "Accepted.Penalty", "PenalizedTeam", "PenaltyType",
"PenalizedPlayer", "Penalty.Yards", "PosTeamScore", "DefTeamScore",
"ScoreDiff", "AbsScoreDiff", "HomeTeam", "AwayTeam",
"Timeout_Indicator","Timeout_Team","posteam_timeouts_pre",
"HomeTimeouts_Remaining_Pre","AwayTimeouts_Remaining_Pre",
"HomeTimeouts_Remaining_Post","AwayTimeouts_Remaining_Post",
"No_Score_Prob","Opp_Field_Goal_Prob","Opp_Safety_Prob",
"Opp_Touchdown_Prob","Field_Goal_Prob","Safety_Prob",
"Touchdown_Prob","ExPoint_Prob","TwoPoint_Prob",
"ExpPts","EPA","airEPA","yacEPA","Home_WP_pre", "Away_WP_pre", "Home_WP_post",
"Away_WP_post","Win_Prob","WPA","airWPA","yacWPA")]
}
##################################################################
#' Parsed Descriptive Play-by-Play Function for a Full Season
#' @description This function outputs all plays of an entire season in one dataframe.
#' It calls the game_play_by_play function and applies it over every
#' game in the season by extracting each game ID and url in the specified season.
#' WARNING: This function is deprecated and will be removed from the package
#' after the 2018-19 season.
#'
#' @param Season (numeric) A 4-digit year corresponding to an NFL season of
#' interest
#' @details This function calls the extracting_gameids,
#' proper_jsonurl_formatting, and game_play_by_play to aggregate all the plays
#' from a given season. This dataframe is prime for use with the dplyr and
#' plyr packages.
#' @return A dataframe contains all the play-by-play information for a single
#' season. This includes all the 99 variables collected in our
#' game_play_by_play function (see documentation for game_play_by_play for
#' details) and a column for the Season.
#' @examples
#' # Play-by-play data from all games in 2010
#' pbp.data.2010 <- season_play_by_play(2010)
#'
#' # Looking at all Pittsburgh Steelers offensive plays
#' subset(pbp.data.2010, posteam = "PIT")
#' @export
season_play_by_play <- function(Season) {
# Google R style format
# Below the function put together the proper URLs for each game in each
# season and runs the game_play_by_play function across the entire season
game_ids <- extracting_gameids(Season)
# Create a boolean vector showing which of the game_ids has URLs
# that exist - so the function can run during games:
game_ids_exist <- sapply(game_ids, function(x) RCurl::url.exists(proper_jsonurl_formatting(x)))
# Check to see that the url is not empty
# Only use the game_ids that exist:
game_ids <- game_ids[game_ids_exist]
# Apply the play by play function to all of the game_ids
# Also added catch if the play by play feed is throws an error
# for games that are intiialized early.
pbp_data_unformatted <- lapply(game_ids,
FUN = function (x) {
tryCatch(game_play_by_play(x),
error = function(e) {})}
)
df_pbp_data <- dplyr::bind_rows(pbp_data_unformatted) %>%
dplyr::mutate(Season = Season)
# Output #
df_pbp_data
}
##################################################################
# Drive Summary Function
#' Drive Summary and Results
#' @description This function outputs the results dataframe of each drive of a
#' given game
#' @param GameID (character or numeric) A 10 digit game ID associated with a
#' given NFL game.
#' @details The resulting dataframe has 16 variables associated with a specific
#' aspect of a drive including the scoring result, number of plays, the duration
#' of the drive, and the offensive and defensive teams. All 16 variables are
#' explained in more detail below:
#' \itemize{
#' \item{"GameID"} - The ID of the given Game
#' \item{DriveNumber"} - The respective drive number in the game
#' \item{"posteam"} - The offensive team on the drive
#' \item{"qtr"} - The quarter at the end of the drive
#' \item{"fds"} - Number of first downs in the drive
#' \item{"result"} - End result of the drive
#' \item{"penyds"} - Net penalty yards of the drive for the offensive team
#' \item{"ydsgained"} - Number of yards gained on the drive
#' \item{"numplays"} - Number of plays on the drive
#' \item{"postime"} - The duration of the drive
#' \item{"StartQrt"} - The quarter at the beginning of the drive
#' \item{"StartTime} - The time left in the quarter at the start of the drive
#' \item{"StartYardln"} - Yardline at the start of the drive
#' \item{"StartTeam"} - The offensive team on the drive
#' \item{"EndQrt"} - The quarter at the end of the drive
#' \item{"EndTime} - The time left in the quarter at the end of the drive
#' \item{"EndYardln"} - Yardline at the end of the drive
#' \item{"EndTeam"} - The offensive team on the drive
#' }
#' @return A dataframe that has the summary statistics for each drive
#' final output includes first downs, drive result, penalty yards,
#' of plays, time of possession, quarter at the start of the drive,
#' time at start of drive, yardline at start of drive,
#' team with possession at start, end of drive quarter, end of drive time,
#' end of drive yard line, end of drive team with possession
#' @examples
#' # Parsed drive Summarize of final game in 2015 NFL Season
#' nfl2015.finalregseasongame.gameID <- "2016010310"
#' drive_summary(nfl2015.finalregseasongame.gameID)
#' @export
drive_summary <- function(GameID) {
# Google R stlye format
######################
######################
# Generating Game URL
urlstring <- proper_jsonurl_formatting(GameID)
# Converting JSON data
nfl.json.data <- RJSONIO::fromJSON(RCurl::getURL(urlstring))
# Creating Dataframe of Drive Outcomes:
# Create a warning message for the Patriots vs. TB game
# that has an empty drive 3 list (GameID is 2013092206),
# and delete the empty drive. For all other games just
# rbind together.
if (GameID == "2013092206"){
warning("Drive 3 is missing from game data.")
drive.data.fix <- nfl.json.data[[1]]$drives
drive.data.fix[[3]] <- NULL
drive.data <- data.frame(do.call(rbind,drive.data.fix))
} else {
drive.data <- data.frame(do.call(rbind, (nfl.json.data[[1]]$drives)))
}
# Gathering Start of Drive Time, Location, and Quarter Info
start.data <- data.frame(do.call(rbind, (drive.data$start)))
colnames(start.data) <- c("StartQrt", "StartTime", "StartYardln", "StartTeam")
# Gathering End of Drive Time, Location, and Quarter Info
end.data <- data.frame(do.call(rbind, (drive.data$end)))
colnames(end.data) <- c("EndQrt", "EndTime", "EndYardln", "EndTeam")
start.index <- which(colnames(drive.data) == "start")
end.index <- which(colnames(drive.data) == "end")
# Combining all datasets into one
drive.data.final <- cbind(drive.data[, -c(start.index,end.index)],
start.data, end.data)
drive.data.final$GameID <- GameID
# Create DriveNumber column but create a condition for
# the missing drive 3 game:
if (GameID == "2013092206"){
drive.data.final$DriveNumber <- c(1,2,4:(nrow(drive.data.final)+1))
} else{
drive.data.final$DriveNumber <- 1:nrow(drive.data.final)
}
# Removing last row and 4th column of irrelevant information
drive.data.final <- drive.data.final[-nrow(drive.data),-c(3,4)]
# Unlisting Columns
drive.data.final <- drive.data.final %>%
dplyr::mutate_all(unlist)
# Output
drive.data.final[,c("GameID", "DriveNumber", "posteam", "qtr", "fds",
"result", "penyds", "ydsgained", "numplays", "postime",
"StartQrt", "StartTime", "StartYardln", "StartTeam",
"EndQrt", "EndTime", "EndYardln", "EndTeam")]
}
##################################################################
# Simple Box Score
#' Simple Game Boxscore
#' @description This function pulls data from an NFL url and contructs it into a formatted
#' boxscore.
#' @param GameID (character or numeric) A 10 digit game ID associated with a
#' given NFL game.
#' @param home (boolean): home = TRUE will pull home stats,
#' home = FALSE pulls away stats
#' @return A list of player statistics including passing, rushing, receiving,
#' defense, kicking, kick return, and punt return statistics for the specified
#' game.
#' @examples
#' # Parsed drive summaries of final game in 2015 NFL season
#' nfl2015.finalregseasongame.gameID <- "2016010310"
#' simple_boxscore(nfl2015.finalregseasongame.gameID, home = TRUE)
#' @export
simple_boxscore <- function(GameID, home = TRUE) {
# Google R stlye format
##################
##################
# Generating Game URL
urlstring <- proper_jsonurl_formatting(GameID)
# Start of Function
nfl.json.data <- RJSONIO::fromJSON(RCurl::getURL(urlstring))
# Date of Game
datestep1 <- stringr::str_extract(urlstring, pattern = "/[0-9]{10}/")
datestep2 <- stringr::str_extract(datestep1, pattern = "[0-9]{8}")
year <- substr(datestep2, start = 1, stop = 4)
month <- substr(datestep2, start = 5, stop = 6)
day <- substr(datestep2, start = nchar(datestep2)-1, stop = nchar(datestep2))
date <- as.Date(paste(month, day, year, sep = "/"), format = "%m/%d/%Y")
# Parsing Data
if (home == TRUE) {
home.team.name <- nfl.json.data[[1]]$home$abbr
# Passing Stats
qb.stats <- data.frame(stat = "passing", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$passing, c)))
qb.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$home$stats$passing,
c)))
# Running Stats
rb.stats <- data.frame(stat = "rush", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$rushing, c)))
rb.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$home$stats$rushing,
c)))
# Receiving Stats
wr.stats <- data.frame(stat = "receiving", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$receiving, c)))
wr.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$home$stats$receiving,
c)))
# Defensive Stats
def.stats <- data.frame(stat = "defense", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$defense, c)))
def.stats$playerID <- rownames(
t(sapply(nfl.json.data[[1]]$home$stats$defense
, c)))
# Kicking Stats
kicker.stats <- data.frame(stat = "kicking", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$kicking,
c)))
kicker.stats$playerID <- rownames(t(
sapply(nfl.json.data[[1]]$home$stats$kicking,
c)))
# Fumble Stats
fumb.stats <- data.frame(stat = "fumbles", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$fumbles, c)))
fumb.stats$playerID <- rownames(t(
sapply(nfl.json.data[[1]]$home$stats$fumbles,
c)))
# Kick Return Stats
kr.stats <- data.frame(stat = "kickreturn", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$kickret, c)))
kr.stats$playerID <- rownames(t(
sapply(nfl.json.data[[1]]$home$stats$kickret,
c)))
# Punt Return Stats
pr.stats <- data.frame(stat = "puntreturn", date, GameID, home.team.name,
t(sapply(nfl.json.data[[1]]$home$stats$puntret, c)))
pr.stats$playerID <- rownames(t(
sapply(nfl.json.data[[1]]$home$stats$puntret,
c)))
# List of Stats
home.team.stats <- list(HomePassing = qb.stats,
HomeRushing = rb.stats,
HomeReceiving = wr.stats,
HomeDef = def.stats,
HomeKicking = kicker.stats,
HomeFumbles = fumb.stats, HomeKR = kr.stats,
HomePR = pr.stats)
home.team.stats
} else {
away.team.name <- nfl.json.data[[1]]$away$abbr
# Passing Away Stats
qb.away.stats <- data.frame(stat = "passing", GameID, away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$passing,
c)))
qb.away.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$away$stats$passing,
c)))
# Running Away Stats
rb.away.stats <- data.frame(stat = "rushing", date, GameID, away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$rushing,
c)))
rb.away.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$away$stats$rushing,
c)))
# Receiving Away Stats
wr.away.stats <- data.frame(stat = "receiving", date, GameID, away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$receiving,
c)))
wr.away.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$away$stats$receiving,
c)))
# Defensive Away Stats
def.away.stats <- data.frame(stat = "defense", date, GameID, away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$defense,
c)))
def.away.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$away$stats$defense,
c)))
# Kicking Away Stats
kicker.away.stats <- data.frame(stat = "kicking", date, GameID, away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$kicking
, c)))
kicker.away.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$away$stats$kicking,
c)))
# Fumble Away Stats
fumb.away.stats <- data.frame(stat = "fumbles", date, GameID, away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$fumbles,
c)))
fumb.away.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$away$stats$fumbles,
c)))
# Kick Return Away Stats
kr.away.stats <- data.frame(stat = "kickreturn", date, GameID, away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$kickret,
c)))
kr.away.stats$playerID <- rownames(t(sapply(nfl.json.data[[1]]$away$stats$kickret,
c)))
# Punt Return Away Stats
pr.away.stats <- data.frame(stat = "puntreturn", date, GameID,
away.team.name,
t(sapply(nfl.json.data[[1]]$away$stats$puntret,
c)))
pr.away.stats$playerID <- rownames(t(sapply(
nfl.json.data[[1]]$away$stats$puntret,
c)))
# List of Away Stats
awayTeamStats <- list(AwayPassing = qb.away.stats,
AwayRushing = rb.away.stats,
AwayReceiving = wr.away.stats,
AwayDef = def.away.stats,
AwayKicking = kicker.away.stats,
AwayFumb = fumb.away.stats,
AwayKR = kr.away.stats,
AwayPR = pr.away.stats)
awayTeamStats
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.