# data-raw/check_wpa.R

library(cfbscrapR)
library(tidyverse)

# Play-type label sets used to bucket plays by their `play_type` value.
# In the visible part of this script, `kickoff` and `turnover_vec` drive the
# filters further down; `off_TD` and `def_TD` appear only in a commented-out
# filter.
# NOTE(review): "Punt Touchdown" is listed in both off_TD and def_TD --
# confirm that is intended.

# Labels grouped under off_TD.
off_TD <- c(
  "Passing Touchdown", "Rushing Touchdown", "Field Goal Good",
  "Pass Reception Touchdown", "Fumble Recovery (Own) Touchdown",
  "Punt Touchdown"
)

# Labels grouped under def_TD.
def_TD <- c(
  "Blocked Punt Touchdown", "Fumble Return Touchdown",
  "Defensive 2pt Conversion", "Interception Return Touchdown",
  "Safety", "Missed Field Goal Return Touchdown",
  "Punt Touchdown", "Punt Return Touchdown",
  "Blocked Field Goal Touchdown", "Fumble Recovery (Opponent) Touchdown",
  "Pass Interception Return Touchdown", "Sack Touchdown",
  "Uncategorized Touchdown"
)

# Kickoff play types.
kickoff <- c(
  "Kickoff", "Kickoff Return (Offense)",
  "Kickoff Return Touchdown", "Kickoff Touchdown"
)

# Play types grouped under turnover_vec.
turnover_vec <- c(
  "Blocked Field Goal", "Blocked Field Goal Touchdown",
  "Blocked Punt", "Blocked Punt Touchdown",
  "Field Goal Missed", "Missed Field Goal Return",
  "Missed Field Goal Return Touchdown",
  "Fumble Recovery (Opponent)", "Fumble Recovery (Opponent) Touchdown",
  "Fumble Return Touchdown", "Defensive 2pt Conversion",
  "Interception", "Interception Return Touchdown",
  "Pass Interception Return", "Pass Interception Return Touchdown",
  "Punt", "Punt Touchdown", "Punt Return Touchdown",
  "Sack Touchdown", "Uncategorized Touchdown"
)

#' Pull regular-season play-by-play data week by week and save it to CSV
#'
#' Calls `cfb_pbp_data()` once for every week in `start_wk:end_wk`, stacks the
#' weekly results into one data frame and writes it under `data-raw/`.
#'
#' @param yr Season year, passed to `cfb_pbp_data()` as `year`.
#' @param start_wk First week to request.
#' @param end_wk Last week to request.
#' @param epa Passed to `cfb_pbp_data()` as `epa_wpa`. Also picks the output
#'   file prefix: `pbp_stats_` when `TRUE`, `pbp_plain_` when `FALSE`.
#' @return The stacked play-by-play data. Rows of the week requested last come
#'   first (each new week is placed above the ones already collected).
pull_all_pbp_weeks <- function(yr = 2019, start_wk = 1, end_wk = 1, epa = TRUE) {
  weeks <- start_wk:end_wk

  # Collect each week's result in a list and bind once at the end, instead of
  # re-copying an ever-growing data frame on every iteration.
  weekly <- lapply(seq_along(weeks), function(k) {
    print(weeks[k])
    pbp <- cfb_pbp_data(
      year = yr, season_type = 'regular', week = weeks[k], epa_wpa = epa
    )
    # Pause between requests only; there is nothing to wait for after the
    # final one.
    if (k < length(weeks)) {
      Sys.sleep(5)
    }
    pbp
  })

  # rev() keeps the established row order (latest requested week on top).
  # The trailing empty data frame guarantees a data frame result even when no
  # week returned any rows.
  fd <- do.call(rbind, c(rev(weekly), list(data.frame())))

  prefix <- if (epa) 'data-raw/pbp_stats_' else 'data-raw/pbp_plain_'
  filename <- paste0(prefix, yr, '_week_', start_wk, '_to_', end_wk, '.csv')
  write_csv(fd, filename)

  fd
}

#' Pull a full regular season of play-by-play data and save it to CSV
#'
#' Makes a single `cfb_pbp_data()` request with `week = NULL` and writes the
#' result under `data-raw/`.
#'
#' @param yr Season year, passed to `cfb_pbp_data()` as `year`.
#' @param epa Passed to `cfb_pbp_data()` as `epa_wpa`. Also picks the output
#'   file prefix: `pbp_stats_` when `TRUE`, `pbp_plain_` when `FALSE`.
#' @return The play-by-play data that was written to disk.
pull_all_pbp_year <- function(yr = 2019, epa = FALSE) {
  season <- cfb_pbp_data(
    year = yr, season_type = 'regular', week = NULL, epa_wpa = epa
  )
  # Stack onto an empty data frame, as the weekly puller does.
  fd <- rbind(data.frame(), season)
  Sys.sleep(3)

  prefix <- if (epa) 'data-raw/pbp_stats_' else 'data-raw/pbp_plain_'
  filename <- paste0(prefix, yr, '.csv')
  write_csv(fd, filename)

  fd
}

# Season used by the pulls below.
year_for_pull <- 2019

# Play-by-play data pull/save and pre-processing ----

# Alternative (disabled): week-by-week pull without EPA/WPA, weeks 1-14.
# Recorded timing: user 7.69, system 1.05, elapsed 130.61.
# start_time <- proc.time()
# df_plain <- pull_all_pbp_weeks(yr = year_for_pull, start_wk = 1,
#                                end_wk = 14, epa = FALSE)
# season_run <- proc.time() - start_time
# print(season_run)
# df_plain <- read.csv('pbp_plain_2019.csv')

# Full-season pull with EPA/WPA; prints the elapsed time in minutes.
# Recorded timing: user 497.58, system 1.56, elapsed 522.28.
yr_epa_start_time <- proc.time()
df_yr <- pull_all_pbp_year(yr = year_for_pull, epa = TRUE)
yr_epa_season_run <- proc.time() - yr_epa_start_time
print(yr_epa_season_run["elapsed"] / 60)

# Alternative (disabled): default single-week pull with EPA/WPA.
# Recorded timing: user 45.58, system 0.29, elapsed 54.04.
# epa_start_time <- proc.time()
# df_weeks <- pull_all_pbp_weeks(yr = year_for_pull, epa = TRUE)
# epa_season_run <- proc.time() - epa_start_time
# print(epa_season_run)

# Alternative (disabled): default single-week pull without EPA/WPA.
# Recorded timing: user 0.70, system 0.08, elapsed 8.86.
# start_time <- proc.time()
# df_plain <- pull_all_pbp_weeks(yr = year_for_pull, epa = FALSE)
# season_run <- proc.time() - start_time
# print(season_run)

# Disabled check: plays present in the plain pull but not in the EPA pull.
# df_diff <- df_plain %>%
#   filter(!(id_play %in% df_weeks$id_play))
# Reload the season file that pull_all_pbp_year(epa = TRUE) writes, so the
# checks below run on what is stored on disk. The path is built from
# `year_for_pull` rather than a hard-coded year, so changing the season in
# one place cannot leave this line reading a different season's file.
df_yr <- read.csv(paste0('data-raw/pbp_stats_', year_for_pull, '.csv'))
# df <- create_wpa_naive(df)

# Round the win-probability columns to 4 decimals, derive home/away WPA from
# the rounded before/after values, and move the columns of interest to the
# front (everything() keeps all remaining columns after them).
# NOTE(review): `turnover_vec` in select() is expected to resolve to a data
# column, not the script-level vector of the same name -- confirm.
df_yr <- df_yr %>%
  # filter((play_type %in% off_TD)|
  #        (play_type %in% def_TD)) %>%
  mutate(
    wpa = round(wpa, digits = 4),
    wpa_change = round(wpa_change, digits = 4),
    wpa_base = round(wpa_base, digits = 4),
    wp_before = round(wp_before, digits = 4),
    lead_wp_before = round(lead_wp_before, digits = 4),
    def_wp_before = round(def_wp_before, digits = 4),
    home_wp_before = round(home_wp_before, digits = 4),
    away_wp_before = round(away_wp_before, digits = 4),
    home_wp_post = round(home_wp_post, digits = 4),
    away_wp_post = round(away_wp_post, digits = 4),
    home_wpa = round(home_wp_post - home_wp_before, digits = 4),
    away_wpa = round(away_wp_post - away_wp_before, digits = 4)
  ) %>%
  select(
    game_id, game_play_number, drive_number,
    down, distance, yards_gained, yards_to_goal,
    offense_play, defense_play, play_text, play_type,
    play_after_turnover, score_diff, score_diff_start, scoring_play,
    ExpScoreDiff, ExpScoreDiff_Time_Ratio, change_of_poss,
    wpa, wpa_change, wpa_base, wp_before, lead_wp_before,
    punt_play, kickoff_play, def_td_play, def_wp_before,
    drive_play_number, off_td_play, touchdown,
    turnover_vec, downs_turnover,
    home_wpa, home_wp_before, home_wp_post,
    away_wpa, away_wp_before, away_wp_post,
    td_play, kickoff_tb, kick_play, punt_tb, punt,
    EPA, ep_before, ep_after, missing_yard_flag,
    firstD_by_poss, firstD_by_penalty, firstD_by_yards,
    first_by_penalty, first_by_yards,
    rush, pass, sack_vec, fumble_vec,
    offense_score, defense_score,
    everything()
  )

# Working copy of the prepared season data, plus the overall WPA histogram.
df2 <- df_yr
hist(df2$wpa)

# Mean WPA (NAs ignored) and number of plays for each play type.
df3 <- summarise(
  group_by(df2, play_type),
  wpa_avg = mean(wpa, na.rm = TRUE),
  plays = n()
)
# Kickoff plays only.
# `kickoff` is the play-type vector defined once near the top of this script;
# it is deliberately not re-declared here so there is a single list to
# maintain. `.env$` pins the lookup to that script-level vector, so a data
# column that happened to be called `kickoff` could never shadow it.
# The rounding and column ordering mirror the season-level preparation.
# NOTE(review): `turnover_vec` in select() is expected to resolve to a data
# column, not the script-level vector of the same name -- confirm.
df4 <- df_yr %>%
  filter(play_type %in% .env$kickoff) %>%
  mutate(
    wpa = round(wpa, digits = 4),
    wpa_change = round(wpa_change, digits = 4),
    wpa_base = round(wpa_base, digits = 4),
    wp_before = round(wp_before, digits = 4),
    lead_wp_before = round(lead_wp_before, digits = 4),
    def_wp_before = round(def_wp_before, digits = 4),
    home_wp_before = round(home_wp_before, digits = 4),
    away_wp_before = round(away_wp_before, digits = 4),
    home_wp_post = round(home_wp_post, digits = 4),
    away_wp_post = round(away_wp_post, digits = 4),
    home_wpa = round(home_wp_post - home_wp_before, digits = 4),
    away_wpa = round(away_wp_post - away_wp_before, digits = 4)
  ) %>%
  select(
    game_id, game_play_number, drive_number,
    down, distance, yards_gained, yards_to_goal,
    offense_play, defense_play, play_text, play_type,
    play_after_turnover, score_diff, score_diff_start, scoring_play,
    ExpScoreDiff, ExpScoreDiff_Time_Ratio, change_of_poss,
    wpa, wpa_change, wpa_base, wp_before, lead_wp_before,
    punt_play, kickoff_play, def_td_play, def_wp_before,
    drive_play_number, off_td_play, touchdown,
    turnover_vec, downs_turnover,
    home_wpa, home_wp_before, home_wp_post,
    away_wpa, away_wp_before, away_wp_post,
    td_play, kickoff_tb, kick_play, punt_tb, punt,
    EPA, ep_before, ep_after, missing_yard_flag,
    firstD_by_poss, firstD_by_penalty, firstD_by_yards,
    first_by_penalty, first_by_yards,
    rush, pass, sack_vec, fumble_vec,
    offense_score, defense_score,
    everything()
  )
hist(df4$wpa)

# Mean WPA (NAs ignored) and number of plays for each kickoff play type.
df5 <- df4 %>%
  group_by(play_type) %>%
  summarize(
    wpa_avg = mean(wpa, na.rm = TRUE),
    plays = n()
  )


# Turnover-type plays only.
# The select() below names `turnover_vec` as a data column. Inside filter(),
# a bare `turnover_vec` is looked up in the data first, so it would resolve to
# that column instead of the script-level vector of play-type labels and the
# filter would not match on play types at all. `.env$turnover_vec` forces the
# lookup to the script-level vector; when no such column exists the result is
# the same as with the bare name.
df6 <- df_yr %>%
  filter(play_type %in% .env$turnover_vec) %>%
  mutate(
    wpa = round(wpa, digits = 4),
    wpa_change = round(wpa_change, digits = 4),
    wpa_base = round(wpa_base, digits = 4),
    wp_before = round(wp_before, digits = 4),
    lead_wp_before = round(lead_wp_before, digits = 4),
    def_wp_before = round(def_wp_before, digits = 4),
    home_wp_before = round(home_wp_before, digits = 4),
    away_wp_before = round(away_wp_before, digits = 4),
    home_wp_post = round(home_wp_post, digits = 4),
    away_wp_post = round(away_wp_post, digits = 4),
    home_wpa = round(home_wp_post - home_wp_before, digits = 4),
    away_wpa = round(away_wp_post - away_wp_before, digits = 4)
  ) %>%
  # `wpa` sits directly after `play_type` in this view; everything() keeps
  # all remaining columns after the listed ones.
  select(
    game_id, game_play_number, drive_number,
    down, distance, yards_gained, yards_to_goal,
    offense_play, defense_play, play_text, play_type,
    wpa,
    play_after_turnover, score_diff, score_diff_start, scoring_play,
    ExpScoreDiff, ExpScoreDiff_Time_Ratio, change_of_poss,
    wpa_change, wpa_base, wp_before, lead_wp_before,
    punt_play, kickoff_play, def_td_play, def_wp_before,
    drive_play_number, off_td_play, touchdown,
    turnover_vec, downs_turnover,
    home_wpa, home_wp_before, home_wp_post,
    away_wpa, away_wp_before, away_wp_post,
    td_play, kickoff_tb, kick_play, punt_tb, punt,
    EPA, ep_before, ep_after, missing_yard_flag,
    firstD_by_poss, firstD_by_penalty, firstD_by_yards,
    first_by_penalty, first_by_yards,
    rush, pass, sack_vec, fumble_vec,
    offense_score, defense_score,
    everything()
  )

hist(df6$wpa)

# Mean WPA (NAs ignored) and number of plays for each turnover play type.
df7 <- df6 %>%
  group_by(play_type) %>%
  summarize(
    wpa_avg = mean(wpa, na.rm = TRUE),
    plays = n()
  )

# Season-wide mean WPA (NAs ignored) and number of plays for each play type.
df_yr_wpa <- summarise(
  group_by(df_yr, play_type),
  wpa_avg = mean(wpa, na.rm = TRUE),
  plays = n()
)

# Single-team spot check: Notre Dame, 2019 regular season, week 3, with
# EPA/WPA columns requested.
df_nd <- cfb_pbp_data(
  year = 2019, week = 3, team = 'Notre Dame',
  season_type = 'regular', epa_wpa = TRUE
)

# Round the win-probability columns to 4 decimals, derive home/away WPA from
# the rounded before/after values, and move the columns of interest to the
# front (EPA and wpa directly after play_type; everything() keeps the rest).
# NOTE(review): `turnover_vec` in select() is expected to resolve to a data
# column, not the script-level vector of the same name -- confirm.
df_nd <- df_nd %>%
  mutate(
    across(
      c(
        wpa, wpa_change, wpa_base, wp_before, lead_wp_before,
        def_wp_before, home_wp_before, away_wp_before,
        home_wp_post, away_wp_post
      ),
      function(x) round(x, digits = 4)
    ),
    home_wpa = round(home_wp_post - home_wp_before, digits = 4),
    away_wpa = round(away_wp_post - away_wp_before, digits = 4)
  ) %>%
  select(
    game_id, game_play_number, drive_number,
    down, distance, yards_gained, yards_to_goal,
    offense_play, defense_play, play_text, play_type,
    EPA, wpa,
    play_after_turnover, score_diff, score_diff_start, scoring_play,
    ExpScoreDiff, ExpScoreDiff_Time_Ratio, change_of_poss,
    wpa_change, wpa_base, wp_before, lead_wp_before,
    punt_play, kickoff_play, def_td_play, def_wp_before,
    drive_play_number, off_td_play, touchdown,
    turnover_vec, downs_turnover,
    home_wpa, home_wp_before, home_wp_post,
    away_wpa, away_wp_before, away_wp_post,
    td_play, kickoff_tb, kick_play, punt_tb, punt,
    ep_before, ep_after, missing_yard_flag,
    firstD_by_poss, firstD_by_penalty, firstD_by_yards,
    first_by_penalty, first_by_yards,
    rush, pass, sack_vec, fumble_vec,
    offense_score, defense_score,
    everything()
  )
# meysubb/cfbscrapR documentation built on Dec. 15, 2020, 11:26 p.m.