# R/clean_game_logs.R

#' @title Clean Game Logs
#' @description Cleans raw game log data: drops rows whose \code{game} value
#'   is \code{"G"} (presumably repeated table header rows), splits
#'   \code{game_outcome} into \code{game_outcome} and a numeric \code{margin},
#'   converts \code{minutes_played} to decimal minutes, parses \code{date} as a
#'   \code{Date}, and converts the stat columns to numeric. Entries equal to
#'   \code{""}, \code{"Inactive"} or \code{"Did Not Play"} become \code{NA}.
#' @param game_log_df dataframe containing raw game logs. Must contain the
#'   columns \code{game}, \code{date}, \code{game_outcome},
#'   \code{minutes_played}, \code{started_game}, the box-score stat columns
#'   converted below, and \code{plus_minus} when \code{year >= 2001}.
#' @param year numeric year of the game log (presumably the season year --
#'   confirm against the caller). When \code{year >= 2001} the
#'   \code{plus_minus} column is converted to numeric as well; otherwise it is
#'   left untouched.
#' @return the cleaned game log, with the same columns as the input plus
#'   \code{margin}.
#' @keywords NBA basketball-reference html
#' @importFrom magrittr %>%
#' @export
#' @examples
#' \dontrun{
#' clean_game_logs(game_log_df, year = 2019)
#' }
clean_game_logs <- function(game_log_df, year) {

  # Placeholder strings used when a player was marked Inactive or Did Not
  # Play; every one of them is replaced with NA.
  inactive <- c("", "Inactive", "Did Not Play")

  # Columns that only need "placeholder -> NA, then parse as number".
  stat_cols <- c(
    "started_game",
    "field_goals", "field_goals_attempted", "field_goal_pct",
    "three_pt_fg", "three_pts_attm", "three_pt_pct",
    "ft_made", "ft_attm", "ft_pct",
    "off_reb", "def_reb", "tot_reb",
    "assists", "steals", "blocks", "turnovers", "fouls",
    "points", "game_score"
  )
  # plus_minus is only converted for year >= 2001.
  if (year >= 2001) {
    stat_cols <- c(stat_cols, "plus_minus")
  }

  blank_to_numeric <- function(x) {
    as.numeric(ifelse(x %in% inactive, NA, x))
  }

  cleaned <- game_log_df %>%
    subset(game != "G") %>%
    # "<outcome> (<margin>)" -> outcome text and the signed margin.
    tidyr::separate(
      game_outcome, c("game_outcome", "margin"), sep = "([\\(])"
    ) %>%
    tidyr::separate(margin, c("margin", NA), sep = "([\\)])") %>%
    dplyr::mutate(margin = as.numeric(margin)) %>%
    dplyr::mutate(
      minutes_played = ifelse(minutes_played %in% inactive, NA, minutes_played)
    ) %>%
    # Split the clock value into its two numeric parts, then recombine them
    # as decimal minutes.
    tidyr::separate(minutes_played, c("minutes_played", "seconds_played")) %>%
    dplyr::mutate(
      minutes_played =
        as.numeric(minutes_played) + as.numeric(seconds_played) / 60,
      date = as.Date(date),
      # The game number is blanked based on started_game, so it must be
      # computed while started_game still holds its raw values.
      game = as.numeric(ifelse(started_game %in% inactive, NA, game))
    ) %>%
    dplyr::select(-seconds_played)

  # Convert every stat column in place; column order is unchanged.
  cleaned[stat_cols] <- lapply(cleaned[stat_cols], blank_to_numeric)

  cleaned
}
# emilykuehler/basketballstatsR documentation built on May 31, 2019, 10:01 a.m.