R/PbPmanipulation.R

Defines functions PbPmanipulation

Documented in PbPmanipulation

#' Adapts the standard file supplied by BigDataBall to the format required by BasketballAnalyzeR
#'
#' @author Marco Sandri, Paola Zuccolotto, Marica Manisera (\email{basketballanalyzer.help@unibs.it})
#' @param data a play-by-play  data frame supplied by \href{https://www.bigdataball.com/}{BigDataBall}.
#' @param period.length numeric, the length of a quarter in minutes (default: 12 minutes as in NBA)
#' @param overtime.length numeric, the length of an overtime period in minutes (default: 5 minutes as in NBA)
#' @return A play-by-play data frame.
#' @seealso \code{\link{PbP.BDB}}
#' @return The data frame generated by \code{PbPmanipulation} has the same variables of \code{PbP.BDB} (when necessary, coerced from one data type to another, e.g from factor to numeric) plus the following five additional variables:
#' @return * \code{periodTime}, time played in the quarter (in seconds)
#' @return * \code{totalTime}, time played in the match (in seconds)
#' @return * \code{playlength}, time since the immediately preceding event (in seconds)
#' @return * \code{ShotType}, type of shot (FT, 2P, 3P)
#' @return * \code{oppTeam}, name of the opponent team
#' @return * \code{hometeam}, name of the home team (generated conditionally on  the presence of the variable \code{home_score})
#' @references P. Zuccolotto and M. Manisera (2020) Basketball Data Science: With Applications in R. CRC Press.
#' @examples
#' PbP <- PbPmanipulation(PbP.BDB)
#' @export
#' @importFrom stringr str_sub
#' @importFrom operators %~%
#' @importFrom operators %!~%
#' @importFrom readr parse_number
#' @importFrom dplyr ungroup
#' @importFrom dplyr select
#' @importFrom dplyr group_by
#' @importFrom dplyr first

PbPmanipulation <- function(data, period.length=12, overtime.length=5) {

  period <- periodTime <- home_score <- game_id <- team <- NULL
  #### Convert shot distance and x-y coordinates to numeric
  num_vars <- c("shot_distance","original_x","original_y","converted_x","converted_y")
  data[,num_vars] <- sapply(data[,num_vars], function(x) suppressWarnings(as.numeric(as.character(x))))

  #### Drop empty levels from factors
  fact_vars <- sapply(data, function(x) is.factor(x))
  data[,fact_vars] <- lapply(data[,fact_vars], function(x) droplevels(x))

  #### Extract minutes and seconds and calculate the total time played
  Minutes <- as.numeric(stringr::str_sub(data$remaining_time,-5,-4))
  Seconds <- as.numeric(stringr::str_sub(data$remaining_time,-2,-1))
  data <- data %>%
    mutate(periodTime=ifelse(period<5, period.length*60 - (Minutes*60 + Seconds),
                             overtime.length*60 - (Minutes*60 + Seconds)),
           totalTime =ifelse(period<5, periodTime + period.length*60*(period-1),
                             periodTime + 4*period.length*60+overtime.length*60*(period-5))
    )

  #### Add play length
  data$playlength <- as.numeric(stringr::str_sub(data$play_length,-2,-1))

  #### Add shot type
  filt <- (data$result!="")
  mat <- data[filt,]
  mat$ShotType <- ifelse(mat$event_type!="free throw" & mat$description%~%"3PT","3P",
                         ifelse(mat$event_type!="free throw" & mat$description%!~%"3PT","2P","FT"))
  data$ShotType[filt] <- mat$ShotType
  data$ShotType <- as.factor(data$ShotType)

  # Clean game_id
  #data$game_id <- readr::parse_number(as.character(data$game_id))

  # Create oppTeam
  games <- unique(data$game_id)
  data$oppTeam <- ""
  for (gm in games) {
    idx <- data$game_id==gm & data$team!=""
    team_vec <- data[idx,"team"]
    tbl <- table(team_vec)
    playing_teams <- names(tbl)[tbl!=0]
    opp_team <- ifelse(team_vec==playing_teams[1], playing_teams[2], playing_teams[1])
    #opp_team <- playing_teams[playing_teams!=playTeam]
    data[idx,"oppTeam"] <- opp_team
  }

  # Create home_team if home_score is available
  if ("home_score" %in%  names(data)) {
    data <- data %>%
      group_by(game_id) %>%
      mutate(filt=c(0, diff(home_score))!=0,
             hometeam=first(team[filt])) %>%
      select(-filt) %>%
      ungroup() %>%
      as.data.frame()
  }

  return(data)
}
sndmrc/BasketAnalyzeR documentation built on June 6, 2023, 12:52 a.m.