R/PbPmanipulation.R

Defines functions PbPmanipulation

Documented in PbPmanipulation

#' Adapts the standard file supplied by BigDataBall to the format required by BasketballAnalyzeR
#'
#' @author Marco Sandri, Paola Zuccolotto, Marica Manisera (\email{basketballanalyzer.help@unibs.it})
#' @param data a play-by-play data frame supplied by \href{https://www.bigdataball.com/}{BigDataBall}.
#' @param period.length length (in minutes) of each of the first four periods; defaults to 12.
#' @param overtime.length length (in minutes) of every period after the fourth (overtime); defaults to 5.
#' @return A play-by-play data frame with the same variables as \code{PbP.BDB} (when necessary, coerced from one data type to another, e.g. from factor to numeric) plus the following five additional variables:
#' \itemize{
#'   \item \code{periodTime}, time played in the period (in seconds)
#'   \item \code{totalTime}, time played in the match (in seconds)
#'   \item \code{playlength}, time since the immediately preceding event (in seconds)
#'   \item \code{ShotType}, type of shot (FT, 2P, 3P); \code{NA} for rows whose \code{result} is empty or missing
#'   \item \code{oppTeam}, name of the opponent team; an empty string for rows whose \code{team} is empty or missing
#' }
#' @seealso \code{\link{PbP.BDB}}
#' @references P. Zuccolotto and M. Manisera (2020) Basketball Data Science: With Applications in R. CRC Press.
#' @examples
#' PbP <- PbPmanipulation(PbP.BDB)
#' @export
#' @importFrom stringr str_sub
#' @importFrom operators %~%
#' @importFrom operators %!~%
#' @importFrom readr parse_number
PbPmanipulation <- function(data, period.length = 12, overtime.length = 5) {
  if (!is.data.frame(data)) {
    stop("'data' must be a data frame", call. = FALSE)
  }

  num_vars <- c("shot_distance", "original_x", "original_y",
                "converted_x", "converted_y")
  required_vars <- c(num_vars, "remaining_time", "period", "play_length",
                     "result", "event_type", "description", "game_id", "team")
  missing_vars <- setdiff(required_vars, names(data))
  if (length(missing_vars) > 0) {
    stop("'data' is missing required column(s): ",
         paste(missing_vars, collapse = ", "), call. = FALSE)
  }

  #### Convert shot distance and x-y coordinates to numeric
  # lapply + list-style indexing keeps the result shape independent of the
  # number of rows (sapply returns a matrix, a vector or a list depending on it).
  # Non-numeric entries (e.g. empty strings) silently become NA.
  data[num_vars] <- lapply(data[num_vars], function(x) {
    suppressWarnings(as.numeric(as.character(x)))
  })

  #### Drop empty levels from factors
  # data[fact_vars] never collapses to a bare vector, so this also works when
  # exactly one column is a factor; the guard covers the no-factor case.
  fact_vars <- vapply(data, is.factor, logical(1))
  if (any(fact_vars)) {
    data[fact_vars] <- lapply(data[fact_vars], droplevels)
  }

  #### Extract minutes and seconds and calculate the total time played
  # remaining_time is read from its end: characters -5..-4 are the minutes and
  # -2..-1 the seconds.
  Minutes <- as.numeric(stringr::str_sub(data$remaining_time, -5, -4))
  Seconds <- as.numeric(stringr::str_sub(data$remaining_time, -2, -1))
  n_regular <- 4
  period <- data$period
  # Periods after the fourth are overtime and use overtime.length.
  current_length <- 60 * ifelse(period > n_regular, overtime.length, period.length)
  data$periodTime <- current_length - (Minutes * 60 + Seconds)
  # Seconds elapsed before the current period started: completed regular
  # periods plus completed overtime periods.
  elapsed_before <- 60 * (period.length * pmin(period - 1, n_regular) +
                          overtime.length * pmax(period - n_regular - 1, 0))
  data$totalTime <- data$periodTime + elapsed_before

  #### Add play length
  # NOTE(review): only the last two characters (the seconds field) are read,
  # so a play longer than 59 seconds would lose its minutes - confirm that
  # this cannot happen in the supplied files.
  data$playlength <- as.numeric(stringr::str_sub(data$play_length, -2, -1))

  #### Add shot type
  # Rows with an empty or missing result are not shots and keep ShotType = NA.
  is_shot <- !is.na(data$result) & data$result != ""
  is_free_throw <- data$event_type[is_shot] == "free throw"
  is_three <- operators::`%~%`(data$description[is_shot], "3PT")
  shot_type <- rep(NA_character_, nrow(data))
  shot_type[is_shot] <- ifelse(is_free_throw, "FT",
                               ifelse(is_three, "3P", "2P"))
  data$ShotType <- as.factor(shot_type)

  #### Clean game_id
  data$game_id <- readr::parse_number(as.character(data$game_id))

  #### Create oppTeam
  # Within each game, every row carrying a team gets the other team found in
  # that game; rows without a team keep the empty string.
  data$oppTeam <- rep("", nrow(data))
  has_team <- !is.na(data$team) & data$team != ""
  for (gm in unique(data$game_id[!is.na(data$game_id)])) {
    # which() drops NA comparisons, so rows with a missing game_id are skipped.
    idx <- which(has_team & data$game_id == gm)
    if (length(idx) == 0L) {
      next
    }
    team_vec <- data$team[idx]
    tbl <- table(team_vec)
    # For factor input table() also lists teams absent from this game (count 0).
    playing_teams <- names(tbl)[tbl != 0]
    data$oppTeam[idx] <- ifelse(team_vec == playing_teams[1],
                                playing_teams[2], playing_teams[1])
  }

  data
}

Try the BasketballAnalyzeR package in your browser

Any scripts or data that you put into this service are public.

BasketballAnalyzeR documentation built on July 2, 2020, 2:14 a.m.