#' Adapts the standard file supplied by BigDataBall to the format required by BasketballAnalyzeR
#'
#' @author Marco Sandri, Paola Zuccolotto, Marica Manisera (\email{basketballanalyzer.help@unibs.it})
#' @param data a play-by-play data frame supplied by \href{https://www.bigdataball.com/}{BigDataBall}.
#' @param period.length numeric, the length of a quarter in minutes (default: 12 minutes as in NBA)
#' @param overtime.length numeric, the length of an overtime period in minutes (default: 5 minutes as in NBA)
#' @return A play-by-play data frame.
#' @seealso \code{\link{PbP.BDB}}
#' @return The data frame generated by \code{PbPmanipulation} has the same variables of \code{PbP.BDB} (when necessary, coerced from one data type to another, e.g from factor to numeric) plus the following five additional variables:
#' @return * \code{periodTime}, time played in the quarter (in seconds)
#' @return * \code{totalTime}, time played in the match (in seconds)
#' @return * \code{playlength}, time since the immediately preceding event (in seconds)
#' @return * \code{ShotType}, type of shot (FT, 2P, 3P)
#' @return * \code{oppTeam}, name of the opponent team
#' @return * \code{hometeam}, name of the home team (generated conditionally on the presence of the variable \code{home_score})
#' @references P. Zuccolotto and M. Manisera (2020) Basketball Data Science: With Applications in R. CRC Press.
#' @examples
#' PbP <- PbPmanipulation(PbP.BDB)
#' @export
#' @importFrom stringr str_sub
#' @importFrom operators %~%
#' @importFrom operators %!~%
#' @importFrom readr parse_number
#' @importFrom dplyr ungroup
#' @importFrom dplyr select
#' @importFrom dplyr group_by
#' @importFrom dplyr first
PbPmanipulation <- function(data, period.length=12, overtime.length=5) {
period <- periodTime <- home_score <- game_id <- team <- NULL
#### Convert shot distance and x-y coordinates to numeric
num_vars <- c("shot_distance","original_x","original_y","converted_x","converted_y")
data[,num_vars] <- sapply(data[,num_vars], function(x) suppressWarnings(as.numeric(as.character(x))))
#### Drop empty levels from factors
fact_vars <- sapply(data, function(x) is.factor(x))
data[,fact_vars] <- lapply(data[,fact_vars], function(x) droplevels(x))
#### Extract minutes and seconds and calculate the total time played
Minutes <- as.numeric(stringr::str_sub(data$remaining_time,-5,-4))
Seconds <- as.numeric(stringr::str_sub(data$remaining_time,-2,-1))
data <- data %>%
mutate(periodTime=ifelse(period<5, period.length*60 - (Minutes*60 + Seconds),
overtime.length*60 - (Minutes*60 + Seconds)),
totalTime =ifelse(period<5, periodTime + period.length*60*(period-1),
periodTime + 4*period.length*60+overtime.length*60*(period-5))
)
#### Add play length
data$playlength <- as.numeric(stringr::str_sub(data$play_length,-2,-1))
#### Add shot type
filt <- (data$result!="")
mat <- data[filt,]
mat$ShotType <- ifelse(mat$event_type!="free throw" & mat$description%~%"3PT","3P",
ifelse(mat$event_type!="free throw" & mat$description%!~%"3PT","2P","FT"))
data$ShotType[filt] <- mat$ShotType
data$ShotType <- as.factor(data$ShotType)
# Clean game_id
#data$game_id <- readr::parse_number(as.character(data$game_id))
# Create oppTeam
games <- unique(data$game_id)
data$oppTeam <- ""
for (gm in games) {
idx <- data$game_id==gm & data$team!=""
team_vec <- data[idx,"team"]
tbl <- table(team_vec)
playing_teams <- names(tbl)[tbl!=0]
opp_team <- ifelse(team_vec==playing_teams[1], playing_teams[2], playing_teams[1])
#opp_team <- playing_teams[playing_teams!=playTeam]
data[idx,"oppTeam"] <- opp_team
}
# Create home_team if home_score is available
if ("home_score" %in% names(data)) {
data <- data %>%
group_by(game_id) %>%
mutate(filt=c(0, diff(home_score))!=0,
hometeam=first(team[filt])) %>%
select(-filt) %>%
ungroup() %>%
as.data.frame()
}
return(data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.