Nothing
#' Compute when possessions start
#'
#' @aliases do_possession
#'
#' @description
#' Compute when the possession starts for each team in a game.
#'
#' @usage
#' do_possession(data, period_sel, time_point_start)
#'
#' @param data Play-by-play prepared data from a given game.
#' @param period_sel Period of interest. Options can be "xC", where x=1,2,....
#' If NULL, no filtering is done.
#' @param time_point_start Starting time point of the lineup.
#'
#' @return
#' Data frame. This is the meaning of the columns that might not be
#' explanatory by themselves:
#' \itemize{
#' \strong{time_start}: Time point when the action starts.
#' \strong{time_end}: Time point when the action ends.
#' \strong{poss_time}: Duration of the possession.
#' \strong{possession}: Indicates when the possession starts. This is encoded
#' with the Spanish word \emph{inicio} (\emph{start}, in English).
#' \strong{points}: Number of points scored from a given action.
#' }
#'
#' @note
#' 1. A possession lasts 24 seconds in the ACB league.
#'
#' 2. Actions are given in Spanish. A bilingual basketball vocabulary (Spanish/English)
#' is provided in \url{https://www.uv.es/vivigui/docs/basketball_dictionary.xlsx}.
#'
#' 3. The \strong{game_code} column allows us to detect the source website, for example,
#' \url{https://live.acb.com/es/partidos/103389/jugadas}.
#'
#' @author
#' Guillermo Vinue
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' df0 <- acb_vbc_cz_pbp_2223
#'
#' day_num <- unique(acb_vbc_cz_pbp_2223$day)
#' game_code <- unique(acb_vbc_cz_pbp_2223$game_code)
#'
#' # Starting players:
#' acb_games_2223_sl <- acb_vbc_cz_sl_2223 %>%
#' dplyr::filter(period == "1C")
#'
#' # Prepare data:
#' df1 <- do_prepare_data(df0, day_num,
#' acb_games_2223_sl, acb_games_2223_info,
#' game_code)
#'
#' teams_game <- sort(unique(df1$team))
#' team_sel <- teams_game[1]
#'
#' data <- df1
#' data <- data %>%
#' mutate(row_num = row_number()) %>%
#' mutate(time_point = ifelse(nchar(time_point) < 5, paste0("0", time_point), time_point))
#'
#' # Filter by team:
#' data1 <- data %>%
#' filter(team == team_sel)
#'
#' # Set also the opponent team:
#' team_opp <- setdiff(unique(data$team), team_sel)
#'
#' # Add the last row of games' data to have the real final
#' # game score in case it is not available:
#' last_row_game <- data[nrow(data),]
#'
#' last_row_game$time_point <- "00:00"
#' last_row_game$player <- NA
#' last_row_game$action <- NA
#' last_row_game$team <- team_sel
#'
#' data1 <- bind_rows(data1, last_row_game)
#'
#' # Get players out:
#' pl_out <- c(1, which(data1$action == "Sale de la pista"), nrow(data1))
#'
#' i <- 1
#' data2 <- data1 %>%
#' slice(pl_out[i]:pl_out[i + 1])
#'
#' nr <- nrow(data2)
#'
#' # Lineup:
#' lineup <- data2 %>%
#' filter(action != "Sale de la pista") %>%
#' # Avoid actions that are assigned to teams:
#' filter(player != team_sel) %>%
#' distinct(player) %>%
#' pull()
#'
#' # Identify when the possessions start:
#' data2_rival <- data %>%
#' filter(team == team_opp) %>%
#' filter(between(row_num, data2$row_num[1], data2$row_num[nr]))
#'
#' data3 <- rbind(data2, data2_rival) %>%
#' arrange(row_num) %>%
#' na.omit()
#'
#' data4 <- do_possession(data3, NULL, "10:00")
#' }
#'
#' @importFrom dplyr lag lead summarize
#'
#' @export
do_possession <- function(data, period_sel, time_point_start) {
team <- action <- player <- period <- time_point <- block <- NULL
time_start <- time_end <- poss_time <- possession <- row_name <- NULL
if (!is.null(period_sel)) {
data <- data %>%
filter(period == period_sel)
}
data <- as.data.frame(data)
rownames(data) <- 1:nrow(data)
# Try to correct the cases when the replacements are as the possession runs.
# I guess this must be done when the replacement is in the team that has the possession:
dt_repl <- data
dt_repl_row <- which(dt_repl$action == "Falta Recibida" &
lead(dt_repl$action) == "Sale de la pista" &
dt_repl$team == lead(dt_repl$team))
dt_repl1 <- dt_repl[dt_repl_row,]
# ---- See below for further computations.
# Two main situations with start possession:
# First one:
data1 <- data %>%
filter(!action %in% c("Quinteto inicial", "Salto perdido", "Tiempo Muerto", "Sale de la pista", "Entra a pista")) %>%
filter(!(player == team & action != "Rebote Defensivo")) %>%
mutate(possession = ifelse(action %in% c("Salto ganado", "Rebote Defensivo", "Recuperaci\u00f3n"), "inicio", NA),
.after = action) %>%
distinct() # There are duplicated rows.
if (!is.null(period_sel)) {
if (period_sel != "1C") {
data1$possession[1] <- "inicio"
}
}
# Correct the technical fouls that do not start a real possession for the next team.
# See for example 104465 4C 09:15 Joel Soriano
tech_foul <- which(data1$action == "Falta Personal (1TL)" & data1$team == lag(data1$team))
# Possible rows to correct:
if (length(tech_foul) > 0) {
tech_foul_corr <- data1[tech_foul + 1, ]
}
# ---- See below for further computations.
# Reverse situations where some type of Falta Personal is before Falta Recibida.
# This causes errors in the computation of the time possession.
wh_fo <- which(grepl("Falta Personal", data1$action) & data1$team == lag(data1$team))
while (length(wh_fo) != 0) {
for (i in 1:length(wh_fo)) {
# 24 103158 1C wh_fo[4] is 97 (the penultimate row), so wh_fo[i] + 2 does not exist:
if ((wh_fo[i] + 2) > nrow(data1)) {
data1 <- data1[c(1:(wh_fo[i] - 1), wh_fo[i] + 1, wh_fo[i]), ]
}else{
data1 <- data1[c(1:(wh_fo[i] - 1), wh_fo[i] + 1, wh_fo[i], (wh_fo[i] + 2):nrow(data1)), ]
}
}
wh_fo <- which(grepl("Falta Personal", data1$action) & data1$team == lag(data1$team))
}
# Second one:
wh <- which(data1$action == "Asistencia" & data1$team != lead(data1$team))
data1$possession[wh + 1] <- "inicio"
# Discard personal fouls because they are not needed and add noise:
data1 <- data1 %>%
filter(!grepl("Falta Personal", action))
# Other situations that start a possession:
si1 <- which(data1$action == "Mate" & lead(data1$action) != "Asistencia") + 1
si2 <- which(grepl("Tiro de", data1$action) & data1$team != lag(data1$team) & lag(data1$action) != "Falta Personal")
si3 <- which(grepl("Triple", data1$action) & data1$team != lag(data1$team) & lag(data1$action) != "Falta Personal")
si4 <- which(grepl("Tiro Libre", data1$action) & data1$team != lead(data1$team)) + 1
si5 <- which(data1$action == "P\u00e9rdida" & lead(data1$action) != "Recuperaci\u00f3n") + 1
si6 <- which(data1$action == "P\u00e9rdida" & data1$team != lag(data1$team))
si7 <- which(data1$action == "Falta Personal (2TL)" & data1$team == lag(data1$team)) + 1
si8 <- which(data1$action == "Falta en Ataque" & data1$team != lag(data1$team))
si9 <- which(data1$action == "Falta Recibida" & !grepl("Falta Personal|Falta Antideportiva", lag(data1$action)) &
data1$team != lag(data1$team))
si10 <- which(data1$action == "Mate" & data1$team != lag(data1$team))
# Free throw after technical foul to the coach. See 104467 4C 08::12
si11 <- which(data1$action == "Tiro Libre anotado" & data1$team != lag(data1$team))
si12 <- which(data1$action == "Tiro Libre fallado" & data1$team != lag(data1$team))
si13 <- which(grepl("anotado", data1$action) & lag(data1$action) == "Falta T\u00e9cnica" &
data1$team == lag(data1$team) & data1$team != lead(data1$team))
data1$possession[c(si1, si2, si3, si4, si5, si6, si7, si8, si9, si10, si11, si12, si13)] <- "inicio"
# Correct some inaccuracies (when they are not in the first row):
data1$possession[which(data1$action == "Falta Personal (1TL)" & data1$possession == "inicio")] <- NA
data1$possession[which(data1$action == "P\u00e9rdida" & lag(data1$action) == "Tap\u00f3n" & data1$possession == "inicio")] <- NA
if (is.na(data1$possession[1])) {
data1$possession[1] <- "inicio"
}
# Add the starting possession of each period:
for (i in unique(data1$period)) {
data1[which(data1$period == i)[1], "possession"] <- "inicio"
}
# Correct some actions that do not start a new possession (this is mainly due to team offensive rebounds):
bl_prob <- which(data1$action == "Tap\u00f3n" & lag(data1$team) == lead(data1$team))
if (length(bl_prob) > 0) {
for (i in 1:length(bl_prob)) {
data1$possession[bl_prob[i]] <- NA
if (!is.na(data1$possession[bl_prob[i] + 1])) {
data1$possession[bl_prob[i] + 1] <- NA
}
}
}
# Create time end and start to compute the possession time:
data2 <- data1 %>%
mutate(time_start = lag(time_point), .before = time_point) %>%
rename(time_end = time_point)
data2$time_start[1] <- time_point_start
# Add for each possession a label block to be able to compute the possession time:
ini <- which(data2$possession == "inicio")
if (length(ini) > 1) {
block_v <- c()
for (i in 1:(length(ini) - 1)) {
block_v <- c(block_v, rep(ini[i], ini[i + 1] - ini[i]))
}
# If 'inicio' is in the last row of data2:
if (ini[length(ini)] == nrow(data2)) {
block_v <- c(block_v, ini[length(ini)])
}else{
# If not, repeat the needed value as many times as needed. For example, if
# 'inicio' is in the row 84 and data2 has 85 rows, we need to create two 84,
# as 85 - 84 + 1:
reps_need <- nrow(data2) - ini[length(ini)] + 1
block_v <- c(block_v, rep(ini[length(ini)], reps_need))
}
}else{
block_v <- 1
}
# Note: The block numbers refer to the rows where 'inicio' were located.
# For example, if the second 'inicio' label was in the fourth row,
# the second block will be labeled with a 4.
data3 <- data2 %>%
mutate(block = block_v, .after = period)
# Turn 00:00 into 10:00 for the first time point of each period different from 1C:
for (i in unique(data3$period)) {
if (i %in% c("2C", "3C", "4C") & data3[which(data3$period == i)[1], "time_start"] %in% c("00:00", "00:01")) {
data3[which(data3$period == i)[1], "time_start"] <- "10:00"
}else if (i %in% c("5C", "6C") & data3[which(data3$period == i)[1], "time_start"] %in% c("00:00", "00:01")) {
data3[which(data3$period == i)[1], "time_start"] <- "05:00"
}
}
# Compute the possession times:
data3_time <- data3 %>%
group_by(block) %>%
summarize(poss_time = period_to_seconds(ms(time_start[1])) - period_to_seconds(ms(time_end[n()]))) %>%
ungroup()
data4 <- left_join(data3, data3_time, by = "block") %>%
select(period, block, time_start, time_end, poss_time, everything())
# In data4, poss_time goes beyond 24 either because offensive rebounds
# or because fouls received or because transcription typos.
# 103389 1C: In data1 between data1$time_point[84] and data1$time_point[85]
# goes 38 seconds! --> "02:14" and "01:36" ; Also 2C: 08:00 and 07:33
# Add points:
data5 <- data4 %>%
mutate(points = case_when(
action == "Tiro Libre anotado" ~ 1,
action == "Mate" ~ 2,
action == "Tiro de 2 anotado" ~ 2,
action == "Triple anotado" ~ 3),
.after = possession)
# Correct minutes so that all have five characters:
data5 <- data5 %>%
mutate(time_start = ifelse(nchar(time_start) < 5, paste0("0", time_start), time_start)) %>%
mutate(time_end = ifelse(nchar(time_end) < 5, paste0("0", time_end), time_end))
# Correct technical fouls if needed.
if (length(tech_foul) > 0) {
for (i in 1:nrow(tech_foul_corr)) {
pl_corr <- which(data5$period == tech_foul_corr$period[i] &
data5$time_end == tech_foul_corr$time_point[i] &
data5$player == tech_foul_corr$player[i])
# Remove the current possession:
## Ensure the new factor is unique by summing 1000:
data5[pl_corr, "block"] <- data5[pl_corr, "block"] + 1000
data5[pl_corr, "possession"] <- NA
# and start it in the next row:
data5[pl_corr + 1, "possession"] <- "inicio"
}
}
# Try to correct the blocks of the possessions where the are replacements.
if (nrow(dt_repl1) > 0) {
for (i in 1:nrow(dt_repl1)) {
aux0 <- which(data5$time_end == dt_repl1$time_point[i] & data5$action == "Falta Recibida")
if (length(aux0) > 0) {
aux0_block <- data5[aux0[1], "block"]
aux0_block1 <- which(data5$block == aux0_block)
aux1 <- aux0_block1[aux0_block1 > aux0[1]]
# In some cases when the replacement takes place as the possession runs,
# the possession end with a team turnover, which is not registered in
# the data frame, so there are not rows after aux0.
# See for example 104471 4C 04:21 and 4C 04:05 Coviran Granada.
if (length(aux1) > 0) {
data5[aux1[1], "possession"] <- "inicio"
data5[aux1, "block"] <- data5[aux1, "block"] + 0.1
aux2 <- aux0_block1[aux0_block1 <= aux0[1]]
if (length(aux2) > 0) {
data5 <- data5[-aux2, ]
rownames(data5) <- 1:nrow(data5)
}
}
}
}
}
# Remove misleading offensive fouls that are not followed by a turnover, but by a drawn foul.
# See for example 104555 4C 02:28 D. Cacok and K. Taylor
off_foul <- which(data5$action == "Falta en Ataque" & !is.na(data5$possession) &
lead(data5$action) == "Falta Recibida" & !is.na(lead(data5$possession)))
if (length(off_foul)) {
rm_off_foul <- c()
for (i in 1:length(off_foul)) {
rm_off_foul <- c(rm_off_foul, c(off_foul[i], off_foul[i] + 1))
}
data5 <- data5[-rm_off_foul, ]
if (nrow(data5) != 0) {
rownames(data5) <- 1:nrow(data5)
}
}
return(data5)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.