set_win_probabilities_theoretical <- function(so) {
## so = vector c(P(Team 1 sideouts), P(Team 2 sideouts))
s.matrix <- matrix(0, 26, 26)
o.matrix <- matrix(0, 26, 26)
s.matrix[26, -c(25, 26)] <- 1
o.matrix[26, -c(25, 26)] <- 1
rownames(s.matrix) <- rownames(o.matrix) <- colnames(s.matrix) <- colnames(o.matrix) <- 0:25
denom <- (1 - so[1])^2 + (1 - so[2]) * so[1] * (1 - so[1] + (1 - so[2]))
s.matrix[25, 25] <- (1 - so[2])^2 / denom
o.matrix[25, 25] <- (1 - so[2]) * so[1] * ((1 - so[1]) + (1 - so[2])) / denom
for (k in 47:0) {
for (i in seq(max(k - 24, 0), min(24, k))) {
s.matrix[i+1, k-i+1] <- (1 - so[2]) * s.matrix[i+2, k-i+1] + so[2] * o.matrix[i+1, k-i+2]
o.matrix[i+1, k-i+1] <- so[1] * s.matrix[i+2, k-i+1] + (1 - so[1]) * o.matrix[i+1, k-i+2]
}
}
s.matrix[26, 25] <- s.matrix[25, 24]
s.matrix[25, 26] <- s.matrix[24, 25]
s.matrix[26, 26] <- s.matrix[25, 25]
o.matrix[25, 26] <- o.matrix[24, 25]
o.matrix[26, 25] <- o.matrix[25, 24]
o.matrix[26, 26] <- o.matrix[25, 25]
list(s.matrix = s.matrix, o.matrix = o.matrix)
}
win_probabilities_theoretical <- function(so, serve1_start = NA, serve5_start = NA, go_to = 25, go_to_tiebreak = 15, max_sets = 5) {
## create P(Team 1 Wins Set) and P(Team 1 Wins Match) as a function of
## so = vector c(P(Team 1 sideouts), P(Team 2 sideouts))
## serve1_start = TRUE if Team 1 starts match with serve, FALSE if Team 2 starts match with serve
## serve5_start = TRUE if Team 1 starts set 5 with serve, FALSE if Team 2 starts set 5 with serve
## if serving team(s) is unknown, will average P(Team 1 Wins Set | Serve 0-0) and P(Team 1 Wins Set | Receive 0-0)
## returns a list of the following objects:
## team_serve: a matrix of set win probabilities given score and Team 1 is serving
## opponent_serve: a matrix of set win probabilities given score and Team 2 is serving
## start_serve_set1_wins: a vector of match win probabilities given Team 1's results of previous sets and that Team 1 began Set 1 with serve
## opponent_serve_set1_wins: a vector of match win probabilities given Team 1's results in previous sets that Team 2 began Set 1 with serve
## result_probabilities: a vector of match win/loss probabilities
assert_that(max_sets %in% c(3,5), msg = "Only 3-set and 5-set matches are supported")
assert_that(go_to <= 25, go_to_tiebreak <= 25, msg = "Target set points must be at most 25")
m <- set_win_probabilities_theoretical(so)
zero_eq <- 25 - go_to
zero_eq_tiebreak <- 25 - go_to_tiebreak
pu.s <- m$s.matrix[1 + zero_eq, 1 + zero_eq] ##P(Win | Serve)
pu.o <- m$o.matrix[1 + zero_eq, 1 + zero_eq] ##P(Win | Opp Serve)
pu.s5 <- m$s.matrix[1 + zero_eq_tiebreak, 1 + zero_eq_tiebreak] ##P(Win Game 5 | Serve)
pu.o5 <- m$o.matrix[1 + zero_eq_tiebreak, 1 + zero_eq_tiebreak] ##P(Win Game 5 | Opp Serve)
# set chance of winning set 5 based on who starts
w5 <- dplyr::case_when(
serve5_start ~ pu.s5,
!serve5_start ~ pu.o5,
TRUE ~ 0.5*(pu.s5 + pu.o5)
)
## now match outcomes from set outcomes
wu3.s <- pu.s*pu.o*pu.s
wu4.s <- 2*(1-pu.s)*pu.o*pu.s*pu.o + pu.s*(1-pu.o)*pu.s*pu.o
##L1, L2; L1, L4; L2, L3; L3, L4 --- L1, L3; L2, L4
wu5.s <- w5*(4*(1-pu.s)*pu.o*pu.s*(1-pu.o) + pu.s^2*(1-pu.o)^2 + pu.o^2*(1-pu.s)^2)
lu3.s <- (1 - pu.s)*(1 - pu.o)*(1 - pu.s)
lu4.s <- 2*pu.s*(1-pu.o)*(1-pu.s)*(1-pu.o) + (1-pu.s)*pu.o*(1-pu.s)*(1-pu.o)
lu5.s <- (1-w5)*(4*pu.o*(1-pu.s)*(1-pu.o)*pu.s + (1-pu.o)^2*(pu.s)^2 + (1-pu.s)^2*pu.o^2)
wu3.o <- pu.o*pu.s*pu.o
wu4.o <- 2*(1-pu.o)*pu.s*pu.o*pu.s + pu.o*(1-pu.s)*pu.o*pu.s
wu5.o <- w5*(4*(1-pu.s)*pu.o*pu.s*(1-pu.o) + pu.s^2*(1-pu.o)^2 + pu.o^2*(1-pu.s)^2)
lu3.o <- (1 - pu.o)*(1 - pu.s)*(1 - pu.o)
lu4.o <- 2*pu.o*(1-pu.s)*(1-pu.o)*(1-pu.s) + (1-pu.o)*pu.s*(1-pu.o)*(1-pu.s)
lu5.o <- (1-w5)*(4*pu.s*(1-pu.o)*(1-pu.s)*pu.o + (1-pu.s)^2*(pu.o)^2 + (1-pu.o)^2*pu.s^2)
wu3.s.w1 <- pu.o*pu.s
wu4.s.w1 <- pu.o*(1-pu.s)*pu.o + (1-pu.o)*pu.s*pu.o
##L2, L3; L3, L4; L2, L4
wu5.s.w1 <- w5*(2*(1-pu.o)*(1-pu.s)*pu.o + pu.s*(1-pu.o)^2)
wu3.o.w1 <- pu.s*pu.o
wu4.o.w1 <- (1-pu.s)*pu.o*pu.s + pu.s*(1-pu.o)*pu.s
wu5.o.w1 <- w5*(2*(1-pu.s)*(1-pu.o)*pu.s + pu.o*(1-pu.s)^2)
wu4.s.l1 <- pu.o*pu.s*pu.o
##W2, W3; W3, W4; W2, W4
wu5.s.l1 <- w5*(2*pu.o*pu.s*(1-pu.o) + pu.o^2*(1-pu.s))
wu4.o.l1 <- pu.s*pu.o*pu.s
wu5.o.l1 <- w5*(2*pu.s*pu.o*(1-pu.s) + pu.s^2*(1-pu.o))
w.s.w1 <- wu3.s.w1 + wu4.s.w1 + wu5.s.w1
w.o.w1 <- wu3.o.w1 + wu4.o.w1 + wu5.o.w1
w.s.l1 <- wu4.s.l1 + wu5.s.l1
w.o.l1 <- wu4.o.l1 + wu5.o.l1
##Game 2 result
wu3.s.w2 <- pu.s
wu4.s.w2 <- (1-pu.s)*pu.o
wu5.s.w2 <- w5*((1-pu.s)*(1-pu.o))
wu4.s.wl <- pu.s*pu.o
wu5.s.wl <- w5*((1-pu.s)*pu.o + pu.s*(1-pu.o))
wu5.s.l2 <- w5*(pu.s*pu.o)
wu3.o.w2 <- pu.o
wu4.o.w2 <- (1-pu.o)*pu.s
wu5.o.w2 <- w5*((1-pu.s)*(1-pu.o))
wu4.o.wl <- pu.o*pu.s
wu5.o.wl <- w5*((1-pu.o)*pu.s + pu.o*(1-pu.s))
wu5.o.l2 <- w5*(pu.o*pu.s)
w.s.w2 <- wu3.s.w2 + wu4.s.w2 + wu5.s.w2
w.s.wl <- wu4.s.wl + wu5.s.wl
w.s.l2 <- wu5.s.l2
w.o.w2 <- wu3.o.w2 + wu4.o.w2 + wu5.o.w2
w.o.wl <- wu4.o.wl + wu5.o.wl
w.o.l2 <- wu5.o.l2
##Game 3 result
wu4.s.w2l <- pu.o
wu5.s.w2l <- w5*(1-pu.o)
wu5.s.wl2 <- w5*pu.o
wu4.o.w2l <- pu.s
wu5.o.w2l <- w5*(1-pu.s)
wu5.o.wl2 <- w5*pu.s
w.s.w2l <- wu4.s.w2l + wu5.s.w2l
w.s.wl2 <- wu5.s.wl2
w.o.w2l <- wu4.o.w2l + wu5.o.w2l
w.o.wl2 <- wu5.o.wl2
##Game 4 result
wu5.w2l2 <- w5
##unconditional
wu.s <- pu.s*w.s.w1 + (1-pu.s)*w.s.l1 ##P(win Set 1 | Serve)*P(Win | Win Set 1) + P(Lose Set 1 | Serve)*P(Win Match | Lose Set 1)
wu.o <- pu.o*w.o.w1 + (1-pu.o)*w.o.l1
## unconditional, w, l, ww, wl, lw, ll, www, wwl, wlw, lww, wll, lwl, llw, lll
## wwlw, wlww, lwww, wwll, wlwl, wllw, llww, lwlw, lwwl, llwl, lwll, wlll
if (max_sets == 5){
s.wins <- c(wu.s,w.s.w1, w.s.l1, w.s.w2, w.s.wl, w.s.l2, w.s.w2l, w.s.wl2, wu5.w2l2)
o.wins <- c(wu.o,w.o.w1, w.o.l1, w.o.w2, w.o.wl, w.o.l2, w.o.w2l, w.o.wl2, wu5.w2l2)
names(s.wins) <- names(o.wins) <- c("U","W","L","WW", "WL", "LL", "WLW", "WLL", "WLWL")
## overall results - chance of winning/losing in each number of games
results <- dplyr::case_when(
serve1_start ~ list(pwin = wu.s,
scores = list(
`3-0` = wu3.s,
`3-1` = wu4.s,
`3-2` = wu5.s,
`2-3` = lu5.s,
`1-3` = lu4.s,
`0-3` = lu3.s
)),
!serve1_start ~ list(pwin = wu.o,
scores = list(
`3-0` = wu3.o,
`3-1` = wu4.o,
`3-2` = wu5.o,
`2-3` = lu5.o,
`1-3` = lu4.o,
`0-3` = lu3.o
)),
TRUE ~ list(pwin = (wu.s + wu.o)/2, # if serve1_start is NA, coin toss hasn't happened, average
scores = list(
`3-0` = (wu3.s + wu3.o)/2,
`3-1` = (wu4.s + wu4.o)/2,
`3-2` = (wu5.s + wu5.o)/2,
`2-3` = (lu5.s + lu5.o)/2,
`1-3` = (lu4.s + lu4.o)/2,
`0-3` = (lu3.s + lu3.o)/2
))
)
names(results) <- c("pwin", "scores") # case_when can return a list but the first level names are NA'd out
} else { # Max sets = 3, can only win 2-0 or 2-1
# This is much easier as no need to worry about serve vs. receive start
s.wins <- c(w.s.wl, w.s.w2l, w.s.wl2, wu5.w2l2)
o.wins <- c(w.o.wl, w.o.w2l, w.o.wl2, wu5.w2l2)
names(s.wins) <- names(o.wins) <- c("U","W","L","WL")
wu2 <- pu.s*pu.o
wu3.3 <- pu.s*(1-pu.o)*w5 + (1-pu.s)*pu.o*w5
lu2 <- (1-pu.s)*(1-pu.o)
lu3.3 <- pu.s*(1-pu.o)*(1-w5) + (1-pu.s)*pu.o*(1-w5)
## overall results - chance of winning/losing in each number of games
results <- list(pwin = wu2 + wu3.3,
scores = list(
`2-0` = wu2,
`2-1` = wu3.3,
`1-2` = lu3.3,
`0-2` = lu2
)
)
}
return(list(team_serve = m$s.matrix, opponent_serve = m$o.matrix,
start_serve_set1_wins = s.wins, opponent_serve_set1_wins = o.wins,
result_probabilities = results))
}
#' Estimate theoretical sideout rates given 'phase' parameters
#'
#' The [vs_estimate_rates()] function returns a team's performance rates across a range of aspects of play, including serve ace rate, serve error rate, and so on. Using [vs_theoretical_sideout_rates()] We can estimate the theoretical sideout rate that we would expect to see, given those parameters. This can be compared to the actual sideout rate achieved by the team.
#'
#' @param rates list: rates as returned by [vs_estimate_rates()]
#' @param process_model string: currently only "phase". See [vs_estimate_rates()]
#'
#' @return The theoretical sideout rates of the two teams
#'
#' @seealso [vs_estimate_rates()]
#'
#' @examples
#' \dontrun{
#' library(datavolley)
#' x <- dv_read(dv_example_file())
#' rates <- list(vs_estimate_rates(x, target_team = home_team(x)),
#' vs_estimate_rates(x, target_team = visiting_team(x)))
#'
#' ## the theoretical sideout rates
#' vs_theoretical_sideout_rates(rates)
#'
#' ## compare to their actual sideout rates
#' c(rates[[1]]$sideout, rates[[2]]$sideout)
#'}
#' @export
vs_theoretical_sideout_rates <- function(rates, process_model = "phase") {
##c(estimate_sideout_rates(rates[[2]], rates[[1]]), estimate_sideout_rates(rates[[1]], rates[[2]]))
MM <- rates_to_MC(rates, process_model = process_model, target_team = "each")
c(MC_sideout_rates(rev(MM)), MC_sideout_rates(MM))
}
## no longer used internally, see MC_sideout_rates
estimate_sideout_rates <- function(serving, receiving){
## estimate sideout probabilities from rate stats using terminating Markov chain theory
## serving and receiving are the rate stats for the serving and receiving teams, respectively
## these rate stats are computed by, e.g., vs_simulate_match
## Step 1: create the transition matrix
## SW: Serving team wins point
## RW: Receiving team wins point
## SS: Serving team serves - nothing should transition to this column
## RR: Receiving team receives - only SS should transition to this column
## RAR: Receiving team attacks off reception
## SAT: Serving team attacks in transition
## RAT: Receiving team attacks in transition
## SD: Serving team digs/freeball reception - note: technically we should have transition from RR and RD to this column to account for overpasses and downballs
## RD: Receiving team digs/freeball reception - note: technically we should have transition from SD to this column to account for overpasses and downballs
A <- data.matrix(
data.frame(
SW = c(1, 0, serving$serve_ace, receiving$rec_set_error, (receiving$rec_att_error + serving$rec_block), serving$trans_att_kill, (receiving$trans_att_error + serving$trans_block), 0, receiving$trans_set_error),
RW = c(0, 1, serving$serve_error, 0, receiving$rec_att_kill, (receiving$trans_block + serving$trans_att_error), receiving$trans_att_kill, serving$trans_set_error, 0),
SS = rep(0, 9),
RR = c(0, 0, 1 - serving$serve_ace - serving$serve_error, rep(0, 6)),
RAR = c(rep(0, 3), 1 - receiving$rec_set_error, rep(0, 5)),
SAT = c(rep(0, 7), 1 - serving$trans_set_error, 0),
RAT = c(rep(0, 8), 1 - receiving$trans_set_error),
SD = c(rep(0, 4), 1 - (receiving$rec_att_kill + receiving$rec_att_error + serving$rec_block + receiving$rec_att_replayed), serving$trans_att_replayed, 1 - (receiving$trans_att_kill + receiving$trans_att_error + serving$trans_block + receiving$trans_att_replayed), 0, 0),
RD = c(rep(0, 4), receiving$rec_att_replayed, 1 - (serving$trans_att_kill + serving$trans_att_error + receiving$trans_block + serving$trans_att_replayed), receiving$trans_att_replayed, rep(0, 2))
)
)
rownames(A) <- c("SW", "RW", "SS", "RR", "RAR", "SAT", "RAT", "SD", "RD")
## Step 2: find the asymptotic win probability - since it's a terminating matrix everything has to end up eventually in either SW or RW
I_rc <- which(rownames(A) %in% c("SW", "RW")) # rows and columns of the identity submatrix
N <- solve(diag(nrow(A) - length(I_rc)) - A[-I_rc, -I_rc])
R <- A[-I_rc, I_rc]
point_prob <- N%*%R
return(point_prob[1,2]) #receiving team sideout rate is what we want
}
## given two sets of rates, construct markovchain object(s) for one or both teams
# @param name string: the name assigned internally to the object
# @param process_model string: "phase" is as for e.g. vs_simulate_set
rates_to_MC <- function(rates, process_model = "phase", target_team = "each", name = "M") {
assert_that(is.string(process_model))
rates <- precheck_rates(rates, process_model = process_model)
if (is.numeric(target_team)) target_team <- as.character(target_team)
assert_that(is.string(target_team))
target_team <- tolower(target_team)
target_team <- match.arg(target_team, c("each", "1", "2"))
if (target_team == "each") {
return(list(rates_to_MC(rates, process_model = process_model, target_team = "1", name = "M1"),
rates_to_MC(rates, process_model = process_model, target_team = "2", name = "M2")))
} else if (target_team == "2") {
rates <- rev(rates)
}
state_names <- switch(tolower(process_model),
phase = c("SS", "SS=", "SS#", "RR", "RRO=", "RRA", "RRA=", "RRA#", "RRA/",
"RT", "ST",
"STO=", "STA", "STA=", "STA#", "STA/",
"RTO=", "RTA", "RTA=", "RTA#", "RTA/"),
phase_simple = c("SS", "SS=", "SS#", ## serving team serves (SS), serve ace (S#), serve error (S=)
"RR", "RR#", "RR=", ## receiving team receives (RR), wins on reception (R#), loses on reception (R=)
"RT", "ST", ## receiving team in transition (RT), serving team in transition (ST)
"ST#", "ST=", ## serving team wins in transition (ST#), loses in transition (ST=)
"RT#", "RT="),
stop("unrecognized process_model:", process_model)
)
M <- matrix(0, length(state_names), length(state_names))
colnames(M) <- rownames(M) <- state_names
if (process_model == "phase") {
M["SS", "SS="] <- rates[[1]]$serve_error
M["SS", "SS#"] <- rates[[1]]$serve_ace
M["SS", "RR"] <- abs(1 - sum(M["SS", ])) ## using abs(1 - ...) will avoid negative probs, which aren't picked up by markovchain
M["RR", "RRO="] <- rates[[2]]$rec_loss_other
M["RR", "ST"] <- rates[[2]]$rec_no_att
M["RR", "RRA"] <- abs(1 - sum(M["RR", ]))
M["RRA", "RRA="] <- rates[[2]]$rec_att_error
M["RRA", "RRA#"] <- rates[[2]]$rec_att_kill
M["RRA", "RRA/"] <- rates[[1]]$rec_block
M["RRA", "RT"] <- rates[[2]]$rec_att_replayed
M["RRA", "ST"] <- abs(1 - sum(M["RRA", ]))
M["ST", "STO="] <- rates[[1]]$trans_loss_other
M["ST", "RT"] <- rates[[1]]$trans_no_att
M["ST", "STA"] <- abs(1 - sum(M["ST", ]))
M["STA", "STA="] <- rates[[1]]$trans_att_error
M["STA", "STA#"] <- rates[[1]]$trans_att_kill
M["STA", "STA/"] <- rates[[2]]$trans_block
M["STA", "ST"] <- rates[[1]]$trans_att_replayed
M["STA", "RT"] <- abs(1 - sum(M["STA", ]))
M["RT", "RTO="] <- rates[[2]]$trans_loss_other
M["RT", "ST"] <- rates[[2]]$trans_no_att
M["RT", "RTA"] <- abs(1 - sum(M["RT", ]))
M["RTA", "RTA="] <- rates[[2]]$trans_att_error
M["RTA", "RTA#"] <- rates[[2]]$trans_att_kill
M["RTA", "RTA/"] <- rates[[1]]$trans_block
M["RTA", "RT"] <- rates[[2]]$trans_att_replayed
M["RTA", "ST"] <- abs(1 - sum(M["RTA", ]))
} else if (process_model == "phase_simple") {
M["SS", "SS="] <- rates[[1]]$serve_error
M["SS", "SS#"] <- rates[[1]]$serve_ace
M["SS", "RR"] <- abs(1 - sum(M["SS", ]))
M["RR", "RR="] <- rates[[2]]$rec_loss
M["RR", "RR#"] <- rates[[2]]$rec_win
M["RR", "RT"] <- rates[[2]]$rec_replayed
M["RR", "ST"] <- abs(1 - sum(M["RR", ])) ## no attack or attack in play
M["ST", "ST="] <- rates[[1]]$trans_loss
M["ST", "ST#"] <- rates[[1]]$trans_win
M["ST", "ST"] <- rates[[1]]$trans_replayed
M["ST", "RT"] <- abs(1 - sum(M["ST", ]))
M["RT", "RT="] <- rates[[2]]$trans_loss
M["RT", "RT#"] <- rates[[2]]$trans_win
M["RT", "RT"] <- rates[[2]]$trans_replayed
M["RT", "ST"] <- abs(1 - sum(M["RT", ]))
} else {
stop("unrecognized process_model:", process_model)
}
## all rally-ending states become sinks
for (tstate in grep("[/#=]$", state_names)) M[tstate, tstate] <- 1.0
new("markovchain", states = state_names, transitionMatrix = M, name = name)
}
## MM <- rates_to_MC(rates, process_model = "phase")
## plot(MM[[1]])
MC_sideout_rates <- function(servingM, receivingM) {
if (is.list(servingM) && (missing(receivingM) || is.null(receivingM)) && length(servingM) == 2 && all(vapply(servingM, inherits, "markovchain", FUN.VALUE = TRUE))) {
receivingM <- servingM[[2]]
servingM <- servingM[[1]]
}
winR_states <- states(servingM)[grepl("^S.*[=/]$", states(servingM)) | grepl("^R.*#$", states(servingM))]
sum(absorptionProbabilities(servingM)[1, winR_states])
}
## given the team markovchain objects, construct the serve-sideout-breakpoint markovchain
MC2MCP <- function(M1, M2) {
if (is.list(M1) && (missing(M2) || is.null(M2)) && length(M1) == 2 && all(vapply(M1, inherits, "markovchain", FUN.VALUE = TRUE))) {
M2 <- M1[[2]]
M1 <- M1[[1]]
}
## take per-team MC matrices, calculate sideout rates
winR_states <- states(M1)[grepl("^S.*[=/]$", states(M1)) | grepl("^R.*#$", states(M1))]
winS_states <- states(M1)[grepl("^S.*#$", states(M1)) | grepl("^R.*[=/]$", states(M1))]
so <- c(sum(absorptionProbabilities(M2)[1, winR_states]), sum(absorptionProbabilities(M1)[1, winR_states]))
## the markovchain object representing the serve-sideout-breakpoint process
state_names <- c("1S", "1BP", "2SO", "2S", "2BP", "1SO")
MM <- matrix(0, length(state_names), length(state_names))
colnames(MM) <- rownames(MM) <- state_names
MM["1S", "2SO"] <- so[2] ## team 1 serves, team 2 sides out
MM["1S", "1BP"] <- 1 - so[2] ## team 1 serves, team 1 wins breakpoint
MM["2S", "1SO"] <- so[1] ## team 2 serves, team 1 sides out
MM["2S", "2BP"] <- 1 - so[1] ## team 2 serves, team 2 wins breakpoint
MM["2SO", "2S"] <- MM["2BP", "2S"] <- MM["1SO", "1S"] <- MM["1BP", "1S"] <- 1.0
new("markovchain", states = state_names, transitionMatrix = MM, name = "MMP")
}
## relative proportion of serves by each team
MCP_serve_proportions <- function(MMP) {
if (markovchain::name(MMP) != "MMP") stop("MMP needs to be a markovchain object as returned by MC2MCP")
sprop <- steadyStates(MMP)[1, c("1S", "2S")]
sprop/sum(sprop)
}
MC_to_points_breakdown <- function(rates, process_model) {
M1 <- rates_to_MC(rates, process_model = process_model, target_team = "each")
so <- c(MC_sideout_rates(rev(M1)), MC_sideout_rates(M1)) ## theor sideout rates
## separate the MC objects
M2 <- M1[[2]]
M1 <- M1[[1]]
out <- do_MC_to_points_breakdown(M1, M2, this_team = 1L)
out2 <- do_MC_to_points_breakdown(M2, M1, this_team = 2L)
out <- rbind(out, out2)
## also do breakdown by points per set
## first need the distribution of scores
## sprop gives proportion of serves for team 1 and team 2
sprop <- MCP_serve_proportions(MC2MCP(M1, M2))
if (FALSE) {
## this is much slower than using absorbp
SM1 <- scoreMC(so = so, serving = 1, go_to = 25, stop_at = 35)
## set scores when team 1 is serving
ss <- states(SM1)[grep("^WIN", states(SM1))]
set_scores1 <- absorptionProbabilities(SM1)[1, ss]
} else {
A1 <- score_transition_matrix(so = so, serving = 1, go_to = 25, stop_at = 35)
set_scores1 <- absorbp(A1)[1, ]
set_scores1 <- set_scores1[grep("^WIN", names(set_scores1))]
ss <- names(set_scores1)
}
set_scores1 <- data.frame(p = set_scores1/sum(set_scores1), id = ss)
temp <- sub(".*@", "", ss)
set_scores1$team_1_score <- as.integer(sub(":.*", "", temp))
set_scores1$team_2_score <- as.integer(sub(".*:", "", temp))
mss1 <- c(sum(set_scores1$team_1_score * set_scores1$p), sum(set_scores1$team_2_score * set_scores1$p))
## ss1 is the mean points per set by team, when team 1 starts serving
A2 <- score_transition_matrix(so = so, serving = 2, go_to = 25, stop_at = 35)
set_scores2 <- absorbp(A2)[1, ]
set_scores2 <- set_scores2[grep("^WIN", names(set_scores2))]
ss <- names(set_scores2)
set_scores2 <- data.frame(p = set_scores2/sum(set_scores2), id = ss)
temp <- sub(".*@", "", ss)
set_scores2$team_1_score <- as.integer(sub(":.*", "", temp))
set_scores2$team_2_score <- as.integer(sub(".*:", "", temp))
mss2 <- c(sum(set_scores2$team_1_score * set_scores2$p), sum(set_scores2$team_2_score * set_scores2$p))
## combine to overall mss which is mean points per set by team, accounting for sprop
mss <- c(mss1[1] * sprop[1] + mss2[1] * sprop[2], mss1[2] * sprop[1] + mss2[2] * sprop[2])
## now given mss and the res$points_breakdown (proportion_of_team_points) we can now estimate the breakdown in terms of points_per_set
out$points_per_set <- NA_real_
out$points_per_set[out$point_won_by == 1] <- out$proportion_of_team_points[out$point_won_by == 1] * mss[1]
out$points_per_set[out$point_won_by == 2] <- out$proportion_of_team_points[out$point_won_by == 2] * mss[2]
out
}
do_MC_to_points_breakdown <- function(M1, M2, this_team) {
stopifnot(identical(states(M1), states(M2)))
winR_states <- states(M1)[grepl("^S.*[=/]$", states(M1)) | grepl("^R.*#$", states(M1))]
winS_states <- states(M1)[grepl("^S.*#$", states(M1)) | grepl("^R.*[=/]$", states(M1))]
## relative number of serves by team
sprop <- MCP_serve_proportions(MC2MCP(M1, M2))
## relative proportion of points won by team 1 in serve/reception, scaled by their serve proportion
T1points <- c(absorptionProbabilities(M1)[1, winS_states] * sprop[1], absorptionProbabilities(M2)[1, winR_states] * sprop[2])
names(T1points) <- substr(names(T1points), 2, 4)
temp <- list()
for (uu in unique(names(T1points))) temp[[uu]] <- sum(T1points[names(T1points) == uu])
temp <- unlist(temp)
out <- data.frame(point_won_by = this_team, outcome = states_to_factor(names(temp)), proportion_of_team_points = unname(temp)/sum(temp), proportion_of_all_points = unname(temp), stringsAsFactors = FALSE)
out <- out[order(out$outcome), ]
out$outcome <- as.character(out$outcome)
rownames(out) <- NULL
out
}
states_as_factors <- function() c("S=" = "Serve error", "S#" = "Serve ace", "RO=" = "Rec loss other", "RA=" = "Rec attack error", "RA/" = "Rec attack block", "RA#" = "Rec attack kill", "R=" = "Rec loss", "R#" = "Rec win", "TO=" = "Trans loss other", "TA=" = "Trans attack error", "TA#" = "Trans attack kill", "TA/" = "Trans attack block", "T=" = "Trans loss", "T#" = "Trans win")
states_to_factor <- function(s) {
remap <- states_as_factors()
for (us in na.omit(unique(s))) if (us %in% names(remap)) s[which(s == us)] <- remap[names(remap) == us]
factor(s, levels = remap)
}
#' Create a win probability graph for a match
#'
#' @param pbp data frame: a data frame containing the set number, home team, visiting team, serving team, point-winning team, home team score, and visiting team score at the end of each point,
#' easiest to obtain by subsetting the plays component of a datavolley object as returned by [datavolley::dv_read()] to only include rows where `point == TRUE`
#' @param so integer: a two-element vector of sideout rates for the home team and visiting team, easiest to obtain using [vs_estimate_rates()]
#' @param go_to integer: the minimum score that must be reached to end a non-tiebreaker set (typically 25 for indoor volleyball in sets 1 to 4, or 21 in beach volleyball)
#' @param go_to_tiebreak integer: the minimum score that must be reached to end a tiebreaker set (typically 15)
#' @param max_sets integer: the maximum number of sets that can be played, either 3 or 5
#' @param show_plot logical: if `TRUE`, produce a graph showing the home team's win probability at each point in the match
#' @param home_color string: the color used to indicate points in the match where the home team is favored to win
#' @param visiting_color string: the same as `home_color`, but for the visiting team
#'
#' @return A data frame containing the home team's probability of winning the set (`set_probs`) and match (`match_probs`) at each point in the set.
#' The first row of the data frame refers to the start of the match (0-0, Set 1).
#'
#' @examples
#' \dontrun{
#' library(datavolley)
#' x <- dv_read(vs_example_file())
#' sideout_rates <- vs_estimate_rates(x, target_team = "each")$sideout
#' play_by_play <- subset(plays(x), point)
#'
#' vs_match_win_probability(play_by_play, sideout_rates) ## data frame is not printed to console
#' ## but can be stored in a variable
#' match_win_probs <- vs_match_win_probability(play_by_play, sideout_rates)
#'}
#' @export
#'
vs_match_win_probability <- function(pbp, so, go_to = 25, go_to_tiebreak = 15, max_sets = 5, show_plot = TRUE, home_color = "blue", visiting_color = "darkred"){
assert_that("set_number" %in% names(pbp), msg = "Set number not detected - please rename the set column set_number")
assert_that("serving_team" %in% names(pbp), msg = "Serving team not detected - please rename the serving team column serving_team")
assert_that("point_won_by" %in% names(pbp), msg = "Point-winning team not detected - please rename the column containing the team winning the point to point_won_by")
assert_that(max_sets %in% c(3,5), msg = "Only 3-set and 5-set matches are supported")
assert_that(go_to <= 25, go_to_tiebreak <= 25, msg = "Target set points must be at most 25")
assert_that(max(pbp$set_number) <= max_sets, msg = "More sets have been played in this match than maximum number of sets allowed")
# fix potential issues with column names not matching exactly
if(!("visiting_team" %in% names(pbp)) && ("away_team" %in% names(pbp))){
pbp <- dplyr::rename(pbp, visiting_team = "away_team")
}
if(!("visiting_team_score" %in% names(pbp)) && (("away_team_score" %in% names(pbp)))){
pbp <- dplyr::rename(pbp, visiting_team_score = "away_team_score")
} else if(!("visiting_team_score" %in% names(pbp)) && (("away_score" %in% names(pbp)))){
pbp <- dplyr::rename(pbp, visiting_team_score = "away_score")
} else if(!("visiting_team_score" %in% names(pbp)) && (("visiting_score" %in% names(pbp)))){
pbp <- dplyr::rename(pbp, visiting_team_score = "visiting_score")
}
if(!("home_team_score" %in% names(pbp)) && ("home_score" %in% names(pbp))){
pbp <- dplyr::rename(pbp, home_team_score = "home_score")
}
# Now let's add some asserts after hopefully fixing the issue
assert_that("home_team" %in% names(pbp), msg = "Home team not detected - please rename the home team column home_team")
assert_that("visiting_team" %in% names(pbp), msg = "Visiting team not detected - please rename the visiting team column visiting_team")
assert_that("home_team_score" %in% names(pbp), msg = "Home team score not detected - please rename the home team score column home_team_score")
assert_that("visiting_team_score" %in% names(pbp), msg = "Visiting team score not detected - please rename the visiting team score column visiting_team_score")
# Find the start and end points of each set
n <- nrow(pbp)
set.start.points <- c(1, (which(diff(pbp$set_number) != 0) - 1))
set.end.points <- c(which(diff(pbp$set_number)!=0), n)
# Figure out who serves at start of match/set 5
match_start_serve <- pbp$serving_team[1] == pbp$home_team[1] # TRUE if home team starts match with serve
set5_start_serve <- case_when(
length(set.start.points) == 5 ~ pbp$serving_team[set.start.points[5]] == pbp$home_team[set.start.points[5]],
length(set.start.points) == 3 & max_sets == 3 ~ pbp$serving_team[set.start.points[5]] == pbp$home_team[set.start.points[5]],
TRUE ~ NA
)
wp_list <- win_probabilities_theoretical(so,
serve1_start = match_start_serve,
serve5_start = set5_start_serve,
go_to = go_to,
go_to_tiebreak = go_to_tiebreak)
zero_eq <- 25 - go_to
zero_eq_tiebreak <- 25 - go_to_tiebreak
wp_df <- pbp %>% group_by(.data$set_number) %>% mutate(
next_serve = lead(.data$serving_team),
index.left = 1 + zero_eq + .data$home_team_score + (.data$set_number == max_sets)*zero_eq_tiebreak,
index.right = 1 + zero_eq + .data$visiting_team_score + (.data$set_number == max_sets)*zero_eq_tiebreak
) %>%
mutate(
next_serve = if_else(is.na(.data$next_serve), .data$point_won_by, .data$next_serve),
index.left2 = if_else(.data$index.left > 26 | .data$index.right > 26, 24 + pmax(0, .data$home_team_score - .data$visiting_team_score), .data$index.left),
index.right2 = if_else(.data$index.left > 26 | .data$index.right > 26, 24 + pmax(0, .data$visiting_team_score - .data$home_team_score), .data$index.right)
) %>%
mutate(
set_probs = if_else(.data$point_won_by == .data$home_team,
diag(wp_list$team_serve[.data$index.left2, .data$index.right2]), # these end up being matrices so have to take the diagonal values to get the right numbers
diag(wp_list$opponent_serve[.data$index.left2, .data$index.right2])
)
) %>% ungroup()
serve_list <- if(match_start_serve) wp_list$start_serve_set1_wins else wp_list$opponent_serve_set1_wins
# need to know how many sets have been won by each team at this point in the match
set_winners <- wp_df %>% slice(set.end.points) %>%
transmute(
.data$set_number,
set_won_by = if_else(.data$home_team == .data$point_won_by, .data$home_team, .data$visiting_team),
home_team_sets_won = lag(cumsum(.data$set_won_by == .data$home_team), default = 0),
visiting_team_sets_won = lag(cumsum(.data$set_won_by == .data$visiting_team), default = 0)
) %>% right_join(wp_df, by = "set_number")
if(max_sets == 5){ # Max sets is 5, normal for indoor
match_winners <- set_winners %>% transmute(
.data$home_team,
.data$visiting_team,
.data$serving_team,
.data$set_number,
.data$home_team_score,
.data$visiting_team_score,
.data$set_probs,
match_probs = case_when(
.data$home_team_sets_won == 0 & .data$visiting_team_sets_won == 0 ~ .data$set_probs*serve_list[2] + (1 - .data$set_probs)*serve_list[3],
.data$home_team_sets_won == 1 & .data$visiting_team_sets_won == 0 ~ .data$set_probs*serve_list[4] + (1 - .data$set_probs)*serve_list[5],
.data$home_team_sets_won == 0 & .data$visiting_team_sets_won == 1 ~ .data$set_probs*serve_list[5] + (1 - .data$set_probs)*serve_list[6],
.data$home_team_sets_won == 1 & .data$visiting_team_sets_won == 1 ~ .data$set_probs*serve_list[7] + (1 - .data$set_probs)*serve_list[8],
.data$home_team_sets_won == 2 & .data$visiting_team_sets_won == 0 ~ .data$set_probs + (1 - .data$set_probs)*serve_list[7],
.data$home_team_sets_won == 0 & .data$visiting_team_sets_won == 2 ~ .data$set_probs*serve_list[8],
.data$home_team_sets_won == 2 & .data$visiting_team_sets_won == 1 ~ .data$set_probs + (1 - .data$set_probs)*serve_list[9],
.data$home_team_sets_won == 1 & .data$visiting_team_sets_won == 2 ~ .data$set_probs*serve_list[9],
.data$home_team_sets_won == 2 & .data$visiting_team_sets_won == 2 ~ .data$set_probs,
TRUE ~ NA_real_ # if there is a problem here, just output NA and move on
)
)
} else { #max_sets is 3, equivalent to starting in set 3 with WL
match_winners <- set_winners %>% transmute(
.data$home_team,
.data$visiting_team,
.data$serving_team,
.data$set_number,
.data$home_team_score,
.data$visiting_team_score,
.data$set_probs,
match_probs = case_when(
.data$home_team_sets_won == 0 & .data$visiting_team_sets_won == 0 ~ .data$set_probs*serve_list[7] + (1 - .data$set_probs)*serve_list[8],
.data$home_team_sets_won == 1 & .data$visiting_team_sets_won == 0 ~ .data$set_probs + (1 - .data$set_probs)*serve_list[9],
.data$home_team_sets_won == 0 & .data$visiting_team_sets_won == 1 ~ .data$set_probs*serve_list[9],
.data$home_team_sets_won == 1 & .data$visiting_team_sets_won == 1 ~ .data$set_probs,
TRUE ~ NA_real_ # if there is a problem here, just output NA and move on
)
)
}
# add the initial row for 0-0 at the beginning of the match
match_start <- match_winners[1,] %>% mutate(
home_team_score = 0,
visiting_team_score = 0,
set_probs = if(match_start_serve) wp_list$team_serve[1 + zero_eq, 1 + zero_eq] else wp_list$opponent_serve[1 + zero_eq, 1 + zero_eq],
match_probs = if(max_sets == 5) serve_list[1] else serve_list[5]
)
match_winners <- bind_rows(match_start, match_winners) %>%
dplyr::arrange(.data$set_number, .data$visiting_team_score, .data$home_team_score)
# the above line should fix issues of points getting out of order, assuming only one line per point
# if we make the win probability graph, make it
if(show_plot){
yaxis.label <- paste0("P(", pbp$home_team[1], " Win)")
title.label <- paste0(pbp$home_team[1], " vs. ", pbp$visiting_team[1])
# Quick plot that can be made nicer with ggplot if desired
par(xaxt = "n")
plot(x = seq(0, nrow(match_winners) - 1),
y = match_winners$match_probs,
type = "n",
xlab = "",
ylab = yaxis.label,
main = title.label,
ylim = c(0,1))
s <- seq(nrow(match_winners) - 1)
x <- c(0, s)
segments(x[s], match_winners$match_probs[s], x[s+1], match_winners$match_probs[s+1],
col = (if_else(lead(match_winners$match_probs) >= 0.5, home_color, visiting_color)),
lwd = 2)
abline(v = set.end.points[-length(set.end.points)], lty = 2) # dashed lines at set breaks
abline(v = c(0, set.end.points[length(set.end.points)]), lty = 2) # dashed lines at start and end of match too
# Add set number labels
set.labels <- paste0("Set ", seq(1, length(set.end.points)))
# essentially, put the set number at top margin of graph
set.mid.points <- (set.start.points + set.end.points)/2
for (i in 1:length(set.mid.points)){
mtext(set.labels[i], side = 3, at = set.mid.points[i])
}
}
invisible(match_winners) # only return the data frame if there is somewhere to return to
}
## functions to build a markov chain of score transitions through a set
sshash <- function(score1, score2, serving) serving * 10e4 + score1 * 10e2 + score2
## what outcomes can we get from a given point?
point_transition <- function(p) {
## parms are c(starting_score1, starting_score2, so1, so2, serving, go_to, stop_at, current_index)
stopifnot(length(p) == 8)
t1won <- ifelse(p[5] < 2, 1 - p[4], p[3])
m <- matrix(c(p[1], p[1], ## col 1 = starting team_1_score
p[2], p[2], ## 2 = starting team_2_score
p[5], p[5], ## 3 = starting serving team
p[1] + c(1, 0), ## 4 = next team_1_score
p[2] + c(0, 1), ## 5 = next team_2_score
t1won, 1 - t1won, ## 6 = prob
1, 2, ## 7 = result - who won / is serving at the start of the next point
sshash(p[1] + c(0, 0), p[2] + c(0, 0), p[5]), ## 8 = "hash" of starting scores and serving team
rep(p[8], 2)), ## 9 = current_index
ncol = 9)
## other 'result' values
m[m[, 4] >= p[7] | m[, 5] >= p[7], 7] <- 0 ## don't continue
m[m[, 4] > (m[, 5] + 1) & m[, 4] >= p[6], 7] <- 91 ## team 1 won
m[m[, 5] > (m[, 4] + 1) & m[, 5] >= p[6], 7] <- 92 ## team 2 won
m
}
## generate a long-form matrix representing score transitions
score_transition_long_matrix <- function(so, serving = 1, go_to = 25, stop_at = 35, start_scores = c(0, 0)) {
p <- c(start_scores, so, serving, go_to, stop_at, 0) ## starting params
## preallocate
M <- matrix(NA_real_, nrow = 10000, ncol = 9)
qi <- c(1L) ## index of item in Q to process
seen <- c(0) ## hashes that we've queued/processed (don't queue again)
Q <- list(p) ## queue of items
i <- 1L ## index into M
while (length(qi) > 0) {
## find the transitions from this set of starting_scores and serving team
res <- point_transition(Q[[qi[1]]])
M[c(i, i + 1L), ] <- res
## add to the queue any result rows representing a continuing game (not won, not aborted)
## also use the next score hash to avoid adding already-seen starting_score/serving team combos
next_hashes <- sshash(res[, 4], res[, 5], res[, 7])
res_propagate <- res[, 7] > 0 & res[, 7] < 3 & !next_hashes %in% seen
res[, 8] <- next_hashes ## the starting hashes for these are the "next" hashes from the point_transition output
res[, 9] <- c(i, i + 1L) ## update "from" for these
nextq <- res[res_propagate, c(4:5, 7:9), drop = FALSE] ## cols 1,2 = next starting scores, 3 = result (next serving team), 4 = hash, 5 = from
seen <- c(seen, nextq[, 4])
i <- i + 2L
qi <- qi[-1]
if (nrow(nextq) > 0) {
## cat("\nnext_hashes was: ", capture.output(str(next_hashes)), "\n")
## cat("res_propagate was: ", capture.output(str(res_propagate)), "\n")
## cat("M contains hashes: ", paste(na.omit(M[, 8]), collapse = ", "), "\n")
## cat("adding hashes to Q: ", paste(nextq[, 4], collapse = ", "), "\n")
Q <- c(Q, lapply(seq_len(nrow(nextq)), function(z) c(nextq[z, 1:2], so, nextq[z, 3], go_to, stop_at, nextq[z, 5])))
qi <- c(qi, length(Q) + 1 - seq_len(nrow(nextq)))
}
}
colnames(M) <- c("starting_team_1_score", "starting_team_2_score", "starting_serving_team", "team_1_score", "team_2_score", "prob", "result", "score_hash", "from")
M[seq_len(i - 1L), ]
}
score_transition_matrix <- function(so, serving = 1, go_to = 25, stop_at = 35, start_scores = c(0, 0)) {
M <- score_transition_long_matrix(so = so, serving = serving, go_to = go_to, stop_at = stop_at, start_scores = start_scores)
M <- as.data.frame(M)
M$rowid <- seq_len(nrow(M))
M$from_id <- paste0(M$starting_serving_team, "@", M$starting_team_1_score, ":", M$starting_team_2_score)
M$to_id <- M$result
M$to_id[M$result == 91] <- "WIN1"
M$to_id[M$result == 92] <- "WIN2"
M$to_id <- paste0(M$to_id, "@", M$team_1_score, ":", M$team_2_score)
M$to_id[M$result < 1] <- "NO RESULT"
all_id <- sort(unique(c(M$from_id, M$to_id)))
temp <- setNames(seq_along(all_id), all_id)
from_idn <- unname(temp[M$from_id])
to_idn <- unname(temp[M$to_id])
A <- matrix(0, nrow = length(all_id), ncol = length(all_id))
for (i in seq_len(nrow(M))) {
A[from_idn[i], to_idn[i]] <- A[from_idn[i], to_idn[i]] + M$prob[i]
}
## sink states
for (ss in grep("^(WIN|NO RESULT)", all_id)) A[ss, ss] <- 1.0
colnames(A) <- rownames(A) <- all_id
## rearrange to canonical form
aidx <- diag(A) == 1 ## absorbing states
aidx <- c(which(!aidx), which(aidx))
A[aidx, aidx]
}
scoreMC <- function(so, serving = 1, go_to = 25, stop_at = 35) {
A <- score_transition_matrix(so = so, serving = serving, go_to = go_to, stop_at = stop_at)
new("markovchain", states = rownames(A), transitionMatrix = A, name = "SCM") ## note that this is fairly slow with a large transition matrix
}
## faster direct implementation of absorption probs
absorbp <- function(A) {
aidx <- which(diag(A) == 1)
stopifnot(all(diff(aidx) == 1), max(aidx) == nrow(A))
Q <- A[seq_len(aidx[1] - 1L), seq_len(aidx[1] - 1L), drop = FALSE]
R <- A[seq_len(aidx[1] - 1L), aidx]
N <- solve(diag(1, nrow(Q), ncol(Q)) - Q)
N %*% R
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.