# Session setup: fix the RNG seed, echo chunk source in the rendered
# document, attach the packages used below, and print numbers with five
# significant digits.
set.seed(0)
knitr::opts_chunk$set(echo = TRUE)
library("hyper2")
library("magrittr")
options(digits = 5)
# Show the hyper2.png figure shipped under help/figures in the installed
# hyper2 package.
knitr::include_graphics(
  system.file("help/figures/hyper2.png", package = "hyper2")
)
To cite the hyper2
package in publications, please use @hankin2017_rmd.
Here I consider a single basketball match from the perspective of the
Bradley-Terry model. File NBA.txt
contains a point-by-point
analysis of a basketball match between the Cleveland Cavaliers and the
Golden State Warriors on 13 June 2017, data taken from
here. Some
further documentation is given at inst/NBA.Rd
in the package.
# Load the point-by-point match record (the first row of the file holds the
# column names) and preview its first few rows.
NBA_table <- read.csv(file = "NBA.txt", header = TRUE)
head(NBA_table)
Each row corresponds to a point scored. The first column is the time of the score, the second is the number of points scored, the third shows which team had possession at the start of play, and the fourth shows which team scored. The other columns show the players. Table entries show whether or not that particular player was on the pitch when the point was scored. Note that the two "ghost" players represent the effect of having possession.
First some basics:
# Cross-tabulate columns 3 and 4 of NBA_table (team in possession at the
# start of play against team that scored), print the table, then run a
# one-sided Fisher exact test on it.
TT <- table(NBA_table[, 3:4])
TT
fisher.test(TT, alternative = "greater")
Thus the identity of the scoring team is not independent of the
identity of the possessing team (as one might expect).
We can convert NBA_table
to a likelihood function:
# Build two likelihood functions from the point-by-point table:
#   NBA  : one "possession" ghost player shared by both teams;
#   NBA2 : separate "C_possession" and "W_possession" ghost players.
# Columns 5:23 of NBA_table are the per-player on-pitch indicators.
allplayers <- as.matrix(NBA_table[, 5:23])

## players 1-9 Cleveland
## players 10-19 (sic) Warriors:
C_onpitch <- allplayers[, 1:9]
W_onpitch <- allplayers[, 10:19]

possession <- NBA_table[, 3]  # team in possession at the start of play
scored <- NBA_table[, 4]      # team that scored the point

# Validate the team codes once, up front.  `%in%` never returns NA, so a
# missing value produces the intended message rather than a cryptic
# "missing value where TRUE/FALSE needed" failure inside `if`.
if (!all(scored %in% c("W", "C"))) {
  stop("scoring team must be either W or C")
}
if (!all(possession %in% c("W", "C"))) {
  stop("possession must be either W or C")
}

NBA <- hyper2()
NBA2 <- hyper2()

for (i in seq_len(nrow(NBA_table))) {
  if (scored[i] == "W") {  # Warriors score a point
    onpitch_scored <- names(which(W_onpitch[i, ]))
    onpitch_noscore <- names(which(C_onpitch[i, ]))
  } else {                 # Cleveland score a point
    onpitch_scored <- names(which(C_onpitch[i, ]))
    onpitch_noscore <- names(which(W_onpitch[i, ]))
  }

  # NOTE(review): the possession ghost is always credited to the scoring
  # side, whichever team actually held possession -- confirm that this is
  # the intended model before relying on the fitted possession strength.
  onpitch_scored1 <- c(onpitch_scored, "possession")
  onpitch_scored2 <- c(onpitch_scored, paste0(possession[i], "_possession"))

  # Each point contributes (scoring side) / (everyone involved): power +1
  # on the numerator bracket and -1 on the denominator bracket.
  NBA[onpitch_scored1] %<>% inc()
  NBA[c(onpitch_scored1, onpitch_noscore)] %<>% dec()

  # The denominator must contain the same ghost player as the numerator
  # (onpitch_scored2, not onpitch_scored); otherwise the ratio is not a
  # probability and NBA2 is inconsistent with NBA.
  NBA2[onpitch_scored2] %<>% inc()
  NBA2[c(onpitch_scored2, onpitch_noscore)] %<>% dec()
}

# Maximum-likelihood strengths under NBA2, reordered to follow the player
# columns of NBA_table with the two possession ghosts last.
NBA2_maxp <- maxp(NBA2)
NBA2_maxp %<>% ordertrans(
  c(colnames(NBA_table)[-(1:4)], "C_possession", "W_possession")
)
# black = Cleveland, red = Warriors, blue = possession ghosts
dotchart(
  NBA2_maxp,
  col = c(rep("black", 9), rep("red", 10), rep("blue", 2)),
  pch = 16
)

NBA_maxp_precomputed <- c(
  # precomputed; very very computer
  # intensive but far higher likelihood
  # than the result of maxp(NBA):
  Barnes     = 0.165210241492775,
  Curry      = 1.11314508611164e-06,
  Durant     = 0.00538619616133955,
  Green      = 0.0110738194130432,
  Iguodala   = 1.0005765074368e-06,
  Irving     = 0.0198810257932021,
  James      = 1.00000582238398e-06,
  Jefferson  = 0.00990927936847305,
  Korver     = 0.00843806151505123,
  Livingston = 0.075108213128189,
  Love       = 0.00175976502052894,
  McCaw      = 0.0119305671842558,
  Pachiuila  = 0.0496281731046277,
  possession = 0.479366441714251,
  Shumpert   = 0.0666196978946451,
  Smith      = 1.0008827880116e-06,
  Thompson   = 0.00159564073991825,
  Thompson.1 = 0.00135198197477646,
  West       = 0.0927348818403838,
  Williams   = 1.89904433611776e-06
)

# Maximum-likelihood strengths under NBA, reordered to follow the player
# columns of NBA_table with the single possession ghost last.
NBA_maxp <- maxp(NBA)
NBA_maxp %<>% ordertrans(c(colnames(NBA_table)[-(1:4)], "possession"))
# black = Cleveland, red = Warriors, blue = possession ghost
dotchart(
  NBA_maxp,
  col = c(rep("black", 9), rep("red", 10), rep("blue", 1)),
  pch = 16
)
# Test whether the two possession ghost players can be taken to have the
# same strength.
samep.test(
  NBA2,
  c("C_possession", "W_possession")
)
We may thus assume that possession is worth the same to Cleveland as it
is to the Warriors, and from now on we work with `NBA` rather than
`NBA2`.
# Estimated strengths for each team, strongest first: entries 1-9 of
# NBA_maxp are taken as Cleveland and entries 10-19 as the Warriors.
C_allstrengths <- sort(NBA_maxp[1:9], decreasing = TRUE)
C_allstrengths
W_allstrengths <- sort(NBA_maxp[10:19], decreasing = TRUE)
W_allstrengths

# Within each team, test whether the players can be taken to have equal
# strength.
samep.test(NBA, names(C_allstrengths))
samep.test(NBA, names(W_allstrengths))
Now, what is the probability of C scoring, with and without possession? We may condition on the estimated player and possession strengths, and assume that only the five strongest players on each team play.
# Team strength = summed strength of each side's five strongest players.
C_strongestteam <- sum(C_allstrengths[1:5])
W_strongestteam <- sum(W_allstrengths[1:5])
possession <- 0.2155786  # taken from samep.test() above

# probability of C scoring without possession:
C_strongestteam / (C_strongestteam + W_strongestteam + possession)

# probability of C scoring with possession:
(C_strongestteam + possession) /
  (C_strongestteam + W_strongestteam + possession)
The following lines create `NBA.rda`, which resides in the `data/`
directory of the package.
# Replace the freshly computed estimate with the precomputed one, then
# write the table, the likelihood function and the estimate to NBA.rda.
NBA_maxp <- NBA_maxp_precomputed
save(list = c("NBA_table", "NBA", "NBA_maxp"), file = "NBA.rda")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.