In RobinHankin/hyper2: The Hyperdirichlet Distribution, Mark 2

# Session setup: attach the packages used below, then fix the RNG seed,
# the knitr chunk defaults and the number of printed significant digits.
library(hyper2)
library(magrittr)

set.seed(0)
knitr::opts_chunk$set(echo = TRUE)
options(digits = 5)

# Show the package logo, located inside the installed hyper2 package.
knitr::include_graphics(
  system.file("help", "figures", "hyper2.png", package = "hyper2")
)

To cite the hyper2 package in publications, please use @hankin2017_rmd. Here I consider a single basketball match from the perspective of the Bradley-Terry model. File NBA.txt contains a point-by-point analysis of a basketball match between the Cleveland Cavaliers and the Golden State Warriors on 13 June 2017, data taken from here. Some further documentation is given at inst/NBA.Rd in the package.

# Read the point-by-point match record (one row per score) and preview it.
NBA_table <- read.csv(file = "NBA.txt", header = TRUE)
head(NBA_table)

Each row corresponds to a point scored. The first column is the time of the score, the second is the number of points scored, the third shows which team had possession at the start of play, and the fourth shows which team scored. The other columns show the players. Table entries show whether or not that particular player was on the pitch when the point was scored. Note that the two "ghost" players represent the effect of having possession.

First some basics:

# Cross-tabulate columns 3 and 4 (possession vs. scoring team), print the
# table, then run a one-sided Fisher exact test on it.
TT <- table(NBA_table[, 3:4])
TT
fisher.test(TT, alternative = "greater")

Thus the identity of the scoring team is not independent of the identity of the possessing team (as one might expect).
We can convert NBA_table to a likelihood function:

# Build two Bradley-Terry style likelihood functions from NBA_table, adding
# one term per row (i.e. per score):
#   NBA  uses a single ghost player "possession";
#   NBA2 uses team-specific ghosts "C_possession" and "W_possession".
# For every row, the power of the numerator (scoring side plus ghost) is
# incremented and the power of the denominator (everyone on the pitch plus
# the same ghost) is decremented.
allplayers <- as.matrix(NBA_table[,5:23])
NBA <- hyper2()
NBA2 <- hyper2()

## players 1-9 Cleveland
## players 10-19 (sic) Warriors:
C_onpitch <- allplayers[,1:9]
W_onpitch <- allplayers[,10:19]

possession <- NBA_table[,3]  # team in possession at the start of play
scored     <- NBA_table[,4]  # team that scored

for(i in seq_len(nrow(NBA_table))){
  # Names of the players on the pitch, split into scoring / non-scoring side.
  if(scored[i] == 'W'){  # Warriors score a point
    onpitch_scored  <- names(which(W_onpitch[i,]))
    onpitch_noscore <- names(which(C_onpitch[i,]))
  } else if(scored[i] == "C"){ # Cleveland score a point
    onpitch_scored  <- names(which(C_onpitch[i,]))
    onpitch_noscore <- names(which(W_onpitch[i,]))
  } else {
    stop("scoring team must be either W or C")
  }

  # Append the possession ghost: generic for NBA, team-specific for NBA2.
  # NOTE(review): the ghost is appended to the scoring side in both branches,
  # i.e. irrespective of whether the scoring team is the one that had
  # possession -- confirm this is the intended model.
  if(possession[i] == 'W'){
    onpitch_scored1 <- c(onpitch_scored,"possession")
    onpitch_scored2 <- c(onpitch_scored,"W_possession")
  } else if (possession[i] == 'C'){
    onpitch_scored1 <- c(onpitch_scored,"possession")
    onpitch_scored2 <- c(onpitch_scored,"C_possession")
  } else {
    stop("possession must be either W or C")
  }

  NBA[onpitch_scored1] %<>% inc()
  NBA[c(onpitch_scored1,onpitch_noscore)] %<>% dec()

  # The denominator must contain every entity in the numerator, including
  # the team-specific ghost, exactly as is done for NBA above.
  NBA2[onpitch_scored2] %<>% inc()
  NBA2[c(onpitch_scored2,onpitch_noscore)] %<>% dec()
}

# Maximum-likelihood strengths under the two-ghost model, reordered to the
# column order of NBA_table (players first, then the two possession ghosts).
NBA2_maxp <- maxp(NBA2)

NBA2_maxp <- ordertrans(
  NBA2_maxp,
  c(colnames(NBA_table)[-(1:4)], "C_possession", "W_possession")
)

# Colours: 9 black, then 10 red, then 2 blue (the ghosts).
dotchart(
  NBA2_maxp,
  col = rep(c("black", "red", "blue"), times = c(9, 10, 2)),
  pch = 16
)


# Precomputed evaluate for NBA.  Obtaining it is very computer intensive,
# but it has a far higher likelihood than the result of maxp(NBA).
NBA_maxp_precomputed <- c(
  Barnes     = 0.165210241492775,
  Curry      = 1.11314508611164e-06,
  Durant     = 0.00538619616133955,
  Green      = 0.0110738194130432,
  Iguodala   = 1.0005765074368e-06,
  Irving     = 0.0198810257932021,
  James      = 1.00000582238398e-06,
  Jefferson  = 0.00990927936847305,
  Korver     = 0.00843806151505123,
  Livingston = 0.075108213128189,
  Love       = 0.00175976502052894,
  McCaw      = 0.0119305671842558,
  Pachiuila  = 0.0496281731046277,
  possession = 0.479366441714251,
  Shumpert   = 0.0666196978946451,
  Smith      = 1.0008827880116e-06,
  Thompson   = 0.00159564073991825,
  Thompson.1 = 0.00135198197477646,
  West       = 0.0927348818403838,
  Williams   = 1.89904433611776e-06
)



# Maximum-likelihood strengths under the single-ghost model, reordered to the
# column order of NBA_table (players first, then the possession ghost).
NBA_maxp <- maxp(NBA)
NBA_maxp <- ordertrans(
  NBA_maxp,
  c(colnames(NBA_table)[-(1:4)], "possession")
)

# Colours: 9 black, then 10 red, then 1 blue (the ghost).
dotchart(
  NBA_maxp,
  col = rep(c("black", "red", "blue"), times = c(9, 10, 1)),
  pch = 16
)

Statistical analysis

# Test whether the two team-specific possession ghosts have equal strength.
samep.test(NBA2, paste0(c("C", "W"), "_possession"))

We may thus assume that Cleveland having possession has the same strength as the Warriors having possession, and from now on we work with NBA rather than NBA2.

# Estimated strengths for entries 1-9 and 10-19 of NBA_maxp, each sorted
# strongest first and printed.
C_allstrengths <- sort(NBA_maxp[1:9], decreasing = TRUE)
C_allstrengths
W_allstrengths <- sort(NBA_maxp[10:19], decreasing = TRUE)
W_allstrengths

# Test equality of strengths within each of the two groups.
samep.test(NBA, names(C_allstrengths))
samep.test(NBA, names(W_allstrengths))

Now, what is the probability of C scoring both with and without possession? We may condition on the estimated player and possession strengths, and assume that only the 5 strongest players on each team play.

# Combined strength of the five strongest players on each side (the
# strength vectors are already sorted in decreasing order).
C_strongestteam <- sum(C_allstrengths[seq_len(5)])
W_strongestteam <- sum(W_allstrengths[seq_len(5)])
possession <- 0.2155786 # taken from samep.test() above

# probability of C scoring without possession:
C_strongestteam / (C_strongestteam + W_strongestteam + possession)


# probability of C scoring with possession:
(C_strongestteam + possession) / (C_strongestteam + W_strongestteam + possession)

Package dataset {-}

The following lines create NBA.rda, residing in the data/ directory of the package.

# Replace the maxp() result with the precomputed evaluate, then write the
# three objects that make up the packaged dataset.
NBA_maxp <- NBA_maxp_precomputed
save(list = c("NBA_table", "NBA", "NBA_maxp"), file = "NBA.rda")

References {-}

RobinHankin/hyper2 documentation built on April 13, 2025, 9:33 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

RobinHankin/hyper2
The Hyperdirichlet Distribution, Mark 2

In RobinHankin/hyper2: The Hyperdirichlet Distribution, Mark 2

Statistical analysis

Package dataset {-}

References {-}

R Package Documentation

Browse R Packages

We want your feedback!

RobinHankin/hyper2 The Hyperdirichlet Distribution, Mark 2

In RobinHankin/hyper2: The Hyperdirichlet Distribution, Mark 2

Statistical analysis

Package dataset {-}

References {-}

R Package Documentation

Browse R Packages

We want your feedback!

RobinHankin/hyper2
The Hyperdirichlet Distribution, Mark 2