set.seed(0) knitr::opts_chunk$set(echo = TRUE) library("hyper2") library("magrittr") library("PlackettLuce") library("partitions") options("digits" = 5) showsome <- function(D){ jj <- round(D[c(1:4,nrow(D)),],4) jj[4,] <- "..." rownames(jj)[4] <- "..." noquote(jj) }
knitr::include_graphics(system.file("help/figures/hyper2.png", package = "hyper2"))
To cite the hyper2
package in publications, please use @hankin2017_rmd.
This short document creates the pudding
suite of R objects and
presents some preliminary analysis. It is not particularly
well-structured or terse and is essentially a sequence of notes. It
follows Davidson 1970 and the PlackettLuce
package.
Here is the pudding result:
pudding_table <- pudding # standard hyper2 terminology rownames(pudding_table) <- apply(allbinom(6,2),2,paste,collapse="v") pudding_table
Here is a method to convert the pudding matrix to a hyper2
likelihood function:
numbers <- as.character(1:6) puddings <- paste("pudding",numbers,sep="_") tyers <- paste("tie",numbers,sep="_") `convert` <- function(M){ H <- hyper2() I <- hyper2() J <- hyper2() for(i in seq_len(nrow(M))){ r_ij <- M[i,3] w_ij <- M[i,4] w_ji <- M[i,5] t_ij <- M[i,6] H[c(puddings[c(M[i,1] )] )] %<>% inc(w_ij) H[c(puddings[c( M[i,2])] )] %<>% inc(w_ji) H[c( "tie")] %<>% inc(t_ij) H[c(puddings[c(M[i,1],M[i,2])],"tie")] %<>% dec(r_ij) I[c(puddings[c(M[i,1] )] )] %<>% inc(w_ij) I[c(puddings[c( M[i,2])] )] %<>% inc(w_ji) I[c(tyers [c(M[i,1],M[i,2])] )] %<>% inc(t_ij) I[c(puddings[c(M[i,1],M[i,2])],tyers[c(M[i,1],M[i,2])])] %<>% dec(r_ij) J[c(puddings[c(M[i,1] )],"first")] %<>% inc(w_ij) J[c(puddings[c( M[i,2])] )] %<>% inc(w_ji) J[c(tyers [c(M[i,1],M[i,2])] )] %<>% inc(t_ij) J[c(puddings[c(M[i,1],M[i,2])],tyers[c(M[i,1],M[i,2])],"first")] %<>% dec(r_ij) } return(list(H,I,J)) }
Function convert()
gives a list of three hyper2
objects. The
first assumes that each pudding has a strength $p_i$, $1\leq i\leq 6$
and
[ \operatorname{Prob}(i\succ j) = \frac{\pi_i}{\pi_i+\pi_j+D}\ \operatorname{Prob}(j\succ i) = \frac{\pi_j}{\pi_i+\pi_j+D}\ \operatorname{Prob}(i\sim j) = \frac{D}{\pi_i+\pi_j+D} ]
where $p_i,D\geq 0$ and $\sum p_i+D=1$ (here $i\succ j$ means $i$ is preferred to $j$ and $i\sim j$ means no preference). The second element is a generalization of the first, allowing the tying propensities to differ:
[ \operatorname{Prob}(i\succ j) = \frac{\pi_i}{\pi_i+\pi_j+D_i+D_j}\ \operatorname{Prob}(j\succ i) = \frac{\pi_j}{\pi_i+\pi_j+D_i+D_j}\ \operatorname{Prob}(i\sim j) = \frac{D}{\pi_i+\pi_j+D_i+D_j} ]
where $\sum p_i+D_i=1$. The third element is a generalization of the second, giving the first pudding a "first-mover" advantage analogous to the home-team advantage in football, or playing white in chess:
[ \operatorname{Prob}(i\succ j) = \frac{\pi_i+F}{\pi_i+\pi_j+D_i+D_j+F}\ \operatorname{Prob}(j\succ i) = \frac{\pi_j}{\pi_i+\pi_j+D_i+D_j+F}\ \operatorname{Prob}(i\sim j) = \frac{D}{\pi_i+\pi_j+D_i+D_j+F} ]
Above, the first-mover advantage is given to pudding $i$. We can use these now:
lf <- convert(pudding_table) pudding <- lf[[1]] # standard hyper2 terminology pudding2 <- lf[[2]] pudding3 <- lf[[3]] pudding_maxp <- maxp(pudding) pudding2_maxp <- maxp(pudding2) pudding3_maxp <- maxp(pudding3)
pudding_maxp pudding2_maxp pudding3_maxp
We will analyse pudding
first:
summary(pudding) pie(pudding_maxp)
First we use pudding
to test a null of equal pudding strengths:
samep.test(pudding,puddings)
so according to samep.test()
the puddings are all the same strength.
We note that the propensity to tie cannot possibly be zero but we can
give a profile likelihood curve:
ptie <- seq(from=0.08,len=20,to=0.15) l <- profsupp(pudding,"tie",ptie)
plot(ptie,l,type='b',lwd=2) abline(h=c(0,-2)) #abline(v=c(0.095,0.127)) grid()
and we see that a credible interval for the proclivity to tie would be $(0.095,0.127)$.
Object pudding2
includes a pudding-specific propensity to tie. We
can test different nulls:
samep.test(pudding2,puddings) samep.test(pudding2,tyers)
Object pudding3
includes a first-mover advantage:
summary(pudding3)
pudding3_maxp
We can see above that the first-mover advantage is small (and indeed a "second-mover advantage") is also very small.
Davidson (1970) considers the following probability model:
[ \operatorname{Prob}(i\succ j) = \frac{\pi_i}{\pi_i+\pi_j+\nu\sqrt{\pi_i\pi_j}}\ \operatorname{Prob}(j\succ i) = \frac{\pi_j}{\pi_i+\pi_j+\nu\sqrt{\pi_i\pi_j}}\ \operatorname{Prob}(i\sim j) = \frac{\nu\sqrt{\pi_i\pi_j}}{\pi_i+\pi_j+\nu\sqrt{\pi_i\pi_j}} ]
and gives the following estimate for the pudding data:
davidson <- c(0.1388005,0.1729985, 0.1617420, 0.1653930, 0.1586805, 0.2023855, 0.7468147) names(davidson) <- c(puddings,"tie") davidson par(pty='s') plot(davidson[1:6],pudding_maxp[1:6]/(1-pudding_maxp[7]),asp=1,xlim=c(0.13,0.2),ylim=c(0.13,0.2),pch=16) abline(0,1)
Although the probability model is different, we may compare the probabilities of $i\succ j$, $j\succ i$, and $i\sim j$ for $i<j$ with the two models. This gives us a total of $3{6\choose 2}=45$ (nonindependent) probabilities and we may plot a scattergraph:
ph <- pudding_maxp # ph == 'pudding hankin'; saves typing pd <- davidson # pd == 'pudding davidson' print(ph) print(pd) `hankin` <- function(ph){ ## ph a vector of length n+1; first n entries are strengths, last one ## propensity to tie n <- length(ph)-1 m <- allbinom(n,2) out <- matrix(NA,nrow=ncol(m),ncol=3) colnames(out) <- c("i","j","tie") rownames(out) <- apply(m,2,paste,collapse="v") th <- ph[n+1] for(o in seq_len(ncol(m))){ i <- m[1,o] j <- m[2,o] dh <- ph[i] + ph[j] + th # dh == 'denominator hankin' out[o,1] <- ph[i]/dh out[o,2] <- ph[j]/dh out[o,3] <- th/dh } return(out) } `davidson` <- function(pd){ ## pd a vector of length n+1; first n entries are strengths, last one ## nu n <- length(pd)-1 # how many puddings nu <- pd[n+1] m <- allbinom(n,2) out <- matrix(NA,nrow=ncol(m),ncol=3) colnames(out) <- c("i","j","tie") rownames(out) <- apply(m,2,paste,collapse="v") nu <- pd[n+1] for(o in seq_len(ncol(m))){ i <- m[1,o] j <- m[2,o] dd <- pd[i] + pd[j] + nu*sqrt(pd[i]*pd[j]) # dd == 'denominator davidson' out[o,1] <- pd[i]/dd out[o,2] <- pd[j]/dd out[o,3] <- nu*sqrt(pd[i]*pd[j])/dd } return(out) } plotter <- function(ph,pd,...){ h <- hankin(ph) d <- davidson(pd) nh <- nrow(h) plot(c(h),c(d),pch=16,asp=1,col=c(rep("red",nh),rep("blue",nh),rep("green",nh)),...) }
print(ph) print(hankin(ph)) print(pd) print(davidson(pd)) jj <- c(2,5)/10 par(pty='s') plotter(ph,pd,xlim=jj,ylim=jj) grid()
Now calculate likelihoods:
n <- pudding_table[,-(1:3)] h <- hankin(ph) d <- davidson(pd) showsome(n) showsome(h) showsome(d) sum(n*log(h)) sum(n*log(d)) (suppdiff <- sum(n*log(d))-sum(n*log(h)))
Above we see that Davidson's method has almost 3 units of support more than RBT. However, we note that Davidson's model has seven degrees of freedom while RBT has only six (because of the the unit sum constraint). Here Wilks's theorem is not applicable because the two models are not nested but even so the likelihood ratio is a meaningful statistic.
pchisq(suppdiff,df=1,lower.tail=FALSE)
Above we see that, according to this model, there is no evidence to suggest that the puddings' strengths, or their tying ability, differ. But we can adduce a dataset for which the tying ability does differ. Consider this:
p <- pudding_table p[1 ,4:6] <- c(10,10,37) # 1v2 p[2 ,4:6] <- c(10,10,27) # 1v3 p[4 ,4:6] <- c(10,10,24) # 1v4 p[7 ,4:6] <- c(10,10,20) # 1v5 p[11,4:6] <- c(10,10,31) # 1v6 p
(above, we doctor the dataset so that any comparison involving pudding 1 has a high probability of tying). Then:
Hp <- convert(p)[[2]] mHp <- maxp(Hp) mHpt <- mHp[7:12] #just the tyers
visualise:
mHpt
plot(mHpt)
(above we see that pudding 1 does indeed have a higher tie probability). Test the null of equal tying probabilities:
(tHp <- samep.test(Hp,tyers))
Above analysis shows that we can reject the null that all puddings have the same tying propensity.
Although the likelihood for Davidson's model was higher than mine for the original dataset, we can create a synthetic dataset which is a random observation drawn from a reified Bradley-Terry probability model. I will then test the hypothesis that the data is in fact drawn from an RBT distribution by calculating a likelihood ratio.
set.seed(0) f <- function(i){rmultinom(1,50,prob=h[i,])} # f(i) = sample from row i of h synth <- pudding_table synth[,-(1:3)] <- t(sapply(1:15,f)) # random sample synth[,3] <- 50 rownames(synth) <- rownames(p) colnames(synth) <- colnames(p) showsome(synth)
Above, synth
is a dataset drawn from an RBT model with maximum
likelihood parameters obtained from the original dataset. We now use
both methods. First reified Bradley-Terry:
(H_synth <- convert(synth)[[1]]) ph_synth <- maxp(H_synth)
ph_synth
The above strengths are different from (but not too different from)
the value obtained from maxp(H)
above. Now analyse the same dataset
but using the method given in the PlackettLuce
vignette:
PlackettLuce_coef <- function(pudding_table){ i_wins <- data.frame(Winner = pudding_table$i, Loser = pudding_table$j) j_wins <- data.frame(Winner = pudding_table$j, Loser = pudding_table$i) ties <- data.frame(Winner = asplit(pudding_table[c("i", "j")], 1), Loser = rep(NA, 15)) R <- as.rankings(rbind(i_wins, j_wins, ties), input = "orderings") w <- unlist(pudding_table[c("w_ij", "w_ji", "t_ij")]) mod <- PlackettLuce(R, weights = w, npseudo = 0, maxit = 70) return(coef(mod,log=FALSE)) }
As a consistency check, try verifying the original result:
PlackettLuce_coef(pudding_table)
Above, we see values that match those in the PlackettLuce
vignette.
Now apply the same method to the synthetic dataset synth
:
(pd_synth <- PlackettLuce_coef(synth))
Now we need to calculate the probabilities:
showsome(h_synth <- hankin(ph_synth)) showsome(d_synth <- davidson(pd_synth))
And calculate a likelihood ratio for the two hypotheses, RBT and Davidson:
showsome(n <- synth[,-(1:3),]) (lh_synth <- sum(n*log(h_synth))) # Hankin; RBT (ld_synth <- sum(n*log(d_synth))) # Davidson lh_synth - ld_synth
So reified Bradley-Terry is better than Davidson, but (in this case at least) not by much. The difference is small because the puddings have very similar strengths. We can repeat the analysis but with puddings of different strengths:
dpudstrengths <- c(zipf(6)*0.9,tie=0.1) # different pudding strengths names(dpudstrengths) <- c(puddings,"tie") dpudstrengths
Above we see that pudding_1
is six times stronger than pudding_6
in accordance with Zipf's law. Same analysis as before:
showsome(h2 <- hankin(dpudstrengths)) f <- function(i){rmultinom(1,50,prob=h2[i,])} synth2 <- pudding_table synth2[,-(1:3)] <- t(sapply(1:15,f)) # random sample synth2[,3] <- 50 rownames(synth2) <- rownames(p) colnames(synth2) <- colnames(p) showsome(synth2)
OK so synth2
is a different synthetic dataset but this time with
pudding strengths following Zipf. As before, create a hyper2
object and find the evaluate:
H_synth2 <- convert(synth2)[[1]] # a hyper2 likelihood function ph_synth2 <- maxp(H_synth2)
ph_synth2
As a consistency check compare estimate with true value:
ph_synth2-dpudstrengths # difference is small
Now Davidson estimate using synth2
:
(pd_synth2 <- PlackettLuce_coef(synth2))
h_synth2 <- hankin(ph_synth2) d_synth2 <- davidson(pd_synth2) showsome(h_synth2) # same as before
And calculate a support function:
n <- synth2[,-(1:3),] # as before, extract just the counts (lh_synth2 <- sum(n*log(h_synth2))) (ld_synth2 <- sum(n*log(d_synth2))) lh_synth2 - ld_synth2
So this time the difference in support is larger, due to the puddings having different strengths.
Following lines create pudding.rda
, residing in the data/
directory of the package.
save(pudding_table,pudding,pudding_maxp,curling2_maxp,file="pudding.rda")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.