knitr::opts_chunk$set(echo = TRUE)
library("hyper2")
library("pracma")
library("magrittr")
This terse document reproduces the results presented in the associated manuscript.
First we use Stirling's approximation $\log(n!)\sim(n+1/2)\log(n)-n+\log(2\pi)/2$ to simplify equation 4 of the manuscript. The following is sage-compatible input [the $\sqrt{2\pi}$ bit cancels]:
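For reference, the expression being expanded [my reconstruction from the R function f_single() below, not a quotation of the manuscript] is

$$\mathcal{L}_{r,n}(a)=\frac{(B-1)\,\Gamma(B+n-r-1)}{\Gamma(B+n)},\qquad B=\frac{1}{1-a},$$

so that $\log\mathcal{L}_{r,n}(a)=\log(B-1)+\log\left((B+n-r-2)!\right)-\log\left((B+n-1)!\right)$; the quantity X below is this expression with Stirling's approximation applied to the two log-factorials.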
var('B n r')
pretty_print_default(True)
X = log(B-1) + (
  + ((B+n-r-2)*log(B+n-r-2) - (B+n-r-2) + log(B+n-r-2)/2)
  - ((B+n -1)*log(B+n -1) - (B+n -1) + log(B+n -1)/2)
)
X.taylor(n, Infinity, 2)
sage returns this:
$$
-(r+1)\log n
-\frac{(2B-3)r-r^2+2B-2}{2n}
-\frac{3(2B-3)r^2-2r^3-6B^2-6(B^2-3B+2)r+12B-5}{12n^2}
+\log(B-1)+\mathcal{O}(n^{-3})
$$
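We can check this series numerically [this check is mine and does not appear in the manuscript]: using lgamma() for the exact log-factorials, the series should agree with the exact expression up to terms of order $n^{-2}$ [the residual order-$n^{-2}$ discrepancy comes from the truncation of Stirling's approximation, as noted below]:

## Numerical check (not in the manuscript) of the series returned by sage.
## The exact quantity is log(B-1) + log((B+n-r-2)!) - log((B+n-1)!).
B <- 2.5; r <- 4; n <- 1000    # arbitrary illustrative values
exact  <- log(B-1) + lgamma(B+n-r-1) - lgamma(B+n)
series <- ( -(r+1)*log(n) - ((2*B-3)*r - r^2 + 2*B-2)/(2*n)
            - (3*(2*B-3)*r^2 - 2*r^3 - 6*B^2 - 6*(B^2-3*B+2)*r + 12*B-5)/(12*n^2)
            + log(B-1) )
c(exact = exact, series = series, difference = exact - series)  # difference should be tiny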
Note that if we use instead $n!\sim n^ne^{-n}\sqrt{2\pi n}\left(1+\frac{1}{12n}\right)$, then only terms of order $n^{-2}$ and higher are changed; the first-order terms, and indeed the zeroth-order terms, are unaffected. Anyway, after simplifying and taking an asymptotic expansion to order $n^{-1}$, we find, to first order,

$$\mathcal{S}=\log\mathcal{L}_{r,n}(a)=\log(B-1)-B\frac{r+1}{n} + K + \mathcal{O}\left(n^{-2}\right)$$

where $K$ does not depend on $a$ (it depends only on $r$ and $n$).
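As a quick sanity check [mine, not part of the manuscript], the exact support and the asymptotic expression should differ by a quantity that, to this order, does not depend on $a$:

## Sanity check (not in the manuscript): for large n, the exact support and
## the asymptotic expression should differ by an amount that is (almost)
## independent of a.
a <- c(0.2, 0.4, 0.6, 0.8)
B <- 1/(1-a)
r <- 3
n <- 1e4
exact  <- log(B-1) - lgamma(B+n) + lgamma(B+n-r-1)
approx <- log(B-1) - B*(r+1)/n
exact - approx    # should be (almost) the same for all four values of a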
Below I give R idiom for the likelihood expression given by equation 4 in the manuscript. Some inline comments are included.
f_single <- function(a,r,n,log=FALSE){   # not vectorised
  B <- 1/(1-a)
  if(log){
    out <- log(B-1) - lgamma(B+n) + lgamma(B+n-r-1)
  } else {  # not-log
    out <- (B-1)/prod(B+(n-r-1):(n-1))
  }
  return(out)
}

f_vec_a <- function(a,r,n, log=FALSE){   # vectorised in 'a' but not in 'r'
  sapply(a,function(a){f_single(a,r=r,n=n,log=log)})
}

f_vec_arn <- function(a,r,n,log=FALSE){
  ## vectorized in 'a'; treats 'r' and 'n' as
  ## vectors of independent observations
  M <- cbind(r,n)
  if(log){
    out <- 0
    for(i in seq_len(nrow(M))){out <- out + f_vec_a(a,r=M[i,1],n=M[i,2],log=TRUE)}
  } else {
    out <- 1
    for(i in seq_len(nrow(M))){out <- out * f_vec_a(a,r=M[i,1],n=M[i,2],log=FALSE)}
  }
  return(out)
}
fapprox <- function(a,r,n,log=FALSE){
  B <- 1/(1-a)
  if(log){
    return(log(B-1) - B*(r+1)/n)
  } else {
    return((B-1)*exp(-B*(r+1)/n))  # exponential of the log case
  }
}
Above, fapprox() gives the asymptotic expression (5) in the manuscript.
fdash <- function(a,r,n,log=TRUE){
  B <- 1/(1-a)
  out <- (1/(B-1) - psigamma(B+n) + psigamma(B+n-r-1))*B^2
  if(!log){out <- out * f_vec_arn(a,r,n,log=FALSE)}
  return(out)
}
Notation is $\mathcal{L}$ for likelihood and $\mathcal{S}=\log\mathcal{L}$ for log-likelihood (support). Above we see that fdash() implements either $\frac{\partial\mathcal{S}}{\partial a}$ or $\frac{\partial\mathcal{L}}{\partial a}$ [depending on the value of argument log]. We see that
$$ \frac{\partial\mathcal{S}}{\partial a}= B^2\left(\frac{1}{B-1}-\psi(B+n)+\psi(B+n-r-1)\right)$$
and $\frac{\partial\mathcal{L}}{\partial a}$ is of course just $\mathcal{L}\frac{\partial\mathcal{S}}{\partial a}$. Now, numerical verification of fdash():
a <- 0.34
d <- 1e-3
r <- 4
n <- 10
c(
  numerical  = (f_single(a+d/2,r,n,log=TRUE) - f_single(a-d/2,r,n,log=TRUE))/d,
  analytical = fdash(a,r,n)
)
c(
  numerical  = (f_single(a+d/2,r,n,log=FALSE) - f_single(a-d/2,r,n,log=FALSE))/d,
  analytical = fdash(a,r,n,log=FALSE)
)
Above we see very close agreement between numerical and analytical results, for both log=TRUE and log=FALSE.
Use numerical optimization [optimize()] to find the maximum likelihood point [R function MLE()] and, as a check, use uniroot() to find the point of zero derivative of the support [R function MLEa()]:
MLE <- function(r,n,give=FALSE){
  d <- 1e-6
  out <- optimize(f_vec_arn,c(d,1-d),r=r,n=n,log=TRUE,maximum=TRUE)
  if(!give){out <- out$maximum}
  return(out)
}

MLEa <- function(r,n,give=FALSE,small=1e-4){
  out <- uniroot(f=function(a){fdash(a,r,n)},interval=c(small,1-small))
  if(!give){out <- out$root}
  return(out)
}

showdiff <- function(x,y){c(way1=x,way2=y,diff=x-y,logratio=log(x/y))}

rbind(
  showdiff(MLE(1,9),MLEa(1,9)),
  showdiff(MLE(3,9),MLEa(3,9)),
  showdiff(MLE(3,7),MLEa(3,7))
)
Above we see very close agreement between the two methods. We can get a feel for the accuracy of the asymptotic result numerically using similar methods:
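Differentiating the asymptotic support with respect to $a$ [this intermediate step is not spelled out here; recall $\mathrm{d}B/\mathrm{d}a=B^2$] and setting the derivative to zero gives

$$B^2\left(\frac{1}{B-1}-\frac{r+1}{n}\right)=0\Longrightarrow B=1+\frac{n}{r+1}\Longrightarrow \hat{a}=1-\frac{1}{B}=\frac{n}{n+r+1},$$

which is where the reference values $n/(n+r+1)$ used below come from.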
rbind(
  showdiff(MLE(10, 100), 100 / 111),
  showdiff(MLE(10, 500), 500 / 511),
  showdiff(MLE(10,1000),1000 /1011),
  showdiff(MLE(10,5000),5000 /5011)
)
rbind(
  showdiff(MLEa(10, 100), 100 / 111),
  showdiff(MLEa(10, 500), 500 / 511),
  showdiff(MLEa(10,1000),1000 /1011),
  showdiff(MLEa(10,5000),5000 /5011)
)
Now another verification, this time comparing the R implementation against two different direct numerical implementations of equation 4 of the manuscript:
a <- 0.23423434
b <- 1-a
B <- 1/b
n <- 9
r <- 3
c(
  way1 = b^3*a/((a+9*b) * (a+8*b) * (a+7*b) * (a+6*b)),
  way2 = (B-1)/prod(B+(5:8)),
  way3 = f_vec_arn(a,r,n,log=FALSE)
)
Above we see close agreement.
out <- list()
for(n in 1:10){
  out[[n]] <- rep(NA,n+1)
  for(r in 0:n){
    if(r==0){
      jj <- 1
    } else if(r==n){
      jj <- 0
    } else {
      jj <- MLE(r,n)
    }
    out[[n]][r+1] <- jj
  }
}
out
plotterp <- function(...){
  plot(NA,xlim=c(1,10),ylim=c(0,1),type="n",xlab="n",ylab=expression(hat(a)))
  for(n in 1:10){
    for(r in 0:n){ points(n,out[[n]][r+1],pch=16) }
    for(r in 0:n){ text(n+0.2,out[[n]][r+1],r,cex=0.5,col='gray') }
  }
}
plotterp()
pdf(file="dotprobs.pdf")
plotterp()
dev.off()
Produce the rainbow-coloured likelihood plot:
plotter <- function(...){
  a <- seq(from=0,to=1,by=0.01)
  n <- 8
  jj <- f_vec_arn(a,3,n,log=FALSE)
  plot(a,jj/max(jj,na.rm=TRUE),type='n',xlab=expression(a),ylab="likelihood")
  grid()
  rain <- rainbow(n+1)
  for(r in seq(from=0,to=n)){
    y <- f_vec_a(a,r,n,log=FALSE)
    if(r==0){y[length(y)] <- 1}
    y <- y/max(y,na.rm=TRUE)
    if(r>0){y[length(y)] <- 0}
    points(a,y,type='l',lwd=4,col=rain[r+1])
  }
  abline(v=MLE(r=4,n=8))
  text(0.07,0.95,"r=8",col=rain[9])
  text(0.20,0.95,"r=7",col=rain[8])
  text(0.26,0.91,"r=6",col=rain[7])
  text(0.29,0.83,"r=5",col=rain[6])
  text(0.33,0.76,"r=4",col=rain[5])
  text(0.35,0.64,"r=3",col=rain[4])
  text(0.39,0.53,"r=2",col=rain[3])
  text(0.45,0.41,"r=1",col=rain[2])
  text(0.63,0.22,"r=0",col=rain[1])
}
plotter()
pdf(file="ninelikes.pdf")
plotter()
dev.off()
## Comparison with the hyper3 likelihood machinery of the hyper2 package:
n <- 7
a <- seq(from=0,to=1,by=0.01)
fhyper3 <- function(r,n){
  out <- hyper3()
  out['a'] <- 1
  out['b'] <- r
  for(i in (n-r):n){ out[c(a=1,b=i)] %<>% dec }
  return(out)
}
plot(NA,xlim=c(0,1),ylim=c(0,1),type='n')
M <- cbind(a=a,b=1-a)
for(r in 0:7){
  y <- loglik(M,fhyper3(r,n),log=FALSE)
  y <- y/max(y, na.rm=TRUE)
  points(M[,1],y,type="l",lwd=8)
}
readstring <- function(year){read.table(paste("formula1_",year,".txt",sep=""))}
getfoc <- function(year,comp="Perez"){   # get focal competitor
  M <- as.matrix(readstring(year))
  out <- suppressWarnings(as.numeric(M[comp,seq_len(ncol(M)-1)]))
  out[is.na(out)] <- nrow(M)
  return(out)
}
getnum <- function(year){nrow(readstring(year))}   # number of competitors
perez <- lapply(2011:2023,getfoc)
print(perez)
y <- unlist(lapply(seq_along(perez),function(i){mean(perez[[i]])}))
x <- 2011:2023
plot(x,y)
checo_like <- function(a){
  out <- a*0
  for(year in 2011:2023){
    out <- out + f_vec_arn(a,getfoc(year)-1,getnum(year)-1,log=TRUE)
  }
  return(out)
}
a <- seq(from=0.45,to=0.62,by=0.01)
cL <- checo_like(a)
cL <- cL - max(cL)
plot(a,cL,type='b')
abline(h=c(0,-2))
f1_logistic <- function(vec){
  alpha <- vec[1]
  beta <- vec[2]
  out <- 0
  for(year in 2011:2023){
    LO <- alpha + beta*(year-2011)
    strength <- exp(LO)/(1+exp(LO))
    out <- out + f_vec_arn(strength,getfoc(year)-1,getnum(year)-1,log=TRUE)
  }
  return(out)
}
a <- seq(from=-0.8,to=0.0,by=0.01)
b <- seq(from=0,to=0.2,by=0.01)
jj <- as.matrix(expand.grid(a,b))
L <- apply(jj,1,f1_logistic)
L <- L-max(L)
L <- matrix(L,length(a),length(b))
L <- pmax(L,-40)
showchec <- function(...){
  contour(a,b,L,xlab=expression(alpha),ylab=expression(beta),levels=-c(2,5*(1:5)))
  points(-0.27,0.0813,pch=16,cex=2)
}
showchec()
pdf(file="showchecolike.pdf")
showchec(a,b,L)
dev.off()
f1_logistic(c(0,0))
f1_logistic(c(0,0.0001))
f1_logistic(c(0.001,0))
o <- optim(c(-0.2,0.1),fn=f1_logistic, control=list(fnscale = -1),hessian=TRUE)
o
o$par
o$hessian
eigen(o$hessian)
best <- o$par
f1_logistic(best)
jj <- best
jj[1] <- jj[1]*1.01
f1_logistic(jj) - f1_logistic(best)
jj <- best
jj[1] <- jj[1]*0.99
f1_logistic(jj) - f1_logistic(best)
jj <- best
jj[2] <- jj[2]*1.00001
f1_logistic(jj) - f1_logistic(best)
jj <- best
jj[2] <- jj[2]*0.99
f1_logistic(jj) - f1_logistic(best)
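As an illustration [mine, not part of the manuscript], the fitted logistic parameters can be translated back into an estimated strength for the focal competitor in each season:

## Illustration (not in the manuscript): convert the fitted logistic
## parameters into an estimated strength for each season.
best <- o$par
years <- 2011:2023
strength_hat <- plogis(best[1] + best[2]*(years - 2011))  # exp(LO)/(1+exp(LO))
data.frame(year = years, strength = round(strength_hat, 3))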
o_free <- optim(c(-0.2,0.1),fn=f1_logistic,
                control=list(fnscale = -1),hessian=TRUE)
o_constrained <- optim(c(-0.2,0.1),fn=function(v){
  v[2] <- 0
  return(f1_logistic(v))},
  control=list(fnscale = -1),hessian=TRUE)
o_free
o_constrained
pchisq(2*(o_free$value - o_constrained$value),df=1,lower.tail=FALSE)
O <- read.table("olympic_athletics_mens_100m.txt",header=TRUE)
head(O)
jjf <- function(x){f_vec_arn(x,O$rank,O$n,log=TRUE)}
system.time(optolym <- optimize(jjf, c(0.45,0.5), maximum=TRUE))
optolym
jjf(0.5)
jjf(optolym$maximum)
(LR <- 2*(jjf(optolym$maximum)-jjf(0.5)))
pchisq(LR,df=1,lower.tail=FALSE)
a <- seq(from=0.3,to=0.65,len=45)
L <- f_vec_arn(a,O$rank,O$n,log=TRUE)
Lmax <- jjf(optolym$maximum)
Lmax
L <- L-Lmax
(a_lower <- uniroot(function(x){jjf(x)+2-Lmax},interval=c(0.3,0.5))$root)
(a_upper <- uniroot(function(x){jjf(x)+2-Lmax},interval=c(0.5,0.6))$root)
plotolymp <- function(...){
  plot(a,L,type='l',ylab="log-likelihood",lwd=2)
  abline(h=c(0,-2))
  ahat <- optolym$maximum
  segments(x0=ahat,y0=-1.5,y1=0)
  text(ahat,-0.91,expression(hat(a)==0.474),pos=2)
  abline(v=0.5,lty=3)
  segments(x0=a_lower,y0=-1.5,y1=-3.5)
  segments(x0=a_upper,y0=-1.5,y1=-3.5)
  text(x=a_lower,y=-3,"0.380",pos=4)
  text(x=a_upper,y=-3,"0.563",pos=2)
}
plotolymp()
pdf(file="plotolymp.pdf")
plotolymp()
dev.off()
rank       <- c( 9, 7, 2, 3, 2, 8, 5, 4, 9)
class_size <- c(12,17,23, 9,13,14,13,12,15)
course <- c("rings and modules","group theory","calculus","linear algebra",
            "differential equations","topology","special relativity",
            "fluid mechanics","Lie algebra")
category <- c("pure","pure","applied","applied","applied","pure",
              "applied","applied","pure")
data.frame(course,category,rank,class_size)
a <- seq(from=0.2,by=0.01,to=0.8)
wp <- category=='pure'
wa <- category=='applied'
The following lines show optimization of the marginal Bradley-Terry strength for pure and applied mathematics:
Lpure <- f_vec_arn(a,rank[wp]-1,class_size[wp]-1,log=TRUE)
Lpure <- Lpure - max(Lpure,na.rm=TRUE)
plot(a,Lpure,ylab='pure strength')
optimize(function(a){f_vec_arn(a,rank[wp]-1,class_size[wp]-1,log=TRUE)},c(0.1,0.9),maximum=TRUE)
optimize(function(a){f_vec_arn(a,rank[wa]-1,class_size[wa]-1,log=TRUE)},c(0.1,0.9),maximum=TRUE)
[The evaluations above do not appear in the manuscript.] Now we define a likelihood function for the two subjects jointly:
supp2_ed <- function(vec,place1,place2,runners1,runners2){
  out <- 0
  M1 <- cbind(place1,runners1)
  M2 <- cbind(place2,runners2)
  for(i in seq_len(nrow(M1))){
    out <- out + f_vec_arn(vec[1],r=M1[i,1]-1,n=M1[i,2]-1,log=TRUE)
  }
  for(i in seq_len(nrow(M2))){
    out <- out + f_vec_arn(vec[2],r=M2[i,1]-1,n=M2[i,2]-1,log=TRUE)
  }
  return(out)
}
x <- seq(from=0.1,to=0.99,by=0.01)
jj <- as.matrix(expand.grid(x,x))
L <- apply(jj,1,supp2_ed,rank[wp]-1,rank[wa]-1,class_size[wp]-1,class_size[wa]-1)
L <- L - max(L,na.rm=TRUE)
L <- matrix(L,length(x),length(x))
plotpureandapplied <- function(...){
  par(pty='s')
  contour(x,x,L,asp=1,xlim=range(x),ylim=range(x),levels=-(0:5),
          xlab='pure strength',ylab='applied strength')
  abline(0,1)
}
plotpureandapplied()
pdf(file="plotpureandapplied.pdf")
plotpureandapplied()
dev.off()
The following code shows joint maximization of the likelihood:
jj1 <- optim(
  c(.4,.8),
  function(vec){
    supp2_ed(vec,rank[wp]-1,rank[wa]-1,class_size[wp]-1,class_size[wa]-1)},
  control=list(fnscale = -1)
)
jj1
The following code shows constrained maximization of the likelihood:
jj2 <- optimize(function(v){
  supp2_ed(c(v,v),rank[wp]-1,rank[wa]-1,class_size[wp]-1,class_size[wa]-1)},
  c(0.1,0.9),maximum=TRUE)
jj2
Extra support is thus $-86.9449 + 89.3732 = 2.4283$ (to 4 d.p.). Now calculate the asymptotic $p$-value obtained from Wilks's theorem:
pchisq(2*(jj1$value-jj2$objective),lower.tail=FALSE,df=1)