set.seed(0) knitr::opts_chunk$set(echo = TRUE) library("hyper2") library("magrittr") options("digits" = 5)
knitr::include_graphics(system.file("help/figures/hyper2.png", package = "hyper2"))
(almost all the time is spent on chunk podlikevec7
,
which takes about one minute per evaluation)
To cite the hyper2
package in publications, please use
@hankin2017_rmd. This short document defines the podium()
function
which tests the hypothesis that a Formula 1 driver's being on the
podium at one venue affects his performance on the next. Data from
the 2007 season are used. It uses hyper3
objects from the hyper2
package. A detailed use-case for the top six drivers (Alonso,
Hamilton, Heidfeld, Kubica, Massa, and Raikkonen^[Umlauts pose special
problems for Rmd files]), and only the first five venues (Austria,
Malaysia, Bahrain, Spain, and Monaco) is given.
TLDR: Statistically significant evidence for the podium effect was found, with about 2.7 units of support for the alternative.
Function podium()
, defined below, takes a race table and a value of
$\lambda$ and returns a hyper3
support function.
podium <- function (lambda,x,print=FALSE){ noscore <- c("Ret", "WD", "DNS", "DSQ", "DNP", "NC", "DNQ", "EX", "Sick") venues <- colnames(x) jj <- apply(x, 2, function(y) { if (any(y %in% noscore)) { y[y %in% noscore] <- 0 } return(y) }) fmat <- matrix(as.numeric(jj), byrow = TRUE, ncol = nrow(x)) colnames(fmat) <- rownames(x) rownames(fmat) <- venues o <- fmat[1, , drop = TRUE] o[o > 0] <- rank(o[o > 0]) out <- as.hyper3(suppfun(o)) for (i in seq(from=2,to=nrow(fmat))){ if(print){cat(paste(i,"/",nrow(fmat),"\n"))} yesterday <- fmat[i-1, , drop = TRUE] yesterday[yesterday > 0] <- rank(yesterday[yesterday > 0]) # 'incomplete' functionality of ordertable2supp() podium_yesterday <- names(which((yesterday <= 3) & (yesterday>0))) d <- fmat[i, , drop = TRUE] # today d[d > 0] <- rank(d[d > 0]) # This is the 'incomplete' functionality of ordertable2supp() ## Following lines lifted from ordervec2supp() nd <- names(d) while (any(d > 0)) { eligible <- which(d >= 0) winner <- nd[d == 1] # that is, the winner of those still racing if(winner %in% podium_yesterday){ jj <- lambda } else { jj <- 1 } names(jj) <- winner out[jj] %<>% inc # numerator ## Now denominator jj <- rep(1,length(eligible)) jj[nd[eligible] %in% podium_yesterday] <- lambda names(jj) <- names(eligible) out[jj] %<>% dec # denominator d[d == 1] <- -1 d[d > 0] %<>% dec } # ordervec2supp() lookalike closes } # i loop closes return(out) }
The first race has standard Plackett-Luce likelihood:
$$a\succ b\succ c\succ d\succ e\succ f$$
has a PL likelihood of
$$ \frac{a}{a+b+c+d+e+f}\cdot \frac{b}{ b+c+d+e+f}\cdot \frac{c}{ c+d+e+f}\cdot \frac{d}{ d+e+f}\cdot \frac{e}{ e+f} $$
However, if the result were
$$\overline{a}\succ b\succ\overline{c}\succ\overline{d}\succ e\succ f$$
(where an overbar signifies that the driver was on the podium after the previous race), the likelihood function would be
$$ \frac{\lambda a}{\lambda a+b+\lambda c+\lambda d+e+f}\cdot \frac{ b}{ b+\lambda c+\lambda d+e+f}\cdot \frac{\lambda c}{ \lambda c+\lambda d+e+f}\cdot \frac{\lambda d}{ \lambda d+e+f}\cdot \frac{ e}{ e+f}\cdot $$
and the null of no podium effect would be $H_0\colon\lambda=0$. Next,
we will take dataset formula1_2007
and define a subset of it which
we will call b
:
a <- read.table("formula1_2007.txt",header=TRUE) a <- a[,seq_len(ncol(a)-1)] b <- a[1:6,1:5] b[,1] <- 1:6 b[,2] <- c(4,5,6,1,2,3) b[,3] <- c(5,6,4,2,3,1) b[,5] <- c(1,2,4,3,8,6) # tests the 'incomplete' functionality of podium() b
Taking Monaco (MON
) as an example, the preceding race was in spain
(ESP
) at which Massa, Hamilton, and Alonso finished on the podium.
The order statistic would be
$$ \text{Raikkonen}\succ \overline{\text{Hamilton}}\succ \overline{\text{Massa}}\succ \overline{\text{Alonso}}\succ \text{Kubica}\succ \text{Heidfeld} $$
where again an overbar signifies that the driver was on the podium in the previous race. The likelihood function for Monaco would thus be
$$ \begin{split} \operatorname{\mathcal{L}}\left(\lambda; p_\text{Raikkonen},\ldots,p_\text{Heidfeld} \right) & = \frac{p_\text{Raikkonen}}{p_\text{Raikkonen}+\lambda p_\text{Hamilton}+\lambda p_\text{Massa}+\lambda p_\text{Alonso}+p_\text{Kubica}+p_\text{Heidfeld}}\ &{\qquad} \cdot\frac{\lambda p_\text{Hamilton}}{\lambda p_\text{Hamilton}+\lambda p_\text{Massa}+\lambda p_\text{Alonso}+p_\text{Kubica}+p_\text{Heidfeld}}\ &{\qquad}\cdot\frac{\lambda p_\text{Massa}}{\lambda p_\text{Massa}+\lambda p_\text{Alonso}+p_\text{Kubica}+p_\text{Heidfeld}}\ &{\qquad}\cdot\frac{\lambda p_\text{Alonso}}{\lambda p_\text{Alonso}+p_\text{Kubika}+p_\text{Heidfeld}}\cdot \frac{p_\text{Kubica}}{p_\text{Kubica}+p_\text{Heidfeld}} \end{split} $$
Ha <- podium(1.4, b) Hb <- podium(2.1, b) ma <- maxp(Ha, give=0, n=1) mb <- maxp(Hb, give=0, n=1)
Ha
summary(Hb)
ma
mb
Above we see that passing $\lambda$ values of 1.4 and 2.1 result in different likelihoods. The difference of about 0.4 units of support is not significant, failing to reach the two-units-of-support criterion of Edwards.
fun <- function(lambda){maxp(podium(lambda,b),give=TRUE)$value} lam <- seq(from=0.5,to=2,len=6) like <- lapply(lam,fun)
like <- unlist(like) plot(lam,like-max(like),type='b') like
Now a real dataset, Formula 1 2007 season:
a <- read.table("formula1_2007.txt",header=TRUE) a <- a[1:10,seq_len(ncol(a)-1)] # rows Raikkonen (1) through Coulthard (10) options(width=90) a
podlike <- function(lambda){maxp(podium(lambda=lambda,a),n=1,give=TRUE)$value}
podlike(1.8) podlike(1.9)
date() lam <- seq(from=0.2,to=1.2,len=15) like <- lapply(lam,podlike) date()
like <- unlist(like) plot(lam,like-max(like),type="b") abline(h=c(0,-2)) abline(v=1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.