RoeDeerMass-simulation.R
In OnAge: Test of Between-Group Differences in the Onset of Senescence

### R code from vignette source 'RoeDeerMass-simulation.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: lib (eval = FALSE)
###################################################
## install.packages("OnAge")


###################################################
### code chunk number 2: lib
###################################################
library(OnAge)


###################################################
### code chunk number 3: data
###################################################
data(RoeDeerMassData)
RoeDeerMassData$ID <- factor(RoeDeerMassData$ID)
RoeDeerMassData$cohort <- factor(RoeDeerMassData$cohort)

dataFCH <- RoeDeerMassData[RoeDeerMassData$sex%in%"F"&
                             RoeDeerMassData$population%in%"CH", ]
dataMCH <- RoeDeerMassData[RoeDeerMassData$sex%in%"M"&
                             RoeDeerMassData$population%in%"CH", ]
dataFTF <- RoeDeerMassData[RoeDeerMassData$sex%in%"F"&
                             RoeDeerMassData$population%in%"TF", ]
dataMTF <- RoeDeerMassData[RoeDeerMassData$sex%in%"M"&
                             RoeDeerMassData$population%in%"TF", ]


###################################################
### code chunk number 4: model
###################################################
## b1: function for piecewise regression (transforms x into 0 before bp)
b1 <- function(x, bp) ifelse(x < bp, 0, x - bp)

## Use this function to define the model in which the differential
## onset hypothesis is tested.
ll.real <- function(thr, dataIn){
  logLik(lme4::lmer(body.mass ~ b1(age, thr) + age.at.last.capture + 
                      last.year.of.capture + (1|ID) + (1|cohort),
                    data=dataIn, REML="FALSE"))
}

## Same model using simulated body mass
ll.sim <- function(thr, dataIn){
  logLik(lme4::lmer(body.mass.sim ~ b1(age, thr) + age.at.last.capture + 
                      last.year.of.capture + (1|ID) + (1|cohort),
                    data=dataIn, REML="FALSE"))
}


###################################################
### code chunk number 5: test
###################################################
search.range <- c(6, 12)
search.range.TF <- search.range.CH <- search.range


###################################################
### code chunk number 6: testTF
###################################################
res.tf <- onset.test(ll.real, dataFTF, dataMTF, search.range.TF,
  do.plot=TRUE)


###################################################
### code chunk number 7: testCH
###################################################
res.ch <- onset.test(ll.real, dataFCH, dataMCH, search.range.CH,
   do.plot=TRUE)


###################################################
### code chunk number 8: test-result
###################################################
cat(sprintf("p-value for differential age at onset is %g in 
  Trois Fontaines, %g in Chizé", res.tf$pv, res.ch$pv))


###################################################
### code chunk number 9: simulation
###################################################
sfunc <- function(dataIn, b1, thr, bw.offset, age.effect, 
                  age.last.effect, last.effect, id.s2, coh.s2, s2){
  
    id.dum <- sapply(levels(dataIn$ID), 
      FUN=function(ll) 1*(levels(dataIn$ID)[dataIn$ID] == ll))
    id.effect <- matrix(rnorm(ncol(id.dum), sd=sqrt(id.s2)), ncol=1)
  
    coh.dum <- sapply(levels(dataIn$cohort), 
      FUN=function(ll) 1*(levels(dataIn$cohort)[dataIn$cohort] == ll))
    coh.effect <- matrix(rnorm(ncol(coh.dum), sd=sqrt(coh.s2)), ncol=1)
  
    dataIn$body.mass.sim <- bw.offset + 
      (b1(dataIn$age, thr) * age.effect + 
         dataIn$age.at.last.capture * age.last.effect +
         dataIn$last.year.of.capture * last.effect + 
         id.dum %*% id.effect +
         coh.dum %*% coh.effect + 
         rnorm(nrow(dataIn), sd=sqrt(s2)))
  
    return(dataIn)
}

## Noise level in the linear model
s2 <- 10
## True onset for males and females under H0
thr.m <- thr.f <- 7
## Difference of onsets under H1
thr.delta <- 2
## Baseline body mass
bw.offset <- 10
## Fixed effects on bw
age.effect <- -1
age.last.effect <- 1
last.effect <- 1
## Variance of random effects on bw
id.s2 <- 1
coh.s2 <- 1


###################################################
### code chunk number 10: load-simu
###################################################
# data('pre-computed-simulation')
load(file = "pre-computed-simulation.RData")


###################################################
### code chunk number 11: run (eval = FALSE)
###################################################
## ## Number of simulations we want to run under H0 and H1.
## n.h0 <- 5000
## n.h1 <- 5000
## n.rep <- n.h0 + n.h1
## 
## pv.tf <- pv.ch <- llr.tf <- llr.ch <- 
##   lh1.tf <- lh1.ch <- lh0.tf <- lh0.ch <- rep(NA, n.rep)
## 
## cvg.tf <- cvg.ch <- rep(NA, n.rep)
## warn.tf <- warn.ch <- rep(FALSE, n.rep)
## 
## data.f.ch <- data.m.ch <- data.f.tf <- data.m.tf <- list()
## 
## ftf.grad <- ftf.feval <- ftf.joint.grad <- ftf.joint.feval <- rep(NA, n.rep) 
## mtf.grad <- mtf.feval <- mtf.joint.grad <- mtf.joint.feval <- rep(NA, n.rep) 
## fch.grad <- fch.feval <- fch.joint.grad <- fch.joint.feval <- rep(NA, n.rep) 
## mch.grad <- mch.feval <- mch.joint.grad <- mch.joint.feval <- rep(NA, n.rep) 
## 
## ## Range over which we optimize the onset. In this example, going up
## ## to 17 yo makes the simulation unstable: the loglikelihood under H1
## ## has a (suboptimal) local maximum in large values (for which we have
## ## few samples), leading to inaccurate (and sometimes negative)
## ## loglikelihood ratio statistics.
## search.range <- c(6, 12) # data not available before 6 years old
## search.range.TF <- search.range.CH <- search.range
## 
## ## Main loop for simulations
## for(rr in 1:n.rep){
##     print(rr)
##     ## Simulate data from the Chizé population
##     data.f.ch[[rr]] <- sfunc(dataFCH, b1, thr.f, bw.offset, age.effect, 
##         age.last.effect, last.effect, id.s2, coh.s2, s2)
##     data.m.ch[[rr]] <- sfunc(dataMCH, b1, thr.m, bw.offset, age.effect, 
##         age.last.effect, last.effect, id.s2, coh.s2, s2)
##     
##     ## Simulate data from the Trois Fontaines population
##     data.f.tf[[rr]] <- sfunc(dataFTF, b1, thr.f, bw.offset, age.effect, 
##         age.last.effect, last.effect, id.s2, coh.s2, s2)
##     data.m.tf[[rr]] <- sfunc(dataMTF, b1, thr.m, bw.offset, age.effect, 
##         age.last.effect, last.effect, id.s2, coh.s2, s2)
##     
##     ## Compute the likelihood ratio test for this Trois Fontaines simulation
##     test.TF <- tryCatch({res=onset.test(ll.sim, data.f.tf[[rr]], 
##           data.m.tf[[rr]], search.range.TF, CI.lvl=NA)
##         res$warn=FALSE
##         res},
##         warning=function(w) {
##             res <- onset.test(ll.sim, data.f.tf[[rr]], 
##               data.m.tf[[rr]], search.range.TF, CI.lvl=NA)
##             res$warn <- TRUE
##             return(res)
##         })
##     llr.tf[rr] <- test.TF$llr
##     lh1.tf[rr] <- test.TF$lh1
##     lh0.tf[rr] <- test.TF$lh0
##     pv.tf[rr] <- test.TF$pv
##     cvg.tf[rr] <- test.TF$cvg.ok
##     warn.tf[rr] <- test.TF$warn
##     ## Compute the likelihood ratio test for this Chizé simulation
##     test.CH <- tryCatch({res=onset.test(ll.sim, data.f.ch[[rr]], 
##           data.m.ch[[rr]], search.range.CH, CI.lvl=NA)
##         res$warn=FALSE
##         res},
##         warning=function(w) {
##             res <- onset.test(ll.sim, data.f.ch[[rr]], 
##               data.m.ch[[rr]], search.range.CH, CI.lvl=NA)
##             res$warn <- TRUE
##             return(res)
##         })
##     llr.ch[rr] <- test.CH$llr
##     lh1.ch[rr] <- test.CH$lh1
##     lh0.ch[rr] <- test.CH$lh0
##     pv.ch[rr] <- test.CH$pv
##     cvg.ch[rr] <- test.CH$cvg.ok
##     warn.ch[rr] <- test.CH$warn
##     
##     ## Optimality check: amplitude of the gradient (should be close to
##     ## 0) and number of function evaluation (if equal to the largest
##     ## allowed value, it is likely that the optimization did not
##     ## converge).
##     ftf.lm <- lmer(body.mass.sim ~ b1(age, test.TF$est.1) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.f.tf[[rr]], REML='FALSE')
##     
##     mtf.lm <- lmer(body.mass.sim ~ b1(age, test.TF$est.2) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.m.tf[[rr]], REML='FALSE')
##     
##     ftf.joint.lm <- lmer(body.mass.sim ~ b1(age, test.TF$est.joint) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.f.tf[[rr]], REML='FALSE')
##     
##     mtf.joint.lm <- lmer(body.mass.sim ~ b1(age, test.TF$est.joint) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.m.tf[[rr]], REML='FALSE')
##     
##     fch.lm <- lmer(body.mass.sim ~ b1(age, test.CH$est.1) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.f.ch[[rr]], REML='FALSE')
## 
##     mch.lm <- lmer(body.mass.sim ~ b1(age, test.CH$est.2) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.m.ch[[rr]], REML='FALSE')
##     
##     fch.joint.lm <- lmer(body.mass.sim ~ b1(age, test.CH$est.joint) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.f.ch[[rr]], REML='FALSE')
##     
##     mch.joint.lm <- lmer(body.mass.sim ~ b1(age, test.CH$est.joint) + 
##       age.at.last.capture + last.year.of.capture + 
##       (1|ID) + (1|CohortF), data=data.m.ch[[rr]], REML='FALSE')
##     
##     ftf.grad[rr] <- max(abs(ftf.lm@optinfo$derivs$gradient))
##     ftf.feval[rr] <- ftf.lm@optinfo$feval
##     mtf.grad[rr] <- max(abs(mtf.lm@optinfo$derivs$gradient))
##     mtf.feval[rr] <- mtf.lm@optinfo$feval
##     ftf.joint.grad[rr] <- max(abs(ftf.joint.lm@optinfo$derivs$gradient))
##     ftf.joint.feval[rr] <- ftf.joint.lm@optinfo$feval
##     mtf.joint.grad[rr] <- max(abs(mtf.joint.lm@optinfo$derivs$gradient))
##     mtf.joint.feval[rr] <- mtf.joint.lm@optinfo$feval
##     fch.grad[rr] <- max(abs(fch.lm@optinfo$derivs$gradient))
##     fch.feval[rr] <- fch.lm@optinfo$feval
##     mch.grad[rr] <- max(abs(mch.lm@optinfo$derivs$gradient))
##     mch.feval[rr] <- mch.lm@optinfo$feval
##     fch.joint.grad[rr] <- max(abs(fch.joint.lm@optinfo$derivs$gradient))
##     fch.joint.feval[rr] <- fch.joint.lm@optinfo$feval
##     mch.joint.grad[rr] <- max(abs(mch.joint.lm@optinfo$derivs$gradient))
##     mch.joint.feval[rr] <- mch.joint.lm@optinfo$feval
##     
##     if(rr == n.h0){
##       thr.f = thr.m + thr.delta
##     }
## }


###################################################
### code chunk number 12: cvg
###################################################
cat(sprintf('Negative log-likelihood obtained in proportion %g of the Trois
  Fontaine and %g of the Chizé simulations', mean(!cvg.tf), mean(!cvg.ch)))


###################################################
### code chunk number 13: optim
###################################################
summary(ftf.grad)
summary(mtf.grad)
summary(ftf.joint.grad)
summary(mtf.joint.grad)
summary(fch.grad)
summary(mch.grad)
summary(fch.joint.grad)
summary(mch.joint.grad)

summary(ftf.feval)
summary(mtf.feval)
summary(ftf.joint.feval)
summary(mtf.joint.feval)
summary(fch.feval)
summary(mch.feval)
summary(fch.joint.feval)
summary(mch.joint.feval)


###################################################
### code chunk number 14: load-failed-run (eval = FALSE)
###################################################
## failed.simulation <- which(!cvg.tf)[1]
## failed.f.tf <- data.f.tf[[failed.simulation]]
## failed.m.tf <- data.m.tf[[failed.simulation]]


###################################################
### code chunk number 15: profile-simu-plot
###################################################
res.failed.tf <- onset.test(ll.sim, failed.f.tf, failed.m.tf, 
                            search.range.TF, do.plot=TRUE)


###################################################
### code chunk number 16: QQPlot
###################################################
oldpar <- par()
par(mfrow=c(1, 2), pty="s", mar=c(2, 6, 1, 1)+0.1)
qqplot(rchisq(1e4, 1), llr.ch[1:n.h0], main='Chizé', pch=20,
  xlab=expression(chi[2](1) ~ 'quantiles'), ylab=expression(
    'Empirical quantiles of the log-likelihood ratio test statistic under'~H[0]))
abline(a=0, b=1, col='red')
qqplot(rchisq(1e4, 1), llr.tf[1:n.h0], main='Trois Fontaines', pch=20,
  xlab=expression(chi[2](1) ~ 'quantiles'), ylab=expression(
    'Empirical quantiles of the log-likelihood ratio test statistic under'~H[0]))
abline(a=0, b=1, col='red')
par(oldpar)


###################################################
### code chunk number 17: calibration
###################################################
ch.thr <- unique(sort(pv.ch))
tf.thr <- unique(sort(pv.tf))
ch.lvl <- ch.pwr <- rep(-1, length(ch.thr))
tf.lvl <- tf.pwr <- rep(-1, length(tf.thr))
for(tt in 1:length(ch.thr)){
    ch.lvl[tt] <- mean((pv.ch[1:n.h0] <= ch.thr[tt]))
    ch.pwr[tt] <- mean((pv.ch[-(1:n.h0)] <= ch.thr[tt]))
}
for(tt in 1:length(tf.thr)){
    tf.lvl[tt] <- mean((pv.tf[1:n.h0] <= tf.thr[tt]))
    tf.pwr[tt] <- mean((pv.tf[-(1:n.h0)] <= tf.thr[tt]))
}
par(pty="s", mar=c(2, 4, 0, 2) + 0.1)
plot(ch.thr, ch.lvl, cex.lab=1.5,
     xlab='P-value threshold', 
     ylab='False positive rate', 
     col='blue', type="s", lwd=2,
     xlim=c(0, 0.1), ylim=c(0, 0.1))
lines(tf.thr, tf.lvl, col='red', type='l',lwd=2)
abline(a=0, b=1)
legend("bottomright", c('Chizé', 'Trois Fontaines'), lwd=2,
       col=c('blue', 'red'))


###################################################
### code chunk number 18: ROC
###################################################
## Plot calibration and ROC curve
par(pty="s", mar=c(2, 4, 0, 2) + 0.1)
plot(ch.lvl, ch.pwr, cex.lab=1.5,
     xlab='False positive rate',
     ylab='True positive rate',
     col='blue', type="s", lwd=2)
lines(tf.lvl, tf.pwr, col='red', type='l', lwd=2)
abline(a=0, b=1)
legend("bottomright", c('Chizé', 'Trois Fontaines'), lwd=2,
       col=c('blue', 'red'))


###################################################
### code chunk number 19: sessionInfo
###################################################
sessionInfo()