# tests/refine2.R

#
# Check out the code on a simulated data set.  On the first cut it appeared
#  to do very badly; this turned out to be a bug in coxfit6d.c, which
#  miscalculated the linear predictor when there were random slopes.
#
library(coxme)
aeq <- function(x,y) all.equal(as.vector(x), as.vector(y))

set.seed(1979)
mkdata <- function(n, beta=c(.4, .1), sitehaz=c(.5,1.5, 2,1)) {
    nsite <- length(sitehaz)
    site <- rep(1:nsite, each=n)
    trt1 <- rep(0:1, length.out=n*nsite)
    hazard <- sitehaz[site] + beta[1]*trt1 + beta[2]*trt1 * (site-mean(site))
    stime <- rexp(n*nsite, exp(hazard))
    q80 <- quantile(stime, .8)
    data.frame(site=site,
               trt1 = trt1,
               trt2 = 1-trt1,
               futime= pmin(stime, q80),
               status= ifelse(stime>q80, 0, 1),
               hazard=hazard
               )
    }

trdata <- mkdata(150)  #150 enrolled per site
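# A rough sanity check: censoring occurs at the 80th percentile of the
#  simulated times, so about 80% of the subjects should be events
mean(trdata$status)   # expect a value near 0.8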
#fixf <- coxph(Surv(futime, status) ~ factor(site)*trt1, trdata)
#pfit <- pyears(Surv(futime, status) ~ site + trt1, trdata)
#(pfit$event/sum(pfit$event))/ (pfit$pyears/sum(pfit$pyears))

set.seed(50)
#nsim <- 500
nsim <- 20  # speedup for CRAN

fit <- coxme(Surv(futime, status) ~ trt2 + (1 + trt2 | site), trdata,
             refine.n=nsim, refine.detail=TRUE)
debug <- fit$refine.detail
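# For orientation: the components of refine.detail exercised below are
#  bmat, log, penalty1, penalty2, tdens, gdens, errhat, and gkmat
names(debug)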

# Recreate the variance-covariance and sim data that was used
set.seed(50)
bmat <- matrix(rnorm(8*nsim), nrow=8)
hmatb <- fit$hmat[1:8, 1:8]
hmat.inv <- as.matrix(solve(hmatb))

chidf <- coxme.control()$refine.df

b2 <- backsolve(hmatb, bmat, upper=TRUE)
htran <- as(hmatb, "dtCMatrix")  #verify that backsolve works correctly
all.equal(as.matrix(htran %*% b2), bmat, check.attributes=FALSE)

b2 <- scale(b2, center=FALSE, scale=sqrt(rchisq(nsim, df=chidf)/chidf))
b3 <- b2 + unlist(ranef(fit)) 
aeq(b3, debug$bmat)


temp <- VarCorr(fit)[[1]]
sigma <- diag(c(rep(temp[1],4), rep(temp[4],4)))
sigma[cbind(1:4,5:8)] <- temp[3]* sqrt(temp[1] * temp[4])
sigma[cbind(5:8,1:4)] <- temp[3]* sqrt(temp[1] * temp[4])
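# Equivalent construction, as a cross-check: sigma is the Kronecker product
#  of the 2x2 variance-covariance matrix of (intercept, slope) with a 4x4
#  identity, one block per site.  This assumes, as the code above does, that
#  temp[3] holds the correlation.
vmat <- matrix(c(temp[1], temp[3]*sqrt(temp[1]*temp[4]),
                 temp[3]*sqrt(temp[1]*temp[4]), temp[4]), 2, 2)
aeq(sigma, kronecker(vmat, diag(4)))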

if (!is.null(debug))
    all.equal(as.matrix(gchol(sigma), ones=FALSE),
              as.matrix(debug$gkmat, ones=FALSE))

coxll <- double(nsim)
for (i in 1:nsim) {
    lp <- trdata$trt2 * fixef(fit) + b3[trdata$site,i] +
           b3[trdata$site +4,i] * trdata$trt2
    tfit <- coxph(Surv(futime, status) ~ offset(lp), trdata)
    coxll[i] <- tfit$loglik
    }
if (!is.null(debug)) all.equal(coxll, debug$log)

# How does the direct average compare to the IPL?
# (This is not guaranteed to be accurate, just curious)
constant <- .5*(log(2*pi) + sum(log(diag(gchol(sigma)))))
fit$loglik[2] + c(IPL=0, sim=log(mean(exp(coxll - fit$loglik[2]))) - constant)
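# The same average in a numerically stabler form (a sketch of the standard
#  log-mean-exp trick: subtract the maximum before exponentiating)
logmeanexp <- function(x) { xmax <- max(x); xmax + log(mean(exp(x - xmax))) }
fit$loglik[2] + c(IPL=0, sim= logmeanexp(coxll - fit$loglik[2]) - constant)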

# Compute the Taylor series for the IPL
bhat <- unlist(ranef(fit))
b.sig <- t(b2) %*% fit$hmat[1:8, 1:8]  # (b - bhat) times the Cholesky factor of H
taylor <- rowSums(b.sig^2)/2   # vector of (b - bhat)' H (b - bhat)/2
aeq(taylor, debug$penalty2)
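# Equivalently, by transposition, rowSums((b'R)^2) = colSums((R'b)^2)
aeq(taylor, colSums((t(hmatb) %*% b2)^2)/2)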

# Look at how well the Taylor series does
pen <- solve(sigma)
simpen <- colSums(b3 *(pen%*%b3))/2 #penalty for each simulated b
aeq(simpen, debug$penalty1)

#plot(coxll- simpen, fit$log[3] - taylor, 
#     xlab="Actual PPL for simulated points", 
#     ylab="Taylor series approximation")
#abline(0,1)
#
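# A numeric stand-in for the plot above: the two quantities should track
#  each other closely if the Taylor series is a good local approximation
cor(coxll - simpen, fit$loglik[3] - taylor)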

# And now I need the Gaussian integration constant and the t-density
require(mvtnorm)
tdens <- dmvt(t(b3), delta=unlist(ranef(fit)), sigma=hmat.inv, df=chidf)
aeq(tdens, debug$tdens)
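# Cross-check dmvt by hand for the first draw, using the multivariate t
#  log-density formula (dmvt in mvtnorm returns the log density by default)
d <- 8
x1 <- b3[,1] - unlist(ranef(fit))
quad <- sum(x1 * solve(hmat.inv, x1))   # (x - delta)' sigma-inverse (x - delta)
t1 <- lgamma((chidf + d)/2) - lgamma(chidf/2) - (d/2)*log(chidf*pi) -
      .5*as.numeric(determinant(hmat.inv, log=TRUE)$modulus) -
      ((chidf + d)/2)*log(1 + quad/chidf)
aeq(t1, tdens[1])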
 
# The normalization constant for the Gaussian penalty
temp <- determinant(sigma, log=TRUE)
gnorm <- -(4*log(2*pi) + .5*temp$sign *temp$modulus)
aeq(gnorm, debug$gdens)
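# Equivalently, gnorm is the log density of a N(0, sigma) evaluated at zero
aeq(gnorm, dmvnorm(rep(0,8), sigma=sigma, log=TRUE))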

m2 <- fit$loglik[2]
errhat <- exp(coxll + gnorm - (simpen + tdens + m2)) -
          exp(fit$loglik[3] + gnorm - (taylor + tdens + m2))
if (!is.null(debug)) {
    aeq(errhat, debug$errhat)
    }
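# A final summary (a sketch, assuming the refinement coxme reports is the
#  average per-draw correction along with its Monte Carlo standard error)
c(correction = mean(errhat), std.err = sd(errhat)/sqrt(nsim))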
