Poisson GLM, Cox PH, & degrees of freedom

Introduction

We discuss connections between the Cox proportional hazards model and Poisson generalized linear models as described in Whitehead (1980). We fit comparable models to a sample dataset using coxph(), glm(), phmm(), and glmer() and explore similarities.

A simple Cox PH example

Generate data

We generate proportional hazards mixed model data.

library(phmm)

n <- 50      # total sample size
nclust <- 5  # number of clusters
clusters <- rep(1:nclust,each=n/nclust)
beta0 <- c(1,2)
set.seed(13)

Z <-cbind(Z1=sample(0:1,n,replace=TRUE),
          Z2=sample(0:1,n,replace=TRUE),
          Z3=sample(0:1,n,replace=TRUE))
b <- cbind(rep(rnorm(nclust), each=n/nclust),
           rep(rnorm(nclust), each=n/nclust))
Wb <- matrix(0,n,2)
for( j in 1:2) Wb[,j] <- Z[,j]*b[,j]
Wb <- apply(Wb,1,sum)
T <- -log(runif(n,0,1))*exp(-Z[,c('Z1','Z2')]%*%beta0-Wb)
C <- runif(n,0,1)
time <- ifelse(T<C,T,C)
event <- ifelse(T <= C,1,0)
sum(event)
phmmd <- data.frame(Z)
phmmd$cluster <- clusters
phmmd$time <- time
phmmd$event <- event

Fit the Cox PH model

fit.ph <- coxph(Surv(time, event) ~ Z1 + Z2, 
   phmmd, method="breslow", x=TRUE, y=TRUE)

summary(fit.ph)
# cox partial likelihood
pl <- c()
for(h in 1:length(eventtimes)){
  js <- phmmd$time == eventtimes[h] # j star
  j  <- phmmd$time >= eventtimes[h]
  if(sum(js) > 1) stop("tied event times")
  pl <- c(pl, 
    exp(phmmd[js, "linear.predictors"])/
    sum(exp(phmmd[j, "linear.predictors"])))
}

Next we create data to fit an auxilary Poisson model as described in Whitehead (1980) using the pseudoPoisPHMM() function provided in the phmm package. This function also extracts the linear predictors as estimated from the Cox PH model so that we can calculate likelihoods and degrees of freedom.

Likelihood and degrees of freedom for Poisson GLM from Cox PH parameters

ppd <- as.data.frame(as.matrix(pseudoPoisPHMM(fit.ph)))

# pois likelihood
poisl <- c()
eventtimes <- sort(phmmd$time[phmmd$event == 1])
for(h in 1:length(eventtimes)){
  js <- ppd$time == eventtimes[h] & ppd$m >= 1  # j star
  j  <- ppd$time == eventtimes[h]
  if(sum(js) > 1) stop("tied event times")
  poisl <- c(poisl, 
    ppd[js, "N"]*exp(-1)*exp(ppd[js, "linear.predictors"])/
    sum(ppd[j, "N"]*exp(ppd[j, "linear.predictors"])))
}

Poisson likelihood:

(coxph.pois.loglik = sum(log(poisl)))
coxph.pois.loglik - fit.ph$loglik[2]

Poisson degrees of freedom

(coxph.pois.df = length(fit.ph$coef) + sum(phmmd$event))

Fit auxiliary Poisson GLM

We fit an auxiliary Poisson GLM and note that the parameter estimates for z1 and z2 are identical to the coxph() fit, and the likelihood and degrees of freedom are as expected.

ppd$t <- as.factor(ppd$time)
fit.glm <- glm(m~-1+t+z1+z2+offset(log(N)), 
  ppd, family=poisson)

summary(fit.glm)

cbind(coxph.coef = fit.ph$coef, glm.coef = coef(fit.glm)[c('z1', 'z2')])
cbind(coxph.pois.loglik, glm.loglik=logLik(fit.glm))

The additional parameter estimates correspond to the estimated log baseline hazard, which we verify using the basehaz() function.

bh <- basehaz(fit.ph, centered = FALSE)
cbind(
  coxph.bh.step = log(bh$hazard - c(0,bh$hazard[1:(length(bh$hazard)-1)]))[1:5],
  glm.bh.step = coef(fit.glm)[1:5]
)

Extending to PHMM

Fit PHMM

set.seed(20200316)
fit.phmm <- phmm(Surv(time, event) ~ Z1 + Z2 + (Z1 + Z2|cluster), 
   phmmd, Gbs = 100, Gbsvar = 1000, VARSTART = 1,
   NINIT = 10, MAXSTEP = 100, CONVERG=90)
summary(fit.phmm)

Likelihood and degrees of freedom for Poisson GLMM from PHMM parameters

ppd <- as.data.frame(as.matrix(pseudoPoisPHMM(fit.phmm)))

poisl <- c()
eventtimes <- sort(phmmd$time[phmmd$event == 1])
for(h in 1:length(eventtimes)){
  js <- ppd$time == eventtimes[h] & ppd$m >= 1  # j star
  j  <- ppd$time == eventtimes[h]
  if(sum(js) > 1) stop("tied event times")
  poisl <- c(poisl, 
    ppd[js, "N"]*exp(-1)*exp(ppd[js, "linear.predictors"])/
    sum(ppd[j, "N"]*exp(ppd[j, "linear.predictors"])))
}

Poisson likelihood:

phmm.pois.loglik = sum(log(poisl))
phmm.pois.loglik - fit.phmm$loglik[1]

Poisson degrees of freedom

# Poisson GLMM degrees of freedom  length(unique(x$cluster)) * x$nrandom + x$nfixed
traceHat(fit.phmm, "pseudoPois") # + 2*sum(phmmd$event)

Fit auxiliary Poisson GLMM

We fit an auxiliary Poisson GLMM, although with a general variance-covariance matrix for the random effects (phmm() only fits models with diagonal variance-covariance matrix).

library(lme4)
ppd$t <- as.factor(ppd$time)
fit.lmer <- glmer(m~-1+t+z1+z2+
  (z1+z2|cluster)+offset(log(N)), 
  data=ppd, family=poisson, nAGQ=0)

summary(fit.lmer)$coef
fit.phmm$coef
logLik(fit.lmer)

phmm.pois.loglik - logLik(fit.lmer)[1]
cbind(
  phmm.bh.step = log(fit.phmm$lambda)[1:5],
  glm.bh.step = fixef(fit.lmer)[1:5]
)


Try the phmm package in your browser

Any scripts or data that you put into this service are public.

phmm documentation built on March 26, 2020, 5:10 p.m.