knitr::opts_chunk$set(fig.width=7)

From help(cox.ph) in mgcv. In mgcv::cox.ph() weights=0 denotes censored.

library(mgcv)
library(survival) ## for data
library("ggplot2")

col1 <- colon[colon$etype==1,] ## concentrate on single event
col1$differ <- as.factor(col1$differ)
col1$sex <- as.factor(col1$sex)

b <- gam(time~s(age,by=sex)+sex+s(nodes)+perfor+rx+obstruct+adhere,
         family=cox.ph(),data=col1,weights=status)

summary(b) 


## plot survival function for patient j...

np <- 300;j <- 6
newd <- data.frame(time=seq(0,3000,length=np))
dname <- names(col1)
for (n in dname) newd[[n]] <- rep(col1[[n]][j],np)
newd$time <- seq(0,3000,length=np)
fv <- predict(b,newdata=newd,type="response",se=TRUE)

plotF <- newd[,c('id','time')]
plotF$survival <- as.numeric(fv$fit)
plotF$survivalU <- as.numeric(fv$fit+2*fv$se.fit)
plotF$survivalL <- pmax(0,fv$fit-2*fv$se.fit)
ggplot(data=plotF,mapping=aes(x=time,y=survival,ymin=survivalL,ymax=survivalU)) +
  geom_line() + geom_ribbon(alpha=0.5,fill='blue') +
  ggtitle(paste('mgcv::cox.ph() gam survival model, patient',j))

The mgcv::cox.ph appears to have a fixed buffer size of 1000 rows.

n <- 1000
print(n)
bigd <- newd[rep(1,n),]
fbig <- predict(b,newdata=bigd,type="response",se=TRUE)
n <- 1001
print(n)
bigd <- newd[rep(1,n),]
fbig <- predict(b,newdata=bigd,type="response",se=TRUE)

Similar calculation using survival::coxph. In survival::coxph, TRUE means event happened (dead, not censored).

library('survival')

dTrain <- col1
dTrain$surv <- with(dTrain,Surv(time,status==1))
fit <- coxph(surv~age:sex+age+sex+nodes+perfor+rx+obstruct+adhere,
               data=dTrain)
summary(fit)
sdat <- survfit(fit,newdata=dTrain[j,])
pFrame <- data.frame(time=sdat$time,
                     survival=sdat$surv,
                     survivalU=sdat$upper,
                     survivalL=sdat$lower,
                     stringsAsFactors = FALSE)
ggplot(data=pFrame,
       mapping=aes(x=time,y=survival,ymin=survivalL,ymax=survivalU)) +
  geom_line() + geom_ribbon(alpha=0.5,fill='blue') +
  ggtitle(paste('survival::coxph model, patient',j))

Similar calculation using QSurvival. In QSurvival valid if the event index is in the range 1:numberOfObservations the event happend, if it is NA our out of range it is censored.

library('QSurvival')

timecut <- 2750
dTrain <- buildQuasiObsForTraining(col1, 
                                   col1$time, ifelse(col1$status==1,col1$time,NA), 'origRowID',
                                   'days', 'recurrence',
                                   targetSize=20000,forceEvent=TRUE,weightsColumnName='wts')
dTrain$cappedDays <- pmin(timecut,dTrain$days)

model <- gam(recurrence~s(age,by=sex)+sex+s(nodes)+perfor+rx+obstruct+adhere+s(cappedDays),
         family=binomial,data=dTrain,weights=wts)
summary(model)

colJ <- col1[j,]

dRes <- buildQuasiObsForApplication(colJ, 
                                    3000,
                                   'origRowID',
                                   'days')
dRes$cappedDays <- pmin(timecut,dRes$days)
pred <- predict(model,newdata=dRes,type='response',se=TRUE)
dRes$hazard <- as.numeric(pred$fit)
# Note: note the same error-bar method as the literature!
dRes$hazardU <- as.numeric(pred$fit+2*pred$se.fit)
dRes$hazardL <- pmax(0,pred$fit-2*pred$se.fit)
dPlot <- QSurvival::summarizeHazard(dRes,'origRowID','days',
                                     c('hazard','hazardU','hazardL'),
                                     survivalColumnName=c('pclean','pcleanU','pcleanL')) 

ggplot(data=dPlot$details,mapping=aes(x=days,y=pclean,ymin=pcleanL,ymax=pcleanU)) +
  geom_line() + geom_ribbon(alpha=0.5,fill='blue') +
  ggtitle(paste('QSurvival discrete time gam logistic model, patient',j))


WinVector/QSurvival documentation built on May 9, 2019, 10:59 p.m.