tests/concordance3.R
In survival: Survival Analysis

library(survival)
options(na.action=na.exclude)
aeq <- function(x,y,...)  all.equal(as.vector(x), as.vector(y), ...)

#  Make sure strata is retained, and that the overall variance is correct
fit1 <- coxph(Surv(time, status) ~ age + offset(ph.ecog*0) +strata(sex), lung)
fit2 <- coxph(Surv(time, status) ~ age + ph.ecog +strata(sex), lung)

test <- concordance(fit1, fit2, influence=1)

ksex <- model.frame(fit1)[["strata(sex)"]]
test1 <- concordance(fit1$y ~ fit1$linear.predictors + strata(ksex), 
                     reverse=TRUE, influence=1)
test2 <- concordance(fit1$y ~ fit2$linear.predictors + strata(ksex), 
                     reverse=TRUE, influence=1)
aeq(test$concordance, c(test1$concordance, test2$concordance))
aeq(diag(test$var), c(test1$var[1], test2$var[1]))
aeq(test$dfbeta, cbind(test1$dfbeta, test2$dfbeta))

cvec <- c(-1, 1)
aeq(cvec %*% test$var %*% cvec, sum((test1$dfbeta - test2$dfbeta)^2))

# Time weights
# Start with a very small data set: aml has 23 subjects
#
atest1 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE)
atest2 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE, timewt='S')
atest3 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE, timewt='S/G')
atest4 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE, timewt='n/G2')
# The ranks data frame agrees for all but weights
all.equal(atest1$ranks[, -3], atest2$ranks[, -3])
all.equal(atest1$ranks[, -3], atest3$ranks[, -3])
all.equal(atest1$ranks[, -3], atest4$ranks[, -3])

wt1 <- cbind(atest1$ranks[,"timewt"], atest2$ranks[,"timewt"],
             atest3$ranks[,"timewt"], atest4$ranks[,"timewt"])
            
# survfit0 adds time 0 to the curves
# to break ties between censor/death for G, we need to add an offset to
#  the censoring times.  Since time is integer, .1 works nicely
s1 <- survfit0(survfit(Surv(time, status) ~ 1, aml))
g1 <- survfit0(survfit(Surv(time + .1*(1-status), 1-status) ~1, aml))

# The ingredients of the weights
indx <- match(atest1$ranks[,"time"], s1$time)
nrisk  <- s1$n.risk[indx]
sminus <- s1$surv[indx-1]
gminus <- g1$surv[findInterval(atest1$ranks[,"time"], g1$time)]
n <- nrow(aml)

wt2 <- cbind(nrisk, n*sminus, n*sminus/gminus, nrisk/gminus^2)
aeq(wt1, wt2)

# The sum of weighted ranks should equal (C-D) for a Cox model fit
tfun <- function(cfit, reverse=FALSE) {
    t1 <- sum(cfit$ranks$timewt * cfit$ranks$rank)
    t2 <- cfit$count[1] - cfit$count[2]
    all.equal(unname(t1), unname(t2))
}
tfun(atest1)
tfun(atest2)
tfun(atest3)
tfun(atest4)

# The nafld data set has strong and early censoring (one of the only ones
# in the package that does.) So it is a good check of time weights.
#
nfit <- coxph(Surv(futime, status) ~ male + pspline(age), nafld1)
cn1 <- concordance(nfit, timewt='n', ranks=TRUE)
cn2 <- concordance(nfit, timewt='S', ranks=TRUE)
cn3 <- concordance(nfit, timewt='S/G', ranks=TRUE)
cn4 <- concordance(nfit, timewt='n/G2', ranks=TRUE)

sfit <- survfit0(survfit(Surv(futime, status) ~ 1, nafld1))
gfit <- survfit0(survfit(Surv(futime + .1*(status==0), 1-status) ~0, nafld1))

# The ingredients of the weights
dtime <- cn1$ranks[, "time"]
indx <- match(dtime, sfit$time)
nrisk  <- sfit$n.risk[indx]
sminus <- sfit$surv[indx-1]
gminus <- gfit$surv[findInterval(dtime, gfit$time)]
n <- nrow(nafld1)

wt1 <- cbind(cn1$ranks[, "timewt"], cn2$ranks[,"timewt"],
             cn3$ranks[, "timewt"], cn4$ranks[,"timewt"])
wt2 <- cbind(nrisk, n*sminus, n*sminus/gminus, nrisk/gminus^2)
aeq(wt1, wt2)

rd1 <- cn1$ranks
rd2 <- cn2$ranks
rd3 <- cn3$ranks
all.equal(rd1[c('time', 'rank', 'casewt')], rd2[c('time', 'rank', 'casewt')])
all.equal(rd1[c('time', 'rank', 'casewt')], rd3[c('time', 'rank', 'casewt')])

tfun(cn1)
tfun(cn2)
tfun(cn3)
tfun(cn4)

# Simple check of (time1, time2) data
# First a check on the fastkm2 (internal) routine
test1 <- survfit(Surv(tstart, tstop, status) ~1, cgd, id=id)
nr <- nrow(cgd)
y <- with(cgd, Surv(tstart,tstop, status))
sort1 <- order(-cgd$tstart); sort2 <- order(-cgd$tstop, cgd$status)
if (!exists("Cfastkm2")) Cfastkm2 <- survival:::Cfastkm2  # for my test env
test2 <- .Call(Cfastkm2, y, rep(1.0, nr), order(-cgd$tstart)-1L,
               order(-cgd$tstop, cgd$status) -1L)
ii <- which(test1$n.event>0)
all.equal(test1$time[ii], test2$etime)
all.equal(test1$n.risk[ii], test2$nrisk)
all.equal(c(1, test1$surv[ii[-length(ii)]]), test2$S) # test 2 is lagged

zero <- rep(0, nrow(nafld1))
test3 <- survfit(Surv(futime, status) ~1, nafld1, id=id)
test4 <- with(nafld1, .Call(Cfastkm2, Surv(zero, futime, status), zero+1,
                            seq.int(nrow(nafld1)) -1L,
                            order(-futime, status) -1L))
ii <- which(test3$n.event >0)
all.equal(test3$time[ii], test4$etime)
all.equal(test3$n.risk[ii], test4$nrisk)
all.equal(c(1, test3$surv[ii[-length(ii)]]), test4$S) # test 2 is lagged

# Now a check of concordance
nfitx <- coxph(Surv(zero, futime, status) ~ male + pspline(age), nafld1)
cn1x <- concordance(nfitx, timewt='n', ranks=TRUE)
cn2x <- concordance(nfitx, timewt='S', ranks=TRUE)
all.equal(cn1x$count, cn1$count)
all.equal(cn2x$count, cn2$count)