| cIndex | R Documentation |
Calculates the concordance index for discrete survival models, which does not depend on time. This is the probability that, for a pair of randomly chosen comparable samples, the sample with the higher risk prediction will experience an event before the other sample or belongs to a higher binary class. Estimation of the concordance index requires estimation of the time dependent area under the curve (AUC) with time specific sensitivities and specificities. Additional investigation of these measures allows a more fine grained view on the performance of the discrete survival model.
cIndex(marker, testTime, testEvent, trainTime, trainEvent)
## S3 method for class 'discSurv_cIndex'
plot(x, selectPlot = c("tpr", "fpr", "auc", "margProb", "margSurv"), ...)
## S3 method for class 'discSurvTprUno'
plot(x, ...)
## S3 method for class 'discSurvFprUno'
plot(x, ...)
## S3 method for class 'discSurvAucUno'
plot(x, ...)
marker |
Gives the predicted values of the linear predictor of a regression model (class "numeric"). May also be on the response scale. |
testTime |
New time intervals in the test data (class "integer"). |
testEvent |
Event indicators in the test data (binary vector). |
trainTime |
Time intervals in the training data (class "integer"). |
trainEvent |
Event indicators in the training data (binary vector). |
x |
Object of class "discSurvAucUno" |
... |
Specification of additional arguments in function |
Additional measures are available in the attribute "addMeasures" as list with structure
tprAllTime: Time dependent sensitivities data.
fprAllTime: Time dependent specificities data.
aucAllTime: Time dependent area under the curve data.
MargProb: Marginal distribution of discrete intervals.
MargSurv: Discrete survival curve.
Input: Storage function of objects computed from internal functions.
For each list element tprAllTime, fprAllTime, aucAllTime, MargProb, MargSurv are plot methods available.
Value of discrete concordance index between zero and one (class "numeric").
It is assumed that all time points up to the last observed interval
\left[ a_{q-1}, a_q \right) are available.
Thomas Welchowski t.welchowski@psychologie.uzh.ch
heagertySurvROCdiscSurv
\insertRefschmidDiscMeasurediscSurv
\insertRefunoEvalPreddiscSurv
\insertRefzadehBiasCrossEntropydiscSurv
cIndex
##################################################
# Example with unemployment data and prior fitting
library(Ecdat)
library(caret)
library(mgcv)
data(UnempDur)
summary(UnempDur$spell)
# Extract subset of data
set.seed(635)
IDsample <- sample(1:dim(UnempDur)[1], 100)
UnempDurSubset <- UnempDur [IDsample, ]
set.seed(-570)
TrainingSample <- sample(1:100, 75)
UnempDurSubsetTrain <- UnempDurSubset [TrainingSample, ]
UnempDurSubsetTest <- UnempDurSubset [-TrainingSample, ]
# Convert to long format
UnempDurSubsetTrainLong <- dataLong(dataShort = UnempDurSubsetTrain,
timeColumn = "spell", eventColumn = "censor1")
# Estimate gam with smooth baseline
gamFit <- gam(formula = y ~ s(I(as.numeric(as.character(timeInt)))) +
s(age) + s(logwage), data = UnempDurSubsetTrainLong, family = binomial())
gamFitPreds <- predict(gamFit, newdata = cbind(UnempDurSubsetTest,
timeInt = UnempDurSubsetTest$spell))
# Evaluate C-Index based on short data format
cIndex1 <- cIndex(marker = gamFitPreds,
testTime = UnempDurSubsetTest$spell,
testEvent = UnempDurSubsetTest$censor1,
trainTime = UnempDurSubsetTrain$spell,
trainEvent = UnempDurSubsetTrain$censor1)
cIndex1[1]
# Plot time-dependent AUC
plot(cIndex1, selectPlot="auc")
# Plot time-dependent sensitivities
plot(cIndex1, selectPlot="tpr")
# Plot time-dependent specificities
plot(cIndex1, selectPlot="fpr")
# Plot marginal probabilities
plot(cIndex1, selectPlot="margProb")
# Plot survival curve
plot(cIndex1, selectPlot="margSurv")
#####################################
# Example National Wilm's Tumor Study
library(survival)
head(nwtco)
summary(nwtco$rel)
# Select subset
set.seed(-375)
Indices <- sample(1:dim(nwtco)[1], 500)
nwtcoSub <- nwtco [Indices, ]
# Convert time range to 30 intervals
intLim <- quantile(nwtcoSub$edrel, prob = seq(0, 1, length.out = 30))
intLim [length(intLim)] <- intLim [length(intLim)] + 1
nwtcoSubTemp <- contToDisc(dataShort = nwtcoSub, timeColumn = "edrel", intervalLimits = intLim)
nwtcoSubTemp$instit <- factor(nwtcoSubTemp$instit)
nwtcoSubTemp$histol <- factor(nwtcoSubTemp$histol)
nwtcoSubTemp$stage <- factor(nwtcoSubTemp$stage)
# Split in training and test sample
set.seed(-570)
TrainingSample <- sample(1:dim(nwtcoSubTemp)[1], round(dim(nwtcoSubTemp)[1]*0.75))
nwtcoSubTempTrain <- nwtcoSubTemp [TrainingSample, ]
nwtcoSubTempTest <- nwtcoSubTemp [-TrainingSample, ]
# Convert to long format
nwtcoSubTempTrainLong <- dataLong(dataShort = nwtcoSubTempTrain,
timeColumn = "timeDisc", eventColumn = "rel", timeAsFactor=TRUE)
# Estimate continuation ratio model
inputFormula <- y ~ timeInt + histol + instit + stage
glmFit <- glm(formula = inputFormula, data = nwtcoSubTempTrainLong, family = binomial())
# Event and time points not available for new patients
# -> Average out predictions with marginal distribution of discrete time intervals
# based on training data
margDiscT <- prop.table(table(factor(nwtcoSubTempTrain$timeDisc,
levels=1:max(nwtcoSubTempTrain$timeDisc))))
# -> First interval was not observed as endpoint in training data
# Integrate out time from predictions
# Convert to long format
nwtcoSubTempTestLong <- dataLong(dataShort = nwtcoSubTempTest,
timeColumn = "timeDisc", eventColumn = "rel", timeAsFactor=TRUE,
aggTimeFormat = TRUE, lastTheoInt = max(nwtcoSubTempTrain$timeDisc))
linPreds_time <- predict(glmFit, newdata = nwtcoSubTempTestLong)
linPreds <- aggregate(time ~ ID,
data=data.frame(time=linPreds_time, ID=nwtcoSubTempTestLong$obj),
FUN = function(x) sum(x * margDiscT))[,2]
# Evaluate C-Index based on short data format
cIndex(marker = linPreds,
testTime = as.numeric(as.character(nwtcoSubTempTest$timeDisc)),
testEvent = nwtcoSubTempTest$rel,
trainTime = as.numeric(as.character(nwtcoSubTempTrain$timeDisc)),
trainEvent = nwtcoSubTempTrain$rel)
# 0.6397157
# Comparison to model without covariates
inputFormula2 <- y ~ timeInt
glmFit2 <- glm(formula = inputFormula2, data = nwtcoSubTempTrainLong, family = binomial())
# Integrate out time from predictions
linPreds_time2 <- predict(glmFit2, newdata = nwtcoSubTempTestLong)
linPreds2 <- aggregate(time ~ ID,
data=data.frame(time=linPreds_time2, ID=nwtcoSubTempTestLong$obj),
FUN = function(x) sum(x * margDiscT))[,2]
# Evaluate C-Index based on short data format
cIndex(marker = linPreds2,
testTime = as.numeric(as.character(nwtcoSubTempTest$timeDisc)),
testEvent = nwtcoSubTempTest$rel,
trainTime = as.numeric(as.character(nwtcoSubTempTrain$timeDisc)),
trainEvent = nwtcoSubTempTrain$rel)
# 0.5 -> Model not informative
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.