pseudoKM: Fast pseudo values for the survival function and the RMST for...

Description Usage Arguments Details Value See Also Examples

View source: R/pseudoKM.R

Description

Compute pseudo values for the Kaplan-Meier estimator or for the Restricted Mean Survival Time (RMST) based on the Kaplan-Meier estimator. We use a fast approximation of the jackknife method.

Usage

1
pseudoKM(Time, status, tau = NULL)

Arguments

Time

a continuous time variable.

status

an indicator for censoring for the corresponding Time variable.

tau

if equal to NULL, the pseudo-values for the Kaplan-Meier estimator are computed. If a value of tau is specified, the pseudo-values for the RMST with time endpoint equal to tau is computed.

Details

A fast approximation formula is used to compute pseudo-values for right-censored data. Those pseudo-values are computed for the survival function if tau is equal to NULL. In that case, the function returns an approximation of

n\hat S(t)-(n-1)\hat S^{(-i)}(t),

where \hat S is the Kaplan-Meier estimator and \hat S^{(-i)} is the Kaplan-Meier estimator computed when the ith observation has been removed. Those pseudo-values are computed for all i=1,...,n and for all time points t corresponding to all observed times. Those pseudo-values are computed for the RMST if tau is specified. In that case, the function returns an approximation of

n\int_0^{τ}\hat S(t)dt-(n-1)\int_0^{τ}\hat S^{(-i)}(t)dt.

A model based on Generalised Estimating Equation can be further specified through the geese function in the geepack package (see Examples below).

It is also possible to use formulas of the type Surv(Time,status).

Value

For the survival function (when tau equals NULL) it returns pseudoval that contains the pseudo values in a matrix form where the individuals are in column and the times are in the rows and tseq that contains the time points at which the pseudo-values are computed. For the RMST (when tau is specified) it returns pseudoval that contains the pseudo values in vector form.

See Also

Rmst, pseudoIC.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#Illustration of the pseudo-values for the Kaplan-Meier estimator on a simple simulated set
require(survival)
n=100
cpar=0.1
set.seed(28)
TrueTime=rweibull(n,shape=0.5,scale=2)
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens) #observed times
status=TrueTime<=Cens #mean(status) #21% of censoring on average

pseudo_val=pseudoKM(Tobs,status,tau=NULL)
#pseudo_val=pseudoKM(Surv(Tobs,status),tau=NULL)#also works
VonM=pseudo_val$pseudoval
tseq=pseudo_val$tseq

par(mfrow=c(2,2))
plot(tseq,VonM[,3],type="s",xlab="Time",ylab="Pseudo-val. for obs. 3")
abline(v=Tobs[3],lty=2,col="red")
plot(tseq,VonM[,48],type="s",xlab="Time",ylab="Pseudo-val. for obs. 48")
abline(v=Tobs[48],lty=2,col="red")
plot(tseq,VonM[,1],type="s",xlab="Time",ylab="Pseudo-val. for obs. 1")
abline(v=Tobs[1],lty=2,col="red")
plot(tseq,VonM[,5],type="s",xlab="Time",ylab="Pseudo-val. for obs. 5")
abline(v=Tobs[5],lty=2,col="red")
par(mfrow=c(1,1))

plot(tseq,apply(VonM,1,mean),type="s",xlab="Time",ylab="Mean of pseudo-values")
lines(survfit(Surv(Tobs,status)~1),col="red",conf.int=FALSE)

#Example for the Rmst
pseudo_val=pseudoKM(Tobs,status,tau=3)
mean(pseudo_val$pseudoval)
Rmst(Tobs,status,tau=3) #returns the same value

#Illustration on simple simulated data for the pseudo-values for the RMST
#Simulation in the Exponential model (no covariates)

n<-100
lambda=0.5
TrueTime=rexp(n,rate=lambda)
tau=10
tau=c(2,4,6,8,10)
cpar=0.2
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens) #observed times
status=TrueTime<=Cens #mean(status) #36% of censoring on average

pseudo_val=sapply(tau,function(x){pseudoKM(Tobs,status,tau=x)$pseudoval})
apply(pseudo_val,2,mean) #check that mean of pseudo-values return the RMST
sapply(tau,function(x){Rmst(Tobs,status,x)$rmst}) #compute the RMST
sapply(tau,function(x){(1-exp(-lambda*x))/lambda}) #the truth

#Illustration on simple simulated data for the pseudo-values for the RMST
#Simulation in a linear model
set.seed(28)
require(geepack)
n<-4000
sigma=1
tau=400
alpha=c(3)
X=rnorm(n,2,0.25)
epsi=rnorm(n,0,sigma)
TrueTime=alpha*X+epsi
Cens=rep(Inf,n)
Tobs=pmin(TrueTime,Cens)
status=TrueTime<=Cens

VonM=pseudoKM(Tobs,status,tau)$pseudoval
data_VonM<-data.frame(Y=c(VonM),X=X,id=rep(1:n))
resultEst=geese(Y~X,id=id,jack=TRUE,family="gaussian", mean.link="identity",
corstr="independence",scale.fix=TRUE,data=data_VonM)
summary(resultEst)


#Illustration on simulated data for the estimation in a Cox model based on
#the pseudo-values of the Kaplan-Meier estimator
#Compare pseudoKM with jackknife method

set.seed(28)
require(geepack)
require(survival)
n=4000
shape=3;scale=30;cpar=0.01
#Simulate covariates
X=runif(n,0,1)
#simulate Cox model with Weibull baseline distribution
TrueTime=scale*(rexp(n,1)/exp(log(0.5)*X))^(1/shape)
#True regression parameter is log(0.5)=-0.6931472
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens) #observed times
Tsort<-sort(Tobs,index.return=TRUE)
Tobs_ord<-Tsort$x
status=TrueTime<=Cens #mean(status) #25% of censoring on average
status_ord<-status[Tsort$ix]
X_ord=X[Tsort$ix]
SurvEst<-matrix(NA,n,n)
#The jackknife method (takes some time)
for (i in 1:n)
{
  Tjack<-Tobs_ord[-i]
  statusjack<-status_ord[-i]
  resKMjack=survfit(Surv(Tjack,statusjack)~1)
  resKMfunjack=stepfun(c(resKMjack$time),c(1,resKMjack$surv))
  SurvEst[,i]<-resKMfunjack(c(Tobs_ord))#[status_ord==1]
}
#Compute pseudo-values
resKM=survfit(Surv(Tobs,status)~1)
pseudo<-n*matrix(rep(resKM$surv,n),ncol=n)-(n-1)*SurvEst
pseudo=pseudo[n*seq(5,95,by=10)/100,] #we compute the pseudo-values for 10 times
tseq=Tobs_ord[n*seq(5,95,by=10)/100]
M=length(n*seq(5,95,by=10)/100)
data_pseudo<-data.frame(Y=1-c(pseudo),X=rep(X_ord,each=M),Time=rep(tseq,n),id=rep(1:n,each=M))
result=geese(Y~X+as.factor(Time)-1,id=id,jack=TRUE,
family="gaussian", mean.link="cloglog", corstr="independence",scale.fix=TRUE,data=data_pseudo)
summary(result)
###########################################
#with pseudoKM
VonM=pseudoKM(Tobs,status,tau=NULL)$pseudoval
pseudo_VM=VonM[n*seq(5,95,by=10)/100,] #we compute the pseudo-values for 10 times
tseq=Tobs_ord[n*seq(5,95,by=10)/100]
M=length(n*seq(5,95,by=10)/100)
data_pseudo1<-data.frame(Y=1-c(pseudo_VM),X=rep(X,each=M),Time=rep(tseq,n),id=rep(1:n,each=M))
#data_pseudo1<-data.frame(Y=1-c(pseudo_VM),X=rep(X_ord,each=M),Time=rep(tseq,n),id=rep(1:n,each=M))
result1<-geese(Y~X+as.factor(Time)-1,id=id,jack=TRUE,family="gaussian",
mean.link="cloglog", corstr="independence",scale.fix=TRUE,data=data_pseudo1)
summary(result1)

#Illustration on simulated data for the pseudo-values for the RMST
#Simulation in a linear model
set.seed(28)
n<-10000
sigma=3
cpar=0.07
tau=4
alpha=c(5.5,0.25,0.25)
X1=rbinom(n,1,0.5)
X2=rbinom(n,1,0.5)
epsi=runif(n,-sigma,sigma)
TrueTime=alpha[1]+alpha[2]*X1+alpha[3]*X2+epsi
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens)
Tsort<-sort(Tobs,index.return=TRUE)
Tobs_ord<-Tsort$x
status=TrueTime<=Cens #approximately 35% of censoring
status_ord<-status[Tsort$ix]
X_ord1=X1[Tsort$ix];X_ord2=X2[Tsort$ix]
X00=(X1==0 & X2==0)
X01=(X1==0 & X2==1)
X10=(X1==1 & X2==0)
X11=(X1==1 & X2==1)
X_ord00=X00[Tsort$ix];X_ord01=X01[Tsort$ix];X_ord10=X10[Tsort$ix];X_ord11=X11[Tsort$ix]
#The true value of the parameters are c(3.812552,0.05705492,0.05730445,0.1044318)
#Those values were based on a Monte-Carlo experiment with n=10 000 000, using the true times

VonM=pseudoKM(Tobs,status,tau)$pseudoval
data_VonM<-data.frame(Y=c(VonM),X2=X01,X3=X10,X4=X11,id=rep(1:n))
resultEst=geese(Y~X2+X3+X4,id=id,jack=TRUE,family="gaussian",
mean.link="identity", corstr="independence",scale.fix=TRUE,data=data_VonM)
summary(resultEst)

obouaziz/FastPseudo documentation built on Dec. 22, 2021, 4:12 a.m.