Description Usage Arguments Details Value See Also Examples
Compute pseudo values for the Kaplan-Meier estimator or for the Restricted Mean Survival Time (RMST) based on the Kaplan-Meier estimator. We use a fast approximation of the jackknife method.
1 |
Time |
a continuous time variable. |
status |
an indicator for censoring for the corresponding Time variable. |
tau |
if equal to NULL, the pseudo-values for the Kaplan-Meier estimator are computed. If a value of tau is specified, the pseudo-values for the RMST with time endpoint equal to tau is computed. |
A fast approximation formula is used to compute pseudo-values for right-censored data. Those pseudo-values are computed
for the survival function if tau
is equal to NULL
. In that case, the function returns an approximation of
n\hat S(t)-(n-1)\hat S^{(-i)}(t),
where \hat S is the Kaplan-Meier estimator and \hat S^{(-i)} is the Kaplan-Meier estimator computed when the ith
observation has been removed. Those pseudo-values are computed for all i=1,...,n and for all time points t corresponding
to all observed times. Those pseudo-values are computed for the RMST if tau
is specified. In that case, the function returns an approximation of
n\int_0^{τ}\hat S(t)dt-(n-1)\int_0^{τ}\hat S^{(-i)}(t)dt.
A model based on Generalised Estimating Equation can be further specified through the geese
function
in the geepack
package (see Examples below).
It is also possible to use formulas of the type Surv(Time,status).
For the survival function (when tau equals NULL) it returns pseudoval
that contains the pseudo values
in a matrix form where the individuals are in column and the times are in the rows and tseq
that contains the
time points at which the pseudo-values are computed.
For the RMST (when tau is specified) it returns pseudoval
that contains the pseudo values in vector form.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | #Illustration of the pseudo-values for the Kaplan-Meier estimator on a simple simulated set
require(survival)
n=100
cpar=0.1
set.seed(28)
TrueTime=rweibull(n,shape=0.5,scale=2)
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens) #observed times
status=TrueTime<=Cens #mean(status) #21% of censoring on average
pseudo_val=pseudoKM(Tobs,status,tau=NULL)
#pseudo_val=pseudoKM(Surv(Tobs,status),tau=NULL)#also works
VonM=pseudo_val$pseudoval
tseq=pseudo_val$tseq
par(mfrow=c(2,2))
plot(tseq,VonM[,3],type="s",xlab="Time",ylab="Pseudo-val. for obs. 3")
abline(v=Tobs[3],lty=2,col="red")
plot(tseq,VonM[,48],type="s",xlab="Time",ylab="Pseudo-val. for obs. 48")
abline(v=Tobs[48],lty=2,col="red")
plot(tseq,VonM[,1],type="s",xlab="Time",ylab="Pseudo-val. for obs. 1")
abline(v=Tobs[1],lty=2,col="red")
plot(tseq,VonM[,5],type="s",xlab="Time",ylab="Pseudo-val. for obs. 5")
abline(v=Tobs[5],lty=2,col="red")
par(mfrow=c(1,1))
plot(tseq,apply(VonM,1,mean),type="s",xlab="Time",ylab="Mean of pseudo-values")
lines(survfit(Surv(Tobs,status)~1),col="red",conf.int=FALSE)
#Example for the Rmst
pseudo_val=pseudoKM(Tobs,status,tau=3)
mean(pseudo_val$pseudoval)
Rmst(Tobs,status,tau=3) #returns the same value
#Illustration on simple simulated data for the pseudo-values for the RMST
#Simulation in the Exponential model (no covariates)
n<-100
lambda=0.5
TrueTime=rexp(n,rate=lambda)
tau=10
tau=c(2,4,6,8,10)
cpar=0.2
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens) #observed times
status=TrueTime<=Cens #mean(status) #36% of censoring on average
pseudo_val=sapply(tau,function(x){pseudoKM(Tobs,status,tau=x)$pseudoval})
apply(pseudo_val,2,mean) #check that mean of pseudo-values return the RMST
sapply(tau,function(x){Rmst(Tobs,status,x)$rmst}) #compute the RMST
sapply(tau,function(x){(1-exp(-lambda*x))/lambda}) #the truth
#Illustration on simple simulated data for the pseudo-values for the RMST
#Simulation in a linear model
set.seed(28)
require(geepack)
n<-4000
sigma=1
tau=400
alpha=c(3)
X=rnorm(n,2,0.25)
epsi=rnorm(n,0,sigma)
TrueTime=alpha*X+epsi
Cens=rep(Inf,n)
Tobs=pmin(TrueTime,Cens)
status=TrueTime<=Cens
VonM=pseudoKM(Tobs,status,tau)$pseudoval
data_VonM<-data.frame(Y=c(VonM),X=X,id=rep(1:n))
resultEst=geese(Y~X,id=id,jack=TRUE,family="gaussian", mean.link="identity",
corstr="independence",scale.fix=TRUE,data=data_VonM)
summary(resultEst)
#Illustration on simulated data for the estimation in a Cox model based on
#the pseudo-values of the Kaplan-Meier estimator
#Compare pseudoKM with jackknife method
set.seed(28)
require(geepack)
require(survival)
n=4000
shape=3;scale=30;cpar=0.01
#Simulate covariates
X=runif(n,0,1)
#simulate Cox model with Weibull baseline distribution
TrueTime=scale*(rexp(n,1)/exp(log(0.5)*X))^(1/shape)
#True regression parameter is log(0.5)=-0.6931472
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens) #observed times
Tsort<-sort(Tobs,index.return=TRUE)
Tobs_ord<-Tsort$x
status=TrueTime<=Cens #mean(status) #25% of censoring on average
status_ord<-status[Tsort$ix]
X_ord=X[Tsort$ix]
SurvEst<-matrix(NA,n,n)
#The jackknife method (takes some time)
for (i in 1:n)
{
Tjack<-Tobs_ord[-i]
statusjack<-status_ord[-i]
resKMjack=survfit(Surv(Tjack,statusjack)~1)
resKMfunjack=stepfun(c(resKMjack$time),c(1,resKMjack$surv))
SurvEst[,i]<-resKMfunjack(c(Tobs_ord))#[status_ord==1]
}
#Compute pseudo-values
resKM=survfit(Surv(Tobs,status)~1)
pseudo<-n*matrix(rep(resKM$surv,n),ncol=n)-(n-1)*SurvEst
pseudo=pseudo[n*seq(5,95,by=10)/100,] #we compute the pseudo-values for 10 times
tseq=Tobs_ord[n*seq(5,95,by=10)/100]
M=length(n*seq(5,95,by=10)/100)
data_pseudo<-data.frame(Y=1-c(pseudo),X=rep(X_ord,each=M),Time=rep(tseq,n),id=rep(1:n,each=M))
result=geese(Y~X+as.factor(Time)-1,id=id,jack=TRUE,
family="gaussian", mean.link="cloglog", corstr="independence",scale.fix=TRUE,data=data_pseudo)
summary(result)
###########################################
#with pseudoKM
VonM=pseudoKM(Tobs,status,tau=NULL)$pseudoval
pseudo_VM=VonM[n*seq(5,95,by=10)/100,] #we compute the pseudo-values for 10 times
tseq=Tobs_ord[n*seq(5,95,by=10)/100]
M=length(n*seq(5,95,by=10)/100)
data_pseudo1<-data.frame(Y=1-c(pseudo_VM),X=rep(X,each=M),Time=rep(tseq,n),id=rep(1:n,each=M))
#data_pseudo1<-data.frame(Y=1-c(pseudo_VM),X=rep(X_ord,each=M),Time=rep(tseq,n),id=rep(1:n,each=M))
result1<-geese(Y~X+as.factor(Time)-1,id=id,jack=TRUE,family="gaussian",
mean.link="cloglog", corstr="independence",scale.fix=TRUE,data=data_pseudo1)
summary(result1)
#Illustration on simulated data for the pseudo-values for the RMST
#Simulation in a linear model
set.seed(28)
n<-10000
sigma=3
cpar=0.07
tau=4
alpha=c(5.5,0.25,0.25)
X1=rbinom(n,1,0.5)
X2=rbinom(n,1,0.5)
epsi=runif(n,-sigma,sigma)
TrueTime=alpha[1]+alpha[2]*X1+alpha[3]*X2+epsi
Cens=rexp(n,cpar)
Tobs=pmin(TrueTime,Cens)
Tsort<-sort(Tobs,index.return=TRUE)
Tobs_ord<-Tsort$x
status=TrueTime<=Cens #approximately 35% of censoring
status_ord<-status[Tsort$ix]
X_ord1=X1[Tsort$ix];X_ord2=X2[Tsort$ix]
X00=(X1==0 & X2==0)
X01=(X1==0 & X2==1)
X10=(X1==1 & X2==0)
X11=(X1==1 & X2==1)
X_ord00=X00[Tsort$ix];X_ord01=X01[Tsort$ix];X_ord10=X10[Tsort$ix];X_ord11=X11[Tsort$ix]
#The true value of the parameters are c(3.812552,0.05705492,0.05730445,0.1044318)
#Those values were based on a Monte-Carlo experiment with n=10 000 000, using the true times
VonM=pseudoKM(Tobs,status,tau)$pseudoval
data_VonM<-data.frame(Y=c(VonM),X2=X01,X3=X10,X4=X11,id=rep(1:n))
resultEst=geese(Y~X2+X3+X4,id=id,jack=TRUE,family="gaussian",
mean.link="identity", corstr="independence",scale.fix=TRUE,data=data_VonM)
summary(resultEst)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.