RobustAFT-package: Robust Accelerated Failure Time Model Fitting

Description Details Author(s) References Examples

Description

This package computes the truncated maximum likelihood regression estimates described in Marazzi and Yohai (2004) and Locatelli et al. (2010). The error distribution is assumed to follow approximately a Gaussian or a log-Weibull distribution. The cut-off values for outlier rejection are fixed or adaptive. The main functions of this package are TML.noncensored and TML.censored.

Details

Package: RobustAFT
Type: Package
Version: 1.0
Date: 2011-05-01
License: GPL-2 or later
LazyLoad: yes

Author(s)

Alfio Marazzi <Alfio.Marazzi@chuv.ch>, Jean-Luc Muralti

Maintainer: A. Randriamiharisoa <Alex.Randriamiharisoa@chuv.ch>

References

Marazzi A., Yohai V. (2004). Adaptively truncated maximum likelihood regression with asymmetric errors. Journal of Statistical Planning and Inference, 122, 271-291.

Locatelli I., Marazzi A., Yohai V. (2010). Robust accelerated failure time regression. Computational Statistics and Data Analysis, 55, 874-887.

Marazzi A. (1993) Algorithm, Routines, and S functions for Robust Statistics. Wadsworth & Brooks/cole, Pacific Grove, California.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# Example 1. This is the example described in Marazzi and Yohai (2004).
# ---------
# The two following auxiliary functions, not included in the library, 
#must be loaded.
# ------------- Auxiliary functions -------------------------------------

SDmux.lw <- function(x,theta,sigma,COV){
# Standard deviation of the conditional mean estimate: log-Weibull case
np <- length(theta); nc <- ncol(COV); nr <- nrow(COV)
if (np!=length(x)) cat("length(x) must be the same as length(theta)")
if (nc!=nr)        cat("COV is not a square matrix")
if (nc!=(np+1))    cat("ncol(COV) must be the same as length(theta)+1")
log.mu.x <- t(x)%*%theta+lgamma(1+sigma) # log of conditional mean estimate
mu.x     <- exp(log.mu.x)                # conditional mean estimate
dg       <- digamma(1+sigma)
COV.TT   <- COV[1:np,1:np]
Var.S    <- COV[(np+1),(np+1)]
COV.TS   <- COV[1:np,(np+1)]
V.mu.x   <- t(x)%*%COV.TT%*%x + dg^2*Var.S + 2*dg*(t(x)%*%COV.TS)
SD.mu.x  <- as.numeric((sqrt(V.mu.x))*mu.x)
SD.mu.x}

plt <- function(LOS,Cost,Adm,theta.fr,sigma.fr,sd0.fr,sd1.fr,theta.ml,
                sigma.ml,sd0.ml,sd1.ml){
# Plot of the conditional mean and confidence intervals: log-Weibull case
par(mfrow=c(1,2),oma=c(0,0,2,0))
plot(LOS,Cost,type="n")
points(LOS[Adm==0],Cost[Adm==0],pch=1)
points(LOS[Adm==1],Cost[Adm==1],pch=16,col=2)
x0t   <- x0%*%theta.fr; x1t <- x1t <- x1%*%theta.fr
lines(l0,exp(x0t)*gamma(1+sigma.fr))
lines(l0,exp(x1t)*gamma(1+sigma.fr),col=2)
z0min <- exp(x0t)*gamma(1+sigma.fr)-2.576*sd0.fr
z0max <- exp(x0t)*gamma(1+sigma.fr)+2.576*sd0.fr
z1min <- exp(x1t)*gamma(1+sigma.fr)-2.576*sd1.fr
z1max <- exp(x1t)*gamma(1+sigma.fr)+2.576*sd1.fr
lines(l0,z0min,lty=2,col=1)
lines(l0,z0max,lty=2,col=1)
lines(l0,z1min,lty=2,col=1)
lines(l0,z1max,lty=2,col=1)
polygon(c(l0,rev(l0)), c(z0min,rev(z0max)), border=FALSE, density=10, angle=90)
polygon(c(l0,rev(l0)), c(z1min,rev(z1max)), border=FALSE, density=12, angle=90,col=2)
plot(LOS,Cost,type="n")
points(LOS[Adm==0],Cost[Adm==0],pch=1)
points(LOS[Adm==1],Cost[Adm==1],pch=16,col=2)
x0t   <- x0%*%theta.ml; x1t <- x1t <- x1%*%theta.ml
lines(l0,exp(x0t)*gamma(1+sigma.ml))
lines(l0,exp(x1t)*gamma(1+sigma.ml),col=2)
z0min <- exp(x0t)*gamma(1+sigma.ml)-2.576*sd0.ml
z0max <- exp(x0t)*gamma(1+sigma.ml)+2.576*sd0.ml
z1min <- exp(x1t)*gamma(1+sigma.ml)-2.576*sd1.ml
z1max <- exp(x1t)*gamma(1+sigma.ml)+2.576*sd1.ml
lines(l0,z0min,lty=2,col=1)
lines(l0,z0max,lty=2,col=1)
lines(l0,z1min,lty=2,col=1)
lines(l0,z1max,lty=2,col=1)
polygon(c(l0,rev(l0)), c(z0min,rev(z0max)), border=FALSE, density=10, angle=90)
polygon(c(l0,rev(l0)), c(z1min,rev(z1max)), border=FALSE, density=12, angle=90,col=2)}

#------ End of auxiliary functions --------------------------------------------------

library(RobustAFT)

data(D243)
Cost <- D243$Cost                            # Cost (Swiss francs)
LOS <- D243$LOS                              # Length of stay (days)
Adm <- D243$Typadm; Adm <- (Adm==" Urg")*1   # Type of admission 
                                             # (0=on notification, 1=Emergency)
Ass <- D243$Typass; Ass <- (Ass=="P"   )*1   # Type of insurance (0=usual, 1=private)
Age <- D243$age                              # Age (years)
Dst <- D243$dest;   Dst <- (Dst=="DOMI")*1   # Destination (1=Home, 0=another hospital)
Sex <- D243$Sexe;   Sex <- (Sex=="M"   )*1   # Sex (1=Male, 0=Female)

## Not run: 
# Plot data
par(mfrow=c(1,2))
plot(LOS,Cost); plot(log(LOS),log(Cost))

# log-Weibull fits
# ----------------
# Full robust model
zwff     <- TML.noncensored(log(Cost)~log(LOS)+Adm+Ass+Age+Sex+Dst,
            errors="logWeibull")
summary(zwff)

# Reduced model
zwfr     <- update(zwff,log(Cost)~log(LOS)+Adm)
summary(zwfr)

# Residual plots
par(mfrow=c(1,2))
plot(zwfr,which=c(1,3))

# Plot robust predictions on log-log scale
par(mfrow=c(1,1))
l0       <- seq(from=2,to=60,by=0.5)
x0       <- as.matrix(cbind(1,log(l0),0))
x1       <- as.matrix(cbind(1,log(l0),1))
plot(log(LOS),log(Cost),type="n")
points(log(LOS[Adm==1]),log(Cost[Adm==1]),pch=16,col=2)
points(log(LOS[Adm==0]),log(Cost[Adm==0]),pch=1)
lines(log(l0),predict(zwfr,x0))
lines(log(l0),predict(zwfr,x1),col=2)

# Maximum likelihood : full model
zmlf     <- TML.noncensored(log(Cost)~log(LOS)+Adm+Ass+Age+Sex+Dst,
            errors="logWeibull",cu=100)
summary(zmlf)

# Maximum likelihood : reduced model
zmlr     <- update(zmlf,log(Cost)~log(LOS)+Adm)
summary(zmlr)

# Plot conditional means and confidence intervals
l0       <- seq(from=2,to=62,by=0.5)
x0       <- as.matrix(cbind(1,log(l0),0))
x1       <- as.matrix(cbind(1,log(l0),1))
theta.fr <- coef(zwfr)
sigma.fr <- zwfr$v1
COV.fr   <- vcov(zwfr)
sd0.fr   <- apply(x0,1,SDmux.lw,theta.fr,sigma.fr,COV.fr)
sd1.fr   <- apply(x1,1,SDmux.lw,theta.fr,sigma.fr,COV.fr)
theta.ml <- coef(zmlr)
sigma.ml <- zmlr$v1
COV.ml   <- zmlr$COV
sd0.ml   <- apply(x0,1,SDmux.lw,theta.ml,sigma.ml,COV.ml)
sd1.ml   <- apply(x1,1,SDmux.lw,theta.ml,sigma.ml,COV.ml)
plt(LOS,Cost,Adm,theta.fr,sigma.fr,sd0.fr,sd1.fr,theta.ml,sigma.ml,sd0.ml,sd1.ml)

# Gaussian fits (for comparison)
# ------------------------------
# Reduced model
zgfr <- TML.noncensored(log(Cost)~log(LOS)+Adm,errors="Gaussian")
summary(zgfr)

# Residual plots
par(mfrow=c(1,2))
plot(zgfr,which=c(1,3))

# Classical Gaussian fit
lr <- lm(log(Cost)~log(LOS)+Adm)
summary(lr)

# Compare several fits
# --------------------
comp <- fits.compare(TML.logWeibull=zwfr,ML.logWeibull=zmlr,least.squares=lr)
comp
plot(comp,leg.position=c("topleft","topleft","bottomleft")) # click on graphics

## End(Not run)
#
#------------------------------------------------------------------------------
#
# Example 2. This is the example described in Locatelli Marazzi and Yohai (2010).
# ---------
# This is the example described in Locatelli et al. (2010). 
# The estimates are slighty different due to changes in the algorithm for the 
# final estimate.
## Not run: 
# Remove data of Example 1
rm(Cost,LOS,Adm,Ass,Age,Dst,Sex)

data(MCI)
attach(MCI)
     
# Exploratory Analysis

par(mfrow=c(1,1)) 

plot(Age,log(LOS),type= "n",cex=0.7)

# (1) filled square   : regular,   complete   
# (2) empty  square   : regular,   censored
# (3) filled triangle : emergency, complete
# (4) empty  triangle : emergency, censored

points(Age[Dest==1 & TypAdm==0], log(LOS)[Dest==1 & TypAdm==0], pch=15,cex=0.7)  # (1)
points(Age[Dest==0 & TypAdm==0], log(LOS)[Dest==0 & TypAdm==0], pch=0, cex=0.7)  # (2)
points(Age[Dest==1 & TypAdm==1], log(LOS)[Dest==1 & TypAdm==1], pch=17,cex=0.7)  # (3)
points(Age[Dest==0 & TypAdm==1], log(LOS)[Dest==0 & TypAdm==1], pch=2, cex=0.7)  # (4)

# Maximum Likelihood
ML   <- survreg(Surv(log(LOS), Dest) ~ TypAdm*Age, dist="gaussian")
summary(ML)
B.ML <- ML$coef; S.ML <- ML$scale
abline(c(B.ML[1]        ,B.ML[3]        ),lwd=1,col="grey",lty=1)
abline(c(B.ML[1]+B.ML[2],B.ML[3]+B.ML[4]),lwd=1,col="grey",lty=1)
     
# Robust Accelerated Failure Time Regression with Gaussian errors
ctrol.S   <- list(N=150, q=5, sigma0=1, MAXIT=100, TOL=0.001,seed=123)

ctrol.ref <- list(maxit.sigma=2,tol.sigma=0.0001,maxit.Beta=2,tol.Beta=0.0001,
          Maxit.S=50, tol.S.sigma=0.001, tol.S.Beta=0.001,alg.sigma=1,nitmon=FALSE)

ctrol.tml <- list(maxit.sigma=50,tol.sigma=0.0001,maxit.Beta=50,tol.Beta=0.0001,
    Maxit.TML=50, tol.TML.sigma=0.001, tol.TML.Beta=0.001, alg.sigma=1,nitmon=FALSE)

WML       <- TML.censored(log(LOS)~TypAdm*Age,data=MCI,delta=Dest,otp="adaptive",
             control.S=ctrol.S,control.ref=ctrol.ref,control.tml=ctrol.tml)

summary(WML)

B.WML     <-coef(WML)
abline(c(B.WML[1]         ,B.WML[3]         ),lty=1, col="red")
abline(c(B.WML[1]+B.WML[2],B.WML[3]+B.WML[4]),lty=1, col="red")
#
detach(MCI)

## End(Not run)

RobustAFT documentation built on March 13, 2020, 1:31 a.m.