##' Discrete time to event interval censored data
##'
##' We consider the cumulative odds model
##' \deqn{
##' P(T \leq t | x) = \frac{G(t) \exp(x \beta) }{1 + G(t) exp( x \beta) }
##' }
##' or equivalently
##' \deqn{
##' logit(P(T \leq t | x)) = log(G(t)) + x \beta
##' }
##' and we can thus also compute the probability of surviving
##' \deqn{
##' P(T >t | x) = \frac{1}{1 + G(t) exp( x \beta) }
##' }
##'
##' The baseline \eqn{G(t)} is written as \eqn{cumsum(exp(\alpha))} and this is not the standard
##' parametrization that takes log of \eqn{G(t)} as the parameters. Note that the regression
##' coefficients are describing the probability of dying before or at time t.
##'
##' Input are intervals given by ]t_l,t_r] where t_r can be infinity for right-censored intervals
##' When truly discrete ]0,1] will be an observation at 1, and ]j,j+1] will be an observation at j+1.
##' Can be used for fitting the usual ordinal regression model (with logit link) that in contrast, however,
##' describes the probibility of surviving time t (thus leads to -beta).
##'
##' Likelihood is maximized:
##' \deqn{
##' \prod P(T_i >t_{il} | x) - P(T_i> t_{ir}| x)
##' }
##'
##' @param formula formula
##' @param data data
##' @param beta starting values
##' @param no.opt optimization TRUE/FALSE
##' @param method NR, nlm
##' @param stderr to return only estimate
##' @param weights weights following id for GLM
##' @param offsets following id for GLM
##' @param exp.link parametrize increments exp(alpha) > 0
##' @param increment using increments dG(t)=exp(alpha) as parameters
##' @param ... Additional arguments to lower level funtions lava::NR optimizer or nlm
##' @author Thomas Scheike
##' @examples
##' data(ttpd)
##' dtable(ttpd,~entry+time2)
##' out <- interval.logitsurv.discrete(Interval(entry,time2)~X1+X2+X3+X4,ttpd)
##' summary(out)
##'
##' pred <- predictlogitSurvd(out,se=FALSE)
##' plotSurvd(pred)
##'
##' ttpd <- dfactor(ttpd,fentry~entry)
##' out <- cumoddsreg(fentry~X1+X2+X3+X4,ttpd)
##' summary(out)
##'
##' @aliases Interval dInterval simlogitSurvd predictlogitSurvd cumoddsreg simTTP predictSurvd plotSurvd
##' @export
interval.logitsurv.discrete <- function (formula,data,beta=NULL,no.opt=FALSE,method="NR",
stderr=TRUE,weights=NULL,offsets=NULL,exp.link=1,increment=1,...)
{ ## {{{
cl <- match.call()
m <- match.call(expand.dots = TRUE)[1:3]
special <- c("strata", "cluster","offset")
Terms <- terms(formula, special, data = data)
m$formula <- Terms
m[[1]] <- as.name("model.frame")
m <- eval(m, parent.frame())
Y <- model.extract(m, "response")
if (ncol(Y)==2) {
time2 <- eventtime <- Y[,2]
entrytime <- Y[,1]
left <- 0
} else {
time2 <- eventtime <- Y[,2]
status <- delta <- Y[,3]
entrytime <- Y[,1]
left <- 1
if (max(entrytime)==0) left <- 0
}
if (any(entrytime==time2)) stop("left==right not possible for discrete data")
id <- strata <- NULL
if (!is.null(attributes(Terms)$specials$cluster)) {
ts <- survival::untangle.specials(Terms, "cluster")
pos.cluster <- ts$terms
Terms <- Terms[-ts$terms]
id <- m[[ts$vars]]
} else pos.cluster <- NULL
if (!is.null(attributes(Terms)$specials$strata)) {
ts <- survival::untangle.specials(Terms, "strata")
pos.strata <- ts$terms
Terms <- Terms[-ts$terms]
strata <- m[[ts$vars]]
strata.name <- ts$vars
} else { strata.name <- NULL; pos.strata <- NULL}
X <- model.matrix(Terms, m)
if (!is.null(intpos <- attributes(Terms)$intercept))
X <- X[,-intpos,drop=FALSE]
if (ncol(X)>0) X.names <- colnames(X) else X.names <- NULL
if (!is.null(id)) {
ids <- unique(id)
nid <- length(ids)
if (is.numeric(id)) id <- fast.approx(ids,id)-1 else {
id <- as.integer(factor(id,labels=seq(nid)))-1
}
} else { id <- as.integer(seq_along(time2))-1; nid <- length(time2) }
## orginal id coding into integers
id.orig <- id+1;
## times 1 -> mutimes , 0 til start
utimes <- sort(unique(c(time2,entrytime)))
mutimes <- max(utimes[utimes<Inf])
n <- length(time2)
## setting up designs for t_l and t_r
tR <- tL <- matrix(0,n,mutimes)
## design for ]t_l,t_r], for t_l=0 row is 0
if (increment==0) {
tL <- matdoubleindex(tL,1:n,entrytime,rep(1,n))
tR <- matdoubleindex(tL,1:n,time2,rep(1,n))
} else {
for (i in 1:mutimes) {
tL[,i] <- (i <= entrytime)
tR[,i] <- (i <= time2)
}
}
## computing X^2
if (ncol(X)>0) {
X2 <- .Call("vecMatMat",X,X)$vXZ
XtL <- .Call("vecMatMat",X,tL)$vXZ
XtR <- .Call("vecMatMat",X,tR)$vXZ
}
tL2 <- .Call("vecMatMat",tL,tL)$vXZ
tR2 <- .Call("vecMatMat",tR,tR)$vXZ
## weights/offets will follow id
if (is.null(weights)) weights <- rep(1,n); # else wiid <- weights
if (is.null(offsets)) offsets <- rep(0,n); # else offsets <- offsets
if (is.null(beta)) {
beta <- rep(0,ncol(X)+mutimes)
Set <- 1-cumsum(table(time2))/n
dHt <- log(diff(c(0,(1/Set-1))))
beta[1:mutimes] <- dHt[1:mutimes]
}
obj <- function(pp,all=FALSE)
{ # {{{
if (exp.link==1) theta <- exp(pp[1:mutimes]) else theta <- pp[1:mutimes]
if (ncol(X)>0) {
betal <- pp[-c(1:mutimes)]
Zbeta <- c(X %*% betal+offsets)
} else {Zbeta <- offsets }
xltheta <- c(tL %*% theta)
xrtheta <- c(tR %*% theta)
EZbeta <- exp(Zbeta)
GEl <- xltheta * EZbeta
GEr <- xrtheta * EZbeta
Stl <- 1/(1+GEl)
Str <- 1/(1+GEr)
############################################
## likelihood
############################################
# {{{
p <- Stl-Str*(time2<Inf)
logp <- log(p)
logl <- weights*logp
# }}}
############################################
## Derivative
############################################
# {{{
if (ncol(X)>0) {
DbetaStl <- -X*GEl*Stl^2
DbetaStr <- -(time2<Inf)*X*GEr*Str^2
Dbetalogp <- (DbetaStl-DbetaStr)/p
}
DgStl <- -tL*EZbeta*Stl^2
DgStr <- -(time2<Inf)*tR*EZbeta*Str^2
Dglogp <- (DgStl-DgStr)/p
if (ncol(X)>0) {
Dlogliid <- cbind(Dglogp*weights,Dbetalogp*weights)
Dlogl <- apply(Dlogliid,2,sum)
} else {
Dlogliid <- Dglogp*weights
Dlogl <- apply(Dlogliid,2,sum)
}
if (exp.link==1) {
Dlogliid[,1:mutimes] <- t( t(Dlogliid[,1:mutimes])*theta)
Dlogl[1:mutimes] <- Dlogl[1:mutimes]*theta
}
# }}}
############################################
## 2nd Derivative
############################################
# {{{
if (ncol(X)>0) {
D2betaStl <- X2*(GEl^2-GEl)*Stl^3
D2betaStr <- X2*(time2<Inf)*(GEr^2-GEr)*Str^3
DbetagStl <- XtL* (EZbeta*(GEl-1))*Stl^3
DbetagStr <- XtR*(time2<Inf)*(EZbeta*(GEr-1))*Str^3
}
D2gStl <- 2*tL2*EZbeta^2*Stl^3
D2gStr <- 2*tR2*(time2<Inf)*EZbeta^2*Str^3
DglpDglp <- .Call("vecMatMat",Dglogp,Dglogp)$vXZ
D2glogp <- (D2gStl-D2gStr)/p - DglpDglp
D2g <- apply(weights*D2glogp,2,sum)
## cross products of derivatives
if (ncol(X)>0) {
DbetalpDbetalp <- .Call("vecMatMat",Dbetalogp,Dbetalogp)$vXZ
DbetalpDglp <- .Call("vecMatMat",Dbetalogp,Dglogp)$vXZ
D2betalogp <- (D2betaStl-D2betaStr)/p - DbetalpDbetalp
Dbetaglogp <- (DbetagStl-DbetagStr)/p - DbetalpDglp
D2beta <- apply(weights*D2betalogp,2,sum)
Dbetag <- apply(weights*Dbetaglogp,2,sum)
}
D2log <- matrix(0,length(pp),length(pp))
D2log[1:mutimes,1:mutimes] <- D2g
if (ncol(X)>0) {
###D2log[1:mutimes,(mutimes+1):length(pp)] <- Dbetag
D2log[(mutimes+1):length(pp),1:mutimes] <- Dbetag
D2log[1:mutimes,(mutimes+1):length(pp)] <- t(D2log[(mutimes+1):length(pp),1:mutimes])
D2log[(mutimes+1):length(pp),(mutimes+1):length(pp)] <- D2beta
}
diagM <- function(vec)
{# {{{
p <- length(vec)
out <- matrix(0,p,p)
out <- matdoubleindex(out,1:p,1:p,vec)
return(out)
}
# }}}
if (exp.link==1) {
D2log[1:mutimes,1:mutimes] <- diagM(Dlogl[1:mutimes]) + D2log[1:mutimes,1:mutimes]*(theta %o% theta)
if (ncol(X)>0) {
D2log[(mutimes+1):length(pp),1:mutimes] <- Dbetag*rep(theta,each=length(betal))
###D2log[(mutimes+1):length(pp),1:mutimes] <- Dbetag*rep(theta,length(betal))
D2log[1:mutimes,(mutimes+1):length(pp)] <- t(D2log[(mutimes+1):length(pp),1:mutimes])
}
}
# }}}
############################################
ploglik <- sum(logl)
gradient <- Dlogl
hessian <- D2log
if (all) {
ihess <- solve(hessian)
beta.iid <- Dlogliid %*% ihess ## %*% t(Dlogl)
beta.iid <- apply(beta.iid,2,sumstrata,id,nid)
robvar <- crossprod(beta.iid)
val <- list(par=pp,ploglik=ploglik,gradient=gradient,hessian=hessian,ihessian=ihess,
iid=beta.iid,robvar=robvar,var=robvar,ihessian=ihess,id=id,
se=diag(robvar)^.5,coef=pp,se.coef=diag(robvar)^.5)
return(val)
}
structure(-ploglik,gradient=-gradient,hessian=-hessian)
}# }}}
p <- ncol(X)
opt <- NULL
if (no.opt==FALSE) {
if (tolower(method)=="nr") {
tim <- system.time(opt <- lava::NR(beta,obj,...))
opt$timing <- tim
opt$estimate <- opt$par
} else {
opt <- nlm(obj,beta,...)
opt$method <- "nlm"
}
cc <- opt$estimate;
if (!stderr) return(cc)
val <- c(list(coef=cc),obj(opt$estimate,all=TRUE))
} else val <- c(list(coef=beta),obj(beta,all=TRUE))
uu <- utimes[utimes<Inf]
Xnames <- c(paste("time",uu[-1],sep=""),X.names)
if (length(val$coef)==length(Xnames)) names(val$coef) <- Xnames
val <- c(list(increment=increment,exp.link=exp.link,ntimes=mutimes,utimes=utimes),val)
class(val) <- c("cumoddsreg")
return(val)
} ## }}}
##' @export
cumoddsreg <- function (formula,data,...)
{ ## {{{
cl <- match.call()
m <- match.call(expand.dots = TRUE)[1:3]
special <- c("strata", "cluster","offset")
Terms <- terms(formula, special, data = data)
m$formula <- Terms
m[[1]] <- as.name("model.frame")
m <- eval(m, parent.frame())
Y <- model.extract(m, "response")
nt <- nlevels(Y)
time2__ <- as.numeric(Y)
entrytime__ <- time2__-1
time2__[time2__==nt] <- Inf
xf <- update.formula(formula,Interval(entrytime__,time2__)~.)
data$entrytime__ <- entrytime__
data$time2__ <- time2__
out <- interval.logitsurv.discrete(xf,data,...)
return(out)
} ## }}}
##' @export
Interval <- function (time, time2 , ...)
{# {{{
out <- cbind(time, time2 )
colnames(out) <- c("entry", "time2")
class(out) <- "Interval"
return(out)
}# }}}
##' @export
dInterval <- function (time, time2 ,cuts=NULL,cut.first=0,show=FALSE, ...)
{# {{{
if (is.null(cuts)) cuts <- sort(unique(time,time2))
if (min(cuts)> 0) cuts <- c(cut.first,cuts)
###
lleft <- fast.approx(cuts,time,type="left")
rright <- fast.approx(cuts,time2,type="right")
out <- data.frame(time=time,time2=time2,left=lleft-1,right=rright-1)
attr(out,"cuts") <- cuts
out$leftd <- cuts[lleft]
out$rightd <- cuts[rright]
if (max(cuts)==Inf) out$right[out$rightd==Inf] <- Inf
if (any(out$time<out$leftd)) warning("left not in [leftd,rightd]\n")
if (any(out$time2>out$rightd)) warning("right not in [leftd,rightd]\n")
if (show) {
mtime <- mmtime <- max(cuts[cuts<Inf])
n <- length(time)
if (max(cuts)==Inf) mtime <- mmtime+1
plot(0,0,xlim=c(0,mtime),ylim=c(0,n),type="n")
abline(v=cuts,col=3)
lines(c(min(cuts),mmtime),c(-1,-1),lwd=2)
if (max(cuts)==Inf) lines(c(mtime,mmtime),c(-1,-1),lwd=2,col=2)
for (i in 1:n)
{
lines(c(out$time[i],min(out$time2[i],mtime)),rep(i,2),col=1)
lines(c(out$leftd[i],min(out$rightd[i],mtime)),rep(i+0.5,2),col=2)
}
}
return(out)
}# }}}
##' @export
simlogitSurvd <- function(coef,Z,n=NULL,times=0:6)
{# {{{
if (missing(Z)) Z <- NULL
if (!is.null(Z)) n <- nrow(Z)
Z <- as.matrix(Z)
g <- coef[times[-1]]
beta <- coef[-(times[-1])]
Gt <- cumsum(c(0,exp(g)))
if (!is.null(Z)) EZbeta <- c(exp(Z %*% beta)) else EZbeta <- rep(1,n)
EZbeta <- rep(EZbeta,each=length(times))
Gti <- rep(Gt,n)
GE <- Gti*EZbeta
Ft <- GE/(1+GE)
ru <- runif(n)
out <- data.frame(.Call("wherestrataR",ru,Ft,rep(0:(n-1),each=length(times)),n))
status <- rep(1,n)
names(out)[1] <- "time"
status[out$time==(length(times)-1)] <- 0
out$status <- status
out <- cbind(out,Z)
return(out)
}# }}}
##' @export
predictlogitSurvd <- function(x,Z,n=NULL,times=NULL,se=FALSE,type="prob")
{# {{{
if (missing(Z)) Z <- NULL
if (!is.null(Z)) {
n <- nrow(Z)
Z <- as.matrix(Z)
} else { if (is.null(n)) n <- 1; }
ccc <- x$coef
ntimes <- x$ntimes
g <- ccc[1:ntimes]
beta <- ccc[-(1:ntimes)]
if (is.null(times)) times <- 0:ntimes
Gt <- cumsum(c(0,exp(g)))
if (!is.null(Z)) EZbeta <- c(exp(Z %*% beta)) else EZbeta <- rep(1,n)
if (!se) {# {{{
EZbeta <- rep(EZbeta,each=length(times))
id <- rep(1:n,each=length(times))
Gti <- rep(Gt,n)
GE <- Gti*EZbeta
Gt <- 1/(1+GE)
preds <- data.frame(pred=Gt,id=id,times=rep(times,n))
# }}}
} else {# {{{
Ft <- function(p,Zi=1)
{# {{{
g <- p[1:ntimes]
beta <- p[-(1:ntimes)]
Gt <- cumsum(c(0,exp(g)))
if (length(beta)>0) EZ <- exp(sum(Zi*beta)) else EZ <- 1
pred <- 1/(1+Gt*EZ)
return(pred)
}# }}}
preds <- c()
for (i in 1:length(EZbeta)) {
if (is.null(x$var)) covv <- vcov(x) else covv <- x$var
eud <- estimate(coef=x$coef,vcov=covv,f=function(p) Ft(p,Zi=Z[i,]))
cmat <- data.frame(eud$coefmat)
cmat$id <- i
cmat$times <- times
names(cmat)[1:4] <- c("pred","se","lower","upper")
preds <- rbind(preds,cmat)
}
}# }}}
return(preds)
} ## }}}
##' @export
simTTP <- function(coef=NULL,n=100,Xglm=NULL,times=NULL)
{# {{{
Z <- Xglm
if (!is.null(Z)) n <- nrow(Z)
if (!is.null(Z)) data <- Z else data <- data.frame(id=1:n)
if (!is.null(times)) {
timesf <- data.frame(times=rep(times,n),id=rep(1:n,each=length(times)))
data <- merge(data,timesf,by.x="id",by.y="id")
mt <- model.matrix(~factor(times),data)
nm <- match(c("id","times"),names(data))
Z <- cbind(mt,data[,-nm])
}
p <- c(expit(as.matrix(Z) %*% coef))
y <- rbinom(length(p),1,p)
data <- cbind(y,data)
data <- count.history(data,status="y",id="id",types=1)
data <- subset(data,data$Count1<=0)
attr(data,"coef") <- beta
return(data)
}# }}}
##' @export
summary.cumoddsreg <- function(object,...) { ## {{{
ntimes <- object$ntimes
coefb <- coef(object)[1:ntimes]
vcovb <- object$var[1:object$ntimes,1:object$ntimes]
outb <- lava::estimate(coef=coefb,vcov=vcovb,...)
if (length(object$coef)>ntimes) {
ll <- length(object$coef)
takex <- (ntimes+1):ll
coefx <- object$coef[takex]
vcovx <- object$var[takex,takex]
outx <- lava::estimate(coef=coefx,vcov=vcovx,...)
eoutx <- exp(outx$coefmat[,c(1,3,4)])
out <- list(baseline=outb,logor=outx,or=eoutx)
} else out <- list(baseline=outb)
return(out)
} ## }}}
##' @export
print.cumoddsreg <- function(x,...) summary(x,...)
##' @export
vcov.cumoddsreg <- function(object,...) return(object$var)
##' @export
coef.cumoddsreg <- function(object,...) return(object$coef)
##' @export
predictSurvd <- function(ds,Z,times=1:6,se=FALSE,type="prob")
{# {{{
if (!is.null(Z)) n <- nrow(Z)
if (!is.null(Z)) data <- Z else data <- data.frame(id=1:n)
Z <- data.frame(Z)
Z$id <- 1:n
ccc <- ds$coef
if (!se) {# {{{{{{
data <- Z
if (!is.null(times)) {
timesf <- data.frame(times=rep(times,n),id=rep(1:n,each=length(times)))
data <- merge(data,timesf,by.x="id",by.y="id")
mt <- model.matrix(~factor(times),data)
nm <- match(c("id","times"),names(data))
Z <- cbind(mt,data[,-nm])
}
if (ncol(Z)!=length(c(ccc))) {
print(head(Z))
print(ccc)
stop("dimension of Z not consistent with length of coefficients");
}
p <- c(expit(as.matrix(Z) %*% ccc))
preds <- data.frame(p=p,id=data$id,times=data$times)
survt <- exp(cumsumstrata(log(1-preds$p),data$id-1,6))
if (type=="prob") pred <- 1-survt
if (type=="surv") pred <- survt
if (type=="hazard") pred <- p
if (type=="rrm") { ## restricted residual mean
ll <- length(survt)
pred <- cumsum(c(1,survt[-ll]))
}
preds <- cbind(preds,pred)
# }}}
} else {# {{{
Ft <- function(p)
{
xp <- as.matrix(Zi) %*% p
lam <- expit(xp)
st <- cumprod(1-lam)
if (type=="prob") st <- 1-st
if (type=="surv") st <- st
if (type=="hazard") st <- lam
if (type=="rrm") { ## restricted residual mean
ll <- length(st)
st <- cumsum(c(1,st[-ll]))
}
return(st)
}
preds <- c()
for (i in 1:nrow(Z)) {
Zi <- data.frame(Z[i,,drop=FALSE])
data <- Zi
if (!is.null(times)) {
timesf <- data.frame(times=rep(times,n),id=rep(1:n,each=length(times)))
data <- merge(data,timesf,by.x="id",by.y="id")
mt <- model.matrix(~factor(times),data)
nm <- match(c("id","times"),names(data))
Zi <- cbind(mt,data[,-nm])
}
if (is.null(ds$var)) covv <- vcov(ds) else covv <- ds$var
eud <- estimate(coef=ds$coef,vcov=covv,f=function(p) Ft(p))
cmat <- data.frame(eud$coefmat)
cmat$id <- i
cmat$times <- times
names(cmat)[1:4] <- c("pred","se","lower","upper")
preds <- rbind(preds,cmat)
}
}# }}}
return(preds)
}# }}}
## }}}
##' @export
plotSurvd <- function(ds,ids=NULL,add=FALSE,se=FALSE,cols=NULL,ltys=NULL,...)
{# {{{
if (is.null(ids)) ids <- unique(ds$id)
if (is.null(cols)) cols <- 1:length(ids)
if (is.null(ltys)) ltys <- 1:length(ids)
k <- 1
fplot <- 0
for (i in ids) {
timei <- ds$time[ds$id==i]
predi <- ds$pred[ds$id==i]
if (fplot==0) {
if (!add) plot(timei,predi,type="s",col=cols[k],lty=ltys[k],...)
if (add) lines(timei,predi,type="s",col=cols[k],lty=ltys[k],...)
fplot <- 1
} else lines(timei,predi,type="s",col=cols[k],lty=ltys[k],...)
if (se) {
loweri <- ds$lower[ds$id==i]
upperi <- ds$upper[ds$id==i]
plotConfRegion(timei,cbind(loweri,upperi),col=cols[k])
}
k <- k+1
}
} ## }}}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.