R/Surv.R

Defines functions levels.Surv quantile.Surv median.Surv mean.Surv as.numeric.Surv as.integer.Surv as.logical.Surv t.Surv rep_len.Surv rep.int.Surv rep.Surv c.Surv unique.Surv rev.Surv duplicated.Surv anyDuplicated.Surv text.Surv pairs.Surv points.Surv lines.Surv image.Surv identify.Surv hist.Surv density.Surv barplot.Surv plot.Surv head.Surv tail.Surv as.data.frame.Surv format.Surv names.Surv length.Surv as.matrix.Surv is.Surv xtfrm.Surv Summary.Surv Ops.Surv Math.Surv is.na.Surv as.character.Surv print.Surv Surv

Documented in anyDuplicated.Surv as.character.Surv as.data.frame.Surv as.integer.Surv as.matrix.Surv as.numeric.Surv c.Surv duplicated.Surv format.Surv head.Surv is.na.Surv is.Surv length.Surv levels.Surv Math.Surv mean.Surv median.Surv names.Surv Ops.Surv plot.Surv quantile.Surv rep.int.Surv rep_len.Surv rep.Surv rev.Surv Summary.Surv Surv tail.Surv t.Surv unique.Surv xtfrm.Surv

#
# Package up surivival type data as a structure
#
Surv <- function(time, time2, event,
	      type=c('right', 'left', 'interval', 'counting', 'interval2',
                     "mstate"),
		       origin=0) {

    if (missing(time)) stop ("Must have a time argument")
    if (inherits(time ,"difftime")) time <- unclass(time)
    if (!missing(time2) && inherits(time2, "difftime")) time2 <-as.numeric(time2)
    if (!is.numeric(time)) stop ("Time variable is not numeric")
    nn <- length(time)

    # ng = number of the first 3 arguments that are present
    ng <- (!missing(time)) + (!missing(time2)) + (!missing(event)) 
    # The logic below uses "ng" throughout; why not use "missing(time2)"
    # and missing(event) instead?  Because we want to assume that 
    # "Surv(a,b)" has the variable b matched to event rather than time2.
    #
    mtype <- match.arg(type)
    
    # If type is missing or it is "mstate", I need to figure out for myself
    #  whether I have (time, time2, status) or (time, status) data
    if (missing(type) || mtype=="mstate") {
	if (ng==1 || ng==2) type <- 'right'
	else if (ng==3)     type <- 'counting'
        else stop ("No time variable!") # no time variable at all!
    }
    else {
        type <- mtype
	if (ng!=3 && (type=='interval' || type =='counting'))
		stop("Wrong number of args for this type of survival data")
	if (ng!=2 && (type=='right' || type=='left' ||  type=='interval2'))
		stop("Wrong number of args for this type of survival data")
	}

    if (ng==1) { # only a time variable given
        if (!is.numeric(time)) stop("Time variable is not numeric")
	ss <- cbind(time=time-origin, status=1)
        type <- "right"
	}
    else if (type=='right' || type=='left') {
        if (!is.numeric(time)) stop("Time variable is not numeric")
	if (missing(event))   {
            event <- time2  # treat time2 as event
            time2 <- NULL   # force any inputAttributes to attach to "event"
        }
        if (length(event) != nn) stop ("Time and status are different lengths")
        if (mtype=="mstate" || (is.factor(event))) {
            mstat <- as.factor(event)
            status <- as.numeric(mstat) -1
            type <- "mright"
        }
	else {
            if (is.logical(event)) status <- as.numeric(event)
            else  if (is.numeric(event)) {
                who2 <- !is.na(event)
                if (max(event[who2]) ==2) status <- event -1
                else status <- event
                temp <- (status==0 | status==1)
                status <- ifelse(temp, status, NA)
                if (!all(temp[who2], na.rm=TRUE))
		    warning("Invalid status value, converted to NA")
	    }
            else stop("Invalid status value, must be logical or numeric")
        }
	ss <- cbind(time=time-origin, status=status)
    }
    else  if (type=='counting') {
	if (length(time2) !=nn) stop ("Start and stop are different lengths")
	if (length(event)!=nn) stop ("Start and event are different lengths")
        if (!is.numeric(time))  stop("Start time is not numeric")
	if (!is.numeric(time2)) stop("Stop time is not numeric")
	temp <- (time >= time2)
	if (any(temp & !is.na(temp))) {
	    time[temp] <- NA
	    warning("Stop time must be > start time, NA created")
	    }
        if (mtype=="mstate" || is.factor(event)) {
            mstat <- as.factor(event)
            status <- as.numeric(mstat) -1
            type <- "mcounting"
        }
	else {
            if (is.logical(event)) status <- as.numeric(event)
	    else  if (is.numeric(event)) {
		who2 <- !is.na(event)
		if (max(event[who2])==2) status <- event - 1
		else status <- event
		temp <- (status==0 | status==1)
		status <- ifelse(temp, status, NA)
		if (!all(temp[who2], na.rm=TRUE))
		    warning("Invalid status value, converted to NA")
		}
	    else stop("Invalid status value")
        }
	ss <- cbind(start=time-origin, stop=time2-origin, status=status)
    }

    else {  #interval censored data
	if (type=='interval2') {
	    # convert to "interval" type, infer the event code
	    if (!is.numeric(time2)) stop("Time2 must be numeric")
	    if (length(time2) !=nn) 
		    stop ("time and time2 are different lengths")
            backwards <- (!is.na(time) & !is.na(time2) & time > time2)
            # allow for infinite times (important to do the backwards check
            #  first)
            time  <- ifelse(is.finite(time), time, NA)
            time2 <- ifelse(is.finite(time2), time2, NA)
            unknown <-  (is.na(time) & is.na(time2))
 	    status <- ifelse(is.na(time),  2,
		      ifelse(is.na(time2), 0,
		      ifelse(time==time2, 1,3)))
	    time <- ifelse(status!=2, time, time2)

            if (any(backwards)) {
                warning("Invalid interval: start > stop, NA created")
                status[backwards] <- NA
            }
            if (any(unknown)) status[unknown] <- NA
	    type <- 'interval'
	    }
	else {  #check legality of event code
	    if (length(event)!=nn) 
		    stop("Time and status are different lengths")
            if (!is.numeric(event)) 
		   stop("Invalid status value, must be logical or numeric")
            temp <- (event==0 | event==1| event==2 | event==3)
            status <- ifelse(temp, event, NA)
            if (!all(temp, na.rm=TRUE))
                warning("Status must be 0, 1, 2 or 3; converted to NA")

	    if (any(event==3, na.rm=T)) {
		if (!is.numeric(time2)) stop("Time2 must be numeric")
		if (length(time2) !=nn) 
		    stop ("time and time2 are different lengths")
                temp <- (status==3 & time>time2)
                if (any(temp & !is.na(temp))) {
                    status[temp] <- NA
                    warning("Invalid interval: start > stop, NA created")
                }
            }
	    else time2 <- 1  #dummy value, time2 is never used
        }

	ss <- cbind(time1=time-origin, 
		    time2=ifelse(!is.na(status) & status==3, time2-origin, 1),
		    status=status)
    }

    # Retain any attributes of the input arguments. Originally requested
    #  by the rms package
    inputAttributes <- list()
    if (!is.null(attributes(time)))
        inputAttributes$time  <-attributes(time)
    if (!missing(time2) && !is.null(attributes(time2)))
        inputAttributes$time2 <- attributes(time2)
    if (!missing(event) && !is.null(attributes(event)))
        inputAttributes$event <- attributes(event)

    # In rare cases there are no column names, and I have discovered that
    #  people depend on them.
    cname <- dimnames(ss)[[2]]
    if (length(cname) ==0) {
        if (ncol(ss)==2) cname <- c("time", "status")
        else if (type=="counting") cname <- c("start", "stop", "status")
        else cname <- c("time1", "time2", "status")
    }
    dimnames(ss) <- list(NULL, cname)  #kill extraneous row names
                                           
    attr(ss, "type")  <- type
    if (type=="mright" || type=="mcounting") {
        states <- levels(mstat)[-1]
        if (any(is.na(states) | states=='') )
            stop("each state must have a non-blank name")
        attr(ss, "states") <- states
    }
    if (length(inputAttributes) > 0) 
        attr(ss, "inputAttributes") <- inputAttributes
    class(ss) <- 'Surv'
    ss
    }

print.Surv <- function(x, quote=FALSE, ...) {
    invisible(print(as.character.Surv(x), quote=quote, ...))
    }

as.character.Surv <- function(x, ...) {
    new <- switch(attr(x, "type"),
           "right"={
               temp <- x[,2]
               temp <- ifelse(is.na(temp), "?", ifelse(temp==0, "+",""))
               paste0(format(x[,1]), temp)
           },
           "counting"= {
               temp <- x[,3]
               temp <- ifelse(is.na(temp), "?", ifelse(temp==0, "+",""))
               paste0('(', format(x[,1]), ',', format(x[,2]), temp,
                     ']')
           },
           "left" ={
               temp <- x[,2]
               temp <- ifelse(is.na(temp), "?", ifelse(temp==0, "-",""))
               paste0(format(x[,1]), temp)
           },
           "interval"= {
               stat <- x[,3]
               temp <- c("+", "", "-", "]")[stat+1]
               temp2 <- ifelse(stat==3,
			 paste("[", format(x[,1]), ", ",format(x[,2]), sep=''),
			 format(x[,1]))
               ifelse(is.na(stat), "NA", paste0(temp2, temp))
           },
           "mright" = {  #multi-state
               temp <- x[,2]
               end <- c("+", paste(":", attr(x, "states"), sep='')) #endpoint
               temp <- ifelse(is.na(temp), "?", end[temp+1])
               paste0(format(x[,1]), temp)
           },
           "mcounting"= {
               temp <- x[,3]
               end <- c("+", paste(":", attr(x, "states"), sep='')) #endpoint
               temp <- ifelse(is.na(temp), "?", end[temp+1])
               paste0('(', format(x[,1]), ',', format(x[,2]), temp,
                     ']')
           })
    names(new) <- rownames(x)
    new
}


"[.Surv" <- function(x, i, j, drop=FALSE) {
    # If only 1 subscript is given, the result will still be a Surv object,
    #   and the drop argument is ignored.
    # I would argue that x[3:4,,drop=FALSE] should return a matrix, since
    #  the user has implicitly specified that they want a matrix.
    #  However, [.dataframe calls [.Surv with the extra comma; its
    #  behavior drives the choice of default.
    if (missing(j)) {
        xattr <- attributes(x)
        x <- unclass(x)[i,, drop=FALSE] # treat it as a matrix: handles dimnames
        attr(x, 'type') <- xattr$type
        if (!is.null(xattr$states)) attr(x, "states") <- xattr$states
        if (!is.null(xattr$inputAttributes)) {
            # If I see "names" subscript it, leave all else alone
            attr(x, 'inputAttributes') <- 
                lapply(xattr$inputAttributes, function(z) {
                       if (any(names(z)=="names")) z$names <- z$names[i]
                       z
                   })
        }
        class(x) <- "Surv"  #restore the class
        x
    }
    else { # return  a matrix or vector
	class(x) <- 'matrix'
       	NextMethod("[")
    }
}

is.na.Surv <- function(x) {
    as.vector(rowSums(is.na(unclass(x))) >0)
    }

Math.Surv <- function(...)  stop("Invalid operation on a survival time")
Ops.Surv  <- function(...)  stop("Invalid operation on a survival time")
Summary.Surv<-function(...) stop("Invalid operation on a survival time")

# The Ops.Surv method could in theory define == and >, to allow sorting
#  but I've left them out since it is the xtfrm method that explicitly
#  is used for this.  For (start, stop) data we order by event within
#  ending time.  Start time is included as a last index, but it is not
#  clear that we need to do so.
xtfrm.Surv <- function(x) {
    if (attr(x, 'type') == "interval") {
        temp <- ifelse(x[,3]==3, (x[,1] + x[,2])/2, x[,1])
        index <- order(temp, match(x[,3], c(2,1,3,0)))
        }
    else if (attr(x, 'type')== "left") index <- order(x[,1], x[,2])
    else if (ncol(x)==2) index <- order(x[,1], x[,2]==0, x[,2]) # censor last
    else index <- order(x[,2], x[,3]==0, x[,3], x[,1]) # ending time, stat, start
    temp <- integer(nrow(x))
    temp[index] <- seq.int(nrow(x))
    temp[is.na(x)] <- NA
    temp
}

is.Surv <- function(x) inherits(x, 'Surv')
as.matrix.Surv <- function(x, ...) {
    y <- unclass(x)
    attr(y, "type") <- NULL
    attr(y, "states") <- NULL
    attr(y, "inputAttributes") <- NULL
    y
}

# You can't do length without names
# and names doesn't pay attention to my definition of length:
# we need to map to rownames instead

length.Surv <- function(x) nrow(x)
"names<-.Surv" <- function(x, value) {
    rownames(x) <- value
    x
}
names.Surv <- function(x) rownames(x)

format.Surv <- function(x, ...) format(as.character.Surv(x), ...)
as.data.frame.Surv <- function(x, ...) as.data.frame.model.matrix(x, ...)

# all sorts of methods for Surv, caused by searching for every case of
#  UseMethod in the standard libraries

# package:utils methods
tail.Surv <- function(x, ...) 
    x[tail(1:nrow(x), ...)]     

head.Surv <- function(x, ...)
    x[head(1:nrow(x), ...)]

# packge:graphics.  All try to give a nicer failure message
plot.Surv <- function(x, ...)
    plot(survfit(x ~1), ...)

barplot.Surv <- function(height, ...)
    stop("not defined for a Surv object")
density.Surv <- function(x, ...)
    stop("method not defined for a Surv object")
hist.Surv <- function(x, ...)
    stop("method not defined for a Surv object")
identify.Surv <- function(x, ...)
    stop("method not defined for a Surv object")
image.Surv <- function(x, ...)
    stop("method not defined for a Surv object")
lines.Surv <- function(x, ...)
    stop("method not defined for a Surv object")
points.Surv <- function(x, ...)
    stop("method not defined for a Surv object")
pairs.Surv <- function(x, ...)
    stop("method not defined for a Surv object")
text.Surv <- function(x, ...)
    stop("method not defined for a Surv object")

# package:base methods
anyDuplicated.Surv <- function(x, ...) anyDuplicated(as.matrix(x), ...)
duplicated.Surv    <- function(x, ...) duplicated(as.matrix(x), ...)
rev.Surv <- function(x) x[rev(1:nrow(x))]
unique.Surv  <- function(x, ...)
    x[!duplicated(as.matrix(x), ...)]

c.Surv <- function(...) {
    slist <- list(...)
    if (!all(sapply(slist, function(x) inherits(x, "Surv")))) 
        stop("all elements must be of class Surv")
    types <- sapply(slist, function(x) attr(x, "type"))
    if (!all(types == types[1]))
        stop("all elements must be of the same Surv type")

    if (types[[1]] %in% c("mright", "mcounting")) {
        states <- lapply(slist, function(x) attr(x, 'states'))
        if (any(diff(sapply(states, length))!=0))
            stop("all elements must have the same list of states")
        if (!all(sapply(states, function(x) all.equal(x, states[[1]]))))
            stop("all elements must have the same list of states")
        }
    new <- do.call("rbind", lapply(slist, as.matrix))
    att1 <- attributes(slist[[1]])
    att1 <- att1[is.na(match(names(att1), c("dim","dimnames")))]
    attributes(new) <- c(attributes(new)[c('dim', 'dimnames')], att1)
    new
    }


# The cbind/rbind methods cause more trouble than they solve
# The problem is when one is called with mixed arguments, e.g.
#      cbind(Surv(1:4), data.frame(x=6:9, z=c('a', 'b', 'a', 'a'))
# R will call cbind.Surv for cbind(Surv(1:4), Surv(2:5)), giving a matrix.
# In the above, however, cbind.Surv is never called, but the \emph{presence}
#    of a cbind.Surv method messes up the default behavior, see the
#    'Dispatch' section of help('cbind').  The result becomes a matrix of
#    lists rather than a dataframe.
#
#rbind.Surv <- function(...) {
#    dotlist <- list(...)
#    if (all(sapply(dotlist, is.Surv))) do.call("c.Surv", dotlist)
#    else do.call("rbind", lapply(dotlist, function(x)
#        if (is.Surv(x)) as.matrix(x) else x))
#    }
#
#cbind.Surv <- function(...) 
#    do.call("cbind", lapply(list(...), 
#                        function(x) if (is.Surv(x)) as.matrix(x) else x))
#}

rep.Surv <- function(x, ...) {
    index <- rep(1:nrow(x), ...)
    x[index]
    }
rep.int.Surv <- function(x, ...) {
    index <- rep.int(1:nrow(x), ...)
    x[index]
    }
rep_len.Surv <- function(x, ...) {
    index <- rep_len(1:nrow(x), ...)
    x[index]
    }
t.Surv <- function(x) t(as.matrix(x))

as.logical.Surv <- function(x, ...)
    stop("invalid operation on a survival time")
as.integer.Surv <- function(x, ...) {
    nc <- ncol(x)
    x[,-nc] <- as.integer(x[,-nc])
    if (nc==3 && any(x[,1] >= x[,2]))
        stop("invalid survival time created")
    x
}
as.numeric.Surv <- function(x, ...) {
    nc <- ncol(x)
    x[,-nc] <- as.numeric(x[, -nc])
    x
}

mean.Surv <-function(x, ...)
    stop("a mean method has not been defined for Surv objects")

median.Surv <- function(x, na.rm=FALSE, ...)
    quantile(x, probs=0.5, na.rm= na.rm, ...)

quantile.Surv <- function(x, probs, na.rm=FALSE, ...) {
    if (!na.rm && any(is.na(x)))
        stop("missing values and NaN's not allowed if 'na.rm' is FALSE")
    if (attr(x, "type") %in% c("mright", "mcounting"))
        stop("quantile method not defined for multiple-endpoint Surv objects")
    fit <- survfit(x~1)
    quantile.survfit(fit, probs, ...)
}   

# these make sense but aren't S3 methods
# sd, IQR, mad, cov, cor

levels.Surv <- function(x) attr(x, "states")

Try the survival package in your browser

Any scripts or data that you put into this service are public.

survival documentation built on Aug. 14, 2023, 9:07 a.m.