R/plot.survfit.R
In survival: Survival Analysis

Documented in plot.survfit

plot.survfit<- function(x, conf.int,  mark.time=FALSE,
                        pch=3,  col=1,lty=1, lwd=1, 
                        cex=1, log=FALSE,
                        xscale=1, yscale=1, 
                        xlim, ylim, xmax, 
                        fun, xlab="", ylab="", xaxs='r', 
                        conf.times, conf.cap=.005, conf.offset=.012, 
                        conf.type=c("log",  "log-log",  "plain", 
                                  "logit", "arcsin", "none"),
                        mark, mark.col, noplot="(s0)", cumhaz=FALSE,
                        firstx, ymin, cumprob=FALSE, ...) {

    dotnames <- ...names()
    if (any(dotnames =='type'))
        stop("The graphical argument 'type' is not allowed")
    if (!missing(mark))
        warning("the mark option is deprecated, use mark.time=TRUE along with pch for the character")

    x <- survfit0(x, x$start.time)   # align data at 0 for plotting

    # decide on logarithmic axes, yes or no
    if (is.logical(log)) {
        ylog <- log
        xlog <- FALSE
        if (ylog) logax <- 'y'
        else      logax <- ""
    }
    else {
        ylog <- (log=='y' || log=='xy')
        xlog <- (log=='x' || log=='xy')
        logax  <- log
    }

    if (!missing(fun)) {
        if (is.character(fun)) {
            if (fun=='log'|| fun=='logpct') ylog <- TRUE
            if (fun=='cloglog') {
                xlog <- TRUE
                if (ylog) logax <- 'xy'
                else logax <- 'x'
            }
            if (fun=="cumhaz" && missing(cumhaz)) cumhaz <- TRUE
        }
    }
    # The default for plot and lines is to add confidence limits
    #  if there is only one curve
    if (!missing(conf.type) || is.null(x$conf.type)) 
        conf.type <- match.arg(conf.type) # check for legal
    else conf.type <- x$conf.type  # use the default in the curve
    if (conf.type=="none") conf.int <- FALSE

    if (missing(conf.int) && missing(conf.times))  
        conf.int <- (!is.null(x$std.err) && prod(dim(x) ==1))

    if (missing(conf.times)) conf.times <- NULL   
    else {
        if (!is.numeric(conf.times)) stop('conf.times must be numeric')
        if (missing(conf.int)) conf.int <- TRUE
    }
    if (!missing(conf.int)) {
        if (is.numeric(conf.int)) {
            conf.level <- conf.int
            if (conf.level<0 || conf.level > 1)
                stop("invalid value for conf.int")
            if (conf.level ==0) conf.int <- FALSE
            else if (conf.level != x$conf.int) {
                x$upper <- x$lower <- NULL  # force recomputation
            }
            conf.int <- TRUE
        }
        else conf.level = 0.95
    }

    # Organize data into stime, ssurv, supper, slower
    stime <- x$time
    std   <- NULL
    yzero <- FALSE   #a marker that we have an "ordinary survival curve" with min 0
    smat <- function(x) {
        # the rest of the routine is simpler if everything is a matrix
        dd <- dim(x)
        if (is.null(dd)) as.matrix(x)
        else if (length(dd) ==2) x
        else matrix(x, nrow=dd[1])
    }

    if (is.numeric(cumhaz)) { # plot the cumulative hazard
        if (!inherits(x, "survfitms") && any(cumhaz != 1))
            stop("numeric cumhaz argument only applies to multi-state")
        dd <- dim(x$cumhaz)
        if (is.null(dd)) nhazard <- 1
        else nhazard <- prod(dd[-1])

        if (!all(cumhaz == floor(cumhaz))) stop("cumhaz argument is not integer")
        if (any(cumhaz < 1 | cumhaz > nhazard)) stop("subscript out of range")
        ssurv <- smat(x$cumhaz)[,cumhaz, drop=FALSE]
        if (!is.null(x$std.chaz)) std <- smat(x$std.chaz)[,cumhaz, drop=FALSE]
        cumhaz <- TRUE # for the rest of the code
    } else if (cumhaz) {
        if (is.null(x$cumhaz)) 
            stop("survfit object does not contain a cumulative hazard")
        ssurv <- smat(x$cumhaz)
        if (!is.null(x$std.chaz)) std <- smat(x$std.chaz)
    }
    else if (inherits(x, "survfitms")) {
        if (!missing(cumprob) && !(is.logical(cumprob) && !cumprob)) {
            if (conf.int) 
                stop("confidence intervals not available when cumprob=TRUE")
            dd <- dim(x)
            j <- match("states", names(dd), nomatch=0)
            if (j==0) stop("survfitms object with no states dimension")
            
            #  cumprob is T/F or a vector of integers
            if (is.logical(cumprob)) cumprob <- 1:dd[j]
            else if (!is.numeric(cumprob) || any(cumprob <1 | cumprob > dd[j])
                     || any(cumprob != floor(cumprob)))
                stop("cumprob contains an invalid numeric")
                
            if (dd[j] ==1) {
                # nothing to do, user subscripted to only 1 state
                ssurv <- x$pstate
            } else {
                # reorder the states, pstate has dimension 2 or 3,
                #  time/strata is first, data (if present), then states
                #  (dd is the dimension from the user's point of view, of
                #  strata, data, state)
                if (length(dim(x$pstate))==2) {
                    # drop = FALSE for the rare case of a single time point
                    ssurv <- t(apply(x$pstate[,cumprob, drop=FALSE],1,cumsum))
                } else {
                    temp <- apply(x$pstate[,,cumprob, drop=FALSE],1:2, cumsum)
                    ssurv <- smat(aperm(temp, c(2,3,1)))
                }
                cumprob <- TRUE  # for the lastx line
            }
        } else {
            i <- !(x$states %in% noplot)
            if (all(i) || !any(i)) {
                # the !any is a failsafe, in case none are kept we ignore noplot
                ssurv <- smat(x$pstate)
                if (!is.null(x$std.err)) std <- smat(x$std.err)
                if (!is.null(x$lower)) {
                    slower <- smat(x$lower)
                    supper <- smat(x$upper)
                }
            }
            else {
                i <- which(i)  # the states to keep
                # we have to be careful about subscripting
                if (length(dim(x$pstate)) ==3) {
                    ssurv <- smat(x$pstate[,,i, drop=FALSE])
                    if (!is.null(x$std.err))
                        std <- smat(x$std.err[,,i, drop=FALSE])
                    if (!is.null(x$lower)) {
                        slower <- smat(x$lower[,,i, drop=FALSE])
                        supper <- smat(x$upper[,,i, drop=FALSE])
                    }
                }
                else {
                    ssurv <- x$pstate[,i, drop=FALSE]
                    if (!is.null(x$std.err)) std <- x$std.err[,i, drop=FALSE]
                    if (!is.null(x$lower)) {
                        slower <- smat(x$lower[,i, drop=FALSE])
                        supper <- smat(x$upper[,i, drop=FALSE])
                    }
                }
            }
        }
    }
    else {
        yzero <- TRUE
        ssurv <- as.matrix(x$surv)   # x$surv will have one column
        if (!is.null(x$std.err)) std <- as.matrix(x$std.err)
        # The fun argument usually applies to single state survfit objects
        #  First deal with the special case of fun='cumhaz', which is here for
        #  backwards compatability; people should use the cumhaz argument
        if (!missing(fun) && is.character(fun) && fun=="cumhaz") {
            cumhaz <- TRUE
            if (!is.null(x$cumhaz)) {
                ssurv <- as.matrix(x$cumhaz)
                if (!is.null(x$std.chaz)) std <- as.matrix(x$std.chaz)
            } 
            else {
                ssurv <- as.matrix(-log(x$surv))
                if (!is.null(x$std.err)) {
                    if (x$logse) std <- as.matrix(x$std.err)
                    else std <- as.matrix(x$std.err/x$surv)
                }
             }
        }
    }

    # set up strata
    if (is.null(x$strata)) {
        nstrat <- 1
        stemp <- rep(1, length(x$time)) # same length as stime
    }
    else {
        nstrat <- length(x$strata)
        stemp <- rep(1:nstrat, x$strata) # same length as stime
    }
    ncurve <- nstrat * ncol(ssurv)
    if (missing(conf.type)) {
        missingtype <- TRUE
        conf.type <- match.arg(conf.type)
    } else missingtype <- FALSE  # used below for cumhaz
    if (conf.type=="none") conf.int <- FALSE
    if (conf.int== "none") conf.int <- FALSE
    if (conf.int=="only") {
        plot.surv <- FALSE
        conf.int <- TRUE
        }
    else plot.surv <- TRUE

    if (conf.int) {
        if (is.null(std)) stop("object does not have standard errors, CI not possible")
        if (cumhaz) {
            if (missingtype) conf.type="plain"
            temp <- survfit_confint(ssurv, std, logse=FALSE,
                                    conf.type, conf.level, ulimit=FALSE)
            supper <- as.matrix(temp$upper)
            slower <- as.matrix(temp$lower)
        }
        else if (is.null(x$upper)) {
            if (missing(conf.type) && !is.null(x$conf.type))
                conf.type <- x$conf.type
            temp <- survfit_confint(ssurv, std, logse= x$logse,
                                    conf.type, conf.level, ulimit=FALSE)
            supper <- as.matrix(temp$upper)
            slower <- as.matrix(temp$lower)
        }
        else if (!inherits(x, "survfitms")) {
            supper <- as.matrix(x$upper)
            slower <- as.matrix(x$lower)
        }
    } else supper <- slower <- NULL
    if (!missing(fun)){
        if (is.character(fun)) {
            if (cumhaz) {
                tfun <- switch(tolower(fun),
                               'log' = function(x) x,
                               'cumhaz'=function(x) x,
                               'identity'= function(x) x,
                               stop("Invalid function argument")
                               )
            } else if (inherits(x, "survfitms")) {
                tfun <-switch(tolower(fun),
                              'log' = function(x) log(x),
                              'event'=function(x) x,
                              'cloglog'=function(x) log(-log(1-x)),
                              'cumhaz' = function(x) x,
                              'pct' = function(x) x*100,
                              'identity'= function(x) x,
                              stop("Invalid function argument")
                              )
            } else {
                yzero <- FALSE
                tfun <- switch(tolower(fun),
                           'log' = function(x) x,
                           'event'=function(x) 1-x,
                           'cumhaz'=function(x) x,
                           'cloglog'=function(x) log(-log(x)),
                           'pct' = function(x) x*100,
                           'logpct'= function(x) 100*x,  #special case further below
                           'identity'= function(x) x,
                           'f' = function(x) 1-x,
                           's' = function(x) x,
                           'surv' = function(x) x,
                           stop("Unrecognized function argument")
                           )
            }
        }
        else if (is.function(fun)) tfun <- fun
        else stop("Invalid 'fun' argument")
        
        ssurv <- tfun(ssurv )
        if (!is.null(supper)) {
            supper <- tfun(supper)
            slower <- tfun(slower)
        }
    }

    # Marks are not placed on confidence bands, pch only applies to marks
    if (is.numeric(mark.time) || mark.time) {
        if (is.numeric(mark.time)) mark.time <- sort(mark.time)
        if (missing(pch)) pch <- rep(3, ncurve)
        else pch  <- rep(pch, length.out=ncurve)

        if (!missing(mark.col)) mark.col <- rep(mark.col, length.out=ncurve)
        else if (missing(col)) mark.col <- rep(1L, ncurve)
        else mark.col <- rep(col, length.out=ncurve)
    }

    # The actual number of curves is ncurve*3 if there are confidence bands,
    #  unless conf.times has been given.  Colors and line types in the latter
    #  match the curves
    # If the number of line types is 1 and lty is an integer, then use lty 
    #    for the curve and lty+1 for the CI
    # If the length(lty) <= length(ncurve), use the same color for curve and CI
    #   otherwise assume the user knows what they are about and has given a full
    #   vector of line types.
    # Colors and line widths work like line types, excluding the +1 rule.
    if (conf.int & is.null(conf.times)) {
        if (length(lty)==1 && is.numeric(lty))
            lty <- rep(c(lty, lty+1, lty+1), ncurve)
        else if (length(lty) <= ncurve)
            lty <- rep(rep(lty, each=3), length.out=(ncurve*3))
        else lty <- rep(lty, length.out= ncurve*3)
        
        if (length(col) <= ncurve) col <- rep(rep(col, each=3), length.out=3*ncurve)
        else col <- rep(col, length.out=3*ncurve)
        
        if (length(lwd) <= ncurve) lwd <- rep(rep(lwd, each=3), length.out=3*ncurve)
        else lwd <- rep(lwd, length.out=3*ncurve)
    }
    else {
        col  <- rep(col, length.out=ncurve)
        lty  <- rep(lty, length.out=ncurve)
        lwd  <- rep(lwd, length.out=ncurve)
    }
    # check consistency
    if (!missing(xlim)) {
        if (!missing(xmax)) warning("cannot have both xlim and xmax arguments, xmax ignored")
        if (!missing(firstx)) stop("cannot have both xlim and firstx arguments")
    }
    if (!missing(ylim)) {
        if (!missing(ymin)) stop("cannot have both ylim and ymin arguments")
    }

    # Do axis range computations
    if (!missing(xlim) && !is.null(xlim)) {
        tempx <- xlim
        xmax <- xlim[2]
        if (xaxs == 'S') tempx[2] <- tempx[1] + diff(tempx)*1.04
    }
    else {
        temp <-  stime[is.finite(stime)]
        if (!missing(xmax) && missing(xlim)) temp <- pmin(temp, xmax)
        else xmax <- NULL
        
        if (xaxs=='S') {
            rtemp <- range(temp)
            delta <- diff(rtemp)
            #special x- axis style for survival curves
            if (xlog) tempx <- c(min(rtemp[rtemp>0]), min(rtemp)+ delta*1.04)
            else tempx <- c(min(rtemp), min(rtemp)+ delta*1.04)
        }
        else if (xlog) tempx <- range(temp[temp > 0])
        else tempx <- range(temp)
    }  
    if (!missing(xlim) || !missing(xmax)) 
        options(plot.survfit = list(xmax=tempx[2]))
    else options(plot.survfit = NULL)

    if (!missing(ylim) && !is.null(ylim)) tempy <- ylim
    else {
        skeep <- is.finite(stime) & stime >= tempx[1] & stime <= tempx[2]

        if (ylog) {
            if (!is.null(supper))
                tempy <- range(c(slower[is.finite(slower) & slower>0 & skeep], 
                                 supper[is.finite(supper) & skeep]))
            else tempy <-  range(ssurv[is.finite(ssurv)& ssurv>0 & skeep])
            if (tempy[2]==1) tempy[2] <- .99   # makes for a prettier axis
            if (any(c(ssurv, slower)[skeep] ==0)) {
                tempy[1] <- tempy[1]*.8
                ssurv[ssurv==0] <- tempy[1]
                if (!is.null(slower))  slower[slower==0] <- tempy[1]
            }
        }
        else {
            if (!is.null(supper)) 
                tempy <- range(c(supper[skeep], slower[skeep]), finite=TRUE, na.rm=TRUE)
            else tempy <- range(ssurv[skeep], finite=TRUE, na.rm= TRUE)
            if (yzero) tempy <- range(c(0, tempy))
        }
    }

    if (!missing(ymin)) tempy[1] <- ymin

    #
    # Draw the basic box
    #
    temp <- if (xaxs=='S') 'i' else xaxs
    plot(range(tempx, finite=TRUE, na.rm=TRUE)/xscale, 
         range(tempy, finite=TRUE, na.rm=TRUE)*yscale, 
         type='n', log=logax, xlab=xlab, ylab=ylab, xaxs=temp,...)
    if(yscale != 1) {
        if (ylog) par(usr =par("usr") -c(0, 0, log10(yscale), log10(yscale))) 
        else par(usr =par("usr")/c(1, 1, yscale, yscale))   
    }
    if (xscale !=1) {
        if (xlog) par(usr =par("usr") -c(log10(xscale), log10(xscale), 0,0)) 
        else par(usr =par("usr")*c(xscale, xscale, 1, 1))   
    } 
    # The use of [[par(usr)]] just above is a bit sneaky.  I want the
    # lines and points routines to be able to add to the plot, *without*
    # passing them a global parameter that determines the y-scale or forcing
    # the user to repeat it.

    # Create a step function, removing redundancies that sometimes occur in
    #  curves with lots of censoring.
    dostep <- function(x,y) {
        keep <- is.finite(x) & is.finite(y) 
        if (!any(keep)) return()  #all points were infinite or NA
        if (!all(keep)) {
            # these won't plot anyway, so simplify (CI values are often NA)
            x <- x[keep]
            y <- y[keep]
        }
        n <- length(x)
        if (n==1)       list(x=x, y=y)
        else if (n==2)  list(x=x[c(1,2,2)], y=y[c(1,1,2)])
        else {
            # replace verbose horizonal sequences like
            # (1, .2), (1.4, .2), (1.8, .2), (2.3, .2), (2.9, .2), (3, .1)
            # with (1, .2), (.3, .2),(3, .1).  
            #  They are slow, and can smear the looks of the line type.
            temp <- rle(y)$lengths
            drops <- 1 + cumsum(temp[-length(temp)])  # points where the curve drops

            #create a step function
            if (n %in% drops) {  #the last point is a drop
                xrep <- c(x[1], rep(x[drops], each=2))
                yrep <- rep(y[c(1,drops)], c(rep(2, length(drops)), 1))
            }
            else {
                xrep <- c(x[1], rep(x[drops], each=2), x[n])
                yrep <- c(rep(y[c(1,drops)], each=2))
            }
            list(x=xrep, y=yrep)
        }
    }

    drawmark <- function(x, y, mark.time, censor, cex, ...) {
        if (!is.numeric(mark.time)) {
            xx <- x[censor>0]
            yy <- y[censor>0]
            if (any(censor >1)) {  # tied death and censor, put it on the midpoint
                j <- pmax(1, which(censor>1) -1)
                i <- censor[censor>0]
                yy[i>1] <- (yy[i>1] + y[j])/2
            }
        }
        else { #interpolate
            xx <- mark.time
            yy <- approx(x, y, xx, method="constant", f=0)$y
        }
        points(xx, yy, cex=cex, ...)
    }
    type <- 's'
    c1 <- 1  # keeps track of the curve number
    c2 <- 1  # keeps track of the lty, col, etc
    xend <- yend <- double(ncurve)
    if (length(conf.offset) ==1) 
        temp.offset <- (1:ncurve - (ncurve+1)/2)* conf.offset* diff(par("usr")[1:2])
    else temp.offset <- rep(conf.offset, length=ncurve) *  diff(par("usr")[1:2])
    temp.cap    <-  conf.cap    * diff(par("usr")[1:2])

    for (j in 1:ncol(ssurv)) {
        for (i in unique(stemp)) {  #for each strata
            who <- which(stemp==i)

            # if n.censor is missing, then assume any line that does not have an
            #   event would not be present but for censoring, so there must have
            #   been censoring then
            # otherwise categorize is 0= no censor, 1=censor, 2=censor and death
            if (is.null(x$n.censor)) censor <- ifelse(x$n.event[who]==0, 1, 0)
            else censor <- ifelse(x$n.censor[who]==0, 0, 1 + (x$n.event[who] > 0))
            xx <- stime[who]
            yy <- ssurv[who,j]
            if (conf.int) {
                ylower <- (slower[who,j])
                yupper <- (supper[who,j])
            }
            if (!is.null(xmax) && max(xx) > xmax) {  # truncate on the right
                # say that there are points at 10 and 20, with xmax=15
                # we want to draw the line up to time 15, so we don't want to
                # toss out the point at 20, rather change its time to 15.
                # But, we also need to remove the mark
                xn <- min(which(xx > xmax))
                xx <- xx[1:xn]
                yy <- yy[1:xn]
                if (xx[xn] > xmax) censor[xn] <-0  # don't draw a "+" here 
                xx[xn] <- xmax
                yy[xn] <- yy[xn-1]
                if (conf.int) {
                    ylower <- ylower[1:xn]
                    yupper <- yupper[1:xn]
                    ylower[xn] <- ylower[xn-1]
                    yupper[xn] <- yupper[xn-1]
                }
            }
                

            if (plot.surv) {
                if (type=='s')
                    lines(dostep(xx, yy), lty=lty[c2], col=col[c2], lwd=lwd[c2]) 
                else lines(xx, yy, type=type, lty=lty[c2], col=col[c2], lwd=lwd[c2])
                if (is.numeric(mark.time) || mark.time) 
                    drawmark(xx, yy, mark.time, censor, pch=pch[c1], 
                             col=mark.col[c1], cex=cex)
            }
            xend[c1] <- max(xx)
            yend[c1] <- yy[length(yy)]

            if (conf.int && !is.null(conf.times)) {
                # add vertical bars at the specified times
                x2 <- conf.times + temp.offset[c1]
                templow <- approx(xx, ylower, x2,
                                  method='constant', f=1)$y
                temphigh<- approx(xx, yupper, x2,
                                  method='constant', f=1)$y
                segments(x2, templow, x2, temphigh,
                          lty=lty[c2], col=col[c2], lwd=lwd[c2])
                if (conf.cap>0) {
                    segments(x2-temp.cap, templow, x2+temp.cap, templow,
                             lty=lty[c2], col=col[c2], lwd=lwd[c2] )
                    segments(x2-temp.cap, temphigh, x2+temp.cap, temphigh,
                              lty=lty[c2], col=col[c2], lwd=lwd[c2])
                }
               
            }
            c1 <- c1 +1
            c2 <- c2 +1

            if (conf.int && is.null(conf.times)) {
                if (type == 's') {
                    lines(dostep(xx, ylower), lty=lty[c2], 
                          col=col[c2],lwd=lwd[c2])
                    c2 <- c2 +1
                    lines(dostep(xx, yupper), lty=lty[c2], 
                          col=col[c2], lwd= lwd[c2])
                    c2 <- c2 + 1
                }
                else {
                    lines(xx, ylower, lty=lty[c2], 
                          col=col[c2],lwd=lwd[c2], type=type) 
                    c2 <- c2 +1
                    lines(xx, yupper, lty=lty[c2], 
                          col=col[c2], lwd= lwd[c2], type= type)
                    c2 <- c2 + 1
                }
             }

        }
    }

    if (cumprob) {
        if (!is.null(xmax) && max(stime) > xmax) {  # truncate on the right
            keep <- (stime <= xmax)
            lastx <- list(x = stime[keep], y= ssurv[,keep])
        }
        else lastx <- list(x=stime, y=ssurv)
    }
    else lastx <- list(x=xend, y=yend)
    invisible(lastx)
}