R/itree.exp.R

Defines functions itree.exp

#ALG: itree copies this direclty from rpart.

#SCCS @(#)rpart.exp.s	1.8 07/05/01
# rescaled exponential splitting
#  The survival object 'y' is rescaled so that
#    a. overall death rate is 1.0
#    b. death rate within small intervals of time is 1
#  The first makes printouts easier to read, since the rates in subnodes are
#  numbers like "1.23" (23% higher event rate than the root node) instead
#  of "0.0014" (which requires looking back at the root node rate and
#  dividing).  The second makes the data appear to be Poisson, and causes
#  the early splits at least to be equivalent to the local full likelihood
#  method of LeBlanc and Crowley
#
itree.exp <- function(y, offset, parms, wt) {

    if (!inherits(y, "Surv"))
	   stop("Response must be a survival object - use the Surv() function")

    ny <- ncol(y)
    n  <- nrow(y)

    status <- y[,ny]
    if (any(y[,1L]<=0)) stop("Observation time must be > 0")
    if (all(status==0)) stop("No deaths in data set")
    time <- y[ , ny-1L]

    # Make my table of time intervals.  The first goes from 0 to the first
    #   death, the next from death 2 to death 3, ..., and the last from
    #   "next to last death" to "max time in dataset".
    # We also need to avoid a pathological condition in some data sets, where
    #   two death times differ by a trivial amount, e.g., 10^-16, perhaps due
    #   to roundoff error in creating the input data.  Ammalgamate such
    #   intervals.  This turns out to be hard to do in S, but easy in C
    dtimes <- sort(unique(time[status==1]))        # unique death times
    temp <- .C(C_rpartexp2,
	       as.integer(length(dtimes)),
	       as.double(dtimes),
	       as.double(.Machine$double.eps),
	       keep=integer(length(dtimes)))$keep
    dtimes <- dtimes[temp==1]

    # For the sake of speed, restrict the number of intervals to be <1000.
    #   (Actually, anything >100 is probably overkill for the
    #   actual task at hand, which is to approximately scale to exponential).
    if (length(dtimes) > 1000) dtimes <- quantile(dtimes, 0:1000/1000)

    # The last interval goes to the max time in the data set
    itable <- c(0, dtimes[-length(dtimes)], max(time)) # set of intervals


    drate2 <- function(n, ny, y, wt, itable) {
	# An alternative to the drate1 function
	# Why?  The pyears2 routine changed in 6/2001, with the inclusion
	#  of case weights.  We need the newer version.  If you have the
	#  older version of the survival library, the above will crash S.
	# How to tell -- list the pyears function, and see whether it's
	#  call to pyears2 has weights in the argument list.
	#
	time <- y[, ny-1L]
	status <- y[,ny]
	ilength <- diff(itable)                   #lengths of intervals
	ngrp <- length(ilength)                   #number of intervals

	# The code below is as opaque as any I've written, all in the
	#  service of "no for loops".
	# First, 'index' gives the time interval (as defined by itable)
	#  in which the end of each observation's follow-up (time) lies.
	#  Then 'itime' will be the amount of time spent in that last
	#  interval, which is of course somewhat less than ilength.
	index <- unclass(cut(time, itable, include.lowest=TRUE))
	itime <- time - itable[index]
	if (ny ==3L) {
	    # there is both a start time and a stop time
	    #  compute the amount of time NOT spent in the interval that
	    #  the start time lies in.
	    stime <- y[,1L]   #start time for each interval
	    index2<- unclass(cut(stime, itable, include.lowest=TRUE))
	    itime2<- stime - itable[index2]
	    }

	# Compute the amount of person-years in each of the intervals
	#   This is:  (width of interval) * (number of "time" elements that
	#                                     end in an interval farther right)
	#            + (ending times in this interval)
	# By construction, I know that there is at least 1 obs in each of the
	#  intervals, so tab1 is of a determined length
	tab1 <- table(index)
	temp <- rev(cumsum(rev(tab1)))  #cumsum, counting from the right
	pyears <- ilength * c(temp[-1L], 0) +	 tapply(itime, index, sum)
	if (ny==3L) {
	    #subtract off the time before "start"
	    tab2 <- table(index2, levels=1:ngrp) #force the length of tab2
	    temp <- rev(cumsum(rev(tab2)))
	    py2  <-  ilength * c(0, temp[-ngrp]) +  tapply(itime2, index2, sum)
	    pyears <- pyears - py2
	    }

	deaths <- tapply(status, index, sum)
	rate <- deaths/pyears   #hazard rate in each interval
	rate
	}

    #
    # Now, compute the "new y" for each observation.
    #  This is a stretching of the time axis
    # The cumulative hazard over each interval is rate*length(interval),
    #  and is the basis of the rescaling.
    rate <- drate2(n, ny, y, wt, itable)
    cumhaz <- cumsum(c(0, rate*diff(itable)))
    newy <- approx(itable, cumhaz, time)$y
    if (ny==3L) {
	newy <- newy - approx(itable, cumhaz, y[,1L])$y
	}

    if (length(offset)==n)  newy <- newy * exp(offset)

    if (missing(parms)) parms <- c(shrink=1L, method=1L)
    else {
	parms <- as.list(parms)
        if(is.null(names(parms))) stop("You must input a named list for parms")
        parmsNames <- c("method", "shrink")
        indx <- pmatch(names(parms), parmsNames, nomatch= 0L)
        if (any(indx==0L))
            stop("'parms' component not matched: ", names(parms)[indx==0L])
	else names(parms) <- parmsNames[indx]

	if (is.null(parms$method)) method <- 1L
	else method <- pmatch(parms$method, c("deviance", "sqrt"))
	if (is.na(method)) stop("Invalid error method for Poisson")

	if (is.null(parms$shrink)) shrink <- 2L-method
	else shrink <- parms$shrink
	if (!is.numeric(shrink) || shrink < 0L)
		stop("Invalid shrinkage value")
	parms <- c(shrink=shrink, method=method)
	}
    list(y=cbind(newy, y[,2L]), parms=parms, numresp=2L, numy=2L,
	 summary= function(yval, dev, wt, ylevel, digits) {
	     paste("  events=", formatg(yval[,2]),
		",  estimated rate=" , formatg(yval[,1], digits),
		" , mean deviance=",formatg(dev/wt, digits),
		sep = "")
	     },
	 text= function(yval, dev, wt, ylevel, digits, n, use.n) {
	     if(use.n) {paste(formatg(yval[,1L],digits),"\n",
				formatg(yval[,2L]),"/",n,sep="")} else
		    {paste(formatg(yval[,1L],digits))}
	     })
    }

Try the itree package in your browser

Any scripts or data that you put into this service are public.

itree documentation built on May 2, 2019, 7:25 a.m.