Nothing
#ALG: itree copies this direclty from rpart.
#SCCS @(#)rpart.exp.s 1.8 07/05/01
# rescaled exponential splitting
# The survival object 'y' is rescaled so that
# a. overall death rate is 1.0
# b. death rate within small intervals of time is 1
# The first makes printouts easier to read, since the rates in subnodes are
# numbers like "1.23" (23% higher event rate than the root node) instead
# of "0.0014" (which requires looking back at the root node rate and
# dividing). The second makes the data appear to be Poisson, and causes
# the early splits at least to be equivalent to the local full likelihood
# method of LeBlanc and Crowley
#
itree.exp <- function(y, offset, parms, wt) {
if (!inherits(y, "Surv"))
stop("Response must be a survival object - use the Surv() function")
ny <- ncol(y)
n <- nrow(y)
status <- y[,ny]
if (any(y[,1L]<=0)) stop("Observation time must be > 0")
if (all(status==0)) stop("No deaths in data set")
time <- y[ , ny-1L]
# Make my table of time intervals. The first goes from 0 to the first
# death, the next from death 2 to death 3, ..., and the last from
# "next to last death" to "max time in dataset".
# We also need to avoid a pathological condition in some data sets, where
# two death times differ by a trivial amount, e.g., 10^-16, perhaps due
# to roundoff error in creating the input data. Ammalgamate such
# intervals. This turns out to be hard to do in S, but easy in C
dtimes <- sort(unique(time[status==1])) # unique death times
temp <- .C(C_rpartexp2,
as.integer(length(dtimes)),
as.double(dtimes),
as.double(.Machine$double.eps),
keep=integer(length(dtimes)))$keep
dtimes <- dtimes[temp==1]
# For the sake of speed, restrict the number of intervals to be <1000.
# (Actually, anything >100 is probably overkill for the
# actual task at hand, which is to approximately scale to exponential).
if (length(dtimes) > 1000) dtimes <- quantile(dtimes, 0:1000/1000)
# The last interval goes to the max time in the data set
itable <- c(0, dtimes[-length(dtimes)], max(time)) # set of intervals
drate2 <- function(n, ny, y, wt, itable) {
# An alternative to the drate1 function
# Why? The pyears2 routine changed in 6/2001, with the inclusion
# of case weights. We need the newer version. If you have the
# older version of the survival library, the above will crash S.
# How to tell -- list the pyears function, and see whether it's
# call to pyears2 has weights in the argument list.
#
time <- y[, ny-1L]
status <- y[,ny]
ilength <- diff(itable) #lengths of intervals
ngrp <- length(ilength) #number of intervals
# The code below is as opaque as any I've written, all in the
# service of "no for loops".
# First, 'index' gives the time interval (as defined by itable)
# in which the end of each observation's follow-up (time) lies.
# Then 'itime' will be the amount of time spent in that last
# interval, which is of course somewhat less than ilength.
index <- unclass(cut(time, itable, include.lowest=TRUE))
itime <- time - itable[index]
if (ny ==3L) {
# there is both a start time and a stop time
# compute the amount of time NOT spent in the interval that
# the start time lies in.
stime <- y[,1L] #start time for each interval
index2<- unclass(cut(stime, itable, include.lowest=TRUE))
itime2<- stime - itable[index2]
}
# Compute the amount of person-years in each of the intervals
# This is: (width of interval) * (number of "time" elements that
# end in an interval farther right)
# + (ending times in this interval)
# By construction, I know that there is at least 1 obs in each of the
# intervals, so tab1 is of a determined length
tab1 <- table(index)
temp <- rev(cumsum(rev(tab1))) #cumsum, counting from the right
pyears <- ilength * c(temp[-1L], 0) + tapply(itime, index, sum)
if (ny==3L) {
#subtract off the time before "start"
tab2 <- table(index2, levels=1:ngrp) #force the length of tab2
temp <- rev(cumsum(rev(tab2)))
py2 <- ilength * c(0, temp[-ngrp]) + tapply(itime2, index2, sum)
pyears <- pyears - py2
}
deaths <- tapply(status, index, sum)
rate <- deaths/pyears #hazard rate in each interval
rate
}
#
# Now, compute the "new y" for each observation.
# This is a stretching of the time axis
# The cumulative hazard over each interval is rate*length(interval),
# and is the basis of the rescaling.
rate <- drate2(n, ny, y, wt, itable)
cumhaz <- cumsum(c(0, rate*diff(itable)))
newy <- approx(itable, cumhaz, time)$y
if (ny==3L) {
newy <- newy - approx(itable, cumhaz, y[,1L])$y
}
if (length(offset)==n) newy <- newy * exp(offset)
if (missing(parms)) parms <- c(shrink=1L, method=1L)
else {
parms <- as.list(parms)
if(is.null(names(parms))) stop("You must input a named list for parms")
parmsNames <- c("method", "shrink")
indx <- pmatch(names(parms), parmsNames, nomatch= 0L)
if (any(indx==0L))
stop("'parms' component not matched: ", names(parms)[indx==0L])
else names(parms) <- parmsNames[indx]
if (is.null(parms$method)) method <- 1L
else method <- pmatch(parms$method, c("deviance", "sqrt"))
if (is.na(method)) stop("Invalid error method for Poisson")
if (is.null(parms$shrink)) shrink <- 2L-method
else shrink <- parms$shrink
if (!is.numeric(shrink) || shrink < 0L)
stop("Invalid shrinkage value")
parms <- c(shrink=shrink, method=method)
}
list(y=cbind(newy, y[,2L]), parms=parms, numresp=2L, numy=2L,
summary= function(yval, dev, wt, ylevel, digits) {
paste(" events=", formatg(yval[,2]),
", estimated rate=" , formatg(yval[,1], digits),
" , mean deviance=",formatg(dev/wt, digits),
sep = "")
},
text= function(yval, dev, wt, ylevel, digits, n, use.n) {
if(use.n) {paste(formatg(yval[,1L],digits),"\n",
formatg(yval[,2L]),"/",n,sep="")} else
{paste(formatg(yval[,1L],digits))}
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.