Nothing
#
# The redistribute-to-the-right algorithm
# Useful for teaching, and for data checks
# Input
# formula: usually Surv(time, status) ~1.
# data, weights, subset, na.action: usual formula arguments
# times: vector of reporting times. If omitted, the largest event time
# is used
#
# Output: a vector or matrix of weights, one row per observation
#
rttright <- function(formula, data, weights, subset, na.action, times,
id, timefix=TRUE, renorm=TRUE) {
Call <- match.call() # save a copy of the call
if (missing(times)) times <- NULL
# The first chunk of this function is essentially a copy of the code
# from the survfit function, the exception being how covariates are handled
# This is more fussy than needed, but the extra complexity save us from
# rare edge cases. User's do odd things. (I expect
# someone to just paste in one of their coxph calls). Because
# we have it worked out, we use it.
#
indx <- match(c('formula', 'data', 'weights', 'subset','na.action',
'istate', 'id'), names(Call), nomatch=0)
#It's very hard to get the next error message other than malice
# eg survfit(wt=Surv(time, status) ~1)
if (indx[1]==0) stop("a formula argument is required")
temp <- Call[c(1, indx)]
temp[[1L]] <- quote(stats::model.frame)
mf <- eval.parent(temp)
Terms <- terms(formula, c("strata", "cluster"))
ord <- attr(Terms, 'order')
if (length(ord) & any(ord !=1))
stop("Interaction terms are not valid for this function")
n <- nrow(mf)
Y <- model.response(mf)
ny <- ncol(Y)
if (!is.Surv(Y)) stop("response must be a Surv object")
type <- attr(Y, "type")
if (!(type %in% c("right", "mright", "counting", "mcounting")))
stop("response must be right censored")
casewt <- model.extract(mf, "weights")
if (is.null(casewt)) casewt <- rep(1, n)
else {
if (!is.numeric(casewt)) stop("weights must be numeric")
if (any(!is.finite(casewt))) stop("weights must be finite")
if (any(casewt <0)) stop("weights must be non-negative")
casewt <- as.numeric(casewt) # transform integer to numeric
}
if (!is.null(attr(Terms, 'offset'))) warning("Offset term ignored")
temp <- untangle.specials(Terms, "cluster")
if (length(temp$vars)>0) {
Terms <- Terms[-temp$terms]
}
ll <- attr(Terms, 'term.labels')
if (length(ll) == 0) X <- factor(rep(1,n)) # ~1 on the right
else X <- strata(mf[ll])
# Deal with the near-ties problem
if (!is.logical(timefix) || length(timefix) > 1)
stop("invalid value for timefix option")
if (timefix) Y <- aeqSurv(Y)
id <- model.extract(mf, "id")
if (ny==3 && (is.null(id))) stop("id is required for start-stop data")
if (!is.null(id)) {
if (is.null(attr(Y, 'states'))) {
ytemp <- Y
attr(ytemp, 'states') <- 'event' # survcheck2 wants a states attr
check <- survcheck2(ytemp, id)
}
else check <- survcheck2(Y, id)
if (any(check$flag > 0))
stop("one or more flags are >0 in survcheck")
n.startstate <- sum(check$transitions[,1] >1)
if (ny ==2) samestart=TRUE # everyone starts at the same time
else {
etemp <- tapply(Y[,1], id, min)
samestart <- all(etemp== etemp[1])
}
temp <- tapply(casewt, id, function(x) diff(range(x)))
if (any(temp > 0)) stop("there are subjects with multiple weights")
} else check <- NULL
# Finally, it's time to do the actual work.
# For simple survival or competing risks data,
# we can use the a compact algorithm that needs only one call to survfit
# The compact algorithm depends on G(t), the censoring distribution
if (is.null(check) || (n.startstate==1 & samestart)) {
istrat <- as.numeric(X) # X will be a factor
nstrat <- max(istrat)
if (renorm) {
for (i in unique(istrat)) {
j <- which(istrat==i)
if (is.null(id)) uj <- j
else uj <- j[!duplicated(id[j])] # don't count someone twice
casewt[j] <- casewt[j]/sum(casewt[uj])
}
}
# Compute the censoring distribution $G$
# 1. G(t) is left continuous, the KM is right continuous
# 2. For tied times, censors happen after deaths
# 3. For an id with multiple observations, censorings in the middle
# are not true censoring
#
# Mark the actual censoring times.
last <- NULL
if (ny==2) censor <- ifelse(Y[,2]== 0, 1, 0)
else {
ord <- order(id, Y[,2]) # time within subject
ltemp <- !duplicated(id[ord], fromLast=TRUE) # last for each id
last <- ltemp[order(ord)] # marks the last obs, in data order
censor <- ifelse(last & Y[,3]==0, 1, 0)
}
# Compute the censoring distribution by creating y2, which is Y with
# the censors shifted right by just a bit. If the type of Y is
# mright or mcounting, change it to right or counting
delta <- min(diff(sort(unique(c(Y[,-ny], times))))) /2
y2 <- Y
y2[,ny] <- censor
y2[censor==1, ny-1] <- y2[censor==1, ny-1] + delta
attr(y2, "type") <- if (ny==2) "right" else "counting"
G <- survfitKM(X, y2, casewt, se.fit=FALSE)
class(G) <- "survfit"
# The ouput matrix has one column for each requested time point
if (is.null(times)) { # single time point
grabwt <- function(Y, GG, casewt, last, times) {
# See the longer discusson below
index <- findInterval(Y[,ny-1], GG$time, left.open=TRUE)
gwt <- c(1,GG$surv)[1+ index] # gwt at event time
if (ncol(Y)==2) haswt <- (Y[,2] > 0)
else haswt <- (last & Y[,3]>0)
unname(ifelse (haswt, casewt/gwt, 0))
}
times <- 0 # a dummy value
}
else {
# for (time1, time2) data the weight is passed forward from row to
# row for a person. The last row, in not censored, retains the
# weight.
# Result is a matrix with one row for each Y, one col for each
# time. Column j of this with be for times[j]
# If Y[,1] >= times[j] the weight is 0. The weight for a set of
# rows belonging to a given id gets handed off like a baton in a
# relay race, and this row hasn't yet received it.
# If Y[,2] < times[j] the weight is 0 as well, unless this is the
# last, uncensored row for a subject. That weight preserves: there
# is no hand off. Censors "hand off" by redistribution.
# Otherwise, the weight = casewt/gwt[col]: not a last row for an id)
# casewt/max(gwt[col], gwt2[row]): last row
#
# For (time, status) data this is simpler: every row is a last row
grabwt <- function(Y, GG, casewt, last, times) {
# G(t) at censoring times
indx <- findInterval(times, GG$time, left.open=TRUE)
gwt <- c(1, GG$surv)[1+indx]
indx2 <- findInterval(Y[,ny-1], GG$time, left.open=TRUE)
gwt2 <- c(1, GG$surv)[1+indx2]
ntime <- length(times)
n <- nrow(Y)
if (ncol(Y)==2) {
wtmat <- ifelse(outer(Y[,1], times, ">="),
outer(casewt, gwt, "/"), 0)
indx3 <- (Y[,2] > 0) # events
wtmat[indx3,] <- casewt[indx3]/outer(gwt2[indx3], gwt, pmax)
}
else {
inner <- outer(Y[,1], times, "<") &
outer(Y[,2], times, ">=")
wtmat <- ifelse(inner, outer(casewt, gwt, "/"), 0)
indx3 <- (Y[,2]> 0 & last)
wtmat[indx3,] <-
casewt[indx3] / outer(gwt2[indx3], gwt, pmax)
}
wtmat
}
}
if (nstrat ==1) { # only one stratum
wtmat <- grabwt(Y, G, casewt, last, times)
} else {
wtmat <- matrix(0, n, length(times))
for (i in 1:nstrat) {
who <- (istrat==i)
wtmat[who,] <- grabwt(Y[who,], G[i], casewt[who], last[who],
times)
}
}
} else {
# The more difficult case, where there are multiple states or delayed
# entry
stop("function not defined for delayed entry or multistate data")
}
if (length(times)> 1) {
dimnames(wtmat) <- list(NULL, times)
wtmat
}
else drop(wtmat)
}
#
# Here is the "cheating" version of the function
# I know that the weights, done correctly, will reproduce the Aalen-Johansen
# So compute the AJ and work backwards
rttright2 <- function(formula, data, weights, subset, na.action, times,
id, timefix=TRUE, renorm=TRUE) {
Call <- match.call() # save a copy of the call
indx <- match(c('formula', 'data', 'weights', 'subset','na.action',
'istate', 'id'), names(Call), nomatch=0)
if (indx[1]==0) stop("a formula argument is required")
temp <- Call[c(1, indx)]
temp[[1L]] <- quote(survival::survfit)
temp$model <- TRUE
sfit <- eval.parent(temp) # a KM or Aalen-Johansen
X <- model.matrix(sfit$model) # get the grouping variable for each obs
Y <- model.response(sfit$model)
n <- nrow(Y)
if (!renorm) {
wt <-model.weights(sfit$model)
if (is.null(wt)) wt <- rep(1, n)
}
dd <- dim(sfit)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.