Nothing
# Aalen's additive regression model
# Originally, this tried to call coxph with certain options.
# But we found the passing ... to a model method just doesn't work (for
# optional things like weights). So the first portion of this is
# essentially coxph, to set up for coxph.detail.
# For distribution, the "variance" test is omitted. Not all aspects are
# yet supported by the downstream printing.
#
aareg <- function(formula, data, weights, subset, na.action,
qrtol=1e-7, nmin, dfbeta=FALSE, taper=1,
test = c('aalen', 'variance', 'nrisk'), cluster,
model=FALSE, x=FALSE, y=FALSE) {
Call <- match.call() # save a copy of the call
test <- match.arg(test) #check for legal argument
# Move any cluster() term out of the formula, and make it an argument
# instead. This makes everything easier. But, I can only do that with
# a local copy, doing otherwise messes up future use of update() on
# the model object for a user stuck in "+ cluster()" mode.
if (missing(formula)) stop("a formula argument is required")
ss <- c("cluster", "offset")
Terms <- if (missing(data)) terms(formula, specials=ss) else
terms(formula, specials=ss, data=data)
tcl <- attr(Terms, 'specials')$cluster
if (length(tcl) > 1) stop("a formula cannot have multiple cluster terms")
if (length(tcl) > 0) { # there is one
factors <- attr(Terms, 'factors')
if (any(factors[tcl,] >1)) stop("cluster() cannot be in an interaction")
if (attr(Terms, "response") ==0)
stop("formula must have a Surv response")
if (is.null(Call$cluster))
Call$cluster <- attr(Terms, "variables")[[1+tcl]][[2]]
else warning("cluster appears both in a formula and as an argument, formula term ignored")
# [.terms is broken at least through R 4.1; use our
# local drop.special() so as to not lose offsets.
Terms <- drop.special(Terms, tcl)
formula <- Call$formula <- formula(Terms)
}
indx <- match(c("formula", "data", "weights", "subset", "na.action",
"cluster"),
names(Call), nomatch=0)
if (indx[1] ==0) stop("A formula argument is required")
temp <- Call[c(1,indx)] # only keep the arguments we wanted
temp[[1L]] <- quote(stats::model.frame) # change the function called
special <- c("strata")
temp$formula <- if(missing(data)) terms(formula, special)
else terms(formula, special, data=data)
m <- eval(temp, parent.frame())
Terms <- attr(m, 'terms')
# Now grab the items that we need
Y <- model.extract(m, "response")
if (!inherits(Y, "Surv")) stop("Response must be a survival object")
offset<- attr(Terms, "offset")
tt <- length(offset)
offset <- if(tt == 0)
rep(0, nrow(Y))
else if(tt == 1)
m[[offset]]
else { #multiple offset terms! add them
ff <- m[[offset[1]]]
for(i in 2:tt)
ff <- ff + m[[offset[i]]]
ff
}
cluster <- model.extract(m, "cluster")
if (length(cluster)) {
cluster <- as.numeric(as.factor(cluster))
dfbeta = TRUE
}
else if (dfbeta) {
cluster <- seq.int(length(Y))
}
# Adding strata, when there is a coefficent per death, is identical
# to doing a totally separate fit per group.
# Using "factor(group)" to get multiple baselines is likely what the
# user wants. However, because we have not processed the strata
# statement (taken it out of X, and created the 'newstrat' of coxph)
# it will act just like a factor.
# I've changed my mind multiple times on commenting out the line below.
# Computationally identical to factor() -- is an error message or not
# an error message the greater source of confusion to a user?
strats <- attr(Terms, "specials")$strata
if (length(strats)) {
stop("Strata terms not allowed")
}
X <- model.matrix(Terms, m)[,-1,drop=FALSE]
nvar <- ncol(X)
nused<- nrow(X)
weights <- model.extract(m, 'weights')
if (length(weights)==0) weights <- rep(1.0, nused)
type <- attr(Y, "type")
if (type!='right' && type!='counting')
stop(paste("Aalen model doesn't support \"", type,
"\" survival data", sep=''))
# Get the pieces that I need from the coxdetail routine
# 1. It expects a "counting process" type of Y
if (ncol(Y)==2) {
mintime <- min(Y[,1])
if (mintime < 0) Y <- cbind( 2*mintime -1, Y)
else Y <- cbind(-1,Y)
}
# Because there are no strata, the number of unique death times is the
# number that will be in the output structures
times <- as.vector(Y[,2]) # toss the labels away
status<- as.vector(Y[,3])
ndeath <- length(unique(times[status==1]))
# Sort everything
ord <- order(times, -status)
times <- times[ord]
status <- status[ord]
weights <- weights[ord]
if (x) saveX <- X
X <- X[ord,,drop=FALSE]
storage.mode(Y) <- 'double'
ff <- .C(Ccoxdetail, as.integer(nused),
as.integer(nvar),
ndeath= as.integer(ndeath),
center = colMeans(X),
y = Y[ord,],
as.double(X),
index = as.integer(rep(0,nused)),
event2 = rep(1.0, nused),
weights = as.double(weights),
means= c(0., double(ndeath*nvar-1)),
u = double(ndeath*nvar),
i = double(ndeath*nvar*nvar),
rmat = integer(ndeath*nused),
nrisk2 = double(ndeath),
double(nvar*(3 + 2*nvar)) )
# riskmat is an nused by ndeath 0/1 matrix showing who is present
riskmat <- matrix(ff$rmat, nused, ndeath)
# Note that imat, as returned by coxdetail, is Var(X) * nevents.
dt <- list(means= (matrix(ff$means,ndeath, nvar)),
var = aareg.taper(taper, array(ff$i, c(nvar, nvar, ndeath)),
ff$event2[1:ndeath]),
time = times[ff$index[1:ndeath]],
nrisk= ff$nrisk2, #weighted # at risk
nevent=ff$event2[1:ndeath]) #weighted number of events
# Set the number of deaths that will be used in the analysis
# This may be smaller than the curren "ndeath", due to small nrisk
# The number of times may even be smaller, if imat is singular at that
# time point.
if (missing(nmin)) nmin <- 3*nvar
if (nvar==1)
ndeath <- sum(dt$nrisk>= nmin & c(dt$var)>0)
else {
ndeath <- sum(dt$nrisk >= nmin)
if (ndeath >0) {
while (1) { #we expect very few iterations of this loop
qri <- qr(dt$var[,,ndeath], tol=qrtol)
if (qri$rank >= nvar) break #not singular
ndeath <- ndeath -1
}
}
}
if (ndeath<=1)
stop("The threshold 'nmin' is too high, no model can be fit")
# This matches the death times in the data set to the
# sorted list of unique death times. "0" = not a death
index <- match(times, dt$time[1:ndeath], nomatch=0) * status
deaths <- (status==1 & index >0)
dindex <- index[deaths] #for each death, a pointer into dt objects
nevent <- length(dindex) #total number of events (ndeath = #unique times)
if (length(cluster)) ncluster <- length(unique(cluster))
else ncluster <- nused
if (dfbeta) {
dmat <- array(0.0, dim=c(ncluster, nvar+1, ndeath))
# the resid marix has a row for each death, and nused cols
# each row has a "1" in it at the position of the death
# the yhat part is subtracted later
resid <- rep(0., nevent*nused)
resid[nevent*((1:nused)[deaths]-1) + 1:nevent] <- 1.0
resid <- matrix(resid, ncol=nused)
}
# Coefficient is the step in Aalen's plots
# If we keep one row of "coefficent" per death, then Aalen's
# variance is coef *% t(coef), treating coef as a col vector.
# If we kept one row per death, then the ndeath nvar by nvar variance
# matrices would need to be kept too. So keep 1 row per event.
# Things like plot will end up accumlating.
# There is no such cheat for dfbeta: it is kept as "# unique deaths"
# p by p matrices.
#
if (nvar==1) { # special case of only 1 covariate
means <- dt$means[dindex]
nrisk <- dt$nrisk[dindex]
xx <- (X[deaths] - means) * weights[deaths]
v.inverse <- 1/dt$var[dindex] #for all time points
twt <- nrisk* 1/cbind(1+ means^2*v.inverse, v.inverse)
coefficient <- v.inverse * xx / nrisk
# Note that ybar is always w_i/nrisk, since we are doing the
# regressions one event at a time.
b0 <- weights[deaths]/nrisk - means*coefficient
if (dfbeta) {
# We first create the nused * #events matrix, and then
# collapse it to be ncluster by n-unique-death-times
xx <- c(X) * riskmat[,dindex] # X repeated in each col, if at risk
predicted <- coefficient * t(xx) + b0*t(riskmat[,dindex])
resid <- resid - predicted #nused cols, nvevent rows
temp1 <- (resid * (t(xx) -means)/(nrisk*dt$var[dindex])) *
rep(weights, rep(nevent, nused))
# temp1[i,j] is the change in alpha at time i for subject j
# the "intercept dfbeta" is resid*wt/sum(wt) - xbar*temp1
temp0 <- resid * outer(1/nrisk, weights) - temp1 * means
# get the matrix, nused by 2, which is the influence of each
# subject on the test statistic.
# This is a bit easier before collapsing
if (test=='nrisk') {
test.dfbeta <- cbind(apply(temp0*nrisk, 2, sum),
apply(temp1*nrisk, 2, sum))
}
else {
test.dfbeta <- cbind(apply(temp0*twt[,1], 2, sum),
apply(temp1*twt[,2], 2, sum))
}
# Now collapse dfbeta, first on the deaths, and then on the cluster
if (nevent > ndeath) {
temp1 <- rowsum(temp1, times[deaths], reorder=FALSE)
temp0 <- rowsum(temp0, times[deaths], reorder=FALSE)
}
dmat[,1,] <- rowsum(t(temp0), cluster[ord], reorder= FALSE)
dmat[,2,] <- rowsum(t(temp1), cluster[ord], reorder =FALSE)
}
# Compute the test statistic, including the intercept term
# (Much of the code above was a litte easier to write without
# the intercept term in thec coef matrix, that below is easier
# with it in).
coefficient <- cbind(b0,coefficient)
if (test=='nrisk') {
temp <- coefficient*nrisk
test.statistic <- apply(temp,2,sum)
test.var <- matrix(0.,2,2)
diag(test.var) <- apply(temp^2, 2, sum)
test.var[1,2] <- test.var[2,1] <- sum(temp[,1]*temp[,2])
}
else { # full V^{-1} and diag(V){-1} variance (Aalen) are the same
temp <- coefficient* twt
test.statistic <- apply(temp,2,sum)
test.var <- matrix(0.,2,2)
diag(test.var) <- apply(temp^2, 2, sum)
test.var[1,2] <- test.var[2,1] <- sum(temp[,1]*temp[,2])
}
}
else { # 2 or more covariates
coefficient <- matrix(0,nevent, nvar)
twt <- matrix(0, nevent, nvar+1)
means <- dt$means[dindex,] # vector of means, at each deatj
nrisk <- dt$nrisk[dindex]
dindex2 <- (1:nused)[deaths] # row number of each death
ybar <- weights[deaths]/nrisk
test.var <- matrix(0.0, nvar, nvar)
if (dfbeta) test.dfbeta <- matrix(0., nused, nvar+1)
for (i in 1:nevent) {
who <- riskmat[,dindex[i]] # 0/1 vector of who is at risk
wt <- weights* who
xx <- who* (X- rep(means[i,], rep(nused, nvar))) # (X-Xbar)
# solve, and check for singularity
# Note that the increment to imat, as returned by
# the coxph.detail function, is Var(X) * #events
# and qri is intended to be the qr of V-inverse
if (i==1 || dindex[i] != dindex[i-1]) { #don't redo qr for ties
qri <- qr(dt$var[,,dindex[i]], tol=qrtol)
vmat <- qr.coef(qri, diag(nvar))
twt[i,] <- nrisk[i] /c(1+ means[i,] %*% vmat %*% means[i,],
diag(vmat))
}
else twt[i,] <- twt[i-1,]
j <- dindex2[i]
coefficient[i,] <-qr.coef(qri, wt[j]*xx[j,]) / nrisk[i]
if (test=='variance') {
temp <- wt[j]*xx[j,]
test.var <- test.var + outer(temp,temp)
}
if (dfbeta) {
resid[i, ] <- resid[i,]- c(ybar[i] + xx %*% c(coefficient[i,]))
temp1 <- t(qr.coef(qri, t(resid[i,]* wt *xx)))/ nrisk[i]
temp0 <- resid[i,]*wt/nrisk[i] - temp1%*% means[i,]
if (test=='aalen')
test.dfbeta <- test.dfbeta +
cbind(temp0, temp1) %*% diag(twt[i,])
else if (test=='nrisk')
test.dfbeta <- test.dfbeta +
cbind(temp0, temp1)* nrisk[i]
else {
test.dfbeta[,-1] <- test.dfbeta[,-1] + resid[i,]* wt *xx
# There really isn't a definition for what weight to
# put on the intercept in the "variance" weighting
# (and who really cares about "testing the intercept"
# anyway). So use the twt one
test.dfbeta[,1] <- test.dfbeta[,1] + temp0*twt[i,1]
}
dmat[,-1,dindex[i]] <- dmat[,-1, dindex[i]] +
rowsum(temp1, cluster[ord], reorder=FALSE)
dmat[,1, dindex[i]] <- dmat[,1,dindex[i]] +
rowsum(temp0, cluster[ord], reorder=FALSE)
}
}
temp <- apply(means*coefficient, 1, sum) # xbar * coef at time t
b0 <- weights[deaths]/nrisk - temp
coefficient <- cbind(b0,coefficient)
# Note - the intercept is a part of the test statistic, even
# though it will always be ignored in the overall chisquare test
if (test=='aalen') {
temp <- twt* coefficient
test.statistic <- colSums(temp)
test.var <- t(temp) %*% temp
}
else if (test=='nrisk') {
temp <- coefficient * nrisk
test.statistic <- colSums(temp)
test.var <- t(temp) %*% temp
}
else {
xx <- weights[deaths]*(X[deaths,] - means[dindex,])
test.statistic <- apply(xx, 2, sum)
}
}
if (dfbeta) {
# The model variance is sum( term[i]^2), i ranging over times,
# and each term an n by p matrix (one row per person)
# The dfbeta one is essentially [sum(term[i])]^2
# the test.dfbeta matrix contains this sum over death times
temp <- rowsum(test.dfbeta, cluster, reorder=FALSE)
test.var2 <- t(temp) %*% temp
}
dimnames(coefficient) <- list(times[deaths],
c("Intercept", dimnames(X)[[2]]))
names(test.statistic) <- c("Intercept", dimnames(X)[[2]])
dimnames(twt) <- NULL
ans <- list(n= c(nused, ndeath, length(dt$time)), times=times[deaths],
nrisk=dt$nrisk[dindex],
coefficient=coefficient,
test.statistic=test.statistic, test.var=test.var, test=test,
tweight = twt, call=Call)
if (dfbeta) {
ans$dfbeta <- dmat
ans$test.var2 <- test.var2
}
if (any(weights!=1)) ans$weights <- weights
# if (ncluster < nused) ans$cluster <- as.numeric(cluster)
na.action <- attr(m, "na.action")
if (length(na.action)) ans$na.action <- na.action
if (model) ans$model <- m
else {
if (x) ans$x <- saveX
if (y) ans$y <- Y
}
class(ans) <- 'aareg'
ans
}
"[.aareg" <- function(x, ..., drop=FALSE) {
if (!inherits(x, 'aareg')) stop ("Must be an aareg object")
i <- ..1
if (is.matrix(x$coefficient)) {
x$coefficient <- x$coefficient[,i, drop=drop]
x$tweight <- x$tweight[,i,drop=drop]
}
else stop("Subsripting impossible, coefficient component not a matrix")
if (!is.null(x$dfbeta)){
x$dfbeta <- x$dfbeta[,i,,drop=drop]
x$test.var2 <- x$test.var2[i,i,drop=drop]
}
x$test.statistic <- x$test.statistic[i, drop=drop]
x$test.var <- x$test.var[i,i,drop=drop]
x
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.