Nothing
# There are two major differences with model.frame.coxph
# Less complex: we don't have to worry about tt() terms, since coxphms models
# don't allow them
# More complex: missing value omission has to happen as a second step. If
# subject i is missing variable zed, but they participate in a transtion that
# doesn't use zed, that observation needs to stay. Only obs that will not
# be used in any transition are tossed, e.g., the variable 'sex' is used in
# all transitions then any row with missing sex will be omitted.
# The stacker routine will eventually deal with this and create a stacked
# data set that has no missing rows.
#
model.frame.coxphms <- function(formula, ...) {
dots <- list(...)
nargs <- dots[match(c("data", "na.action", "subset", "weights",
"id", "cluster", "istate"),
names(dots), 0)]
# If nothing has changed and the coxph object had a model component,
# simply return it.
if (length(nargs) ==0 && !is.null(formula$model)) return(formula$model)
# Otherwise, rebuild the original call to model.frame
Terms <- terms(formula)
fcall <- formula$call
indx <- match(c("formula", "data", "weights", "subset", "na.action",
"cluster", "id", "istate"),
names(fcall), nomatch=0)
if (indx[1] ==0) stop("The coxph call is missing a formula!")
temp <- fcall[c(1,indx)] # only keep the arguments we wanted
temp[[1]] <- quote(stats::model.frame) # change the function called
temp$xlev <- formula$xlevels # this will turn strings to factors
temp$formula <- Terms #keep the predvars attribute
temp$na.action <- na.pass # defer missings until later
# Now, any arguments that were on this call overtake the ones that
# were in the original call.
if (length(nargs) >0)
temp[names(nargs)] <- nargs
# The documentation for model.frame implies that the environment arg
# to eval will be ignored, but if we omit it there is a problem.
if (is.null(environment(formula$terms)))
mf <- eval(temp, parent.frame())
else mf <- eval(temp, environment(formula$terms), parent.frame())
if (!is.null(attr(formula$terms, "dataClasses")))
.checkMFClasses(attr(formula$terms, "dataClasses"), mf)
cmap <- formula$cmap # coefficient map, one row per coef, col per state
# the starting state for each transition
from <- as.numeric(sub(":[0-9]+$", "", colnames(cmap)))
# In a multi-state not every variable might be used in every transition,
# only data rows that have no use at all are removed. So for instance if sex
# is missing in an obs, but that particular obs is at risk for at least one
# transition that doesn't depend on sex, said obs is retained in the data
# frame. (The stacker routine will deal with partial missings, later.)
# The model frame has terms but our cmap matrix has covariates, e.g., mf
# has a factor and cmap the dummy variables created from it. In theory we
# could use the terms structure to map each covariate to its term, but it
# is easier to go the other way and use model.matrix to map from terms
# to covariates.
# First though, any obs with missing Y, state, weight, id, or cluster is
# out regardless
Y <- model.response(mf)
id <- model.extract(mf, "id")
istate <- model.extract(mf, "istate")
miss0 <- is.na(Y) | is.na(id) # Y and id are not optional
if (!is.null(istate)) miss0 <- miss0 | is.na(istate)
mcheck <-survcheck2(Y, id, istate) # use survcheck to curate istate
istate <- mcheck$istate
weights <- model.weights(mf)
if (!is.null(weights)) miss0 <- miss0 | is.na(weights)
cluster <- model.extract(mf, "cluster")
if (!is.null(cluster)) miss0 <- miss0 | is.na(cluster)
# now look at covariates
# Not every obs contributes to every transition, delete those rows that
# will will be tossed for all transitions. Variables with names like
# "ph(2:1)" in cmap will later be constructed, and are not in X matrix
#
phvar <- grepl("^ph\\([0-9]+:[0-9]+\\)$", row.names(cmap))
cmap2 <- cmap[!phvar,, drop=FALSE]
Xna <- is.na(model.matrix(delete.response(Terms), data=mf))
Xna <- Xna[,-1, drop=FALSE] # delete the intercept, has to be done last
bad <- good <- rep(FALSE, nrow(Y))
for (i in 1:ncol(cmap)) {
j <- which(cmap2[,i] >0)
atrisk <- (as.integer(istate) == from[i])
bad[atrisk] <- bad[atrisk] | apply(Xna[atrisk, j, drop=FALSE], 1, any)
good[atrisk]<- good[atrisk] | apply(!Xna[atrisk,j,drop=FALSE], 1, all)
}
# good[i] is TRUE if observation i makes a contribution to some transition
# bad [i] is TRUE observation i contributes a missing for some transition
# last, deal with any missing in the strata, which follow the same rules
# as covariates. Strata in multistate models are unusual, so we don't need
# to be efficient
smap <- formula$smap
if (nrow(smap) >1) {
for (i in 2:nrow(smap)) {
stemp <- is.na(mf[[rownames(smap)[i]]])
for (j in 1:ncol(smap)) { # same cols as cmap, one per trans
atrisk <- (as.integer(istate) == from[i])
if (smap[2,j]>0) {
bad[atrisk] <- bad[atrisk] | stemp[atrisk]
good[atrisk] <-good[atrisk]& !stemp[atrisk]
}
}
}
}
n.partially.used <- sum(good & bad & !miss0) # unused, but interesting
omit <- (!good & bad) | miss0
if (any(omit)) {
temp <- setNames(seq(omit)[omit], attr(mf, "row.names")[omit])
attr(temp, "class") <- "omit"
mf <- mf[!omit,, drop=FALSE]
attr(mf, "na.action") <- temp
}
mf
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.