Nothing
#' Fit the multi-type recurrent event model
#'
#' multi-type events, may be recurrent or terminating
#'
#' @param ... one or more models (see details)
#' @param data the dataset
#' @param na.action function, a function which indicates what should happen when
#' the data contain NAs. The default is na.omit.
#' @param eventVar character, the name of the event variable in the data= dataset
#' @param startTimeVar character, the name of the variable that holds the start
#' time of each interval in the data= dataset
#' @param stopTimeVar character, the name of the variable that holds the stop
#' time of each interval in the data= dataset
#' @param idVar character, the name of the id variable which uniquely identifies
#' individuals in the data= dataset
#' @param link the link used to model the hazard
#' @param maxit integer, the maximum number of iteration for the optimization
#' method. Defaults to 500 for Nelder-Mead, 100 for all other methods.
#' @param noEvent character, the value(s) of the event variable that indicate
#' that no event occurred in an interval
#' @param robust logical, if TRUE robust standard errors are used
#' @param method character, the optimization method see the optim function for
#' details
#' @param method.seed numeric, the seed used
#' @param SANN.init integer, if this is greater than 0 it indicates the number
#' of simulated annealing iterations used to refine the initial estimate of
#' the parameters before calling the method specified in method=.
#' @param hazardPrefix character, the prefix used to denote prior event
#' functions in the formulas for the hazard
#' @param fitDetails logical, if TRUE the returned fit object contains
#' additional information used to create fit diagnostics.
#' @param hessian character, use the Hessian calculated in optim or calculated
#' separately in numDeriv::hessian().
#' @param trace logical, if TRUE additional information is printed by the
#' algorithm
#'
#' @details
#' models for the hazard are of the form
#'
#' lhs ~ rhs
#'
#' where lhs is one of the possible events and rhs is the usual model right
#' hand side, with one addition: the pseudo-function "nPrior." can be
#' used to indicate a dependence of the hazard on prior events. This function
#' has the form
#'
#' nPrior.event(~formula, alpha=value)
#'
#' where
#' \describe{
#' \item{event}{should be replaced by the appropriate event. For example,
#' "nPrior.stroke" indicates a dependence of the hazard on the number of
#' strokes prior to the current interval}
#' \item{formula}{is a possibly empty formula which expresses the possible
#' covariate effects on the extent to which prior events impact the hazard. For
#' example "nPrior.stroke(~hba1c+sbp)" indicates a dependence on the number of
#' prior strokes which may be modulated by HbA1c (variable hba1c) and systolic
#' blood pressure (variable sbp)}
#' \item{value}{is the power to which the number of prior events should
#' be raised. The default is 1, i.e. the hazard depends simply on the number of
#' prior events. A value of 0.5 means that the hazard depends on the square
#' root of the number of prior events, i.e. that additional events have
#' a diminishing effect on the hazard. Specifying alpha=NA means that
#' the power is unknown and should be estimated.}
#' }
#'
#' For example a model of myocardial infaction (event='mi') might be
#'
#' mi ~ bmi + nPrior.mi(alpha=0.5) + nPrior.stroke(~hba1c+sbp)
#'
#' to indicate that the hazard of mi depends on body mass index (variable bmi),
#' on the square root of the number of prior MI's and on the number of prior
#' strokes, modulated by hba1c and systolic blood pressure.
#'
#' @importFrom stats as.formula ave model.matrix optim na.omit reformulate
#' @importFrom utils head
#'
#' @return an object of type agMTRE
#' @export
#' @examples
#' # Fit a model for the multiRecCVD2 dataset
#' fit = multiRec(hf ~ nPrior.afib(~male),
#' afib ~ age + nPrior.afib() + nPrior.hf(),
#' data=multiRecCVD2,
#' idVar='id',
#' link='log')
#'
#' # Fit the model with robust variances
#' fit = multiRec(hf ~ nPrior.afib(~male),
#' afib ~ age + nPrior.afib() + nPrior.hf(),
#' data=multiRecCVD2,
#' idVar='id',
#' link='log',
#' robust = TRUE) # Request robust (sandwich) variances
#'
#' # Use a specified power
#' fit = multiRec(hf ~ nPrior.afib(~male),
#' afib ~ age + nPrior.afib(alpha=0.5) + nPrior.hf(),
#' data=multiRecCVD2,
#' idVar='id',
#' link='log',
#' robust = TRUE)
#'
#' # Display the coefficients
#' coef(fit)
#' confint(fit)
#'
#' # Display intervals with a poor fit
#' boxplot(resid(fit))
multiRec = function(...,
data,
eventVar='event',
startTimeVar='tstart',
stopTimeVar='tstop',
idVar='id',
na.action=na.omit,
link=c('log','identity','yj'),
SANN.init=0,
noEvent=c('',NA),
robust=FALSE,
method=c("BFGS", "Nelder-Mead", "CG", "SANN"),
method.seed=NULL,
hazardPrefix='nPrior.',
maxit,
trace=FALSE,
fitDetails=FALSE,
hessian=c('optim','numDeriv')) {
hessian = match.arg(hessian)
method = match.arg(method)
# ----------------------------------------------------------------------------
# Initialize
# ----------------------------------------------------------------------------
resetIndices()
if (missing(maxit)) {
if (method=='Nelder-Mead') maxit=500
else maxit=100
}
if (method=='SANN' && !is.null(method.seed)) { oldseed <- if (exists('.Random.seed', envir = .GlobalEnv)) get('.Random.seed', envir = .GlobalEnv) else NULL; on.exit(if (!is.null(oldseed)) assign('.Random.seed', oldseed, envir = .GlobalEnv), add = TRUE); set.seed(method.seed) }
# ----------------------------------------------------------------------------
# handle the link
# ----------------------------------------------------------------------------
linkName = match.arg(link)
dlink = list('identity'=identityLink,
'log'=logLink,
'yj'=yjLink)[[linkName]]
iLink = list('identity'=identityInvLink,
'log'=logInvLink,
'yj'=yjInvLink)[[linkName]]
link = list(
link = linkName,
linkfun = dlink,
linkinv = iLink,
param.ix = if (linkName=='yj') getIndices(1) else NULL,
param.init = 0
)
# ----------------------------------------------------------------------------
# Check the dataset
#
# Apply the missing value function. There should be no missing values once
# this is done.
# ----------------------------------------------------------------------------
if (missing(data)) halt('data= not specified')
if (!inherits(data,'data.frame')) halt('data= is not a data.frame')
# ----------------------------------------------------------------------------
# Check that the main variables are in the dataset
# ----------------------------------------------------------------------------
if (!eventVar %in% names(data)) halt('eventVar=%s in not in the dataset',as.character(eventVar))
if (!startTimeVar %in% names(data)) halt('startTimeVar=%s in not in the dataset',as.character(startTimeVar))
if (!stopTimeVar %in% names(data)) halt('stopTimeVar=%s in not in the dataset',as.character(stopTimeVar))
if (!idVar %in% names(data)) halt('idVar=%s in not in the dataset',as.character(idVar))
# ----------------------------------------------------------------------------
# Handle missing values
#
# We first restrict the dataset to the variables we need (missing values in
# unused variables should not matter)
# ----------------------------------------------------------------------------
models = list(...)
used = intersect(unique(unlist(lapply(models,all.vars))),names(data))
used = c(used,eventVar,startTimeVar,stopTimeVar,idVar)
data = na.action(data[,used])
if (any(is.na(data))) halt('The dataset contains missing values.')
omit = attr(data,'na.action')
# ----------------------------------------------------------------------------
# Sanity checks on the main variables
# ----------------------------------------------------------------------------
event = trimws(data[[eventVar]])
start = data[[startTimeVar]]
stop = data[[stopTimeVar]]
id = data[[idVar]]
# The id variable should not have missing values
if (anyNA(id)) halt('Id variable %s has missing values', idVar)
# check for ties
endTime = paste0(id,':',stop)
if (any(duplicated(endTime))) halt('Tied events found. This model does not allow ties')
# Check that the start variable is numeric, not missing and not negative
if (!is.numeric(start)) halt('Time variable %s is not numeric',startTimeVar)
if (anyNA(start)) halt('Time variable %s has missing values', startTimeVar)
if (any(start<0)) halt('Time variable %s has negative values',startTimeVar) # do we care if some are negative?
# Check that the stop variable is numeric, not missing and not negative
if (!is.numeric(stop)) halt('Time variable %s is not numeric',stopTimeVar)
if (anyNA(stop)) halt('Time variable %s has missing values', stopTimeVar)
if (any(stop<0)) halt('Time variable %s has negative values',stopTimeVar) # do we care if some are negative?
# Calculate the interval widths
width = stop-start
if (any(width<0)) halt('Start time (%s) is after stop time (%s) for observation %d',
stopTimeVar, startTimeVar, which(width<0)[1])
if (any(width==0)) halt('Time variable %s is the same as %s for observation %d',
startTimeVar, stopTimeVar,which(width==0)[1])
# ----------------------------------------------------------------------------
# Make sure the dataset is sorted by id and time
# ----------------------------------------------------------------------------
data = data[order(id,start),]
# ----------------------------------------------------------------------------
# What events do we have?
# ----------------------------------------------------------------------------
eventNames = unique(event)
eventNames = eventNames[!eventNames %in% noEvent] # drop non-events
nEvents = table(event)
# ----------------------------------------------------------------------------
# Construct the event counts
# --------------------------
#
# evCounts is a list with an entry for each event type. Each entry is a
# vector of event counts. Specifically, the value in these vectors is the
# number of events of a specific type that occurred strictly before the
# current time.
# ----------------------------------------------------------------------------
nDots = max(nchar(gsub('^(\\.*).*$','\\1',names(data),perl=TRUE)))
uniquePrefix = paste0(strrep('.',nDots+1),'N.')
for (i in seq_along(eventNames)) {
eventName = eventNames[i]
data[[paste0(uniquePrefix,eventName)]] = ave(event==eventName, id, FUN=function(x) cumsum(c(0,x[-length(x)])))
}
# ----------------------------------------------------------------------------
# Construct the list of model pieces
# ----------------------------------------------------------------------------
# Given a model with several hazards, each with intrinsic and prior components
# such as
#
# h1 = i1 + p11 + p12 + p13
# h2 = i2 + p21 + p22 + p23
# h3 = i3 + p31 + p32 + p33
#
# This section of code constructs a list called "hazards" with one element for
# each hazard. Each element is itself a list of components, one for the
# intrinsic term, and one for each of the prior terms. So the list "hazards"
# has the following structure:
#
# hazards = ┌──────────────────────────────────────┐
# |1 ┌────┐ ┌─────┐ ┌─────┐ ┌─────┐ │
# | | i1 ├──┤ p11 ├──┤ p12 ├──┤ p13 | │
# ┌───┤ └────┘ └─────┘ └─────┘ └─────┘ │
# │ └──────────────────────────────────────┘
# |
# | ┌──────────────────────────────────────┐
# └──>|2 ┌────┐ ┌─────┐ ┌─────┐ ┌─────┐ |
# | | i2 ├──┤ p21 ├──┤ p22 ├──┤ p23 | |
# ┌───┤ └────┘ └─────┘ └─────┘ └─────┘ |
# | └──────────────────────────────────────┘
# |
# | ┌──────────────────────────────────────┐
# └──>|3 ┌────┐ ┌─────┐ ┌─────┐ ┌─────┐ |
# | | i3 ├──┤ p31 ├──┤ p32 ├──┤ p33 | |
# | └────┘ └─────┘ └─────┘ └─────┘ |
# └──────────────────────────────────────┘
#
# At the top level, the list has one sublist for each hazard model. Each
# of these sublists contains entries for the intrinsic term (if any) and
# the prior terms (if any).
#
# The entry for an intrisic term is a list with the following elements:
# * event: the string '(Intrinsic)'
# * formula: a formula
# * X: the corresponding design matrix
# * beta.ix: the vector of indices of the betas for this matrix inside the
# parameter vector, with length(beta.ix)=ncol(X)
#
# The entry for a prior term is a list with the following elements:
# * event: the name of the prior event this piece corresponds to
# * formula: the formula for the intrinsic term, or the term inside a
# nPrior.event() call, as a character string
# * X: the corresponding design matrix
# * beta.ix: the vector of indices of the betas for this matrix inside
# the parameter vector, with length(beta.ix)=ncol(X)
# * counts: the vector of counts, with length(counts)=nrow(X)
# * alpha: the power. This is a number if a fixed power was specified,
# NA if the power is to be estimated
# * alpha.ix: the index of alpha in the parameter vector. Only used if
# alpha=NA.
# * alpha.init: the initial value for the unknown parameter. Only used if
# alpha=NA.
#
# The parameter vector
# --------------------
# All model parameters end up in a single vector because that's the way the
# optim function likes it. The parameter vector will generally contain three
# groups of parameters
# - parameters associated with the design matrices (i.e. betas)
# - parameters associated with the link
# - parameters associated with the event count functions
#
# optim will pass a single parameter vector and the various parts of the code
# will have to find extract what they need from this parameter vector. They
# do this through the ".ix" elements described above
# ----------------------------------------------------------------------------
# Create a "prior" function for each of the possible event types. This
# is used to extract the information from the nPrior.event() function.
priorFn = 'function(formula=NULL, alpha=1) return(list(event="%s", formula=formula, alpha=alpha))'
for (eventName in eventNames) {
f = str2lang(sprintf(priorFn, eventName))
assign(paste0(hazardPrefix,eventName),eval(f),envir=environment())
}
hazards = list()
for (m in seq_along(models)) {
model = models[[m]]
# make sure we have a formula
if (!inherits(model,'formula')) {
# is it a misnamed argument?
if (!is.null(names(models)) && nzchar(names(models)[m])) {
halt('multiRec argument "%s=" is not recognized. Is it misspelled?',names(models)[m])
}
else halt('Expecting a formula, but "%s" was found',
deparse1(model))
}
# Isolate the event for which this is the hazard
hazardName = as.character(model[[2]])
if (!hazardName %in% eventNames) halt(paste('There is a hazard model for %s events,',
'but there are no "%s" events in the data'),
hazardName,hazardName)
# If there are "nPrior." terms, make sure they all correspond to actual events
funcs = setdiff(all.vars(model,functions=TRUE),all.vars(model,functions=FALSE))
prior = funcs[startsWith(funcs,hazardPrefix)]
if (length(prior)>0) {
priorEv = substring(prior,nchar(hazardPrefix)+1)
unknown = which(!priorEv %in% eventNames)
n = length(unknown)
if (n>0) {
h = sprintf('"%s"',prior[unknown])
e = sprintf('"%s"',priorEv[unknown])
if (n>1){
h = paste(paste(h[-n],collapse=', '),'and',h[n])
e = paste(paste(e[-n],collapse=', '),'or',e[n])
}
halt('The hazard model for %s has %s but there are no %s events in the data',
hazardName,h,e)
}
}
# extract the terms from the model formula and separate intrinsic terms
# from prior ones
terms = labels(terms(model))
isPrior = startsWith(terms,hazardPrefix)
if (all(isPrior)) intrinsicFormula = ~1
else intrinsicFormula = reformulate(terms[!isPrior])
priorFormulas = lapply(terms[isPrior], str2lang)
X.intrinsic = model.matrix(intrinsicFormula,data=data)
hazardInfo = list('(Intrinsic)' = list(
event = '(Intrinsic)',
formula=intrinsicFormula,
X = X.intrinsic,
beta.ix = getIndices(ncol(X.intrinsic))
))
# The prior event elements of the model
# These will be of the form "nPrior.<eventName>(<formula>, .alpha=<value>)".
# Since the formula may contain nested parentheses, it's simpler to process
# it using R's parse than (say) regular expressions. So we convert them to
# '<eventName>', <formula>, .alpha=<value>)" and run them through
# parse.
for (i in seq_along(priorFormulas)) {
priorName = as.character(priorFormulas[[i]][[1]])
term = eval(priorFormulas[[i]], envir=data, enclos=environment())
alpha = term$alpha
formula = term$formula
if (is.null(formula)) formula = ~1
else {
# Check that the formula a) is a formula and b) that it has no left hand
# side
if (!inherits(formula,'formula')) {
halt('The %s term in the model for %s does not provide a formula',
priorName, hazardName)
}
else if (length(formula)==3) {
halt('The formula for the %s term in the model for %s should not have a right hand side',
priorName, hazardName)
}
}
X = model.matrix(formula,data=data)
out = list(
event = term$event,
formula = formula,
countName = paste0(uniquePrefix,term$event),
X = X,
beta.ix = getIndices(ncol(X)),
alpha = alpha
)
if (is.na(alpha)) {
out$alpha.ix = getIndices(1)
out$alpha.init = 1
# Check to make sure that there are enough events
n = nEvents[[term$event]]
if (n<5) {
if (n==1) howMany = sprintf('there is only one %s event',term$event)
else howMany = sprintf('there are only %d %s events',n,term$event)
warn(paste('The %s term in the model for %s estimates alpha, but %s',
'in the data. The estimate of alpha may be unreliable.'),
priorName, hazardName, howMany)
}
}
hazardInfo[[paste0(hazardPrefix,term$event)]] = out
}
rm(i)
# Check that the variables in the model all exist in the dataset
vars = unique(unlist(lapply(hazardInfo, function(item) all.vars(item$formula))))
unknown = setdiff(vars, names(data))
if (length(unknown)>0) halt.n(length(unknown),
'Variable %s is not in the dataset',
'Variables %s are not in the dataset',
paste.and(unknown))
# Add to the hazards
attr(hazardInfo,'hazardName') = hazardName
attr(hazardInfo,'call') = deparse1(model)
hazards[[hazardName]] = hazardInfo
}
# ----------------------------------------------------------------------------
# Final checks
#
# Any events with no hazard formula?
# ----------------------------------------------------------------------------
noHazard = setdiff(eventNames,names(hazards))
if (length(noHazard)>0) halt.n(length(noHazard),
'Variable %1$s contains values %2$s but there is no corresponding hazard formula',
'Variable %1$s contains values %2$s but there are no corresponding hazard formulas',
eventVar, paste.and(noHazard))
# ----------------------------------------------------------------------------
# Construct the model object
# ----------------------------------------------------------------------------
selector = cbind(1:nrow(data),match(event,names(hazards)))
modelObj = list(hazards=hazards,
id = id,
link=link,
w=width,
selector=selector,
timeVar=startTimeVar,
time2Var=stopTimeVar,
eventVar=eventVar,
uniquePrefix=uniquePrefix,
data=data)
# ----------------------------------------------------------------------------
# Get initial estimates
# ----------------------------------------------------------------------------
init = init(modelObj)
# ----------------------------------------------------------------------------
# If requested, use SANN to improve the initial estimates
#
# Note that this need not converge since all we want is initial parameter
# estimates to feed into the next optim call
# ----------------------------------------------------------------------------
if (SANN.init>0) {
if (!is.null(method.seed)) { oldseed <- if (exists('.Random.seed', envir = .GlobalEnv)) get('.Random.seed', envir = .GlobalEnv) else NULL; on.exit(if (!is.null(oldseed)) assign('.Random.seed', oldseed, envir = .GlobalEnv), add = TRUE); set.seed(method.seed) }
fit0 = try(optim(par=init,
fn=likelihood,
method = 'SANN',
modelObj=modelObj,
hessian = FALSE,
control=list(maxit=SANN.init,fnscale=-1,temp=10,tmax=10)),
silent=TRUE)
init=fit0$par
}
# ----------------------------------------------------------------------------
# Get an improved estimate
# ----------------------------------------------------------------------------
fit = try(optim(par=init,
fn=likelihood,
method = method,
modelObj=modelObj,
hessian = TRUE,
control=list(trace=trace,
REPORT=1,
maxit=maxit,
fnscale=-1)),
silent=TRUE)
if (inherits(fit,'try-error')) {
emsg = attr(fit,'condition')
# Create a fake fit object
fit = list(par=init,
value=0,
count=c('function'=1, 'gradient'=1),
convergence=1,
message=emsg)
warning(emsg)
}
# Calculate additional estimates, residuals
obj = makeFitObject(fit=fit, modelObj=modelObj, robust=robust,
init=init,
fitDetails=fitDetails,
hessian=hessian,
omit=omit)
return(obj)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.