#' Sample paths through a multi-state model
#'
#' Given cumulative transition hazards sample paths through the multi-state
#' model.
#'
#' The procedure is described in detail in Fiocco, Putter & van Houwelingen
#' (2008). The argument \code{beta.state} and the element \code{tstate} from
#' the argument \code{history} are meant to incorporate situations where the
#' time at which some previous states were visited may affect future transition
#' rates. The relation between time of visit of state \code{s} and transition
#' \code{k} is assumed to be linear on the log-hazards; the corresponding
#' regression coefficient is to be supplied as the (s,k)-element of
#' \code{beta.state}, which is 0 if no such effect has been included in the
#' model. If no such effects are present, then \code{beta.state}=\code{NULL}
#' (default) suffices. In the \code{tstate} element of \code{history}, the
#' \code{s}-th element is the time at which state \code{s} was visited. This is
#' only relevant for states which have been visited prior to the beginning of
#' sampling, i.e. before the \code{time} element of \code{history}; the
#' elements of \code{tstate} are internally updated when in the sampling
#' process new states are visited (only if \code{beta.state} is not \code{NULL}
#' to avoid unnecessary computations).
#'
#' @param Haz Cumulative hazards to be sampled from. These should be given as a
#' data frame with columns \code{time}, \code{Haz}, \code{trans}, for instance
#' as the \code{Haz} list element given by \code{\link{msfit}}.
#' @param trans Transition matrix describing the multi-state model. See
#' \code{trans} in \code{\link{msprep}} for more detailed information
#' @param history A list with elements \code{state}, specifying the starting
#' state(s), \code{time}, the starting time(s), and \code{tstate}, a numeric
#' vector of length the number of states, specifying at what times states have
#' been visited, if appropriate. The default of \code{tstate} is \code{NULL};
#' more information can be found under Details.
#'
#' The elements \code{state} and \code{time} may either be scalars or vectors,
#' in which case different sampled paths may start from different states or at
#' different times. By default, all sampled paths start from state 1 at time 0.
#' @param beta.state A matrix of dimension (no states) x (no transitions)
#' specifying estimated effects of times at which earlier states were reached
#' on subsequent transitions. If these are not in the model, the value
#' \code{NULL} (default) suffices; more information can be found under Details
#' @param clock Character argument, either \code{"forward"} (default) or
#' \code{"reset"}, specifying whether the time-scale of the cumulative hazards
#' is in forward time (\code{"forward"}) or duration in the present state
#' (\code{"reset"})
#' @param output One of \code{"state"}, \code{"path"}, or \code{"data"},
#' specifying whether states, paths, or data should be output.
#' @param tvec A numeric vector of time points at which the states or paths
#' should be evaluated. Ignored if \code{output}=\code{"data"}
#' @param cens An independent censoring distribution, given as a data frame
#' with time and Haz
#' @param M The number of sampled trajectories through the multi-state model.
#' The default is 10, since the procedure can become quite time-consuming
#' @param do.trace An integer, specifying that the replication number should be
#' written to the console every \code{do.trace} replications. Default is
#' \code{NULL} in which case no output is written to the console during the
#' simulation
#' @return M simulated paths through the multi-state model given by
#' \code{trans} and \code{Haz}. It is either a data frame with columns
#' \code{time}, \code{pstate1}, ..., \code{pstateS} for S states when
#' \code{output="state"}, or with columns \code{time}, \code{ppath1},...,
#' \code{ppathP} for the P paths specified in \code{\link{paths}}(trans) when
#' \code{output="path"}. When \code{output="data"}, the sampled paths are
#' stored in an \code{"msdata"} object, a data frame in long format such as
#' that obtained by \code{\link{msprep}}. This may be useful for
#' (semi-)parametric bootstrap procedures, in which case \code{cens} may be
#' used as censoring distribution (assumed to be independent of all transition
#' times and independent of any covariates).
#' @author Marta Fiocco, Hein Putter \email{H.Putter@@lumc.nl}
#' @references Fiocco M, Putter H, van Houwelingen HC (2008). Reduced-rank
#' proportional hazards regression and simulation-based prediction for
#' multi-state models. \emph{Statistics in Medicine} \bold{27}, 4340--4358.
#' @keywords datagen
#' @examples
#'
#' # transition matrix for illness-death model
#' tmat <- trans.illdeath()
#' # data in wide format, for transition 1 this is dataset E1 of
#' # Therneau & Grambsch (T&G)
#' tg <- data.frame(illt=c(1,1,6,6,8,9),ills=c(1,0,1,1,0,1),
#' dt=c(5,1,9,7,8,12),ds=c(1,1,1,1,1,1),
#' x1=c(1,1,1,0,0,0),x2=c(6:1))
#' # data in long format using msprep
#' tglong <- msprep(time=c(NA,"illt","dt"),status=c(NA,"ills","ds"),
#' data=tg,keep=c("x1","x2"),trans=tmat)
#' # expanded covariates
#' tglong <- expand.covs(tglong,c("x1","x2"))
#' # Cox model with different covariate
#' cx <- coxph(Surv(Tstart,Tstop,status)~x1.1+x2.2+strata(trans),
#' data=tglong,method="breslow")
#' # new data, to check whether results are the same for transition 1 as T&G
#' newdata <- data.frame(trans=1:3,x1.1=c(0,0,0),x2.2=c(0,1,0),strata=1:3)
#' fit <- msfit(cx,newdata,trans=tmat)
#' tv <- unique(fit$Haz$time)
#' # mssample
#' set.seed(1234)
#' mssample(Haz=fit$Haz,trans=tmat,tvec=tv,M=100)
#' set.seed(1234)
#' paths(tmat)
#' mssample(Haz=fit$Haz,trans=tmat,tvec=tv,M=100,output="path")
#' set.seed(1234)
#' mssample(Haz=fit$Haz,trans=tmat,tvec=tv,M=100,output="data",do.trace=25)
#'
#' @export mssample
`mssample` <- function(Haz, trans, history=list(state=1,time=0,tstate=NULL),
beta.state=NULL,
clock=c("forward","reset"),
output=c("state","path","data"), tvec, cens=NULL, M=10, do.trace=NULL)
{
### Function to sample a path in the multi-state model
### Input:
### Haz: specific cumulative hazard for patient "i"
### trans: matrix describing the multi-state model
### history: the event-history of the patient, given by state and time
### clock: either "forward" or "reset"
### output: "state", "path", or "data"
### tvec: vector of times
### cens: distribution of censored times
### M: number of replications
### Output:
### M simulated path in the muti-state model in the desired output format
###
### Set up
output <- match.arg(output)
clock <- match.arg(clock)
K <- dim(trans)[1]
trans2 <- to.trans2(trans)
ntrans <- nrow(trans2)
if (length(history$state)==1) history$state <- rep(history$state,M)
if (length(history$time)==1) history$time <- rep(history$time,M)
if (length(history$state)!=length(history$time)) stop("lengths of history$state and history$time differ")
if (!is.null(history$tstate)) { # then should be either length K or dim
if (is.vector(history$tstate))
if (length(history$tstate) != K) stop("length of history$tstate should equal no of states")
else history$tstate <- matrix(history$tstate,K,M)
if (is.null(beta.state)) stop("beta.state should be specified when history$tstate not null")
}
if (!is.null(beta.state))
if (any(dim(beta.state) != c(K,ntrans)))
stop("incorrect dimension of beta.state")
if (output=="state") # to contain sum
res <- matrix(0, length(tvec), K)
else if (output=="path") {# to contain sum
thepaths <- paths(trans)
L <- nrow(thepaths)
res <- matrix(0, length(tvec), L)
}
else res <- NULL
for (m in 1:M) {
if (!is.null(history$tstate))
res1 <- mssample1(Haz, trans, history=list(state=history$state[m],
time=history$time[m], tstate=history$tstate[,m]), beta.state=beta.state, clock=clock, output=output,
tvec=tvec, cens=cens)
else
res1 <- mssample1(Haz, trans, history=list(state=history$state[m],
time=history$time[m], tstate=rep(0,K)), beta.state=beta.state, clock=clock, output=output,
tvec=tvec, cens=cens)
if (output=="data") {
res1[,1] <- m
res <- rbind(res,res1)
}
else res <- res + res1
if (!is.null(do.trace)) if (m %% do.trace == 0) {
cat("Replication",m,"finished at",date(),"\n")
flush.console()
}
}
if (output=="state") {
res <- data.frame(cbind(tvec,res/M))
names(res) <- c("time",paste("pstate",1:K,sep=""))
}
else if (output=="path") {
res <- data.frame(cbind(tvec,res/M))
names(res) <- c("time",paste("ppath",1:L,sep=""))
}
else if (output=="data") {
res <- data.frame(res)
names(res) <- c("id","Tstart","Tstop","duration","from","to","status","trans")
attr(res, "trans") <- trans
class(res) <- c("msdata", "data.frame")
}
return(res)
}
`mssample1` <- function(Haz, trans, history, beta.state, clock, output, tvec, cens)
{
###
### Function to sample a single path in the multi-state model
### Used internally in function mssample and msboot
###
### First sample (once) from censoring distribution, this will be the follow-up time
if (!is.null(cens)) {
pcens <- diff(c(0,1-cens$surv))
idx <- sample(1:length(cens$time), size=1, prob=pcens)
fut <- cens$time[idx]
censtime <- list(time=fut, jump=ifelse(idx>1,cens$Haz[idx]-cens$Haz[idx-1],cens$Haz[idx]))
}
else censtime <- NULL
K <- dim(trans)[1]
trans2 <- to.trans2(trans)
from <- to <- history$state
tcond <- t0 <- Tstart <- history$time
if (output=="state")
res <- matrix(0, length(tvec), K)
else if (output=="path") {
thepaths <- paths(trans)
path <- c(to, rep(NA,ncol(thepaths)-1))
res <- matrix(0, length(tvec), nrow(thepaths))
}
else res <- NULL
### keep track of when states were visited
tstates <- history$tstate
while (!is.na(to)) {
from <- to
nstates <- trans[from,]
transs <- nstates[!is.na(nstates)]
allto <- which(!is.na(nstates))
ntr <- length(transs)
if (ntr!=0 ) { # if not yet in absorbing state
transnos <- transs
for (tr in 1:ntr)
Haz$Haz[Haz$trans==transnos[tr]] <-
exp(sum(beta.state[,transnos[tr]]*tstates)) *
Haz$Haz[Haz$trans==transnos[tr]]
whh <- which(!is.na(match(Haz$trans,transnos)))
if (clock=="forward") {
crs <- crsample(Haz[whh,], tcond, censtime)
tcond <- Tstop <- crs$t
}
else {
crs <- crsample(Haz[whh,], t0, censtime)
t0 <- 0
tcond <- Tstop <- crs$t + tcond
}
transno <- crs$trans
if (is.na(transno)) to <- NA
else {
to <- trans2$to[transno]
tstates[to] <- Tstop
}
if (output=="state") {
res[((tvec>=Tstart)&(tvec<Tstop)),from] <- 1
Tstart <- Tstop
}
else if (output=="path") {
idx <- which(apply(thepaths,1,function(x) identical(x,path)))
res[((tvec>=Tstart)&(tvec<Tstop)),idx] <- 1
path[which(is.na(path))[1]] <- to
Tstart <- Tstop
}
else {
res1 <- matrix(c(rep(NA,ntr),rep(Tstart,ntr),rep(Tstop,ntr),rep(Tstop-Tstart,ntr),rep(from,ntr),allto,rep(0,2*ntr)),ntr,8)
res1[res1[,6]==to,7] <- 1
res1[,8] <- trans[from,allto] # trans
Tstart <- Tstop
res <- rbind(res,res1)
}
}
else { # in absorbing state:
to <- NA
if (output=="state") {
res[tvec>=Tstart,from] <- 1
}
else if (output=="path") {
idx <- which(apply(thepaths,1,function(x) identical(x,path)))
res[tvec>=Tstart,idx] <- 1
path[which(is.na(path))[1]] <- to
}
else {
res1 <- matrix(c(rep(NA,ntr),rep(Tstart,ntr),rep(Tstop,ntr),rep(Tstop-Tstart,ntr),rep(from,ntr),allto,rep(0,2*ntr)),ntr,8)
res1[res1[,6]==to,7] <- 1
res1[,8] <- trans[from,allto] # trans
res <- rbind(res,res1)
}
}
}
return(res)
}
`crsample` <- function(Haz, tcond=0, censtime=NULL) ### censtime here is list with time and last jump (=pp.fut)
{
if (is.null(censtime)) fut <- Inf
else
fut <- censtime$time
transs <- Haz$trans
transun <- unique(transs)
K <- length(transun)
tt <- sort(unique(Haz$time))
n <- length(tt)
cim <- matrix(NA, n, 3*K+4)
ci <- as.data.frame(cim)
names(ci)[1] <- "time"
names(ci)[2:(K+1)] <- paste("Haz",as.character(1:K), sep="")
names(ci)[(K+2):(2*K+1)] <- paste("haz",as.character(1:K), sep="")
names(ci)[(2*K+2):(3*K+1)] <- paste("CI",as.character(1:K), sep="")
names(ci)[3*K+2] <- "hazsum"
names(ci)[3*K+3] <- "Hazsum"
names(ci)[3*K+4] <- "S0"
ci$time <- tt
for (k in 1:K) { # select the elements for each transition in the block
wh <- which(Haz$trans==transun[k])
idx <- match(Haz$time[wh],tt)
ci[,k+1][idx] <- Haz$Haz[wh]
ci[,k+1] <- NAfix(ci[,k+1],subst=0)
ci[,K+1+k] <- diff(c(0,ci[,k+1])) # the hazard for transition k
}
## select only t > tcond, and adjust the cumulative hazards
ci <- ci[ci$time>tcond,]
n <- nrow(ci)
for (k in 1:K) # each cumulative hazard is adjusted (cumulative sum of hazard)
ci[,k+1] <- cumsum(ci[,K+1+k])
### compute baseline survival S0, need to sum all columns Hazards
if (K==1) ci$hazsum <- ci[,3]
else ci$hazsum <- apply(ci[,((K+2):(2*K+1))],1,sum)
ci$S0 <- cumprod(1-ci$hazsum)
ci$Hazsum <- -log(ci$S0)
nci <- nrow(ci)
### Follow Dabrowska in first sampling from the sum of the cumulative hazards
k <- NA
tsample <- Hazsample(data.frame(time=ci$time, Haz=ci$Hazsum))
### If follow-up time is less than tsample, no transition is made
if (fut < tsample) crt <- fut
else { # else decide between transitions
crt <- tsample
if (fut>tsample) # the size of the hazard jumps of the transitions
# determine the probabilities of choosing the transitions
{
k <- sample(1:K,size=1,prob=ci[which(ci$time==tsample),(K+2):(2*K+1)])
}
else # i.e. if there is fut==tsample, then the jump in the censoring distribution
# also plays in the lottery, unless both are Inf, then it doesn't matter
if (crt!=Inf) {
k <- sample(c(1:K,NA),size=1,
prob=c(ci[which(ci$time==tsample),(K+2):(2*K+1)],censtime$jump))
}
}
if (!is.na(k)) trans <- unique(Haz$trans)[k] # return transition k
else trans <- NA
return(list(t=crt,trans=trans))
}
`Hazsample` <- function(Haz, size=1, replace=TRUE)
{
### Function to sample from a distribution given by a cumulative hazard
### Input:
### time: vector containing time points to sample from
### H: the cumulative hazard at those time points
### size: the size of the sample
### replace: sample with or without replacement? (default=TRUE, with replacement)
### Output:
### A random realisation from H
p <- diff(c(0,1-exp(-Haz$Haz)))
p <- c(p, exp(-Haz$Haz[nrow(Haz)])) # add probability of sampling time=Inf
return(sample(c(Haz$time, Inf), size=size, prob=p, replace=replace))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.