R/simul.R
In msm: Multi-State Markov and Hidden Markov Models in Continuous Time

Documented in simfitted.msm sim.msm simmulti.msm

### FUNCTIONS FOR SIMULATING FROM MULTI-STATE MODELS

### from help(sample) in base R
resample <- function(x, size, ...)
    if(length(x) <= 1) {
        if(!missing(size) && size == 0) x[FALSE] else x
    } else sample(x, size, ...)

### General function to simulate one individual's realisation from a continuous-time Markov model
### Produces the exact times of transition



#' Simulate one individual trajectory from a continuous-time Markov model
#' 
#' Simulate one realisation from a continuous-time Markov process up to a given
#' time.
#' 
#' The effect of time-dependent covariates on the transition intensity matrix
#' for an individual is determined by assuming that the covariate is a step
#' function which remains constant in between the individual's observation
#' times.
#' 
#' @param qmatrix The transition intensity matrix of the Markov process. The
#' diagonal of \code{qmatrix} is ignored, and computed as appropriate so that
#' the rows sum to zero. For example, a possible \code{qmatrix} for a three
#' state illness-death model with recovery is:
#' 
#' \code{rbind( c( 0, 0.1, 0.02 ), c( 0.1, 0, 0.01 ), c( 0, 0, 0 ) )}
#' 
#' @param maxtime Maximum time for the simulated process.
#' @param covs Matrix of time-dependent covariates, with one row for each
#' observation time and one column for each covariate.
#' @param beta Matrix of linear covariate effects on log transition
#' intensities. The rows correspond to different covariates, and the columns to
#' the transition intensities. The intensities are ordered by reading across
#' rows of the intensity matrix, starting with the first, counting the positive
#' off-diagonal elements of the matrix.
#' @param obstimes Vector of times at which the covariates are observed.
#' @param start Starting state of the process. Defaults to 1.
#' @param mintime Starting time of the process. Defaults to 0.
#' @return A list with components,
#' 
#' \item{states}{Simulated states through which the process moves.  This ends
#' with either an absorption before \code{obstime}, or a transient state at
#' \code{obstime}. }
#' 
#' \item{times}{Exact times at which the process changes to the corresponding
#' states}
#' 
#' \item{qmatrix}{The given transition intensity matrix}
#' @author C. H. Jackson \email{chris.jackson@@mrc-bsu.cam.ac.uk}
#' @seealso \code{\link{simmulti.msm}}
#' @keywords datagen
#' @examples
#' 
#' 
#' qmatrix <- rbind(
#'                  c(-0.2,   0.1,  0.1 ),
#'                  c(0.5,   -0.6,  0.1 ),
#'                  c(0,  0,  0)
#'                  )
#' sim.msm(qmatrix, 30)
#' 
#' @export sim.msm
sim.msm <- function(qmatrix,   # intensity matrix
                    maxtime,   # maximum time for realisations
                    covs=NULL,     # covariate matrix, nobs rows, ncovs cols
                    beta=NULL,     # matrix of cov effects on qmatrix. ncovs rows, nintens cols.
                    obstimes=0, # times at which time-dependent covariates change
                    start = 1,     # starting state
                    mintime = 0    # time to start from
                    )
{
    ## Keep only times where time-dependent covariates change
    if (!is.null(covs)) {
        covs2 <- collapse.covs(covs)
        covs <- covs2$covs
        obstimes <- obstimes[covs2$ind]
    }
    else {obstimes <- mintime; covs <- beta <- 0}
    if (is.vector(beta)) beta <- matrix(beta, ncol=length(beta))
    nct <- length(obstimes)
    nstates <- nrow(qmatrix)
    ## Form an array of qmatrices, one for each covariate change-time
    qmatrices <- array(rep(t(qmatrix), nct), dim=c(dim(qmatrix), nct))
    qmatrices[rep(t(qmatrix)>0, nct)] <- qmatrices[rep(t(qmatrix)>0, nct)] * as.numeric(exp(t(beta)%*%t(covs))) # nintens*nobs
    for (i in 1:nct)
        qmatrices[,,i] <- msm.fixdiag.qmatrix(t(qmatrices[,,i]))
    cur.t <- mintime; cur.st <- next.st <- start; rem.times <- obstimes; t.ind <- 1
    nsim <- 0; max.nsim <- 10
    simstates <- simtimes <- numeric(max.nsim) ## allocate memory up-front for simulated outcome
    absorb <- absorbing.msm(qmatrix=qmatrix)
    ## Simulate up to maxtime or absorption
    while (cur.t < maxtime) {
        nsim <- nsim + 1
        cur.st <- next.st
        simstates[nsim] <- cur.st; simtimes[nsim] <- cur.t
        if (cur.st %in% absorb) break;
        rate <- -qmatrices[cur.st,cur.st, t.ind:length(obstimes)]
        nextlag <- rpexp(1, rate, rem.times-rem.times[1])
        cur.t <- cur.t + nextlag
        t.ind <- which.min((cur.t - obstimes)[cur.t - obstimes > 0])
        rem.times <- cur.t
        if (any(obstimes > cur.t))
            rem.times <- c(rem.times, obstimes[(t.ind+1): length(obstimes)])
        cur.q <- qmatrices[,, t.ind]
        next.st <- resample((1:nstates)[-cur.st], size=1, prob = cur.q[cur.st, -cur.st])
        if (nsim > max.nsim) { ## need more memory for simulated outcome, allocate twice as much
            simstates <- c(simstates, numeric(max.nsim))
            simtimes <- c(simtimes, numeric(max.nsim))
            max.nsim <- max.nsim*2
        }
    }
    ## If process hasn't absorbed by the end, then include a censoring time
    if (cur.t >= maxtime) {
        nsim <- nsim+1
        simstates[nsim] <- cur.st
        simtimes[nsim] <- maxtime
    }
    list(states = simstates[1:nsim], times = simtimes[1:nsim], qmatrix = qmatrix)
}


## Drop rows of a covariate matrix which are identical to previous row
## Similar method to R's unique.data.frame

collapse.covs <- function(covs)
{
    if (nrow(covs)==1) list(covs=covs, ind=1)
    else {
        pcovs <- apply(covs, 1, function(x) paste(x, collapse="\r"))
        lpcovs <- c("\r", pcovs[1:(length(pcovs)-1)])
        ind <- pcovs!=lpcovs
        list(covs=covs[ind,,drop=FALSE], ind=which(ind))
    }
}

### Given a simulated Markov model, get the current state at various observation times
### By default, only keep one observation in the absorbing state

getobs.msm <- function(sim, obstimes, death=FALSE, drop.absorb=TRUE)
{
    absorb <- absorbing.msm(qmatrix=sim$qmatrix)
                                        # Only keep one observation in the absorbing state
    if (drop.absorb && any(sim$states %in% absorb)) {
        if (any(sim$states %in% death))
            keep <- which(obstimes < max(sim$times))
        else {
            lo <- c(-Inf, obstimes[1:(length(obstimes)-1)])
            keep <- which(lo <= max(sim$times))
        }
    }
    else keep <- 1 : length(obstimes)
    obstimes <- obstimes[keep]
    state <- sim$states[rowSums(outer(obstimes, sim$times, ">="))]
    time <- obstimes
    if (any(sim$states %in% death)) { # Keep the exact death time if required
        state <- c(state, sim$states[sim$states %in% death])
        time <- c(time, sim$times[sim$states %in% death])
        state <- state[order(time)]
        time <- time[order(time)]
        keep <- c(keep, max(keep)+1)
    }
    list(state = state, time = time, keep=keep)
}

### Simulate a multi-state Markov or hidden Markov model dataset using fixed observation times

### Would it be better to make specification of covariate model consistent with model fitting function?
### e.g. separate hcovariates and  covariates formulae,
### plus covinits and hcovinits?



#' Simulate multiple trajectories from a multi-state Markov model with
#' arbitrary observation times
#' 
#' Simulate a number of individual realisations from a continuous-time Markov
#' process. Observations of the process are made at specified arbitrary times
#' for each individual, giving panel-observed data.
#' 
#' \code{\link{sim.msm}} is called repeatedly to produce a simulated trajectory
#' for each individual. The state at each specified observation time is then
#' taken to produce a new column \code{state}.  The effect of time-dependent
#' covariates on the transition intensity matrix for an individual is
#' determined by assuming that the covariate is a step function which remains
#' constant in between the individual's observation times.  If the subject
#' enters an absorbing state, then only the first observation in that state is
#' kept in the data frame. Rows corresponding to future observations are
#' deleted.  The entry times into states given in \code{death} are assumed to
#' be known exactly.
#' 
#' @param data A data frame with a mandatory column named \code{time},
#' representing observation times.  The optional column named \code{subject},
#' corresponds to subject identification numbers. If not given, all
#' observations are assumed to be on the same individual. Observation times
#' should be sorted within individuals.  The optional column named \code{cens}
#' indicates the times at which simulated states should be censored. If
#' \code{cens==0} then the state is not censored, and if \code{cens==k}, say,
#' then all simulated states at that time which are in the set
#' \code{censor.states} are replaced by \code{k}.  Other named columns of the
#' data frame represent any covariates, which may be time-constant or
#' time-dependent.  Time-dependent covariates are assumed to be constant
#' between the observation times.
#' @param qmatrix The transition intensity matrix of the Markov process, with
#' any covariates set to zero.  The diagonal of \code{qmatrix} is ignored, and
#' computed as appropriate so that the rows sum to zero. For example, a
#' possible \code{qmatrix} for a three state illness-death model with recovery
#' is:
#' 
#' \code{rbind( c( 0, 0.1, 0.02 ), c( 0.1, 0, 0.01 ), c( 0, 0, 0 ) )}
#' @param covariates List of linear covariate effects on log transition
#' intensities. Each element is a vector of the effects of one covariate on all
#' the transition intensities. The intensities are ordered by reading across
#' rows of the intensity matrix, starting with the first, counting the positive
#' off-diagonal elements of the matrix.
#' 
#' For example, for a multi-state model with three transition intensities, and
#' two covariates \code{x} and \code{y} on each intensity,
#' 
#' \code{covariates=list(x = c(-0.3,-0.3,-0.3), y=c(0.1, 0.1, 0.1))}
#' @param death Vector of indices of the death states.  A death state is an
#' absorbing state whose time of entry is known exactly, but the individual is
#' assumed to be in an unknown transient state ("alive") at the previous
#' instant.  This is the usual situation for times of death in chronic disease
#' monitoring data.  For example, if you specify \code{death = c(4, 5)} then
#' states 4 and 5 are assumed to be death states.
#' 
#' \code{death = TRUE} indicates that the final state is a death state, and
#' \code{death = FALSE} (the default) indicates that there is no death state.
#' @param start A vector with the same number of elements as there are distinct
#' subjects in the data, giving the states in which each corresponding
#' individual begins.  Or a single number, if all of these are the same.
#' Defaults to state 1 for each subject.
#' @param ematrix An optional misclassification matrix for generating observed
#' states conditionally on the simulated true states. As defined in
#' \code{\link{msm}}.
#' @param misccovariates Covariate effects on misclassification probabilities
#' via multinomial logistic regression. Linear effects operate on the log of
#' each probability relative to the probability of classification in the
#' correct state. In same format as \code{covariates}.
#' @param hmodel An optional hidden Markov model for generating observed
#' outcomes conditionally on the simulated true states. As defined in
#' \code{\link{msm}}.  Multivariate outcomes (\code{hmmMV}) are not supported.
#' @param hcovariates List of the same length as \code{hmodel}, defining any
#' covariates governing the hidden Markov outcome models.  Unlike in the
#' \code{msm} function, this should also define the values of the covariate
#' effects. Each element of the list is a named vector of the initial values
#' for each set of covariates for that state.  For example, for a three-state
#' hidden Markov model with two, one and no covariates on the state 1, 2 and 3
#' outcome models respectively,
#' 
#' \code{ hcovariates = list (c(acute=-8, age=0), c(acute=-8), NULL) }
#' @param censor.states Set of simulated states which should be replaced by a
#' censoring indicator at censoring times.  By default this is all transient
#' states (representing alive, with unknown state).
#' @param drop.absorb Drop repeated observations in the absorbing state,
#' retaining only one.
#' @return A data frame with columns, \item{subject}{Subject identification
#' indicators} \item{time}{Observation times} \item{state}{Simulated (true)
#' state at the corresponding time} \item{obs}{Observed outcome at the
#' corresponding time, if \code{ematrix} or \code{hmodel} was supplied}
#' \item{keep}{Row numbers of the original data.  Useful when
#' \code{drop.absorb=TRUE}, to show which rows were not dropped} plus any
#' supplied covariates.
#' @author C. H. Jackson \email{chris.jackson@@mrc-bsu.cam.ac.uk}
#' @seealso \code{\link{sim.msm}}
#' @keywords datagen
#' @examples
#' 
#' ### Simulate 100 individuals with common observation times
#' sim.df <- data.frame(subject = rep(1:100, rep(13,100)), time = rep(seq(0, 24, 2), 100))
#' qmatrix <- rbind(c(-0.11,   0.1,  0.01 ),
#'                  c(0.05,   -0.15,  0.1 ),
#'                  c(0.02,   0.07, -0.09))
#' simmulti.msm(sim.df, qmatrix)
#' 
#' @export simmulti.msm
simmulti.msm <- function(data,           # data frame with subject, times, covariates...
                         qmatrix,        # intensity matrix
                         covariates=NULL,  # initial values
                         death = FALSE,  # vector of indicators for "death" states, ie absorbing states whose entry time is known exactly,
                                        # but with unknown transient state at previous instant
                         start,         # starting states of the process, defaults to all 1.
                         ematrix = NULL,# misclassification matrix
                         misccovariates = NULL, # covariates on misclassification probabilities
                         hmodel = NULL,  # hidden Markov model formula
                         hcovariates = NULL,   # covariate effects on hidden Markov model response distribution
                         censor.states = NULL,
                         drop.absorb = TRUE
                         )
{

### Check consistency of qmatrix and covariate inits
    nstates <- nrow(qmatrix)
    msm.check.qmatrix(qmatrix)
    qmatrix <- msm.fixdiag.qmatrix(qmatrix)

### Subject, time and state
    if (!("subject" %in% names(data)))
        data$subject <- rep(1, nrow(data))
    if (!("time" %in% names(data)))
        stop("\"time\" column missing from data")
    data <- as.data.frame(data)
    subject <- data[,"subject"]
    time <- data[,"time"]
    if (is.unsorted(subject)){
        warning("Data are not ordered by subject ID and time - output will be ordered.")
        data <- data[order(subject, time),]
    }
    if (any(duplicated(data[,c("subject", "time")]))){
        warning("Data contain duplicated observation times for a subject - removing duplicates.")
        data <- data[!duplicated(data[,c("subject", "time")]), ]
    }
    subject <- data[,"subject"]; time <- data[,"time"];
    cens <- if (any(colnames(data)=="cens")) data[,"cens"] else rep(0, length(subject))
    msm.check.times(time, subject)
    times <- split(time, subject)
    cens <- split(cens, subject)
    n <- length(unique(subject))

### Covariates on intensities
    covnames <- names(covariates)
    ncovs <- length(covnames)
    misscovs <- setdiff(covnames, names(data))
    if (length(misscovs) > 0)
        stop("Covariates ", paste(misscovs, collapse=", "), " not found in data")
    covs <- if (ncovs > 0) lapply(split(data[,covnames], subject), as.matrix) else NULL
    allcovs <- covnames

### Covariates on misclassification
    misccovnames <- names(misccovariates)
    nmisccovs <- length(misccovnames)
    misscovs <- setdiff(misccovnames, names(data))
    if (length(misscovs) > 0)
        stop("Misclassification covariates ", paste(misscovs, collapse=", "), " not found in data")
    misccovs <- if (nmisccovs > 0) lapply(split(data[,setdiff(misccovnames,covnames)], subject), as.matrix) else NULL
    allmisccovs <- misccovnames

### Covariates on HMM
    if (!is.null(hcovariates)) {
        if (is.null(hmodel)) stop("hcovariates specified, but no hmodel")
        hcovnames <- unique(names(unlist(hcovariates)))
        if (length(hcovariates) != nstates)
            stop("hcovariates of length ", length(hcovariates), ", expected ", nstates)
        msm.check.hmodel(hmodel, nstates)
        misscovs <- setdiff(hcovnames, names(data))
        if (length(misscovs) > 0)
            stop("Covariates ", paste(misscovs, collapse=", "), " not found in data")
        hcovs <- lapply(split(data[,setdiff(hcovnames, setdiff(misccovnames,covnames))], subject), as.matrix)
    }
    else hcovs <- hcovnames <- NULL

### Extra variables to return
    extravars <- setdiff(names(data), unique(c("subject","time", "cens", covnames, misccovnames, hcovnames)))
    extradat <- if(length(extravars)>0) split(data[,extravars,drop=FALSE], subject) else NULL

### Starting states
    if (missing(start)) start <- rep(1, n)
    else if (length(start) == 1) start <- rep(start, n)
    else if (length(start) != n)
        stop("Supplied ", length(start), " starting states, expected 1 or ", n)

    nq <- length(qmatrix[qmatrix > 0])
    misspeccovs <- covnames[sapply(covariates, length) != nq]
    if (length(misspeccovs) > 0)
        stop("Initial values for covariates ", paste(misspeccovs, collapse=", "), " should be of length ", nq)
    beta <- do.call("rbind", as.list(covariates))
    ne <- length(ematrix[ematrix > 0])
    misspeccovs <- covnames[sapply(misccovariates, length) != ne]
    if (length(misspeccovs) > 0)
        stop("Initial values for misclassification covariates ", paste(misspeccovs, collapse=", "), " should be of length ", ne)
    beta.misc <- do.call("rbind", as.list(misccovariates))

### Check death argument. Logical values allowed for backwards compatibility
### (TRUE means final state is death, FALSE means no death state)
    statelist <- if (nstates==2) "1, 2" else if (nstates==3) "1, 2, 3" else paste("1, 2, ... ,",nstates)
    if (is.logical(death) && death==TRUE)  {death <- nstates}
    else if (is.logical(death) && death==FALSE) {death <- 0}
    else if (length(setdiff(unique(death), 1:nstates)) > 0)
        stop(paste("Death states indicator contains states not in",statelist))

### Simulate a realisation for each person
    state <- numeric()
    keep.data <- numeric()
    subj <- split(subject, subject)
    subj.num <- match(subject, unique(subject))
    for (pt in 1:n)
    {
        sim.mod <- sim.msm(qmatrix, max(times[[pt]]), covs[[pt]], beta, times[[pt]], start[pt], min(times[[pt]]))
        obsd <- getobs.msm(sim.mod, times[[pt]], death, drop.absorb)
        pt.data <- cbind(subj[[pt]][obsd$keep], obsd$time, obsd$state, cens[[pt]][obsd$keep])
        if (!is.null(covnames)) pt.data <- cbind(pt.data, covs[[pt]][obsd$keep,,drop=FALSE])
        if (!is.null(misccovnames)) pt.data <- cbind(pt.data, misccovs[[pt]][obsd$keep,,drop=FALSE])
        if (!is.null(hcovnames)) pt.data <- cbind(pt.data, hcovs[[pt]][obsd$keep,,drop=FALSE])
        if (!is.null(extravars)) pt.data <- cbind(pt.data, extradat[[pt]][obsd$keep,,drop=FALSE])
        inds <- which(subj.num==pt)
        pt.data <- cbind(pt.data, keep=inds[obsd$keep])
        keep.data <- rbind(keep.data, pt.data)
      }
    colnames(keep.data) <- c("subject","time","state","cens",union(union(covnames,misccovnames),hcovnames),extravars,"keep")
    keep.data <- as.data.frame(keep.data)

### Simulate some misclassification or a HMM conditionally on the underlying state
    if (!is.null(ematrix)) {
        if (!all(dim(ematrix) == dim(qmatrix)))
            stop("Dimensions of qmatrix and ematrix should be equal")
        keep.data <- cbind(keep.data, obs=simmisc.msm(keep.data$state, ematrix, beta.misc, keep.data[,misccovnames,drop=FALSE]))
    }
    else if (!is.null(hmodel))
        keep.data <- cbind(keep.data, obs=simhidden.msm(keep.data$state, hmodel, nstates, hcovariates, keep.data[,hcovnames,drop=FALSE]))

### Replace state at censor times by censoring indicators
    censor <- unique(keep.data$cens[keep.data$cens != 0])
    if (is.null(censor.states)) censor.states <- 1:(nstates-1)
    if (!is.null(keep.data$obs))
      keep.data$obs <- ifelse(keep.data$cens > 0 & keep.data$state %in% censor.states, keep.data$cens, keep.data$obs)
    else
      keep.data$state <- ifelse(keep.data$cens > 0 & keep.data$state %in% censor.states, keep.data$cens, keep.data$state)
    keep.data$cens <- NULL

#    attr(keep.data, "keep") <- obsd$keep
    keep.data
}


## Simulate misclassification conditionally on an underlying state

simmisc.msm <- function(state, ematrix, beta, misccovs)
{
    ostate <- state
    if (is.null(ematrix))
        warning("No misclassification matrix given, assuming no misclassification")
    else {
        if (any(ematrix < 0)) stop("Not all elements of ematrix are > 0")
        if (any(ematrix > 1)) stop("Not all elements of ematrix are < 1")
        if (nrow(ematrix) != ncol(ematrix)) stop("Number of rows and columns of ematrix are not equal")
        nstates <- nrow(ematrix)
        ematrix <- msm.fixdiag.ematrix(ematrix)
        ostate <- state
        beta.states <- t(row(ematrix))[t(ematrix) > 0 & !(row(ematrix)==col(ematrix))]
        for (i in 1:nstates)
          if (any(state==i)) {
                n <- length(state[state==i])
                if (!is.null(beta)) { # covariates on misclassification probabilities
                    X <- as.matrix(misccovs[state==i,])
                    b <- beta[,beta.states == i,drop=FALSE]
                    p <- matrix(rep(ematrix[i,], n), nrow=n, byrow=TRUE)
                    mu <- log(p / p[,i])
                    emu <- array(0, dim=dim(p))
                    miscstates <- setdiff(which(ematrix[i,] > 0), i)
                    for (j in seq_along(miscstates))
                        emu[,miscstates[j]] <- exp(mu[,miscstates[j]] + X %*% b[,j])
                    emu[,i] <- 1
                    emu[,ematrix[i,]==0] <- 0
                    p <- emu / rowSums(emu)
                    for (j in 1:n)
                        ostate[state==i][j] <- resample(1:nstates, size=1, prob=p[j,], replace=TRUE)
                }
                else ostate[state==i] <- resample(1:nstates, size=n, prob=ematrix[i,], replace=TRUE)
            }
    }
    ostate
}

## Simulate HMM outcome conditionally on an underlying state

simhidden.msm <- function(state, hmodel, nstates, beta=NULL, x=NULL)
{
    y <- state
    msm.check.hmodel(hmodel, nstates)
    for (i in 1:nstates)
        if (any(state==i)) {
            if (inherits(hmodel[[i]], "hmmMVdist")) 
              stop("multivariate HMM outcomes not supported for simulation")
            ## don't change the underlying state if the HMM is the null (identity) model
            if (!(hmodel[[i]]$label=="identity" && (length(hmodel[[i]]$pars) == 0)))  {
                ## simulate from the sampling function "r" in the HMM object
                ## transform the location parameter by covariates if necessary
                rcall <- list(n=length(state[state==i]))
                if (!is.null(beta[[i]])) {
                    link <- get(hmodel[[i]]$link)
                    invlink <- get(.msm.INVLINK[hmodel[[i]]$link])
                    locpar <- .msm.LOCPARS[hmodel[[i]]$label]
                    loc <- hmodel[[i]]$pars[locpar]
                    loc <- invlink(link(loc) + as.matrix(x[state==i,names(beta[[i]])]) %*% beta[[i]])
                    rcall[[paste("r",locpar,sep="")]] <- loc
                }
                rfn <- hmodel[[i]]$r
                y[state==i] <- do.call("rfn", rcall)
            }
        }
    y
}


### Simulate data from fitted model with same observation scheme
### Used for parametric bootstrap in pearson.msm



#' Simulate from a Markov model fitted using msm
#' 
#' Simulate a dataset from a Markov model fitted using \code{\link{msm}}, using
#' the maximum likelihood estimates as parameters, and the same observation
#' times as in the original data.
#' 
#' This function is a wrapper around \code{\link{simmulti.msm}}, and only
#' simulates panel-observed data.  To generate datasets with the exact times of
#' transition, use the lower-level \code{\link{sim.msm}}.
#' 
#' Markov models with misclassified states fitted through the \code{ematrix}
#' option to \code{\link{msm}} are supported, but not general hidden Markov
#' models with \code{hmodel}.  For misclassification models, this function
#' includes misclassification in the simulated states.
#' 
#' This function is used for parametric bootstrapping to estimate the null
#' distribution of the test statistic in \code{\link{pearson.msm}}.
#' 
#' @param x A fitted multi-state model object as returned by \code{\link{msm}}.
#' @param drop.absorb Should repeated observations in an absorbing state be
#' omitted.  Use the default of \code{TRUE} to avoid warnings when using the
#' simulated dataset for further \code{\link{msm}} fits.  Or set to
#' \code{FALSE} if exactly the same number of observations as the original data
#' are needed.
#' @param drop.pci.imp In time-inhomogeneous models fitted using the \code{pci}
#' option to \code{\link{msm}}, censored observations are inserted into the
#' data by \code{\link{msm}} at the times where the intensity changes, but
#' dropped by default when simulating from the fitted model using this
#' function. Set this argument to \code{FALSE} to keep these observations and
#' the corresponding indicator variable.
#' @return A dataset with variables as described in \code{\link{simmulti.msm}}.
#' @author C. H. Jackson \email{chris.jackson@@mrc-bsu.cam.ac.uk}
#' @seealso \code{\link{simmulti.msm}}, \code{\link{sim.msm}},
#' \code{\link{pearson.msm}}, \code{\link{msm}}.
#' @keywords models
#' @export simfitted.msm
simfitted.msm <- function(x, drop.absorb=TRUE, drop.pci.imp=TRUE){
    sim.df <- x$data$mf
    x$data <- expand.data(x)
    sim.df$"cens" <- ifelse(sim.df$"(state)" %in% 1:x$qmodel$nstates, 0, sim.df$"(state)") # 0 if not censored, cens indicator if censored, so that censoring is retained in simulated data.  TODO used in pearson?
    if (x$qcmodel$ncovs > 0) {
        sim.df <- cbind(sim.df, x$data$mm.cov)
        cov.effs <- lapply(x$Qmatrices, function(y)t(y)[t(x$qmodel$imatrix)==1])[x$qcmodel$covlabels]
    } else cov.effs <- NULL
    if (x$ecmodel$ncovs > 0) {
        sim.df <- cbind(sim.df, x$data$mm.mcov)
        misccov.effs <- lapply(x$Ematrices, function(y)t(y)[t(x$emodel$imatrix)==1])[x$ecmodel$covlabels]
    } else misccov.effs <- NULL
    names(sim.df) <- replace(names(sim.df), match(c("(time)","(subject)"), names(sim.df)),
                             c("time","subject"))
    if (!is.null(sim.df$"(subject.weights)")) names(sim.df)[names(sim.df)=="(subject.weights)"] = "subject.weights"
    sim.df$state <- NULL # replace observed with simulated state
    if (any(union(names(cov.effs), names(misccov.effs)) %in% c("state","time","subject","cens")))
        stop("Not supported with covariates named \"state\", \"time\", \"subject\" or \"cens\"") # TODO?
    boot.df <- simmulti.msm(data=sim.df,
                            qmatrix=qmatrix.msm(x, covariates=0, ci="none"),
                            covariates=cov.effs,
                            death=FALSE,
                            ematrix=ematrix.msm(x, covariates=0, ci="none"),
                            misccovariates=misccov.effs,
                            drop.absorb=drop.absorb
                            )
    if (drop.pci.imp & !is.null(boot.df$"(pci.imp)")) {
        boot.df <- boot.df[!boot.df$"(pci.imp)",]
        boot.df$"(pci.imp)" <- NULL
    }
    boot.df
}
Any scripts or data that you put into this service are public.
msm documentation built on April 4, 2025, 3:50 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
msm
Multi-State Markov and Hidden Markov Models in Continuous Time

R/simul.R
In msm: Multi-State Markov and Hidden Markov Models in Continuous Time

Defines functions simfitted.msm simhidden.msm simmisc.msm simmulti.msm getobs.msm collapse.covs sim.msm resample

Documented in simfitted.msm sim.msm simmulti.msm

Try the msm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msm Multi-State Markov and Hidden Markov Models in Continuous Time

R/simul.R In msm: Multi-State Markov and Hidden Markov Models in Continuous Time

Defines functions simfitted.msm simhidden.msm simmisc.msm simmulti.msm getobs.msm collapse.covs sim.msm resample

Documented in simfitted.msm sim.msm simmulti.msm

Try the msm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

msm
Multi-State Markov and Hidden Markov Models in Continuous Time

R/simul.R
In msm: Multi-State Markov and Hidden Markov Models in Continuous Time