R/JMsim.R

Defines functions jm.sim

Documented in jm.sim

#' Simulate from joint models
#' @description This function simlates longitudinal responses and event times from joint models.
#'     Available options are Weibull and log-logistic for the time-to-event submodel,
#'     and LME for the longitudinal process, with a simple linear model for the random-effects component
#'     (i.e., \eqn{b_0 + b_1} \code{* times}, where \eqn{b_0} and \eqn{b_1} are the random intercept and random slope,
#'     respectively).
#' @keywords AFT model, Joint model, Simulation
#' @param surv.formula a one-sided formula of the form \code{~ z1 + z2 + ... + zp}, where 
#'       \code{z1, z2, ..., zp} are baseline covariates for the time-to-event process. 
#' @param surv.par a list of the form \code{list(beta = a vector, phi = a scalar, logrho = a scalar,} 
#'      \code{logkappa = a scalar, status.rho = a scalar)} for the time-to-event model parameters, 
#'      where \code{beta} is the vector of regression coefficients for the baseline covariates as specified in
#'      \code{surv.formula} (without an intercept), \code{phi} is the association parameter, 
#'      \code{logrho} is the log of the rate parameter
#'      of the time-to-event distribution (Weibull or log-logistic),
#'      \code{logkappa} is the log of the shape parameter of the time-to-event distribution 
#'      (Weibull or log-logistic), and \code{status.rho} is the rate
#'      parameter of the exponential distribution from which censored data are generated. Note that \code{surv.par}
#'      must be a named list as above. See \code{survreg.fit} for parameterizations of the time-to-event distributions. 
#' @param surv.model the AFT model to be used to describe the event process. Available options are
#'       \code{"weibull"} and \code{"llogistic"}
#'       for Weibull and log-logistic distributions, respectively.
#' @param lme.formula a one-sided formula of the form \code{~ x1 + x2 + ... + xq + times}, where 
#'       \code{x1, ..., xq} are time-independent covariates for the longitudinal process and
#'       \code{times} is the time variable (time points at which longitudinal measurements are taken).
#' @param lme.par a list of the form \code{list(alpha = a vector, sigma2 = a scalar, Sigma = a 2 x 2 covariance matrix)} 
#'      for the longitudinal model parameters, 
#'      where \code{alpha} is the vector of fixed-effects regression coefficients as specified in
#'      \code{lme.formula} (includes an intercept), \code{sigma2} is the variance of measurement errors,
#'      and \code{Sigma} is the covariance matrix of the random effects \eqn{b_{i0}} and \eqn{b_{i1}} 
#'      (only a simple linear model \eqn{b_{i0} + b_{i1}} \code{* times} is allowed for random effects).
#'      Note that \code{lme.par} must be a named list as above. 
#' @param times a numeric vector for the time points at which longitudinal measurements are planned to be taken.
#' @param timevar the time variable (a character string) of the longitudinal model. For example,
#'        if \code{lme.formula = ~ x1 + times}, then \code{timevar = "times"}.
#' @param Data a data frame containing the baseline covariates \code{z}'s for the time-to-event model and
#'        the time-independent covariates \code{x}'s for the longitudinal model. For \code{n} subjects, it must
#'        must have \code{n} rows.
#' @details This function simlates longitudinal responses and event times from joint models.
#'     Available options are Weibull and log-logistic for the time-to-event submodel,
#'     and LME for the longitudinal process, with a simple linear model for the random-effects component
#'     (i.e., \eqn{b_0 + b_1} \code{* times}, where \eqn{b_0} and \eqn{b_1} are the random intercept and random slope,
#'     respectively). The survival function of the time-to-event model is of the form
#'     \eqn{S_0(\int_0^t} exp\eqn{[-(\beta_1 z_1 + \beta_2 z_2 + ... + \beta_p z_p)-\phi(b_0 + b_1 u)]du)}, and
#'     the longitudinal model is of the form \eqn{y = \alpha_0 + \alpha_1 x_1 + ... + \alpha_q x_q + \alpha_{q+1}} \code{times} +
#'     \eqn{b_0 + b_1} \code{times} + \eqn{\epsilon}. The amount of censoring can be controlled by changing the value of 
#'      \code{status.rho}.
#' @return A list with components:
#' @return \code{b:} random effects (a matrix).
#' @return \code{long.data:} simulated longitudinal data.
#' @return \code{surv.data:} simulated event time data.
#' @author Shahedul Khan <khan@math.usask.ca>
#' @seealso \code{\link{jmreg.aft}}
#' @examples 
#'   n<-200  # number of subjects
#'   surv.formula<- ~z1+z2 
#'   lme.formula<- ~x1+times 
#' # correlation matrix for random-effects
#'   cor.mat<-matrix(c(1,-0.2,-0.2,1),ncol=2,byrow=TRUE) 
#' # sd of random effects 
#'   sd<-c(0.5,0.1) 
#' # covariance matrix for random-effects
#'   Sigma<-(sd %*% t(sd))*cor.mat
#' # parameter values for lme (a named list) 
#'   lme.par<-list(alpha=c(10,1,-1),sigma2=0.5,Sigma=Sigma) 
#' # parameter values for the survival model (a named list)
#'   surv.par<-list(beta=c(-0.5,0.5),phi=-0.25,logrho=log(0.25),
#'       logkappa=log(2),status.rho=0.095) 
#'   set.seed(54566)
#'   z1<-rbinom(n,1,0.5)
#'   z2<-rnorm(n) 
#' # data with all covariate information 
#'   Data<-data.frame(z1=z1,z2=z2,x1=z1) 
#' # time points at which longitudinal measurements are planned to be taken
#'   times<-c(0,1,2,3,4) 
#' # simulate data with Weibull time-to-event model
#'   jmsim.dat<-jm.sim(surv.formula=surv.formula,surv.par=surv.par,
#'       surv.model="weibull",lme.formula=lme.formula,lme.par=lme.par,
#'       times=times,timevar="times",Data=Data) 
#'   surv.data<-jmsim.dat$surv.data
#'   long.data<-jmsim.dat$long.data
#'   surv.fit<-coxph(Surv(st,status)~z1+z2,data=surv.data,x=TRUE) 
#'   lme.fit<-lme(y~x1+times,random=~times | id,data=long.data) 
#' # Fit the joint model
#'   jmfit.w<-jmreg.aft(surv.fit=surv.fit,lme.fit=lme.fit,
#'       surv.model="weibull",rand.model="simple",timevar="times",
#'       n.chain=2,n.adapt =5000,n.burn=15000,n.iter=50000,n.thin = 5)
#'   sum.w<-jm.summary(jmfit.w,density.plot=TRUE,trace.plot=TRUE)
#'   sum.w
#' @export

jm.sim<-function(surv.formula,surv.par,surv.model,
   lme.formula,lme.par,times,timevar,Data){
if(isFALSE(surv.model=="weibull" || surv.model=="llogistic")){
stop("surv.model must be 'weibull' or 'llogistic'")
}
survvar<-all.vars(surv.formula)
if(isFALSE(all(survvar %in% colnames(Data)))){
stop("variable names in 'surv.formula' did not match
     with the variable names in 'Data'")
}
surv.par.names<-names(surv.par)
#if(surv.model=="weibull" || surv.model=="llogistic"){
survpar<-c("beta","phi","logrho","logkappa","status.rho")
if(isFALSE(all(survpar %in% surv.par.names))){
stop("use names to define 'surv.par' list; at least one
  element of 'surv.par' is missing or not recognized")
}
#}
#if(surv.model=="eweibull"){
#survpar<-c("beta","phi","logrho","logkappa","loggamma","status.rho")
#if(isFALSE(all(survpar %in% surv.par.names))){
#stop("use names to define 'surv.par' list; at least one
#  element of 'surv.par' is missing or not recognized")
#}
#}  
if(length(surv.par$beta)!=length(survvar)){
stop("length of beta in 'surv.par' did not match with 'surv.formula'")
}
length.t<-length(times)
rep.t<-rep(times,n)
id<-rep(1:n,each=length.t)
dat1<-data.frame(id=id,Data[rep(seq_len(nrow(Data)),each=length.t),],rep.t)
colnames(dat1)[ncol(dat1)]<-timevar
colnames(dat1)[2:(ncol(dat1)-1)]<-colnames(Data)
lmevar<-all.vars(lme.formula)
if(isFALSE(all(lmevar %in% colnames(dat1)))){
stop("variable names in 'lme.formula' did not match
     with the variable names in data; check your variable
     names in 'Data', 'timevar' and 'lme.formula'")
}
lme.par.names<-names(lme.par)
lmepar<-c("alpha","sigma2","Sigma")
if(isFALSE(all(lmepar %in% lme.par.names))){
stop("use names to define 'lme.par' list; at least one
  element of 'lme.par' is missing or not recognized")
}
if(length(lme.par$alpha)!=(length(lmevar)+1)){
stop("length of alpha in 'lme.par' did not match with 'lme.formula'")
}
if(lme.par$sigma2<=0){
stop("sigma2 in 'lme.par' must be positive")
}
if(isFALSE(all(dim(lme.par$Sigma)==c(2,2)))){
stop("Sigma in 'lme.par' must be a 2 x 2 positive definite matrix")
}
beta<-surv.par$beta
phi<-surv.par$phi
rho<-exp(surv.par$logrho)
kappa<-exp(surv.par$logkappa)
status.rho<-surv.par$status.rho
Sigma<-lme.par$Sigma
surv.mat<-matrix(model.matrix(surv.formula,data=Data)[,-1],ncol=length(survvar))
pred<-c(surv.mat%*%beta)
n<-length(pred)
nb<-nrow(Sigma)
b.mat<-matrix(0,nrow=n,ncol=nb)
st1<-NULL
if(surv.model=="weibull"){
options(warn=-1)
for(i in 1:n){
repeat{
b<-mvrnorm(1,rep(0,nb),Sigma)
u<-runif(1)
st0<--log(1-(phi*b[2]*exp(pred[i]+phi*b[1])*(-log(u))^(1/kappa))/rho)/(phi*b[2])
if(is.finite(st0)) break
}
st1[i]<-st0
b.mat[i,]<-b
}
options(warn=0)
}
#if(surv.model=="eweibull"){
#options(warn=-1)
#if(!is.finite(surv.par$loggamma) || is.null(surv.par$loggamma)){
#stop("loggamma in 'surv.par' must be numeric")
#}
#gamma<-exp(surv.par$loggamma)
#for(i in 1:n){
#repeat{
#b<-mvrnorm(1,rep(0,nb),Sigma)
#u<-runif(1)
#w<-((-log1p(-(1-u)^(1/gamma)))^(1/kappa))/rho
#st0<--log1p(-phi*b[2]*w*exp(pred[i]+phi*b[1]))/(phi*b[2])
#if(is.finite(st0) && st0>0) break
#}
#st1[i]<-st0
#b.mat[i,]<-b
#}
#options(warn=0)
#}
if(surv.model=="llogistic"){
options(warn=-1)
for(i in 1:n){
repeat{
b<-mvrnorm(1,rep(0,nb),Sigma)
u<-runif(1)
w<- (((1-u)/u)^(1/kappa))/rho
st0<--log1p(-phi*b[2]*w*exp(pred[i]+phi*b[1]))/(phi*b[2])
if(is.finite(st0) && st0>0) break
}
st1[i]<-st0
b.mat[i,]<-b
}
options(warn=0)
}
ctimes<-rexp(n,rate=status.rho)
st<-pmin(st1,ctimes)
status<-as.numeric(st1<=ctimes)
xlong<- model.matrix(lme.formula, data=dat1)
zlong<- cbind(1,rep.t)
bb<-b.mat[rep(seq_len(nrow(b.mat)),each=length.t),]
alpha<-lme.par$alpha
sigma2<-lme.par$sigma2
eps<-rnorm(length(id),mean=0,sd=sqrt(sigma2))
y<-c(xlong%*%alpha)+as.vector(rowSums(bb*zlong))+eps
dat2<-data.frame(dat1,y=y,st=rep(st,each=length.t),status=rep(status,each=length.t))
long.data<-dat2[dat2[,"st"]>=dat2[,timevar],]
rownames(long.data)<-NULL
surv.data<-data.frame(Data,st=st,status=status)
return(list(b=b.mat,long.data=long.data,surv.data=surv.data))
}
sa4khan/AFTjmr documentation built on March 12, 2020, 1:24 a.m.