R/presmoothTP.R
In presmoothedTP: Presmoothed Landmark Aalen-Johansen Estimator of Transition Probabilities for Complex Multi-State Models

Documented in presmoothTP

#' This function implements presmoothed estimates based on the landmark 
#' Aalen-Johansen method for non-parametric estimation of transition 
#' probabilities in non-Markov
#' multi-state models.
#'
#' @description This function is used to obtain nonparametric presmoothed 
#' estimates of the transition probabilities for any type of multistate model, 
#' including models that allow for reversible transitions, and potencially
#' non-Markovian seetings. The methods used to compute the transition
#' probabilities are ”PLM” (given by a ”logistic” model), ”cauchit”, ”probit”.
#' The proposed presmoothed estimator is designed to handle complex multi-state 
#' models with more than three states, as well as reversible transitions, 
#' accommodating potentially non-Markovian behaviors.
#'
#' @param data A data frame in the long format containing the subject id; 
#' from corresponding to the starting state; the receiving state, to; the 
#' transition number, trans; the starting time of the transition given by 
#' Tstart; the stopping time of the transition, Tstop, and status for the status
#' variable, with 1 indicating an event (transition), 0 a censoring.
#' @param s The first time for obtaining estimates for the transition.
#' @param from The starting state of the transition probabilities.
#' @param to The last receiving state considered for the estimation of the 
#' transition probabilities.
#' @param method The method used to compute the transition probabilities.
#' Possible options are \code{"PLM"}, \code{"cauchit"} and \code{"probit"}.
#' @param trans The transition matrix for multi-state model.

#' @return Nonparametric presmoothed estimates of transition probabilities 
#' based on logistic’, ’probit’, and ’cauchit’ distributions (PLM,cauchit and 
#' probit).
#' 
#' @examples
#' data("ebmt4")
#' db_wide <- ebmt4
#' positions<-list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6), c(6), c())
#' states.names = c("Tx", "Rec", "AE", "Rec+AE", "Rel", "Death")
#' trans<-transTP(positions, states.names)
#' times.names = c(NA, "rec", "ae","recae", "rel", "srv")
#' status.names=c(NA, "rec.s", "ae.s", "recae.s", "rel.s", "srv.s")
#' data.long<-prepTP(data=db_wide, trans, times.names, status.names)
#' 
#' res1<-presmoothTP(data.long, s = 100, from = 1, to = 1, 
#' method = 'PLM', trans=trans)
#' res2<-presmoothTP(data.long, s = 100, from = 1, to = 1, method = 'cauchit', 
#' trans=trans)
#' res3<-presmoothTP(data.long, s = 100, from = 1, to = 1, method = 'probit', 
#' trans=trans)
#' 
#' plot(res1, type='s', xlim=c(100,3000), ylim=c(0.6,1), ylab=c('p11(100,t)'))
#' lines(res2, type='s', col=2)
#' lines(res3, type='s', col=4)
#' legend("topright", legend = c("PLM","Cauchit", "Probit"), lwd = 0.3, 
#' col = c(1,2,4), cex=0.7, lty=c(1, 1))

#' res4<-presmoothTP(data.long, s = 1000, from = 2, to = 6, method = 'PLM', 
#' trans=trans)
#' res5<-presmoothTP(data.long, s = 1000, from = 2, to = 6, method = 'cauchit', 
#' trans=trans)
#' res6<-presmoothTP(data.long, s = 1000, from = 2, to = 6, method = 'probit', 
#' trans=trans)

#' plot(res4, type='s', xlim=c(1000,3000), ylim=c(0,0.12),ylab=c('p26(1000,t)'))
#' lines(res5, type='s', col=2)
#' lines(res6, type='s', col=4)
#' legend("topright", legend = c("PLM","Cauchit", "Probit"), lwd = 0.3, 
#' col = c(1,2,4), cex=0.7, lty=c(1, 1))
#' 
#' @author Gustavo Soutinho and Luis Meira-Machado.
#' 
#' @importFrom "survival" coxph Surv survfit strata untangle.specials
#' @importFrom "graphics" legend abline axis legend lines matplot par plot polygon
#' @importFrom "stats" pchisq pnorm quantile sd na.omit terms approxfun as.formula rnorm rpois weighted.mean
#' @importFrom "utils" capture.output flush.console head
#' @importFrom "mstate" transMat msprep msfit probtrans LMAJ xsect cutLMms 
#' @importFrom "mstate" msdata2etm trans2Q events to.trans2
#' @importFrom "stats" model.matrix model.frame model.response model.offset  
#' @importFrom "stats" delete.response delete.response glm predict
#' @importFrom "stats" binomial
#' @importFrom "plyr" ddply
#' @export presmoothTP
#' @export unsmoothTP
#' @export transTP
#' @export prepTP

presmoothTP<-function(data, s, from, to, method='LM', trans){
  
  #trunc=FALSE
  to_ini<-to
  from_ini<-from
  format='long'
  tmat<-trans
    
 suppressWarnings({ 

#if(format=='wide'){

#  tmat<-transTP(positions, states.names)
  
#  msprep<-prepTP(data = data, trans = tmat, time = times.names,  
#                 status = status.names)
  
#  db.long<-msprep[, c(1:8)]
  
#}else{
  
#  db.long<-data
  
 #}

db.long<-data

#if(trunc==FALSE){
  
#  db.long<-data
  
#}else{
  
  #load('trunc.values.RData')
  
#  db.long[db.long$Tstart>=db.long$Tstop, ]
  
#  db.long<-db.long[db.long$Tstart<db.long$Tstop, ]
  
#  head(ebmt)
  
#  summary(ebmt$srv)
  
  #trunc <- runif(nrow(ebmt),0, 4*365)  # TRUNCATURA  U[0, ...]
  
#  if(value.dist==365){
    
#    trunc<-trunc.values365
    
#  }else{
    
#    trunc<-trunc.values1460
#  }
  
  #table(ebmt$srv<trunc)/nrow(ebmt) #30% dados truncados
  
  #trunc<-rep(2000, nrow(ebmt))
  
  #table(ebmt$rec<trunc)/nrow(ebmt) #35% rec truncados
  
#  dat.trunc<-cbind(1:nrow(ebmt), trunc)
  
#  head(dat.trunc)
  
#  dat.trunc<-as.data.frame(dat.trunc)
  
#  names(dat.trunc)<-c('id', 'trunc')
  
#  db.long<-merge(db.long, dat.trunc[,c('id', 'trunc')], by.x='id', by.y = 'id', 
#                 all.x = T)
  
 #  db.long[1:10,]
  
 #  db.long.aux<-db.long
  
 #  db.long$LT<-ifelse(db.long$Tstart<db.long$trunc, db.long$trunc,db.long$Tstart)
  
 #  db.long.f<-db.long[db.long$Tstop>db.long$trunc, ]
  
  #db.long.f[1:10,]
  
 #  db.long<-db.long.f
  
 #  db.long[1:20,]
  
 #  db.long$Tstart<-db.long$LT
  
#}


if(method=='PLM'){
  
  msdata<-db.long
  method.lmaj = "greenwood"
  from<-from ########################## from
  
  K <- nrow(tmat)
  
  if (any(is.na(match(from, 1:K)))) 
    stop("from should be subset of 1:K with K number of states")
  
  xsect<-function (msdata, xtime = 0) 
  {
    msd <- msdata[msdata$Tstart <= xtime & msdata$Tstop > xtime, 
    ]
    msd <- msd[order(msd$id, msd$trans, msd$Tstart), ]
    msd <- msd[!duplicated(msd$id), ]
    idstate <- data.frame(id = msd$id, state = msd$from)
    #tmat <- attr(msdata, "trans")
    
    #msd$trans
    
    
    #acrescentei
    
    #tmat <- matrix(NA, 3, 3)
    #tmat[1, 2:3] <- 1:2
    #tmat[2, 3] <- 3
    
    #tmat <- transMat(x = list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6),
     #                         c(6), c()), names = c("Tx", "Rec", "AE", 
    #"Rec+AE", "Rel", "Death"))
    
    #tmat<-ms.trans(positions, states.names)

    K <- nrow(tmat)
    msd$from <- factor(msd$from, levels = 1:K, labels = 1:K)
    tbl <- table(msd$from)
    atrisk <- sum(tbl)
    prop <- tbl/atrisk
    res <- idstate
    attr(res, "atrisk") <- tbl
    attr(res, "prop") <- prop
    return(res)
  }
  
  #s=500  
  
  xss <- xsect(msdata, s)
  
  infrom <- xss$id[xss$state %in% from]
  msdatas <- cutLMms(msdata, LM = s)
  msdatasfrom <- msdatas[msdatas$id %in% infrom, ]
  
  head(msdatasfrom) #primeiro q esta em estado 2 e id=7
  
  #haz -------
  
  tab.f<-NULL
  
  for(i in unique(msdatasfrom$trans)){
    
    #i<-6
    
    colon2<-msdatasfrom[msdatasfrom$trans==i, ]
    
    colon2.ord<-colon2[order(colon2$Tstop) , c('Tstop', 'status')]
    
    colon3<-as.data.frame(colon2.ord)
    
    fit.logit<- glm(status ~ Tstop, data = colon3, 
                    family=binomial(link="logit"))
    
    colon3$prob.logit <- predict(fit.logit, type="response")
    
    colon3[1:15,]
    
    #plot(colon3$Tstop, colon3$status)
    
    #lines(colon3$Tstop, colon3$prob.logit, col=2)
    
    nrisk<-rep(0, nrow(colon3))
    
    for(j in 1:nrow(colon3)){
      
      #j<-1
      
      #nrow(colon2[colon2$Tstart<400 & colon2$Tstop>=400,]) 
      #COMO E Q ELES PROGRAMARAM
      
      nrisk[j]<-length(which(as.numeric(colon3$Tstop[j])> 
                               as.numeric(colon2[, 'Tstart']) & 
                               as.numeric(colon3$Tstop[j])<
                               as.numeric(colon2[, 'Tstop']))) 
      #inicialmente tinha posto >= , <=
      
    }
    
    colon3$nrisk<-nrisk
    
    head(colon3)
    
    colon4<-colon3[ , c(1, 4, 2, 3)]
    
    names(colon4)<-c('time', 'nrisk', 'event', 'prob')
    
    colon4[1:10,]
    
    colon4$haz<-colon4$event/colon4$nrisk
    
    colon4[1:10,]
    
    colon4$pres.haz<-colon4$prob/colon4$nrisk
    
    colon4$pres.Haz<-cumsum(colon4$pres.haz)
    
    colon4$Haz<-cumsum(colon4$haz)
    
    colon4[1:15,]
    
    #plot(colon4$time, exp(-colon4$Haz), type = 'l')
    
    #lines(colon4$time, exp(-colon4$pres.Haz), type = 'l', col=2) 
    #tenta acompanhar e sem saltos
    
    colon4$pres.haz2<-colon4$prob/sum(colon4$prob)
    
    colon4$pres.Haz2<-cumsum(colon4$pres.haz2)
    
    colon4[1:15,]
    
    #plot(colon4$time, exp(-colon4$Haz), type = 'l', ylim=c(0.5,1))
    
    #lines(colon4$time, exp(-colon4$pres.Haz2), type = 'l', col=2) 
    #totalmente mal!
    
    
    colon4[1:20,]
    
    colon4[colon4$time==1170,]
    
    colon4$time #aparecem repetidos, p. ex 1170 para a transicao 
    
    colon4.max<-ddply(colon4, 'time', function(x) {
      mean<- suppressWarnings(mean(x$pres.Haz, na.rm = T)) 
      #INICIALMENTE ESTAVA MAX
    })
    
    colon4.max$trans<-rep(i, nrow(colon4.max))
    
    names(colon4.max)<-c('time', 'pres.Haz', 'trans')
    
    colon4.max[colon4.max$time==1170,]
    
    colon4$trans<-rep(i, nrow(colon4))
    
    tab.f<-rbind(tab.f, colon4.max)
    
  }
  
  head(tab.f)
  
  tab.f[tab.f$trans==6,][1:20,] #para todos os tempos tem presmooth-Haz
  
  #PASSAR PARA O FORMATO FINAL - mfit:
  
  sf0 <- summary(survfit(coxph(Surv(Tstart, Tstop, status) ~ strata(trans), 
                               data = msdatasfrom)))
  
  sf0$time
  
  c0 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), data = msdatasfrom)
  
  msf0 <- msfit2(c0, trans = tmat)
  
  unique(sort(tab.f$time)) %in% unique(sort(msf0$Haz$time))
  
  unique(sort(msf0$Haz$time)) %in% unique(sort(tab.f$time))
  
  unique(sort(sf0$time)) %in%  unique(sort(msf0$Haz$time))#sao os mesmos
  
  #tab.f<-tab.f[tab.f$time %in% unique(sort(sf0$time)), ]  
  #NAO FILTRO - MANTER TEMPOS TODOS ###########################
  
  tab.f.msf0.all.presm<-NULL
  
  for(i in unique(msdatasfrom$trans)){ #1:length(unique(tab.f$trans))
    
    #i<-1
    
    tab.f.msf0.aux<-as.data.frame(sort(unique(tab.f$time)))
    
    names(tab.f.msf0.aux)<-'time'
    
    tab.f.msf0.aux$time #tem todos os tempos
    
    tab.f.msf0<-merge(tab.f.msf0.aux, tab.f[tab.f$trans==i, 
                                            c('time', 'pres.Haz')], 
                      by= 'time',all = T)
    
    tab.f.msf0$trans<-i
    
    tab.f.msf0$pres.Haz[1]<-ifelse(is.na(tab.f.msf0$pres.Haz[1]), 0,
                                   tab.f.msf0$pres.Haz) 
    
    for(z in 2:nrow(tab.f.msf0)){
      
      #z<-2
      
      tab.f.msf0$pres.Haz[z]<-ifelse(is.na(tab.f.msf0$pres.Haz[z]), 
                                     tab.f.msf0$pres.Haz[(z-1)],
                                     tab.f.msf0$pres.Haz[z]) 
      
    }
    
    tab.f.msf0.all.presm<-rbind(tab.f.msf0.all.presm, tab.f.msf0)
    
  }
  
  tab.f.msf0.all.presm
  
  unique(tab.f.msf0.all.presm$trans)
  
  unique(tab.f.msf0.all.presm$time)
  
  tab.f.msf0.all.presm[tab.f.msf0.all.presm$trans==6,][1:10,]
  
  #tmat <- matrix(NA, 3, 3)
  #tmat[1, 2:3] <- 1:2
  #tmat[2, 3] <- 3
  #dimnames(tmat) <- list(from = c("healthy", "illness", "death"), 
  #                       to = c("healthy","illness", "death"))
  #tmat
  

  #tmat <- transMat(x = list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6),
  #                         c(6), c()), names = c("Tx", "Rec", "AE", "Rec+AE", 
  #"Rel", "Death"))
  
  #tmat<-ms.trans(positions, states.names)
  
  res.presm<-list(Haz=tab.f.msf0.all.presm, trans=tmat)
  
  names(res.presm$Haz)<-c("time", "Haz", "trans")
  
  res.presm$Haz[res.presm$Haz$trans==6,][1:10,]
  
  #P - estimativas --------
  
  object<-res.presm
  predt=s
  method = "greenwood"
  variance=FALSE
  #covariance=FALSE
  direction="forward"
  
  (trans <- object$trans)
  (transit <- to.trans2(trans))
  (numtrans <- nrow(transit))
  
  stackhaz <- object$Haz #data.frame para haz
  
  stackhaz[stackhaz$trans==6,][1:10,] #todos os tempos com presHaz
  
  for (i in 1:numtrans){
    
    #i<-1
    stackhaz$dhaz[stackhaz$trans==i] <- 
      diff(c(stackhaz$Haz[stackhaz$trans==i][1],
                                               stackhaz$Haz[stackhaz$trans==i]))
    
    
    if (direction=="forward")
      stackhaz <- stackhaz[stackhaz$time > predt,]
    else stackhaz <- stackhaz[stackhaz$time <= predt,]
  }
  
  (untimes <- sort(unique(stackhaz$time))) 
  #ordenar e considerar apenas tempos unicos
  
  (TT <- length(untimes)) #total de tempos
  
  (S <- nrow(trans)) #total de estados
  
  if (direction=="forward") {
    if (variance==TRUE) res <- array(0,c(TT+1,2*S+1,S)) 
    # 2*S+1 for time, probs (S), se (S)   
    else res <- array(0,c(TT+1,S+1,S)) # S+1 for time, probs (S)
    # first line (in case of forward) contains values for t=predt
    res[1,1,] <- predt
    for (j in 1:S) res[1, 1+j,] <- rep(c(0,1,0), c(j-1,1,S-j))
    if (variance) res[1,(S+2):(2*S+1),] <- 0
  } #primeira linha preenchida: 365, 1, 0, ..., 0
  
  (P <- diag(S)) #coloco 1's na diagonal
  
  #OBTER AS TABELAS: P --- valores AJ. Estimator
  
  for (i in 1:TT)
  {
    #i<-1
    
    idx <- ifelse(direction=="forward",i,TT+1-i) 
    #fica com valor 1 -- para forward
    
    tt <- untimes[idx]    #untimes --- tempos nao repetidos
    
    Haztt <- stackhaz[stackhaz$time==tt,]  
    #Haz --- para cada transicao nesse ponto
    lHaztt <- nrow(Haztt)  
    #numero de linhas --- 13. no caso, todas as transicoes no caso EBMT
    # build S x S matrix IplusdA
    IplusdA <- diag(S)  # I + dA
    
    
    for (j in 1:lHaztt){
      
      from <- transit$from[transit$transno==Haztt$trans[j]]
      to <- transit$to[transit$transno==Haztt$trans[j]]
      IplusdA[from, to] <- Haztt$dhaz[j]
      IplusdA[from, from] <- IplusdA[from, from] - Haztt$dhaz[j]
    }
    
    #if (any(diag(IplusdA)<0))
    #  warning("Warning! Negative diagonal elements of (I+dA); 
    #the estimate may not be meaningful. \n")
    
    
    if (method=="greenwood"){
      
      P <- P %*% IplusdA    #ESTIMADOR A. J. --- para cada tempo (usado)  
      #produto limite em relacao anterior
      
    }
    
    if (method=="aalen"){
      
      P <- P %*% IplusdA    #ESTIMADOR A. J. --- para cada tempo
    }
    
    res[idx+1,1,] <- tt
    res[idx+1,2:(S+1),] <- t(P)  #VAO SENDO COLOCADOS OS RESULTADOS PARA CADA 
    #UM DOS TEMPOS #passando 
    #para os possiveis arrays
    
  } # fim for (i in 1:TT) -----------------------------------------------
  
  res.plmaj<- vector("list", S)
  
  for (s2 in 1:S) { #S -- seis estados
    #s2<-1
    tmp <- as.data.frame(res[,,s2]) #estimativas para cada um dos estados
    
    if (min(dim(tmp))==1) tmp <- res[,,s2]
    names(tmp) <- c("time",paste("pstate",1:S,sep=""))
    res.plmaj[[s2]] <- tmp
    
  }
  
  res.plmaj.ult<-res.plmaj

  to2<-paste('pstate', to_ini, sep = '')
  
  res<-res.plmaj.ult[[from_ini]][, c('time', to2)]
  
  return(res)

}
  
if(method=='probit'){
  
  
  msdata<-db.long
  method.lmaj = "greenwood"
  from<-from ################### from
  
  K <- nrow(tmat)
  
  if (any(is.na(match(from, 1:K)))) 
    stop("from should be subset of 1:K with K number of states")
  
  xsect<-function (msdata, xtime = 0) 
  {
    msd <- msdata[msdata$Tstart <= xtime & msdata$Tstop > xtime, 
    ]
    msd <- msd[order(msd$id, msd$trans, msd$Tstart), ]
    msd <- msd[!duplicated(msd$id), ]
    idstate <- data.frame(id = msd$id, state = msd$from)
    #tmat <- attr(msdata, "trans")
    
    #msd$trans
    
    
    #acrescentei
    
    #tmat <- matrix(NA, 3, 3)
    #tmat[1, 2:3] <- 1:2
    #tmat[2, 3] <- 3
    
    #tmat <- transMat(x = list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6),
    #                         c(6), c()), names = c("Tx", "Rec", "AE", "Rec+AE",
    #"Rel", "Death"))
    
    #tmat<-ms.trans(positions, states.names)
    
    K <- nrow(tmat)
    msd$from <- factor(msd$from, levels = 1:K, labels = 1:K)
    tbl <- table(msd$from)
    atrisk <- sum(tbl)
    prop <- tbl/atrisk
    res <- idstate
    attr(res, "atrisk") <- tbl
    attr(res, "prop") <- prop
    return(res)
  }
  
  #s=500  
  
  xss <- xsect(msdata, s)
  
  infrom <- xss$id[xss$state %in% from]
  msdatas <- cutLMms(msdata, LM = s)
  msdatasfrom <- msdatas[msdatas$id %in% infrom, ]
  
  head(msdatasfrom) #primeiro q esta em estado 2 e id=7
  
  #haz -------
  
  tab.f<-NULL
  
  for(i in unique(msdatasfrom$trans)){
    
    #i<-6
    
    colon2<-msdatasfrom[msdatasfrom$trans==i, ]
    
    colon2.ord<-colon2[order(colon2$Tstop) , c('Tstop', 'status')]
    
    colon3<-as.data.frame(colon2.ord)
    
    fit.logit<- glm(status ~ Tstop, data = colon3, 
                    family=binomial(link="probit"))
    
    colon3$prob.logit <- predict(fit.logit, type="response")
    
    colon3[1:15,]
    
    #plot(colon3$Tstop, colon3$status)
    
    #lines(colon3$Tstop, colon3$prob.logit, col=2)
    
    nrisk<-rep(0, nrow(colon3))
    
    for(j in 1:nrow(colon3)){
      
      #j<-1
      
      #nrow(colon2[colon2$Tstart<400 & colon2$Tstop>=400,]) 
      #COMO E Q ELES PROGRAMARAM
      
      nrisk[j]<-length(which(
        as.numeric(colon3$Tstop[j])> as.numeric(colon2[, 'Tstart']) & 
                               as.numeric(colon3$Tstop[j])<
          as.numeric(colon2[, 'Tstop']))) #inicialmente tinha posto >= , <=
      
    }
    
    colon3$nrisk<-nrisk
    
    head(colon3)
    
    colon4<-colon3[ , c(1, 4, 2, 3)]
    
    names(colon4)<-c('time', 'nrisk', 'event', 'prob')
    
    colon4[1:10,]
    
    colon4$haz<-colon4$event/colon4$nrisk
    
    colon4[1:10,]
    
    colon4$pres.haz<-colon4$prob/colon4$nrisk
    
    colon4$pres.Haz<-cumsum(colon4$pres.haz)
    
    colon4$Haz<-cumsum(colon4$haz)
    
    colon4[1:15,]
    
    #plot(colon4$time, exp(-colon4$Haz), type = 'l')
    
    #lines(colon4$time, exp(-colon4$pres.Haz), type = 'l', col=2) 
    #tenta acompanhar e sem saltos
    
    colon4$pres.haz2<-colon4$prob/sum(colon4$prob)
    
    colon4$pres.Haz2<-cumsum(colon4$pres.haz2)
    
    colon4[1:15,]
    
    #plot(colon4$time, exp(-colon4$Haz), type = 'l', ylim=c(0.5,1))
    
    #lines(colon4$time, exp(-colon4$pres.Haz2), type = 'l', col=2) #totalmente mal!
    
    
    colon4[1:20,]
    
    colon4[colon4$time==1170,]
    
    colon4$time #aparecem repetidos, p. ex 1170 para a transicao 
    
    colon4.max<-ddply(colon4, 'time', function(x) {
      mean<- suppressWarnings(mean(x$pres.Haz, na.rm = T)) 
      #INICIALMENTE ESTAVA MAX
    })
    
    colon4.max$trans<-rep(i, nrow(colon4.max))
    
    names(colon4.max)<-c('time', 'pres.Haz', 'trans')
    
    colon4.max[colon4.max$time==1170,]
    
    colon4$trans<-rep(i, nrow(colon4))
    
    tab.f<-rbind(tab.f, colon4.max)
    
  }
  
  head(tab.f)
  
  tab.f[tab.f$trans==6,][1:20,] #para todos os tempos tem presmooth-Haz
  
  #PASSAR PARA O FORMATO FINAL - mfit:
  
  sf0 <- summary(survfit(coxph(Surv(Tstart, Tstop, status) ~ strata(trans), 
                               data = msdatasfrom)))
  
  sf0$time
  
  c0 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), data = msdatasfrom)
  
  msf0 <- msfit2(c0, trans = tmat)
  
  unique(sort(tab.f$time)) %in% unique(sort(msf0$Haz$time))
  
  unique(sort(msf0$Haz$time)) %in% unique(sort(tab.f$time))
  
  unique(sort(sf0$time)) %in%  unique(sort(msf0$Haz$time))#sao os mesmos
  
  #tab.f<-tab.f[tab.f$time %in% unique(sort(sf0$time)), ]  
  #NAO FILTRO - MANTER TEMPOS TODOS ###########################
  
  tab.f.msf0.all.presm<-NULL
  
  for(i in unique(msdatasfrom$trans)){ #1:length(unique(tab.f$trans))
    
    #i<-1
    
    tab.f.msf0.aux<-as.data.frame(sort(unique(tab.f$time)))
    
    names(tab.f.msf0.aux)<-'time'
    
    tab.f.msf0.aux$time #tem todos os tempos
    
    tab.f.msf0<-merge(tab.f.msf0.aux, 
                      tab.f[tab.f$trans==i, c('time', 'pres.Haz')],  
                      by= 'time',all = T)
    
    tab.f.msf0$trans<-i
    
    tab.f.msf0$pres.Haz[1]<-ifelse(is.na(
      tab.f.msf0$pres.Haz[1]), 0,tab.f.msf0$pres.Haz) 
    
    for(z in 2:nrow(tab.f.msf0)){
      
      #z<-2
      
      tab.f.msf0$pres.Haz[z]<-ifelse(is.na(tab.f.msf0$pres.Haz[z]), 
                                     tab.f.msf0$pres.Haz[(z-1)],
                                     tab.f.msf0$pres.Haz[z]) 
      
    }
    
    tab.f.msf0.all.presm<-rbind(tab.f.msf0.all.presm, tab.f.msf0)
    
  }
  
  tab.f.msf0.all.presm
  
  unique(tab.f.msf0.all.presm$trans)
  
  unique(tab.f.msf0.all.presm$time)
  
  
  tab.f.msf0.all.presm[tab.f.msf0.all.presm$trans==6,][1:10,]
  
  #tmat <- matrix(NA, 3, 3)
  #tmat[1, 2:3] <- 1:2
  #tmat[2, 3] <- 3
  #dimnames(tmat) <- list(from = c("healthy", "illness", "death"), 
  #                       to = c("healthy","illness", "death"))
  #tmat
  #tmat <- transMat(x = list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6),
  #                         c(6), c()), names = c("Tx", "Rec", "AE", "Rec+AE", 
  #"Rel", "Death"))
  
  #tmat<-ms.trans(positions, states.names)
  
  res.presm<-list(Haz=tab.f.msf0.all.presm, trans=tmat)
  
  names(res.presm$Haz)<-c("time", "Haz", "trans")
  
  res.presm$Haz[res.presm$Haz$trans==6,][1:10,]
  
  #P - estimativas --------
  
  object<-res.presm
  predt=s
  method = "greenwood"
  variance=FALSE
  #covariance=FALSE
  direction="forward"
  
  (trans <- object$trans)
  (transit <- to.trans2(trans))
  (numtrans <- nrow(transit))
  
  stackhaz <- object$Haz #data.frame para haz
  
  stackhaz[stackhaz$trans==6,][1:10,] #todos os tempos com presHaz
  
  for (i in 1:numtrans){
    
    #i<-1
    stackhaz$dhaz[stackhaz$trans==i] <- 
      diff(c(stackhaz$Haz[stackhaz$trans==i][1],
                                               stackhaz$Haz[stackhaz$trans==i]))
    
    
    if (direction=="forward")
      stackhaz <- stackhaz[stackhaz$time > predt,]
    else stackhaz <- stackhaz[stackhaz$time <= predt,]
  }
  
  (untimes <- sort(unique(stackhaz$time))) 
  #ordenar e considerar apenas tempos unicos
  
  (TT <- length(untimes)) #total de tempos
  
  (S <- nrow(trans)) #total de estados
  
  if (direction=="forward") {
    if (variance==TRUE) res <- array(0,c(TT+1,2*S+1,S)) 
    # 2*S+1 for time, probs (S), se (S)   
    else res <- array(0,c(TT+1,S+1,S)) # S+1 for time, probs (S)
    # first line (in case of forward) contains values for t=predt
    res[1,1,] <- predt
    for (j in 1:S) res[1, 1+j,] <- rep(c(0,1,0), c(j-1,1,S-j))
    if (variance) res[1,(S+2):(2*S+1),] <- 0
  } #primeira linha preenchida: 365, 1, 0, ..., 0
  
  (P <- diag(S)) #coloco 1's na diagonal
  
  #OBTER AS TABELAS: P --- valores AJ. Estimator
  
  for (i in 1:TT)
  {
    #i<-1
    
    idx <- ifelse(direction=="forward",i,TT+1-i) 
    #fica com valor 1 -- para forward
    
    tt <- untimes[idx]    #untimes --- tempos nao repetidos
    
    Haztt <- stackhaz[stackhaz$time==tt,]  
    #Haz --- para cada transicao nesse ponto
    lHaztt <- nrow(Haztt)  
    #numero de linhas --- 13. no caso, todas as transicoes no caso EBMT
    # build S x S matrix IplusdA
    IplusdA <- diag(S)  # I + dA
    
    
    for (j in 1:lHaztt){
      
      from <- transit$from[transit$transno==Haztt$trans[j]]
      to <- transit$to[transit$transno==Haztt$trans[j]]
      IplusdA[from, to] <- Haztt$dhaz[j]
      IplusdA[from, from] <- IplusdA[from, from] - Haztt$dhaz[j]
    }
    
    #if (any(diag(IplusdA)<0))
    #warning("Warning! Negative diagonal elements of (I+dA); the estimate 
    #may not be meaningful. \n")
    
    
    if (method=="greenwood"){
      
      P <- P %*% IplusdA    #ESTIMADOR A. J. --- para cada tempo (usado)  
      #produto limite em relacao anterior
      
    }
    
    if (method=="aalen"){
      
      P <- P %*% IplusdA    #ESTIMADOR A. J. --- para cada tempo
    }
    
    res[idx+1,1,] <- tt
    res[idx+1,2:(S+1),] <- t(P)  #VAO SENDO COLOCADOS OS RESULTADOS PARA 
    #CADA UM DOS TEMPOS #passando 
    #para os possiveis arrays
    
  } # fim for (i in 1:TT) -----------------------------------------------
  
  res.plmaj<- vector("list", S)
  
  for (s2 in 1:S) { #S -- seis estados
    #s2<-1
    tmp <- as.data.frame(res[,,s2]) #estimativas para cada um dos estados
    
    if (min(dim(tmp))==1) tmp <- res[,,s2]
    names(tmp) <- c("time",paste("pstate",1:S,sep=""))
    res.plmaj[[s2]] <- tmp
    
  }
  
  res.probit<-res.plmaj
  
  to2<-paste('pstate', to_ini, sep = '')
  
  res<-res.probit[[from_ini]][, c('time', to2)]

  return(res)
  
  }
  
if(method=='cauchit'){
  
  msdata<-db.long
  method.lmaj = "greenwood"
  from<-from ############################ from
  
  K <- nrow(tmat)
  
  if (any(is.na(match(from, 1:K)))) 
    stop("from should be subset of 1:K with K number of states")
  
  xsect<-function (msdata, xtime = 0) 
  {
    msd <- msdata[msdata$Tstart <= xtime & msdata$Tstop > xtime, 
    ]
    msd <- msd[order(msd$id, msd$trans, msd$Tstart), ]
    msd <- msd[!duplicated(msd$id), ]
    idstate <- data.frame(id = msd$id, state = msd$from)
    #tmat <- attr(msdata, "trans")
    
    #msd$trans
    
    
    #acrescentei
    
    #tmat <- matrix(NA, 3, 3)
    #tmat[1, 2:3] <- 1:2
    #tmat[2, 3] <- 3
    #tmat <- transMat(x = list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6),
    #                         c(6), c()), names = c("Tx", "Rec", "AE", "Rec+AE", 
    #"Rel", "Death"))
    
    #tmat<-ms.trans(positions, states.names)
    
    K <- nrow(tmat)
    msd$from <- factor(msd$from, levels = 1:K, labels = 1:K)
    tbl <- table(msd$from)
    atrisk <- sum(tbl)
    prop <- tbl/atrisk
    res <- idstate
    attr(res, "atrisk") <- tbl
    attr(res, "prop") <- prop
    return(res)
  }
  
  #s=500  
  
  xss <- xsect(msdata, s)
  
  infrom <- xss$id[xss$state %in% from]
  msdatas <- cutLMms(msdata, LM = s)
  msdatasfrom <- msdatas[msdatas$id %in% infrom, ]
  
  head(msdatasfrom) #primeiro q esta em estado 2 e id=7
  
  #haz -------
  
  tab.f<-NULL
  
  for(i in unique(msdatasfrom$trans)){
    
    #i<-6
    
    colon2<-msdatasfrom[msdatasfrom$trans==i, ]
    
    colon2.ord<-colon2[order(colon2$Tstop) , c('Tstop', 'status')]
    
    colon3<-as.data.frame(colon2.ord)
    
    fit.logit<- glm(status ~ Tstop, data = colon3, 
                    family=binomial(link="cauchit"))
    
    colon3$prob.logit <- predict(fit.logit, type="response")
    
    colon3[1:15,]
    
    #plot(colon3$Tstop, colon3$status)
    
    #lines(colon3$Tstop, colon3$prob.logit, col=2)
    
    nrisk<-rep(0, nrow(colon3))
    
    for(j in 1:nrow(colon3)){
      
      #j<-1
      
      #nrow(colon2[colon2$Tstart<400 & colon2$Tstop>=400,]) 
      #COMO E Q ELES PROGRAMARAM
      
      nrisk[j]<-length(which(as.numeric(colon3$Tstop[j])> 
                               as.numeric(colon2[, 'Tstart']) & 
                               as.numeric(colon3$Tstop[j])<
                               as.numeric(colon2[, 'Tstop']))) 
      #inicialmente tinha posto >= , <=
      
    }
    
    colon3$nrisk<-nrisk
    
    head(colon3)
    
    colon4<-colon3[ , c(1, 4, 2, 3)]
    
    names(colon4)<-c('time', 'nrisk', 'event', 'prob')
    
    colon4[1:10,]
    
    colon4$haz<-colon4$event/colon4$nrisk
    
    colon4[1:10,]
    
    colon4$pres.haz<-colon4$prob/colon4$nrisk
    
    colon4$pres.Haz<-cumsum(colon4$pres.haz)
    
    colon4$Haz<-cumsum(colon4$haz)
    
    colon4[1:15,]
    
    #plot(colon4$time, exp(-colon4$Haz), type = 'l')
    
    #lines(colon4$time, exp(-colon4$pres.Haz), type = 'l', col=2) 
    #tenta acompanhar e sem saltos
    
    colon4$pres.haz2<-colon4$prob/sum(colon4$prob)
    
    colon4$pres.Haz2<-cumsum(colon4$pres.haz2)
    
    colon4[1:15,]
    
    #plot(colon4$time, exp(-colon4$Haz), type = 'l', ylim=c(0.5,1))
    
    #lines(colon4$time, exp(-colon4$pres.Haz2), type = 'l', col=2) 
    #totalmente mal!
    
    
    colon4[1:20,]
    
    colon4[colon4$time==1170,]
    
    colon4$time #aparecem repetidos, p. ex 1170 para a transicao 
    
    colon4.max<-ddply(colon4, 'time', function(x) {
      mean<- suppressWarnings(mean(x$pres.Haz, na.rm = T)) 
      #INICIALMENTE ESTAVA MAX
    })
    
    colon4.max$trans<-rep(i, nrow(colon4.max))
    
    names(colon4.max)<-c('time', 'pres.Haz', 'trans')
    
    colon4.max[colon4.max$time==1170,]
    
    colon4$trans<-rep(i, nrow(colon4))
    
    tab.f<-rbind(tab.f, colon4.max)
    
  }
  
  head(tab.f)
  
  tab.f[tab.f$trans==6,][1:20,] #para todos os tempos tem presmooth-Haz
  
  #PASSAR PARA O FORMATO FINAL - mfit:
  
  sf0 <- summary(survfit(coxph(Surv(Tstart, Tstop, status) ~ strata(trans), 
                               data = msdatasfrom)))
  
  sf0$time
  
  c0 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), 
              data = msdatasfrom)
  
  msf0 <- msfit2(c0, trans = tmat)
  
  unique(sort(tab.f$time)) %in% unique(sort(msf0$Haz$time))
  
  unique(sort(msf0$Haz$time)) %in% unique(sort(tab.f$time))
  
  unique(sort(sf0$time)) %in%  unique(sort(msf0$Haz$time))#sao os mesmos
  
  #tab.f<-tab.f[tab.f$time %in% unique(sort(sf0$time)), ]  
  #NAO FILTRO - MANTER TEMPOS TODOS ###########################
  
  tab.f.msf0.all.presm<-NULL
  
  for(i in unique(msdatasfrom$trans)){ #1:length(unique(tab.f$trans))
    
    #i<-1
    
    tab.f.msf0.aux<-as.data.frame(sort(unique(tab.f$time)))
    
    names(tab.f.msf0.aux)<-'time'
    
    tab.f.msf0.aux$time #tem todos os tempos
    
    tab.f.msf0<-merge(tab.f.msf0.aux, tab.f[tab.f$trans==i, 
                                            c('time', 'pres.Haz')], 
                      by= 'time',all = T)
    
    tab.f.msf0$trans<-i
    
    tab.f.msf0$pres.Haz[1]<-ifelse(is.na(tab.f.msf0$pres.Haz[1]), 0,
                                   tab.f.msf0$pres.Haz) 
    
    for(z in 2:nrow(tab.f.msf0)){
      
      #z<-2
      
      tab.f.msf0$pres.Haz[z]<-ifelse(is.na(tab.f.msf0$pres.Haz[z]), 
                                     tab.f.msf0$pres.Haz[(z-1)],
                                     tab.f.msf0$pres.Haz[z]) 
      
    }
    
    tab.f.msf0.all.presm<-rbind(tab.f.msf0.all.presm, tab.f.msf0)
    
  }
  
  tab.f.msf0.all.presm
  
  unique(tab.f.msf0.all.presm$trans)
  
  unique(tab.f.msf0.all.presm$time)
  
  
  tab.f.msf0.all.presm[tab.f.msf0.all.presm$trans==6,][1:10,]
  
  #tmat <- matrix(NA, 3, 3)
  #tmat[1, 2:3] <- 1:2
  #tmat[2, 3] <- 3
  #dimnames(tmat) <- list(from = c("healthy", "illness", "death"), 
  #                       to = c("healthy","illness", "death"))
  #tmat
  
  #tmat <- transMat(x = list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6),
    #                         c(6), c()), names = c("Tx", "Rec", "AE", "Rec+AE",
  #"Rel", "Death"))
  
  #tmat<-transTP(positions, states.names)
  
  res.presm<-list(Haz=tab.f.msf0.all.presm, trans=tmat)
  
  names(res.presm$Haz)<-c("time", "Haz", "trans")
  
  res.presm$Haz[res.presm$Haz$trans==6,][1:10,]
  
  #P - estimativas --------
  
  object<-res.presm
  predt=s
  method = "greenwood"
  variance=FALSE
  #covariance=FALSE
  direction="forward"
  
  (trans <- object$trans)
  (transit <- to.trans2(trans))
  (numtrans <- nrow(transit))
  
  stackhaz <- object$Haz #data.frame para haz
  
  stackhaz[stackhaz$trans==6,][1:10,] #todos os tempos com presHaz
  
  for (i in 1:numtrans){
    
    #i<-1
    stackhaz$dhaz[stackhaz$trans==i] <- diff(c(stackhaz$Haz[stackhaz$trans==i][1],
                                               stackhaz$Haz[stackhaz$trans==i]))
    
    
    if (direction=="forward")
      stackhaz <- stackhaz[stackhaz$time > predt,]
    else stackhaz <- stackhaz[stackhaz$time <= predt,]
  }
  
  (untimes <- sort(unique(stackhaz$time))) 
  #ordenar e considerar apenas tempos unicos
  
  (TT <- length(untimes)) #total de tempos
  
  (S <- nrow(trans)) #total de estados
  
  if (direction=="forward") {
    if (variance==TRUE) res <- array(0,c(TT+1,2*S+1,S)) 
    # 2*S+1 for time, probs (S), se (S)   
    else res <- array(0,c(TT+1,S+1,S)) # S+1 for time, probs (S)
    # first line (in case of forward) contains values for t=predt
    res[1,1,] <- predt
    for (j in 1:S) res[1, 1+j,] <- rep(c(0,1,0), c(j-1,1,S-j))
    if (variance) res[1,(S+2):(2*S+1),] <- 0
  } #primeira linha preenchida: 365, 1, 0, ..., 0
  
  (P <- diag(S)) #coloco 1's na diagonal
  
  #OBTER AS TABELAS: P --- valores AJ. Estimator
  
  for (i in 1:TT)
  {
    #i<-1
    
    idx <- ifelse(direction=="forward",i,TT+1-i) 
    #fica com valor 1 -- para forward
    
    tt <- untimes[idx]    #untimes --- tempos nao repetidos
    
    Haztt <- stackhaz[stackhaz$time==tt,]  
    #Haz --- para cada transicao nesse ponto
    lHaztt <- nrow(Haztt)  
    #numero de linhas --- 13. no caso, todas as transicoes no caso EBMT
    # build S x S matrix IplusdA
    IplusdA <- diag(S)  # I + dA
    
    
    for (j in 1:lHaztt){
      
      from <- transit$from[transit$transno==Haztt$trans[j]]
      to <- transit$to[transit$transno==Haztt$trans[j]]
      IplusdA[from, to] <- Haztt$dhaz[j]
      IplusdA[from, from] <- IplusdA[from, from] - Haztt$dhaz[j]
    }
    
    #if (any(diag(IplusdA)<0))
    #warning("Warning! Negative diagonal elements of (I+dA); 
    #the estimate may not be meaningful. \n")
    
    
    if (method=="greenwood"){
      
      P <- P %*% IplusdA    
      #ESTIMADOR A. J. --- para cada tempo (usado)  produto limite 
      #em relacao anterior
      
    }
    
    if (method=="aalen"){
      
      P <- P %*% IplusdA    #ESTIMADOR A. J. --- para cada tempo
    }
    
    res[idx+1,1,] <- tt
    res[idx+1,2:(S+1),] <- t(P)  
    #VAO SENDO COLOCADOS OS RESULTADOS PARA CADA UM DOS TEMPOS #passando 
    #para os possiveis arrays
    
  } # fim for (i in 1:TT) -----------------------------------------------
  
  res.plmaj<- vector("list", S)
  
  for (s2 in 1:S) { #S -- seis estados
    #s2<-1
    tmp <- as.data.frame(res[,,s2]) #estimativas para cada um dos estados
    
    if (min(dim(tmp))==1) tmp <- res[,,s2]
    names(tmp) <- c("time",paste("pstate",1:S,sep=""))
    res.plmaj[[s2]] <- tmp
    
  }
  
  res.cauchit<-res.plmaj
  
  to2<-paste('pstate', to_ini, sep = '')
  
  res<-res.cauchit[[from_ini]][, c('time', to2)]
  
  return(res)
  
}
    })
  
}
Any scripts or data that you put into this service are public.
presmoothedTP documentation built on April 3, 2025, 7:44 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
presmoothedTP
Presmoothed Landmark Aalen-Johansen Estimator of Transition Probabilities for Complex Multi-State Models

R/presmoothTP.R
In presmoothedTP: Presmoothed Landmark Aalen-Johansen Estimator of Transition Probabilities for Complex Multi-State Models

Defines functions presmoothTP

Documented in presmoothTP

Try the presmoothedTP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

presmoothedTP Presmoothed Landmark Aalen-Johansen Estimator of Transition Probabilities for Complex Multi-State Models

R/presmoothTP.R In presmoothedTP: Presmoothed Landmark Aalen-Johansen Estimator of Transition Probabilities for Complex Multi-State Models

Defines functions presmoothTP

Documented in presmoothTP

Try the presmoothedTP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

presmoothedTP
Presmoothed Landmark Aalen-Johansen Estimator of Transition Probabilities for Complex Multi-State Models

R/presmoothTP.R
In presmoothedTP: Presmoothed Landmark Aalen-Johansen Estimator of Transition Probabilities for Complex Multi-State Models