R/unsmoothTP.R

Defines functions unsmoothTP

Documented in unsmoothTP

#' This function implements unsmoothed estimates of from the Aalen-Johansen 
#' estimator (AJ) and landmark Aalen-Johansen method of Putter & Spitoni (2016) 
#' for non-parametric estimation of transition probabilities in non-Markov
#' multi-state models.
#'
#' @description This function is used to obtain nonparametric unsmoothed 
#' estimates of the transition probabilities. The methods used to compute the 
#' transition probabilities are ”AJ” and ”LM” which can be also applied for any
#' type of multistate model, including models that allow for reversible 
#' transitions, and potencially non-Markovian seetings. 
#'
#' @param data A data frame in the long format containing the subject id; 
#' from corresponding to the starting state; the receiving state, to; the 
#' transition number, trans; the starting time of the transition given by 
#' Tstart; the stopping time of the transition, Tstop, and status for the status
#' variable, with 1 indicating an event (transition), 0 a censoring.
#' @param s The first time for obtaining estimates for the transition.
#' @param from The starting state of the transition probabilities.
#' @param to The last receiving state considered for the estimation of the 
#' transition probabilities.
#' @param method The method used to compute the transition probabilities.
#' Possible options are \code{"AJ"} and \code{"LM"}.
#' @param trans The transition matrix for multi-state model.

#' @return Nonparametric estimates of transition probabilities in general 
#' multi-state models using the AJ and the LMAJ estimators.
#' 
#' @examples
#' data("ebmt4")
#' db_wide <- ebmt4
#' positions<-list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6), c(6), c())
#' states.names = c("Tx", "Rec", "AE", "Rec+AE", "Rel", "Death")
#' trans<-transTP(positions, states.names)
#' times.names = c(NA, "rec", "ae","recae", "rel", "srv")
#' status.names=c(NA, "rec.s", "ae.s", "recae.s", "rel.s", "srv.s")
#' data.long<-prepTP(data=db_wide, trans, times.names, status.names)
#' 
#' res1<-unsmoothTP(data.long, s = 100, from = 1, to = 1, method = 'AJ', 
#' trans=trans)
#' head(res1)

#' res2<-unsmoothTP(data.long, s = 100, from = 1, to = 1, method = 'LM', 
#' trans=trans)
#' head(res2)

#' plot(res1, type='s', xlim=c(0,3000), ylim=c(0.6,1), ylab=c('p11(100,t)'))
#' lines(res2, type='s', col=2)
#' legend("topright", legend = c("AJ","LM"), lwd = 0.3, col = c(1,2), cex=0.7,
#' lty=c(1, 1))
#' 
#' res3<-unsmoothTP(data.long, s = 1000, from = 2, to = 6, method = 'AJ', 
#' trans=trans)
#' res4<-unsmoothTP(data.long, s = 1000, from = 2, to = 6, method = 'LM', 
#' trans=trans)
#' 
#' plot(res3, type='s', xlim=c(1000,3000), ylim=c(0,0.12), ylab=c('p26(100,t)'))
#' lines(res4, type='s', col=2)
#' legend("topright", legend = c("AJ","LM"), lwd = 0.3, col = c(1,2), cex=0.7,
#' lty=c(1, 1))

unsmoothTP<-function(data, s, from, to, method='LM', trans=trans){
  
  #trunc=FALSE
  to_ini<-to
  from_ini<-from
  format='long'
  tmat<-trans
  suppressWarnings({ 
    
  #if(format=='wide'){

    #  tmat<-transTP(positions, states.names)

    #msprep<-prepTP(data = data, trans = tmat, time = times.names,  
    #               status = status.names)
  
    #db.long<-msprep[, c(1:8)]

    #}else{
  
#db.long<-data

  #}
   
     db.long<-data
    

if(method=='AJ'){
  
  c0 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), data = db.long) 
  
  msf0 <- msfit(object = c0, vartype = "greenwood", trans = tmat)
  
  res.aj<-probtrans(msf0, predt=s)
  
  res.aj[[from]][1:15,] #from 1 to 6
  
  #res<-res.aj
  
  to2<-paste('pstate', to, sep = '')
  
  res<-res.aj[[from_ini]][, c('time', to2)] #as.data.frame( 
  
  return(res)
  
}else{

  msdata<-db.long
  method.lmaj = "greenwood"
  from<-from ############################################ from
  #TENHO DE CARREGAR MAIS A FRENTE TAMBEM!
  
  K <- nrow(tmat)
  
  if (any(is.na(match(from, 1:K)))) 
    stop("from should be subset of 1:K with K number of states")
  
  xsect<-function (msdata, xtime = 0) 
  {
    msd <- msdata[msdata$Tstart <= xtime & msdata$Tstop > xtime, 
    ]
    msd <- msd[order(msd$id, msd$trans, msd$Tstart), ]
    msd <- msd[!duplicated(msd$id), ]
    idstate <- data.frame(id = msd$id, state = msd$from)
    #tmat <- attr(msdata, "trans")
    
    K <- nrow(tmat)
    msd$from <- factor(msd$from, levels = 1:K, labels = 1:K)
    tbl <- table(msd$from)
    atrisk <- sum(tbl)
    prop <- tbl/atrisk
    res <- idstate
    attr(res, "atrisk") <- tbl
    attr(res, "prop") <- prop
    return(res)
  }
  
  xss <- xsect(msdata, s)
  
  infrom <- xss$id[xss$state %in% from]
  msdatas <- cutLMms(msdata, LM = s)
  msdatasfrom <- msdatas[msdatas$id %in% infrom, ]
  
  head(msdatasfrom) #primeiro q esta em estado 2 e id=7
  
  sf0 <- summary(survfit(coxph(Surv(Tstart, Tstop, status) ~ strata(trans), 
                               data = msdatasfrom)))
  
  sf0$time #tempos a usar depois....q sao os obtidos usando msfit.
  
  sf0$n.risk
  
  length(sf0$n.event)
  
  length(sf0$time)
  
  length(sf0$n.risk)
  
  sf0$n.event #apenas tempos em que houve transicao
  
  c0 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), data = msdatasfrom)
  
  msf0 <- msfit2(c0, trans = tmat)
  
  msf0$Haz[msf0$Haz$trans==2, ] #acumulado...valor igual para tempos quando nao 
  #houve transicao
  
  msf0$Haz[msf0$Haz$trans==2, ][1:15,]
  
  msf0$Haz[msf0$Haz$trans==5, ][1:15,]
  
  unique(sf0$time) %in% unique(msf0$Haz$time) #tudo
  
  unique(msf0$Haz$time) %in% unique(sf0$time) #1 valor q nao consta
  
  #msfit #CONSTRUIR
  
  tab.f<-NULL
  
  for(i in unique(msdatasfrom$trans)){
    
    #i<-5
    
    #i<-unique(msdatasfrom$trans)
    
    colon2<-msdatasfrom[msdatasfrom$trans==i, ]
    
    colon2.ord<-colon2[order(colon2$Tstop) , c('Tstop', 'status')]
    
    colon3<-as.data.frame(colon2.ord)
    
    colon3[1:22,] #171 repetido duas vezes
    
    colon2[1:5,]
    
    nrisk<-rep(0, nrow(colon3))
    
    for(j in 1:nrow(colon3)){
      
      #j<-1
      
      #nrow(colon2[colon2$Tstart<400 & colon2$Tstop>=400,]) #COMO E Q ELES 
      #PROGRAMARAM
      
      nrisk[j]<-length(which(
        as.numeric(colon3$Tstop[j])> as.numeric(colon2[, 'Tstart']) & 
        as.numeric(colon3$Tstop[j])<as.numeric(colon2[, 'Tstop']))) 
        #inicialmente tinha posto >= , <=
      
    }
    
    nrisk
    
    colon3$nrisk<-nrisk
    
    colon3
    
    colon4<-colon3[ , c(1,3,2)]
    
    names(colon4)<-c('time', 'nrisk', 'event')
    
    colon4$Haz<-cumsum(colon4$event/colon4$nrisk)
    
    colon4$trans<-rep(i, nrow(colon4))
    
    colon4[1:10,]
    
    colon4<-colon4[ ,c(1, 4, 2, 3, 5)]
    
    names(colon4)<-c('time', 'Haz', 'norisk', 'noevent', 'trans')
    
    colon4[1:40,]
    
    colon4.max<-ddply(colon4, 'time', function(x) {
      max<- suppressWarnings(max(x$Haz, na.rm = T))
    })
    
    colon4.max$trans<-rep(i, nrow(colon4.max))
    
    names(colon4.max)<-c('time', 'Haz', 'trans')
    
    colon4.max[1:40,]
    
    tab.f<-rbind(tab.f, colon4.max)
    
  }
  
  tab.f
  
  tab.f.aux<-tab.f 
  
  tab.f<-tab.f[tab.f$time %in% unique(sort(sf0$time)), ]
  
  tab.f.msf0.all<-NULL
  
  for(i in unique(msdatasfrom$trans)){ #length(unique(tab.f$trans))
    
    #i<-1
    
    tab.f.msf0.aux<-as.data.frame(sort(unique(tab.f$time)))
    
    names(tab.f.msf0.aux)<-'time'
    
    tab.f.msf0<-merge(tab.f.msf0.aux, tab.f[tab.f$trans==i, c('time', 'Haz')],  
                      by= 'time',all = T)
    
    tab.f.msf0$trans<-i
    
    tab.f.msf0$Haz[1]<-ifelse(is.na(tab.f.msf0$Haz[1]), 0,tab.f.msf0$Haz) 
    
    for(z in 2:nrow(tab.f.msf0)){
      
      #z<-2
      
      tab.f.msf0$Haz[z]<-ifelse(is.na(tab.f.msf0$Haz[z]), tab.f.msf0$Haz[(z-1)],
                                tab.f.msf0$Haz[z]) 
      
    }
    
    tab.f.msf0.all<-rbind(tab.f.msf0.all, tab.f.msf0)
    
  }
  
  tab.f.msf0.all
  
  res<-list(Haz=tab.f.msf0.all, trans=tmat)
  
  #LMAJ contruindo PROBTRANS:
  
  object<-res
  predt = s ################################# s #####
  method.lmaj = "greenwood"
  variance=FALSE
  #covariance=FALSE
  direction="forward"
  
  (trans <- object$trans)
  (transit <- to.trans2(trans))
  (numtrans <- nrow(transit))
  
  stackhaz <- object$Haz #data.frame para haz
  
  for (i in 1:numtrans){
    
    stackhaz$dhaz[stackhaz$trans==i] <- diff(c(stackhaz$Haz[stackhaz$trans==i][1],
                                               stackhaz$Haz[stackhaz$trans==i]))
    
    
    if (direction=="forward")
      stackhaz <- stackhaz[stackhaz$time > predt,]
    else stackhaz <- stackhaz[stackhaz$time <= predt,]
  }
  
  
  (untimes <- sort(unique(stackhaz$time))) #ordenar e considerar 
  #apenas tempos unicos
  
  (TT <- length(untimes)) #total de tempos
  
  (S <- nrow(trans)) #total de estados
  
  if (direction=="forward") {
    if (variance==TRUE) res <- array(0,c(TT+1,2*S+1,S)) # 2*S+1 for time, 
    #probs (S), se (S)   
    else res <- array(0,c(TT+1,S+1,S)) # S+1 for time, probs (S)
    # first line (in case of forward) contains values for t=predt
    res[1,1,] <- predt
    for (j in 1:S) res[1, 1+j,] <- rep(c(0,1,0), c(j-1,1,S-j))
    if (variance) res[1,(S+2):(2*S+1),] <- 0
  } #primeira linha preenchida: 365, 1, 0, ..., 0
  
  (P <- diag(S)) #coloco 1's na diagonal
  
  #OBTER AS TABELAS: P --- valores AJ. Estimator
  
  for (i in 1:TT)
  {
    #i<-1
    
    idx <- ifelse(direction=="forward",i,TT+1-i)#fica com valor1- para forward
    
    tt <- untimes[idx]    #untimes --- tempos nao repetidos
    
    Haztt <- stackhaz[stackhaz$time==tt,]#Haz ---para cada transicao nesse ponto
    lHaztt <- nrow(Haztt)  #numero de linhas --- 13. no caso, todas as 
    #transicoes no caso EBMT
    # build S x S matrix IplusdA
    IplusdA <- diag(S)  # I + dA
    
    
    
    for (j in 1:lHaztt){
      
      from <- transit$from[transit$transno==Haztt$trans[j]]
      to <- transit$to[transit$transno==Haztt$trans[j]]
      IplusdA[from, to] <- Haztt$dhaz[j]
      IplusdA[from, from] <- IplusdA[from, from] - Haztt$dhaz[j]
    }
    
    #if (any(diag(IplusdA)<0))
    #  warning("Warning! Negative diagonal elements of (I+dA); 
    #the estimate may not be meaningful. \n")
    
    
    if (method.lmaj=="greenwood"){
      
      P <- P %*% IplusdA    #ESTIMADOR A. J. --- para cada tempo (usado)  
      #produto limite em relacao anterior
      
    }
    
    if (method.lmaj=="aalen"){
      
      P <- P %*% IplusdA    #ESTIMADOR A. J. --- para cada tempo
    }
    
    res[idx+1,1,] <- tt
    res[idx+1,2:(S+1),] <- t(P)  #VAO SENDO COLOCADOS OS RESULTADOS PARA CADA 
    #UM DOS TEMPOS #passando 
    #para os possiveis arrays
    
  } # fim for (i in 1:TT) -----------------------------------------------
  
  res2.lmaj  <- vector("list", S)
  
  for (s2 in 1:S) { #S -- seis estados
    
    tmp <- as.data.frame(res[,,s2]) #estimativas para cada um dos estados
    
    if (min(dim(tmp))==1) tmp <- res[,,s2]
    names(tmp) <- c("time",paste("pstate",1:S,sep=""))
    res2.lmaj[[s2]] <- tmp
    
  }
  
  #res2.lmaj[[from_ini]][1:10,]
  
  res.lmaj<-res2.lmaj
  
  res<-res.lmaj

  #to2<-to_ini+1
  
  #res<-res[[from_ini]][, c(1, to2)] 
  
  to2<-paste('pstate', to_ini, sep = '')
  
  res<-res[[from_ini]][, c('time', to2)]

  return(res)

}
  })
}

Try the presmoothedTP package in your browser

Any scripts or data that you put into this service are public.

presmoothedTP documentation built on April 3, 2025, 7:44 p.m.