R/uacv.R

Defines functions uacv RloglikUACV

Documented in uacv

RloglikUACV <- function(b0, bfix0, fix0, Y0, X0, Xd0, idD0, D0, Xseuil0, nmes0, nv0, idx0, idiag0, ncor0, ny0, nalea0, ntr0, link0, nodes0, epsY0, nRE0, nBM0, nbevt0, idcause0, entreRetard0, nbTronc0)
    {
        .Call("loglikUACV", b0, bfix0, fix0, Y0, X0, Xd0, idD0, D0, Xseuil0, nmes0, nv0, idx0, idiag0, ncor0, ny0, nalea0, ntr0, link0, nodes0, epsY0, nRE0, nBM0, nbevt0, idcause0, entreRetard0, nbTronc0)
    }


## uacv pour calculer le fit de la partie survie ##
#' Computation of the Universal Approximate Cross Validation (UACV) criterion for a joint model estimated by JointMult function
#'
#' @param model a JointMult model
#' @param Y a list of \code{multlcmm} objects
#' @param D a list of two-sided formula defining the event part of the model
#' @param data data.frame containing the observations and variables
#' @param var.time a character vector indicating the name of the time variables
#' @param RE an indicator of the random effect structure between dimensions
#' @param BM an indicator of the correlation of the Brownian motions
#' @param B vector containing initial values for the parameters
#' @param posfix optional vector specifying the indices in vector B of the parameters that are not estimated
#' @param breaks optional vector specifying the break points in the case where the event time is discretized
#' @param delayed logical vector indicating if delayed entry should be accounted for
#' @return the value of the Universal Approximate Cross Validation criterion
#' @author Cecile Proust-Lima and Viviane Philipps
#' @references
#' Commenges D, Proust-Lima C, Samieri C, Liquet B.
#' A universal approximate cross-validation criterion for regular risk functions.
#' The International Journal of Biostatistics 2015; 11(1): 51–67.
#' doi: 10.1515/ijb-2015-0004
#' 
#' @export

uacv <- function(model,Y,D,data,var.time,RE="block-diag",BM="diag",B,posfix,breaks=NULL,delayed=TRUE)
    {
        ## verif arg
        if(missing(Y)) stop("Y is missing")
        if(missing(D)) stop("D is missing")
        if(missing(data)) stop("data is missing")
        if(missing(var.time)) stop("var.time is missing")

        if(!is.list(Y)) stop("Y should be a list of multlcmm objects")
        if(!(all(sapply(Y,class)=="multlcmm"))) stop("Y should only contain multlcmm objects")
        if(!is.list(D)) stop("D should be a list of 1 or 2 formula")
        if(length(D)>2) stop("D should be of length 1 or 2")
        if(!(all(sapply(D,class)=="formula")))  stop("D should only contain two-sided formula")
        if(!is.data.frame(data)) stop("data should be a data frame")
        if((!nrow(data)) | (!ncol(data))) stop("data is empty")
        if(!(RE %in% c("block-diag","full"))) stop("RE should be either 'block-diag' or 'full'")
        if(!(BM %in% c("diag","full"))) stop("BM should be either 'diag' or 'full'")
        if(!is.null(breaks)){if(!is.vector(breaks)) stop("breaks should be a vector")}
        if(!all(is.logical(delayed))) stop("delayed should be a logical vector")

        ##### donnees Y #####
        K <- length(Y)
        if(length(var.time)==1) var.time <- rep(var.time,K)
        dataY <- NULL
        ny <- rep(NA,K)
        Ynames <- vector("list",K)
        Xnames <- vector("list",K)
        nomsX <- unique(unlist(sapply(Y,function(x) x$Xnames2)))
        
        for(k in 1:K)
            {
                ### modele k
                if(length(Y[[k]]$call))
                    {
                        z <- Y[[k]]$call
                        z$data <- data
                        z$maxiter <- 0
                        z$B <- Y[[k]]$best
                        z$verbose <- FALSE
                        mod <- eval(z)
                    }
                else
                    {
                        mod <- eval(Y[[k]])
                    }
                assign(paste("mod",k,sep=""),mod)

                subject <- mod$call$subject
                if(k>1){if(subject != colnames(dataY)[1]) stop("Subject variable should be the same for all multlcmm models")}
                
                colx <- c(subject,mod$Xnames2)
                
                ny[k] <- length(mod$Ynames)

                Ynames[[k]] <- mod$Ynames
                Xnames[[k]] <- mod$Xnames2

                ## donnees km
                for(m in 1:ny[k])
                    {
                        ## data frame de l'outcome m
                        colx <- c(subject,nomsX,mod$Ynames[m])
                        if(length(mod$na.action[[m]]))
                            {
                                datam <- data[-mod$na.action[[m]],colx,drop=FALSE]
                            }
                        else
                            {
                                datam <- data[,colx,drop=FALSE]
                            }

                        datam <- datam[order(datam[,1],datam[,var.time[k]]),,drop=FALSE]
                        old <- colnames(datam)
                        datam$processK <- k
                        datam$outcomeM <- m
                        #datam <- datam[,c("processK","outcomeM",old)]
                        datam <- datam[,c(old[1],"processK","outcomeM",old[-1])]

                        if((k==1) & (m==1))
                            {
                                dataY <- datam
                                colnames(dataY)[which(colnames(dataY)==var.time[k])] <- "timeT"
                                colnames(dataY)[which(colnames(dataY)==Ynames[[k]][m])] <- "measureY"
                            }
                        else
                            {
                                colnames(datam)[which(colnames(datam)==var.time[k])] <- "timeT"
                                colnames(datam)[which(colnames(datam)==Ynames[[k]][m])] <- "measureY"
                                Xplus <- setdiff(colnames(datam),colnames(dataY))
                                if(length(Xplus))
                                    {
                                        for(l in 1:length(Xplus))
                                            {
                                                old <- colnames(dataY)
                                                dataY <- cbind(dataY,NA)
                                                colnames(dataY) <- c(old,Xplus[l])
                                            }
                                    }

                                Xmqt <- setdiff(colnames(dataY),colnames(datam))
                                if(length(Xmqt))
                                    {
                                        for(l in 1:length(Xmqt))
                                            {
                                                old <- colnames(datam)
                                                datam <- cbind(datam,NA)
                                                colnames(datam) <- c(old,Xmqt[l])
                                            }
                                        datam <- datam[,colnames(dataY),drop=FALSE]
                                    }

                                dataY <- rbind(dataY,datam)
                            }
                    }
                
            }

        
        #### donnees D ####
        nbevt <- length(D)

        ## premier evt
        dem <- NULL
        D1 <- D[[1]]
        tD1 <- terms(D1)        
        formD1 <- gsub("[[:space:]]","",D1[3])
        
        if(!any(c(grep("longDiag",D1),grep("survBreak",D1)))) stop("Left side of formula in D should be either 'longDiag()' or 'survBreak()'")
         
        gauche1 <- all.vars(tD1[[2]]) #t0, t et d
       
        if(length(grep("longDiag",D1[2])))
            {
                discretise <- 0
                
                if(length(gauche1)!=3) stop("Please specified entry time, time and indicator in that order in longDiag")
            
                entry <- gauche1[1]
                var.time <- c(var.time,gauche1[2])
                dem <- gauche1[3]
            }
        else
            {
                if(length(grep("survBreak",D1[2])))
                    {
                        discretise <- 1
                        if(length(gauche1)!=3) stop("Please specified entry time, time and indicator in that order in survBreak")
                        
                        entry <- gauche1[1]
                        var.time <- c(var.time,gauche1[2])
                        dem <- gauche1[3]
                    }
            }
               
        varexplD1 <- all.vars(formula(paste("~",D1[3])))

        
        ## deuxieme evt
        dc <- NULL
        D2 <- NULL
        gauche2 <- NULL
        varexplD2 <- NULL
        if(nbevt==2)
            {
                D2 <- D[[2]]
                tD2 <- terms(D2)
                formD2 <- gsub("[[:space:]]","",D2[3])

                if(!any(c(grep("longDiag",D2),grep("survBreak",D2)))) stop("Left side of formula in D should be either 'longDiag()' or 'survBreak()'")
        
                gauche2 <- all.vars(tD2[[2]]) #t0, t et d
                
                if(length(grep("longDiag",D2[2])))
                    {
                        discretise <- c(discretise,0)

                        if(length(gauche2)!=3) stop("Please specified entry time, time and indicator in that order in longDiag")

                        entry <- c(entry,gauche2[1])
                        var.time <- c(var.time,gauche2[2])
                        dc <- gauche2[3]
                    }
                else
                    {
                        if(length(grep("survBreak",D2[2])))
                            {
                                discretise <- c(discretise,1)

                                if(length(gauche2)!=3) stop("Please specified entry time, time and indicator in that order in survBreak")
                                
                                entry <- c(entry,gauche2[1])
                                var.time <- c(var.time,gauche2[2])
                                dc <- gauche2[3]
                            }
                    }
                
                varexplD2 <- all.vars(formula(paste("~",D2[3])))

                if(all(discretise==1)) stop("At most 1 event time can be discretized")
                if((discretise[1]==1) & (discretise[2]==0)) stop("exchange events!") #!**
            }

        varexplD <- unique(c(varexplD1,varexplD2))
        nvarexplD <- length(varexplD)
        delayed <- rep(delayed,length.out=nbevt)
        
        
        
        dataD1 <- NULL
        dataD2 <- NULL
        ni01 <- NULL
        ni02 <- NULL
        
        ## donnees dem
        if(!is.null(dem))
            {
                if(discretise[1]==0)
                    {
                        dataD1 <- data[,c(subject,entry[1],var.time[K+1],dem,setdiff(unique(c(varexplD1,nomsX)),c(var.time,entry[1])))]
                        dataD1 <- na.omit(dataD1)
                        dataD1 <- dataD1[order(dataD1[,1],dataD1[,3]),]

                        ## ajouter temps T0 en premiere ligne de chaque sujet
                        if(delayed[1]==TRUE)
                            {
                                ntmp <- table(dataD1[,1])
                                dataD1entry <- dataD1[c(1,cumsum(ntmp[-length(ntmp)])+1),]
                                dataD1entry[,3] <- dataD1entry[,2]
                                dataD1entry$measure <- 0
                                dataD1entry[,dem] <- 0
                                
                                dataD1measure <- dataD1
                                dataD1measure$measure <- 1

                                dataD1bis <- merge(dataD1entry,dataD1measure,all=TRUE)
                                jinf <- which(!is.finite(dataD1bis[,var.time[K+1]]))
                                if(length(jinf))
                                    {
                                        dataD1bis <- dataD1bis[-jinf,]
                                    }
                                dataD1 <- dataD1bis[order(dataD1bis[,subject],dataD1bis[,"measure"],dataD1bis[,var.time[K+1]]),]
                            }
                        
                        ## garder jusqu au premier 1
                        nd <- as.vector(table(dataD1[,1]))
                        nok <- tapply(dataD1[,dem],dataD1[,1],function(x){return(1:ifelse(any(x==1),which(x==1)[1],length(x)))})
                        garde <- unlist(sapply(1:length(nok),function(k,nd,nok) sum(nd[1:k])-nd[k]+nok[[k]],nd=nd,nok=nok))
                        
                        dataD1 <- dataD1[garde,,drop=FALSE]
                        
                        ## temps max pr chaque sujet
                        max1 <- tapply(dataD1[,3],dataD1[,1],max)
                        t1max <- data.frame(id=names(max1),t1max=as.vector(max1),stringsAsFactors=FALSE)
                        maxd1 <- as.numeric(tapply(dataD1[,4],dataD1[,1],max))
                        t1max <- data.frame(t1max,maxd1)
                        
                        colnames(t1max) <- c(colnames(dataY)[1],"t1max","d1")
                        
                        ## nb de mesures
                        ni01 <- table(dataD1[,1]) # mesure T0 dedans! revoir !**

                        ## entree retardee
                        if(delayed[1]==TRUE)
                            {
                                nbTronc <- matrix(1,length(ni01),ncol=1)

                                dataD1T0 <- unique(dataD1[,c(subject,entry[1])])
                                nbTronc[which(!is.finite(dataD1T0[,2])),1] <- 0
                            }
                        else
                            {
                                nbTronc <- matrix(0,length(ni01),ncol=1)
                            }

                    }
                else
                    {
                        ## evt absorbant et en temps continu
                        
                        dataD1 <- data[,c(subject,entry[1],var.time[K+1],dem,setdiff(unique(c(varexplD1,nomsX)),c(var.time,entry[1])))]
                        dataD1 <- na.omit(dataD1)
                        dataD1 <- dataD1[order(dataD1[,1],dataD1[,3]),]
                        
                        ## garder la premiere/derniere ligne de chaque sujet de dataD1
                        ntmp <- table(dataD1[,1])
                        #dataD1 <- dataD1[c(1,cumsum(ntmp[-length(ntmp)])+1),,drop=FALSE]# premiere
                        dataD1 <- dataD1[cumsum(ntmp),,drop=FALSE]# derniere
                        
                        ## discretiser
                        breaks <- sort(breaks)
                        tbr <- (breaks[-length(breaks)]+breaks[-1])/2
                        nbr <- length(breaks)

                        t1max <- dataD1[,c(1,3,4,2)]
                        colnames(t1max) <- c(colnames(dataY)[1],"t1max","d1","t1min")

                        t1max <- data.frame(t1max,intervalle=as.numeric(cut(x=t1max$t1max,breaks=breaks,labels=1:(nbr-1),include.lowest=TRUE,right=FALSE)))
                        t1max <- data.frame(t1max,intervT0=as.numeric(cut(x=t1max$t1min,breaks=breaks,labels=1:(nbr-1),include.lowest=TRUE,right=FALSE)))

                        ## si derniere mesure=0, s arreter a l intervalle  precedent
                        t1max$intervalle[which(t1max$d1==0)] <- t1max$intervalle[which(t1max$d1==0)]-1

                        ## entree retardee
                        nbTronc <- matrix(rep(as.numeric(delayed[1]==TRUE),nrow(t1max)),ncol=1)
                        nbTronc[which(t1max$intervT0==1),1] <- 0

                        ## recreer les donnees dc
                        dataD1bis <- as.data.frame(matrix(NA,0,ncol(dataD1),dimnames=list(NULL,colnames(dataD1))))
                        ni01 <- NULL
                        for(i in 1:nrow(t1max))
                            {
                                if(delayed[1]==TRUE)
                                    {
                                        t1 <- tbr[(t1max[i,6]-1):t1max[i,5]]
                                        t01 <- rep(-Inf,length(t1))
                                        if(t1max$intervT0[i]>1)
                                            {
                                                t01 <- rep(tbr[t1max[i,6]-1],length(t1))
                                            }
                                    }
                                else
                                    {
                                        t1 <- tbr[t1max[i,6]:t1max[i,5]]
                                        t01 <- rep(-Inf,length(t1))
                                    }

                                di1 <- data.frame(rep(t1max[i,1],length(t1)),t01,t1,rep(0,length(t1)))
                                if(t1max[i,3]==1) di1[length(t1),4] <- 1
                                if(ncol(dataD1)>4)
                                    {
                                        ## boucle pour garder les differents types (character, numeric)
                                        for(k in 5:ncol(dataD1))
                                            {
                                                di1 <- data.frame(di1,rep(dataD1[which(dataD1[,1]==t1max[i,1])[1],k],length(t1)))
                                            }
                                    }
                                        
                                colnames(di1) <- colnames(dataD1)
                                dataD1bis <- rbind(dataD1bis,di1)

                                ni01 <- c(ni01,length(which(dataD1bis[,1]==t1max[i,1])))
                            }
                        
                        dataD1 <- dataD1bis
                    }

                D <- t1max[,c(1,3)]
            }
        
        ## donnees dc
        if(!is.null(dc))
            {                
                if(discretise[2]==0)
                    {
                        ## 2e evt de type diagnostic
                        dataD2 <- data[,c(subject,entry,var.time[K+2],dc,setdiff(unique(c(varexplD2,nomsX)),c(var.time,entry[2])))]
                        dataD2 <- na.omit(dataD2)
                        dataD2 <- dataD2[order(dataD2[,1],dataD2[,3]),]

                        ## ajouter temps T0 en premiere ligne de chaque sujet
                        if(delayed[2]==TRUE)
                            {
                                ntmp <- table(dataD2[,1])
                                dataD2entry <- dataD2[c(1,cumsum(ntmp[-length(ntmp)])+1),]
                                dataD2entry[,3] <- dataD2entry[,2]
                                dataD2entry$measure <- 0
                                dataD2entry[,dc] <- 0
                                
                                dataD2measure <- dataD2
                                dataD2measure$measure <- 1

                                dataD2bis <- merge(dataD2entry,dataD2measure,all=TRUE)
                                
                                jinf <- which(!is.finite(dataD2bis[,var.time[K+1]]))
                                if(length(jinf))
                                    {
                                        dataD2bis <- dataD2bis[-jinf,]
                                    }
                                dataD2 <- dataD2bis[order(dataD2bis[,subject],dataD2bis[,var.time[K+2]]),]
                            }
                        
                        ## garder jusqu au premier 1
                        nd <- as.vector(table(dataD2[,1]))
                        nok <- tapply(dataD2[,dc],dataD2[,1],function(x){return(1:ifelse(any(x==1),which(x==1)[1],length(x)))})
                        garde <- unlist(sapply(1:length(nok),function(k,nd,nok) sum(nd[1:k])-nd[k]+nok[[k]],nd=nd,nok=nok))
                        
                        dataD2 <- dataD2[garde,,drop=FALSE]

                        ## temps max pr chaque sujet
                        max2 <- tapply(dataD2[,3],dataD2[,1],max)
                        t2max <- data.frame(id=names(max2),t2max=as.vector(max2),stringsAsFactors=FALSE)
                        maxd2 <- as.numeric(tapply(dataD2[,4],dataD2[,1],max))
                        t2max <- data.frame(t2max,maxd2)
                        colnames(t2max) <- c(colnames(dataY)[1],"t2max","d2")

                        ## matrice sujet indicateur evt
                        D <- merge(t1max,t2max,all=FALSE) #selection ici
                        evt <- (D[,"t1max"]<=D[,"t2max"])*D[,"d1"]+2*(D[,"t1max"]>=D[,"t2max"])*D[,"d2"]
                        evt[which(evt==3)] <- 1 # ? !** si ex aequo, on prend D1
                        Tevt <- apply(D[,c("t1max","t2max")],1,min) # ? a voir si ok  si evt=0!**
                        D <- data.frame(D[,1],evt,Tevt)
                        colnames(D) <- c(colnames(dataY)[1],"evt","Tevt")
                        D <- D[order(D[,1]),]

                        ## dataD1 et dataD2 sans les temps post-diag
                        dataD1bis <- as.data.frame(matrix(NA,0,ncol(dataD1),dimnames=list(NULL,colnames(dataD1))))
                        dataD2bis <- as.data.frame(matrix(NA,0,ncol(dataD2),dimnames=list(NULL,colnames(dataD2))))
                        ni01 <- NULL
                        ni02 <- NULL
                        for(i in 1:nrow(D))
                            {
                                if(D[i,2]==1)
                                    {
                                        dataD1bis <- rbind(dataD1bis,dataD1[which(dataD1[,1]==D[i,1]),])
                                        
                                        di2 <- dataD2[which((dataD2[,1]==D[i,1]) & (dataD2[,2]<=D[i,3])),] # ? a voir si < ou <= !**
                                        di2[,3] <- 0
                                        dataD2bis <- rbind(dataD2bis,di2)
                                    }
                                if(D[i,2]==2)
                                    {
                                        dataD2bis <- rbind(dataD2bis,dataD2[which(dataD2[,1]==D[i,1]),])
                                        
                                        di1 <- dataD1[which((dataD1[,1]==D[i,1]) & (dataD1[,2]<=D[i,3])),]
                                        di1[,3] <- 0
                                        dataD1bis <- rbind(dataD1bis,di1)
                                    }

                                ni01 <- c(ni01,length(which(dataD1bis[,1]==D[i,1])))
                                ni02 <- c(ni02,length(which(dataD2bis[,1]==D[i,1])))
                            }

                        dataD1 <- dataD1bis
                        dataD2 <- dataD2bis
                        D <- D[,1:2]                        

                        ## entree retardee
                        nbTronc <- matrix(0,nrow(D),2)
                        if(delayed[1]==TRUE)
                            {
                                nbTronc[,1] <- 1

                                dataD1T0 <- unique(dataD1[,c(subject,entry[1])])
                                nbTronc[which(!is.finite(dataD1T0[,2])),1] <- 0
                            }
                        
                        if(delayed[2]==TRUE)
                            {
                                nbTronc[,2] <- 1

                                dataD2T0 <- unique(dataD2[,c(subject,entry[2])])
                                nbTronc[which(!is.finite(dataD2T0[,2])),2] <- 0
                            }
                    }
                else
                    {
                        ## evt absorbant et en temps continu
                        
                        dataD2 <- data[,c(subject,entry[2],var.time[K+2],dc,setdiff(unique(c(varexplD2,nomsX)),c(var.time,entry[2])))]
                        dataD2 <- na.omit(dataD2)
                        dataD2 <- dataD2[order(dataD2[,1],dataD2[,3]),]
                        
                        ## garder la premiere ligne de chaque sujet de dataD2
                        ntmp <- table(dataD2[,1])
                        #dataD2 <- dataD2[c(1,cumsum(ntmp[-length(ntmp)])+1),,drop=FALSE]#premiere
                        dataD2 <- dataD2[cumsum(ntmp),,drop=FALSE] # derniere
                        
                        ## discretiser
                        breaks <- sort(breaks)
                        tbr <- (breaks[-length(breaks)]+breaks[-1])/2
                        nbr <- length(breaks)

                        t2max <- dataD2[,c(1,3,4,2)]
                        colnames(t2max) <- c(colnames(dataY)[1],"t2max","d2","t2min")

                        t2max <- data.frame(t2max,intervalle=as.numeric(cut(x=t2max$t2max,breaks=breaks,labels=1:(nbr-1),include.lowest=TRUE,right=FALSE)))
                        t2max <- data.frame(t2max,intervT0=as.numeric(cut(x=t2max$t2min,breaks=breaks,labels=1:(nbr-1),include.lowest=TRUE,right=FALSE)))

  
                        tmax <- merge(t2max,t1max,all=FALSE) #selection ici
                        tmax <- data.frame(tmax,intervDem=as.numeric(cut(x=tmax$t1max[which(t1max[,1] %in% t2max[,1])],breaks=breaks,labels=1:(nbr-1),include.lowest=TRUE,right=FALSE)))
                        
                        ## si dem=1, mettre dc=0
                        tmax$d2[which(tmax$d1==1)] <- 0
                        ## si dc=0, s arreter avt l intervalle de dem
                        tmax$intervalle[which(tmax$d2==0)] <- tmax$intervDem[which(tmax$d2==0)]-1
                        
                        ## indicateur evt
                        D <- matrix(tmax[,1],ncol=1)
                        evt <- rep(0,nrow(tmax))
                        evt[which(tmax$d1==1)] <- 1
                        evt[which(tmax$d2==1)] <- 2
                        D <- data.frame(D,evt,stringsAsFactors=FALSE)
                        colnames(D) <- c(colnames(dataY)[1],"evt")
                        D <- D[order(D[,1]),]
                        
                        ## recreer les donnees dc
                        dataD1bis <- as.data.frame(matrix(NA,0,ncol(dataD1),dimnames=list(NULL,colnames(dataD1))))
                        dataD2bis <- as.data.frame(matrix(NA,0,ncol(dataD2),dimnames=list(NULL,colnames(dataD2))))
                        ni01 <- NULL
                        ni02 <- NULL
                        for(i in 1:length(evt))
                            {
                                dataD1bis <- rbind(dataD1bis,dataD1[which(dataD1[,1]==D[i,1]),])
                                if(delayed[nbevt]==TRUE)
                                    {
                                        t2 <- tbr[(tmax[i,6]-1):tmax[i,5]]
                                        t02 <- rep(-Inf,length(t2))
                                        if(tmax$intervT0[i]>1)
                                            {
                                                t02 <- rep(tbr[tmax[i,6]-1],length(t2))
                                            }
                                    }
                                else
                                    {
                                        t2 <- tbr[tmax[i,6]:tmax[i,5]]
                                        t02 <- rep(-Inf,length(t2))
                                    }
                                
                                di2 <- data.frame(rep(D[i,1],length(t2)),t02,t2,rep(0,length(t2)))
                                if(D[i,2]==2) di2[length(t2),4] <- 1
                                if(ncol(dataD2)>4)
                                    {
                                        ## boucle pour garder les differents types (character, numeric)
                                        for(k in 5:ncol(dataD2))
                                            {
                                                di2 <- data.frame(di2,rep(dataD2[which(dataD2[,1]==D[i,1])[1],k],length(t2)))
                                            }
                                    }
                                colnames(di2) <- colnames(dataD2)
                                dataD2bis <- rbind(dataD2bis,di2)
                                
                                ni01 <- c(ni01,length(which(dataD1bis[,1]==D[i,1])))
                                ni02 <- c(ni02,length(which(dataD2bis[,1]==D[i,1])))
                                
                            }
                        
                        dataD1 <- dataD1bis
                        dataD2 <- dataD2bis

                        ## entree retardee
                        nbTronc <- matrix(0,nrow(D),2)
                        if(delayed[1]==TRUE)
                            {
                                nbTronc[,1] <- 1

                                dataD1T0 <- unique(dataD1[,c(subject,entry[1])])
                                nbTronc[which(!is.finite(dataD1T0[,2])),1] <- 0
                            }
                        if(delayed[2]==TRUE)
                            {
                                nbTronc[,2] <- 1
                                nbTronc[which(tmax$intervT0==1),2] <- 0
                            }
                    }
            }
        
       

        ## union des temps de mesure evts
        if(nbevt==1)
            {
                idD <- rep(1,sum(ni01))
                dataD <- dataD1
                dataD$tdemdc <- dataD[,var.time[K+1]]
            }
        if(nbevt==2)
            {
                dataD1$tdemdc <- dataD1[,var.time[K+1]]
                dataD2$tdemdc <- dataD2[,var.time[K+2]]
                dataD <- merge(dataD1,dataD2,all=TRUE)
                dataD <- dataD[order(dataD[,1],dataD[,"tdemdc"]),]
                idD <- c(as.numeric(!is.na(dataD[,dem])),as.numeric(!is.na(dataD[,dc])))
            }
        

        ## nb de sujets (select au moins 1 mesure dc, non prevalents diag)       
        ni0 <- table(dataD[,1])
        ns <- length(ni0)

        ## enlever les sujets exclus de dataY
        ntmp <- table(dataY[,1])
        ienlev <- setdiff(names(ntmp),names(ni0))
        if(length(ienlev))
            {
                dataY <- dataY[-which(dataY[,1] %in% ienlev),]
            }
        

        ## variables sur D
        nvarD <- 0
        ndept <- 0
        nvarD1 <- 0
        ndept1 <- 0
        nvarD2 <- 0
        ndept2 <- 0
        varD <- NULL
        varD1 <- NULL
        varD2 <- NULL
        vardept <- NULL
        vardept1 <- NULL
        vardept2 <- NULL
        idcause <- matrix(0,nbevt,nvarexplD)
        modmat1 <- NULL
        modmat2 <- NULL
        Xseuil <- NULL

        namesevt1 <- NULL
        if(D1[[3]]!=1)
            {
                form1 <- formula(paste("~",subject,"+",D1[3]))
                modmat1 <- model.matrix(form1,data=dataD)[,-1,drop=FALSE]
                vars1 <- labels(terms(form1))
                placea <- NULL
                placeb <- NULL
                for(l in 2:ncol(modmat1))
                    {
                        tmp <- unique(na.omit(modmat1[,c(1,l)]))
                        if(nrow(tmp)==ns) # variable pas dep du tps
                            {
                                varD1 <- c(varD1,vars1[attr(modmat1,"assign")[l]])
                                nvarD1 <- nvarD1+1
                                placea <- c(placea,l-1)
                            }
                        else # variable dep du tps
                            {
                                vardept1 <- c(vardept1,vars1[attr(modmat1,"assign")[l]])
                                ndept1 <- ndept1+1
                                placeb <- c(placeb,l-1)
                            }
                    }
                namesevt1 <- colnames(modmat1)[-1]
                namesevt1 <- c(namesevt1[placea],namesevt1[placeb])
                
                modmat1 <- modmat1[,c(1,placea+1,placeb+1)]
            }

        namesevt2 <- NULL
        if(nbevt==2)
            {
                if(D2[[3]]!=1)
                    {
                        form2 <- formula(paste("~",subject,"+",D2[3]))
                        modmat2 <- model.matrix(form2,data=dataD)[,-1,drop=FALSE]
                        vars2 <- labels(terms(form2))
                        placea <- NULL
                        placeb <- NULL
                        for(l in 2:ncol(modmat2))
                            {
                                tmp <- unique(na.omit(modmat2[,c(1,l)]))
                                if(nrow(tmp)==ns) # variable pas dep du tps
                                    {
                                        varD2 <- c(varD2,vars2[attr(modmat2,"assign")[l]])
                                        nvarD2 <- nvarD2+1
                                        placea <- c(placea,l-1)
                                    }
                                else # variable dep du tps
                                    {
                                        vardept2 <- c(vardept2,vars2[attr(modmat2,"assign")[l]])
                                        ndept2 <- ndept2+1
                                        placeb <- c(placeb,l-1)
                                    }
                            }
                        namesevt2 <- colnames(modmat2)[-1]
                        namesevt2 <- c(namesevt2[placea],namesevt2[placeb])

                        modmat2 <- modmat2[,c(1,placea+1,placeb+1)]
                    }
            }

        if(nbevt==1)
            {
                if(nvarD1>0)
                    {
                        D <- cbind(D,unique(modmat1[,c(1:(nvarD1+1))])[,-1])
                    }

                if(ndept1>0)
                    {
                        Xseuil <- modmat1[,1+nvarD1+1:ndept1,drop=FALSE]
                    }

                idcause <- matrix(1,nrow=1,ncol=nvarD1+ndept1)
            }

        if(nbevt==2)
            {
                if((nvarD1+nvarD2)>0)
                    {
                        tmp1 <- unique(modmat1[,1:(nvarD1+1),drop=FALSE])
                        tmp2 <- unique(modmat2[,1:(nvarD2+1),drop=FALSE])
                        modmat <- cbind(tmp1,tmp2[,-1,drop=FALSE])
                        modmat <- modmat[,c(1,1+which(!duplicated(colnames(modmat)[-1])))]

                        D <- cbind(D,modmat[,-1])

                        idcause <- matrix(0,nrow=nbevt,ncol=ncol(modmat)-1)
                        idcause[1,1:nvarD1] <- 1
                        idcause[2,which(colnames(modmat1)[-1] %in% colnames(modmat2)[-1])] <- 1
                        if(ncol(idcause)>nvarD1)
                            {
                                idcause[2,(nvarD1+1):ncol(idcause)] <- 1
                            }
                    }

                if((ndept1+ndept2)>0)
                    {
                        Xseuil1 <- NULL
                        Xseuil2 <- NULL

                        if(ndept1>0)
                            {
                                Xseuil1 <- modmat1[,1+nvarD1+1:ndept1,drop=FALSE]
                            }
                    
                        if(ndept2>0)
                            {
                                Xseuil2 <- modmat2[,1+nvarD2+1:ndept2,drop=FALSE]
                            }

                        Xseuil1 <- cbind(Xseuil1,un=rep(1,sum(ni01)))
                        Xseuil2 <- cbind(Xseuil2,deux=rep(2,sum(ni02)))
                        Xseuil <- merge(Xseuil1,Xseuil2,by.x=c("un"),by.y=c("deux"),all=TRUE)
                        Xseuil <- Xseuil[,setdiff(colnames(Xseuil),c("un","deux")),drop=FALSE]
                        if(any(is.na(Xseuil)))
                            {
                                Xseuil <- apply(Xseuil,2,function(x) { x[which(is.na(x))] <- -3000; return(x) })
                            }

                        idcause <- cbind(idcause,
                                         rbind(c(rep(1,ndept1),rep(0,ndept2)),
                                               c(rep(0,ndept1),rep(1,ndept2))))
                    }
            }
        ## extraire les infos de chaque sous-modele
        link <- NULL
        nodes <- NULL
        nmes <- NULL
        btot <- NULL
        idx <- NULL
        fix <- NULL
        epsY <- NULL

        nv <- rep(0,K)
        nobs <- rep(0,K+nbevt)
        idiag <- rep(0,K)
        npmtot <- rep(0,K)
        p <- rep(0,K)
        q <- rep(0,K)
        ctr <- rep(0,K)
        ncor <- rep(0,K)
        ntrtot <- rep(0,K)
        ntr <- NULL
        nalea <- rep(0,K)

        X <- vector("list",K)
        Xd <- vector("list",K)
        Y <- NULL

        namesmod <- NULL

        for(k in 1:K)
            {   
                mod <- get(paste("mod",k,sep=""))
                colx <- c(mod$call$subject,mod$Xnames2,var.time[K+1])
                namesmod <- c(namesmod,names(mod$best))


                ## formule k       
                formk <- paste(paste(mod$call$fixed[3],collapse="+"),paste(mod$call$random[2],collapse="+"),"1",sep="+")
                if(!is.null(mod$call$cor))
                    {
                        formk <- paste(formk,as.character(mod$call$cor)[2],sep="+")
                    }
                
                ## les donnees X et Y
                yk <- NULL
                xk <- NULL
                id <- NULL
                outcome <- NULL
                for(m in 1:ny[k])
                    {
                        datam <- dataY[which((dataY$processK==k) & (dataY$outcomeM==m)),]
                        colnames(datam)[which(colnames(datam)=="timeT")] <- var.time[k]
                        colnames(datam)[which(colnames(datam)=="measureY")] <- mod$Ynames[m]
                        yk <- c(yk,datam[,mod$Ynames[m]])
                        xk <- rbind(xk,model.matrix(formula(paste("~",formk)),data=datam))
                        id <- c(id,as.numeric(factor(datam[,1]))) # ? va poser pb si pas les memes sujets pr chaque outcome !**
                        outcome <- c(outcome,rep(m,nrow(datam)))

                        ## nb mesures
                        nmesm <- table(datam[,1])
                        nmesm <- data.frame(id=names(nmesm),nm=as.vector(nmesm))
                        if((k==1) & (m==1))
                            {
                                nmes <- nmesm
                                colnames(nmes) <- c("id","k1m1")
                            }
                        else
                            {
                                old <- colnames(nmes)
                                nmes <- merge(nmes,nmesm,by="id",all=TRUE,sort=FALSE)
                                colnames(nmes) <- c(old,paste("k",k,"m",m,sep=""))
                            }
                        
                        ## ntr
                        if(mod$linktype[m]==0) ntr <- c(ntr,2)
                        if(mod$linktype[m]==1) ntr <- c(ntr,4)
                        if(mod$linktype[m]==2) ntr <- c(ntr,mod$nbnodes[m]+2)
                    }

                ## mettre dans l ordre
                yx <- cbind(id,outcome,yk,xk)
                yxord <- yx[order(yx[,1],yx[,2]),]
                Y <- c(Y,as.vector(yxord[,3]))
                X[[k]] <- yxord[,-c(1,2,3),drop=FALSE]

                ## Xd
                ty <- paste("\\b",var.time[k],"\\b",sep="")
                form0 <- gsub(ty,"tdemdc",formk)
                Xd[[k]] <- model.matrix(formula(paste("~",form0)),data=dataD)

                ## infos du modele k
                nv[k] <- ncol(xk)
                nobs[k] <- nrow(xk) #mod$N[9] # plus pareil si prevalents enleves
                idiag[k] <- mod$idiag
                npmtot[k] <- length(mod$best)
                ncor[k] <- mod$N[7]
                p[k] <- sum(mod$idg)
                ctr[k] <- sum(mod$idcontr)
                q[k] <- sum(mod$idea)
                ntrtot[k] <- npmtot[k]-sum(mod$N[c(3,4,5,6,7,8)])
                nalea[k] <- mod$N[6]

                idx <- cbind(idx,rbind(mod$idg,mod$idcontr,mod$idea,mod$idcor))

                link <- c(link,mod$linktype)
                nodes <- c(nodes,as.vector(mod$linknodes))
                epsY <- c(epsY,mod$epsY)
                
                ## parametres (estimes ou fixes)
                btotk <- as.vector(mod$best)
                if(mod$N[4]>0)
                    {
                        if(!mod$idiag)
                            {
                                vark <- matrix(0,q[k],q[k])
                                vark[upper.tri(vark,diag=TRUE)] <- c(1,btotk[mod$N[3]+1:mod$N[4]])
                                vark <- t(vark)
                                vark[upper.tri(vark,diag=TRUE)] <- c(1,btotk[mod$N[3]+1:mod$N[4]])
                                
                                sigk <- sqrt(diag(vark))
                                sigcorrk <- sweep(vark,1,sigk,"/")
                                sigcorrk <- sweep(sigcorrk,2,sigk,"/")
                                diag(sigcorrk) <- sigk
                                rho <- sigcorrk[upper.tri(sigcorrk)]
                                sigcorrk[upper.tri(sigcorrk)] <- log((1+rho)/(1-rho))
                                btotk[mod$N[3]+1:mod$N[4]] <- sigcorrk[upper.tri(sigcorrk,diag=TRUE)][-1]
                            }
                        else
                            {
                                btotk[mod$N[3]+1:mod$N[4]] <- sqrt(btotk[mod$N[3]+1:mod$N[4]])
                            }

                        #if(any(!is.finite(btotk))) stop(paste("Infinite parameters for multlcmm model ",k,". Check the correlations.")) # a remettre !**
                    }
                btot <- c(btot,btotk)

                fix <- c(fix,rep(0,length(mod$best)))
                if(length(mod$call$posfix)) fix[length(fix)-length(mod$best)+eval(mod$call$posfix)] <- 1
            } # fin boucle sur k
                        
        ## pas de link beta
        if(any(ntr==4)) stop("Use only linear or splines links")
        if(any(ncor==2)) stop("Autoregressive correlations are not allowed")
        
        ## tous les parametres du modele
        if(K==1 | RE=="block-diag") nRE <- 0
        if(K>1 & RE!="block-diag") nRE <- sum(sapply(1:(length(q)-1), function(k,x) sum(x[k]*x[(k+1):length(x)]),x=q))
        bRE <- rep(0,nRE)
        BM <- "diag"
        if((K==1) | (BM=="diag")) nBM <- 0
        if((K>1) & (BM=="full"))
            {
                ncortot <- sum(ncor==1)
                nBM <- ncortot*(ncortot-1)/2
            }
        bBM <- rep(0,nBM)
        eta1 <- rep(0,1+nvarD1+ndept1)
        gamma1 <- rep(0,K*(1+nvarD1))
        btot <- c(btot,bRE,bBM,eta1,gamma1)
        if(nbevt==2)
            {
                eta2 <- rep(0,1+nvarD2+ndept2)
                gamma2 <- rep(0,K*(1+nvarD2))
                btot <- c(btot,eta2,gamma2)
            }
        fix <- model$fix


## a voir : demander 1 mesure a chaque Y ?

        ## rassembler nb mesures
        nmes <- cbind(nmes[,-1],ni01=as.vector(ni01))
        if(nbevt==2) nmes <- cbind(nmes,ni02=as.vector(ni02))
        nmes <- cbind(nmes,ni0=as.vector(ni0))
        nmes <- as.matrix(nmes)
        if(any(is.na(nmes))) nmes[which(is.na(nmes))] <- 0 # peut arriver pr Y
        #if(any(nmes==0)) stop("nmes=0")
        nobs[K+1] <- sum(ni01)
        if(nbevt==2) nobs[K+2] <- sum(ni02)
        niparK <- matrix(0,ns,K)
        m0 <- 0
        for(k in 1:K)
            {
                for(m in m0+1:ny[k])
                    {
                        niparK[,k] <- niparK[,k]+nmes[,m]
                    }
                m0 <- m0+ny[k]
            }

        M <- sum(ny)
        
        ## max 20 mesures
        nmesbis <- nmes[,M+1:nbevt]-nbTronc
        if(nbevt==1) if(any(nmesbis[,1]>20)) stop("The number of repeated measures for D is limited to 20")
        if(nbevt==2) if(any(nmesbis[,1]+nmesbis[,2]>20)) stop("The number of repeated measures for D is limited to 20")

        nef <- p-1
        ncontr <- ctr*(ny-1)
        nea <- q

        ch <- 0
        D <- as.matrix(D[,-1,drop=FALSE])
        if(is.null(Xseuil)) Xseuil <- matrix(0,0,0)
        Xseuil <- as.matrix(Xseuil)
        idcause <- as.vector(t(idcause))
        entreRetard <- as.numeric(delayed)

        
        ## prm estimes et prm fixes
        b <- model$b
        bfix <- 0
        if(any(model$fix==1)) bfix <- model$bopt[which(model$fix==1)]


        ## debut calcul uacv ###

        ## vrais cond (P(dem/Y)) a l'optimum
        vrais2i <- RloglikUACV(b0=b, bfix0=bfix, fix0=fix, Y0=Y, X0=X, Xd0=Xd, idD0=idD, D0=D, Xseuil0=Xseuil, nmes0=nmes, nv0=nv, idx0=idx, idiag0=idiag, ncor0=ncor, ny0=ny, nalea0=nalea, ntr0=ntr, link0=link, nodes0=nodes, epsY0=epsY, nRE0=nRE, nBM0=nBM, nbevt0=nbevt, idcause0=idcause, entreRetard0=entreRetard, nbTronc0=nbTronc) # vrais conditionnelles puis vrais totales (condi1,condi2,.., toti1, toti2...)

        ## gradients de vrais cond et tot
        npm <- length(b)
        matGrad <- matrix(0,nrow=2*ns,ncol=npm)
        for(k in 1:npm)
        {
            th <- max(0.000001,0.0001*abs(b[k]))

            bplus <- b
            bplus[k] <- bplus[k]+th

            bmoins <- b
            bmoins[k] <- bmoins[k]-th

            llplus <-  RloglikUACV(b0=bplus, bfix0=bfix, fix0=fix, Y0=Y, X0=X, Xd0=Xd, idD0=idD, D0=D, Xseuil0=Xseuil, nmes0=nmes, nv0=nv, idx0=idx, idiag0=idiag, ncor0=ncor, ny0=ny, nalea0=nalea, ntr0=ntr, link0=link, nodes0=nodes, epsY0=epsY, nRE0=nRE, nBM0=nBM, nbevt0=nbevt, idcause0=idcause, entreRetard0=entreRetard, nbTronc0=nbTronc)

            llmoins <- RloglikUACV(b0=bmoins, bfix0=bfix, fix0=fix, Y0=Y, X0=X, Xd0=Xd, idD0=idD, D0=D, Xseuil0=Xseuil, nmes0=nmes, nv0=nv, idx0=idx, idiag0=idiag, ncor0=ncor, ny0=ny, nalea0=nalea, ntr0=ntr, link0=link, nodes0=nodes, epsY0=epsY, nRE0=nRE, nBM0=nBM, nbevt0=nbevt, idcause0=idcause, entreRetard0=entreRetard, nbTronc0=nbTronc)
            
            matGrad[,k] <- -(llplus-llmoins)/(2*th)
        }


        ## variance des estimateurs
        varb <- matrix(0,nrow=npm,ncol=npm)
        varb[upper.tri(varb,diag=TRUE)] <- model$v
        varb <- t(varb)
        varb[upper.tri(varb,diag=TRUE)] <- model$v


        ## trace(H^(-1)K)
        prodGrad <- sapply(1:ns,function(i,mat) t(mat[i,,drop=FALSE])%*%mat[ns+i,,drop=FALSE],mat=matGrad,simplify=FALSE)
        sumGrad <- 1/(ns-1)*Reduce("+",prodGrad)

        trace <- sum(diag(varb%*%sumGrad))

        ## uacv
        res <- -1/ns*sum(vrais2i[1:ns])+trace
        
        return(res)
}
VivianePhilipps/multLPM documentation built on March 13, 2021, 6:35 a.m.