R/compress.R
In riskRegression: Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks

### compress.R --- 
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: sep  9 2024 (14:04) 
## Version: 
## Last-Updated: Oct 21 2024 (09:46) 
##           By: Brice Ozenne
##     Update #: 169
##----------------------------------------------------------------------
## 
### Commentary: 
## 
### Change Log:
##----------------------------------------------------------------------
## 
### Code:

## * compressData
##' @description Find unique covariate sets and restrict the dataset to those.
##' Used by predictCox and predict.CauseSpecificCox
##' @noRd
compressData <- function(object, newdata, times, diag, average.iid,
                         oorder.times, times.sorted, nStrata, infoVar, level){

    ## ** extract information
    if(is.null(newdata) || (!is.null(level) && level == "full")){
        return(NULL)
    }else if(inherits(object,"prodlim")){
        n.cov <- 1 ## unique covariate profile (as there is no covariate so all predictions are equals up to the strata)
        nStrata <- length(object$first.strata)
        if(!is.null(object$bandwidth)){
           stop("Cannot handle continuous covariates with prodlim objects. \n")
        }
        test.allCategorical <- TRUE
    }else if(inherits(object,"CauseSpecificCox")){
        ls.infoVar <- lapply(object$model, function(iO){coxVariableName(iO, model.frame = coxModelFrame(iO))})
        test.allCategorical <- all(sapply(ls.infoVar, function(iInfo){length(intersect(iInfo$lpvars, iInfo$lpvars.original))==0}))

        if(test.allCategorical || (!is.null(level) && level == "minimal")){
            ## *** covariates
            keep.col <- !duplicated(do.call("c",lapply(ls.infoVar, "[[","lpvars.original")))
            if(length(keep.col)>0){
                newdata.X <- do.call(cbind,lapply(object$model, FUN = function(iO){stats::model.matrix(iO, data = newdata)}))[,keep.col,drop=FALSE]
            }else{
                newdata.X <- NULL
            }
            if(is.null(newdata.X)){
                n.cov <- 1
            }else{
                n.cov <- 2^NCOL(newdata.X)
            }
            
            ## *** strata variables
            allStrata.var <- setdiff(unique(unlist(lapply(ls.infoVar,"[[","stratavars.original"))),names(newdata.X))
            if(length(allStrata.var)>0){
                if(any(allStrata.var %in% names(newdata) == FALSE)){
                    stop("Incorrect argument \'newdata\': missing column(s) \"",paste(setdiff(allStrata.var,names(newdata)), collapse = "\", \""),"\". \n")
                }
                newdata.strata <- as.data.frame(newdata)[allStrata.var]
                if(is.null(newdata.X)){
                    newdata.X <- newdata.strata
                }else{
                    newdata.X <- cbind(newdata.X,newdata.strata)
                }
            }
            if(sum(!duplicated(lapply(ls.infoVar,"[[","stratavars.original")))==1){
                nStrata <- length(ls.infoVar[[1]][["strata.levels"]])
            }else{ ## conservative approximation
                nStrata <- prod(lengths(lapply(ls.infoVar,"[[","strata.levels")))
            }            
        }else{
            n.cov <- Inf
            nStrata <- Inf
        }
    }else{
        test.allCategorical <- length(intersect(infoVar$lpvars, infoVar$lpvars.original))==0

        if(test.allCategorical || (!is.null(level) && level == "minimal")){
            ## *** covariates
            newdata.X <- model.matrix(object, data = newdata)
            n.cov <- 2^NCOL(newdata.X)
            
            ## *** strata variables
            allStrata.var <- setdiff(infoVar$stratavars.original,names(newdata.X))
            if(length(allStrata.var)>0){
                if(any(allStrata.var %in% names(newdata) == FALSE)){
                    stop("Incorrect argument \'newdata\': missing column(s) \"",paste(setdiff(allStrata.var,names(newdata)), collapse = "\", \""),"\". \n")
                }
                newdata.strata <- as.data.frame(newdata)[allStrata.var]
                if(NCOL(newdata.X)==0 || length(newdata.X)==0){ ## for cph object with strata, newdata.X may be list() i.e. has one column but 0 length
                    newdata.X <- newdata.strata
                }else{
                    newdata.X <- cbind(newdata.X,newdata.strata)
                }
                nStrata <- length(infoVar$strata.levels)
            }else{
                nStrata <- 1
            }
            
        }else{
            n.cov <- Inf
            nStrata <- Inf
        }
    }

    ## ** find unique patient profiles
    if(is.null(level) && (!test.allCategorical || NROW(newdata) <= nStrata*n.cov)){
        return(NULL)
    }else if(inherits(object,"prodlim")){ ## Kaplan Meier
        newdata.save <- newdata

        ## find unique combinations of predictors
        newdata.strata <- coxStrata(object, data = newdata, 
                                    sterms = infoVar$strata.sterms, 
                                    strata.vars = infoVar$stratavars, 
                                    strata.levels = infoVar$strata.levels)
        if(is.factor(newdata.strata)){
            newdata.strata <- droplevels(newdata.strata)
        }

        ## associate each observation to a unique combination of predictors
        newdata.index <- tapply(1:NROW(newdata), INDEX = newdata.strata, FUN = identity, simplify = FALSE)
        attr(newdata.index,"vectorwise") <- as.numeric(newdata.strata)

    }else if(nStrata == 1 && n.cov == 1){ ## No covariate nor strata
        newdata.save <- newdata

        ## find unique combinations of predictors
        newdata.index <- list(1:NROW(newdata))

        ## associate each observation to a unique combination of predictors
        attr(newdata.index,"vectorwise") <- rep(1, NROW(newdata))

    }else{ ## Cox or Cause specific Cox
        ## associate each observation to a unique combination of predictors
        if(NCOL(newdata.X)==1){
            if(is.factor(newdata.X[,1])){ ## might be a factor in presence of strata
                newdata.X[,1] <- droplevels(newdata.X[,1])
            }
            newdata.index <- tapply(1:NROW(newdata), INDEX = newdata.X[,1], FUN = identity, simplify = FALSE)
        }else if(all(newdata.X %in% 0:1) && NCOL(newdata.X)<=10){
            newdata.index <- tapply(1:NROW(newdata), INDEX = rowSums(newdata.X * matrix(10^(1:NCOL(newdata.X)), nrow = NROW(newdata.X), ncol = NCOL(newdata.X), byrow = TRUE)), FUN = identity, simplify = FALSE)
        }else{
            newdata.index <- tapply(1:NROW(newdata), INDEX = do.call(paste, args = c(as.data.frame(newdata.X), sep = ".")), FUN = identity, simplify = FALSE)
        }

        ## library(microbenchmark)
        ## microbenchmark(A = rowSums(newdata.X * 10^(1:NCOL(newdata.X))),
        ##                B = do.call(paste, args = c(as.data.frame(newdata.X), sep = ".")),
        ##                C = interaction(as.data.frame(newdata.X)))
        
        ## associate each observation to a unique combination of predictors
        attr(newdata.index,"vectorwise") <- unlist(lapply(1:length(newdata.index), function(iN){rep(iN,length(newdata.index[[iN]]))}))[order(unlist(newdata.index))]

    }

    ## ** compress data
    ## save full data
    newdata.save <- newdata
    ## subset data
    if(data.table::is.data.table(newdata)){
        newdata <- newdata.save[sapply(newdata.index,"[",1)]
    }else{
        newdata <- as.data.frame(newdata.save)[sapply(newdata.index,"[",1),,drop=FALSE]
    }
    ## do not do diag
    diag.save <- diag
    diag <- FALSE
    ## update times (mostly relevant when diag=TRUE to avoid duplicated times)
    oorder.times.save <- oorder.times
    times.sorted.save <- times.sorted

    times.sorted <- unique(times.sorted)
    order.times <- order(times.sorted)
    oorder.times <- order(order.times)
    nTimes <- length(times.sorted)

    if(average.iid==TRUE){
        if(is.null(attr(average.iid,"factor"))){
            attr(average.iid,"rm.list") <- TRUE
            if(diag.save){
                newdata.table <- table(attr(newdata.index,"vectorwise"),times.sorted.save[oorder.times.save])
                attr(average.iid,"factor") <- list(NROW(newdata)*matrix(as.double(newdata.table), nrow = NROW(newdata.table), ncol = NCOL(newdata.table))/NROW(newdata.save))
            }else{
                attr(average.iid,"factor") <- list(matrix(NROW(newdata)*lengths(newdata.index)/NROW(newdata.save), byrow = FALSE, nrow = NROW(newdata), ncol = nTimes))
            }
        }else{
            attr(average.iid,"rm.list") <- FALSE
            for(iF in 1:length(attr(average.iid,"factor"))){ ## iF <- 1
                attr(average.iid,"factor")[[iF]] <- do.call(rbind,lapply(newdata.index, function(iIndex){
                    if(diag.save){
                        iFactor <- tapply(attr(average.iid,"factor")[[iF]][iIndex,1],
                                          INDEX = factor(times.sorted.save[oorder.times.save][iIndex], levels = times.sorted),
                                          FUN = sum, default = 0)
                    }else{
                        iFactor <- colSums(attr(average.iid,"factor")[[iF]][iIndex,,drop=FALSE])
                    }
                    return(NROW(newdata)*iFactor/NROW(newdata.save))
                }))
            }
        }
    }

    ## ** export
    return(list(newdata = newdata, newdata.save = newdata.save, newdata.index = newdata.index,
                times.sorted = times.sorted, times.sorted.save = times.sorted.save,
                diag = diag, diag.save = diag.save,
                order.times = order.times,
                oorder.times = oorder.times, oorder.times.save = oorder.times.save,
                nTimes = nTimes,
                average.iid = average.iid))
}

## * decompressData
##' @description Expand the prediction and iid to the original data based on the results for the unique covariate sets.
##' Used by predictCox and predict.CauseSpecificCox
##' @noRd
decompressData <- function(object, newdata, type, diag, times, se, confint, band, iid, average.iid,
                           newdata.index, times.sorted, needOrder){

    ## ** recover original profiles
    if(diag){
        for(iType in type){ ## iType <- "cumhazard"
            iNewIndex <- attr(newdata.index,"vectorwise") + (match(times, times.sorted)-1) * NROW(newdata)
            object[[iType]] <- cbind(object[[iType]][iNewIndex])
            if(se){
                object[[paste(iType,"se",sep=".")]] <- cbind(object[[paste(iType,"se",sep=".")]][iNewIndex])
            }
            if(confint){
                object[[paste(iType,"lower",sep=".")]] <- cbind(object[[paste(iType,"lower",sep=".")]][iNewIndex])
                object[[paste(iType,"upper",sep=".")]] <- cbind(object[[paste(iType,"upper",sep=".")]][iNewIndex])
            }
            if(band){ ## adjustment is performed on the compressed dataset, i.e., is incorrect. It is re-done below
                object[[paste(iType,"quantileBand",sep=".")]] <- NULL
                object[[paste(iType,"lowerBand",sep=".")]] <- NULL
                object[[paste(iType,"upperBand",sep=".")]] <- NULL
            }
            if(iid){
                iLS.iid <- mapply(slice = attr(newdata.index,"vectorwise"), column = match(times, times.sorted), function(slice,column){
                    object[[paste(iType,"iid",sep=".")]][,column,slice]
                }, SIMPLIFY = FALSE)
                object[[paste(iType,"iid",sep=".")]] <- array(unlist(iLS.iid), dim = c(NROW(object[[paste(iType,"iid",sep=".")]]),1,length(iLS.iid)))
            }
            if(average.iid){
                if(!is.list(object[[paste(iType,"average","iid",sep=".")]])){
                    object[[paste(iType,"average","iid",sep=".")]] <- cbind(rowSums(object[[paste(iType,"average","iid",sep=".")]]))
                }else if(attr(average.iid,"rm.list")){
                    object[[paste(iType,"average","iid",sep=".")]] <- cbind(rowSums(object[[paste(iType,"average","iid",sep=".")]][[1]]))
                }else{
                    object[[paste(iType,"average","iid",sep=".")]] <- lapply(object[[paste(iType,"average","iid",sep=".")]], function(iM){cbind(rowSums(iM))})
                }
            }
        }
        object$diag <- TRUE
        if(band){
            object <- stats::confint(object)
        }
            
    }else{        
        for(iType in type){ ## iType <- "cumhazard"
            object[[iType]] <- object[[iType]][attr(newdata.index,"vectorwise"),,drop=FALSE]
            if(se){
                object[[paste(iType,"se",sep=".")]] <- object[[paste(iType,"se",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
            }
            if(confint){
                object[[paste(iType,"lower",sep=".")]] <- object[[paste(iType,"lower",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
                object[[paste(iType,"upper",sep=".")]] <- object[[paste(iType,"upper",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
            }
            if(band){
                object[[paste(iType,"quantileBand",sep=".")]] <- object[[paste(iType,"quantileBand",sep=".")]][attr(newdata.index,"vectorwise")]
                object[[paste(iType,"lowerBand",sep=".")]] <- object[[paste(iType,"lowerBand",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
                object[[paste(iType,"upperBand",sep=".")]] <- object[[paste(iType,"upperBand",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
            }
            if(iid){
                object[[paste(iType,"iid",sep=".")]] <- object[[paste(iType,"iid",sep=".")]][,,attr(newdata.index,"vectorwise"),drop=FALSE]                    
            }
            if(average.iid && attr(average.iid,"rm.list") && is.list(object[[paste(iType,"average","iid",sep=".")]])){
                object[[paste(iType,"average","iid",sep=".")]] <- object[[paste(iType,"average","iid",sep=".")]][[1]]
            }

            if(needOrder || length(times) != length(times.sorted)){
                col.reorder <- match(times,times.sorted) ## restaure original times and original time ordering (has been reduce to ascending unique times)
                object[[iType]] <- object[[iType]][,col.reorder,drop=FALSE]
                if(se){
                    object[[paste(iType,"se",sep=".")]] <- object[[paste(iType,"se",sep=".")]][,col.reorder,drop=FALSE]
                }
                if(confint){
                    object[[paste(iType,"lower",sep=".")]] <- object[[paste(iType,"lower",sep=".")]][,col.reorder,drop=FALSE]
                    object[[paste(iType,"upper",sep=".")]] <- object[[paste(iType,"upper",sep=".")]][,col.reorder,drop=FALSE]
                }
                if(band){
                    object[[paste(iType,"lowerBand",sep=".")]] <- object[[paste(iType,"lowerBand",sep=".")]][,col.reorder,drop=FALSE]
                    object[[paste(iType,"upperBand",sep=".")]] <- object[[paste(iType,"upperBand",sep=".")]][,col.reorder,drop=FALSE]
                }
                if(iid){
                    ## does not decompress vcov
                    object[[paste(iType,"iid",sep=".")]] <- object[[paste(iType,"iid",sep=".")]][,col.reorder,,drop=FALSE]                    
                }
                if(average.iid){
                    if(attr(average.iid,"rm.list")){
                        object[[paste(iType,"average","iid",sep=".")]] <- object[[paste(iType,"average","iid",sep=".")]][,col.reorder,drop=FALSE]
                    }else{
                        object[[paste(iType,"average","iid",sep=".")]] <- lapply(object[[paste(iType,"average","iid",sep=".")]], function(iM){iM[,col.reorder,drop=FALSE]})
                    }
                        
                }
            }
            
        }
    }

    ## ** export
    return(object)
}


##----------------------------------------------------------------------
### compress.R ends here
Any scripts or data that you put into this service are public.
riskRegression documentation built on June 8, 2025, 11:33 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
riskRegression
Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks

R/compress.R
In riskRegression: Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks

Defines functions decompressData compressData

Try the riskRegression package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

riskRegression Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks

R/compress.R In riskRegression: Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks

Defines functions decompressData compressData

Try the riskRegression package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

riskRegression
Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks

R/compress.R
In riskRegression: Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks