Nothing
### compress.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: sep 9 2024 (14:04)
## Version:
## Last-Updated: Oct 21 2024 (09:46)
## By: Brice Ozenne
## Update #: 169
##----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
##----------------------------------------------------------------------
##
### Code:
## * compressData
##' @description Find unique covariate sets and restrict the dataset to those.
##' Used by predictCox and predict.CauseSpecificCox
##' @noRd
compressData <- function(object, newdata, times, diag, average.iid,
oorder.times, times.sorted, nStrata, infoVar, level){
## ** extract information
if(is.null(newdata) || (!is.null(level) && level == "full")){
return(NULL)
}else if(inherits(object,"prodlim")){
n.cov <- 1 ## unique covariate profile (as there is no covariate so all predictions are equals up to the strata)
nStrata <- length(object$first.strata)
if(!is.null(object$bandwidth)){
stop("Cannot handle continuous covariates with prodlim objects. \n")
}
test.allCategorical <- TRUE
}else if(inherits(object,"CauseSpecificCox")){
ls.infoVar <- lapply(object$model, function(iO){coxVariableName(iO, model.frame = coxModelFrame(iO))})
test.allCategorical <- all(sapply(ls.infoVar, function(iInfo){length(intersect(iInfo$lpvars, iInfo$lpvars.original))==0}))
if(test.allCategorical || (!is.null(level) && level == "minimal")){
## *** covariates
keep.col <- !duplicated(do.call("c",lapply(ls.infoVar, "[[","lpvars.original")))
if(length(keep.col)>0){
newdata.X <- do.call(cbind,lapply(object$model, FUN = function(iO){stats::model.matrix(iO, data = newdata)}))[,keep.col,drop=FALSE]
}else{
newdata.X <- NULL
}
if(is.null(newdata.X)){
n.cov <- 1
}else{
n.cov <- 2^NCOL(newdata.X)
}
## *** strata variables
allStrata.var <- setdiff(unique(unlist(lapply(ls.infoVar,"[[","stratavars.original"))),names(newdata.X))
if(length(allStrata.var)>0){
if(any(allStrata.var %in% names(newdata) == FALSE)){
stop("Incorrect argument \'newdata\': missing column(s) \"",paste(setdiff(allStrata.var,names(newdata)), collapse = "\", \""),"\". \n")
}
newdata.strata <- as.data.frame(newdata)[allStrata.var]
if(is.null(newdata.X)){
newdata.X <- newdata.strata
}else{
newdata.X <- cbind(newdata.X,newdata.strata)
}
}
if(sum(!duplicated(lapply(ls.infoVar,"[[","stratavars.original")))==1){
nStrata <- length(ls.infoVar[[1]][["strata.levels"]])
}else{ ## conservative approximation
nStrata <- prod(lengths(lapply(ls.infoVar,"[[","strata.levels")))
}
}else{
n.cov <- Inf
nStrata <- Inf
}
}else{
test.allCategorical <- length(intersect(infoVar$lpvars, infoVar$lpvars.original))==0
if(test.allCategorical || (!is.null(level) && level == "minimal")){
## *** covariates
newdata.X <- model.matrix(object, data = newdata)
n.cov <- 2^NCOL(newdata.X)
## *** strata variables
allStrata.var <- setdiff(infoVar$stratavars.original,names(newdata.X))
if(length(allStrata.var)>0){
if(any(allStrata.var %in% names(newdata) == FALSE)){
stop("Incorrect argument \'newdata\': missing column(s) \"",paste(setdiff(allStrata.var,names(newdata)), collapse = "\", \""),"\". \n")
}
newdata.strata <- as.data.frame(newdata)[allStrata.var]
if(NCOL(newdata.X)==0 || length(newdata.X)==0){ ## for cph object with strata, newdata.X may be list() i.e. has one column but 0 length
newdata.X <- newdata.strata
}else{
newdata.X <- cbind(newdata.X,newdata.strata)
}
nStrata <- length(infoVar$strata.levels)
}else{
nStrata <- 1
}
}else{
n.cov <- Inf
nStrata <- Inf
}
}
## ** find unique patient profiles
if(is.null(level) && (!test.allCategorical || NROW(newdata) <= nStrata*n.cov)){
return(NULL)
}else if(inherits(object,"prodlim")){ ## Kaplan Meier
newdata.save <- newdata
## find unique combinations of predictors
newdata.strata <- coxStrata(object, data = newdata,
sterms = infoVar$strata.sterms,
strata.vars = infoVar$stratavars,
strata.levels = infoVar$strata.levels)
if(is.factor(newdata.strata)){
newdata.strata <- droplevels(newdata.strata)
}
## associate each observation to a unique combination of predictors
newdata.index <- tapply(1:NROW(newdata), INDEX = newdata.strata, FUN = identity, simplify = FALSE)
attr(newdata.index,"vectorwise") <- as.numeric(newdata.strata)
}else if(nStrata == 1 && n.cov == 1){ ## No covariate nor strata
newdata.save <- newdata
## find unique combinations of predictors
newdata.index <- list(1:NROW(newdata))
## associate each observation to a unique combination of predictors
attr(newdata.index,"vectorwise") <- rep(1, NROW(newdata))
}else{ ## Cox or Cause specific Cox
## associate each observation to a unique combination of predictors
if(NCOL(newdata.X)==1){
if(is.factor(newdata.X[,1])){ ## might be a factor in presence of strata
newdata.X[,1] <- droplevels(newdata.X[,1])
}
newdata.index <- tapply(1:NROW(newdata), INDEX = newdata.X[,1], FUN = identity, simplify = FALSE)
}else if(all(newdata.X %in% 0:1) && NCOL(newdata.X)<=10){
newdata.index <- tapply(1:NROW(newdata), INDEX = rowSums(newdata.X * matrix(10^(1:NCOL(newdata.X)), nrow = NROW(newdata.X), ncol = NCOL(newdata.X), byrow = TRUE)), FUN = identity, simplify = FALSE)
}else{
newdata.index <- tapply(1:NROW(newdata), INDEX = do.call(paste, args = c(as.data.frame(newdata.X), sep = ".")), FUN = identity, simplify = FALSE)
}
## library(microbenchmark)
## microbenchmark(A = rowSums(newdata.X * 10^(1:NCOL(newdata.X))),
## B = do.call(paste, args = c(as.data.frame(newdata.X), sep = ".")),
## C = interaction(as.data.frame(newdata.X)))
## associate each observation to a unique combination of predictors
attr(newdata.index,"vectorwise") <- unlist(lapply(1:length(newdata.index), function(iN){rep(iN,length(newdata.index[[iN]]))}))[order(unlist(newdata.index))]
}
## ** compress data
## save full data
newdata.save <- newdata
## subset data
if(data.table::is.data.table(newdata)){
newdata <- newdata.save[sapply(newdata.index,"[",1)]
}else{
newdata <- as.data.frame(newdata.save)[sapply(newdata.index,"[",1),,drop=FALSE]
}
## do not do diag
diag.save <- diag
diag <- FALSE
## update times (mostly relevant when diag=TRUE to avoid duplicated times)
oorder.times.save <- oorder.times
times.sorted.save <- times.sorted
times.sorted <- unique(times.sorted)
order.times <- order(times.sorted)
oorder.times <- order(order.times)
nTimes <- length(times.sorted)
if(average.iid==TRUE){
if(is.null(attr(average.iid,"factor"))){
attr(average.iid,"rm.list") <- TRUE
if(diag.save){
newdata.table <- table(attr(newdata.index,"vectorwise"),times.sorted.save[oorder.times.save])
attr(average.iid,"factor") <- list(NROW(newdata)*matrix(as.double(newdata.table), nrow = NROW(newdata.table), ncol = NCOL(newdata.table))/NROW(newdata.save))
}else{
attr(average.iid,"factor") <- list(matrix(NROW(newdata)*lengths(newdata.index)/NROW(newdata.save), byrow = FALSE, nrow = NROW(newdata), ncol = nTimes))
}
}else{
attr(average.iid,"rm.list") <- FALSE
for(iF in 1:length(attr(average.iid,"factor"))){ ## iF <- 1
attr(average.iid,"factor")[[iF]] <- do.call(rbind,lapply(newdata.index, function(iIndex){
if(diag.save){
iFactor <- tapply(attr(average.iid,"factor")[[iF]][iIndex,1],
INDEX = factor(times.sorted.save[oorder.times.save][iIndex], levels = times.sorted),
FUN = sum, default = 0)
}else{
iFactor <- colSums(attr(average.iid,"factor")[[iF]][iIndex,,drop=FALSE])
}
return(NROW(newdata)*iFactor/NROW(newdata.save))
}))
}
}
}
## ** export
return(list(newdata = newdata, newdata.save = newdata.save, newdata.index = newdata.index,
times.sorted = times.sorted, times.sorted.save = times.sorted.save,
diag = diag, diag.save = diag.save,
order.times = order.times,
oorder.times = oorder.times, oorder.times.save = oorder.times.save,
nTimes = nTimes,
average.iid = average.iid))
}
## * decompressData
##' @description Expand the prediction and iid to the original data based on the results for the unique covariate sets.
##' Used by predictCox and predict.CauseSpecificCox
##' @noRd
decompressData <- function(object, newdata, type, diag, times, se, confint, band, iid, average.iid,
newdata.index, times.sorted, needOrder){
## ** recover original profiles
if(diag){
for(iType in type){ ## iType <- "cumhazard"
iNewIndex <- attr(newdata.index,"vectorwise") + (match(times, times.sorted)-1) * NROW(newdata)
object[[iType]] <- cbind(object[[iType]][iNewIndex])
if(se){
object[[paste(iType,"se",sep=".")]] <- cbind(object[[paste(iType,"se",sep=".")]][iNewIndex])
}
if(confint){
object[[paste(iType,"lower",sep=".")]] <- cbind(object[[paste(iType,"lower",sep=".")]][iNewIndex])
object[[paste(iType,"upper",sep=".")]] <- cbind(object[[paste(iType,"upper",sep=".")]][iNewIndex])
}
if(band){ ## adjustment is performed on the compressed dataset, i.e., is incorrect. It is re-done below
object[[paste(iType,"quantileBand",sep=".")]] <- NULL
object[[paste(iType,"lowerBand",sep=".")]] <- NULL
object[[paste(iType,"upperBand",sep=".")]] <- NULL
}
if(iid){
iLS.iid <- mapply(slice = attr(newdata.index,"vectorwise"), column = match(times, times.sorted), function(slice,column){
object[[paste(iType,"iid",sep=".")]][,column,slice]
}, SIMPLIFY = FALSE)
object[[paste(iType,"iid",sep=".")]] <- array(unlist(iLS.iid), dim = c(NROW(object[[paste(iType,"iid",sep=".")]]),1,length(iLS.iid)))
}
if(average.iid){
if(!is.list(object[[paste(iType,"average","iid",sep=".")]])){
object[[paste(iType,"average","iid",sep=".")]] <- cbind(rowSums(object[[paste(iType,"average","iid",sep=".")]]))
}else if(attr(average.iid,"rm.list")){
object[[paste(iType,"average","iid",sep=".")]] <- cbind(rowSums(object[[paste(iType,"average","iid",sep=".")]][[1]]))
}else{
object[[paste(iType,"average","iid",sep=".")]] <- lapply(object[[paste(iType,"average","iid",sep=".")]], function(iM){cbind(rowSums(iM))})
}
}
}
object$diag <- TRUE
if(band){
object <- stats::confint(object)
}
}else{
for(iType in type){ ## iType <- "cumhazard"
object[[iType]] <- object[[iType]][attr(newdata.index,"vectorwise"),,drop=FALSE]
if(se){
object[[paste(iType,"se",sep=".")]] <- object[[paste(iType,"se",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
}
if(confint){
object[[paste(iType,"lower",sep=".")]] <- object[[paste(iType,"lower",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
object[[paste(iType,"upper",sep=".")]] <- object[[paste(iType,"upper",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
}
if(band){
object[[paste(iType,"quantileBand",sep=".")]] <- object[[paste(iType,"quantileBand",sep=".")]][attr(newdata.index,"vectorwise")]
object[[paste(iType,"lowerBand",sep=".")]] <- object[[paste(iType,"lowerBand",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
object[[paste(iType,"upperBand",sep=".")]] <- object[[paste(iType,"upperBand",sep=".")]][attr(newdata.index,"vectorwise"),,drop=FALSE]
}
if(iid){
object[[paste(iType,"iid",sep=".")]] <- object[[paste(iType,"iid",sep=".")]][,,attr(newdata.index,"vectorwise"),drop=FALSE]
}
if(average.iid && attr(average.iid,"rm.list") && is.list(object[[paste(iType,"average","iid",sep=".")]])){
object[[paste(iType,"average","iid",sep=".")]] <- object[[paste(iType,"average","iid",sep=".")]][[1]]
}
if(needOrder || length(times) != length(times.sorted)){
col.reorder <- match(times,times.sorted) ## restaure original times and original time ordering (has been reduce to ascending unique times)
object[[iType]] <- object[[iType]][,col.reorder,drop=FALSE]
if(se){
object[[paste(iType,"se",sep=".")]] <- object[[paste(iType,"se",sep=".")]][,col.reorder,drop=FALSE]
}
if(confint){
object[[paste(iType,"lower",sep=".")]] <- object[[paste(iType,"lower",sep=".")]][,col.reorder,drop=FALSE]
object[[paste(iType,"upper",sep=".")]] <- object[[paste(iType,"upper",sep=".")]][,col.reorder,drop=FALSE]
}
if(band){
object[[paste(iType,"lowerBand",sep=".")]] <- object[[paste(iType,"lowerBand",sep=".")]][,col.reorder,drop=FALSE]
object[[paste(iType,"upperBand",sep=".")]] <- object[[paste(iType,"upperBand",sep=".")]][,col.reorder,drop=FALSE]
}
if(iid){
## does not decompress vcov
object[[paste(iType,"iid",sep=".")]] <- object[[paste(iType,"iid",sep=".")]][,col.reorder,,drop=FALSE]
}
if(average.iid){
if(attr(average.iid,"rm.list")){
object[[paste(iType,"average","iid",sep=".")]] <- object[[paste(iType,"average","iid",sep=".")]][,col.reorder,drop=FALSE]
}else{
object[[paste(iType,"average","iid",sep=".")]] <- lapply(object[[paste(iType,"average","iid",sep=".")]], function(iM){iM[,col.reorder,drop=FALSE]})
}
}
}
}
}
## ** export
return(object)
}
##----------------------------------------------------------------------
### compress.R ends here
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.