Nothing
## * predictCox (documentation)
#' @title Survival probabilities, hazards and cumulative hazards from Cox regression models
#' @name predictCox
#'
#' @description Routine to get baseline hazards and subject specific hazards
#' as well as survival probabilities from a \code{survival::coxph} or \code{rms::cph} object.
#' @param object The fitted Cox regression model object either
#' obtained with \code{coxph} (survival package) or \code{cph}
#' (rms package).
#' @param newdata [data.frame or data.table] Contain the values of the predictor variables
#' defining subject specific predictions.
#' Should have the same structure as the data set used to fit the \code{object}.
#' @param times [numeric vector] Time points at which to return
#' the estimated hazard/cumulative hazard/survival.
#' @param centered [logical] If \code{TRUE} return prediction at the
#' mean values of the covariates \code{fit$mean}, if \code{FALSE}
#' return a prediction for all covariates equal to zero. in the
#' linear predictor. Will be ignored if argument \code{newdata} is
#' used. For internal use.
#' @param type [character vector] the type of predicted value. Choices are \itemize{
#' \item \code{"hazard"} the baseline hazard function when
#' argument \code{newdata} is not used and the hazard function
#' when argument \code{newdata} is used. \item \code{"cumhazard"}
#' the cumulative baseline hazard function when argument
#' \code{newdata} is not used and the cumulative hazard function
#' when argument \code{newdata} is used. \item \code{"survival"}
#' the survival baseline hazard function when argument
#' \code{newdata} is not used and the cumulative hazard function
#' when argument \code{newdata} is used. } Several choices can be
#' combined in a vector of strings that match (no matter the case)
#' strings \code{"hazard"},\code{"cumhazard"}, \code{"survival"}.
#' @param keep.strata [logical] If \code{TRUE} add the (newdata) strata
#' to the output. Only if there any.
#' @param keep.times [logical] If \code{TRUE} add the evaluation times
#' to the output.
#' @param keep.newdata [logical] If \code{TRUE} add the value of the
#' covariates used to make the prediction in the output list.
#' @param keep.infoVar [logical] For internal use.
#' @param se [logical] If \code{TRUE} compute and add the standard errors to the output.
#' @param band [logical] If \code{TRUE} compute and add the quantiles for the confidence bands to the output.
#' @param iid [logical] If \code{TRUE} compute and add the influence function to the output.
#' @param confint [logical] If \code{TRUE} compute and add the confidence intervals/bands to the output.
#' They are computed applying the \code{confint} function to the output.
#' @param diag [logical] when \code{FALSE} the hazard/cumlative hazard/survival for all observations at all times is computed,
#' otherwise it is only computed for the i-th observation at the i-th time.
#' @param average.iid [logical] If \code{TRUE} add the average of the influence function over \code{newdata} to the output.
#' @param product.limit [logical]. If \code{TRUE} the survival is computed using the product limit estimator.
#' Otherwise the exponential approximation is used (i.e. exp(-cumulative hazard)).
#' @param store [vector of length 2] Whether prediction should only be computed for unique covariate sets and mapped back to the original dataset (\code{data="minimal"}) and whether the influence function should be stored in a memory efficient way (\code{iid="minimal"}). Otherwise use \code{data="full"} and/or \code{iid="full"}.
#'
#' @details
#' When the argument \code{newdata} is not specified, the function computes the baseline hazard estimate.
#' See (Ozenne et al., 2017) section "Handling of tied event times".
#'
#' Otherwise the function computes survival probabilities with confidence intervals/bands.
#' See (Ozenne et al., 2017) section "Confidence intervals and confidence bands for survival probabilities".
#' The survival is computed using the exponential approximation (equation 3).
#'
#' A detailed explanation about the meaning of the argument \code{store = c(iid="full")} can be found
#' in (Ozenne et al., 2017) Appendix B "Saving the influence functions".
#'
#' The function is not compatible with time varying predictor variables nor frailty.
#'
#' The centered argument enables us to reproduce the results obtained with the \code{basehaz}
#' function from the survival package but should not be modified by the user.
#'
#' @return A list with the predicted values. The iid decomposition is contained in
#' the value in an attribute called \code{"iid"},
#' using an array containing the value of the influence of each subject used to fit
#' the object (dim 1), for each subject in newdata (dim 3), and each time (dim 2).
#'
#' @author Brice Ozenne broz@@sund.ku.dk, Thomas A. Gerds tag@@biostat.ku.dk
#'
#' @references
#' Brice Ozenne, Anne Lyngholm Sorensen, Thomas Scheike, Christian Torp-Pedersen and Thomas Alexander Gerds.
#' riskRegression: Predicting the Risk of an Event using Cox Regression Models.
#' The R Journal (2017) 9:2, pages 440-460.
#'
#' @seealso
#' \code{\link{confint.predictCox}} to compute confidence intervals/bands.
#' \code{\link{autoplot.predictCox}} to display the predictions.
## * predictCox (examples)
#' @examples
##'
#' library(survival)
#' library(data.table)
#'
#' #######################
#' #### generate data ####
#' #######################
#'
#' set.seed(10)
#' d <- sampleData(50,outcome="survival") ## training dataset
#' nd <- sampleData(5,outcome="survival") ## validation dataset
#'
#' ## add ties
#' d$time.round <- round(d$time,1)
#' any(duplicated(d$time.round))
#'
#' ## add categorical variables
#' d$U <- sample(letters[1:3],replace=TRUE,size=NROW(d))
#' d$V <- sample(letters[5:8],replace=TRUE,size=NROW(d))
#' nd <- cbind(nd,d[sample(NROW(d),replace=TRUE,size=NROW(nd)),c("U","V")])
#'
#' ######################
#' #### Kaplan Meier ####
#' ######################
#'
#' #### no ties
#' fit.KM <- coxph(Surv(time,event)~ 1, data=d, x = TRUE, y = TRUE)
#' predictCox(fit.KM) ## exponential approximation
#' ePL.KM <- predictCox(fit.KM, product.limit = TRUE) ## product-limit
#' as.data.table(ePL.KM)
#'
#' range(survfit(Surv(time,event)~1, data = d)$surv - ePL.KM$survival) ## same
#'
#' #### with ties (exponential approximation, Efron estimator for the baseline hazard)
#' fit.KM.ties <- coxph(Surv(time.round,event)~ 1, data=d, x = TRUE, y = TRUE)
#' predictCox(fit.KM)
#'
#' ## retrieving survfit results with ties
#' fit.KM.ties <- coxph(Surv(time.round,event)~ 1, data=d,
#' x = TRUE, y = TRUE, ties = "breslow")
#' ePL.KM.ties <- predictCox(fit.KM, product.limit = TRUE)
#' range(survfit(Surv(time,event)~1, data = d)$surv - ePL.KM.ties$survival) ## same
#'
#' #################################
#' #### Stratified Kaplan Meier ####
#' #################################
#'
#' fit.SKM <- coxph(Surv(time,event)~strata(X2), data=d, x = TRUE, y = TRUE)
#' ePL.SKM <- predictCox(fit.SKM, product.limit = TRUE)
#' ePL.SKM
#'
#' range(survfit(Surv(time,event)~X2, data = d)$surv - ePL.SKM$survival) ## same
#'
#' ###################
#' #### Cox model ####
#' ###################
#'
#' fit.Cox <- coxph(Surv(time,event)~X1 + X2 + X6, data=d, x = TRUE, y = TRUE)
#'
#' #### compute the baseline cumulative hazard
#' Cox.haz0 <- predictCox(fit.Cox)
#' Cox.haz0
#'
#' range(survival::basehaz(fit.Cox)$hazard-Cox.haz0$cumhazard) ## same
#'
#' #### compute individual specific cumulative hazard and survival probabilities
#' ## exponential approximation
#' fit.predCox <- predictCox(fit.Cox, newdata=nd, times=c(3,8),
#' se = TRUE, band = TRUE)
#' fit.predCox
#'
#' ## product-limit
#' fitPL.predCox <- predictCox(fit.Cox, newdata=nd, times=c(3,8),
#' product.limit = TRUE, se = TRUE)
#' fitPL.predCox
#'
#' ## Note: product limit vs. exponential does not affect uncertainty quantification
#' range(fitPL.predCox$cumhazard.se - fit.predCox$cumhazard.se) ## same
#' range(fitPL.predCox$survival.se - fit.predCox$survival.se) ## different
#' ## except through a multiplicative factor (multiply by survival in the delta method)
#' fitPL.predCox$survival.se - fitPL.predCox$cumhazard.se * fitPL.predCox$survival
#'
#'
#' #### left truncation
#' test2 <- list(start=c(1,2,5,2,1,7,3,4,8,8),
#' stop=c(2,3,6,7,8,9,9,9,14,17),
#' event=c(1,1,1,1,1,1,1,0,0,0),
#' x=c(1,0,0,1,0,1,1,1,0,0))
#' m.cph <- coxph(Surv(start, stop, event) ~ 1, test2, x = TRUE)
#' as.data.table(predictCox(m.cph))
#'
#' basehaz(m.cph)
#'
#' ##############################
#' #### Stratified Cox model ####
#' ##############################
#'
#' #### one strata variable
#' fit.SCox <- coxph(Surv(time,event)~X1+strata(X2)+X6, data=d, x = TRUE, y = TRUE)
#' predictCox(fit.SCox, newdata=nd, times = c(3,12))
#' predictCox(fit.SCox, newdata=nd, times = c(3,12), product.limit = TRUE)
#' ## NA: timepoint is beyond the last observation in the strata and censoring
#' tapply(d$time,d$X2,max)
#'
#' #### two strata variables
#' fit.S2Cox <- coxph(Surv(time,event)~X1+strata(U)+strata(V)+X2,
#' data=d, x = TRUE, y = TRUE)
#'
#' base.S2Cox <- predictCox(fit.S2Cox)
#' range(survival::basehaz(fit.S2Cox)$hazard - base.S2Cox$cumhazard) ## same
#' predictCox(fit.S2Cox, newdata=nd, times = 3)
## * predictCox (code)
#' @rdname predictCox
#' @export
predictCox <- function(object,
times,
newdata = NULL,
centered = TRUE,
type=c("cumhazard","survival"),
keep.strata = TRUE,
keep.times = TRUE,
keep.newdata = FALSE,
keep.infoVar = FALSE,
se = FALSE,
band = FALSE,
iid = FALSE,
confint = (se+band)>0,
diag = FALSE,
average.iid = FALSE,
product.limit = FALSE,
store = NULL){
call <- match.call()
newdata <- data.table::copy(newdata)
## centering
if(!is.null(newdata)){
if(inherits(centered,"data.frame")){
df.reference <- centered
centered2 <- TRUE ## for the linear predictor of the hazard
}else{
df.reference <- NULL
centered2 <- centered ## for the linear predictor of the hazard
}
centered <- TRUE ## for the linear predictor of the baseline hazard
}else{
centered2 <- FALSE
}
## ** Extract elements from object
if (missing(times)) {
nTimes <- 0
times <- numeric(0)
}else{
if(!is.numeric(times) && !is.integer(times)){
stop("Argument \'times\' should be a numeric vector. \n",
"Provide class: ",class(times)[1],". \n")
}
nTimes <- length(times)
}
needOrder <- (nTimes[1]>0 && is.unsorted(times))
if (all(!is.na(times)) && needOrder) {
order.times <- order(times)
oorder.times <- order(order.times)
times.sorted <- sort(times)
}else{
if (nTimes==0){
times.sorted <- numeric(0)
order.times <- integer(0)
oorder.times <- integer(0)
}else{
times.sorted <- times
order.times <- 1:nTimes
oorder.times <- 1:nTimes
}
}
object.n <- coxN(object)
object.modelFrame <- coxModelFrame(object)
infoVar <- coxVariableName(object, model.frame = object.modelFrame)
object.baseEstimator <- coxBaseEstimator(object)
## ease access
is.strata <- infoVar$is.strata
object.levelStrata <- levels(object.modelFrame$strata) ## levels of the strata variable
nStrata <- length(object.levelStrata) ## number of strata
nVar.lp <- length(infoVar$lpvars) ## number of variables in the linear predictor
## ** normalize model frame
## convert strata to numeric
object.modelFrame[,c("strata.num") := as.numeric(.SD$strata) - 1]
## linear predictor
## if we predict the hazard for newdata then there is no need to center the covariates
set(object.modelFrame,
j = "eXb",
value = exp(coxLP(object, data = NULL, center = if(is.null(newdata)){centered}else{FALSE})))
## add linear predictor and remove useless columns
rm.name <- setdiff(names(object.modelFrame),c("start","stop","status","eXb","strata","strata.num"))
if(length(rm.name)>0){
object.modelFrame[,c(rm.name) := NULL]
}
## sort the data
object.modelFrame[, c("statusM1") := 1-.SD$status] ## sort by statusM1 such that deaths appear first and then censored events
object.modelFrame[, c("XXXindexXXX") := 1:.N] ## keep track of the initial positions (useful when calling calcSeCox)
if(inherits(object,"prodlim") && object$reverse){
reverse <- TRUE
data.table::setkeyv(object.modelFrame, c("strata.num","stop","start","status"))
}else{
reverse <- FALSE
data.table::setkeyv(object.modelFrame, c("strata.num","stop","start","statusM1"))
}
## last event time in each strata
if(!is.null(attr(times,"etimes.max"))){ ## specified by the user
etimes.max <- attr(times,"etimes.max")
attr(times,"etimes.max") <- NULL
attr(times.sorted,"etimes.max") <- etimes.max
}else if(is.strata){ ## based on the data
if(nVar.lp==0){
iDTtempo <- object.modelFrame[, .SD[which.max(.SD$stop)], by = "strata.num"]
etimes.max <- iDTtempo[,if(.SD$status==1){1e12}else{.SD$stop}, by = "strata.num"][[2]]
}else{
etimes.max <- object.modelFrame[, max(.SD$stop), by = "strata.num"][[2]]
}
}else{
if(nVar.lp==0 && (utils::tail(object.modelFrame$status,1)==1)){ ## no covariates and ends by a death
etimes.max <- 1e12
}else{
etimes.max <- max(object.modelFrame[["stop"]])
}
}
## ** checks
## *** object
if((reverse == FALSE) && ("risk" %in% object$type)){
stop("predictCox can only deal with prodlim objects applied to survival data or targeting the censoring distribution. \n")
}
## predictCox is not compatible with all coxph/cph object (i.e. only handle only simple cox models)
if(!is.null(object$weights) && !all(object$weights==1)){
stop("predictCox does not know how to handle Cox models fitted with weights \n")
}
if(!is.null(object$naive.var) || !is.null(object$frail)){
stop("predictCox does not know how to handle frailty.")
}
if(object.baseEstimator == "exact"){
stop("Prediction with exact handling of ties is not implemented.\n")
}
if(!is.null(object$call$tt)){
stop("predictCox does not know how to handle time varying effects.\n")
}
## convergence issue
if(!is.null(coef(object)) && any(is.na(coef(object)))){
print(coef(object))
stop("One or several parameters of the regression model have no value, i.e., a value 'NA'.\n")
}
## *** times
if(nTimes[1]>0 && any(is.na(times))){
stop("Missing (NA) values in argument \'times\' are not allowed.\n")
}
## *** type
type <- tolower(type)
if(any(type %in% c("lp","hazard","cumhazard","survival") == FALSE)){
stop("type can only be \"lp\", \"hazard\", \"cumhazard\" or/and \"survival\" \n")
}
if(is.null(newdata) && "lp" %in% type){
stop("Cannot evaluate the linear predictor when argument \'newdata\' is missing. \n")
}
if(length(times)>1 && "lp" %in% type){
stop("Cannot evaluate the linear predictor when there are multiple timepoints. \n")
}
## *** newdata
if (missing(newdata) && (se || iid || average.iid)){
stop("Argument 'newdata' is missing. Cannot compute standard errors in this case.")
}
if(!is.null(newdata)){
if(nTimes[1]==0 && !identical(as.character(type),"lp")){
stop("Time points at which to evaluate the predictions are missing \n")
}
if(!is.vector(times)){
stop("Argument \'times\' must be a vector \n")
}
name.regressor <- c(infoVar$lpvars.original, infoVar$stratavars.original)
if(length(name.regressor) > 0 && any(name.regressor %in% names(newdata) == FALSE)){
stop("Missing variables in argument \'newdata\': \"",
paste0(setdiff(name.regressor,names(newdata)), collapse = "\" \""),
"\"\n")
}
if(se[1] && ("hazard" %in% type)){
stop("Standard error cannot be computed for the hazard \n")
}
if(band[1] && ("hazard" %in% type)){
stop("Confidence bands cannot be computed for the hazard \n")
}
}
## *** diag
if(!is.logical(diag)){
stop("Argument \'diag\' must be of type logical \n")
}
if(diag){
if(NROW(newdata)!=length(times)){
stop("When argument \'diag\' is TRUE, the number of rows in \'newdata\' must equal the length of \'times\' \n")
}
}
## *** store
store.data <- NULL
store.iid <- "full"
if(!is.null(store)){
if(length(store) > 2){
stop("Argument \'store\' should contain at most two elements. \n",
"For instance store = c(data = \"full\", iid = \"full\") or store = c(data = \"minimal\", iid = \"minimal\").\n")
}
if(is.null(names(store)) || any(names(store) %in% c("data","iid") == FALSE)){
stop("Incorrect names for argument \'store\': should \"data\" and \"iid\". \n",
"For instance store = c(data = \"full\", iid = \"full\") or store = c(data = \"minimal\", iid = \"minimal\").\n")
}
if("data" %in% names(store) && !is.null(store[["data"]])){
if(store[["data"]] %in% c("minimal","full") == FALSE){
stop("Element in argument \'store\' should take value \'minimal\' or \'full\'.\n",
"For instance store = c(data = \"full\") or store = c(data = \"minimal\").\n")
}
store.data <- store[["data"]]
}
if("iid" %in% names(store) && !is.null(store[["iid"]])){
if(store[["iid"]] %in% c("minimal","full") == FALSE){
stop("Element in argument \'store\' should take value \'minimal\' or \'full\'.\n",
"For instance store = c(iid = \"full\") or store = c(iid = \"minimal\").\n")
}
store.iid <- store[["iid"]]
}
}
## *** average.iid
if(average.iid==TRUE && !is.null(attr(average.iid,"factor"))){
if(is.null(store.iid) && !is.null(object$iid$store.iid)){
store.iid <- object$iid$store.iid
}
test.list <- !is.list(attr(average.iid,"factor"))
if(test.list){
stop("Attribute \"factor\" of argument \'average.iid\' must be a list \n")
}
test.matrix <- any(unlist(lapply(attr(average.iid,"factor"), is.matrix))==FALSE)
if(test.matrix){
stop("Attribute \"factor\" of argument \'average.iid\' must be a list of matrices \n")
}
for(iFactor in 1:length(attr(average.iid,"factor"))){ ## iFactor <- 1
## when only one column and diag = FALSE, use the same weights at all times
if((diag == FALSE) && (NCOL(attr(average.iid,"factor")[[iFactor]])==1) && (nTimes > 1)){
attr(average.iid,"factor")[[iFactor]] <- matrix(attr(average.iid,"factor")[[iFactor]][,1],
nrow = NROW(attr(average.iid,"factor")[[iFactor]]),
ncol = nTimes, byrow = FALSE)
}
## check dimensions
if(any(dim(attr(average.iid,"factor")[[iFactor]])!=c(NROW(newdata), diag + (1-diag)*nTimes))){
stop("Attribute \"factor\" of argument \'average.iid\' must be a list of matrices of size ",new.n,",",diag + (1-diag)*nTimes," \n")
}
}
}
## ** compress newdata into unique patient profile
outCompress <- compressData(object, newdata = newdata, times = times, diag = diag, average.iid = average.iid,
oorder.times = oorder.times, times.sorted = times.sorted,
nStrata = nStrata, infoVar = infoVar, level = store.data)
if(!is.null(outCompress)){
newdata <- outCompress$newdata
times.sorted <- outCompress$times.sorted
diag <- outCompress$diag
order.times <- outCompress$order.times
oorder.times <- outCompress$oorder.times
nTimes <- outCompress$nTimes
average.iid <- outCompress$average.iid
}
## ** baseline hazard
## *** evaluate at requested times
## if(inherits(object,"prodlim")){
## Lambda0 <- baseHaz_prodlim(object,
## times = times.sorted,
## etimes.max = etimes.max)
## }else{
Lambda0 <- baseHaz_cpp(starttimes = object.modelFrame$start,
stoptimes = object.modelFrame$stop,
status = object.modelFrame$status,
eXb = object.modelFrame$eXb,
strata = object.modelFrame$strata.num,
nPatients = object.n,
nStrata = nStrata,
emaxtimes = etimes.max,
predtimes = times.sorted,
cause = 1,
Efron = (object.baseEstimator == "efron"),
reverse = reverse)
## }
## *** evaluate at all jump times
if(product.limit){
if(length(times.sorted)==0){
AllLambda0 <- Lambda0
## }else if(inherits(object,"prodlim")){
## AllLambda0 <- baseHaz_prodlim(object,
## times = numeric(0),
## etimes.max = pmin(etimes.max,max(c(times.sorted,Inf))) ## times.sorted may be numeric(0)
## )
}else{
AllLambda0 <- baseHaz_cpp(starttimes = object.modelFrame$start,
stoptimes = object.modelFrame$stop,
status = object.modelFrame$status,
eXb = object.modelFrame$eXb,
strata = object.modelFrame$strata.num,
nPatients = object.n,
nStrata = nStrata,
emaxtimes = pmin(etimes.max,max(c(times.sorted,Inf))), ## times.sorted may be numeric(0),
predtimes = numeric(0),
cause = 1,
Efron = (object.baseEstimator == "efron"),
reverse = reverse)
}
}
## *** post-process
## restaure strata levels (numeric to character)
Lambda0$strata <- factor(Lambda0$strata, levels = 0:(nStrata-1), labels = object.levelStrata)
if(product.limit && "survival" %in% type){
AllLambda0$strata <- factor(AllLambda0$strata, levels = 0:(nStrata-1), labels = object.levelStrata)
}
## update AllLambda with time 0, timepoints at which the hazard can no more be computed (NA), and evaluate baseline survival
if(product.limit && "survival" %in% type){
ls.AllLambda0 <- lapply(levels(AllLambda0$strata), function(iS){ ## iS <- levels(AllLambda0$strata)[1]
iIndex <- which(AllLambda0$strata==iS)
iOut <- list(times = AllLambda0$times[iIndex],
hazard = AllLambda0$hazard[iIndex],
cumhazard = AllLambda0$cumhazard[iIndex],
strata = AllLambda0$strata[iIndex])
if(iOut$times[1]>0 && (!is.null(newdata) || length(times.sorted)>0)){ ## add time 0 when it is not there
iOut$times <- c(0,iOut$times)
iOut$hazard <- c(0,iOut$hazard)
iOut$cumhazard <- c(0,iOut$cumhazard)
iOut$strata <- c(iOut$strata[1],iOut$strata)
}
if(any(is.na(Lambda0$hazard[Lambda0$strata==iS])) && (!is.null(newdata) || length(times.sorted)>0)){ ## add time where the first NA occured
iOut$times <- c(iOut$times, etimes.max[levels(Lambda0$strata)==iS]+1e-12)
iOut$hazard <- c(iOut$hazard,NA)
iOut$cumhazard <- c(iOut$cumhazard,NA)
iOut$strata <- c(iOut$strata,iOut$strata[1])
}
if(is.null(newdata)){ ## compute baseline survival, i.e., survival for covariate values of 0
iOut$survival <- cumprod(1-iOut$hazard)
}
return(iOut)
})
AllLambda0 <- list(times = unlist(lapply(ls.AllLambda0,"[[","times"), use.names = FALSE),
hazard = unlist(lapply(ls.AllLambda0,"[[","hazard"), use.names = FALSE),
cumhazard = unlist(lapply(ls.AllLambda0,"[[","cumhazard"), use.names = FALSE),
survival = unlist(lapply(ls.AllLambda0,"[[","survival"), use.names = FALSE),
strata = unlist(lapply(ls.AllLambda0,"[[","strata"), use.names = FALSE))
}
## *** special case: return baseline hazard/cumulative hazard/survival
if (is.null(newdata)){
if (!("hazard" %in% type)){
Lambda0$hazard <- NULL
}
if ("survival" %in% type){ ## must be before cumhazard
if(product.limit){
if(length(times.sorted)==0){
Lambda0$survival <- AllLambda0$survival
}else{
Lambda0$survival <- unlist(lapply(unique(AllLambda0$strata), function(iS){ ## iS <- 1
AllLambda0$survival[AllLambda0$strata==iS][prodlim::sindex(jump.times = AllLambda0$times[AllLambda0$strata==iS], eval.times = times.sorted[oorder.times])]
}))
}
}else{
Lambda0$survival <- exp(-Lambda0$cumhazard)
}
}else{
Lambda0$survival <- NULL
}
if (!("cumhazard" %in% type)){
Lambda0$cumhazard <- NULL
}
if (keep.times==FALSE){
Lambda0$times <- NULL
}
if (keep.strata[[1]]==FALSE ||(is.null(call$keep.strata) && !is.strata)){
Lambda0$strata <- NULL
}
if( keep.newdata[1]==TRUE){
Lambda0$newdata <- object.modelFrame
}
add.list <- list(lastEventTime = etimes.max,
se = FALSE,
band = FALSE,
type = type,
nTimes = nTimes,
baseline = TRUE,
var.lp = infoVar$lpvars.original,
var.strata = infoVar$stratavars.original)
if(keep.infoVar){
add.list$infoVar <- infoVar
}
Lambda0[names(add.list)] <- add.list
class(Lambda0) <- "predictCox"
return(Lambda0)
}
## ** compute subject specific hazard / cumlative hazard / survival
out <- list()
## *** reformat newdata (compute linear predictor and strata)
new.n <- NROW(newdata)
newdata <- data.table::as.data.table(newdata)
Xb <- coxLP(object, data = newdata, center = FALSE)
if ("lp" %in% type){
out$lp <- cbind(Xb)
lp.iid <- centered2 ## when ask for centered then we need the iid for exporting the correct se
}else{
lp.iid <- FALSE
}
new.eXb <- exp(Xb)
new.strata <- coxStrata(object, data = newdata,
sterms = infoVar$strata.sterms,
strata.vars = infoVar$stratavars,
strata.levels = infoVar$strata.levels)
new.levelStrata <- levels(droplevels(new.strata))
## *** estimates
if (is.strata==FALSE && any(c("hazard","cumhazard","survival") %in% type)){ ## no strata
if(diag && (!product.limit || "hazard" %in% type || "cumhazard" %in% type)){
iTimes <- prodlim::sindex(jump.times = Lambda0$times, eval.times = times.sorted[oorder.times])
}
if("survival" %in% type && product.limit){
iAllTimes <- prodlim::sindex(jump.times = AllLambda0$times, eval.times = times.sorted[oorder.times])
}
if ("hazard" %in% type){
if(diag){
out$hazard <- cbind(new.eXb * Lambda0$hazard[iTimes])
}else{
out$hazard <- (new.eXb %o% Lambda0$hazard[oorder.times])
}
}
if ("cumhazard" %in% type || ("survival" %in% type && product.limit == FALSE)){
if(diag){
cumhazard <- cbind(new.eXb * Lambda0$cumhazard[iTimes])
}else{
cumhazard <- new.eXb %o% Lambda0$cumhazard[oorder.times]
}
if ("cumhazard" %in% type){
out$cumhazard <- cumhazard
}
if ("survival" %in% type && product.limit == FALSE){
out$survival <- exp(-cumhazard)
}
}
if ("survival" %in% type && product.limit){
if(diag){
out$survival <- cbind(mapply(x = new.eXb, t = iAllTimes, FUN = function(x,t){
prod(1 - x * AllLambda0$hazard[1:t])
}))
}else{
out$survival <- do.call(rbind,apply(1 - new.eXb %o% AllLambda0$hazard, MARGIN = 1, cumprod, simplify = FALSE))[,iAllTimes,drop=0L]
}
}
}else if(any(c("hazard","cumhazard","survival") %in% type)){ ## strata
## initialization
if ("hazard" %in% type){
out$hazard <- matrix(0, nrow = new.n, ncol = nTimes*(1-diag)+diag)
}
if ("cumhazard" %in% type){
out$cumhazard <- matrix(NA, nrow = new.n, ncol = nTimes*(1-diag)+diag)
}
if ("survival" %in% type){
out$survival <- matrix(NA, nrow = new.n, ncol = nTimes*(1-diag)+diag)
}
## loop across strata
for(S in new.levelStrata){ ## S <- 1
id.S <- which(Lambda0$strata==S)
newid.S <- which(new.strata==S)
if(product.limit && "survival" %in% type){
jump.times.S <- AllLambda0$times[AllLambda0$strata==S]
hazard0.S <- AllLambda0$hazard[AllLambda0$strata==S]
}
if(diag && (!product.limit || "hazard" %in% type || "cumhazard" %in% type)){
iSTimes <- prodlim::sindex(jump.times = Lambda0$times[id.S], eval.times = times.sorted[oorder.times[newid.S]])
}
if("survival" %in% type && product.limit){
if(diag){
iSAllTimes <- prodlim::sindex(jump.times = jump.times.S, eval.times = times.sorted[oorder.times[newid.S]])
}else{
iSAllTimes <- prodlim::sindex(jump.times = jump.times.S, eval.times = times.sorted[oorder.times])
}
}
if ("hazard" %in% type){
if(diag){
out$hazard[newid.S] <- new.eXb[newid.S] * Lambda0$hazard[id.S][iSTimes]
}else{
out$hazard[newid.S,] <- new.eXb[newid.S] %o% Lambda0$hazard[id.S][oorder.times]
}
}
if ("cumhazard" %in% type || ("survival" %in% type && product.limit == FALSE)){
if(diag){
cumhazard.S <- cbind(new.eXb[newid.S] * Lambda0$cumhazard[id.S][iSTimes])
}else{
cumhazard.S <- new.eXb[newid.S] %o% Lambda0$cumhazard[id.S][oorder.times]
}
if ("cumhazard" %in% type){
out$cumhazard[newid.S,] <- cumhazard.S
}
if ("survival" %in% type && product.limit == FALSE){
out$survival[newid.S,] <- exp(-cumhazard.S)
}
}
if("survival" %in% type && product.limit){
if(diag){
out$survival[newid.S,] <- mapply(x = new.eXb[newid.S], t = iSAllTimes, FUN = function(x,t){
prod(1 - x * hazard0.S[1:t])
})
}else{
out$survival[newid.S,] <- do.call(rbind,apply(1 - new.eXb[newid.S] %o% hazard0.S, MARGIN = 1, cumprod, simplify = FALSE))[,iSAllTimes,drop=0L]
}
}
}
}
## *** uncertainty
if(se[[1]] || band[[1]] || iid[[1]] || average.iid[[1]]){
if(nVar.lp > 0){
## get the (new) design matrix
new.LPdata <- model.matrix(object, data = newdata)
if(NROW(new.LPdata)!=NROW(newdata)){
stop("NROW of the design matrix and newdata differ. \n",
"Maybe because newdata contains NA values \n")
}
if(any(sort(colnames(new.LPdata))!=sort(names(coef(object))))){
stop("Names of the design matrix and model parameters differ. \n",
"Possible error in model.matrix due to special operator in the formula. \n")
}
}else{
new.LPdata <- matrix(0, ncol = 1, nrow = new.n)
}
## restaure original ordering
data.table::setkeyv(object.modelFrame,"XXXindexXXX")
if(diag){
Lambda0$oorder.times <- oorder.times
}else{
Lambda0$oorder.times <- 1:nTimes
}
## Computation of the influence function and/or the standard error
export <- c("iid"[(iid+band+lp.iid)>0],"se"[(se+band)>0],"average.iid"[average.iid==TRUE])
if(!is.null(attr(average.iid,"factor"))){
if(diag){
attr(export,"factor") <- attr(average.iid,"factor")
}else{
## re-order columns according to times
attr(export,"factor") <- lapply(attr(average.iid,"factor"), function(iF){ ## iF <- attr(average.iid,"factor")[[1]]
iF[,order.times,drop=FALSE]
})
}
}
if(diag){
times2 <- times.sorted[oorder.times]
}else{
times2 <- times.sorted
}
attr(times2,"etimes.max") <- attr(times.sorted,"etimes.max")
outSE <- calcSeCox(object,
times = times2,
nTimes = nTimes,
type = type,
diag = diag,
Lambda0 = Lambda0,
object.n = object.n,
object.time = object.modelFrame$stop,
object.eXb = object.modelFrame$eXb,
object.strata = object.modelFrame$strata,
nStrata = nStrata,
new.n = new.n,
new.eXb = new.eXb,
new.LPdata = new.LPdata,
new.strata = new.strata,
new.survival = if(diag){out$survival}else{out$survival[,order.times,drop=FALSE]},
nVar.lp = nVar.lp,
export = export,
store.iid = store.iid)
## restaure orginal time ordering
if((iid+band)>0){
if ("lp" %in% type){
out$lp.iid <- outSE$lp.iid
}
if ("hazard" %in% type){
if (needOrder[1] && (diag[1] == FALSE))
out$hazard.iid <- outSE$hazard.iid[,oorder.times,,drop=0L]
else
out$hazard.iid <- outSE$hazard.iid
}
if ("cumhazard" %in% type){
if (needOrder[1] && (diag[1] == FALSE))
out$cumhazard.iid <- outSE$cumhazard.iid[,oorder.times,,drop=0L]
else
out$cumhazard.iid <- outSE$cumhazard.iid
}
if ("survival" %in% type){
if (needOrder[1] && (diag[1] == FALSE))
out$survival.iid <- outSE$survival.iid[,oorder.times,,drop=0L]
else
out$survival.iid <- outSE$survival.iid
}
}
if(average.iid == TRUE){
if("lp" %in% type){
out$lp.average.iid <- outSE$lp.average.iid
}
if ("hazard" %in% type){
if (needOrder && (diag[1] == FALSE)){
if(is.list(outSE$hazard.average.iid)){
out$hazard.average.iid <- lapply(outSE$hazard.average.iid, function(iIID){iIID[,oorder.times,drop=0L]})
}else{
out$hazard.average.iid <- outSE$hazard.average.iid[,oorder.times,drop=0L]
}
}else{
out$hazard.average.iid <- outSE$hazard.average.iid
}
}
if ("cumhazard" %in% type){
if (needOrder && (diag[1] == FALSE)){
if(is.list(outSE$cumhazard.average.iid)){
out$cumhazard.average.iid <- lapply(outSE$cumhazard.average.iid, function(iIID){iIID[,oorder.times,drop=0L]})
}else{
out$cumhazard.average.iid <- outSE$cumhazard.average.iid[,oorder.times,drop=0L]
}
}else{
out$cumhazard.average.iid <- outSE$cumhazard.average.iid
}
}
if ("survival" %in% type){
if (needOrder && (diag[1] == FALSE)){
if(is.list(outSE$survival.average.iid)){
out$survival.average.iid <- lapply(outSE$survival.average.iid, function(iIID){iIID[,oorder.times,drop=0L]})
}else{
out$survival.average.iid <- outSE$survival.average.iid[,oorder.times,drop=0L]
}
}else {
out$survival.average.iid <- outSE$survival.average.iid
}
}
if(is.list(attr(average.iid,"factor"))){
if("hazard" %in% type){
names(out$hazard.average.iid) <- names(attr(average.iid,"factor"))
}
if("cumhazard" %in% type){
names(out$cumhazard.average.iid) <- names(attr(average.iid,"factor"))
}
if("survival" %in% type){
names(out$survival.average.iid) <- names(attr(average.iid,"factor"))
}
}
}
if((se+band)>0){
if("lp" %in% type){
out$lp.se <- outSE$lp.se
}
if ("cumhazard" %in% type){
if (needOrder && (diag[1] == FALSE)){
out$cumhazard.se <- outSE$cumhazard.se[,oorder.times,drop=0L]
}else{
out$cumhazard.se <- outSE$cumhazard.se
}
}
if ("survival" %in% type){
if (needOrder && (diag[1] == FALSE)){
out$survival.se <- outSE$survival.se[,oorder.times,drop=0L]
} else{
out$survival.se <- outSE$survival.se
}
}
}
}
## ** substract reference
if("lp" %in% type && centered2){
if(is.null(df.reference)){
data <- try(eval(object$call$data), silent = TRUE)
if(inherits(x=data,what="try-error")){
stop("Could not evaluate the dataset used to fit the model to define a reference level. \n",
"Set argument \'centered\' to FALSE or to a data.frame definining the reference level. \n")
}
var.original <- infoVar$lpvars.original
ls.ref <- lapply(var.original, function(iVar){
if(is.numeric(data[[iVar]])){
return(unname(mean(data[[iVar]])))
}else if(is.factor(data[[iVar]])){
return(unname(factor(levels(data[[iVar]])[1],levels(data[[iVar]]))))
}else if(is.character(data[[iVar]])){
return(unname(sort(unique(data[[iVar]])[1])))
}
})
df.reference <- as.data.frame(setNames(ls.ref,var.original))
}
ls.args <- as.list(call)[-1]
ls.args$newdata <- df.reference
ls.args$centered <- FALSE
ls.args$type <- "lp"
ls.args$se <- (se+band>0)
ls.args$iid <- (iid+se+band>0)
ls.args$band <- FALSE
ls.args$confint <- FALSE
outRef <- do.call(predictCox, args = ls.args)
out$lp <- out$lp - as.double(outRef$lp)
if(band[1] || se[1]){
out$lp.se <- cbind(sqrt(colSums(colCenter_cpp(outSE$lp.iid, outRef$lp.iid)^2))) ## use outSe$lp.iid instead of out$lp.iid for when not exporting the iid
}
if(band[1] || iid[1]){
out$lp.iid <- colCenter_cpp(out$lp.iid, outRef$lp.iid)
}
}
## ** add information to the predictions
add.list <- list(lastEventTime = etimes.max,
se = se,
band = band,
type = type,
diag = diag,
nTimes = nTimes,
baseline = FALSE,
var.lp = infoVar$lpvars.original,
var.strata = infoVar$stratavars.original)
if (keep.times==TRUE){
add.list$times <- times
}
if( keep.infoVar){
add.list$infoVar <- infoVar
}
out[names(add.list)] <- add.list
class(out) <- "predictCox"
## ** confidence intervals/bands
if(confint){
out <- stats::confint(out)
}
if(band[1] && se[1]==FALSE){
out[paste0(type,".se")] <- NULL
}
if(band[1] && iid[1]==FALSE){
out[paste0(type,".iid")] <- NULL
}
## ** retrieve original patient profile from unique patient profiles
if(!is.null(outCompress)){
out <- decompressData(out, newdata = newdata, type = type, diag = outCompress$diag.save, times = times, se = se, confint = confint, band = band, iid = iid, average.iid = average.iid,
newdata.index = outCompress$newdata.index, times.sorted = times.sorted, needOrder = needOrder)
newdata <- outCompress$newdata.save
}
all.covars <- c(infoVar$stratavars.original,infoVar$lpvars.original)
if(keep.newdata[1]==TRUE && length(all.covars)>0){
if(data.table::is.data.table(newdata)){
out$newdata <- newdata[, all.covars, with = FALSE]
}else{
out$newdata <- newdata[, all.covars, drop = FALSE]
}
}
if(keep.strata[1]==TRUE && length(infoVar$stratavars.original)>0){
if(!is.null(outCompress)){
out$strata <- new.strata[attr(outCompress$newdata.index,"vectorwise")]
}else{
out$strata <- new.strata
}
}
## ** export
return(out)
}
## * baseHaz_prodlim
baseHaz_prodlim <- function(object, times, etimes.max){
## ** check input
if(!inherits(object,"prodlim")){
stop("baseHaz_prodlim can only handle prodlim objects. \n")
}
## ** gather baseline hazard, cumulative hazard and corresponding strata and jump times
n.strata <- length(object$first.strata)
Lambda0 <- list(times = object$time,
hazard = object$hazard,
cumhazard = NA,
strata = unlist(mapply(s = 0:(n.strata-1), size = object$size.strata, function(s,size){rep(s,size)}, SIMPLIFY = FALSE)))
Lambda0$cumhazard <- unname(unlist(tapply(Lambda0$hazard, Lambda0$strata, cumsum, simplify = FALSE)))
## ** subset to match user specific times
if(length(times)>0){
ls.Lambda0 <- lapply(0:(n.strata-1), function(iS){ ## iS <- 0
iIndex <- which(Lambda0$strata==iS)
iSindex <- prodlim::sindex(jump.times = Lambda0$times[iIndex], eval.times = times)
iOut <- list(times = times,
hazard = c(0,Lambda0$hazard[iIndex])[iSindex+1],
cumhazard = c(0,Lambda0$cumhazard[iIndex])[iSindex+1],
strata = rep(iS, length(times)))
return(iOut)
})
Lambda0 <- list(times = unlist(lapply(ls.Lambda0,"[[","times")),
hazard = unlist(lapply(ls.Lambda0,"[[","hazard")),
cumhazard = unlist(lapply(ls.Lambda0,"[[","cumhazard")),
strata = unlist(lapply(ls.Lambda0,"[[","strata")))
## set to NA after the last event in each strata
index.NA <- which(Lambda0$times>etimes.max[Lambda0$strata+1])
if(length(index.NA)>0){
Lambda0$hazard[index.NA] <- NA
Lambda0$cumhazard[index.NA] <- NA
}
}
## ** export
return(Lambda0)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.