#' Calculate w-year risk from a landmark time point
#'
#' @param supermodel fitted landmarking supermodel
#' @param newdata dataframe of individuals to make predictions for. Must contain the original covariates (i.e., without landmark interaction)
#' @param tLM time points at which to predict risk of w more years.
#' Note tLM must be one value for newdata or must have the same length as the number of rows of newdata
#' (i.e., each datapoint is associated with one LM/prediction time point).
#' @param cause Cause of interest if under competing risks.
#' @param w Prediction window, i.e., predict w-year (/month/..) risk from each of the tLMs.
#' Defaults to the w used in model fitting.
#' If w > than that used in model fitting, results are unreliable, but can be produced by setting extend=T.
#' @param extend Argument to allow for predictions at landmark times that are greater than those used in model fitting,
#' or prediction windows greater than the one used in model fitting.
#' Default is FALSE. If set to TRUE, predictions may be unreliable.
#' @param silence Silence the warning message when extend is set to TRUE.
#' @param complete Only make predictions for data entries with non-NA entries (i.e., non-NA predictions). Default is TRUE.
#'
#' @return An object of class "LMpred" with components:
#' - preds: a dataframe with columns LM and risk, each entry corresponds to one individual and prediction time point (landmark)
#' - w, type, LHS: as in the fitted super model
#' - data: the newdata given in input
#' @details See the Github for example code
#' @import survival
#' @export
#'
predLMrisk <- function(supermodel, newdata, tLM, cause, w, extend=F, silence=F, complete=T)
{
func_covars <- supermodel$func_covars
func_LM <- supermodel$func_LM
model_w <- supermodel$w
if(missing(w)){w <- model_w}
else{
if(w > model_w && !extend) stop(paste0("Prediction window w (=",w,") is larger than the window used in model fitting (=",model_w,").",
"\nIf you wish to still make predictions at these times, set arg extend=T but note that results may be unreliable."))
else if (w > model_w & extend) {
if (!silence) message(paste0("NOTE: Prediction window w (=",w,") is larger than the window used in model fitting (=",model_w,"). ",
"\nPredictions may be unreliable."))
}
}
fm <- supermodel$model
type <- supermodel$type
if (type == "coxph") {
models <- list(fm)
num_causes <- 1
if (!missing(cause) ){
if(!is.null(cause)) stop("No cause should be specified for a coxph model.")
}
cause<-1
} else if (type == "CauseSpecificCox" | type =="CSC") {
models <- fm$models
num_causes <- length(models)
if (missing(cause)) { cause <- as.numeric(fm$theCause)
} else if (is.null(cause)) { cause <- as.numeric(fm$theCause) }
if (length(cause) > 1) stop(paste0("Can only predict one cause. Provided are: ", paste(cause, collapse = ", "), sep = ""))
if (!(cause %in% fm$causes)) stop("Error in cause. Not one of the causes in the model.")
} else {
stop("Error in fm argument. Input model is of the wrong type.
\nOnly supported classes are CauseSpecificCox and coxph")
}
if (missing(newdata) & !missing(tLM)) {
stop("newdata must be specified")
}
if (!missing(newdata)){
if (missing(tLM)) {
tLM = newdata$LM
if (is.null(tLM)) {
stop("tLM must be specified")
}
}
## Check prediction times match with LMs used in training
if (max(tLM) > supermodel$end_time & !extend){
stop(paste0("Landmark/prediction time points tLM contains values later than the last LM used in model fitting
(last LM used in model fitting=",supermodel$end_time," and max tLM value=",max(tLM),").
If you wish to still make predictions at these times, set arg extend=T but note that results may be unreliable."))
}
else if (max(tLM) > supermodel$end_time & extend){
if (!silence) message(paste0("NOTE:landmark/prediction time points tLM contains values later (max value=",max(tLM),") than the last LM used in model fitting (=",supermodel$end_time,").",
"\nPredictions at times after ",supermodel$end_time," may be unreliable."))
}
## Check prediction times & newdata given are coherent with each other
num_preds <- nrow(newdata)
if(!(length(tLM)==num_preds)){
if (length(tLM) == 1){
tLM<-rep(tLM,num_preds)
} else {
stop("Error in newdata or tLM. Must have length(tLM) == nrow(newdata) or tLM be one landmarking point.")
}
}
## Get risk scores
risks <- matrix(sapply(1:num_preds, function(i){
tLMi <- tLM[i]
newdatai <- newdata[i,]
sapply(1:num_causes, function(c) riskScore(models[[c]], tLMi, newdatai, func_covars, func_LM))
}),nrow=num_causes)
data <- newdata
} else {
## Get risk scores
## Note that linear predictors are centered, so need to un-center them for correct comparison.
tLM <- supermodel$original.landmarks
risks <- supermodel$linear.predictors
num_preds <- ncol(risks)
# sanity check
if (num_preds == 0){stop("Newdata must be specified.")}
if (length(tLM) != num_preds){ stop("Error in newdata or tLM. Must have length(tLM) == nrow(newdata) or tLM be one landmarking point.") }
data <- fm$call$data
}
# Baseline hazards
sf <- lapply(1:num_causes,function(i) {
base_data = 0 * models[[i]]$coefficients
survfit(models[[i]], newdata=base_data)
})
sf <- lapply(sf, function(s) data.frame(time=s$time,surv=s$surv,Haz=-log(s$surv)))
Fw <- rep(0, length(tLM))
sf1 <-sf[[cause]]
Fw <- rep(NA, num_preds)
for(i in 1:num_preds) {
if (is.na(risks[cause,i])){
Fw[i] <- NA
} else {
tLMi <- tLM[i]
pred_window <- (tLMi <= sf1$time & sf1$time <= tLMi+w)
n_times <- sum(pred_window)
haz <- sf1$Haz[pred_window] * exp(risks[cause,i])
instHaz <- haz[2:n_times]-haz[1:n_times-1]
idx <- (instHaz != 0)
instHaz <- instHaz[idx]
times <- sf1$time[pred_window][2:n_times]
times <- times[idx]
w_adj <- times-tLMi
surv <- c()
for (j in 1:length(w_adj)){
s <- 0
wj<-w_adj[j]
for (c in 1:num_causes){
sfc <- sf[[c]]
sfc$Haz <- sfc$Haz * exp(risks[c,i])
f <- stats::stepfun(sfc$time, c(0,sfc$Haz), right=FALSE)
s <- s + (f(tLMi+wj)-f(tLMi))
}
surv <- c(surv,exp(-s))
}
Fw[i] <- sum(instHaz*surv)
}
}
if(complete){
idx = !is.na(Fw)
preds = data.frame(LM=tLM[idx],risk=Fw[idx])
data = data[idx,]
} else {
preds = data.frame(LM=tLM,risk=Fw)
data = data
}
out = list(preds=preds, w=w, type=type, LHS=supermodel$LHS, data=data, cause=cause)
class(out) = "LMpred"
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.