#' Calibration Analysis of Survival Predictions
#'
#' @rdname survCalibCurve
#'
#' @param data A dataset
#' @param time unquoted name of the time variable
#' @param event unquoted name of the event variable (0/1)
#' @param p unquoted name of the variable containing predicted probabilities (at
#' t=timepoint)
#' @param timepoint Value of 'time' where probabilities are extracted (from the
#' Kaplan-Meier curves).
#' @param showCStat Logical. If TRUE, a concordance statistic (from
#' \code{rms::somers2()}) with confidence limits (2 std. errors) will be displayed
#' @param p.out grid of predicted probability values at which calibration curve
#' is estimated. If NULL, a sequence of 10 values evenly spaced along the range
#' of inputted predicted probabilities is generated.
#' @param na.rm Logical. Remove rows with missing values?
#' @param knots knots parameter passed to \code{splines::bs()}
#' @param bs.degree degree parameter passed to \code{splines::bs()}
#'
#' @details The predicted probabilities are transform via the logit prior to
#' spline curve fitting, in order to stabilize the resulting calibration
#' curve estimates. Therefore, if specifying the \code{knots=} parameter to
#' \code{bs.params}, the values must be expressed on the logit scale. (It's
#' usually easier/sufficient to just supply \code{df} instead of \code{knots}.)
#'
#' Spline fits in Cox proportional hazards model are prone to minor convergence
#' issues, which impact the validity of Wald-based standard errors, tests, and
#' confidence intervals. Hence this may produce a warning of the form "Loglik
#' converged before variable X; beta may be infinite". See
#' https://stat.ethz.ch/pipermail/r-help/2008-September/174201.html for details.
#'
#' The user is advised to choose the knots for the splines very carefully,
#' especially in the presence of highly skewed distributions of model-predicted
#' probabilities. The default values are just a suggestion for a typical
#' scenario where predictions are highly skewed to the right.
#'
#' @return
#' @export
#'
#' @examples
survCalibCurve <- function(data, time, event, p, timepoint,
showCStat=TRUE, p.out = NULL, na.rm=FALSE,
bs.knots=c(.01,.025,.05,.1,.2,.3), bs.degree=3) {
time <- enquo(time)
event <- enquo(event)
p <- enquo(p)
gv <- group_vars(data)
md <- data %>%
select(group_vars(data), !!time, !!event, !!p) %>%
rename(time=!!time, event=!!event, p=!!p) %>%
mutate(risk.gp = as.character(cut(p, c(0,bs.knots,1), include.lowest = TRUE)))
if(na.rm){
md <- md %>% filter(!is.na(time) & !is.na(event) & !is.na(p))
}
if(any(apply(md,2,function(x) sum(is.na(x)))>0)) stop("Input data contains NAs")
if(min(md$p)<0 | max(md$p) > 1) stop("Parameter 'p' must contain probabilities.")
if(is.null(p.out)) {
p.out <- seq(min(md$p), max(md$p), length.out=100)
} else{
if(min(p.out)<0 | max(p.out) > 1) {
stop("Parameter 'p.out' must contain probabilities.")
}
}
# this subroutine fits a spline model to get the calibration curve
getCalibCurve <- function(data){
g <- survival::coxph(survival::Surv(time, event) ~
splines::bs(p, knots = bs.knots, degree = bs.degree),
data = data)
pg <- survival::survfit(g, newdata = tibble(p = p.out))
tix <- which(pg$time >= timepoint)[1]
tibble(p = p.out,
incidence = 1-pg$surv[tix,],
lcl = 1-pg$upper[tix,],
ucl = 1-pg$lower[tix,])
}
# this subroutine splits the data into risk groups and does the same
getRiskGpCalibData <- function(data){
rgps <- unique(data$risk.gp)
mean.p <- group_by(data, risk.gp) %>%
summarize(mean.p = mean(p)) %>%
select(risk.gp, mean.p)
h <- survival::coxph(survival::Surv(time, event) ~ risk.gp, data = data)
ph <- survival::survfit(h, newdata = tibble(risk.gp = rgps))
tix <- which(ph$time >= timepoint)[1]
left_join(mean.p,
tibble::tibble(risk.gp = rgps,
incidence = 1-ph$surv[tix,],
lcl = 1-ph$upper[tix,],
ucl = 1-ph$lower[tix,]),
by = "risk.gp")
}
list(
cal = plyr::ddply(md, gv, getCalibCurve),
risk.gp.cal = plyr::ddply(md, gv, getRiskGpCalibData),
knots = bs.knots
)
}
#d$pcerm <- d$pcermpct / 100; d$pcermpct <- NULL
# z <- group_by(d, sex, diabetes) %>%
# survCalibCurve(event_weeks, ascvd_event, pcerm, 260, na.rm=TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.