### survival
# kmplot, kmplot_by, kmplot_ticks, local_coxph_test, surv_cp, surv_summary,
# surv_table, survdiff_pairs, coxph_pairs, landmark, surv_extract, surv_median,
# surv_prob, kmdiff, surv_dist
#
# unexported:
# stratify_formula, points.kmplot, kmplot_data_, atrisk_data_, terms.inner
#
# surv_test (unexported):
# lr_text, lr_pval, tt_text, tt_pval, hr_text, hr_pval, pw_pval, pw_text,
# c_text, cc_pval, cc_text
###
stratify_formula <- function(formula, vars = NULL) {
# rawr:::stratify_formula(Surv(a, b) ~ x, c('y', 'z'))
# rawr:::stratify_formula(Surv(a, b) ~ x + y, c('y', 'z'))
if (is.null(vars))
return(as.formula(formula))
st <- paste(sprintf('strata(%s)', vars), collapse = ' + ')
ff <- as.character(formula)
update(
as.formula(sprintf('%s ~ %s + %s', ff[2L], ff[3L], st)),
as.formula(sprintf('. ~ . %s', paste(sprintf('- %s', vars), collapse = '')))
)
}
#' Survival curves
#'
#' Plot Kaplan-Meier or Cox regression models with optional at-risk table,
#' median survival summary, confidence bands/intervals, pairwise tests,
#' hazard ratios, cumulative incidences, and other features.
#'
#' Line specifications (e.g., \code{lty.surv}, \code{lwd.surv}, etc.) will be
#' recycled as needed.
#'
#' If \code{col.band} is not \code{NULL}, \code{NA}, or \code{FALSE}, a
#' confidence band is plotted; however, this is not a confidence band in the
#' statistical sense, i.e., a xx-percent chance of containing the entire
#' population of the survival curve which are wider than the point-wise
#' confidence limits. Rather, it refers to a band of color plotted between
#' the confidence limits calculated in the survfit object. That is, the xx-\%
#' confidence interval (plotted when \code{lty.ci != 0}) and the confidence
#' bands are identical--just two ways of plotting the same invervals.
#'
#' \code{xaxs} is the style of the x-axis; see \code{\link{par}}. The default
#' for \code{kmplot} is \code{"S"} which is equivalent to \code{xaxs = "i"}
#' but with the maximum \code{xlim} value increased by 4\%. Other styles for
#' \code{xaxs} currently implemented in \code{R} are \code{"r"} (default for
#' plotting and the previous value for \code{kmplot}) and \code{"i"} which
#' will \emph{not} add padding to the ends of the axes.
#'
#' When saving plots, it is highly recommended to use \code{\link{png}},
#' \code{\link{svg}}, \code{\link{pdf}}, etc. instead of exporting directly
#' from the \code{R} graphics device. Doing so may cause the at-risk table or
#' legend to be mis-aligned.
#'
#' @param object an object of class \code{\link[survival]{survfit}} or
#' \code{\link[survival]{survfit.coxph}}; note that for the latter some
#' features are not available, e.g., \code{kmdiff}, \code{lr_test},
#' \code{tt_test}, \code{hr_text}, \code{atrisk.type} and/or
#' \code{times.type \%in\% c("atrisk", "events", "atrisk-events")}, etc.
#' @param data (optional) the data frame used to create \code{s}, used to
#' obtain some data not available in \code{survfit} objects; if not given,
#' \code{object$call$data} will be searched for in the \code{\link{sys.frames}}
#' @param lty.surv,lwd.surv,col.surv line type, width, and color for survival
#' curve(s); colors may be either numeric, color names as character
#' string(s), or hexadecimal string(s)
#'
#' colors will be assigned to curves in order of strata or may be mapped
#' using a named vector (the names must match the ones given to
#' \code{strata.lab} or those that are created by
#' \code{\link[survival]{strata}}); this can be useful to set colors for
#' specific strata levels in multiple plots if some levels are missing; see
#' examples in \code{\link{kmplot_by}}
#' @param mark,lwd.mark numeric plotting character (\code{\link{pch}}) or
#' character string, e.g., \code{''}, \code{'|'}; if \code{mark = 'bump'}, a
#' mark will be drawn only above the curve; \code{lwd.mark} controls the line
#' width when a \code{pch} or \code{"bump"} is used
#' @param lty.ci,lwd.ci,col.ci line type, width, and color for confidence
#' interval(s); not plotted (i.e., \code{= 0}) by default
#' @param col.band color for confidence band(s); either as numeric, color
#' string(s), or hexadecimal string(s); if \code{TRUE}, \code{col.surv}
#' values will be used; if \code{FALSE}, \code{NA}, or \code{NULL}, no
#' bands will be plotted; also note that this is not a true confidence band;
#' see details
#' @param alpha.band alpha transparency (in \code{[0, 1]}) of the band
#' @param kmdiff logical; if \code{TRUE}, a confidence band for the difference
#' between two curves is shown; see \code{\link{kmdiff}} (note that the color
#' can be passed via \code{col.ci} and the confidence interval is taken from
#' \code{object$conf.int})
#' @param atrisk.table logical; if \code{TRUE} (default), draws at-risk table
#' @param atrisk.type a character string giving the type of "at-risk" table to
#' show; one of \code{"atrisk"} (number at-risk), \code{"events"} (cumulative
#' number of events), \code{"atrisk-events"} (both), \code{"survival"}
#' (survival probability), \code{"percent"} (survival percent), or
#' \code{"cuminc"} (cumulative incidence); \code{"survival-ci"},
#' \code{"percent-ci"}, \code{"percent-cuminc"}, \code{"cuminc-ci"}, and
#' \code{"percent-cuminc-ci"} are similar but add confidence intervals
#' @param atrisk.digits when survival probabilities are shown in at-risk table
#' (see \code{atrisk.type}), number of digits past the decimal to keep
#' @param atrisk.lab heading for at-risk table
#' @param atrisk.pad extra padding between plot and at-risk table; alternatively,
#' a vector of padding for each line in the at-risk table, recycled as needed
#' @param atrisk.lines logical; draw lines next to strata in at-risk table
#' @param atrisk.lines.adj a vector of length 2 giving the left and right
#' adjustment for the at-risk lines in normalized units, e.g.,
#' \code{c(0, 1)} and \code{c(-0.5, 0.5)} doubles the length by extending
#' right-only and equally left and right, respectively
#' @param atrisk.col logical or a vector with colors for at-risk table text;
#' if \code{TRUE}, \code{col.surv} will be used
#' @param atrisk.min optional integer to replace any at-risk counts
#' \code{< atrisk.min} with \code{"---"} by default; note that if
#' \code{atrisk.min} is \emph{named}, then \code{names(atrisk.min)} will be
#' the replacement string
#' @param strata.lab labels used in legend and at-risk table for strata; if
#' \code{NULL} (default), labels created in \code{survfit} are used; if only
#' one strata is present, "All" is used by default; if \code{FALSE}, labels
#' are not used; if \code{TRUE}, labels will be stripped of variable names
#' @param strata.expr an alternative to \code{strata.lab} which allows for
#' \code{\link{bquote}} or \code{\link{expression}} to be passed to labels
#' for at-risk table; note that \code{strata.expr} trumps \code{strata.lab}
#' @param strata.order order of strata in legend and at-risk table
#' @param extra.margin increase left margin when strata labels in at-risk
#' table are long (note that this will be overridden by \code{mar})
#' @param mar margins; see \code{mar} section in \code{\link{par}}
#' @param median logical or numeric; if \code{TRUE}, median and confidence
#' interval for each curve is added to at-risk table at a calculated
#' position; for more control, use a specific x-coordinate
#' @param digits.median number of digits past the decimal point to keep for
#' median(s)
#' @param ci.median logical; if \code{TRUE}, confidence interval for medians
#' are shown
#' @param args.median an optional \emph{named} list of \code{\link{mtext}}
#' arguments controlling the \code{median} text
#'
#' additionally, \code{list(x = , y = )} may be given for exact placement
#' (translated to \code{at} and \code{line}, respectively)
#' @param xaxs style of axis; see details or \code{\link{par}}
#' @param xlim,ylim x- and y-axis limits
#' @param xaxis.at,yaxis.at positions for x- and y-axis labels and ticks
#' @param atrisk.at x-coordinates to show at-risk table (default is
#' \code{xaxis.at})
#' @param xaxis.lab,yaxis.lab x- and y-axis tick labels
#' @param xlab,ylab x- and y-axis labels
#' @param main title of plot
#' @param cex.axis,cex.atrisk text size for axis tick labels and at-risk table
#' @param cex.lab,cex.main text size for axis and main titles
#' @param legend logical, a vector of x/y coordinates, or a keyword (see
#' \code{\link{legend}}); if \code{TRUE}, the default position is
#' \code{"bottomleft"}
#' @param args.legend an optional \emph{named} list of \code{\link{legend}}
#' arguments controlling the \code{legend}
#' @param median.legend logical; if \code{TRUE}, medians (and optionally
#' confidence intervals if \code{ci.median = TRUE}) are added to the legend,
#' useful for adding a legend and median without at-risk table
#' @param lr_test logical or numeric; if \code{TRUE}, a log-rank test will be
#' performed and the results added to the top-right corner of the plot; if
#' numeric, the value is passed as \code{rho} controlling the type of test
#' performed; see \code{\link{survdiff}}
#' @param tt_test logical; if \code{TRUE}, Tarone's trend test will be
#' performed and the resultls added to the top-right corner of the plot;
#' note that this will override \code{lr_test}
#' @param test_details logical; if \code{TRUE} (default), all test details
#' (test statistic, degrees of freedom, p-value) are shown; if \code{FALSE},
#' only the p-value is shown
#' @param args.test an optional \emph{named} list of \code{\link{mtext}}
#' arguments controlling the \code{*_test} text; additional text can be
#' appended with \code{list(.prefix = '')} or \code{list(.suffix = '')}
#' @param hr_text logical; if \code{TRUE}, a \code{\link{coxph}} model is fit,
#' and a summary (hazard ratios, confidence intervals, and Wald p-values)
#' is shown
#' @param args.hr an optional \emph{named} list of \code{\link{legend}}
#' arguments controlling the \code{hr_text} legend
#' @param events logical; if \code{TRUE} and \code{hr_text = TRUE}, the total
#' events by group is shown
#' @param pw_test logical; if \code{TRUE}, all pairwise tests of survival
#' curves are performed, and p-valuees are shown
#' @param args.pw an optional \emph{named} list of \code{\link{legend}}
#' arguments controlling the \code{pw_text} legend
#' @param format_pval logical; if \code{TRUE}, p-values are formatted with
#' \code{\link{pvalr}}; if \code{FALSE}, no formatting is performed;
#' alternatively, a function can be passed which should take a numeric value
#' and return a character string (or a value to be coerced) for printing
#' @param stratify (dev) \code{\link[survival]{strata}} variables
#' @param fun survival curve transformation, one of \code{"S"} or \code{"F"}
#' for the usual survival curve or empirical CDF, respectively
#' @param times time points to show additional summaries in a table added to
#' the plot (requires \href{https://github.com/raredd/plotr}{\pkg{plotr}})
#' @param times.lab heading for the \code{times} table
#' @param times.type see \code{atrisk.type}
#' @param times.digits when survival probabilities are shown in times table
#' (see \code{times.type}), number of digits past the decimal to keep
#' @param args.times an optional \emph{named} list of \code{plotr::tableplot}
#' arguments controlling the table appearance and/or location
#' @param add logical; if \code{TRUE}, \code{par}s are not reset; allows for
#' multiple panels, e.g., when using \code{par(mfrow = c(1, 2))}
#' @param panel.first an expression to be evaluated after the plot axes are
#' set up but before any plotting takes place
#' @param panel.last an expression to be evaluated after plotting but before
#' returning from the function
#' @param ... additional parameters (\code{font}, \code{mfrow}, \code{bty},
#' \code{tcl}, etc.) passed to \code{par}
#'
#' @references
#' Adapted from \url{http://biostat.mc.vanderbilt.edu/wiki/Main/TatsukiRcode}
#'
#' @seealso
#' \code{\link{kmplot_by}}; \code{survival:::plot.survfit};
#' \code{\link{kmplot_data_}}; \code{\link{atrisk_data_}};
#' \code{\link{lr_text}}; \code{\link{lr_pval}}; \code{\link{points.kmplot}};
#' \code{\link{kmdiff}}; \code{\link[plotr]{ggsurv}}
#'
#' @examples
#' library('survival')
#' km1 <- survfit(Surv(time, status) ~ sex, data = colon)
#' km2 <- survfit(Surv(time, status) ~ I(rx == "Obs") + adhere, data = colon)
#'
#'
#' ## basic usage
#' kmplot(km1)
#' kmplot(km1, args.median = list(x = 500, y = 0.2))
#' kmplot(km1, atrisk.table = FALSE, median.legend = TRUE)
#' kmplot(km1, fun = 'F')
#' kmplot(km1, atrisk.col = c('grey50', 'tomato'), test_details = FALSE,
#' args.test = list(col = 'red', cex = 1.5, .prefix = 'Log-rank: '))
#' kmplot(km1, mark = 'bump', atrisk.lines = FALSE, median = TRUE)
#' kmplot(km1, mark = 'bump', atrisk.lines = FALSE, median = 3500)
#'
#' ## atrisk spacing adjustment
#' kmplot(km2, atrisk.pad = 3)
#' kmplot(km2, atrisk.pad = 1:4 / 4)
#'
#' kmplot(km2, atrisk.table = FALSE, lwd.surv = 2, lwd.mark = 0.5,
#' col.surv = 1:4, col.band = c(1, 0, 0, 4))
#'
#'
#' ## table of additional stats
#' kmplot(km1, times = 1:3 * 1000, times.type = 'percent-ci')
#' kmplot(km1, times = 1:3 * 1000, times.type = 'percent-ci',
#' atrisk.table = FALSE, args.times = list(x = 'bottomright'))
#' kmplot(km1, times = 0:10 * 250, times.type = 'survival',
#' times.digits = 3, times.lab = 'Prob. of survival',
#' args.times = list(x = mean(par('usr')[1]), y = 0))
#'
#'
#' ## use stratified models
#' kmplot(km1, stratify = 'differ')
#' kmplot(km1, stratify = c('differ', 'adhere'))
#'
#'
#' ## for hazard ratios, use factors to fit the proper cox model
#' kmplot(survfit(Surv(time, status) ~ factor(sex), data = colon),
#' hr_text = TRUE, strata.lab = TRUE)
#' kmplot(survfit(Surv(time, status) ~ interaction(sex, rx), data = colon),
#' hr_text = TRUE, atrisk.table = FALSE, legend = FALSE)
#' kmplot(survfit(Surv(time, status) ~ interaction(sex, rx), data = colon),
#' hr_text = TRUE, atrisk.table = FALSE, legend = FALSE,
#' format_pval = function(x) format(x, digits = 2, scipen = 0))
#'
#'
#' ## at-risk and p-value options
#' kmplot(km2, tt_test = TRUE, test_details = FALSE)
#' kmplot(km2, tt_test = TRUE, format_pval = format.pval)
#' kmplot(km1, atrisk.type = 'survival', atrisk.digits = 3L)
#' kmplot(km1, atrisk.type = 'atrisk-events')
#' kmplot(survfit(Surv(time, status) ~ rx, data = colon),
#' pw_test = TRUE, args.pw = list(text.col = 1:3, x = 'bottomleft'))
#'
#' ## expressions in at-risk table (strata.expr takes precedence)
#' kmplot(km1, strata.lab = c('\u2640', '\u2642'))
#' kmplot(km1, strata.lab = c('\u2640', '\u2642'),
#' strata.expr = expression(widetilde(ring(Female)),
#' phantom() >= Male))
#'
#' ## character vectors passed to strata.expr will be parsed
#' kmplot(km1, strata.expr = c('Sex[Female]', 'Sex[Male]'))
#' ## but not those passed to strata.lab
#' kmplot(km1, strata.lab = c('Sex[Female]', 'Sex[Male]'))
#'
#'
#' ## when using mfrow options, use add = TRUE and mar to align axes
#' mar <- c(9, 6, 2, 2)
#' op <- par(mfrow = c(1, 2))
#' kmplot(km1, add = TRUE, mar = mar)
#' kmplot(km2, add = TRUE, strata.lab = TRUE, mar = mar)
#' par(mfrow = op)
#'
#'
#' ## survfit.coxph object
#' cx1 <- coxph(Surv(time, status) ~ sex, data = colon)
#' cx2 <- coxph(Surv(time, status) ~ sex + adhere, data = colon)
#' cx3 <- coxph(Surv(time, status) ~ I(rx == "Obs") + adhere + age, data = colon)
#'
#' kmplot(survfit(cx1))
#' kmplot(survfit(cx1, data.frame(sex = 0:1)), strata.lab = c('F', 'M'))
#'
#' nd <- expand.grid(adhere = 0:1, sex = 0:1)
#' kmplot(survfit(cx2, nd), col.surv = 1:4, strata.lab = do.call(paste, nd))
#' ## compare
#' kmplot(survfit(Surv(time, status) ~ sex + adhere, data = colon),
#' atrisk.type = 'survival')
#'
#' nd <- data.frame(rx = 'Obs', adhere = 0:1, age = c(30, 30, 70, 70))
#' kmplot(survfit(cx3, nd), col.surv = 1:4, strata.lab = do.call(paste, nd))
#'
#' @export
kmplot <- function(object, data = NULL,
## basic plot options
lty.surv = par('lty'), lwd.surv = par('lwd'),
col.surv = seq_along(object$n), mark = 3L, lwd.mark = lwd.surv,
## confidence options
lty.ci = 0, lwd.ci = lwd.surv,
col.ci = col.surv, col.band = FALSE, alpha.band = 0.5,
kmdiff = FALSE,
## at-risk table options
atrisk.table = TRUE, atrisk.lab = NULL, atrisk.pad = 0.5,
atrisk.type = c('atrisk', 'events', 'atrisk-events',
'survival', 'survival-ci',
'percent', 'percent-ci',
'cuminc', 'percent-cuminc',
'cuminc-ci', 'percent-cuminc-ci'),
atrisk.digits = (!grepl('percent', atrisk.type)) * 2,
atrisk.lines = TRUE, atrisk.lines.adj = NA,
atrisk.col = !atrisk.lines, atrisk.min = NULL,
strata.lab = NULL,
strata.expr = NULL, strata.order = seq_along(object$n),
extra.margin = 5, mar = NULL,
median = FALSE, digits.median = 0L, ci.median = TRUE,
args.median = list(),
## aesthetics
xaxs = 's', xlim = NULL, ylim = NULL,
xaxis.at = pretty(xlim), xaxis.lab = xaxis.at,
atrisk.at = xaxis.at,
yaxis.at = pretty(0:1), yaxis.lab = yaxis.at,
xlab = 'Time', ylab = 'Probability', main = NULL,
cex.axis = par('cex.axis'), cex.atrisk = cex.axis,
cex.lab = par('cex.lab'), cex.main = par('cex.main'),
legend = !atrisk.table && !is.null(object$strata),
args.legend = list(), median.legend = FALSE,
## test/hazard ratio options
lr_test = TRUE, tt_test = FALSE, test_details = TRUE,
args.test = list(),
hr_text = FALSE, args.hr = list(), events = FALSE,
pw_test = FALSE, args.pw = list(),
format_pval = TRUE,
## other options
stratify = NULL, fun = c('S', 'F'),
times = NULL, times.lab = NULL,
times.type = c('percent',
'atrisk', 'events', 'atrisk-events',
'survival', 'survival-ci',
'percent-ci',
'cuminc', 'percent-cuminc',
'cuminc-ci', 'percent-cuminc-ci'),
times.digits = (!grepl('percent', times.type)) * 2,
args.times = list(),
add = FALSE, panel.first = NULL, panel.last = NULL, ...) {
if (!inherits(object, 'survfit'))
stop('\'object\' must be a \'survfit\' object')
## coerce survfitcox object to mimic survfit
if (inherits(object, 'survfitcox')) {
kmdiff <- lr_test <- tt_test <- hr_text <- FALSE
types <- c('survival', 'survival-ci', 'percent', 'percent-ci', 'cuminc',
'percent-cuminc', 'cuminc-ci', 'percent-cuminc-ci')
atrisk.type <- if (missing(atrisk.type))
'survival'
else if (!atrisk.type[1L] %in% types) {
warning('for \'survfitcox\' objects \'atrisk.type\' should be one of:\n\t',
toString(dQuote(types)),
'\ndefaulting to atrisk.type = "survival"')
'survival'
} else atrisk.type[1L]
times.type <- if (missing(times.type))
'survival'
else if (!times.type[1L] %in% types) {
warning('for \'survfitcox\' objects \'times.type\' should be one of:\n\t',
toString(dQuote(types)),
'\ndefaulting to times.type = "survival"')
'survival'
} else times.type[1L]
ng <- max(1L, ncol(object$surv))
object <- within.list(object, {
strata <- rep_len(length(time), ng)
names(strata) <- colnames(surv) %||% 'All'
n <- rep_len(n, length(strata))
time <- rep(time, ng)
n.risk <- rep(n.risk, ng)
n.event <- rep(n.event, ng)
n.censor <- rep(n.censor, ng)
surv <- c(surv)
cumhaz <- c(cumhaz)
std.err <- c(std.err)
std.chaz <- c(std.chaz)
lower <- c(lower)
upper <- c(upper)
})
}
if (length(form <- object$call$formula) == 1L)
object$call$formula <- as.formula(eval(form, parent.frame(1L)))
if (is.logical(lr_test)) {
rho <- 0
} else {
rho <- if (is.numeric(lr_test))
lr_test else 0
lr_test <- TRUE
}
if (tt_test)
lr_test <- FALSE
fun <- match.arg(fun)
form <- object$call$formula
svar <- as.character(form)[-(1:2)]
# svar <- as.character(unlist(object$call$formula[-(1:2)]))
## formula with strata (if given), used for tests/coxph
sform <- stratify_formula(form, stratify)
sdat <- object$.data %||% {
sdat <- deparse(object$call$data)
sname <- gsub('[$[].*', '', sdat)
tryCatch(if (identical(sdat, sname))
get(sdat, where(sname)) else eval(parse(text = sdat), where(sname)),
error = function(e) NULL)
}
sdat <- data %||% sdat
## remove missing here for special case: all NA for one strata level
## drops level in object$strata but not in table(sdat[, svar])
if (!is.null(sdat))
sdat <- na.omit(sdat[, c(all.vars(form), stratify)])
## single strata
one <- identical(svar, '1')
if (!(ng <- length(object$strata))) {
object$strata <- length(object$time)
if (length(svar) == 1L & !one) {
svar <- tryCatch({
tbl <- table(sdat[, svar])
names(tbl)[tbl > 0L]
}, error = function(e) NULL)
}
names(object$strata) <- if (!is.null(svar) & !one) svar else 'All'
# legend <- atrisk.lines <- FALSE
legend <- FALSE
}
cs <- col.surv
ng <- max(ng, 1L)
if (length(col.band) <= 1L)
if (is.null(col.band) || is.na(col.band))
col.band <- FALSE
col.surv <- if (is.null(col.surv)) {
if (isTRUE(col.band) || all(!col.band))
seq_along(object$n)
else rep_len(col.band, ng)
} else rep(col.surv, length.out = ng)
## must use rep instead of rep_len for names
col.band <- if (isTRUE(col.band))
col.surv else if (length(col.band) == 1L & identical(col.band, FALSE))
rep_len(NA, ng) else rep_len(col.band, ng)
mark <- rep_len(mark, ng)
lty.surv <- rep_len(lty.surv, ng)
lwd.surv <- rep_len(lwd.surv, ng)
lwd.mark <- rep_len(lwd.mark, ng)
lty.ci <- rep_len(lty.ci, ng)
lwd.ci <- rep_len(lwd.ci, ng)
col.ci <- rep_len(col.ci, ng)
## group names and more error checks
gr <- c(object$strata)
if (isTRUE(strata.lab)) {
svar <- colnames(model.frame(form, sdat)[, -1L, drop = FALSE])
cl <- c(list(survival::strata),
lapply(svar, as.symbol),
shortlabel = TRUE)
mode(cl) <- 'call'
names(object$strata) <- levels(eval(cl, model.frame(form, sdat)))
}
if (!is.null(strata.lab) && isTRUE(strata.lab))
strata.lab <- NULL
if (!is.null(strata.lab) && identical(strata.lab, FALSE))
strata.lab <- rep_len('', ng)
if (is.null(strata.lab))
strata.lab <- names(object$strata)
if (length(strata.lab) != ng && strata.lab[1L] != FALSE) {
strata.lab <- strata.lab[seq.int(ng)]
warning('length(strata.lab) != number of groups')
}
if (suppressWarnings(any(sort(strata.order) != seq.int(ng))))
stop('sort(strata.order) must contain 1:', ng)
if (ng == 1L & (strata.lab[1L] == 'strata.lab')) {
strata.lab <- 'Number at risk'
atrisk.lab <- ifelse(is.null(atrisk.lab), strata.lab, atrisk.lab)
}
## if cols are to be mapped by name, override col.surv
if (!is.null(names(cs))) {
cs <- cs[match(strata.lab, names(cs))]
if (!anyNA(cs))
col.surv <- col.atrisk <- cs
else warning(
sprintf('Mismatches in names(col.surv) - %s\n and strata.lab - %s\n',
toString(shQuote(names(col.surv))),
toString(shQuote(strata.lab))),
sprintf('\nTry col.surv = c(%s)', catlist(
setNames(as.list(shQuote(col.surv)), shQuote(strata.lab)))),
call. = FALSE
)
}
## run thru tcol to convert integers and strings? idk but
## running a string with alpha trans resets to no alpha/trans
## so skip if already a hex color with alpha/trans
if (!any(grepl('(?i)#[a-z0-9]{8}', col.surv)))
col.surv <- tcol(col.surv)
if (!any(grepl('(?i)#[a-z0-9]{8}', col.band)))
col.band <- tcol(col.band)
median.mar <- c('x', 'at') %in% names(args.median) | is.numeric(median)
if (identical(median, FALSE) & length(args.median) > 0L)
median <- TRUE
median.at <- NA
if (!identical(median, FALSE)) {
median.at <- median
median <- TRUE
}
op <- par(no.readonly = TRUE)
if (!add)
on.exit(par(op))
## guess margins based on at-risk table options
atrisk.pad <- rep_len(atrisk.pad, ng)
mar <- mar %||% c(
5 + ng * atrisk.table + max(atrisk.pad),
4 + pmax(4, extra.margin) - 3 * !atrisk.table,
2,
2 + 6 * (median & atrisk.table & !any(median.mar))
)
par(mar = mar, ...)
if (is.null(xlim))
xlim <- c(0, max(object$time))
oxlim <- xlim
if (is.null(ylim))
ylim <- c(0, 1.025)
## if default *lim not used, get new *-axis positions
if (is.null(xaxis.at))
xaxis.at <- pretty(xlim)
if (is.null(yaxis.at))
yaxis.at <- pretty(ylim)
## as in survival:::plot.survfit, adjust x-axis to start at 0
xaxs <- if (tolower(xaxs)[1L] == 's') {
xlim[2L] <- xlim[2L] * 1.04
'i'
} else 'r'
## reformat survival estimates
dat <- kmplot_data_(object, strata.lab)
dat.list <- split(dat, dat$order)
## base plot
cex.cex <- 1 / switch(par('mfrow')[1L], 1, 0.83, 0.66)
if (!length(cex.cex))
cex.cex <- 0.66
plot(
0, type = 'n', xlim = xlim, ylim = ylim, ann = FALSE,
axes = FALSE, xaxs = xaxs, panel.first = panel.first,
panel.last = {
box(bty = par('bty'))
axis(1L, xaxis.at, FALSE, lwd = 0, lwd.ticks = 1)
axis(1L, xaxis.at, xaxis.lab, FALSE, cex.axis = cex.axis * cex.cex)
axis(2L, yaxis.at, yaxis.lab, las = 1L, cex.axis = cex.axis * cex.cex)
title(xlab = xlab, ylab = ylab, main = main, ...,
cex.lab = cex.lab * cex.cex, cex.main = cex.main * cex.cex)
}
)
## at-risk table
usr <- par('usr')
atrisk.at <- atrisk.at[atrisk.at <= usr[2L]]
atrisk.type <- match.arg(atrisk.type)
wh <- switch(
atrisk.type,
atrisk = 'atrisk',
events = 'events',
'atrisk-events' = 'atrisk.events',
survival = 'survival',
'survival-ci' = 'survival.ci',
percent = 'percent',
'percent-ci' = 'percent.ci',
cuminc = 'cuminc',
'cuminc-ci' = 'cuminc.ci',
'percent-cuminc' = 'percent.cuminc',
'percent-cuminc-ci' = 'percent.cuminc.ci'
)
ar <- atrisk_data_(object, atrisk.at, atrisk.digits)
ss <- ar$summary
d2 <- ar$data
## at-risk table below surv plot
line.pos <- seq.int(ng)[order(strata.order)] + 3L + atrisk.pad
## set colors for lines of text
col.atrisk <- if (isTRUE(atrisk.col))
col.surv
else if (!identical(atrisk.col, FALSE) && length(atrisk.col) == ng)
atrisk.col else rep_len(1L, ng)
if (atrisk.table) {
## labels for each row in at-risk table
group.name.pos <- diff(usr[1:2]) / ifelse(atrisk.lines, -8, -16)
if (identical(strata.lab, FALSE))
strata.lab <- ''
strata.lab <- if (!is.null(strata.expr))
parse(text = strata.expr) else as.list(strata.lab)
for (ii in seq_along(strata.lab))
mtext(strata.lab[[ii]], side = 1L, at = group.name.pos, adj = 1, las = 1L,
line = line.pos[ii], col = col.atrisk[ii], cex = cex.atrisk)
## draw matching lines for n at risk
atrisk.lines.adj <- rep_len(atrisk.lines.adj, 2L)
atrisk.lines.adj[is.na(atrisk.lines.adj)] <- 0
padding <- abs(group.name.pos / 8)
at <- c(group.name.pos + padding, 0 - 4 * padding)
at <- at + diff(at) * atrisk.lines.adj
if (atrisk.lines)
for (ii in seq.int(ng))
## mess with the 4 here to adjust the length of the atrisk.line
axis(1L, at, FALSE, xpd = NA, line = line.pos[ii] + 0.5, lwd.ticks = 0,
col = col.surv[ii], lty = lty.surv[ii], lwd = lwd.surv[ii])
## right-justify numbers
ndigits <- lapply(d2, function(x) nchar(x[, 2L]))
max.len <- max(sapply(ndigits, length))
L <- do.call('rbind', lapply(ndigits, `length<-`, max.len))
nd <- apply(L, 2L, max, na.rm = TRUE)
for (ii in seq.int(ng)) {
tmp <- d2[[ii]]
w.adj <- strwidth('0', cex = cex.axis, font = par('font')) /
2 * nd[seq.int(nrow(tmp))]
## right-justify
right <- c('atrisk', 'events', 'atrisk-events')
right <- ''
## replace counts < atrisk.min with string
rpl <- names(atrisk.min) %||% '---'
idx <- as.integer(gsub(' .*|,', '', tmp[, wh])) < atrisk.min
# idx <- idx |
# c(FALSE, abs(diff(as.integer(gsub(',', '', tmp[, wh])))) < atrisk.min)
mtext(
if (is.null(atrisk.min) & grepl('atrisk', wh))
gsub('^0.*', '', tmp[, wh]) else replace(tmp[, wh], idx, rpl),
side = 1L, col = col.atrisk[ii],
las = 1L, line = line.pos[ii], cex = cex.atrisk,
# at = tmp$time + w.adj * atrisk.type %ni% right,
at = tmp$time, adj = if (atrisk.type %in% right) 1 else 0.5
)
}
if (is.null(atrisk.lab))
atrisk.lab <- switch(
atrisk.type,
atrisk = 'Number at risk',
events = 'Cumulative events',
'atrisk-events' = 'At risk (Events)',
survival = 'Probability',
'survival-ci' = sprintf('Probability (%s%% CI)', ss$conf.int * 100),
percent = 'Percent',
'percent-ci' = sprintf('Percent (%s%% CI)', ss$conf.int * 100),
cuminc = 'Cuminc',
'cuminc-ci' = sprintf('Cuminc (%s%% CI)', ss$conf.int * 100),
'percent-cuminc' = '% cuminc',
'percent-cuminc-ci' = sprintf('%% cuminc (%s%% CI)', ss$conf.int * 100)
)
if (!(identical(atrisk.lab, FALSE)))
mtext(atrisk.lab, side = 1L, at = usr[1L], cex = cex.atrisk,
line = min(line.pos) - 1.5, adj = 1, col = 1L, las = 1L)
}
## median (ci) text on at-risk or using x, y coords
tt <- surv_median(object, ci = ci.median, digits = digits.median,
show_conf = FALSE, print = FALSE)
tt <- format(tt, big.mark = ',')
tt <- gsub('NA', 'NR', tt)
at <- if (isTRUE(median.at))
usr[2L] + diff(usr[1:2]) / 10 else median.at
largs <- list(
text = c(if (!is.null(object$conf.int))
sprintf('Median (%s%% CI)', object$conf.int * 100) else 'Median', tt),
side = 1L, col = c(palette()[1L], col.atrisk), at = at, las = 1L,
adj = 0.5, cex = cex.atrisk, line = c(min(line.pos) - 1.5, line.pos)
)
largs$text <- trimws(largs$text)
if (!ci.median)
largs$text <- gsub(' .*', '', largs$text)
if (!islist(args.median))
args.median <- list()
## convert (x, y) coords to work with mtext
if (all(c('x', 'y') %in% names(args.median))) {
args.median <- within.list(args.median, {
at <- grconvertX(x, 'user', 'ndc')
line <- largs$line - min(largs$line) - 0.5 -
grconvertY(args.median$y, 'user', 'line')
outer <- TRUE
side <- 1L
text <- c(largs$text[1L], sprintf('%s: %s', strata.lab, largs$text[-1L]))
text <- gsub('^:\\s*', '', trimws(text))
x <- y <- NULL
})
}
args.median <- modifyList(largs, args.median)
if (median)
do.call('mtext', args.median)
## summary by strata at specified times
if (!is.null(times)) {
times.type <- match.arg(times.type)
wh <- switch(
times.type,
atrisk = 'atrisk',
events = 'events',
'atrisk-events' = 'atrisk.events',
survival = 'survival',
'survival-ci' = 'survival.ci',
percent = 'percent',
'percent-ci' = 'percent.ci',
cuminc = 'cuminc',
'cuminc-ci' = 'cuminc.ci',
'percent-cuminc' = 'percent.cuminc',
'percent-cuminc-ci' = 'percent.cuminc.ci'
)
if (is.null(times.lab))
times.lab <- switch(
times.type,
atrisk = 'Number at risk',
events = 'Cumulative events',
'atrisk-events' = 'At risk (Events)',
survival = 'Probability',
'survival-ci' = sprintf('Probability (%s%% CI)', ss$conf.int * 100),
percent = 'Percent',
'percent-ci' = sprintf('Percent (%s%% CI)', ss$conf.int * 100),
cuminc = 'Cuminc',
'cuminc-ci' = sprintf('Cuminc (%s%% CI)', ss$conf.int * 100),
'percent-cuminc' = '% cuminc',
'percent-cuminc-ci' = sprintf('%% cuminc (%s%% CI)', ss$conf.int * 100)
)
ar <- atrisk_data_(object, times, times.digits)
ss <- ar$summary
d2 <- ar$data
d3 <- lapply(d2, function(x) x[, c('time', wh)])
d3 <- suppressWarnings({
setNames(merge2(d3, by = 'time', all = TRUE), c('Time', strata.lab))
})
largs <- list(
x = 'bottomleft',
table = d3[, c(1, strata.order + 1L)], title = times.lab,
frame.colnames = TRUE, frame.type = 'line', font.title = 1L
)
if (!islist(args.times))
args.times <- list()
if (!requireNamespace('plotr', quietly = TRUE)) {
warning(
'the \'plotr\' package is required... run:\n\t',
'devtools::install_github(\'raredd/plotr\')'
)
} else do.call(plotr::tableplot, modifyList(largs, args.times))
}
## legend
if (!identical(legend, FALSE)) {
## defaults passed to legend
largs <- list(
x = if (isTRUE(legend)) 'bottomleft' else legend[1L],
y = if (length(legend) > 1L) legend[2L] else NULL,
legend = if (!is.null(strata.expr))
strata.expr[strata.order] else
(if (identical(strata.lab, FALSE) || all(strata.lab %in% ''))
names(object$strata) else strata.lab)[strata.order],
col = col.surv[strata.order], bty = 'n',
lty = lty.surv[strata.order], lwd = lwd.surv[strata.order]
)
if (!islist(args.legend))
args.legend <- list()
lg <- do.call('legend', modifyList(largs, args.legend))
largs <- within.list(largs, {
x <- lg$rect$left + lg$rect$w
y <- lg$rect$top
legend <- tt[strata.order]
})
largs <- largs[setdiff(names(largs), c('lwd', 'lty'))]
if (median.legend) {
lg <- do.call('legend', largs)
text(
lg$rect$left + lg$rect$w / 2, lg$rect$top,
args.median$text[1L], xpd = NA
)
}
}
## hazard ratios
cform <- sform
cform <- stratify_formula(sform, stratify)
txt <- tryCatch(
hr_text(cform, sdat, pFUN = format_pval),
error = function(e) ''
)
if (!identical(hr_text, FALSE)) {
if (length(nchar(na.omit(txt))) != ng) {
warning(
'number of strata levels does not equal number of HR estimates:\n',
' rhs of formula should be one factor with all combinations of\n',
' the strata levels (eg, see ?interaction)',
call. = FALSE
)
hr_text <- FALSE
}
tot <- tapply(object$n.event, rep(seq_along(object$strata), object$strata), sum)
if (events)
txt <- paste(txt, sprintf('(%s events)', tot))
largs <- list(
x = 'bottomleft', y = NULL, bty = 'n', xpd = NA,
legend = txt[strata.order], col = col.surv[strata.order],
lty = lty.surv[strata.order], lwd = lwd.surv[strata.order]
)
if (!islist(args.hr))
args.hr <- list()
do.call('legend', modifyList(largs, args.hr))
}
## pairwise tests
if (!identical(pw_test, FALSE)) {
txt <- pw_text(sform, sdat, pFUN = format_pval)
largs <- list(x = 'topright', legend = txt, bty = 'n', xpd = NA)
if (!islist(args.pw))
args.pw <- list()
do.call('legend', modifyList(largs, args.pw))
}
## survival and confidence lines
u0 <- u1 <- par('usr')
u1[2L] <- oxlim[2L] * 1.01
do.call('clip', as.list(u1))
for (ii in seq.int(ng)) {
tmp <- dat.list[[ii]]
if (nrow(tmp) < 2L) {
if (any(!is.na(col.band)))
message('note: strata level with one observation - no CI plotted')
} else {
x <- tmp$time
L <- tmp$lower
U <- tmp$upper
if (is.na(L[1L]))
L[1L] <- 1
if (is.na(U[1L]))
U[1L] <- 1
S <- tmp$survival
naL <- which(is.na(L))
L[naL] <- L[naL - 1L]
U[naL] <- U[naL - 1L]
lines(x, L, type = 's', col = col.ci[ii], lty = lty.ci[ii], lwd = lwd.ci[ii])
lines(x, U, type = 's', col = col.ci[ii], lty = lty.ci[ii], lwd = lwd.ci[ii])
## confidence bands
if (any(!is.na(col.band))) {
col.band[ii] <- tcol(col.band[ii], alpha = alpha.band)
px <- c(x, rev(x))
py <- c(U, rev(L))
if (identical(fun, 'F')) {
py <- 1 - py
}
polygon(px, py, border = NA, col = col.band[ii])
}
}
## survival curves
lines(object[ii], conf.int = FALSE, col = col.surv[ii], fun = fun,
lty = lty.surv[ii], lwd = lwd.surv[ii], mark.time = FALSE)
if (!mark[ii] == FALSE)
points.kmplot(
object[ii], col = col.surv[ii], pch = mark[ii], censor = TRUE, plot = TRUE,
event = FALSE, bump = mark[ii] == 'bump', lwd = lwd.mark[ii], fun = fun
)
}
if (isTRUE(kmdiff)) {
col.diff <- if (identical(col.surv, tcol(col.ci)))
adjustcolor('grey', 0.5) else col.ci[1L]
kmdiff(object, col.diff, object$conf.int)
}
do.call('clip', as.list(u0))
## add test text in upper right corner
if (lr_test | tt_test) {
FUN <- if (tt_test)
function(f, d, r, ...) tt_text(f, d, ...)
else lr_text
txt <- if (identical(svar, '1'))
'' else
tryCatch(
FUN(sform, sdat, rho, details = test_details, pFUN = format_pval),
error = function(e) ''
)
if (identical(txt, FALSE))
message('only one group for ', svar, ' -- no test performed')
else {
txt <- if (inherits(txt, 'call')) {
txt <- as.list(txt)
as.call(c(txt[1L], args.test$.prefix, txt[-1L], args.test$.suffix))
} else paste0(args.test$.prefix, txt, args.test$.suffix)
args.test$.prefix <- args.test$.suffix <- NULL
largs <- alist(
text = txt, side = 3L, at = par('usr')[2L], adj = 1, line = 0.25
)
if (!islist(args.test))
args.test <- list()
do.call('mtext', modifyList(largs, args.test))
}
}
panel.last
scall <- object$call
scall$formula <- sform
ccall <- scall
ccall[[1L]] <- quote(coxph)
scall[[1L]] <- quote(survdiff)
dat <- structure(dat, call = object$call, scall = scall, ccall = ccall)
invisible(dat)
}
#' \code{kmplot} points
#'
#' \code{survival:::points.survfit} with additional features.
#'
#' @param x,xscale,xmax,fun,col,pch see \code{\link{points.survfit}}
#' @param censor,event logical; \code{TRUE} will draw points at censored
#' and event times, respectively
#' @param bump logical; \code{TRUE} will draw a bump mark rather than pch
#' @param plot logical; \code{FALSE} will not plot but return plotting data
#' @param ... additional graphical parameters passed to \code{\link{par}}
points.kmplot <- function(x, xscale, xmax, fun,
col = par('col'), pch = par('pch'),
censor = TRUE, event = FALSE,
bump = FALSE, plot = TRUE, ...) {
conf.int <- FALSE
if (inherits(x, 'survfitms')) {
x$surv <- 1 - x$pstate
if (is.matrix(x$surv)) {
dimnames(x$surv) <- list(NULL, x$states)
if (ncol(x$surv) > 1 && any(x$states == '')) {
x$surv <- x$surv[, x$states != '']
x$p0 <- if (is.matrix(x$p0))
x$p0[, x$states != ''] else x$p0[x$states != '']
}
}
if (!is.null(x$lower)) {
x$lower <- 1 - x$lower
x$upper <- 1 - x$upper
}
if (missing(fun))
fun <- 'event'
}
firstx <- firsty <- NA # part of the common args but irrelevant for points
ssurv <- as.matrix(x$surv)
stime <- x$time
if(!is.null(x$upper)) {
supper <- as.matrix(x$upper)
slower <- as.matrix(x$lower)
} else {
conf.int <- FALSE
supper <- NULL ## marker for later code
}
## set up strata
if (is.null(x$strata)) {
nstrat <- 1
stemp <- rep_len(1, length(x$time)) ## same length as stime
} else {
nstrat <- length(x$strata)
stemp <- rep(seq.int(nstrat), x$strata) ## same length as stime
}
ncurve <- nstrat * ncol(ssurv)
firsty <- matrix(firsty, nrow = nstrat, ncol = ncol(ssurv))
if (!missing(xmax) && any(x$time > xmax)) {
## prune back the survival curves
## I need to replace x's over the limit with xmax, and y's over the
## limit with either the prior y value or firsty
keepx <- keepy <- NULL ## lines to keep
tempn <- table(stemp)
offset <- cumsum(c(0, tempn))
for (ii in seq.int(nstrat)) {
ttime <- stime[stemp == ii]
if (all(ttime <= xmax)) {
keepx <- c(keepx, seq.int(tempn)[ii] + offset[ii])
keepy <- c(keepy, seq.int(tempn)[ii] + offset[ii])
} else {
bad <- min((seq.int(tempn)[ii])[ttime > xmax])
if (bad == 1) { ## lost them all
if (!is.na(firstx)) { ## and we are plotting lines
keepy <- c(keepy, 1 + offset[ii])
ssurv[1 + offset[ii], ] <- firsty[ii, ]
}
} else keepy <- c(keepy, c(seq.int(bad - 1), bad - 1) + offset[ii])
keepx <- c(keepx, (seq.int(bad)) + offset[ii])
stime[bad + offset[ii]] <- xmax
x$n.event[bad + offset[ii]] <- 1 ## don't plot a tick mark
}
}
## ok, now actually prune it
stime <- stime[keepx]
stemp <- stemp[keepx]
x$n.event <- x$n.event[keepx]
if (!is.null(x$n.censor))
x$n.censor <- x$n.censor[keepx]
ssurv <- ssurv[keepy, , drop = FALSE]
if (!is.null(supper)) {
supper <- supper[keepy, , drop = FALSE]
slower <- slower[keepy, , drop = FALSE]
}
}
# stime <- stime / xscale ## scaling is deferred until xmax processing is done
if (!missing(fun)) {
if (is.character(fun)) {
tfun <- switch(
fun,
'log' = function(x) x,
'event' = function(x) 1 - x,
'cumhaz' = function(x) -log(x),
'cloglog' = function(x) log(-log(x)),
'pct' = function(x) x * 100,
## special case further below
'logpct' = function(x) 100 * x,
'identity' = function(x) x,
'S' = function(x) x,
'F' = function(x) 1 - x,
stop('Unrecognized function argument')
)
} else if (is.function(fun))
tfun <- fun
else stop('Invalid \'fun\' argument')
ssurv <- tfun(ssurv)
if (!is.null(supper)) {
supper <- tfun(supper)
slower <- tfun(slower)
}
firsty <- tfun(firsty)
}
res <- cbind(
stime = stime,
ssurv = if (ncol(ssurv) == 1L) c(ssurv) else ssurv,
n.event = x$n.event
)
if (!plot)
return(res)
if (ncurve == 1L || length(col) == 1L) {
if (censor) {
st <- stime[x$n.event == 0]
ss <- ssurv[x$n.event == 0]
if (bump)
segments(st, ss, st, ss + diff(par('usr')[3:4]) / 100,
col = col, ...)
else points(st, ss, col = col, pch = pch, ...)
}
if (event) {
st <- stime[x$n.event > 0]
ss <- ssurv[x$n.event > 0]
if (bump)
segments(st, ss, st, ss + diff(par('usr')[3:4]) / 100, col = col, ...)
else points(st, ss, col = col, pch = pch, ...)
}
} else {
c2 <- 1
col <- rep_len(col, ncurve)
if (!missing(pch))
pch2 <- rep_len(pch, ncurve)
for (jj in seq.int(ncol(ssurv))) {
for (ii in unique(stemp)) {
if (censor) {
who <- which(stemp == ii & x$n.event == 0)
st <- stime[who]
ss <- ssurv[who, jj]
if (bump)
segments(st, ss, st, ss + diff(par('usr')[3:4]) / 100, col = col, ...)
else points(st, ss, col = col[c2], pch = pch2[c2], ...)
}
if (event) {
who <- which(stemp == ii & x$n.event > 0)
st <- stime[who]
ss <- ssurv[who, jj]
if (bump)
segments(st, ss, st, ss + diff(par('usr')[3:4]) / 100, col = col, ...)
else points(st, ss, col = col[c2], pch = pch2[c2], ...)
}
c2 <- c2 + 1L
}
}
}
invisible(res)
}
#' Create data frame to plot survival data
#'
#' @param object a \code{\link{survfit}} object
#' @param strata.lab character vector of strata labels; must be same length
#' as \code{object$n}
#'
#' @seealso \code{\link{kmplot}}; \code{\link{kmplot_by}}
kmplot_data_ <- function(object, strata.lab) {
stopifnot(inherits(object, 'survfit'))
gr <- c(object$strata)
ng <- max(length(gr), 1L)
## if conf.type = 'none', upper/lower defined as object$surv for plotting
if (is.null(object$lower))
object <- c(object, list(lower = object$surv))
if (is.null(object$upper))
object <- c(object, list(upper = object$surv))
with(object, {
data.frame(
time, n.risk, n.event, survival = surv, lower = lower, upper = upper,
group = rep(strata.lab, gr), order = rep(seq.int(ng), gr)
)
})
}
#' Create a list of data frames for at-risk tables
#'
#' @param object a \code{\link{survfit}} object
#' @param times time points
#' @param digits digits
#'
#' @seealso
#' \code{\link{kmplot}}; \code{\link{kmplot_by}}
atrisk_data_ <- function(object, times, digits) {
ss <- summary(object, times = times, extend = TRUE)
if (is.null(ss$strata))
ss$strata <- rep_len(1L, length(ss$time))
d1 <- data.frame(
time = ss$time, n.risk = ss$n.risk, n.event = ss$n.event,
strata = c(ss$strata), surv = ss$surv, lci = ss$lower, uci = ss$upper
)
d2 <- split(d1, d1$strata)
d2 <- lapply(d2, function(x) {
# if (fun == 'F')
# x$surv <- 1 - x$surv
x$atrisk <- roundr(x$n.risk, 0L)
x$events <- roundr(cumsum(x$n.event), 0L)
x$atrisk.events <- sprintf('%s (%s)', x$atrisk, x$events)
x$survival <- roundr(x$surv, digits)
x$survival.ci <- sprintf(
'%s (%s, %s)', x$survival,
roundr(x$lci, digits),
roundr(x$uci, digits)
)
x$percent <- roundr(x$surv * 100, digits)
x$percent.ci <- sprintf(
'%s (%s, %s)', x$percent,
roundr(x$lci * 100, digits),
roundr(x$uci * 100, digits)
)
x$cuminc <- roundr(1 - x$surv, digits)
x$cuminc.ci <- sprintf(
'%s (%s, %s)', x$cuminc,
roundr(1 - x$uci, digits),
roundr(1 - x$lci, digits)
)
x$percent.cuminc <- roundr(100 - x$surv * 100, digits)
x$percent.cuminc.ci <- sprintf(
'%s (%s, %s)', x$percent.cuminc,
roundr(100 - x$uci * 100, digits),
roundr(100 - x$lci * 100, digits)
)
if (0 %in% times) {
vars <- c('survival.ci', 'percent.ci', 'cuminc.ci', 'percent.cuminc.ci')
zeros <- x[x$time %in% 0, vars]
x[x$time %in% 0, vars] <- gsub('\\s+.*', '', zeros)
}
x
})
list(summary = ss, data = d2)
}
#' Survival curve tests
#'
#' @description
#' Internal functions for \code{\link{survdiff}} and \code{\link{survfit}}
#' objects. Current methods include log-rank (\code{lr_*}) and pairwise
#' (\code{pw_*}) log-rank tests (by default although the exact test may be
#' controlled with the \code{rho} parameter passed to \code{survdiff}); a
#' trend test described by Tarone (\code{tt_*}); and Wald tests of
#' coefficients in a Cox regression (\code{hr_*}).
#'
#' \code{*_pval} functions take (\code{survfit} or \code{survdiff}) objects
#' or formulas and compute test statistics, p-values, etc. and return a
#' numeric vector or list.
#'
#' \code{*_text} functions format the test results for plotting and return
#' an expression or vector of character strings.
#'
#' Note that \code{pw_pval} and \code{pw_text} do not support formulas
#' with more than one predictor, e.g., \code{y ~ a + b}. An equivalent
#' formula is acceptable, e.g., \code{y ~ x} where \code{x} is the
#' \code{interaction(a, b)} or similar. See \code{\link{survdiff_pairs}}
#' for more details and examples.
#'
#' \code{c_text}, \code{cc_pval}, and \code{cc_text} are convenience functions
#' for \code{\link[survC1]{Inf.Cval}} and \code{\link[survC1]{Inf.Cval.Delta}}
#' to compute and compare c-statistics on censored data. \code{c_text} shows
#' c for a single model where \code{cc_text} and \code{cc_pval} compare two
#' models; see Uno (2011).
#'
#' @param formula,data,rho,... passed to \code{\link{survdiff}} or
#' \code{\link{coxph}}
#' @param object a \code{\link{survfit}}, \code{\link{survdiff}}, or
#' \code{\link{coxph}} object; alternatively a
#' \code{\link[=Surv]{survival formula}} in which case \code{data} must be
#' given
#' @param details logical; \code{TRUE} returns statistic, degrees of freedom,
#' and p-value where \code{FALSE} returns only a pvalue
#' @param pFUN logical; if \code{TRUE}, p-values are formatted with
#' \code{\link{pvalr}}; if \code{FALSE}, no formatting is performed;
#' alternatively, a function can be passed which should take a numeric value
#' and return a character string (or a value to be coerced) for printing
#' @param method for \code{pw_*}, the method used to adjust p-values for
#' multiple comparisons (default is \code{"none"}); see
#' \code{\link{p.adjust.methods}}
#' @param tau,iter,seed arguments passed to \code{\link[survC1]{Inf.Cval}} or
#' \code{\link[survC1]{Inf.Cval.Delta}}
#' @param digits,conf,show_conf for \code{c_text} and \code{cc_text}, options
#' to control the text output
#' @param formula1,formula2 for \code{cc_pva} and \code{cc_text}, the formulas
#' of the two models to compare
#'
#' @references
#' Tarone, Robert E. Tests for Trend in Life Table Analysis. \emph{Biometrika}
#' \strong{62} vol. 62 (Dec 1975), 679-82.
#'
#' Tarone, see also:
#' \url{https://stat.ethz.ch/pipermail/r-help/2008-April/160209.html}
#'
#' Uno, Hajime, et al. On the C-statistics for Evaluating Overall Adequacy of
#' Risk Prediction Procedures with Censored Survival Data. \emph{Statistics
#' in Medicine} \strong{30} vol. 10 (May 2011), 1105-17.
#'
#' @seealso
#' \code{\link{survdiff_pairs}}
#'
#' @examples
#' \dontrun{
#' library('survival')
#' data('larynx', package = 'KMsurv')
#' larynx$stage <- factor(larynx$stage)
#'
#' form <- Surv(time, delta) ~ stage
#' sf <- survfit(form, larynx)
#' sd <- survdiff(form, larynx)
#'
#' kmplot(sf, lr_test = TRUE)
#'
#'
#' ## log-rank
#' rawr:::lr_pval(sf)
#' rawr:::lr_pval(sd, TRUE)
#' rawr:::lr_text(Surv(time, delta) ~ stage, larynx)
#'
#'
#' ## pairwise log-rank
#' sf$call$formula <- form
#' rawr:::pw_pval(sf)
#' rawr:::pw_text(sf)
#'
#'
#' ## tarone trend
#' rawr:::tt_pval(sf)
#' rawr:::tt_pval(sd, TRUE)
#' rawr:::tt_text(Surv(time, delta) ~ stage, larynx)
#'
#' ## compare
#' chi <- coxph(Surv(time, delta) ~ stage, larynx)$score
#' list(chi = chi, p.value = pchisq(chi, 1, lower.tail = FALSE))
#'
#'
#' ## hazard ratio/wald p-values
#' rawr:::hr_pval(sf)
#' rawr:::hr_pval(sd, TRUE)
#' rawr:::hr_text(Surv(time, delta) ~ stage, larynx)
#'
#'
#' ## c-statistics for censored data (uno, 2011)
#' f1 <- Surv(time, delta) ~ stage
#' f2 <- Surv(time, delta) ~ stage + age
#' rawr:::c_text(f1, larynx)
#' rawr:::c_text(f2, larynx, details = TRUE)
#' rawr:::cc_text(f1, f2, larynx)
#' rawr:::cc_pval(f1, f2, larynx)
#' }
#'
#' @name surv_test
#' @rdname surv_test
lr_pval <- function(object, details = FALSE, data = NULL, ...) {
object <- if (inherits(object, 'survfit')) {
if (length(form <- object$call$formula) == 1L)
object$call$formula <- eval(object$call$formula, parent.frame(1L))
survdiff(as.formula(object$call$formula),
eval(data %||% object$.data %||% object$call$data))
} else if (inherits(object, c('formula', 'call')))
survdiff(object, data, ...)
else object
stopifnot(inherits(object, 'survdiff'))
chi <- object$chisq
df <- length(object$n) - 1L
pv <- pchisq(chi, df, lower.tail = FALSE)
if (details)
list(chisq = chi, df = df, p.value = pv) else pv
}
#' @rdname surv_test
lr_text <- function(formula, data, rho = 0, ..., details = TRUE, pFUN = NULL) {
pFUN <- if (is.null(pFUN) || isTRUE(pFUN))
function(x) pvalr(x, show.p = TRUE)
else if (identical(pFUN, FALSE))
identity else match.fun(pFUN)
object <- if (inherits(formula, 'survdiff'))
formula
else if (inherits(formula, 'survfit'))
survdiff(as.formula(formula$call$formula),
if (!missing(data)) data else eval(formula$call$data), ...)
else survdiff(formula, data, rho = rho, ...)
capture.output(
## Warning message: In pchisq(x$chisq, df) : NaNs produced
## this warning is only produced during print.survdiff
sd <- tryCatch(
print(object),
warning = function(w) '',
error = function(e) {
if (any(grepl('(?i)[1no]+ group', e)))
TRUE else e
})
)
if (isTRUE(sd))
return(FALSE)
if (identical(sd, ''))
return(sd)
if (!inherits(sd, 'survdiff'))
stop(sd)
## does not work with stratified models
# df <- sum(1 * (colSums(if (is.matrix(sd$obs))
# sd$exp else t(sd$exp)) > 0)) - 1
df <- length(sd$n) - 1L
pv <- pchisq(sd$chisq, df, lower.tail = FALSE)
txt <- sprintf('%s (%s df), %s', roundr(sd$chisq, 1L), df, pFUN(pv))
if (details)
bquote(paste(chi^2, ' = ', .(txt))) else pFUN(pv)
}
#' @rdname surv_test
tt_pval <- function(object, details = FALSE, data = NULL, ...) {
object <- if (inherits(object, c('survdiff', 'survfit'))) {
if (length(form <- object$call$formula) == 1L)
object$call$formula <- eval(object$call$formula, parent.frame(1L))
coxph(as.formula(object$call$formula),
eval(data %||% object$.data %||% object$call$data))
} else if (inherits(object, c('formula', 'call')))
coxph(object, data, ...)
else object
stopifnot(inherits(object, 'coxph'))
chi <- object$score
pv <- pchisq(chi, 1L, lower.tail = FALSE)
if (details)
list(chisq = chi, df = 1L, p.value = pv) else pv
}
#' @rdname surv_test
tt_text <- function(formula, data, ..., details = TRUE, pFUN = NULL) {
pFUN <- if (is.null(pFUN) || isTRUE(pFUN))
function(x) pvalr(x, show.p = TRUE)
else if (identical(pFUN, FALSE))
identity else match.fun(pFUN)
object <- if (inherits(formula, 'coxph'))
formula
else if (inherits(formula, 'survfit'))
coxph(as.formula(formula$call$formula),
if (!missing(data)) data else eval(formula$call$data), ...)
else formula
cph <- tryCatch(
if (inherits(object, 'coxph'))
object else coxph(formula, data, ...),
warning = function(w) '',
error = function(e) e
)
if (isTRUE(cph))
return(FALSE)
if (identical(cph, ''))
return(cph)
if (!inherits(cph, 'coxph'))
stop(cph)
chi <- cph$score
pv <- pchisq(chi, 1L, lower.tail = FALSE)
txt <- sprintf('%s (1 df), %s', roundr(chi, 1L), pFUN(pv))
if (details)
bquote(paste(chi^2, ' = ', .(txt))) else pFUN(pv)
}
#' @rdname surv_test
hr_pval <- function(object, details = FALSE, data = NULL, ...) {
object <- if (inherits(object, c('survdiff', 'survfit'))) {
if (length(form <- object$call$formula) == 1L)
object$call$formula <- eval(object$call$formula, parent.frame(1L))
coxph(as.formula(object$call$formula),
eval(data %||% object$.data %||% object$call$data))
} else if (inherits(object, c('formula', 'call')))
coxph(object, data, ...)
else object
stopifnot(inherits(object, 'coxph'))
obj <- summary(object)
obj <- cbind(obj$conf.int[, -2L, drop = FALSE],
p.value = obj$coefficients[, 'Pr(>|z|)'])
if (details)
obj else obj[, 'p.value']
}
#' @rdname surv_test
hr_text <- function(formula, data, ..., details = TRUE, pFUN = NULL) {
pFUN <- if (is.null(pFUN) || isTRUE(pFUN))
function(x) pvalr(x, show.p = TRUE)
else if (identical(pFUN, FALSE))
identity else match.fun(pFUN)
object <- if (inherits(formula, 'coxph'))
formula
else if (inherits(formula, 'survfit'))
coxph(as.formula(formula$call$formula),
if (!missing(data)) data else eval(formula$call$data), ...)
else formula
suppressWarnings({
cph <- tryCatch(
if (inherits(object, 'coxph'))
object else coxph(formula, data, ...),
# warning = function(w) '',
error = function(e) e
)
})
if (isTRUE(cph))
return(FALSE)
if (identical(cph, ''))
return(cph)
if (!inherits(cph, 'coxph'))
stop(cph)
obj <- hr_pval(cph, details = TRUE)
txt <- apply(obj, 1L, function(x)
sprintf('HR %.2f (%.2f, %.2f), %s', x[1L], x[2L], x[3L],
{pv <- pFUN(x[4L]); if (is.na(pv)) 'p > 0.99' else pv}))
lbl <- attr(terms(cph), 'term.labels')
txt <- paste(cph$xlevels[[lbl[!grepl('strata\\(', lbl)]]],
c('Reference', txt), sep = ': ')
if (is.null(cph$xlevels)) {
if (length(txt) == 2L)
gsub('^.*: ', '', txt) else c(NA, gsub('^.*: ', '', txt)[-1L])
} else txt
}
#' @rdname surv_test
pw_pval <- function(object, details = FALSE, data = NULL, ...,
method = 'none') {
object <- if (inherits(object, 'survdiff_pairs'))
object
else if (inherits(object, c('survdiff', 'survfit'))) {
survdiff_pairs(object, ..., digits = 10L)
} else if (inherits(object, c('formula', 'call'))) {
object <- eval(
substitute(survdiff(form, data = data, ...), list(form = object))
)
survdiff_pairs(object)
} else stop('pw_pval - Invalid object', call. = FALSE)
stopifnot(inherits(object, 'survdiff_pairs'))
m <- object$p.value
p <- m[lower.tri(m)]
p <- p.adjust(p, method = method)
n <- sprintf('%s vs %s', colnames(m)[col(m)[lower.tri(m, FALSE)]],
rownames(m)[row(m)[lower.tri(m, FALSE)]])
setNames(p, n)
}
#' @rdname surv_test
pw_text <- function(formula, data, ..., details = TRUE, pFUN = NULL,
method = 'none') {
pFUN <- if (is.null(pFUN) || isTRUE(pFUN))
function(x) pvalr(x, show.p = TRUE)
else if (identical(pFUN, FALSE))
identity else match.fun(pFUN)
obj <- pw_pval(object = formula, data = data, method = method, ...)
sprintf('%s: %s', names(obj), pFUN(obj))
}
#' @rdname surv_test
c_text <- function(formula, data, tau = NULL, iter = 1000L, seed = 1L,
digits = 2L, conf = 0.95, show_conf = TRUE, details = FALSE) {
vv <- all.vars(formula(formula))
data <- data[, vv]
## remove rows with missing data, indicator functions for factors
idx <- complete.cases(data)
if (any(!idx)) {
message(sum(!idx), ' observations removed due to missingness', domain = NA)
data <- data[idx, ]
}
mf <- model.matrix(reformulate(vv[-(1:2)]), data)[, -1L, drop = FALSE]
data <- cbind(data[, vv[1:2]], mf)
if (is.null(tau))
tau <- tail(pretty(data[, 1L]), 2L)[1L]
alpha <- abs(0:1 - (1 - conf) / 2)
res <- c(survC1::Inf.Cval(data, tau, iter, seed), tau = tau)
if (details)
return(res)
res <- res$Dhat + res$se * c(0, qnorm(alpha))
res <- roundr(res, digits)
res <- sprintf('%s (%s%% CI: %s - %s)', res[1L], conf * 100, res[2L], res[3L])
if (show_conf)
res else gsub(' .*', '', res)
}
#' @rdname surv_test
cc_pval <- function(formula1, formula2, data, tau = NULL, iter = 1000L, seed = 1L) {
vv1 <- all.vars(formula(formula1))
vv2 <- all.vars(formula(formula2))
data <- data[, unique(c(vv1, vv2))]
## remove rows with missing data, indicator functions for factors
idx <- complete.cases(data)
if (any(!idx)) {
message(sum(!idx), ' observations removed due to missingness', domain = NA)
data <- data[idx, ]
}
mf1 <- model.matrix(reformulate(vv1[-(1:2)]), data)[, -1L, drop = FALSE]
mf2 <- model.matrix(reformulate(vv2[-(1:2)]), data)[, -1L, drop = FALSE]
if (is.null(tau))
tau <- tail(pretty(data[, 1L]), 2L)[1L]
res <- survC1::Inf.Cval.Delta(data[, 1:2], mf1, mf2, tau, iter, seed)
z <- abs(res[3L, 1L] / res[3L, 2L])
p <- pnorm(z, lower.tail = FALSE) * 2
cbind(res, p.value = c(NA, NA, p), tau = tau)
}
#' @rdname surv_test
cc_text <- function(formula1, formula2, data, tau = NULL, iter = 1000L, seed = 1L,
digits = 2L, conf = 0.95, show_conf = TRUE, details = TRUE) {
com <- cc_pval(formula1, formula2, data, tau, iter, seed)
del <- com[3L, ]
alpha <- abs(0:1 - (1 - conf) / 2)
res <- del[1L] + del[2L] * c(0, qnorm(alpha))
res <- roundr(res, digits)
res <- sprintf('%s (%s%% CI: %s - %s), %s',
res[1L], conf * 100, res[2L], res[3L],
pvalr(del[5L], show.p = TRUE))
if (!details)
return(unname(pvalr(del[5L], show.p = TRUE)))
if (show_conf)
res else gsub(' .*, ', ', ', res)
}
#' kmplot_by
#'
#' @description
#' This function helps create stratified \code{\link{kmplot}}s quickly with
#' panel labels, survival curve test(s), and/or Cox regression summaries for
#' each plot.
#'
#' \code{data} should have at least three variables: \code{strata},
#' \code{*_time}, and \code{*_ind} where \code{*} is \code{event}. For
#' example, to use progression-free survival, \code{data} should have columns
#' \code{"pfs_time"} and \code{"pfs_ind"} (in this case the user should use
#' \code{event = 'pfs'}) and optionally the \code{strata} column unless only
#' the null model (\code{strata = "1"} is needed (default).
#'
#' Alternatively, the \code{time} argument may be used instead of following
#' the above; in this case, \code{time} and \code{event} must be variable
#' names in \code{data}. However, the method described above is more
#' efficient and preferred.
#'
#' @param strata,event,time,by character strings of the strata, event (pfs, os,
#' ttp, etc.; see details), time (optional), and stratification variables;
#' additionally, vectors for each are allowed
#' @param data a data frame
#' @param single logical; if \code{TRUE}, each level of \code{by} will be
#' drawn in a separate window
#' @param lr_test logical or numeric; if \code{TRUE}, a log-rank test will be
#' performed and the results added to the top-right corner of the plot; if
#' numeric, the value is passed as \code{rho} controlling the type of test
#' performed; see \code{\link{survdiff}}
#' @param main title of plot(s)
#' @param ylab y-axis label
#' @param sub sub-title displayed in upper left corner; should be a character
#' vector with length equal to the number of panels (i.e., the number of
#' unique values of \code{by} or length one if \code{by} was not given)
#' @param strata_lab at-risk table strata labels; should be a character vector
#' with length equal to the number of strata; \code{TRUE} (or equivalently
#' missing) is the default, and \code{FALSE} trims the labels; see examples
#' @param fig_lab figure panel labels; should be a character vector with
#' length equal to the number of panels (i.e., the number of unique values of
#' \code{by} or length one if \code{by} was not given)
#' @param col.surv color for individual survival curves or for all curves in
#' a plot if \code{by} is given and \code{map.col = TRUE}; if \code{col.surv}
#' is a named vector which matches the at risk labels, then colors are mapped
#' to the corresponding strata; see \code{\link{kmplot}}
#' @param map.col logical; if \code{TRUE}, \code{col.surv} will be the color
#' of all curves in each plot (only used when \code{by} is non-missing)
#' @param legend logical, a vector of x/y coordinates, or a keyword (see
#' \code{\link{legend}}); if \code{TRUE}, the default position is
#' \code{"bottomleft"}
#' @param add logical; if \code{FALSE} (default), resets graphical parameters
#' to settings before \code{kmplot_by} was called; set to \code{TRUE} for
#' adding to existing plots
#' @param plot logical; if \code{FALSE}, no plot is created but a list with
#' \code{survfit}s is returned
#' @param args.survfit a \emph{named} list of optional arguments passed to
#' \code{\link{survfit.formula}}; relevant arguments include \code{type}
#' (default is \code{"kaplan-meier"}), \code{error} (\code{"greenwood"}),
#' \code{conf.int} (\code{0.95}), \code{"conf.type"} (\code{"log"}), and
#' \code{"se.fit"} (\code{TRUE})
#' @param stratify (dev) \code{\link[survival]{strata}} variables
#' @param panel.first,panel.last,... additional arguments passed to
#' \code{\link{kmplot}} or graphical parameters subsequently passed to
#' \code{\link{par}}
#'
#' @return
#' Invisibly returns a list of \code{\link{survfit}} object(s) used to generate
#' plot(s). If \code{by} was used, there will be a list element for each unique
#' value.
#'
#' @seealso
#' \code{\link{kmplot}}; \code{\link{survdiff}}; \code{\link{kmplot_data_}};
#' \code{\link{lr_text}}; \code{\link{lr_pval}}; \code{\link{points.kmplot}}
#'
#' @examples
#' library('survival')
#' kmplot_by(time = 'time', event = 'status', data = colon)
#' with(colon, kmplot_by('All', time = time, event = status))
#'
#'
#' ## create *_ind, *_time variables, see details
#' colon2 <- within(colon[duplicated(colon$id), ], {
#' pfs_time <- time
#' pfs_ind <- status
#' sex <- factor(sex, 0:1, c('Female','Male'))
#' })
#'
#' kmplot_by('rx', 'pfs', data = colon2, col.surv = 1:3,
#' strata_lab = FALSE, median = TRUE)
#' kmplot_by('rx', 'pfs', data = colon2, col.surv = 1:3,
#' strata_lab = FALSE, median = TRUE, args.survfit = list(conf.int = 0.90))
#'
#'
#' ## these are equivalent (with minor differences in aesthetics)
#' kmplot_by(time = 'pfs_time', event = 'pfs_ind', data = colon2)
#' kmplot_by('1', 'pfs', colon2)
#'
#'
#' ## return value is a list of survfit objects
#' l <- kmplot_by('sex', 'pfs', colon2, 'rx', plot = FALSE)
#' str(lapply(l, kmplot))
#' str(lapply(l, kmplot_by))
#'
#'
#' ## multiple variables can be combined
#' kmplot_by('rx + sex', 'pfs', colon2, strata_lab = FALSE, lty.surv = 1:6)
#'
#'
#' ## if "by" is given, default is to plot separately
#' kmplot_by('rx', 'pfs', colon2, by = 'sex', col.surv = 1:3,
#' strata_lab = c('Observation', 'Trt', 'Trt + 5-FU'))
#'
#' ## if "by" is given, use map.col to map colors to plots
#' kmplot_by( 'rx', 'pfs', colon2, by = 'rx', map.col = TRUE, single = FALSE)
#' kmplot_by('sex', 'pfs', colon2, by = 'rx', map.col = TRUE, single = FALSE)
#' kmplot_by('sex', 'pfs', colon2, by = 'age > 60', map.col = TRUE, single = FALSE)
#'
#' ## to ensure colors are mapped to the same strata across plots (eg, if
#' ## all sub plots do not have the same groups), use a _named_ vector
#' kmplot_by('rx', 'pfs', colon2, by = 'rx', xlim = c(0, 3000),
#' col.surv = c(Lev = 'brown', Obs = 'blue', 'Lev+5FU' = 'purple'))
#'
#'
#' ## if single = FALSE, uses n2mfrow function to set par('mfrow')
#' kmplot_by('rx', 'pfs', colon2, by = 'sex', col.surv = 1:3, single = FALSE,
#' strata_lab = c('Observe', 'Trt', 'Trt + 5-FU'), main = levels(colon2$sex))
#'
#'
#' ## if par('mfrow') is anything other than c(1,1), uses current setting
#' par(mfrow = c(2,2))
#' kmplot_by('rx', 'pfs', colon2, by = 'sex', col.surv = 1:3, single = FALSE,
#' strata_lab = c('Observation','Trt','Trt + 5-FU'), add = TRUE)
#' kmplot_by('1', 'pfs', colon2, by = 'sex', col.surv = 1:3, single = FALSE,
#' strata_lab = FALSE, fig = c('C', 'D'), lr_test = FALSE)
#'
#'
#' ## use add = TRUE to add to a figure region without using the by argument
#' par(mfrow = c(1, 2))
#' mar <- c(8, 6, 3, 2) ## to align axes
#' kmplot_by( 'rx', 'pfs', colon2, strata_lab = FALSE, add = TRUE, mar = mar)
#' kmplot_by('sex', 'pfs', colon2, strata_lab = FALSE, add = TRUE, mar = mar)
#'
#' @export
kmplot_by <- function(strata = '1', event = NULL, data = NULL, by = NULL,
time = NULL, single = TRUE, lr_test = TRUE, main = NULL,
ylab = NULL, sub = NULL, strata_lab = NULL, fig_lab = NULL,
col.surv = NULL, map.col = FALSE, legend = FALSE,
add = FALSE, plot = TRUE, args.survfit = list(),
stratify = NULL, panel.first = NULL, panel.last = NULL, ...) {
## defaults
type <- args.survfit$type %||% 'kaplan-meier'
error <- args.survfit$error %||% 'greenwood'
conf.int <- args.survfit$conf.int %||% 0.95
conf.type <- args.survfit$conf.type %||% 'log'
se.fit <- args.survfit$se.fit %||% TRUE
if (is.logical(lr_test)) {
rho <- 0
} else {
rho <- if (is.numeric(lr_test))
lr_test else 0
lr_test <- TRUE
}
if (length(event) > 1L) {
data <- data.frame(
strata, event, time, by = by %||% NA,
stringsAsFactors = FALSE
)
strata <- 'strata'
event <- 'event'
time <- 'time'
by <- if (is.null(by)) NULL else 'by'
}
if (inherits(strata, 'survfit')) {
## if survfit object is given, extract needed vars and run as usual
if (is.null(data <- strata$.data)) {
data <- deparse(strata$call$data)
data <- get(data, where(gsub('[$[].*', '', data)))
}
conf.int <- strata$conf.int %||% conf.int
conf.type <- strata$conf.type %||% conf.type
se.fit <- !is.null(strata$std.err)
form <- as.character(strata$call$formula)[-1L]
strata <- form[length(form)]
event <- gsub('(\\S+)\\)|.', '\\1', form[1L])
time <- gsub('\\((\\w+)|.', '\\1', form[1L])
}
form <- if (!is.null(time)) {
ev <- event
sprintf('Surv(%s, %s) ~ %s', time, event, strata)
} else {
ev <- paste0(event, '_ind')
sprintf('Surv(%s_time, %s_ind) ~ %s', event, event, strata)
}
form <- as.formula(form)
if (is.factor(data[, ev])) {
warning(sprintf('factor (%s) used for event - coercing to integer',
shQuote(event)), call. = FALSE)
data[, ev] <- as.integer(data[, ev])
}
op <- par(no.readonly = TRUE)
if (plot & (!add | !single))
on.exit(par(op))
if (add)
on.exit(NULL)
if (!is.null(by)) {
data[, by] <- if (by %in% names(data))
as.factor(data[, by])
else factor(with(data, eval(parse(text = by))))
if (single) {
add <- FALSE
par(mfrow = c(1L,1L))
} else {
add <- TRUE
if (identical(as.integer(par('mfrow')), c(1L, 1L)))
par(mfrow = n2mfrow(lunique(data[, by], na.rm = TRUE)))
}
sp <- split(data, droplevels(data[, by]))
## list names will be main title(s)
names(sp) <- rep_len(main %||% paste(by, names(sp), sep = '='), length(sp))
} else {
if (missing(add)) {
add <- FALSE
par(mfrow = c(1L, 1L))
}
map.col <- FALSE
sp <- list(data)
names(sp) <- main
}
## define these before passing to loop
mlabs <- is.null(strata_lab)
msub <- is.null(sub)
fig <- if (length(sp) > 1L & is.null(fig_lab))
LETTERS[seq_along(sp)] else if (is.null(fig_lab)) '' else fig_lab
fig <- rep_len(fig, length(sp))
ylab <- ylab %||% sprintf('%s probability', toupper(event))
## possible names for strata labels/title
dots <- lapply(dots(...), function(x)
tryCatch(eval(x), error = function(e) NULL))
strata_names <- dots$strata.lab %||% dots$strata.expr %||%
if (mlabs) NULL else strata_lab
if (is.logical(strata_names) |
!is.expression(strata_names) && anyNA(strata_names) |
is.null(strata_names))
strata_names <- NULL
if (!is.null(names(strata_names)))
names(sp) <- strata_names
if (!is.null(by) & is.null(names(col.surv))) {
sl <- survfit(
form, data, se.fit = se.fit, type = type, error = error,
conf.int = conf.int, conf.type = conf.type
)
col.surv <- setNames(
col.surv %||% seq_along(sl$n),
if (by == strata || is.null(strata_lab) || identical(strata_lab, FALSE))
NULL else strata_names %||% names(sl$strata)
)
if (length(sp) != length(col.surv))
col.surv <- if (map.col)
seq_along(sp) else rep(col.surv, length(sp))
}
sl <- lapply(seq_along(sp), function(x) {
tryCatch({
eval(substitute(
survfit(form, data = sp[[x]], type = type,
conf.type = conf.type, error = error,
conf.int = conf.int, se.fit = se.fit),
list(form = form))
)
}, error = function(e) {
if (grepl('first argument must be of mode character', e)) {
warning('Error in survfit:\n', e, '\n',
'Check that \'strata\' has at least one non-missing value',
'\n\n', 'Returning null model: ',
deparse(update(form, . ~ 1)))
eval(substitute(
survfit(form, data = sp[[x]], type = type,
conf.type = conf.type, error = error,
conf.int = conf.int, se.fit = se.fit),
list(form = update(form, . ~ 1))))
} else if (grepl('less than one element in integerOneIndex', e))
stop('Check that \'event\' has at least one non-missing value')
else stop(e)
})
})
l <- lapply(seq_along(sp), function(x) {
s <- s0 <- sl[[x]]
s$.data <- s0$.data <- sp[[x]]
if (strata == '1')
strata <- ''
if (!is.null(s$strata)) {
ns <- names(s$strata)
ns <- if (mlabs || isTRUE(strata_lab)) ns else
if (identical(strata_lab, FALSE)) {
svar <- colnames(model.frame(form, s$.data)[, -1L, drop = FALSE])
cl <- c(list(survival::strata),
lapply(svar, as.symbol),
shortlabel = TRUE)
mode(cl) <- 'call'
levels(eval(cl, model.frame(form, s$.data)))
} else if (!length(strata_lab) == length(ns)) {
warning('length(strata_lab) does not equal length(s$strata)',
call. = FALSE)
ns
} else if (length(strata_lab) == length(ns))
strata_lab else ns
names(s$strata) <- trimws(ns)
}
if (!plot)
return(s0)
km <- kmplot(
s, add = add, ...,
legend = legend, main = names(sp)[x], ylab = ylab,
lr_test = lr_test, stratify = stratify,
col.surv = if (map.col) unname(col.surv)[x] else col.surv,
panel.last = panel.last,
panel.first = {
panel.first
## sub label - top left margin (default: strata var)
mtxt <- if (!msub)
rep_len(sub, length(sp))[x] else strata
mtext(mtxt, side = 3L, line = 0.25, outer = FALSE,
at = 0, adj = 0, font = 3L)
## figure label - top left outer margin (eg, A, B, C)
mtext(
fig[x], side = 3L, line = 0.25, outer = FALSE,
at = 0 - par('usr')[2L] * 0.05,
font = par('font.main'), cex = par('cex.main')
)
}
)
structure(
s0,
call = attr(km, 'call'),
scall = attr(km, 'scall'),
ccall = attr(km, 'ccall')
)
})
names(l) <- names(sp) %||% strata
invisible(l)
}
#' Add ticks to \code{kmplot}
#'
#' Add tick marks to a \code{\link{kmplot}} representing a specific event,
#' \code{what}, occurring within a variable, \code{by_var}.
#'
#' @param object an object of class \code{\link{survfit}}
#' @param data the data set used to fit \code{s} which should also contain
#' \code{by_var} and optionally \code{time}, \code{event}, and \code{strata};
#' \code{kmplot_ticks} will attempt to select these variables based on the
#' call to \code{survfit}
#' @param by_var,what a variable, \code{by_var} in \code{data} for which
#' tick marks are to be placed at each occurrence of \code{what}
#' @param y the y-coordinate(s) for each point, recycled as needed
#' @param time,event,strata (optional) variables used to fit \code{s}
#' @param col a vector of colors (one for each strata level of \code{s}) for
#' tick marks; note these colors should match the curves of the survival plot
#' @param pch a vector of plotting characters to distinguish censoring and
#' events
#' @param ... additional arguments passed to \code{\link{points}}
#'
#' @seealso
#' \code{\link{kmplot}}; \code{\link{kmplot_by}}
#'
#' @examples
#' library('survival')
#' s <- survfit(Surv(futime, fustat) ~ rx, ovarian)
#'
#' kmplot(s, add = TRUE)
#' kmplot_ticks(s, by_var = 'resid.ds', what = 1)
#' kmplot_ticks(s, by_var = 'resid.ds', what = 2, pch = '*', y = 0.05)
#'
#' @export
kmplot_ticks <- function(object, data = eval(object$call$data), by_var, what,
y = 0, time, event, strata, col = NULL,
pch = NULL, ...) {
op <- par(no.readonly = TRUE)
on.exit(par(op))
## lifted from kmplot_by
if (inherits(object, 'survfit')) {
if (is.null(data <- object$.data)) {
data <- deparse(object$call$data)
data <- get(data, where(gsub('[$[].*', '', data)))
}
terms <- c(terms.inner(object$call$formula),
tail(as.character(object$call$formula), 1L))
if (missing(time))
time <- terms[1L]
if (missing(event))
event <- terms[2L]
if (missing(strata))
strata <- terms[3L]
}
nc <- length(unique(data[, strata]))
col <- if (missing(col))
seq.int(nc) else rep_len(col, nc)
data[, strata] <- as.factor(data[, strata])
data <- data[data[, by_var] %in% what, ]
if (!nrow(data)) {
par(op)
return(invisible(NULL))
}
col <- col[as.numeric(data[, strata])]
pch <- rep_len(pch %||% 3:4, 2L)
pch <- pch[(as.integer(data[, event]) == 1L) + 1L]
points(data[, time], rep_len(y, nrow(data)), col = col, pch = pch, ...)
invisible(data)
}
terms.inner <- function(x) {
# survival:::terms.inner
if (inherits(x, 'formula')) {
if (length(x) == 3L)
c(terms.inner(x[[2L]]), terms.inner(x[[3L]]))
else terms.inner(x[[2L]])
} else if (class(x) == 'call' &&
(x[[1L]] != as.name('$') &&
x[[1L]] != as.name('['))) {
if (x[[1L]] == '+' || x[[1]] == '*' || x[[1]] == '-') {
if (length(x) == 3L)
c(terms.inner(x[[2L]]), terms.inner(x[[3L]]))
else terms.inner(x[[2L]])
} else if (x[[1L]] == as.name('Surv'))
unlist(lapply(x[-1L], terms.inner))
else terms.inner(x[[2L]])
} else (deparse(x))
}
#' Compute local p-value from coxph
#'
#' Checks the null hypothesis: C * beta.hat = c, i.e., the local p-value of
#' one or more factors in a model; can also be used to test more comlex
#' hypotheses.
#'
#' @param object survival object of class \code{\link[survival]{coxph}}
#' @param pos vector of positions of \code{\link{coefficients}} of interest
#' from \code{summary(coxph)}; defaults to \code{seq_along(coef(s))}
#' @param C,d \code{C}, a q-by-p matrix, and \code{d}, a q-by-1 matrix, define
#' the null hypothesis being checked; default is a global test on the
#' variables in \code{pos}, i.e., \code{C} is the identity matrix, and
#' \code{d} is a vector of zeros
#' @param digits number of significant figures in output
#'
#' @references
#' \url{http://www.ddiez.com/teac/surv/}
#'
#' @examples
#' library('survival')
#' fit <- coxph(Surv(time, status) ~ sex + ph.ecog, data = cancer)
#'
#' ## compare to summary(fit)
#' local_coxph_test(fit)
#' local_coxph_test(fit, 2)
#'
#' @export
local_coxph_test <- function(object, pos, C = NULL, d = NULL, digits = 3) {
stopifnot(inherits(object, 'coxph'))
if (missing(pos))
pos <- seq_along(coef(object))
n <- length(pos)
if (!(n <- length(pos)) || !length(coef(object)))
stop('Model has no coefficients')
if (is.null(C)) {
C <- matrix(0, n, n)
diag(C) <- 1
} else if (dim(C)[1L] != n)
stop('C has improper dimensions\n')
if (is.null(d))
d <- matrix(0, n, 1L)
if (dim(d)[1L] != dim(C)[1L])
stop('C and d do not have appropriate dimensions')
I. <- object$var[pos, pos]
est <- matrix(as.vector(object$coeff[pos]), dim(C)[2L])
chisq <- t(C %*% est - d) %*% solve(t(C) %*% I. %*% C ) %*% (C %*% est - d)
chisq <- as.numeric(chisq)
df <- dim(C)[1L]
p.value <- signif(pchisq(chisq, df, lower.tail = FALSE), digits)
list(est = est, chisq = chisq, df = df, p.value = p.value)
}
#' Create counting process data
#'
#' Converts a data frame to counting process notation and allows for
#' time-dependent variables to be introduced.
#'
#' @param data data frame with survival time, survival status, and other
#' covariates
#' @param time.var \code{data} variable name representing survival time
#' @param status.var \code{data} variable name representing status
#' @param covars other covariates to retain
#'
#' @return
#' A data frame with events in counting process notation.
#'
#' @references
#' \url{http://www.ddiez.com/teac/surv/}
#'
#' @examples
#' library('survival')
#' cp <- surv_cp(aml, 'time', 'status')
#' coxph(Surv(start, stop, status) ~ x, data = cp)
#'
#' ## compare to
#' coxph(Surv(time, status) ~ x, data = aml)
#'
#' @export
surv_cp <- function(data, time.var, status.var,
covars = setdiff(names(data), c(time.var, status.var))) {
## sorted times, append to 0
t.sort <- c(0, sort(unique(data[[time.var]])))
## for each data point find times less than or equal to the obs time
t.list <- lapply(data[[time.var]], function(x) t.sort[t.sort <= x])
## create list of datasets with covariates and all relevant start/stop times
## remove one from end of x, stop by removing first of x
## include the status variable and covariates in the dataframe
f <- function(i) {
data.frame(
start = head(t.list[[i]], -1L),
stop = tail(t.list[[i]], -1L),
data[i, c(status.var, covars)],
row.names = NULL
)
}
data <- do.call('rbind', Map('f', seq_along(t.list)))
## create the correct status need last time for each
## subject with status=1 to to be status=1 but all others status=0
## lapply creates vectors 0,0,0,...,1 based on length of t.list
## subtract 2 since the lag takes one away, then need one for the 1 at end
## this is then multiplied by status to correct it
keep.status <- do.call('c', lapply(t.list, function(x)
c(rep(0L, length(x) - 2L), 1L)))
data[, status.var] <- data[, status.var] * keep.status
data
}
#' Summary of a survival curve
#'
#' Prints and returns a list containing the survival curve, confidence limits
#' for the curve, and other information which can be more useful than the
#' acutal return value of \code{survival:::print.summary.survfit}.
#'
#' @param object a \code{\link{survfit}} object
#' @param digits number of digits to use in printing numbers
#' @param locf logical; if \code{TRUE}, any \code{NA} probabilities will be
#' carried forward, e.g., if no events occurred during two consecutive time
#' intervals
#' @param ... additional arguments passed to
#' \code{\link[survival]{summary.survfit}}
#'
#' @return
#' A list with \code{survival:::print.summary.survfit} matrices for each
#' strata, i.e., the output printed.
#'
#' @seealso
#' \code{\link{survfit}}; \code{\link{print.summary.survfit}}
#'
#' @examples
#' library('survival')
#' fit1 <- survfit(coxph(Surv(time, status) ~ strata(I(age > 60)), cancer))
#' surv_summary(fit1, times = c(0, 100, 200))
#'
#' @export
surv_summary <- function(object, digits = 3L, locf = FALSE, ...) {
stopifnot(inherits(object, 'survfit'))
oo <- options(digits = digits)
on.exit(options(oo))
x <- summary(object, ...)
if (!is.null(cl <- x$call)) {
cat('Call: ')
dput(cl)
}
omit <- x$na.action
if (length(omit))
cat(naprint(omit), '\n')
if (x$type == 'right' || is.null(x$n.enter)) {
mat <- cbind(x$time, x$n.risk, x$n.event, x$surv)
cnames <- c('time', 'n.risk', 'n.event')
} else if (x$type == 'counting') {
mat <- cbind(x$time, x$n.risk, x$n.event, x$n.enter, x$n.censor, x$surv)
cnames <- c(time, 'n.risk', 'n.event', 'entered', 'censored')
}
ncurve <- if (is.matrix(x$surv))
ncol(x$surv) else 1L
if (ncurve == 1L) {
cnames <- c(cnames, 'survival')
if (!is.null(x$std.err)) {
if (is.null(x$lower)) {
mat <- cbind(mat, x$std.err)
cnames <- c(cnames, 'std.err')
} else {
mat <- cbind(mat, x$std.err, x$lower, x$upper)
cnames <- c(cnames, 'std.err',
paste0('lower ', x$conf.int * 100, '% CI'),
paste0('upper ', x$conf.int * 100, '% CI'))
}
}
} else cnames <- c(cnames, paste0('survival', seq.int(ncurve)))
if (!is.null(x$start.time)) {
mat.keep <- mat[, 1L] >= x$start.time
mat <- mat[mat.keep, , drop = FALSE]
if (is.null(dim(mat)))
stop('No information available using start.time = ', x$start.time)
}
if (!is.matrix(mat))
mat <- matrix(mat, nrow = 1L)
if (!is.null(mat)) {
dimnames(mat) <- list(NULL, cnames)
if (is.null(x$strata)) {
cat('\n')
res <- prmatrix(mat, rowlab = rep('', nrow(mat)))
invisible(if (locf) apply(res, 2L, locf) else res)
} else {
strata <- x$strata
if (!is.null(x$start.time))
strata <- strata[mat.keep]
invisible(setNames(lapply(levels(strata), function(i) {
who <- strata == i
cat('\n ', i, '\n')
res <- if (sum(who) == 1L)
prmatrix(mat[who, , drop = FALSE])
else prmatrix(mat[who, , drop = FALSE], rowlab = rep('', sum(who)))
if (locf)
apply(res, 2L, locf) else res
}), levels(strata)))
}
} else stop('There are no events to print. Use the option censored = TRUE ',
'with the summary function to see the censored observations.')
}
#' Summary table
#'
#' Prints a formatted summary table for \code{\link{survfit}} objects.
#'
#' @param object an object of class \code{\link[survival]{survfit}}
#' @param digits number of digits to use in printing numbers
#' @param times vector of times
#' @param ... additional arguments passed to \code{\link{surv_summary}} and
#' further to \code{\link[survival]{summary.survfit}}
#' @param maxtime logical; if \code{TRUE}, adds the maximum time for which an
#' even occurs; if \code{FALSE}, number of events may not sum to total
#' @param percent logical; if \code{TRUE}, percentages are shown instead of
#' probabilities
#' @param locf logical; if \code{TRUE}, any \code{NA} probabilities will be
#' carried forward, e.g., if no events occurred during two consecutive time
#' intervals
#' @param median,ci.median logical; if \code{TRUE}, the median (and confidence
#' interval) will be added as a separate column
#' @param ci.type for the median confidence interval, show either the
#' \code{object$conf.int} or the full range; see \code{\link{surv_extract}}
#' @param digits.median for the median, the number of digits past the decimal
#' point to keep
#' @param na.median a string to use in place of "NA" for the median text;
#' default is "NR" for "not reached"
#' @param fun survival estimate transformation, one of "S" or "F" for the usual
#' survival probabilities, \code{S}, or \code{1 - S}, respectively
#'
#' @return
#' A matrix (or list of matrices) with formatted summaries for each strata; see
#' \code{\link{summary.survfit}}
#'
#' @seealso
#' \code{\link{survfit}}; \code{\link{print.summary.survfit}}
#'
#' @examples
#' library('survival')
#' fit0 <- survfit(Surv(time, status == 2) ~ 1, data = cancer)
#' surv_table(fit0, times = 0:2 * 100)
#' surv_table(fit0, times = 0:2 * 100, median = TRUE)
#' surv_table(fit0, times = 0:2 * 100, median = TRUE, ci.type = 'range')
#'
#' ## also works for list of tables
#' fit1 <- survfit(Surv(time, status == 2) ~ sex, data = cancer)
#' surv_table(fit1, median = TRUE, digits.median = 2, ci.median = FALSE)
#' rawr::combine_table(surv_table(fit1, median = TRUE))
#'
#' @export
surv_table <- function(object, digits = ifelse(percent, 0L, 3L),
times = pretty(object$time), maxtime = FALSE,
percent = FALSE, locf = FALSE,
median = FALSE, ci.median = TRUE,
ci.type = c('conf.int', 'range'),
digits.median = 0L, na.median = 'NR',
fun = c('S', 'F'), ...) {
stopifnot(inherits(object, 'survfit'))
if (maxtime) {
idx <- object$n.event > 0
maxtime <- max(object$time[if (any(idx))
idx else seq_along(idx) == length(idx)])
times <- c(times[times <= maxtime], maxtime)
}
capture.output(
ss <- surv_summary(
object, digits = 7L, times = unique(times), locf = locf, ...
)
)
if (fun <- fun[1L] %in% c('F', 'event', 'events')) {
s_to_f <- function(x) {
nn <- colnames(x)
si <- grep('survival', nn)
ui <- grep('upper', nn)
li <- grep('lower', nn)
x[, c(si, li, ui)] <- 1 - x[, c(si, ui, li)]
x
}
ss <- if (islist(ss))
lapply(ss, s_to_f) else s_to_f(ss)
}
if (percent) {
ss <- if (islist(ss)) {
lapply(ss, function(x) {
idx <- grep('survival|lower|upper', colnames(x))
x[, idx] <- x[, idx] * 100
x
})
} else {
idx <- grep('survival|lower|upper', colnames(ss))
ss[, idx] <- ss[, idx] * 100
ss
}
}
f <- function(x, d = digits, vars = vars) {
ox <- x
vars <- colnames(x)
g <- function(wh, x = x, cn = vars) {
cn[grepl(wh, cn)]
}
tmpvar <- g('survival|std.err|lower|upper')
x[, tmpvar] <- roundr(x[, tmpvar], digits = d)
x[, 'n.risk'] <- format(ox[, 'n.risk'], big.mark = ',')
x[, 'n.event'] <- format(ox[, 'n.event'], big.mark = ',')
surv <- sprintf(
'%s (%s, %s)',
x[, g('survival')], x[, g('lower')], x[, g('upper')]
)
cn <- c('Time', 'No. at risk', 'No. event', 'Std.Error',
sprintf('%s (%s%% CI)', ifelse(fun, 'Cuminc', 'Surv'),
object$conf.int * 100))
x <- cbind(x[, c(setdiff(vars, tmpvar), 'std.err'), drop = FALSE], surv)
colnames(x) <- cn
x
}
g <- function(x, y) {
nr <- nrow(x)
cbind(x, Median = c(y, rep_len('', nr - 1L)))
}
res <- if (is.list(ss))
Map('f', ss) else f(ss)
if (median) {
md <- surv_median(
object, ci = ci.median, show_conf = TRUE, print = FALSE,
type = ci.type, digits = digits.median
)
md <- gsub('NA', na.median, md)
if (is.list(ss))
Map(g, res, md) else g(res, md)
} else res
}
#' Pairwise \code{survdiff} comparisons
#'
#' Evaluate pairwise group differences in survival curves with
#' \code{\link[survival]{survdiff}}.
#'
#' @param object an object of class \code{\link[survival]{survdiff}},
#' \code{\link[survival]{survfit}}, or \code{\link[survival]{coxph}}
#'
#' alternatively, a formula in which case \code{data} must be provided
#' @param data (optional) a data frame in which to interpret the variables
#' named in \code{object} if given as a formula
#' @param ... additional arguments passed to \code{\link{survdiff}} such as
#' \code{na.action} or \code{rho} to control the test
#' @param method p-value correction method (default is \code{'holm'}; see
#' \code{\link{p.adjust}}
#' @param digits integer indicating the number of decimal places to be used
#'
#' @return
#' A list with three elements:
#'
#' \item{\code{n}}{the number of subjects in each pair of groups}
#' \item{\code{chi.sq}}{the chi-square statistic for a test of equality
#' between pairs of groups}
#' \item{\code{p.value}}{significance for each test. The lower and upper
#' triangles of the matrix are uncorrected and adjusted, respectively, for
#' multiple comparisons using \code{method} (the default is the Holm
#' correction, see \code{\link{p.adjust}})}
#'
#' @seealso
#' \code{\link[rawr]{coxph_pairs}}
#'
#' \code{\link{survdiff}}; \code{\link{p.adjust}};
#' \code{\link[rms]{contrast}} from the \pkg{\link[rms]{rms}} package;
#' \code{\link{pairwise.table}}
#'
#' @examples
#' library('survival')
#' sdif <- survdiff(Surv(time, status) ~ sex, data = lung)
#' sfit <- survfit(Surv(time, status) ~ sex, data = lung)
#'
#' stopifnot(identical(survdiff_pairs(sdif), survdiff_pairs(sfit)))
#'
#'
#' ## numeric and integer variables will be treated as factor-like
#' sfit <- survfit(Surv(time, status) ~ extent, data = colon)
#' kmplot(sfit)
#' survdiff_pairs(sfit)
#'
#' ## compare
#' survdiff(Surv(time, status) ~ extent, data = colon[colon$extent %in% 1:2, ])
#' survdiff(Surv(time, status) ~ extent, data = colon[colon$extent %in% 2:3, ])
#' survdiff(Surv(time, status) ~ extent, data = colon[colon$extent %in% 3:4, ])
#' ## etc ...
#'
#'
#' ## for interactions, create a new variable with all levels
#' colon$int <- with(colon, interaction(sex, extent))
#' sfit <- survfit(Surv(time, status) ~ int, data = colon)
#' survdiff_pairs(sfit, rho = 1, method = 'BH')
#'
#' ## rawr >= 1.0.0 allows multiple variables
#' sfit <- survfit(Surv(time, status) ~ sex + extent, data = colon)
#' survdiff_pairs(sfit, rho = 1, method = 'BH')
#'
#'
#' ## also allows for a formula to be passed (data arg required)
#' survdiff_pairs(Surv(time, status) ~ sex + extent, data = colon)
#'
#'
#' ## strata will be ignored
#' sdif <- survdiff(Surv(time, status) ~ sex + strata(inst), data = lung)
#' survdiff_pairs(sdif)
#'
#' @export
survdiff_pairs <- function(object, data = NULL, ...,
method = p.adjust.methods,
digits = getOption('digits')) {
if (inherits(object, 'formula')) {
form <- object
object <- survfit(form, data)
object$call$formula <- form
}
stopifnot(inherits(object, c('survdiff', 'survfit', 'coxph')))
pwchisq <- function(i, j) {
data <- droplevels(data[data[, rhs] %in% c(unq[i], unq[j]), ])
survdiff(as.formula(object$call$formula), data = data, ...)$chisq
}
method <- match.arg(method)
if (is.null(data))
data <- eval(object$call$data, envir = parent.frame(1L))
# rhs <- tail(all.vars(object$call$formula), -2L) ## fails for strata and s ~ ...
rhs <- strsplit(as.character(object$call$formula)[3L], '\\s*\\+\\s*')[[1L]]
rhs <- trimws(grep('strata\\s*\\(', rhs, value = TRUE, invert = TRUE))
if (any(grepl('factor\\s*\\(', rhs))) {
rhs <- trimws(gsub('factor\\s*\\(([^)]+)\\)', '\\1', rhs))
# names(data) <- trimws(gsub('factor\\s*\\(([^)]+)\\)', '\\1', names(data)))
}
if (length(rhs) > 1L) {
data[, 'int__'] <- interaction(data[, rhs], drop = TRUE)
object <- update(object, . ~ int__, data = data)
rhs <- 'int__'
}
unq <- as.character(sort(unique(data[, rhs])))
nn <- outer(as.character(unq), as.character(unq), Vectorize(function(x, y)
nrow(data[data[, rhs] %in% c(x, y), ])))
nn[upper.tri(nn)] <- NA
chisq <- rbind(NA, cbind(pairwise.table(pwchisq, unq, 'none'), NA))
dimnames(chisq) <- dimnames(nn) <- list(unq, unq)
p.value <- apply(chisq, 1:2, function(x)
pchisq(x, 1L, lower.tail = FALSE))
tpv <- t(p.value)
tpv[lower.tri(p.value)] <-
p.adjust(p.value[lower.tri(p.value)], method = method)
p.value <- t(tpv)
res <- list(n = nn, chi.sq = chisq, p.value = p.value)
structure(
lapply(res, round, digits = digits),
class = 'survdiff_pairs'
)
}
#' Pairwise \code{coxph} comparisons
#'
#' Evaluate pairwise group differences in hazard ratios with
#' \code{\link[survival]{coxph}}. Note this function is intended to be used
#' for uni-variable models with a factor-like predictor.
#'
#' @param object an object of class \code{\link[survival]{survdiff}},
#' \code{\link[survival]{survfit}}, or \code{\link[survival]{coxph}}
#'
#' alternatively, a formula in which case \code{data} must be provided
#' @param data (optional) a data frame in which to interpret the variables
#' named in \code{object} if given as a formula
#' @param level the confidence level for intervals; see \code{\link{confint}}
#' @param ... additional arguments passed to \code{\link{survdiff}} such as
#' \code{na.action} or \code{rho} to control the test
#' @param relevel logical; if \code{TRUE}, the reference group is flipped
#' @param method p-value correction method (default is \code{'holm'}; see
#' \code{\link{p.adjust}}
#' @param digits integer indicating the number of decimal places to be used
#'
#' @return
#' A list with three elements:
#'
#' \item{\code{n}}{the number of subjects in each pair of groups}
#' \item{\code{n_model}}{the number of subjects in each pair of models, i.e.,
#' excluding subjects with missing data}
#' \item{\code{hr}}{the hazard ratio between pairs of groups}
#' \item{\code{lci}}{the lower \code{level} confidence intervals for each
#' hazard ratio}
#' \item{\code{uci}}{the upper \code{level} confidence intervals for each
#' hazard ratio}
#' \item{\code{p.value}}{Wald p-value for each hazard ratio. The lower and
#' upper triangles of the matrix are uncorrected and adjusted, respectively,
#' for multiple comparisons using \code{method} (the default is the Holm
#' correction, see \code{\link{p.adjust}})}
#'
#' @seealso
#' \code{\link[rawr]{survdiff_pairs}}
#'
#' @examples
#' library('survival')
#' sdif <- survdiff(Surv(time, status) ~ sex, data = lung)
#' sfit <- survfit(Surv(time, status) ~ sex, data = lung)
#' cxph <- coxph(Surv(time, status) ~ sex, data = lung)
#'
#' stopifnot(
#' identical(coxph_pairs(sdif), coxph_pairs(sfit)),
#' identical(coxph_pairs(sdif), coxph_pairs(cxph))
#' )
#'
#' ## numeric and integer variables will be treated as factor-like
#' sfit <- survfit(Surv(time, status) ~ extent, data = colon)
#' kmplot(sfit, add = TRUE)
#' coxph_pairs(sfit)$n
#'
#' leg <- function(ii, jj, ...) {
#' hr <- coxph_pairs(sfit, ...)
#' sapply(seq_along(ii), function(i) {
#' j <- ii[i]
#' i <- jj[i]
#' sprintf(
#' '%s vs %s: n=%s, HR: %.2f (%.2f - %.2f), %s',
#' i, j, hr$n[i, j], hr$hr[i, j], hr$lci[i, j], hr$uci[i, j],
#' pvalr(hr$p.value[i, j], show.p = TRUE)
#' )
#' })
#' }
#' legend(
#' 'bottomleft', leg(c('1', '1', '1'), c('2', '3', '4')),
#' lty = 1, col = 2:4, bty = 'n'
#' )
#'
#'
#' ## compare
#' f <- function(x) exp(coef(x))
#' f(coxph(Surv(time, status) ~ extent, data = colon[colon$extent %in% c(1, 2), ]))
#' f(coxph(Surv(time, status) ~ extent, data = colon[colon$extent %in% c(1, 3), ]))
#' f(coxph(Surv(time, status) ~ extent, data = colon[colon$extent %in% c(1, 4), ]))
#' ## etc ...
#'
#'
#' ## rawr >= 1.0.0 allows multiple variables
#' sfit <- survfit(Surv(time, status) ~ sex + extent, data = colon)
#' coxph_pairs(sfit, method = 'BH')
#'
#'
#' ## also allows for a formula to be passed (data arg required)
#' coxph_pairs(Surv(time, status) ~ sex + extent, data = colon)
#'
#'
#' ## strata will be ignored
#' sdif <- survdiff(Surv(time, status) ~ sex + strata(inst), data = lung)
#' coxph_pairs(sdif)
#'
#' @export
coxph_pairs <- function(object, data = NULL, level = 0.95, ..., relevel = FALSE,
method = p.adjust.methods, digits = getOption('digits')) {
if (inherits(object, 'formula')) {
form <- object
object <- survfit(form, data)
object$call$formula <- form
}
stopifnot(inherits(object, c('survdiff', 'survfit', 'coxph')))
method <- match.arg(method)
if (is.null(data))
data <- eval(object$call$data, envir = parent.frame(1L))
# rhs <- tail(all.vars(object$call$formula), -2L) ## fails for strata and s ~ ...
rhs <- strsplit(as.character(object$call$formula)[3L], '\\s*\\+\\s*')[[1L]]
rhs <- trimws(grep('strata\\s*\\(', rhs, value = TRUE, invert = TRUE))
if (any(grepl('factor\\s*\\(', rhs))) {
rhs <- trimws(gsub('factor\\s*\\(([^)]+)\\)', '\\1', rhs))
# names(data) <- trimws(gsub('factor\\s*\\(([^)]+)\\)', '\\1', names(data)))
}
if (length(rhs) > 1L) {
data[, 'int__'] <- interaction(data[, rhs], drop = TRUE)
object <- update(object, . ~ int__, data = data)
rhs <- 'int__'
}
unq <- as.character(sort(unique(data[, rhs])))
f <- function(i, j) {
dd <- droplevels(data[data[, rhs] %in% c(i, j), ])
cx <- coxph(as.formula(object$call$formula), data = dd, ...)
co <- coef(summary(cx))
ci <- exp(confint(cx, level = level))
if (relevel) {
co[2L] <- 1 / co[2L]
ci <- 1 / rev(ci)
}
data.frame(
x = i, y = j, n = cx$n, hr = co[2L], p.value = co[5L],
lci = ci[1L], uci = ci[2L], level = level
)
}
g <- function(i) {
res <- matrix(NA, length(unq), length(unq), TRUE, list(unq, unq))
res[lower.tri(res)] <- pw[, i]
res
}
pw <- combn(unq, 2L, simplify = FALSE, function(x) {
f(x[1L], x[2L])
})
pw <- do.call('rbind', pw)
p.value <- g(5)
tpv <- t(p.value)
tpv[lower.tri(p.value)] <-
p.adjust(p.value[lower.tri(p.value)], method = method)
p.value <- t(tpv)
nn <- outer(as.character(unq), as.character(unq), Vectorize(function(x, y)
nrow(data[data[, rhs] %in% c(x, y), ])))
nn[upper.tri(nn)] <- NA
dimnames(nn) <- list(unq, unq)
res <- list(
n = nn, n_model = g(3),
hr = g(4), lci = g(6), uci = g(7),
p.value = p.value
)
structure(
lapply(res, round, digits = digits),
class = 'coxph_pairs', level = level
)
}
#' Landmark
#'
#' Fit survival curves for landmark time points.
#'
#' @param object an object of class \code{\link[survival]{survfit}}
#' @param times a vector of landmark times
#' @param col color for \code{times} annotations; use \code{col = 0} to
#' suppress labels
#' @param plot,plot.main logicals; if \code{TRUE}, landmark and \code{s} are
#' plotted, respectively
#' @param lr_test logical or numeric; if \code{TRUE}, a log-rank test will be
#' performed and the results added to the top-right corner of the plot; if
#' numeric, the value is passed as \code{rho} controlling the type of test
#' performed; see \code{\link{survdiff}}
#' @param adjust_start logical; if \code{TRUE}, each landmark plot will begin
#' at the y-axis
#' @param single logical; if \code{TRUE}, plots drawn on a single frame
#' @param ... additional arguments passed to \code{\link{kmplot}}
#'
#' @return
#' A list with the following elements:
#'
#' \item{\code{$table}}{data frame with the sample size, chi-square
#' statistic, degrees of freedom, and p-value for the test (the type of test
#' can be controlled by using a numeric value for \code{lr_test}, passed as
#' \code{rho} to \code{\link[survival]{survdiff}})}
#' \item{\code{$survfit}}{a list with each \code{\link[survival]{survfit}}
#' object}
#'
#' @examples
#' library('survival')
#' s <- survfit(Surv(time, status) ~ rx, colon)
#' l <- landmark(s, times = c(500, 1000, 2000), single = TRUE)
#' l$table
#'
#' layout(matrix(c(1, 1, 2, 3), 2))
#' landmark(s, times = c(500, 2000), adjust_start = TRUE,
#' hr_text = TRUE, col.surv = 4:6)
#'
#' @export
landmark <- function(object, times = NULL, col = 2L, plot = TRUE, plot.main = plot,
lr_test = TRUE, adjust_start = FALSE, single = FALSE, ...) {
stopifnot(inherits(object, 'survfit'))
form <- object$call$formula
data <- eval(object$call$data)
tvar <- terms.inner(form)[1L]
op <- par(no.readonly = TRUE)
on.exit(par(op))
if (single)
par(mfrow = n2mfrow(length(times) + plot.main))
st <- data.frame(n = sum(object$n), lr_pval(object, TRUE), row.names = 'Total')
if (plot.main)
kmplot(
object, ..., add = TRUE, lr_test = lr_test,
panel.first = {
abline(v = times, col = col)
if (length(times))
mtext(seq_along(times), at = times, cex = par('cex.lab'), col = col)
}
)
sd <- NULL
if (length(times))
sd <- lapply(times, function(ii) {
tmp <- data[data[, tvar] > ii, ]
if (adjust_start)
tmp[, tvar] <- tmp[, tvar] - ii
si <- eval(substitute(survfit(formula, tmp),
list(formula = as.formula(form))))
si$.data <- tmp
at <- pretty(si$time)
if (plot)
kmplot(
si, ..., add = TRUE, lr_test = lr_test, xaxis.at = at,
xaxis.lab = ifelse(adjust_start, ii, 0) + at,
panel.first = {
abline(v = ii, col = if (adjust_start) 0L else col)
mtext(which(times %in% ii), at = par('usr')[1L], col = col,
cex = par('cex.main'), font = par('font.main'))
}
)
list(
table = data.frame(n = sum(si$n), lr_pval(si, TRUE), row.names = ii),
survfit = si
)
})
ss <- lapply(sd, '[[', 'survfit')
tt <- lapply(sd, '[[', 'table')
res <- list(
table = do.call('rbind', c(list(st), tt)),
survfit = c(list(Total = object), setNames(ss, sprintf('time=%s', times)))
)
invisible(res)
}
#' Extract \code{survfit} summaries
#'
#' @description
#' Convenience functions to print results from \code{summary(x)$table}
#' where \code{x} is a \code{\link[survival]{survfit}} object.
#'
#' \code{surv_median} and \code{surv_prob} extract median(s) and survival
#' estimate(s) with optional confidence intervals, respectively.
#' \code{surv_extract} is a generic extractor.
#'
#' @param object an object of class \code{\link[survival]{survfit}}
#' @param what the data to return, either the index, character string(s), or
#' \code{NULL} (returns the entire table)
#' @param ci logical; if \code{TRUE}, the confidence interval is printed
#' @param digits number of digits past the decimal point to keep
#' @param which optional integer or character vector to select or re-order
#' the output; \code{which = NULL} and returns results for all strata
#' @param print logical; if \code{TRUE}, output is prepared for in-line
#' printing
#' @param na a character string to replace \code{NA} when median
#' times have not yet been reached
#' @param type if \code{ci = TRUE}, the type of confidence interval to show:
#' \code{"conf.int"} for the \code{object$conf.int * 100}% confidence
#' interval (default) or \code{"range"} for the full range
#' @param times vector of times passed to \code{\link{surv_table}}
#' @param show_conf logical; if \code{TRUE}, includes the confidence level
#' @param percent logical; if \code{TRUE}, percentages are shown instead of
#' probabilities
#'
#' @examples
#' library('survival')
#' sfit1 <- survfit(Surv(time, status) ~ 1, lung)
#' sfit2 <- survfit(Surv(time, status) ~ sex, lung, conf.int = 0.9)
#'
#' surv_extract(sfit1, NULL)
#' surv_extract(sfit1, 2:3)
#' surv_extract(sfit2, 'median|CL')
#' surv_extract(sfit2, c('events', 'median'))
#'
#'
#' surv_median(sfit1)
#' surv_median(sfit2)
#' surv_median(sfit2, print = FALSE)
#'
#' surv_median(sfit1, ci = TRUE)
#' surv_median(sfit2, ci = TRUE)
#' surv_median(sfit2, ci = TRUE, print = FALSE)
#'
#' surv_median(sfit1, ci = TRUE, type = 'range')
#' surv_median(sfit2, ci = TRUE, type = 'range')
#' surv_median(sfit2, ci = TRUE, print = FALSE, type = 'range')
#'
#'
#' times <- 365.242 * c(0.5, 1, 2)
#' surv_prob(sfit1, times)
#' ## compare
#' summary(sfit1, times)
#'
#' surv_prob(sfit2, times, which = 'sex=2', digits = 3)
#' ## compare
#' summary(sfit2, times)
#'
#' surv_prob(sfit2, times = 365, ci = TRUE, which = NULL)
#'
#' @export
surv_extract <- function(object, what = 'median') {
stopifnot(inherits(object, 'survfit'))
oo <- options(survfit.rmean = 'individual')
on.exit(options(oo))
tbl <- summary(object)$table
one <- is.null(object$strata) | length(object$strata) == 1L
tbl <- if (one) {
c(tbl, time.min = min(object$time), time.max = max(object$time))
} else {
sp <- split(object$time, rep(seq_along(object$strata), object$strata))
cbind(tbl, time.min = sapply(sp, min), time.max = sapply(sp, max))
}
if (is.null(what))
return(tbl)
what <- if (is.character(what))
grep(paste0(what, collapse = '|'), names(tbl) %||% colnames(tbl))
else what
if (one)
tbl[what] else tbl[, what]
}
#' @rdname surv_extract
#' @export
surv_median <- function(object, ci = FALSE, digits = 0L, which = NULL,
print = TRUE, na = ifelse(ci, 'NR', 'not reached'),
show_conf = TRUE, type = c('conf.int', 'range')) {
stopifnot(inherits(object, 'survfit'))
nr <- function(x) {
gsub('NA', na, x)
}
type <- match.arg(type)
nst <- pmax(1L, length(object$strata))
which <- if (is.null(which))
seq.int(nst) else which
if (!ci) {
res <- surv_extract(object, 'median')[which]
return(
if (print)
nr(iprint(res, digits = digits)) else roundr(res, digits)
)
}
res <- surv_extract(
object,
switch(type, conf.int = 'median|.CL', range = 'median|time.m')
)
res <- roundr(res, digits)
f <- function(x) {
res <- if (show_conf)
sprintf('%s (%s%% CI: %s - %s)',
x[1L], object$conf.int * 100, x[2L], x[3L])
else sprintf('%s (%s - %s)', x[1L], x[2L], x[3L])
if (type == 'range')
gsub('[0-9.]+% CI', 'range', res) else res
}
res <- if (is.null(dim(res)))
f(res) else apply(res, 1L, f)
if (print)
nr(iprint(res)) else res
}
#' @rdname surv_extract
#' @export
surv_prob <- function(object, times = pretty(object$time), ci = FALSE,
digits = ifelse(percent, 0L, 2L), which = 1L,
print = TRUE, show_conf = TRUE, percent = FALSE) {
stopifnot(inherits(object, 'survfit'))
res <- surv_table(
object, digits, times, FALSE, percent = percent, extend = TRUE
)
if (!islist(res))
res <- list(res)
res <- lapply(res, function(y) {
y <- unname(y[, grep('Surv', colnames(y))])
y <- if (!ci)
gsub(' .*', '', y) else gsub(', ', ' - ', y)
if (show_conf)
y <- gsub('(?<=\\()', sprintf('%s%% CI: ', object$conf.int * 100),
y, perl = TRUE)
if (print)
iprint(y) else y
})
if (length(which) == 1L)
res[[which]]
else if (print & length(times) == 1L)
iprint(res) else res
}
#' Difference in two Kaplan-Meier estimates
#'
#' Draw a confidence band for comparing two survival curves. The shaded region
#' is centered at the midpoint of the two survival estimates, and the height
#' is half the width of the approximate \code{conf.int} pointwise confidence
#' region. Curves not overlapping the shaded area are approximately
#' significantly different at the \code{1 - conf.int} level.
#'
#' @param object an object of class \code{\link[survival]{survfit}}
#' @param col color used for the band
#' @param conf.int the level of the two-sided confidence interval
#'
#' @seealso
#' Adapted from \code{rms::survdiffplot}
#'
#' @examples
#' library('survival')
#' fit <- survfit(Surv(time, status) ~ sex, lung)
#'
#' ## survival:::plot.survfit
#' plot(fit, mark.time = TRUE, col = 1:2)
#' kmdiff(fit)
#'
#' ## rawr::kmplot (add = TRUE to add to plot before resetting par)
#' kmplot(fit, add = TRUE)
#' kmdiff(fit)
#' legend(
#' 'topright', fill = adjustcolor('grey', 0.5),
#' legend = sprintf('Difference\n1/2 %s CL', fit$conf.int)
#' )
#'
#' @export
kmdiff <- function(object, col = adjustcolor('grey', 0.5), conf.int = 0.95) {
stopifnot(inherits(object, 'survfit'))
strata <- names(object$strata)
if (length(strata) != 2L) {
warning('\'kmdiff\' requires exactly two levels in object$strata')
return(invisible(NULL))
}
f <- function(level, times, x) {
strata <- x$strata
ii <- strata == level
time <- x$time[ii]
jj <- match(times, time)
surv <- x$surv[ii]
nrisk <- x$n.risk[ii]
se <- x$std.err[ii]
list(time = times, surv = surv[jj], se = se[jj], nrisk = nrisk[jj])
}
dostep <- function(x, y) {
# survival:::plot.survfit
keep <- is.finite(x) & is.finite(y)
if (!any(keep))
return(invisible(NULL))
if (!all(keep)) {
x <- x[keep]
y <- y[keep]
}
n <- length(x)
if (n == 1L)
list(x = x, y = y)
else if (n == 2L)
list(x = x[c(1L, 2L, 2L)], y = y[c(1L, 1L, 2L)])
else {
temp <- rle(y)$lengths
drops <- 1 + cumsum(temp[-length(temp)])
if (n %in% drops) {
xrep <- c(x[1L], rep(x[drops], each = 2L))
yrep <- rep(y[c(1, drops)], c(rep(2L, length(drops)), 1L))
} else {
xrep <- c(x[1L], rep(x[drops], each = 2L), x[n])
yrep <- c(rep(y[c(1, drops)], each = 2L))
}
list(x = xrep, y = yrep)
}
}
st <- object$time
st <- c(st, seq(min(st), max(st), length.out = length(st) * 10))
st <- unique(sort(st))
ss <- summary(object, times = st)
a <- f(strata[1L], st, ss)
b <- f(strata[2L], st, ss)
surv <- (a$surv + b$surv) / 2
z <- qnorm((1 + conf.int) / 2)
se <- sqrt(a$se ^ 2 + b$se ^ 2)
lo <- surv - 0.5 * z * se
hi <- surv + 0.5 * z * se
ok <- !is.na(st + lo + hi) & st < max(st)
x <- dostep(c(st[ok], rev(st[ok])), c(lo[ok], rev(hi[ok])))
polygon(x, col = col, border = NA)
invisible(x)
}
#' Survival distributions
#'
#' Solve for the time or proportion event-free assuming exponential survival.
#'
#' @param p1,p2 proportion of patients who are event-free for the known and
#' unknown rates, respectively
#' @param time1,time2 times corresponding to p1 and p2, respectively; note
#' that exactly one of \code{p2} and \code{time2} should be null
#'
#' @examples
#' time <- seq(0, 5, 0.01)
#' cumhaz <- cumsum(rep(exp(0.2), length(time)) * 0.01)
#' surv <- exp(-cumhaz)
#'
#' set.seed(1)
#' n <- runif(100)
#' failtimes <- time[colSums(outer(surv, n, `>`))]
#'
#' library('survival')
#' sf <- survfit(Surv(failtimes) ~ 1)
#' op <- par(mfrow = c(1, 2))
#' plot(time, surv, type = 'l', xlab = 'Time', ylab = 'Survival')
#' plot(sf)
#' par(op)
#'
#' surv_extract(sf)
#'
#' ## given median survival of 0.58 time units, what proportions are
#' ## event-free at times
#' times <- c(0.4, 0.6, 0.8)
#' p <- surv_dist(0.5, 0.58, time2 = times)
#' round(p, 3)
#'
#' summary(sf, times = times)
#'
#' ## similarly, given median survival of 0.58 time units, when are the
#' ## proportions of 0.609, 0.478, and 0.374 event-free
#' surv_dist(0.5, 0.58, round(p, 3))
#'
#' @export
surv_dist <- function(p1, time1, p2 = NULL, time2 = NULL) {
# r1 <- log(1 / p1) / time1
# r2 <- log(1 / p2) / time2
# r1:r2
r1 <- log(1 / p1) / time1
if (is.null(time2))
log(1 / p2) / r1
else 1 / exp(r1 * time2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.