#' PPC censoring
#'
#' @description Compare the empirical distribution of censored data `y` to the
#' distributions of simulated/replicated data `yrep` from the posterior
#' predictive distribution. See the **Plot Descriptions** section, below, for
#' details.
#'
#' Although some of the other \pkg{bayesplot} plots can be used with censored
#' data, `ppc_km_overlay()` is currently the only plotting function designed
#' *specifically* for censored data. We encourage you to suggest or contribute
#' additional plots at
#' [github.com/stan-dev/bayesplot](https://github.com/stan-dev/bayesplot).
#'
#' @name PPC-censoring
#' @family PPCs
#'
#' @template args-y-yrep
#' @param size,alpha Passed to the appropriate geom to control the appearance of
#' the `yrep` distributions.
#' @param ... Currently only used internally.
#'
#' @template return-ggplot
#'
#' @section Plot Descriptions:
#' \describe{
#' \item{`ppc_km_overlay()`}{
#' Empirical CCDF estimates of each dataset (row) in `yrep` are overlaid,
#' with the Kaplan-Meier estimate (Kaplan and Meier, 1958) for `y` itself on
#' top (and in a darker shade). This is a PPC suitable for right-censored
#' `y`. Note that the replicated data from `yrep` is assumed to be
#' uncensored.
#' }
#' \item{`ppc_km_overlay_grouped()`}{
#' The same as `ppc_km_overlay()`, but with separate facets by `group`.
#' }
#' }
#'
#' @templateVar bdaRef (Ch. 6)
#' @template reference-bda
#' @template reference-km
#'
#' @examples
#' color_scheme_set("brightblue")
#' y <- example_y_data()
#' # For illustrative purposes, (right-)censor values y > 110:
#' status_y <- as.numeric(y <= 110)
#' y <- pmin(y, 110)
#' # In reality, the replicated data (yrep) would be obtained from a
#' # model which takes the censoring of y properly into account. Here,
#' # for illustrative purposes, we simply use example_yrep_draws():
#' yrep <- example_yrep_draws()
#' dim(yrep)
#' \donttest{
#' ppc_km_overlay(y, yrep[1:25, ], status_y = status_y)
#' }
#' # With separate facets by group:
#' group <- example_group_data()
#' \donttest{
#' ppc_km_overlay_grouped(y, yrep[1:25, ], group = group, status_y = status_y)
#' }
NULL
#' @export
#' @rdname PPC-censoring
#' @param status_y The status indicator for the observations from `y`. This must
#' be a numeric vector of the same length as `y` with values in \{0, 1\} (0 =
#' right censored, 1 = event).
ppc_km_overlay <- function(
y,
yrep,
...,
status_y,
size = 0.25,
alpha = 0.7
) {
check_ignored_arguments(..., ok_args = "add_group")
add_group <- list(...)$add_group
suggested_package("survival")
suggested_package("ggfortify")
stopifnot(is.numeric(status_y))
stopifnot(all(status_y %in% c(0, 1)))
data <- ppc_data(y, yrep, group = status_y)
# Modify the status indicator:
# * For the observed data ("y"), convert the status indicator back to
# a numeric.
# * For the replicated data ("yrep"), set the status indicator
# to 1 ("event"). This way, the Kaplan-Meier estimator reduces
# to "1 - ECDF" with ECDF denoting the ordinary empirical cumulative
# distribution function.
data <- data %>%
dplyr::mutate(group = ifelse(.data$is_y,
as.numeric(as.character(.data$group)),
1))
sf_form <- survival::Surv(value, group) ~ rep_label
if (!is.null(add_group)) {
data <- dplyr::inner_join(data,
tibble::tibble(y_id = seq_along(y),
add_group = add_group),
by = "y_id")
sf_form <- update(sf_form, . ~ . + add_group)
}
sf <- survival::survfit(
sf_form,
data = data
)
names(sf$strata) <- sub("add_group=", "add_group:", names(sf$strata)) # Needed to split the strata names in ggfortify:::fortify.survfit() properly.
fsf <- fortify(sf)
if(any(grepl("add_group", levels(fsf$strata)))){
strata_split <- strsplit(as.character(fsf$strata), split = ", add_group:")
fsf$strata <- as.factor(sapply(strata_split, "[[", 1))
fsf$group <- as.factor(sapply(strata_split, "[[", 2))
}
fsf$is_y_color <- as.factor(sub("\\[rep\\] \\(.*$", "rep", sub("^italic\\(y\\)", "y", fsf$strata)))
fsf$is_y_size <- ifelse(fsf$is_y_color == "yrep", size, 1)
fsf$is_y_alpha <- ifelse(fsf$is_y_color == "yrep", alpha, 1)
# Ensure that the observed data gets plotted last by reordering the
# levels of the factor "strata"
fsf$strata <- factor(fsf$strata, levels = rev(levels(fsf$strata)))
ggplot(data = fsf,
mapping = aes(x = .data$time,
y = .data$surv,
color = .data$is_y_color,
group = .data$strata,
size = .data$is_y_size,
alpha = .data$is_y_alpha)) +
geom_step() +
hline_at(
0.5,
linewidth = 0.1,
linetype = 2,
color = get_color("dh")
) +
hline_at(
c(0, 1),
linewidth = 0.2,
linetype = 2,
color = get_color("dh")
) +
scale_size_identity() +
scale_alpha_identity() +
scale_color_ppc() +
scale_y_continuous(breaks = c(0, 0.5, 1)) +
xlab(y_label()) +
yaxis_title(FALSE) +
xaxis_title(FALSE) +
yaxis_ticks(FALSE) +
bayesplot_theme_get()
}
#' @export
#' @rdname PPC-censoring
#' @template args-group
ppc_km_overlay_grouped <- function(
y,
yrep,
group,
...,
status_y,
size = 0.25,
alpha = 0.7
) {
check_ignored_arguments(...)
p_overlay <- ppc_km_overlay(
y = y,
yrep = yrep,
add_group = group,
...,
status_y = status_y,
size = size,
alpha = alpha
)
p_overlay +
facet_wrap("group") +
force_axes_in_facets()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.