#' @title
#' Summarize survival-time data
#'
#' @description
#' An R flavored version of the Stata command `stsum`. Presents summary
#' statistics: time at risk; incidence rate; number of subjects; and the 25th,
#' 50th, and 75th percentiles of survival time.
#'
#' @param data A data frame
#' @param time For right censored data, this is the follow up time. For interval
#' data, the first argument is the starting time for the interval.
#' @param event The status indicator, normally 0=alive, 1=dead. Other choices
#' are TRUE/FALSE (TRUE = death) or 1/2 (2=death). For interval censored data,
#' the status indicator is 0=right censored, 1=event at time, 2=left censored,
#' 3=interval censored. For multiple enpoint data the event variable will be a
#' factor, whose first level is treated as censoring. Although unusual, the
#' event indicator can be omitted, in which case all subjects are assumed to
#' have an event.
#' @param type character string specifying the type of censoring. Possible
#' values are "right", "left", "counting", "interval", "interval2" or
#' "mstate".
#' @param group Variable name (unquoted) to stratify on
#'
#' @references
#' https://www.stata.com/manuals13/ststsum.pdf
#'
#' @importFrom dplyr bind_cols
#' @importFrom dplyr group_by
#' @importFrom dplyr mutate
#' @importFrom dplyr mutate_all
#' @importFrom dplyr n
#' @importFrom dplyr rename
#' @importFrom dplyr summarise
#' @importFrom purrr pluck
#' @importFrom rlang enquo
#' @importFrom rlang expr
#' @importFrom rlang get_expr
#' @importFrom rlang quo_name
#' @importFrom survival Surv
#' @importFrom survival survfit
#' @importFrom tibble as_tibble
#' @importFrom tibble enframe
#' @importFrom tibble tibble
#' @importFrom tidyr spread
#'
#' @return
#' A list
#'
#' @export
#'
#' @examples
#' #### Example 1 --------------------------------
#'
#' library(dplyr)
#' library(tibble)
#' page_2 <- tibble::tribble(
#' ~group, ~time, ~dead, ~st, ~d, ~t, ~t0,
#' 1, 143, 1, 1, 1, 143, 0,
#' 1, 164, 1, 1, 1, 164, 0,
#' 1, 188, 1, 1, 1, 188, 0,
#' 1, 188, 1, 1, 1, 188, 0,
#' 1, 190, 1, 1, 1, 190, 0,
#' 1, 192, 1, 1, 1, 192, 0,
#' 1, 206, 1, 1, 1, 206, 0,
#' 1, 209, 1, 1, 1, 209, 0,
#' 1, 213, 1, 1, 1, 213, 0,
#' 1, 216, 1, 1, 1, 216, 0,
#' 1, 220, 1, 1, 1, 220, 0,
#' 1, 227, 1, 1, 1, 227, 0,
#' 1, 230, 1, 1, 1, 230, 0,
#' 1, 234, 1, 1, 1, 234, 0,
#' 1, 246, 1, 1, 1, 246, 0,
#' 1, 265, 1, 1, 1, 265, 0,
#' 1, 304, 1, 1, 1, 304, 0,
#' 1, 216, 0, 1, 0, 216, 0,
#' 1, 244, 0, 1, 0, 244, 0,
#' 2, 142, 1, 1, 1, 142, 0,
#' 2, 156, 1, 1, 1, 156, 0,
#' 2, 163, 1, 1, 1, 163, 0,
#' 2, 198, 1, 1, 1, 198, 0,
#' 2, 205, 1, 1, 1, 205, 0,
#' 2, 232, 1, 1, 1, 232, 0,
#' 2, 232, 1, 1, 1, 232, 0,
#' 2, 233, 1, 1, 1, 233, 0,
#' 2, 233, 1, 1, 1, 233, 0,
#' 2, 233, 1, 1, 1, 233, 0,
#' 2, 233, 1, 1, 1, 233, 0,
#' 2, 239, 1, 1, 1, 239, 0,
#' 2, 240, 1, 1, 1, 240, 0,
#' 2, 261, 1, 1, 1, 261, 0,
#' 2, 280, 1, 1, 1, 280, 0,
#' 2, 280, 1, 1, 1, 280, 0,
#' 2, 296, 1, 1, 1, 296, 0,
#' 2, 296, 1, 1, 1, 296, 0,
#' 2, 323, 1, 1, 1, 323, 0,
#' 2, 204, 0, 1, 0, 204, 0,
#' 2, 344, 0, 1, 0, 344, 0
#' )
#'
#'
#' st_sum(data = page_2,
#' time = time,
#' event = dead,
#' group = group)
#'
#' #### Example 2 --------------------------------
#'
#' sample_df <- tibble::tribble(
#' ~exp, ~time, ~disease, ~st, ~d, ~t, ~t0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 1, 1, 1, 1, 1, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 2, 1, 1, 1, 2, 0,
#' 0, 3, 1, 1, 1, 3, 0,
#' 0, 3, 1, 1, 1, 3, 0,
#' 0, 4, 0, 1, 0, 4, 0,
#' 0, 5, 0, 1, 0, 5, 0,
#' 0, 5, 0, 1, 0, 5, 0,
#' 0, 5, 1, 1, 1, 5, 0,
#' 0, 6, 0, 1, 0, 6, 0,
#' 0, 7, 0, 1, 0, 7, 0,
#' 0, 7, 0, 1, 0, 7, 0,
#' 0, 8, 0, 1, 0, 8, 0,
#' 1, 1, 1, 1, 1, 1, 0,
#' 1, 1, 1, 1, 1, 1, 0,
#' 1, 1, 1, 1, 1, 1, 0,
#' 1, 1, 1, 1, 1, 1, 0,
#' 1, 1, 1, 1, 1, 1, 0,
#' 1, 1, 1, 1, 1, 1, 0,
#' 1, 1, 1, 1, 1, 1, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 2, 1, 1, 1, 2, 0,
#' 1, 3, 1, 1, 1, 3, 0,
#' 1, 3, 1, 1, 1, 3, 0,
#' 1, 4, 0, 1, 0, 4, 0,
#' 1, 4, 0, 1, 0, 4, 0,
#' 1, 4, 0, 1, 0, 4, 0,
#' 1, 4, 0, 1, 0, 4, 0,
#' 1, 4, 1, 1, 1, 4, 0,
#' 1, 5, 0, 1, 0, 5, 0,
#' 1, 5, 0, 1, 0, 5, 0,
#' 1, 6, 0, 1, 0, 6, 0,
#' 1, 6, 0, 1, 0, 6, 0,
#' 1, 10, 0, 1, 0, 10, 0,
#' 1, 12, 0, 1, 0, 12, 0
#' )
#'
#'
#' st_sum(data = sample_df,
#' time = time,
#' event = disease,
#' group = exp)
#'
#'
#' st_sum(data = sample_df,
#' time = time,
#' event = (disease == 1),
#' group = exp)
st_sum <- function(data,
time,
event,
type = "right",
group) {
time_enq <- rlang::enquo(time)
event_enq <- rlang::enquo(event)
group_enq <- rlang::enquo(group)
#### Summary for total data --------------------------------
# Just bind it instead of assigning objects
total <- dplyr::bind_cols(
# First column with label
tibble::tibble(`strata` = "All") %>%
mutate(strata = factor(strata)),
# Summary stats
data %>%
dplyr::summarise(
time_at_risk = sum(!! time_enq, na.rm = TRUE),
crude_incidence =
sum(!! event_enq, na.rm = TRUE) / time_at_risk,
no_of_subjects = dplyr::n())
,
# quantiles
data %>%
with(.,
survival::survfit(as.formula(
rlang::expr(survival::Surv(time = !! rlang::get_expr(time_enq),
event = !! rlang::get_expr(event_enq),
type = type)
~ 1)))) %>%
quantile(., probs = c(25, 50, 75) / 100) %>%
purrr::pluck(.x = .,
"quantile") %>%
# tibble::as_tibble() %>%
tibble::enframe(name = NULL) %>%
mutate(key = c("q_25", "q_50", "q_75")) %>%
tidyr::spread(., key = key, value = value) %>%
dplyr::mutate_all(.tbl = .,
.funs = list(~ as.integer(.)))
)
#### Summary by groups --------------------------------
# Just bind it instead of assigning objects
grouped <- dplyr::bind_cols(
# Summary stats
data %>%
dplyr::group_by(!! group_enq) %>%
dplyr::summarise(
time_at_risk = sum(!! time_enq, na.rm = TRUE),
crude_incidence =
sum(!! event_enq, na.rm = TRUE) / time_at_risk,
no_of_subjects = dplyr::n()) %>%
dplyr::rename(`strata` = !! rlang::quo_name(group_enq))
,
# quantiles
# quantiles
data %>%
with(.,
survival::survfit(as.formula(
rlang::expr(survival::Surv(time = !! rlang::get_expr(time_enq),
event = !! rlang::get_expr(event_enq),
type = type)
~ !! rlang::get_expr(group_enq))))) %>%
quantile(., probs = c(25, 50, 75) / 100) %>%
purrr::pluck(.x = .,
"quantile") %>%
tibble::as_tibble() %>%
dplyr::mutate_all(.tbl = .,
.funs = list(~ as.integer(.))) %>%
dplyr::rename(
# new = old
q_25 = 1,
q_50 = 2,
q_75 = 3)
)
list(total = tibble::as_tibble(total),
by_group = grouped)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.