#' @include utilities.R surv_group_by.R
NULL
#' Create Survival Curves
#'
#' @description Wrapper around \link[survival]{survfit}.
#'
#' Compared to \link[survival]{survfit}, it also supports:
#' - A list of data sets and/or a list of formulas,
#' - A grouped data sets as generated by the function \link{surv_group_by},
#' - A group.by option
#'
#' There are many cases where this function may be useful:
#'
#' - **Case 1: One formula, one data set.**
#' Example: You want to fit the survival curves of one biomarker/gene in a
#' given data set. Identical to \link[survival]{survfit}.
#'
#' - **Case 2: List of formulas, one data set.**
#' Example: You want to fit the survival curves of a list of biomarkers/genes
#' in the same data set.
#' Returns a named list of `survfit` objects in the same order as formulas.
#'
#' - **Case 3: One formula, list of data sets.**
#' Example: You want to fit survival curves of one biomarker/gene in multiple
#' cohort of patients (colon, lung, breast).
#' Returns a named list of `survfit` objects in the same order as the data sets.
#'
#' - **Case 4: List of formulas and List of data sets.**
#' Example: You want to fit survival curves of multiple biomarkers/genes in
#' multiple cohort of patients (colon, lung, breast).
#' Each formula will be applied to each of the data set in the data list.
#' Returns a named list of `survfit` objects.
#'
#' - **Case 5: One formula and grouped data sets by one or two variables.**
#' Example: One might like to plot the survival curves of patients treated by
#' drug A vs patients treated by drug B in a dataset grouped by TP53 and/or
#' RAS mutations. In this case use the argument \code{group.by}.
#' Returns a named list of `survfit` objects.
#'
#' - **Case 6: Apply each formula to the matching data set with the same index/position in the list.**
#' Example: formula 1 is applied to data 1, formula 2 is applied to data 2, etc.
#' In this case formula and data lists should have the same length and you should specify the argument `match.fd = TRUE` ( stands for match formula and data).
#' Returns a named list of `survfit` objects.
#'
#' @param formula survival formula. See \link[survival]{survfit.formula}.
#' Can be a list of formula. Named lists are recommended.
#' @param data a data frame in which to interpret the variables named in the formula.
#' Can be a list of data sets. Named lists are recommended.
#' Can be also a grouped dataset as generated by the function \link{surv_group_by}().
#' @param group.by a grouping variables to group the data set by.
#' A character vector containing the name of grouping variables. Should be of length <= 2.
#' @param match.fd logical value. Default is FALSE. Stands for "match formula and data".
#' Useful only when you have a list of formulas and a list of data sets, and
#' you want to apply each formula to the matching data set with the same index/position in the list.
#' For example formula1 is applied to data 1, formula2 is applied to data 2, and so on ....
#' In this case use match.fd = TRUE.
#' @param ... Other arguments passed to the \link[survival]{survfit.formula} function.
#'
#' @return
#' \itemize{
#' \item Returns an object of class survfit if one formula and one data set provided.
#' \item Returns a named list of survfit objects when input is a list of formulas and/or data sets.
#' The same holds true when grouped data sets are provided or when the argument \code{group.by} is specified.
#' \itemize{
#' \item If the names of formula and data lists are available,
#' the names of the resulting survfit objects list are obtained by collapsing the names of formula and data lists.
#' \item If the formula names are not available, the variables in the formulas are extracted and used to build the name of survfit object.
#' \item In the case of grouped data sets, the names of survfit object list are obtained by
#' collapsing the levels of grouping variables and the names of variables in the survival curve formulas.
#' }
#' }
#' @examples
#'
#' library(survival)
#' library(magrittr)
#'
#'# One formula, one data set ---------------
#'fit <- surv_fit(Surv(time, status) ~ sex,
#' data = colon)
#'surv_pvalue(fit)
#'
#'
#'# List of formulas, one data set ----------
#'
#'# Create a named list of formulas
#'formulas <- list(
#' sex = Surv(time, status) ~ sex,
#' rx = Surv(time, status) ~ rx
#')
#'
#'# Fit survival curves for each formula
#'fit <- surv_fit(formulas, data = colon)
#'surv_pvalue(fit)
#'
#'# One formula, list of data sets ----------
#'fit <- surv_fit(Surv(time, status) ~ sex,
#' data = list(colon, lung))
#'surv_pvalue(fit)
#'
#'
#'# List of formulas, list of data sets -----
#'
#'# Create two data sets
#'set.seed(123)
#'colon1 <- dplyr::sample_frac(colon, 1/2)
#'set.seed(1234)
#'colon2 <- dplyr::sample_frac(colon, 1/2)
#'
#'# Create a named list of formulas
#'formula.list <- list(
#' sex = Surv(time, status) ~ sex,
#' adhere = Surv(time, status) ~ adhere,
#' rx = Surv(time, status) ~ rx
#')
#'
#'# Fit survival curves
#'fit <- surv_fit(formula.list, data = list(colon1, colon2),
#' match.fd = FALSE)
#'surv_pvalue(fit)
#'
#'
#'# Grouped survfit -------------------------
#'fit <- surv_fit(Surv(time, status) ~ sex,
#' data = colon, group.by = "rx")
#'
#'# Alternatively, do this
#'fit <- colon %>%
#' surv_group_by("rx") %>%
#' surv_fit(Surv(time, status) ~ sex, data = .)
#'
#'surv_pvalue(fit)
#'
#' @seealso ggsurvplot(), surv_pvalue(), surv_median()
#' @rdname surv_fit
#' @export
surv_fit <- function(formula, data, group.by = NULL, match.fd = FALSE, ...){
if(inherits(data, c("surv_group_by", "list")) & !is.null(group.by))
stop("Dataset is grouped, but group.by is not NULL")
# List of formulas and List of data sets with match.fd = TRUE ----------------
# They should have the same length.
# Each formula is applied to each data set of the same index
if(.is_list(formula) & .is_list(data) & match.fd){
if(length(formula) != length(data))
stop("When formula and data are lists, ",
"they should have the same length")
res <- purrr::map2(formula, data, .survfit1, ...)
}
else if(.is_list (formula) & .is_list (data) & !match.fd){
.map_each <- function(formula, data){
purrr::map(data, .survfit2, formula, ...)
}
res <- purrr::map(formula, .map_each , data) %>%
dplyr::combine()
}
else if(.is_list (formula) & !.is_list (data)){
res <- purrr::map(formula, .survfit1, data, ...)
}
else if(!.is_list (formula) & .is_list (data)){
res <- map(data, .survfit2, formula, ...)
}
if(.is_list(data)){
DNAME <- names(data)
if(is.null(DNAME) & !.is_grouped_data(input_data)){
DNAME <- deparse(substitute(data)) %>%
as.character() %>% gsub("list\\(|\\)", "", .) %>%
strsplit(., ",\\s*", perl = TRUE) %>% unlist()
}
}
else DNAME <- deparse(substitute(data))
FNAME <- .get_formula_names(formula)
if(.is_list (formula) & .is_list (data) & !match.fd){
nf <- length(formula)
ndata <- length(data)
FNAME <- rep(FNAME, each = ndata )
DNAME <- rep(DNAME, nf)
}
if(.is_list(data) | .is_list(formula))
names(res) <- .collapse(DNAME, FNAME, sep = "::")
res
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Helper functions
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Returns a list of valid survfit options
#.........................................................................
.valid_survfit_options <- function(formula, data, ...){
.dots <- list(...)
survoptions <- list(formula = formula, data = data)
allowed.options <- c("weights", "subset", "na.action", "etype", "id",
"istate", "type", "error", "conf.type", "conf.lower",
"start.time", "conf.int", "se.fit", "influence")
for(argument in names(.dots)){
if(argument %in% allowed.options)
survoptions[[argument]] <- .dots[[argument]]
}
survoptions
}
# .survfit1(): formula is the first argument
#.........................................................................
.survfit1 <- function(formula, data, ...){
res <- do.call(survfit, .valid_survfit_options(formula, data, ...))
structure(res, class = "survfit")
}
# Formula is the second argument
#.........................................................................
.survfit2 <- function(data, formula, ...){
res <- do.call(survfit, .valid_survfit_options(formula, data, ...))
structure(res, class = "survfit")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.