rwToT2: Real World Time on Treatment R Package

Documented in get_restricted_mean

#' Calcuate Restricted Mean
#'
#' Function to calculate restricted mean (RMST) based on method of choice at time
#' \code{t}. RMST is defined as the mean rwToT of all subjects in the study
#' population followed up to t, and is simply the area under the rwToT KM curve
#' up to t. The choice of t will be based on level of data maturity. A small
#' risk set size could result in large variation in the estimation and difficulty
#' in interpretation. Pocock et al recommend, in the context of the graphical display
#' of the survival curve, ensuring that the risk set size is at least 10% to 20%
#' of the participants that have not yet experienced the event (10% is used in
#' this function). When sufficient risk size set is not available at t, cure
#' extrapolation based on parametric functions will be applied and the model with
#' the lowest AIC will be used. Please refer Protocol(VEAP 7760) for detailed
#' method description.
#'
#'
#' @param df_cohort A dataframe. Minimum required columns:
#' \itemize{
#' \item{`STATUS` : Censor status (0 = censored, 1 = event)}
#' \item{`DD` : Number of days between first and last dose}
#' \item{`START_DATE` : First dose date}
#' \item{`LAST_ACTIVITY_DATE` : Last Activity date}
#' }
#' @param t A numeric value: the time point of interest
#' @return Restricted mean at t months using best-fitting parametric function
#' @return KM Parametric curve data
#' @seealso \code{\link{get_analysis}}, \code{\link{get_treatment_rate}}
#' @examples
#' # Compute restricted mean for AML data
#' get_restricted_mean(rwToT_aml, 50)
#' @importFrom magrittr %>%
#' @importFrom dplyr filter arrange desc mutate select
#' @importFrom survival survfit Surv
#' @importFrom flexsurv flexsurvreg rmst_exp rmst_weibull rmst_lnorm rmst_llogis rmst_gompertz
#' @importFrom stats median time setNames
#' @importFrom purrr map map_dbl
#'
#'
#' @export
get_restricted_mean <- function(df_cohort, t) {

  #creates survival time object and adds to dataframe---------------------------
  df_cohort <- df_cohort %>%
    mutate(SurvObj = Surv(DD, STATUS))

  #survival fit object----------------------------------------------------------
  fit_km <- survfit(SurvObj ~ 1, data = df_cohort, conf.type = "log-log")

  #KM curve data from fit_km----------------------------------------------------
  output_km_curve <- data.frame(cbind(
    time = fit_km$time,
    nRisk = fit_km$n.risk,
    nEvent = fit_km$n.event,
    nCensor = fit_km$n.censor,
    pSurv = fit_km$surv,
    upper = fit_km$upper,
    lower = fit_km$lower
  ))


  n_risk_original <- output_km_curve[1, "nRisk"] * 0.1
  tmp_km_curve <- output_km_curve %>%
    filter(time <= t) %>%
    arrange(desc(time))
  n_risk_t <- tmp_km_curve[1, "nRisk"]
  
  if (fit_km$n < 10){
    warning(paste0("Sample size (n = ", fit_km$n, ") is too small!"))
    return(list(
      "mean" = NA,
      "parametric_km" = NULL
    ))}

  #perform pocock method--------------------------------------------------------
  #if pocock method not suitable, move on to parametric method------------------
  else if (n_risk_t >= n_risk_original) {

    #restricted mean survival time----------------------------------------------
    rmean <- fit_km %>%
      survival:::survmean(rmean = t)

    rmean_ci <- paste(round(rmean$matrix["*rmean"], 1), " (",
      round((rmean$matrix["*rmean"] - 1.96 * rmean$matrix["*se(rmean)"]), 1), " - ",
      round((rmean$matrix["*rmean"] + 1.96 * rmean$matrix["*se(rmean)"]), 1), ")",
      sep = ""
    )

    #output rmean from Pocock method--------------------------------------------
    return(list(
      "mean" = rmean_ci,
      "parametric_km" = NULL
    ))
  }
  else {
    #for parametric method find the best performing model-----------------------
    model <- best_model(df_cohort, t)

    #output KM curve------------------------------------------------------------
    #t is Time or vector of times to include in the curve-----------------------
    #for our output we have fixed t, up to 48 months----------------------------
    output_extra_km_curve <- summary(model$output_model, t = 0:48)[[1]]

    names(model$best_model_rmst_mean) <- c("mean", "model")
    output <- paste(model$best_model_rmst_mean$mean, " [",
      model$best_model_rmst_mean$model, "]",
      sep = ""
    )
    return(list(
      "mean" = output,
      "parametric_km" = output_extra_km_curve
    ))
  }
}