R/calc_hr_pi.R

Defines functions calc_hr_pi

Documented in calc_hr_pi

#' Generate hazard ratio with prediction intervals from parametric bootstrap simulation
#'
#' @export
#' @param sim A `survparamsim` class object generated by [surv_param_sim()] function.
#' @param trt A string to specify which column define treatment status to calculate HR.
#' @param group Optional string(s) to specify grouping variable(s).
#' You will have faceted histograms for these variables in [plot_hr_pi()] function.
#' @param pi.range Prediction interval for simulated HR.
#' @param calc.obs A logical to specify whether to calculate HR for the observed data.
#' Need be set as FALSE if survival information in the `newdata` is dummy.
#' @param trt.assign Specify which of the categories of `trt` need to be considered as control group.
#' See details below if you have more than two categories.
#'
#' @details
#' If your `trt` has more than two categories/levels and want to specify which one to use as a
#' reference group, you can convert the column into a factor in the `newdata` input for
#' [surv_param_sim()]. The first level will be used as a reference group.
#'
calc_hr_pi <- function(sim, trt, group = NULL, pi.range = 0.95,
                       calc.obs = TRUE, trt.assign = c("default", "reverse")){

  # Replace nest with packageVersion("tidyr") >= '1.0.0' for a speed issue
  # and different behavior when no grouping is supplied
  # See https://github.com/tidyverse/tidyr/issues/751
  nest2 <- ifelse(utils::packageVersion("tidyr") == '1.0.0', tidyr::nest_legacy, tidyr::nest)
  unnest2 <- ifelse(utils::packageVersion("tidyr") == '1.0.0', tidyr::unnest_legacy, tidyr::unnest)

  trt.assign <- match.arg(trt.assign)

  newdata.nona.obs <- sim$newdata.nona.obs
  newdata.nona.sim <- sim$newdata.nona.sim

  if(missing(trt)) stop("`trt` needs to be specified")
  if(length(trt) > 1) stop("`trt` can only take one string")

  group.syms <- rlang::syms(group)
  trt.sym    <- rlang::sym(trt)

  # Check trt values
  n.distinct.trt <- check_trt(newdata.nona.obs, trt.sym, group.syms)

  # Convert trt to factor
  newdata.nona.obs <-
    newdata.nona.obs %>%
    dplyr::mutate(!!trt.sym := factor(!!trt.sym))
  newdata.nona.sim <-
    newdata.nona.sim %>%
    dplyr::mutate(!!trt.sym := factor(!!trt.sym))

  # Reverse control vs trt
  if(trt.assign == "reverse"){
    newdata.nona.obs <-
      newdata.nona.obs %>%
      dplyr::mutate(!!trt.sym := forcats::fct_rev(!!trt.sym))
    newdata.nona.sim <-
      newdata.nona.sim %>%
      dplyr::mutate(!!trt.sym := forcats::fct_rev(!!trt.sym))
  }

  trt.levels <- dplyr::pull(newdata.nona.obs, !!trt.sym) %>% levels()


  if(methods::is(sim, "survparamsim_pre_resampled")){
    if(sim$newdata.orig.missing & calc.obs) {
      warning("Original observed data not provided in `surv_param_sim_pre_resampled()` and HR will not be estimated for the observed data. Speficy `calc.obs = FALSE` to suppress this warning.")
      calc.obs = FALSE
    }
  }

  # Calc HR for observed data
  if(calc.obs){
    obs.grouped <-
      newdata.nona.obs %>%
      dplyr::group_by(!!!group.syms)

    if(length(dplyr::group_vars(obs.grouped)) == 0 &
       utils::packageVersion("tidyr") >= '1.0.0') {
      obs.nested <-
        obs.grouped %>%
        nest2(data = dplyr::everything())
    } else {
      obs.nested <- nest2(obs.grouped)
    }

    ## Define function to calc HR
    calc_hr_each_obs <- function(x){
      formula <-
        paste(attributes(formula(sim$survreg))$variables,"~",trt)[2] %>%
        stats::as.formula()

      cfit <- survival::coxph(formula, data=x)
      p.value.logrank <- broom::glance(cfit)$p.value.log
      cfit %>%
        broom::tidy(exponentiate = TRUE) %>%
        dplyr::mutate(!!trt.sym := factor(substr(term, nchar(trt)+1, nchar(term)),
                                          levels = trt.levels)) %>%
        dplyr::select(!!trt.sym, HR = estimate, p.value.coef.wald = p.value) %>%
        dplyr::mutate(p.value.logrank = p.value.logrank)
    }
    safe_calc_hr_each_obs <- purrr::safely(calc_hr_each_obs, otherwise = NA)

    ## Calc HR
    obs.hr <-
      obs.nested %>%
      dplyr::mutate(coxfit = purrr::map(data, safe_calc_hr_each_obs),
                    HR = purrr::map(coxfit, ~.$result),
                    description = "obs") %>%
      unnest2(HR) %>%
      dplyr::select(-data, -coxfit) %>%
      dplyr::ungroup()

    # Reverse back the factor
    if(trt.assign == "reverse"){
      obs.hr <-
        obs.hr %>%
        dplyr::mutate(!!trt.sym := forcats::fct_rev(!!trt.sym))
    }

    if(dplyr::filter(obs.hr, is.na(HR)) %>% nrow() > 0) {
      warning("HR was not calculable in at least one subgroup for the observed data, likely due to small number of subjects.")
    }
  } else {
    obs.hr <- NULL
  }


  # Calculate HR simulated data

  ## First nest data - cox fit will done for each nested data
  newdata.trt.group <-
    newdata.nona.sim %>%
    dplyr::select(subj.sim, !!trt.sym, !!!group.syms)

  sim.nested <-
    sim$sim %>%
    dplyr::left_join(newdata.trt.group, by = "subj.sim") %>%
    dplyr::group_by(rep, !!!group.syms) %>%
    nest2()


  ## Define function to calc HR
  calc_hr_each_sim <- function(x){
    formula <-
      paste("Surv(time, event) ~",trt) %>%
      stats::as.formula()

    cfit <- survival::coxph(formula, data=x)
    p.value.logrank <- broom::glance(cfit)$p.value.log
    cfit %>%
      broom::tidy(exponentiate = TRUE) %>%
      dplyr::mutate(!!trt.sym := factor(substr(term, nchar(trt)+1, nchar(term)),
                                        levels = trt.levels))%>%
      dplyr::select(!!trt.sym, HR = estimate, p.value.coef.wald = p.value) %>%
      dplyr::mutate(p.value.logrank = p.value.logrank)
  }
  safe_calc_hr_each_sim <- purrr::safely(calc_hr_each_sim, otherwise = NA)

  ## Calc HR
  sim.hr <-
    sim.nested %>%
    dplyr::mutate(coxfit = purrr::map(data, safe_calc_hr_each_sim),
                  HR = purrr::map(coxfit, ~.$result),
                  description = "sim") %>%
    unnest2(HR) %>%
    dplyr::select(-data, -coxfit) %>%
    dplyr::ungroup()

  # Reverse back the factor
  if(trt.assign == "reverse"){
    sim.hr <-
      sim.hr %>%
      dplyr::mutate(!!trt.sym := forcats::fct_rev(!!trt.sym))
  }

  if(dplyr::filter(sim.hr, is.na(HR)) %>% nrow() > 0) {
    warning("HR was not calculable in at least one subgroup for the simulated data, likely due to small number of subjects and this might results in biased estimates for prediction intervals.")
  }

  ## Calc quantiles
  quantiles <-
    tibble::tibble(description = c("pi_low", "pi_med", "pi_high"),
                   quantile = c(0.5 - pi.range/2, 0.5, 0.5 + pi.range/2))

  sim.hr.pi <-
    sim.hr %>%
    dplyr::group_by(!!!group.syms, !!trt.sym) %>%
    dplyr::summarize(pi_low = as.numeric(stats::quantile(HR, probs = 0.5 - pi.range/2, na.rm = TRUE)),
                     pi_med = as.numeric(stats::quantile(HR, probs = 0.5, na.rm = TRUE)),
                     pi_high= as.numeric(stats::quantile(HR, probs = 0.5 + pi.range/2, na.rm = TRUE))) %>%
    dplyr::ungroup() %>%
    tidyr::gather(description, HR, pi_low:pi_high) %>%
    dplyr::left_join(quantiles, by = "description")

  if(calc.obs){
    hr.pi.quantile <-
      dplyr::select(obs.hr, -p.value.coef.wald, -p.value.logrank) %>%
      dplyr::bind_rows(sim.hr.pi, .) %>%
      dplyr::arrange(!!!group.syms, !!trt.sym)

  } else {
    hr.pi.quantile <-
      sim.hr.pi %>%
      dplyr::arrange(!!!group.syms, !!trt.sym)
  }

  # Output
  out <- list()

  out$calc.obs <- calc.obs
  out$pi.range   <- pi.range

  out$group.syms <- group.syms
  out$trt.sym    <- trt.sym

  out$obs.hr <- obs.hr
  out$sim.hr <- sim.hr
  out$hr.pi.quantile <- hr.pi.quantile

  out$trt.levels <- trt.levels

  structure(out, class = c("survparamsim.hrpi"))
}


#' Plot simulated HR histogram(s) overlayed with prediction intervals
#'
#' @export
#' @param hr.pi a return object from \code{\link{calc_hr_pi}} function.
#' @param show.obs A logical specifying whether to show observed HR on the plot.
#'   This will have no effect if `calc.obs` was set to `FALSE` in \code{\link{calc_hr_pi}}.
#'
plot_hr_pi <- function(hr.pi, show.obs = TRUE){

  obs.hr <- hr.pi$obs.hr
  sim.hr <- hr.pi$sim.hr
  hr.pi.quantile  <- hr.pi$hr.pi.quantile
  trt.levels <- hr.pi$trt.levels

  group.syms <- hr.pi$group.syms
  trt.sym    <- hr.pi$trt.sym

  g <-
    ggplot2::ggplot(sim.hr, ggplot2::aes(HR)) +
    ggplot2::geom_histogram(bins = 30, alpha = 0.5, color = "black") +
    ggplot2::geom_vline(data = dplyr::filter(hr.pi.quantile, description %in% c("pi_low", "pi_high")),
               ggplot2::aes(xintercept = HR),
               lty="dashed")


  ## Observed
  if(hr.pi$calc.obs & show.obs) {
    g <-
      g +
      ggplot2::geom_vline(data = dplyr::filter(hr.pi.quantile, description == "obs"),
                          ggplot2::aes(xintercept = HR),
                          color = "red", lwd = 1)

  }

  # Facet fig based on group
  if(length(trt.levels) > 2) {
    g <- g + ggplot2::facet_grid(ggplot2::vars(!!trt.sym),
                                 ggplot2::vars(!!!group.syms),
                                 labeller = ggplot2::label_both)
  } else {
    if(length(group.syms) == 1 || length(group.syms) >= 3 ) {
      g <- g + ggplot2::facet_wrap(ggplot2::vars(!!!group.syms),
                                   labeller = ggplot2::label_both)
    } else if (length(group.syms) == 2) {
      g <- g + ggplot2::facet_grid(ggplot2::vars(!!group.syms[[1]]),
                                   ggplot2::vars(!!group.syms[[2]]),
                                   labeller = ggplot2::label_both)
    }
  }

  return(g)

}


check_trt <- function(newdata.nona.obs, trt.sym, group.syms){

  # Check trt values
  trt.vec <-
    newdata.nona.obs %>%
    dplyr::select(!!trt.sym) %>%
    .[[1]]

  n.distinct.trt <- length(unique(trt.vec))

  if(sum(is.na(trt.vec)) > 0) stop("`trt` cannot has NA values")
  if(is.factor(trt.vec) & nlevels(trt.vec) != n.distinct.trt) warning("`trt` variable is factor and has unused levels, which is automatically dropped`")
  if(is.ordered(trt.vec)) stop("`trt` cannot be an ordered factor, please use a regular factor instead")

  return(n.distinct.trt)

}




#' @rdname survparamsim-methods
#' @export
print.survparamsim.hrpi <- function(x, ...){
  trt <- as.character(x$trt.sym)
  group <- as.character(x$group.syms)

  cat("---- Simulated and observed (if calculated) hazard ratio ----\n")
  cat("* Use `extract_hr_pi()` to extract prediction intervals and observed HR\n")
  cat("* Use `extract_hr()` to extract individual simulated HRs\n")
  cat("* Use `plot_hr_pi()` to draw histogram of predicted HR\n\n")
  cat("* Settings:\n")
  cat("    trt: ", trt, "\n", sep="")
  cat("         ('", paste(x$trt.levels[-1], collapse = "', '"), "' as test trt, '", x$trt.levels[[1]], "' as control)\n", sep="")
  cat("    group:", ifelse(is.null(group), "(NULL)", group), "\n", sep=" ")
  cat("    pi.range:", x$pi.range, "\n", sep=" ")
  cat("    calc.obs:", x$calc.obs, "\n", sep=" ")


}


#' @rdname survparamsim-methods
#' @export
summary.survparamsim.hrpi <- function(object, ...) {

  return(extract_hr_pi(object))
}

Try the survParamSim package in your browser

Any scripts or data that you put into this service are public.

survParamSim documentation built on June 3, 2022, 9:06 a.m.