Nothing
#' Fit Cox Proportional Hazard Ratio model
#'
#' @description
#' Fit Cox proportional hazards model on an time-to-event endpoint.
#'
#' Refer to \href{https://zhangh12.github.io/TrialSimulator/articles/wrappers.html}{this vignette}
#' for more information and examples.
#'
#' @param formula An object of class \code{formula} that can be used with
#' \code{survival::coxph}. The data frame \code{data} must consist a column
#' \code{arm} and a column of the endpoint specified in \code{formula}.
#' Covariates can be adjusted. Interactions between \code{arm} and covariates are
#' allowed in \code{formula}, but \code{arm} must has a term of main effect,
#' and only estimate of that main effect is tested.
#' @param placebo Character. String indicating the placebo in \code{data$arm}.
#' @param data Data frame. Usually it is a data snapshot locked at a milestone.
#' @param alternative a character string specifying the alternative hypothesis,
#' must be one of \code{"greater"} or \code{"less"}, i.e., one-sided test is
#' enforced. No default value.
#' \code{"greater"} means superiority of treatment over placebo is established
#' by an hazard ratio greater than 1.
#' @param scale character. The type of estimate in the output. Must be one
#' of \code{"log hazard ratio"} or \code{"hazard ratio"}. No default value.
#' @param tidy logical. \code{FALSE} if more information are returned.
#' Default: \code{TRUE}.
#' @param ... (optional) subset conditions compatible with \code{dplyr::filter}.
#' \code{coxph} will be fitted on this subset only. This argument can be useful
#' to create a subset of data for analysis when a trial consists of more
#' than two arms. By default, it is not specified,
#' all data will be used to fit the model. More than one condition can be
#' specified in \code{...}, e.g.,
#' \code{fitCoxph(formula, 'pbo', data, 'less', 'log hazard ratio', arm \%in\% c('pbo', 'low dose'), x > 0.5)},
#' which is equivalent to:
#' \code{fitCoxph(formula, 'pbo', data, 'less', 'log hazard ratio', arm \%in\% c('pbo', 'low dose') & x > 0.5)}.
#' Note that if more than one treatment arm are present in the data after
#' applying filter in \code{...}, models are fitted and tested for placebo verse
#' each of the treatment arms.
#'
#' @returns a data frame with three columns:
#' \describe{
#' \item{\code{arm}}{name of the treatment arm. }
#' \item{\code{placebo}}{name of the placebo arm. }
#' \item{\code{estimate}}{estimate of main effect of arm, depending on \code{scale}. }
#' \item{\code{p}}{one-sided p-value for log hazard ratio (treated vs placebo). }
#' \item{\code{info}}{the number of events of the endpoint in the subset. }
#' \item{\code{z}}{the z statistics of log hazard ratios. }
#' }
#'
#' @export
#'
fitCoxph <- function(formula, placebo, data, alternative, scale, ..., tidy = TRUE) {
if(!inherits(formula, 'formula')){
stop('formula must be a formula object with "arm" indicating the column arm in data. ')
}
if(!is.character(placebo) || length(placebo) != 1){
stop("placebo must be a single character string")
}
if(!is.data.frame(data)){
stop("data must be a data frame")
}
alternative <- match.arg(alternative, choices = c('greater', 'less'))
valid_scales <- c('log hazard ratio', 'hazard ratio')
if(!is.character(scale) || length(scale) != 1 || !(scale %in% valid_scales)){
stop('scale must be one of ', paste0(valid_scales, collapse = ', '))
}
vars_in_formula <- all.vars(formula)
missing_vars <- setdiff(vars_in_formula, names(data))
if(length(missing_vars) > 0){
stop('The following variable(s) used in formula are missing from data: \n',
paste0(missing_vars, collapse = ', '))
}
if(!'arm' %in% vars_in_formula){
stop('formula must include main effect term for arm. ')
}
# Prepare the data based on condition ...
filtered_data <- if(...length() == 0){
data
}else{
tryCatch({
data %>% dplyr::filter(...)
},
error = function(e){
stop('Error in filtering data for coxph test. ',
'Please check condition in ..., ',
'which should be compatible with dplyr::filter. ')
})
}
# Check if any data remains after filtering
if (nrow(filtered_data) == 0) {
stop("No data remaining after applying subset condition. ")
}
treatment_arms <- setdiff(unique(filtered_data$arm), placebo) %>% sort()
ret <- NULL
for(trt_arm in treatment_arms){
sub_data <- filtered_data %>% dplyr::filter(.data$arm %in% c(placebo, trt_arm))
# Ensure arm is a factor with placebo and treatment
sub_data$arm <- factor(sub_data$arm, levels = c(placebo, trt_arm))
# Fit the Cox model
fit <- tryCatch({
coxph(formula, data = sub_data, model = TRUE)
}, error = function(e){
stop('coxph model fitting failed: ', e$message)
})
sfit <- summary(fit)
## identify coefficient of treatment arm (main effect)
coef_row <- grep(paste0('^arm', trt_arm, '$'), rownames(sfit$coef))
if(length(coef_row) != 1){
stop('fixCoxph: unable to identify arm main effect coefficient for arm <', trt_arm,
'>. Please use <arm> in formula explicitly. ')
}
estimate <- sfit$coef[coef_row, 'coef']
se <- sfit$coef[coef_row, 'se(coef)']
z <- estimate / se
p <- ifelse(alternative == 'greater', 1 - pnorm(z), pnorm(z))
model_data <- model.frame(fit)
surv_obj <- model.response(model_data)
info <- sum(surv_obj[, 'status'] == 1)
res <- data.frame(arm = trt_arm, placebo = placebo,
estimate = ifelse(scale == 'log hazard ratio',
estimate, exp(estimate)),
p = p, info = info, z = z
)
## return only if tidy = FALSE
if(!tidy){
stopifnot(length(surv_obj) == nrow(model_data))
res$info_pbo <- sum(surv_obj[, 'status'] == 1 & model_data$arm %in% placebo)
res$info_trt <- sum(surv_obj[, 'status'] == 1 & model_data$arm %in% trt_arm)
stopifnot(res$info_pbo + res$info_trt == res$info)
res$n_pbo <- sum(model_data$arm %in% placebo)
res$n_trt <- sum(model_data$arm %in% trt_arm)
stopifnot(res$n_trt + res$n_pbo == nrow(model_data))
}
ret <- rbind(ret, res)
}
rownames(ret) <- NULL
class(ret) <- c('fit_coxph', class(ret))
ret
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.