#' Create patient-level and group-level summary statistics.
#'
#'
#' Data format should be in 'long' format, where each PRO-CTCAE item is a
#' variable/column.
#'
#' @param dsn A data.frame object with PRO-CTCAE data.
#' @param id_var A character string. Name of ID variable differentiating each
#' PRO-CTCAE survey/participant entered as a quoted string.
#' @param cycle_var A character string. Name of variable differentiating one
#' longitudinal/repeated. PRO-CTCAE survey from another, within an individual
#' ID.
#' @param baseline_val A number indicating the expected baseline cycle/time
#' point.
#' @param summary_measure A character string. Type of summary statistic to be
#' used. Please consult current literature for appropriate interpretations of
#' the summary measure selected and suitable analysis procedures for comparing
#' groups. Options include: \code{"max"} = Use subjects'
#' maximum score. \code{"max_post_bl"} = Use subjects' maximum score
#' post-baseline visit. \code{"bl_adjusted"} = Use subjects' baseline adjusted
#' score over the study period. The baseline adjusted score is derived by the
#' following: If the maximum score post-baseline is more severe than the
#' baseline score, then the use maximum score post-baseline is used as the
#' adjusted score. Otherwise, if the maximum score post-baseline is the same
#' or less serve than the baseline score, then zero (0) is used as the
#' adjusted score. \code{"toxicity_idex"} = Construct patient-level toxicity
#' index. \code{"AUC_worsening"} = Calculate group-level AUC describing
#' @param baseline_val A number indicating the expected baseline cycle/time
#' point.
#' @param arm_var A character string. Name of arm variable differentiating
#' treatment arms or other grouping factor. Required for group-level
#' summary measures.
#' @return A data.frame with only the id and PRO-CTCAE variables being summarized.
#' Each subject will now only have 1 observation (PRO-CTCAE variables are now the summary measure value).
#' @importFrom magrittr %>%
#' @examples
#' toxSummary(dsn=ProAE::tox_acute,
#' id_var="id",
#' cycle_var="Cycle",
#' baseline_val=1,
#' summary_measure = "max")
#' @export
toxSummary <- function(dsn,
id_var,
cycle_var,
summary_measure,
baseline_val=NA,
arm_var=NA){
# ----------------------------------------------------------------
# --- Checks 1/2
# ----------------------------------------------------------------
## -- Assign binding for objects
baseline = NULL
## -- Required parameters
if(exists("dsn")){
if(!is.data.frame(dsn)){
stop("param dsn must be provided as a data.frame object")
}
} else {stop("param dsn not provided")}
if(exists("id_var")){
if(!is.character(id_var)){
stop("param id_var must be provided as a character string")
} else if (!(id_var %in% colnames(dsn))){
stop(paste0("param id_var (", id_var, ") not found as a variable in dsn (", deparse(substitute(dsn)), ")"))
}
} else {stop("param id_var not provided")}
if(exists("cycle_var")){
if(!is.character(cycle_var)){
stop("param cycle_var must be provided as a character string")
} else if (!(cycle_var %in% colnames(dsn))){
stop(paste0("param cycle_var (", cycle_var, ") not found as a variable in dsn (", deparse(substitute(dsn)), ")"))
}
} else {stop("param cycle_var not provided")}
if(!(summary_measure %in% c("max", "max_post_bl", "bl_adjusted", "toxicity_index", "AUC_worsening", "AUC_improvement"))){
stop("param summary_measure must be one of the fallowing; 'max', 'max_post_bl', 'bl_adjusted', 'AUC_worsening', 'AUC_improvement'")
}
## -- Checks for summary measures requiring baseline data information.
if(summary_measure %in% c("max_post_bl", "bl_adjusted", "AUC_worsening", "AUC_improvement")){
if(is.na(baseline_val)){
stop("param baseline_val must be provided for this measure")
} else{
if(!(is.numeric(baseline_val) | is.integer(baseline_val) | length(baseline_val)==1)){
stop("param baseline_val must be provided for this measure as a single number, of class numeric or integer")
}
if(min(dsn[, cycle_var]) != baseline_val){
stop(paste0("The value of the param baseline_val (", baseline_val, ") is not the smallest ", cycle_var, "."))
}
}
}
if(summary_measure %in% c("AUC_worsening", "AUC_improvement")){
if(is.na(arm_var) | !(arm_var %in% colnames(dsn))){
stop(paste0("A valid arm or grouping variable within dsn must be provided for the summary measure selected"))
}
}
# ----------------------------------------------------------------
# --- Get existing PRO-CTCAE variables in dsn
# ----------------------------------------------------------------
## -- Individual items
dsn_items0 = toupper(names(dsn)[toupper(names(dsn)) %in% proctcae_vars$name])
dsn_items = dsn_items0[! dsn_items0 %in% as.character(proctcae_vars$name[proctcae_vars$fmt %in% c("yn_2_fmt", "yn_3_fmt", "yn_4_fmt")])]
## -- Composites
proctcae_vars_comp0 = proctcae_vars[,-1] %>% dplyr::mutate_if(is.factor, as.character)
proctcae_vars_comp0 = proctcae_vars_comp0[!proctcae_vars_comp0$name %in% as.character(proctcae_vars_comp0$name[proctcae_vars_comp0$fmt %in% c("yn_2_fmt",
"yn_3_fmt",
"yn_4_fmt")]),]
proctcae_vars_comp = c()
proctcae_vars_comp$name = paste0(substr(proctcae_vars_comp0$name, 1, nchar(proctcae_vars_comp0$name)-5), "_COMP")
proctcae_vars_comp$short_label = sub(proctcae_vars_comp0$short_label, pattern = " [[:alpha:]]*$", replacement = "")
proctcae_vars_comp = unique(data.frame(proctcae_vars_comp, stringsAsFactors=FALSE))
dsn_comps = toupper(names(dsn)[toupper(names(dsn)) %in% proctcae_vars_comp$name])
dsn_items = c(dsn_items, dsn_comps)
# ----------------------------------------------------------------
# --- Checks 2/2
# ----------------------------------------------------------------
## -- Confirm there are available PRO-CTCAE variables within dsn with expected naming convention
if(identical(dsn_items, character(0))){
stop(paste0("No PRO-CTCAE variables found within dsn (",
deparse(substitute(dsn)),
") meeting the expected naming convention to summarize"))
}
## -- Check for any duplicate individuals within cycles
if(any(duplicated(dsn[,c(id_var, cycle_var)]))){
stop(paste0("Duplicate observations were found within id_var and cycle_var combinations (", id_var, " and ", cycle_var, ")"))
}
## -- Check to make sure scores are elements of {0, 1, 2, 3, 4}
if(sum(!unlist(dsn[, dsn_items], use.names = FALSE)%in%c(0, 1, 2, 3, 4, NA))>0){
stop("PRO-CTCAE item scores must be an integer between 0 and 4.")
}
# ----------------------------------------------------------------
# --- Individual-level summary measures
# ----------------------------------------------------------------
if(summary_measure %in% c("max", "max_post_bl", "bl_adjusted", "toxicity_index")){
## -----------------------------
## -- Max
## -- This is the largest score across all timepoints for any observed data
## -----------------------------
if(summary_measure == "max"){
dsn_summary <- dsn %>%
dplyr::group_by(get(id_var)) %>%
dplyr::summarise(dplyr::across(dplyr::all_of(dsn_items), ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE))), .groups = "drop")
}
## -----------------------------
## -- Max post-baseline
## -- This is the largest score across all timepoints after the baseline. Must have data after baseline to
## -- calculate this summary measure, otherwise return NA
## -----------------------------
if(summary_measure == "max_post_bl") {
dsn_summary <- dsn %>%
dplyr::mutate(baseline = dplyr::if_else(get(cycle_var) == baseline_val, TRUE, FALSE)) %>%
dplyr::group_by(get(id_var)) %>%
dplyr::filter(!baseline) %>%
dplyr::summarise(dplyr::across(dplyr::all_of(dsn_items), ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), .groups = "drop"))
}
## -----------------------------
## -- Baseline adjusted max
## -- This is the largest score across all timepoints after the baseline, including only scores that were
## -- worse than the patient's baseline score. Must have data at baseline and after baseline to
## -- calculate this summary measure, otherwise return NA
## -- Function to calculate baseline adjustment summary statistic.
## -- scores := a numeric vector for each cycle for a given id
## -- baseline := a boolean vector identifying which score is the baseline score
## -----------------------------
bl_adjusted <- function(scores, baseline) {
if(length(scores) == 1 | sum(baseline) == 0){
return(NA)
} else {
base_val = scores[which(baseline)]
max_post_bl_val = ifelse(all(is.na(scores[which(!baseline)])), NA, max(scores[which(!baseline)], na.rm = TRUE))
ifelse(base_val >= max_post_bl_val, 0, max_post_bl_val)
}
}
if(summary_measure == "bl_adjusted"){
dsn_summary = dsn %>%
dplyr::mutate(baseline = (get(cycle_var) == baseline_val)) %>%
dplyr::group_by(get(id_var)) %>%
dplyr::summarise(dplyr::across(dplyr::all_of(dsn_items), ~bl_adjusted(., baseline)), .groups = "drop")
}
## -----------------------------
## -- Toxicity index
## -----------------------------
toxicity_index = function(x){
# -- Remove NA obs and sort descending
x_tmp = sort(x[!is.na(x)], decreasing = TRUE)
if(length(x_tmp)==1){
# only one grade
ti = x_tmp[1]
} else if(sum(x_tmp[-1])==0){
# only one none-zero grade
ti = x_tmp[1]
} else {
# compute
ti = x_tmp[1]
for(i in 1:(length(x_tmp)-1)){
ti = ti + (x_tmp[i+1] / prod((1+x_tmp[1:i])))
# -- prevent default rounding for large decimal portions
# if ti decimal portion approaches 1 break with ti s.e.t. [integer].9999
if(ti-x_tmp[1] >= 0.9999){
ti = x_tmp[1] + 0.9999
break
}
}
}
return(ti)
}
if(summary_measure == "toxicity_index"){
dsn_summary = dsn %>%
dplyr::group_by(get(id_var)) %>%
dplyr::summarise(dplyr::across(dplyr::all_of(dsn_items), ~toxicity_index(.)), .groups = "drop")
}
## -----------------------------
## -----------------------------
## -- update id_var name
colnames(dsn_summary)[1] = id_var
## -- add back in any previous id's that were filtered out
dsn_summary_complete = dsn %>%
dplyr::select(dplyr::all_of(id_var)) %>%
dplyr::distinct() %>%
dplyr::left_join(dsn_summary, by = dplyr::all_of(id_var))
}
## ----------------------------------------------------------------
## --- Group-level summary measures
## ----------------------------------------------------------------
if(summary_measure %in% c("AUC_worsening", "AUC_improvement")){
## -----------------------------
## -- AUC: AE burden worsening/improvement from baseline
## -----------------------------
# -- Function for line-line intersection
intersects_fun = function(blmean, x, y){
intersects = c()
for(i in 1:(length(y)-1)){
y_ith_pair = y[i:(i+1)]
x_ith_pair = x[i:(i+1)]
# -- Get intersection if baseline value falls between the two adjacent points
if(sum(blmean <= y_ith_pair)==1){
# x-y swap here to accommodate vertical line intersection
int_fun = stats::approxfun(y=x_ith_pair, x=y_ith_pair)
int = int_fun(v = blmean)
if(!is.na(int)){
intersects = c(intersects, int)
}
}
}
x2_0 = c(x, intersects)
y2 = c(y,rep(rep(blmean, length(intersects))))
# Reorder by time with additional time at intersections
x2 = x2_0[order(x2_0)]
y2 = y2[order(x2_0)]
return(list(x2,y2))
}
dsn_slice = dsn[,c(id_var, cycle_var, arm_var, dsn_items)]
# -- Group-level AE means data
group_auc = stats::aggregate(data = dsn_slice[,c(arm_var, cycle_var, dsn_items)],
stats::formula(paste0(". ~", arm_var,"+", cycle_var)),
mean)
# -- Create data frame for AUC to be attached
group_out1 = data.frame(arm = unique(group_auc[,arm_var]))
names(group_out1) = arm_var
group_out2 = as.data.frame(matrix(as.numeric(),
nrow = nrow(group_out1),
ncol = length(dsn_items)))
names(group_out2) = dsn_items
auc_summary = cbind(group_out1, group_out2)
for (item in dsn_items){
for(i in unique(group_auc[,arm_var])){
bl_val = group_auc[group_auc[,arm_var]==i & !is.na(group_auc[,item]) & group_auc[,cycle_var]==baseline_val,item]
ints = intersects_fun(bl_val,
x = unique(stats::na.omit(group_auc[group_auc[,arm_var]==i & !is.na(group_auc[,item]),cycle_var])),
y = group_auc[group_auc[,arm_var]==i & !is.na(group_auc[,item]),item])
t_i = ints[[1]]
y_i = ints[[2]] - bl_val
if(summary_measure == "AUC_worsening"){
y_i[y_i < 0] = 0
}
if(summary_measure == "AUC_improvement"){
y_i[y_i > 0] = 0
}
auc_i = sum( (t_i[2:length(t_i)] - t_i[2:length(t_i)-1]) * (y_i[2:length(t_i)] + y_i[2:length(t_i)-1])/2 )
auc_summary[auc_summary[,arm_var]==i, item] = auc_i
}
}
## -----------------------------
## -----------------------------
dsn_summary_complete = auc_summary
}
## -----------------------------
## -----------------------------
return(dsn_summary_complete)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.