knitr::opts_chunk$set(echo = TRUE, cache = FALSE, message = FALSE)
suppressPackageStartupMessages(library(tidyverse)) suppressPackageStartupMessages(library(knitr)) suppressPackageStartupMessages(library(furrr)) suppressPackageStartupMessages(library(future)) suppressPackageStartupMessages(library(simaerep)) plan(multisession, workers = 6)
devtools::install_github("Gilead-BioStats/gsm@v1.9.2", ref = "main")
The {gsm} R package provides a standardized Risk Based Quality Monitoring (RBQM) framework for clinical trials that pairs a flexible data pipeline with robust reports. It also uses Funnel Plots to flag outliers which provide broader tolerance limits for sites with low exposure and narrower limits for sites with higher exposure. This method is different to the event rate based limits we have used in previous heuristics to measure {simaerep} performance. Funnel plots are discussed in greater detail by Zink et al. 2018
One of the draw backs of using funnel plots for flagging is that they assume that the AE rate remains constant over the course of the study.
We have prepared a snapshot of the AE reporting configuration of our current portfolio. For each study we have also measured a visit-specific AE rate which allows us to generate a synthetic portfolio with flexible AE rates across a study.
df_config <- readr::read_csv("ae_conf_20240220.csv") df_ae_rates <- readr::read_csv("ae_rates_20240220.csv") df_config %>% head(25) %>% knitr::kable() df_ae_rates %>% head(25) %>% knitr::kable()
We generate two synthetic portfolios with no AE under-reporting sites. One portfolio with a fixed AE rate for all visits and another one with a flexible visit-specific AE rate.
df_portf_fix <- sim_test_data_portfolio(df_config, parallel = TRUE, progress = TRUE) df_portf_fix %>% head(25) %>% knitr::kable() df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE)
Next we confirm the different AE rates in our two synthetic portfolios.
df_rate_fix <- df_portf_fix %>% mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>% summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>% mutate(rate = "fix") df_rate_flex <- df_portf_flex %>% mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>% summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>% mutate(rate = "flex")
bind_rows(df_rate_flex, df_rate_fix) %>% ggplot(aes(visit, ae_rate)) + geom_line(aes(group = study_id), alpha = 0.2) + geom_smooth() + facet_wrap(~ rate) + labs(title = "Average AE rates per Study")
bind_rows(df_rate_flex, df_rate_fix) %>% filter(dense_rank(study_id) <= 16) %>% ggplot(aes(visit, ae_rate)) + geom_line(aes(group = rate, color = rate)) + facet_wrap(~ study_id, scales = "free") + labs(title = "Average AE rates for Selected Studies")
We can confirm that the AE rates in the "flexible" portfolio are not constant. Moreover we see that the AE rate profile is very unique for each study.
Here we demonstrate how to use the {gsm} package on our simulated portfolios so that we can get a good visualization of the funnel plot.
get_SUBJ <- function(df_portf) { df_portf %>% select(study_id, siteid = site_number, subjid = patnum, timeonstudy = visit) %>% summarise(timeonstudy = max(timeonstudy), .by = c(study_id, siteid, subjid)) %>% group_by(study_id) %>% nest() } get_AE <- function(df_portf) { df_portf %>% select(study_id, subjid = patnum, n_ae) %>% summarise(n_ae = max(n_ae), .by = c(study_id, subjid)) %>% filter(n_ae > 0) %>% mutate(n_ae = map(n_ae, ~ tibble(n = seq(1, .)), .progress = TRUE)) %>% unnest(n_ae) %>% select(- n) %>% group_by(study_id) %>% nest() } dfSUBJ_fix <- get_SUBJ(df_portf_fix) dfAE_fix <- get_AE(df_portf_fix)
dfInput <- gsm::AE_Map_Raw(list(dfSUBJ = dfSUBJ_fix$data[[1]], dfAE = dfAE_fix$data[[1]])) dfInput dfTransformed <- gsm::Transform_Rate( dfInput, strNumeratorCol = "Count", strDenominatorCol = "Exposure" ) dfTransformed dfAnalyzed <- gsm::Analyze_NormalApprox(dfTransformed) dfAnalyzed dfFlagged <- gsm::Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) dfFlagged dfSummary <- gsm::Summarize(dfFlagged) dfSummary dfBounds <- gsm::Analyze_NormalApprox_PredictBounds(dfTransformed, vThreshold = c(-3, -2, 2, 3)) dfBounds chart <- gsm::Visualize_Scatter(dfFlagged, dfBounds) chart
We write out own funnel function as adapted from {gsm}
funnel_ur <- function(df, site, ur_rate) { df %>% filter(visit == max(visit), .by = patnum) %>% summarise( Metric = sum(.data$n_ae) / sum(.data$visit), n_ae = sum(n_ae), visit = sum(visit), .by = "site_number" ) %>% mutate( n_ae = ifelse(site_number == site, n_ae * (1 - ur_rate), n_ae), Metric = n_ae / visit ) %>% mutate( vMu = sum(.data$n_ae) / sum(.data$visit), z_0 = ifelse(.data$vMu == 0, 0, (.data$Metric - .data$vMu) / sqrt(.data$vMu / .data$visit) ), phi = mean(.data$z_0^2), z_i = ifelse(.data$vMu == 0 | .data$phi == 0, 0, (.data$Metric - .data$vMu) / sqrt(.data$phi * .data$vMu / .data$visit) ) ) %>% filter(site_number == site) %>% pull(z_i) } sim_ur_funnel <- function(df) { df %>% group_by(study_id) %>% nest() %>% ungroup() %>% mutate( sites = map(data, ~ distinct(., site_number)) ) %>% unnest(sites) %>% mutate(ur = list(tibble(ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1)))) %>% unnest(ur) %>% mutate( score = pmap_dbl(list(data, site_number, ur_rate), funnel_ur, .progress = TRUE) ) } df_sim_ur_funnel_flex <- sim_ur_funnel(df_portf_flex)
df_sim_ur_funnel_fix <- sim_ur_funnel(df_portf_fix)
We simulate under-reporting for both portfolios using {simaerep} using sim_ur_scenarios()
.
df_sim_simaerep_fix <- sim_ur_scenarios( df_portf_fix, extra_ur_sites = 0, ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1), parallel = TRUE, poisson = TRUE, prob_lower = TRUE, progress = TRUE )
df_sim_simaerep_flex <- sim_ur_scenarios( df_portf_flex, extra_ur_sites = 0, ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1), parallel = TRUE, poisson = TRUE, prob_lower = TRUE, progress = TRUE )
As the funnel plot score does not use multiplicity correction, we also compare the funnel plot score against the {simaerep} score w/o multiplicity correction.
df_sim_simaerep_fix$ae_rate <- "AE rate: fix" df_sim_simaerep_flex$ae_rate <- "AE rate: flexible" df_sim_ur_funnel_fix$ae_rate <- "AE rate: fix" df_sim_ur_funnel_flex$ae_rate <- "AE rate: flexible" df_sim_fun_thresh2 <- bind_rows(df_sim_ur_funnel_fix, df_sim_ur_funnel_flex) %>% mutate( type = "funnel", ) %>% select(type, ae_rate, study_id, site_number, ur_rate, score) df_sim_simaerep_threshp95 <- bind_rows(df_sim_simaerep_fix, df_sim_simaerep_flex) %>% mutate( type = "{simaerep}" ) %>% select(type, ae_rate, study_id, site_number, ur_rate, score = prob_low_prob_ur) df_sim_simaerep_threshp95_no_mult <- bind_rows(df_sim_simaerep_fix, df_sim_simaerep_flex) %>% mutate( type = "{simaerep} no mult" ) %>% mutate( score = 1 - prob_low ) %>% select(type, ae_rate, study_id, site_number, ur_rate, score) df_eval <- bind_rows( df_sim_simaerep_threshp95, df_sim_simaerep_threshp95_no_mult, df_sim_fun_thresh2 )
We continue by comparing the ROC-AUC.
get_roc <- function(df_ur, df_nr) { df <- bind_rows(df_ur, df_nr) pROC::roc(df, response = "is_ur", predictor = "score", quiet = TRUE) } # use 0 scenario to mix with ur scenario and calculate auc from scores df_nr <- df_eval %>% filter(ur_rate == 0) %>% mutate(is_ur = "no") %>% select(- ur_rate) %>% group_by(type, study_id, ae_rate) %>% nest() %>% ungroup() %>% rename(data_nr = data) df_ur <- df_eval %>% filter(ur_rate > 0) %>% mutate(is_ur = "yes") %>% group_by(type, study_id, ur_rate, ae_rate) %>% nest() %>% ungroup() %>% rename(data_ur = data) df_auc <- df_ur %>% left_join( df_nr, by = c("type", "study_id", "ae_rate") ) %>% mutate( roc = map2(data_ur, data_nr, get_roc, .progress = TRUE), auc = map_dbl(roc, pROC::auc, .progress = TRUE) ) %>% select(type, study_id, ur_rate, ae_rate, roc, auc)
df_auc %>% summarise( sd_auc = sd(.data$auc), auc = mean(.data$auc), .by = c(type, ur_rate, ae_rate) ) %>% knitr::kable(digit = 3)
df_auc %>% ggplot(aes(type, auc)) + geom_boxplot(aes(fill = type)) + facet_grid(ae_rate ~ ur_rate) + scale_fill_brewer(palette = "Dark2") + theme(axis.text.x = element_blank())
We set the thresholds for funnel and simaerep w/o multiplicity correction so that we get the same fpr as for simaerep with multiplicity correction and the established threshold of 0.95
thresh_default <- 0.95 target_fpr <- df_eval %>% filter(ur_rate == 0, type == "{simaerep}") %>% summarise(fpr = sum(score >= thresh_default) / n()) %>% pull(fpr) thresh_no_mult <- df_eval %>% filter(ur_rate == 0, type == "{simaerep} no mult") %>% pull(score) %>% quantile(1 - target_fpr) thresh_funnel <- df_eval %>% filter(ur_rate == 0, type == "funnel") %>% pull(score) %>% quantile(target_fpr) target_fpr thresh_no_mult thresh_funnel
get_prop_test_ci95 <- function(..., ix) { stopifnot(ix %in% c(1, 2)) tryCatch( prop.test(...)$conf.int[ix], error = function(cnd) c(NA, NA)[ix] ) } aggr_results <- function(df_eval) { df_perf <- df_eval %>% mutate( is_ur = case_when( type == "{simaerep}" ~ score >= thresh_default, type == "{simaerep} no mult" ~ score >= thresh_no_mult, type == "funnel" ~ score <= thresh_funnel ) ) %>% summarise( n = n(), .by = c(type, ae_rate, ur_rate, is_ur) ) %>% pivot_wider( names_from = is_ur, values_from = n, names_prefix = "is_ur_", values_fill = 0 ) %>% mutate( n_sites = is_ur_TRUE + is_ur_FALSE, ratio = is_ur_TRUE / n_sites, ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"), ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)), ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2)) ) } df_perf <- aggr_results(df_eval)
df_perf %>% knitr::kable(digits = 4)
plot_perf <- function(df_perf) { df_perf %>% mutate(ur_rate = paste0("under-reporting rate: ", ur_rate, " - ", ratio_type), ur_rate = ifelse(str_detect(ur_rate, "fpr"), "fpr", ur_rate), ae_rate = forcats::fct_rev(factor(ae_rate))) %>% group_by(ur_rate) %>% ggplot(aes(type, ratio)) + geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type, linetype = ae_rate), linewidth = 1) + facet_wrap(~ ur_rate, ncol = 1) + coord_flip() + theme(legend.position = "bottom") + labs( x = "", y = "CI95 Performance Ratio", title = "{simaerep} vs Funnel-Plot Performance" ) + scale_color_brewer(palette = "Dark2") } plot_perf(df_perf)
plan(sequential)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.