In openpharma/simaerep: Find Clinical Trial Sites Under-Reporting Adverse Events

knitr::opts_chunk$set(echo = TRUE, cache = FALSE, message = FALSE)

Load

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(furrr))
suppressPackageStartupMessages(library(future))
suppressPackageStartupMessages(library(simaerep))

plan(multisession, workers = 6)

Install {gsm}

devtools::install_github("Gilead-BioStats/gsm@v1.9.2", ref = "main")

Introduction

The {gsm} R package provides a standardized Risk Based Quality Monitoring (RBQM) framework for clinical trials that pairs a flexible data pipeline with robust reports. It also uses Funnel Plots to flag outliers which provide broader tolerance limits for sites with low exposure and narrower limits for sites with higher exposure. This method is different to the event rate based limits we have used in previous heuristics to measure {simaerep} performance. Funnel plots are discussed in greater detail by Zink et al. 2018

One of the draw backs of using funnel plots for flagging is that they assume that the AE rate remains constant over the course of the study.

Prepare Data

Load Portfolio Configurations

We have prepared a snapshot of the AE reporting configuration of our current portfolio. For each study we have also measured a visit-specific AE rate which allows us to generate a synthetic portfolio with flexible AE rates across a study.

df_config <- readr::read_csv("ae_conf_20240220.csv")
df_ae_rates <- readr::read_csv("ae_rates_20240220.csv")

df_config %>%
  head(25) %>%
  knitr::kable()

df_ae_rates %>%
  head(25) %>%
  knitr::kable()

Simulate Portfolio

We generate two synthetic portfolios with no AE under-reporting sites. One portfolio with a fixed AE rate for all visits and another one with a flexible visit-specific AE rate.

df_portf_fix <- sim_test_data_portfolio(df_config, parallel = TRUE, progress = TRUE)

df_portf_fix %>%
  head(25) %>%
  knitr::kable()


df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE)

Compare AE rates

Next we confirm the different AE rates in our two synthetic portfolios.

df_rate_fix <- df_portf_fix %>%
  mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>%
  summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>%
  mutate(rate = "fix")


df_rate_flex <- df_portf_flex %>%
  mutate(ae_rate = coalesce(n_ae - lag(n_ae), n_ae), .by = c("study_id", "patnum")) %>%
  summarise(ae_rate = mean(ae_rate), .by = c("study_id", "visit")) %>%
  mutate(rate = "flex")

bind_rows(df_rate_flex, df_rate_fix) %>%
  ggplot(aes(visit, ae_rate)) +
    geom_line(aes(group = study_id), alpha = 0.2) +
    geom_smooth() +
    facet_wrap(~ rate) +
    labs(title = "Average AE rates per Study")

bind_rows(df_rate_flex, df_rate_fix) %>%
  filter(dense_rank(study_id) <= 16) %>%
  ggplot(aes(visit, ae_rate)) +
    geom_line(aes(group = rate, color = rate)) +
    facet_wrap(~ study_id, scales = "free") +
    labs(title = "Average AE rates for Selected Studies")

We can confirm that the AE rates in the "flexible" portfolio are not constant. Moreover we see that the AE rate profile is very unique for each study.

Funnel Plots {gsm}

Example

Here we demonstrate how to use the {gsm} package on our simulated portfolios so that we can get a good visualization of the funnel plot.

get_SUBJ <- function(df_portf) {
  df_portf %>%
    select(study_id, siteid = site_number, subjid = patnum, timeonstudy = visit) %>%
    summarise(timeonstudy = max(timeonstudy), .by = c(study_id, siteid, subjid)) %>%
    group_by(study_id) %>%
    nest()
}


get_AE <- function(df_portf) {
  df_portf %>%
    select(study_id, subjid = patnum, n_ae) %>%
    summarise(n_ae = max(n_ae), .by = c(study_id, subjid)) %>%
    filter(n_ae > 0) %>%
    mutate(n_ae = map(n_ae, ~ tibble(n = seq(1, .)), .progress = TRUE)) %>%
    unnest(n_ae) %>%
    select(- n) %>%
    group_by(study_id) %>%
    nest()
}

dfSUBJ_fix <- get_SUBJ(df_portf_fix)
dfAE_fix <- get_AE(df_portf_fix)

dfInput <- gsm::AE_Map_Raw(list(dfSUBJ = dfSUBJ_fix$data[[1]], dfAE = dfAE_fix$data[[1]]))
dfInput

dfTransformed <- gsm::Transform_Rate(
  dfInput,
  strNumeratorCol = "Count",
  strDenominatorCol = "Exposure"
  )
dfTransformed

dfAnalyzed <- gsm::Analyze_NormalApprox(dfTransformed)
dfAnalyzed

dfFlagged <- gsm::Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3))
dfFlagged

dfSummary <- gsm::Summarize(dfFlagged)
dfSummary

dfBounds <- gsm::Analyze_NormalApprox_PredictBounds(dfTransformed, vThreshold = c(-3, -2, 2, 3))
dfBounds

chart <- gsm::Visualize_Scatter(dfFlagged, dfBounds)
chart

Simulate UR

UR Funnel

We write out own funnel function as adapted from {gsm}

funnel_ur <- function(df, site, ur_rate) {
  df %>%
    filter(visit == max(visit), .by = patnum) %>%
    summarise(
      Metric = sum(.data$n_ae) / sum(.data$visit),
      n_ae = sum(n_ae),
      visit = sum(visit),
      .by = "site_number"
    ) %>%
    mutate(
      n_ae = ifelse(site_number == site, n_ae * (1 - ur_rate), n_ae),
      Metric = n_ae / visit
    ) %>%
    mutate(
          vMu = sum(.data$n_ae) / sum(.data$visit),
          z_0 = ifelse(.data$vMu == 0,
            0,
            (.data$Metric - .data$vMu) /
              sqrt(.data$vMu / .data$visit)
          ),
          phi = mean(.data$z_0^2),
          z_i = ifelse(.data$vMu == 0 | .data$phi == 0,
            0,
            (.data$Metric - .data$vMu) /
              sqrt(.data$phi * .data$vMu / .data$visit)
          )
    ) %>%
    filter(site_number == site) %>%
    pull(z_i)
}

sim_ur_funnel <- function(df) {
  df %>%
    group_by(study_id) %>%
    nest() %>%
    ungroup() %>%
    mutate(
      sites = map(data, ~ distinct(., site_number))
    ) %>%
    unnest(sites) %>%
    mutate(ur = list(tibble(ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1)))) %>%
    unnest(ur) %>%
    mutate(
      score = pmap_dbl(list(data, site_number, ur_rate), funnel_ur, .progress = TRUE)
    )
}

df_sim_ur_funnel_flex <- sim_ur_funnel(df_portf_flex)

df_sim_ur_funnel_fix <- sim_ur_funnel(df_portf_fix)

UR {simaerep}

We simulate under-reporting for both portfolios using {simaerep} using sim_ur_scenarios().

df_sim_simaerep_fix <- sim_ur_scenarios(
   df_portf_fix,
   extra_ur_sites = 0,
   ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1),
   parallel = TRUE,
   poisson = TRUE,
   prob_lower = TRUE,
   progress = TRUE
)

df_sim_simaerep_flex <- sim_ur_scenarios(
   df_portf_flex,
   extra_ur_sites = 0,
   ur_rate = c(0, 0.1, 0.25, 0.5, 0.75, 1),
   parallel = TRUE,
   poisson = TRUE,
   prob_lower = TRUE,
   progress = TRUE
)

Evaluate

Combine Results

As the funnel plot score does not use multiplicity correction, we also compare the funnel plot score against the {simaerep} score w/o multiplicity correction.

df_sim_simaerep_fix$ae_rate <- "AE rate: fix"
df_sim_simaerep_flex$ae_rate <- "AE rate: flexible"
df_sim_ur_funnel_fix$ae_rate <- "AE rate: fix"
df_sim_ur_funnel_flex$ae_rate <- "AE rate: flexible"



df_sim_fun_thresh2 <- bind_rows(df_sim_ur_funnel_fix, df_sim_ur_funnel_flex) %>%
  mutate(
    type = "funnel",
  ) %>%
  select(type, ae_rate, study_id, site_number, ur_rate, score)


df_sim_simaerep_threshp95 <-  bind_rows(df_sim_simaerep_fix, df_sim_simaerep_flex) %>%
  mutate(
    type = "{simaerep}"
  ) %>%
  select(type, ae_rate, study_id, site_number, ur_rate, score = prob_low_prob_ur)

df_sim_simaerep_threshp95_no_mult <-  bind_rows(df_sim_simaerep_fix, df_sim_simaerep_flex) %>%
  mutate(
    type = "{simaerep} no mult"
  ) %>%
  mutate(
    score = 1 - prob_low
  ) %>%
  select(type, ae_rate, study_id, site_number, ur_rate, score)


df_eval <- bind_rows(
  df_sim_simaerep_threshp95,
  df_sim_simaerep_threshp95_no_mult,
  df_sim_fun_thresh2
)

AUC

We continue by comparing the ROC-AUC.

get_roc <- function(df_ur, df_nr) {
  df <- bind_rows(df_ur, df_nr) 
  pROC::roc(df, response = "is_ur", predictor = "score", quiet = TRUE)
}

# use 0 scenario to mix with ur scenario and calculate auc from scores
df_nr <- df_eval %>%
  filter(ur_rate == 0) %>%
  mutate(is_ur = "no") %>%
  select(- ur_rate) %>%
  group_by(type, study_id, ae_rate) %>%
  nest() %>%
  ungroup() %>%
  rename(data_nr = data)


df_ur <- df_eval %>%
  filter(ur_rate > 0) %>%
  mutate(is_ur = "yes") %>%
  group_by(type, study_id, ur_rate, ae_rate) %>%
  nest() %>%
  ungroup() %>%
  rename(data_ur = data)


df_auc <- df_ur %>%
  left_join(
    df_nr,
    by = c("type", "study_id", "ae_rate")
  ) %>%
  mutate(
    roc = map2(data_ur, data_nr, get_roc, .progress = TRUE),
    auc = map_dbl(roc, pROC::auc, .progress = TRUE)
  ) %>%
  select(type, study_id, ur_rate, ae_rate, roc, auc)

Table

df_auc %>%
  summarise(
    sd_auc = sd(.data$auc),
    auc = mean(.data$auc),
    .by = c(type, ur_rate, ae_rate)
  ) %>%
  knitr::kable(digit = 3)

Plot

df_auc %>%
  ggplot(aes(type, auc)) +
    geom_boxplot(aes(fill = type)) +
    facet_grid(ae_rate ~ ur_rate)  +
    scale_fill_brewer(palette = "Dark2") +
    theme(axis.text.x = element_blank())

Metrics

Thresholds

We set the thresholds for funnel and simaerep w/o multiplicity correction so that we get the same fpr as for simaerep with multiplicity correction and the established threshold of 0.95

thresh_default <- 0.95

target_fpr <- df_eval %>%
  filter(ur_rate == 0, type == "{simaerep}") %>%
  summarise(fpr = sum(score >= thresh_default) / n()) %>%
  pull(fpr)

thresh_no_mult <-  df_eval %>%
  filter(ur_rate == 0, type == "{simaerep} no mult") %>%
  pull(score) %>%
  quantile(1 - target_fpr)

thresh_funnel <- df_eval %>%
  filter(ur_rate == 0, type == "funnel") %>%
  pull(score) %>%
  quantile(target_fpr)

target_fpr

thresh_no_mult

thresh_funnel

Aggregate

get_prop_test_ci95 <- function(..., ix) {

  stopifnot(ix %in% c(1, 2))

  tryCatch(
    prop.test(...)$conf.int[ix],
    error = function(cnd) c(NA, NA)[ix]
  )
}

aggr_results <- function(df_eval) {

  df_perf <- df_eval %>%
    mutate(
      is_ur = case_when(
        type == "{simaerep}" ~ score >= thresh_default,
        type == "{simaerep} no mult" ~ score >= thresh_no_mult,
        type == "funnel" ~ score <= thresh_funnel
      )
    ) %>%
    summarise(
      n = n(),
      .by = c(type, ae_rate, ur_rate, is_ur)
    ) %>%
    pivot_wider(
      names_from = is_ur,
      values_from = n,
      names_prefix = "is_ur_",
      values_fill = 0
    ) %>%
    mutate(
      n_sites = is_ur_TRUE + is_ur_FALSE,
      ratio = is_ur_TRUE / n_sites,
      ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"),
      ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)),
      ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2))
    )
  }

df_perf <- aggr_results(df_eval)

Table

df_perf %>%
  knitr::kable(digits = 4)

Plot

plot_perf <- function(df_perf) {

  df_perf %>%
    mutate(ur_rate = paste0("under-reporting rate: ",  ur_rate, " - ", ratio_type),
           ur_rate = ifelse(str_detect(ur_rate, "fpr"), "fpr", ur_rate),
           ae_rate = forcats::fct_rev(factor(ae_rate))) %>%
    group_by(ur_rate) %>%
    ggplot(aes(type, ratio)) +
      geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type, linetype = ae_rate), linewidth = 1) +
      facet_wrap(~ ur_rate, ncol = 1) +
      coord_flip() +
      theme(legend.position = "bottom") +
      labs(
        x = "",
        y = "CI95 Performance Ratio", 
        title = "{simaerep} vs Funnel-Plot Performance"
      ) +
      scale_color_brewer(palette = "Dark2")
}

plot_perf(df_perf)

Summary

Funnel plot expects constant event rates over time. Performance decrease when event rates are flexible.
{simaerep} performance is unaffected by flexible event rates.
{simaerep} detection rates with comparable false positive rates are greater than funnel plot detection rates when event rates are flexible.

plan(sequential)

openpharma/simaerep documentation built on July 16, 2025, 1:48 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

openpharma/simaerep
Find Clinical Trial Sites Under-Reporting Adverse Events

In openpharma/simaerep: Find Clinical Trial Sites Under-Reporting Adverse Events

Load

Install {gsm}

Introduction

Prepare Data

Load Portfolio Configurations

Simulate Portfolio

Compare AE rates

Funnel Plots {gsm}

Example

Simulate UR

UR Funnel

UR {simaerep}

Evaluate

Combine Results

AUC

Table

Plot

Metrics

Thresholds

Aggregate

Table

Plot

Summary

R Package Documentation

Browse R Packages

We want your feedback!

openpharma/simaerep Find Clinical Trial Sites Under-Reporting Adverse Events

In openpharma/simaerep: Find Clinical Trial Sites Under-Reporting Adverse Events

Load

Install {gsm}

Introduction

Prepare Data

Load Portfolio Configurations

Simulate Portfolio

Compare AE rates

Funnel Plots {gsm}

Example

Simulate UR

UR Funnel

UR {simaerep}

Evaluate

Combine Results

AUC

Table

Plot

Metrics

Thresholds

Aggregate

Table

Plot

Summary

R Package Documentation

Browse R Packages

We want your feedback!

openpharma/simaerep
Find Clinical Trial Sites Under-Reporting Adverse Events