knitr::opts_chunk$set(echo = TRUE, cache = FALSE, message = FALSE)
suppressPackageStartupMessages(library(dplyr)) suppressPackageStartupMessages(library(tibble)) suppressPackageStartupMessages(library(tidyr)) suppressPackageStartupMessages(library(simaerep)) suppressPackageStartupMessages(library(purrr)) suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(furrr)) suppressPackageStartupMessages(library(future)) plan(multisession, workers = 6)
df_config <- readr::read_csv("ae_conf_20240220.csv") df_ae_rates <- readr::read_csv("ae_rates_20240220.csv") df_portf_flex <- sim_test_data_portfolio(df_config, df_ae_rates = df_ae_rates, parallel = TRUE, progress = TRUE) df_portf_flex %>% readr::write_csv("portf.csv")
df_portf_flex <- readr::read_csv("portf.csv")
Here we reanalyze performance of {simaerep} integrating the lessons learned from previous versions. We will apply the following.
sim_ur()
funnel <- function(df) { df %>% filter(visit == max(visit), .by = patnum) %>% summarise( Metric = sum(.data$n_ae) / sum(.data$visit), n_ae = sum(n_ae), visit = sum(visit), .by = "site_number" ) %>% mutate( vMu = sum(.data$n_ae) / sum(.data$visit), z_0 = ifelse(.data$vMu == 0, 0, (.data$Metric - .data$vMu) / sqrt(.data$vMu / .data$visit) ), phi = mean(.data$z_0^2), z_i = ifelse(.data$vMu == 0 | .data$phi == 0, 0, (.data$Metric - .data$vMu) / sqrt(.data$phi * .data$vMu / .data$visit) ) ) } box <- function(df) { df <- df %>% filter(visit == max(visit), .by = patnum) %>% summarise( event_per_visit = sum(.data$n_ae) / sum(.data$visit), .by = "site_number" ) bx <- boxplot.stats(df$event_per_visit) df <- df %>% mutate( box_out = event_per_visit < bx$stats[1] ) } perf <- function(df_visit, study_id, site_number, ur_rate) { df_vs_study <- df_visit %>% sim_ur(study_id, site_number, ur_rate) df_visit_med75 <- df_vs_study %>% simaerep(under_only = TRUE, progress = FALSE, check = FALSE) %>% .$df_eval %>% filter(.data$site_number == .env$site_number) df_visit_med75_over <- df_vs_study %>% simaerep(under_only = FALSE, progress = FALSE, check = FALSE) %>% .$df_eval %>% filter(.data$site_number == .env$site_number) df_inframe <- df_vs_study %>% simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = FALSE) %>% .$df_eval %>% filter(.data$site_number == .env$site_number) df_inframe_visit_med75 <- df_vs_study %>% simaerep(inframe = TRUE, under_only = TRUE, check = FALSE, visit_med75 = TRUE) %>% .$df_eval %>% filter(.data$site_number == .env$site_number) funnel_zi <- funnel(df_vs_study) %>% filter(.data$site_number == .env$site_number) %>% pull(z_i) box_out <- box(df_vs_study) %>% filter(.data$site_number == .env$site_number) %>% pull(box_out) tibble( score_visit_med75 = df_visit_med75$prob_low_prob_ur, score_visit_med75_no_mult = 1 - df_visit_med75$prob_low, score_visit_med75_over = df_visit_med75_over$prob_low_prob_ur, score_visit_med75_over_no_mult = 1 - df_visit_med75_over$prob_low, score_inframe = df_inframe$prob_low_prob_ur, score_inframe_no_mult = 1 - df_inframe$prob_low, score_inframe_visit_med75 = df_inframe_visit_med75$prob_low_prob_ur, score_inframe_visit_med75_no_mult = 1 - df_inframe_visit_med75$prob_low, score_funnel_zi = funnel_zi, score_box_out = as.integer(box_out), stat_visit_med75_visit_med75 = df_visit_med75$visit_med75, stat_visit_med75_n_pat_with_med75 = df_visit_med75$n_pat_with_med75, stat_visit_med75_mean_ae_site_med75 = df_visit_med75$mean_ae_site_med75, stat_visit_med75_mean_ae_study_med75 = df_visit_med75$mean_ae_study_med75, stat_inframe_visit_med75 = df_inframe_visit_med75$visit_med75, stat_inframe_visit_med75_n_pat_with_med75 = df_inframe_visit_med75$n_pat_with_med75, stat_inframe_visit_med75_events_per_visit_site = df_inframe_visit_med75$events_per_visit_site, stat_inframe_visit_med75_events_per_visit_study = df_inframe_visit_med75$events_per_visit_study, stat_inframe_n_pat = df_inframe$n_pat, stat_inframe_events_per_visit_site = df_inframe$events_per_visit_site, stat_inframe_events_per_visit_study = df_inframe$events_per_visit_study, ) } # perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0) %>% unlist() # perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.5) %>% unlist() # perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 0.75) %>% unlist() # perf(df_portf_flex, study_id = "0010", site_number = "15153", ur_rate = 1) %>% unlist()
df_grid <- df_portf_flex %>% distinct(study_id, site_number) %>% # to reduce calculation time we only take every xth study filter(dense_rank(study_id)%%5 == 0) %>% mutate(ur_rate = list(c(0, 0.1, 0.25, 0.5, 0.75, 1))) %>% unnest(ur_rate) df_grid
with_progress_cnd( df_perf <- df_grid %>% mutate( perf = purrr_bar( list(study_id, site_number, ur_rate), .purrr = furrr::future_pmap, .f = function(x, y, z) perf(df_portf_flex, x, y, z), .purrr_args = list(.options = furrr_options(seed = TRUE)), .steps = nrow(.) ) ) ) df_perf %>% unnest(perf) %>% readr::write_csv("perf.csv")
df_perf <- readr::read_csv("perf.csv", show_col_types = FALSE)
df_perf_long <- df_perf %>% pivot_longer(cols = - c(study_id, site_number, ur_rate), names_to = "type", values_to = "score") %>% filter(startsWith(type, "score_")) %>% mutate(type = stringr::str_replace(type, "score_", ""))
We set the thresholds so that we get a fpr of 0.01.
Note that this results in probability thresholds ~ 0.99 for scores w/o multiplicity correction and in the recommended funnel plot score threshold of -2.
target_fpr <- 0.01 df_thresh <- df_perf_long %>% group_by(type) %>% nest() %>% ungroup() %>% mutate( data = map(data, ~ filter(., ur_rate == 0)), thresh1 = map_dbl(data, ~ quantile(pull(., score), 1 - target_fpr)), thresh2 = map_dbl(data, ~ quantile(pull(., score), target_fpr)), thresh = ifelse(type == "funnel_zi", thresh2, thresh1) ) %>% select(type, thresh) df_thresh
get_prop_test_ci95 <- function(..., ix) { stopifnot(ix %in% c(1, 2)) tryCatch( prop.test(...)$conf.int[ix], error = function(cnd) c(NA, NA)[ix] ) } df_aggr <- df_perf_long %>% left_join(df_thresh, by = "type") %>% mutate( is_ur = ifelse(type == "funnel_zi", score <= thresh, score >= thresh), is_ur = ifelse(type == "box_out", score == 1, is_ur) ) %>% summarise( n = n(), .by = c(type, ur_rate, is_ur) ) %>% pivot_wider( names_from = is_ur, values_from = n, names_prefix = "is_ur_", values_fill = 0 ) %>% mutate( n_sites = is_ur_TRUE + is_ur_FALSE, ratio = is_ur_TRUE / n_sites, ratio_type = ifelse(ur_rate == 0, "fpr", "tpr"), ci95_low = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 1)), ci95_high = map2_dbl(is_ur_TRUE, n_sites, ~ get_prop_test_ci95(.x, .y, ix = 2)), type_strip = str_replace(type, "_no_mult", ""), has_mult = ! str_detect(type, "no_mult") & ! type %in% c("funnel_zi", "box_out") )
Methods:
FN: false negatives TP: true positives
df_aggr %>% select(method = type_strip, has_mult, ur_rate, FN = is_ur_FALSE, TP = is_ur_TRUE, n_sites, ratio_type, ratio, ci95_low, ci95_high) %>% knitr::kable(digits = 4)
df_aggr %>% mutate(ur_rate = paste0("under-reporting rate: ", ur_rate, " - ", ratio_type) ) %>% group_by(ur_rate) %>% ggplot(aes(type, ratio)) + geom_errorbar(aes(ymin = ci95_low, ymax = ci95_high, color = type_strip, alpha = has_mult), linewidth = 1) + facet_wrap(~ ur_rate, ncol = 1) + coord_flip() + theme( legend.position = "right", axis.text.y = element_blank(), axis.ticks.y = element_blank() ) + labs( x = "", y = "Ratio (CI95)", title = "{simaerep} Performance", color = "Method", alpha = "Multiplicity Correction" ) + scale_color_manual(values = rev(RColorBrewer::brewer.pal(n = 6, name = "Dark2"))) + scale_alpha_manual(values = c(1, 0.5))
The new inframe methods compares event per visit rates without dropping any patients or AEs from the analysis. This is likely to explain the performance increase. We observed a similar increase when we optimized visit_med75 in past experiments.
This observation was already made by the Boeringer Ingelheim Team during the evaluation of {simaerep}. We can now reproducibly confirm this. The unaltered probability score as returned by the bootstrap algorithm already provides very realistic under-reporting probabilities.
These controls confirm previous observations that were made during the {simaerep} validation.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.