# Chunk defaults for this document: echo the code, never cache, wide figures.
knitr::opts_chunk$set(
  echo = TRUE,
  cache = FALSE,
  fig.width = 10
)

# Attach the packages used below while hiding their start-up banners.
suppressPackageStartupMessages({
  library(tidyverse)
  library(knitr)
  library(simaerep)
})
We generally aggregate AEs by visit. Each patient follows the same visit schedule, and a specified number of days passes between consecutive visits. As visits are a scheduled contact point between the patient and physicians, the day of the visit is usually also the day when most AEs get reported. Alternatively, we can also choose to use simaerep
to aggregate AEs by days.
We load a public clinical trial data set which only contains data of the control arm; see the "SAS files as a Data Source" article.
# Build `df_visit`: the cumulative AE count per patient and visit, derived
# from the AE (adae) and vital-sign (advs) SAS data sets. Vital-sign records
# serve as the visits; AEs are attributed to visits by their study day.

# Keep only the study/patient/site keys and the study-day column.
df_ae <- haven::read_sas("adae.sas7bdat") %>%
  select(STUDYID, SUBJID, SITEID, AESTDY)

df_vs <- haven::read_sas("advs.sas7bdat") %>%
  select(STUDYID, SUBJID, SITEID, ADY)

# Harmonise the day column name and tag each row with its event type.
df_ae <- df_ae %>%
  rename(DY = AESTDY) %>%
  mutate(EVENT = "AE")

df_vs <- df_vs %>%
  rename(DY = ADY) %>%
  mutate(EVENT = "VS") %>%
  # we ignore visits that have no date
  filter(!is.na(DY)) %>%
  # we are not interested in same day visits
  distinct()

df_aevs <- bind_rows(df_ae, df_vs) %>%
  # NA's get sorted towards the end, thus AEs with no date get sorted
  # towards the last visit
  arrange(STUDYID, SITEID, SUBJID, DY) %>%
  group_by(STUDYID, SITEID, SUBJID) %>%
  # running AE and visit counters along each patient's timeline
  mutate(
    AE_NO = cumsum(ifelse(EVENT == "AE", 1, 0)),
    VS_NO = cumsum(ifelse(EVENT == "VS", 1, 0))
  ) %>%
  # we remove patients with 0 visits
  filter(max(VS_NO) > 0) %>%
  # AEs before the first visit should register to visit 1, not zero
  mutate(VS_NO = ifelse(VS_NO == 0, 1, VS_NO)) %>%
  ungroup()

df_aevs_aggr <- df_aevs %>%
  group_by(STUDYID, SITEID, SUBJID, VS_NO) %>%
  summarise(
    # AE count reached on the visit row itself. Taking min(AE_NO) over the
    # whole group would return 1 for visit 1 whenever several AEs precede the
    # first visit, because those AE rows were re-labelled from visit 0 to 1.
    # (Column name kept as MIN_AE_NO for backward compatibility.)
    MIN_AE_NO = min(AE_NO[EVENT == "VS"]),
    MAX_AE_NO = max(AE_NO),
    .groups = "drop"
  ) %>%
  group_by(STUDYID, SITEID, SUBJID) %>%
  mutate(MAX_VS_PAT = max(VS_NO)) %>%
  ungroup() %>%
  # assign AEs that occur after the last visit to the last visit
  mutate(
    CUM_AE = ifelse(
      VS_NO == MAX_VS_PAT,
      MAX_AE_NO,
      MIN_AE_NO
    )
  )

# Rename to the column names used for `df_visit` in the remaining chunks.
df_visit <- df_aevs_aggr %>%
  rename(
    study_id = "STUDYID",
    site_number = "SITEID",
    patnum = "SUBJID",
    n_ae = "CUM_AE",
    visit = "VS_NO"
  ) %>%
  select(study_id, site_number, patnum, n_ae, visit)
For aggregating on days we need to align the reference timelines of the single patients.
# One row per patient holding the study day of the earliest and of the
# latest visit record.
df_vs_min_max <- summarise(
  group_by(df_vs, STUDYID, SUBJID, SITEID),
  min_DY = min(DY, na.rm = TRUE),
  max_DY = max(DY, na.rm = TRUE),
  .groups = "drop"
)

# Show the first 25 entries of each column.
df_vs_min_max[["min_DY"]][1:25]
df_vs_min_max[["max_DY"]][1:25]
The day of the first visit is different for each patient and they start at negative values. First we correct all values to be positive and then normalize the AE date values to the date value of the first visit of each patient
# Build `df_days`: the cumulative AE count per patient and (corrected) study
# day, using the same column names as `df_visit` with the day stored in
# `visit`.

# Shift applied to all day values, derived from the earliest first-visit day.
corr_factor <- abs(min(df_vs_min_max$min_DY))

df_days <- df_ae %>%
  # include patients with vs but no AE
  right_join(df_vs_min_max, by = c("STUDYID", "SUBJID", "SITEID")) %>%
  group_by(STUDYID, SUBJID, SITEID) %>%
  mutate(
    # Day used for rows without a DY: the patient's latest dated AE, or the
    # day of the last visit when the patient has no dated AE at all. The
    # latter covers patients without AEs and patients whose AEs all lack a
    # start day, for which max(DY, na.rm = TRUE) would return -Inf with a
    # warning.
    DY_FILL = if (all(is.na(DY))) max_DY[1] else max(DY, na.rm = TRUE),
    DY = ifelse(is.na(DY), DY_FILL, DY)
  ) %>%
  # correct timelines
  # NOTE(review): the accompanying text says AE days are normalised to each
  # patient's first visit, yet min_DY is added rather than subtracted here.
  # Left unchanged; confirm the intended alignment.
  mutate(
    DY = DY + corr_factor,
    min_DY = min_DY + corr_factor,
    DY_corr = DY + min_DY
  ) %>%
  group_by(STUDYID, SITEID, SUBJID) %>%
  arrange(STUDYID, SITEID, SUBJID, DY_corr) %>%
  # running AE count per patient in day order
  mutate(n_ae = row_number()) %>%
  ungroup() %>%
  # set AE count to 0 for patients with no AEs
  mutate(n_ae = ifelse(is.na(EVENT), 0, n_ae)) %>%
  rename(
    study_id = STUDYID,
    site_number = SITEID,
    patnum = SUBJID,
    visit = DY_corr
  ) %>%
  # several AEs on one day collapse to that day's highest running count
  group_by(study_id, site_number, patnum, visit) %>%
  summarise(n_ae = max(n_ae), .groups = "drop")
check if we get the same transformation as for the visit aggregations
# Sanity checks: the day-based and the visit-based inputs must cover the same
# number of sites and patients and agree on which patients have no AE.

# Sorted unique ids of the patients whose n_ae never rises above 0.
get_pat_no_ae <- function(df) {
  df %>%
    group_by(study_id, site_number, patnum) %>%
    filter(max(n_ae) == 0) %>%
    pull(patnum) %>%
    unique() %>%
    sort()
}

stopifnot(n_distinct(df_days$site_number) == n_distinct(df_visit$site_number))
stopifnot(n_distinct(df_days$patnum) == n_distinct(df_visit$patnum))

pat0_days <- get_pat_no_ae(df_days)
pat0_vs <- get_pat_no_ae(df_visit)

# Compare lengths first: `==` recycles the shorter vector, so an element-wise
# comparison alone could pass for vectors of different length.
stopifnot(
  length(pat0_days) == length(pat0_vs),
  all(pat0_days == pat0_vs)
)
# Show the day-based input (study_id, site_number, patnum, visit, n_ae);
# here `visit` holds the corrected study day.
df_days
We do have gaps in between the days leading to implicitly missing values. simaerep
will correct this automatically and throw a warning.
# Site-level aggregation, fed with the day-based data instead of visits.
df_site <- df_days %>%
  site_aggr()
To silence the warning we can use check_df_visit()
which is also called internally by all other functions accepting df_visit
as an argument.
# Pass the day-based data through simaerep's visit check (reached via `:::`)
# and keep the returned data frame for the following steps.
df_days <- df_days %>%
  simaerep:::check_df_visit()

df_days
Then we proceed as usual.
# Simulate and evaluate the sites on the day-based data, then plot the study.
df_sim_sites <- df_site %>%
  sim_sites(df_visit = df_days)

df_eval_days <- df_sim_sites %>%
  eval_sites()

simaerep::plot_study(
  df_visit = df_days,
  df_site = df_site,
  df_eval = df_eval_days,
  study = unique(df_days[["study_id"]])
)
How do the results compare to aggregating on visits?
# Same workflow on the visit-based data; df_site and df_sim_sites are
# overwritten, the evaluation result is kept separately as df_eval_vs.
df_site <- df_visit %>%
  site_aggr()

df_sim_sites <- df_site %>%
  sim_sites(df_visit)

df_eval_vs <- df_sim_sites %>%
  eval_sites()

simaerep::plot_study(
  df_visit,
  df_site,
  df_eval_vs,
  study = unique(df_visit[["study_id"]])
)
We observe a difference in the results, which is largely attributable to the difference in the visit_med75 cut-off points, which influence the set of patients included. In any case, we observe a high rank correlation with a low p-value for all results greater than 0.
As the inclusion/exclusion of patients in the analysis of a site in an ongoing trial can shift results, we recommend aggregating on visits that have actually occurred, because then all included patients have had an equal number of opportunities to report AEs.
# Side-by-side comparison of the under-reporting probability and the number
# of patients at visit_med75 per site, for the day-based and the visit-based
# evaluation. Only sites with a probability above 0 in either are kept.
df_comp <- df_eval_days %>%
  select(site_number, prob_low_prob_ur, n_pat_with_med75) %>%
  rename_with(function(nm) paste0(nm, "_days"), .cols = -site_number) %>%
  left_join(
    df_eval_vs %>%
      select(
        site_number,
        prob_low_prob_ur_vs = prob_low_prob_ur,
        n_pat_with_med75_vs = n_pat_with_med75
      ),
    by = "site_number"
  ) %>%
  filter(prob_low_prob_ur_days > 0 | prob_low_prob_ur_vs > 0) %>%
  select(site_number, starts_with("prob"), starts_with("n_pat")) %>%
  arrange(desc(prob_low_prob_ur_vs))

knitr::kable(df_comp)

# Spearman rank correlation between the two sets of probabilities.
cor.test(
  df_comp$prob_low_prob_ur_vs,
  df_comp$prob_low_prob_ur_days,
  method = "spearman"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.