# tests/testthat/test_data.R

# Test data objects are loaded automatically; see ./data-raw/generate_test_data.R
# for how they are generated.
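
# dplyr and tidyr verbs are used unqualified below; attaching them here is an
# assumption, in case a testthat setup helper does not already do so.
library(dplyr)
library(tidyr)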

test_that("check S3 test data reproducibility", {
  aerep_check <- simaerep(
    df_visit_test,
    r = 100,
    param_sim_sites = list(
      r = 100
    )
  )

  # df_eval row order is not guaranteed; sort the stored result to match
  aerep_test$df_eval <- arrange(aerep_test$df_eval, study_id, site_number)

  expect_equal(aerep_check$df_sim_sites, aerep_test$df_sim_sites)
  expect_equal(aerep_check$df_eval, aerep_test$df_eval)

  # recreating the orivisit object from the same data should match the stored one
  visit_check <- orivisit(df_visit_test)

  expect_equal(visit_check, visit_test)

})

test_that("check standard test data reproducibility", {

  set.seed(1)

  df_visit1 <- sim_test_data_study(
    n_pat = 100,
    n_sites = 5,
    frac_site_with_ur = 0.4,
    ur_rate = 0.6
  )

  df_visit1$study_id <- "A"

  set.seed(2)
  df_visit2 <- sim_test_data_study(
    n_pat = 100,
    n_sites = 5,
    frac_site_with_ur = 0.2,
    ur_rate = 0.1
  )

  df_visit2$study_id <- "B"

  df_visit_check <- dplyr::bind_rows(df_visit1, df_visit2)
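  # together the two simulated studies should reproduce the stored df_visit_test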

  aerep <- simaerep(
    df_visit_check,
    r = 100,
    param_sim_sites = list(
      poisson_test = TRUE,
      r = 100
    )
  )

  expect_equal(df_visit_test, df_visit_check)
  expect_equal(df_site_test, aerep$df_site)
  expect_equal(df_sim_sites_test, aerep$df_sim_sites)
  expect_equal(arrange(df_eval_test, study_id, site_number), aerep$df_eval)

})
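
# The portfolio test below chains the full pipeline: per-patient visit/AE
# maxima -> get_config() -> sim_test_data_portfolio() -> sim_ur_scenarios()
# -> get_portf_perf().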


test_that("check portfolio performance test data reproducibility", {

  df_site_max_check <- df_visit_test %>%
    group_by(study_id, site_number, patnum) %>%
    summarise(max_visit = max(visit),
              max_ae = max(n_ae),
              .groups = "drop")

  df_config_check <- simaerep::get_config(
    df_site_max_check,
    anonymize = TRUE,
    min_pat_per_study = 100,
    min_sites_per_study = 5
  )

  set.seed(1)
  df_portf_check <- sim_test_data_portfolio(df_config_check, progress = FALSE)

  set.seed(1)
  df_scen_check <- sim_ur_scenarios(
    df_portf_check,
    extra_ur_sites = 2,
    ur_rate = c(0.5, 1),
    parallel = FALSE,
    poisson = FALSE,
    prob_lower = TRUE,
    progress = FALSE,
    site_aggr_args = list(method = "med75_adj")
  )

  # summarise detection performance across the simulated scenarios
  df_perf_check <- get_portf_perf(df_scen_check)

  expect_equal(df_site_max_test, df_site_max_check)
  expect_equal(df_config_test, df_config_check)
  expect_equal(df_portf_test, df_portf_check)

  # We fixed a mistake in sim_ur_scenarios, therefore the results have
  # diverged and will no longer match. At this point we do not want to
  # update the stored test data, in order to demonstrate consistency
  # across major releases.

  # expect_equal(df_scen_test, df_scen_check) #nolint
  # expect_equal(df_perf_test, df_perf_check) #nolint
})


test_that("sim_ur() ae_count as expected for max patient visits", {

  n_ae_test <- df_visit_test %>%
    filter(visit == max(visit), .by = c("study_id", "site_number", "patnum")) %>%
    filter(site_number == "S0001", study_id == "A") %>%
    pull(n_ae) %>%
    sum()

  n_ae_0p5 <- df_visit_test %>%
    sim_ur(study_id = "A", site_number = "S0001", ur_rate = 0.5) %>%
    filter(visit == max(visit), .by = c("study_id", "site_number", "patnum")) %>%
    filter(site_number == "S0001", study_id == "A") %>%
    pull(n_ae) %>%
    sum()

  # ur_rate = 0.5 removes half of each patient's AEs, so the original
  # total must be exactly twice the reduced total
  expect_equal(n_ae_test, n_ae_0p5 * 2)

})
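
# sim_ur() applied to one site at a time, followed by a fresh simaerep()
# evaluation, should reproduce the site-level metrics that
# sim_ur_scenarios() computes in a single pass.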


test_that("sim_ur() and sim_ur_scenario() must give similar results", {

  df_sim_ur_scenarios <- sim_ur_scenarios(
      df_visit_test,
      extra_ur_sites = 0,
      ur_rate = c(0.5, 1),
      parallel = FALSE,
      poisson = FALSE,
      prob_lower = TRUE,
      progress = FALSE,
      check = FALSE,
      site_aggr_args = list(method = "med75_adj")
    ) %>%
    arrange(study_id, site_number, ur_rate)

  # sim_ur -------------------------------------

  perf <- function(df_visit, study_id, site_number, ur_rate) {
    # remove a fraction of the AEs of a single site
    df_vs_study <- df_visit %>%
      sim_ur(study_id, site_number, ur_rate)

    # re-evaluate the modified study and return the affected site's row;
    # .data/.env disambiguate the column from the function argument
    df_vs_study %>%
      simaerep(under_only = TRUE, progress = FALSE, check = FALSE) %>%
      .$df_eval %>%
      filter(.data$site_number == .env$site_number)
  }

  # evaluate every study/site combination at ur_rate 0, 0.5 and 1
  df_grid <- df_visit_test %>%
    distinct(study_id, site_number) %>%
    mutate(ur_rate = list(c(0, 0.5, 1))) %>%
    unnest(ur_rate)

  df_sim_ur <- df_grid %>%
    mutate(
      perf = purrr::pmap(
        list(study_id, site_number, ur_rate),
        function(x, y, z) perf(df_visit_test, x, y, z)
      )
    ) %>%
    # drop the grid columns; unnest() restores them from the df_eval rows
    select(-study_id, -site_number) %>%
    unnest(perf) %>%
    arrange(study_id, site_number, ur_rate)

  # compare ------------------------------------

  cols_equal <- c(
    "study_id",
    "site_number",
    "ur_rate",
    "visit_med75",
    "n_pat_with_med75",
    "mean_ae_study_med75"
  )
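  # these columns should not depend on how the AEs are removed, so they are
  # expected to match exactly between the two approaches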

  expect_equal(
    df_sim_ur_scenarios[, cols_equal],
    df_sim_ur[, cols_equal]
  )

  # sim_ur calculates the number of AEs to be removed based
  # on the latest visit of each patient and removes them from
  # the beginning. This can give smaller AE counts than removing
  # a fraction of AEs from visit_med75.
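  # Illustrative example (hypothetical numbers): a patient with 4 AEs at the
  # last visit loses 4 * 0.5 = 2 AEs under ur_rate = 0.5, trimmed from the
  # earliest visits, so the cumulative count at visit_med75 can drop by more
  # than half.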

  expect_true(all(
    round(df_sim_ur$mean_ae_site_med75, 5) <=
    round(df_sim_ur_scenarios$mean_ae_site_med75, 5)
  ))

})