tests/testthat/test-te_datastore_csv.R

test_that("te_datastore_csv validator works", {
  temp_dir <- tempfile("csv_dir_")
  dir.create(temp_dir)
  datastore <- save_to_csv(temp_dir)
  expanded_csv_data <- save_expanded_data(datastore, subset(vignette_switch_data, trial_period %in% 1:12))

  # validObject returns true
  expect_true(validObject(expanded_csv_data))

  # validObject returns error
  expanded_csv_data@files$period <- 12:1
  expect_error(validObject(expanded_csv_data))

  unlink(temp_dir, recursive = TRUE)
})


test_that("read_expanded_data can read te_datastore_csv data", {
  temp_dir <- tempfile("csv_dir_")
  dir.create(temp_dir)
  datastore <- save_to_csv(temp_dir)
  expanded_csv_data <- save_expanded_data(datastore, subset(vignette_switch_data, trial_period %in% 1:12))

  # check if no columns get added or removed by read_expanded_data
  expect_equal(ncol(read_expanded_data(expanded_csv_data)), length(expanded_csv_data@template))

  # check if omitting period reads in all data
  expect_equal(nrow(read_expanded_data(expanded_csv_data)), 25948)

  # check if period argument subsets data correctly
  expect_equal(nrow(read_expanded_data(expanded_csv_data, 1)), 1979)
  expect_equal(nrow(read_expanded_data(expanded_csv_data, c(5, 8, 12))), 6493)

  # check if method throws an error when using a character as period
  expect_error(read_expanded_data(expanded_csv_data, "1"))

  # check if no new NAs are introduced
  expect_equal(
    sum(is.na.data.frame(read_expanded_data(expanded_csv_data))),
    sum(is.na.data.frame(subset(vignette_switch_data, trial_period %in% 1:12)))
  )

  # check if subset_condition subsets data correctly
  expect_equal(
    nrow(read_expanded_data(expanded_csv_data, subset_condition = "followup_time <= 30 & nvarC == 40")), 371
  )

  unlink(temp_dir, recursive = TRUE)
})


test_that("sample_controls works with trial_sequence objects containing te_datastore_csv objects", {
  trial_itt_dir <- file.path(tempdir(), "trial_itt")
  dir.create(trial_itt_dir)

  trial_itt <- trial_sequence(estimand = "ITT") |>
    set_data(
      data = data_censored,
      id = "id",
      period = "period",
      treatment = "treatment",
      outcome = "outcome",
      eligible = "eligible"
    ) |>
    set_censor_weight_model(
      censor_event = "censored",
      numerator = ~ x1 + x2 + x3,
      denominator = ~x2,
      pool_models = "numerator",
      model_fitter = stats_glm_logit(save_path = file.path(trial_itt_dir, "switch_models"))
    ) |>
    calculate_weights() |>
    set_outcome_model(adjustment_terms = ~ x1 + x2)

  trial_itt_csv <- set_expansion_options(
    trial_itt,
    output = save_to_csv(file.path(trial_itt_dir, "trial_csvs")),
    chunk_size = 500
  ) |>
    expand_trials()

  # sample_controls works without additional arguments
  sc_01 <- sample_controls(trial_itt_csv, p_control = 0.01, seed = 1221)
  expect_equal(
    sort(sc_01@outcome_data@data$id),
    c(
      15, 16, 20, 20, 21, 29, 32, 38, 38, 40, 44, 44, 44, 49, 49, 49,
      50, 58, 61, 65, 68, 71, 71, 74, 74, 74, 74, 89, 95, 96, 98, 99
    )
  )

  random_01 <- sample_controls(trial_itt_csv, p_control = 0.01)

  # seed gets reset
  sc_01_1 <- sample_controls(trial_itt_csv, p_control = 0.01, seed = 1221)
  random_02 <- sample_controls(trial_itt_csv, p_control = 0.01)
  expect_false(identical(sort(random_01@outcome_data@data$id), sort(random_02@outcome_data@data$id)))

  # sample_controls works with p_control
  sc_02 <- sample_controls(trial_itt_csv, p_control = 0.5, seed = 5678)
  expect_equal(sc_02@outcome_data@n_rows, 780)

  # sample_controls works with p_control = 0
  sc_03 <- sample_controls(trial_itt_csv, p_control = 0)
  expect_equal(sc_03@outcome_data@n_rows, 14)

  # cases are kept
  expect_equal(sum(sc_01@outcome_data@data$outcome), 14)
  expect_equal(sum(sc_02@outcome_data@data$outcome), 14)
  expect_equal(sum(sc_03@outcome_data@data$outcome), 14)

  # all columns are kept and sample_weight column is added
  expect_equal(
    colnames(sc_01@outcome_data@data), c(colnames(trial_itt_csv@expansion@datastore@template), "sample_weight")
  )

  # sample_controls subsets data correctly
  sc_04 <- sample_controls(
    trial_itt_csv,
    period = 1:10,
    subset_condition = "followup_time <= 20 & treatment == 1",
    p_control = 0.2,
    seed = 2332
  )
  expect_equal(
    sort(sc_04@outcome_data@data$id),
    c(
      21, 21, 21, 27, 33, 33, 33, 34, 34, 44, 44, 44, 44, 44, 44, 44, 44, 44, 47, 47, 49, 50, 50, 54, 54, 59,
      59, 59, 59, 59, 59, 59, 60, 65, 65, 65, 71, 73, 74, 74, 74, 74, 74, 74, 74, 83, 95, 95, 95, 95, 95, 95
    )
  )
  expect_true(all(sc_04@outcome_data@data$followup_time <= 20))
  expect_true(all(sc_04@outcome_data@data$treatment == 1))
  expect_true(all(sc_04@outcome_data@data$period %in% 1:10))

  # if non-existing periods are entered sample_controls omits the missing periods, runs the code and returns a warning
  expect_warning(
    sc_05 <- sample_controls(
      trial_itt_csv,
      period = c(1:10, 20),
      subset_condition = "followup_time <= 20 & treatment == 1",
      p_control = 0.2,
      seed = 2332
    ),
    "The following periods don't exist in the data and were omitted: 20"
  )
  expect_equal(sort(sc_04@outcome_data@data$id), sort(sc_05@outcome_data@data$id))

  # sample_controls returns the correct classes
  expect_class(sc_04, "trial_sequence_ITT")
  expect_class(sc_04@outcome_data, "te_outcome_data")
  expect_class(sc_04@outcome_data@data, "data.table")

  unlink(trial_itt_dir, recursive = TRUE)
})


test_that("load_expanded_data works with trial_sequence objects containing te_datastore_csv objects", {
  trial_itt_dir <- file.path(tempdir(), "trial_itt")
  dir.create(trial_itt_dir)

  trial_itt <- trial_sequence(estimand = "ITT") |>
    set_data(
      data = data_censored,
      id = "id",
      period = "period",
      treatment = "treatment",
      outcome = "outcome",
      eligible = "eligible"
    ) |>
    set_censor_weight_model(
      censor_event = "censored",
      numerator = ~ x1 + x2 + x3,
      denominator = ~x2,
      pool_models = "numerator",
      model_fitter = stats_glm_logit(save_path = file.path(trial_itt_dir, "switch_models"))
    ) |>
    calculate_weights() |>
    set_outcome_model(adjustment_terms = ~ x1 + x2)

  trial_itt_csv <- set_expansion_options(
    trial_itt,
    output = save_to_csv(file.path(trial_itt_dir, "trial_csvs")),
    chunk_size = 500
  ) |>
    expand_trials()

  # load_expanded_data works without additional arguments
  sc_00 <- load_expanded_data(trial_itt_csv)
  expect_equal(sc_00@outcome_data@n_rows, 1558)

  # load_expanded_data works without additional arguments
  sc_01 <- load_expanded_data(trial_itt_csv, p_control = 0.01, seed = 1221)
  expect_equal(
    sort(sc_01@outcome_data@data$id),
    c(
      15, 16, 20, 20, 21, 29, 32, 38, 38, 40, 44, 44, 44, 49, 49, 49,
      50, 58, 61, 65, 68, 71, 71, 74, 74, 74, 74, 89, 95, 96, 98, 99
    )
  )

  random_01 <- load_expanded_data(trial_itt_csv, p_control = 0.01)

  # seed gets reset
  sc_01_1 <- load_expanded_data(trial_itt_csv, p_control = 0.01, seed = 1221)
  random_02 <- load_expanded_data(trial_itt_csv, p_control = 0.01)
  expect_false(identical(sort(random_01@outcome_data@data$id), sort(random_02@outcome_data@data$id)))

  # load_expanded_data works with p_control
  sc_02 <- load_expanded_data(trial_itt_csv, p_control = 0.5, seed = 5678)
  expect_equal(sc_02@outcome_data@n_rows, 780)

  # load_expanded_data works with p_control = 0
  sc_03 <- load_expanded_data(trial_itt_csv, p_control = 0)
  expect_equal(sc_03@outcome_data@n_rows, 14)

  # cases are kept
  expect_equal(sum(sc_00@outcome_data@data$outcome), 14)
  expect_equal(sum(sc_01@outcome_data@data$outcome), 14)
  expect_equal(sum(sc_02@outcome_data@data$outcome), 14)
  expect_equal(sum(sc_03@outcome_data@data$outcome), 14)

  # all columns are kept and sample_weight column is added
  expect_equal(
    colnames(sc_00@outcome_data@data), c(colnames(trial_itt_csv@expansion@datastore@template), "sample_weight")
  )
  expect_equal(sort(sc_00@outcome_data@data$sample_weight), rep(1, 1558))

  expect_equal(
    colnames(sc_01@outcome_data@data), c(colnames(trial_itt_csv@expansion@datastore@template), "sample_weight")
  )

  # load_expanded_data subsets data correctly
  sc_04 <- load_expanded_data(
    trial_itt_csv,
    period = 1:10,
    subset_condition = "followup_time <= 20 & treatment == 1",
    p_control = 0.2,
    seed = 2332
  )
  expect_equal(
    sort(sc_04@outcome_data@data$id),
    c(
      21, 21, 21, 27, 33, 33, 33, 34, 34, 44, 44, 44, 44, 44, 44, 44, 44, 44, 47, 47, 49, 50, 50, 54, 54, 59,
      59, 59, 59, 59, 59, 59, 60, 65, 65, 65, 71, 73, 74, 74, 74, 74, 74, 74, 74, 83, 95, 95, 95, 95, 95, 95
    )
  )
  expect_true(all(sc_04@outcome_data@data$followup_time <= 20))
  expect_true(all(sc_04@outcome_data@data$treatment == 1))
  expect_true(all(sc_04@outcome_data@data$period %in% 1:10))

  # load_expanded_data subsets data correctly without p_control
  sc_05 <- load_expanded_data(
    trial_itt_csv,
    period = 1:10,
    subset_condition = "followup_time <= 20 & treatment == 1",
  )
  expect_equal(
    sort(sc_05@outcome_data@data$id),
    c(
      2, 2, 2, 14, 14, 14, 14, 14, 14, 16, 16, 16, 16, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 27, 27, 27,
      27, 27, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 38, 38,
      44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
      44, 44, 44, 44, 44, 44, 44, 44, 47, 47, 47, 49, 49, 49, 50, 50, 50, 50, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54,
      54, 54, 54, 54, 54, 54, 54, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
      59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 65, 70, 70, 70, 71, 71, 71, 71, 71, 71,
      71, 71, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 83, 83, 83, 95, 95, 95, 95, 95, 95,
      95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
      95, 95, 96, 96, 96, 96, 96, 96
    )
  )

  # if non-existing periods are entered load_expanded_data omits the missing periods, runs the code & returns a warning
  expect_warning(
    sc_06 <- load_expanded_data(
      trial_itt_csv,
      period = c(1:10, 20),
      subset_condition = "followup_time <= 20 & treatment == 1",
      p_control = 0.2,
      seed = 2332
    ),
    "The following periods don't exist in the data and were omitted: 20"
  )
  expect_equal(sort(sc_04@outcome_data@data$id), sort(sc_06@outcome_data@data$id))

  # load_expanded_data returns the correct classes
  expect_class(sc_04, "trial_sequence_ITT")
  expect_class(sc_04@outcome_data, "te_outcome_data")
  expect_class(sc_04@outcome_data@data, "data.table")

  unlink(trial_itt_dir, recursive = TRUE)
})
CAM-Roche/RandomisedTrialsEmulation documentation built on April 14, 2025, 7:44 a.m.