tests/testthat/test-db-cdmSubset.R

test_cdmSubset <- function(con, cdmSchema, writeSchema) {

  # test cdmSubset, cdmSubsetCohort, cdmSample, cdmFlatten

  cdm <- cdmFromCon(
    con = con, cdmName = "test", cdmSchema = cdmSchema,
    writeSchema = writeSchema
  )

  cohortSet <- readCohortSet(system.file("cohorts3", package = "CDMConnector", mustWork = TRUE)) %>%
    dplyr::filter(cohort_name == "gibleed_all")

  cdm <- generateCohortSet(cdm, cohortSet = cohortSet, name = "cohort")

  cdmSampled1 <- cdmSample(cdm = cdm, n = 100, seed = 123)
  expect_true(cdmSampled1$person |> dplyr::tally() |> dplyr::pull() == 100)
  expect_true("person_sample" %in% names(cdmSampled1))
  expect_false("person_sample" %in% names(cdm))

  cdmSampled2 <- cdmSample(cdm = cdm, n = 200, name = "sample_person") # test name
  expect_true(cdmSampled2$person |> dplyr::tally() |> dplyr::pull() == 200)
  expect_true("sample_person" %in% names(cdmSampled2))
  expect_false("person_sample" %in% names(cdmSampled2))

  cdmSampled3 <- cdmSample(cdm = cdm, n = 100, seed = 123, name = "sample3") # test seed
  expect_true(cdmSampled3$person |> dplyr::tally() |> dplyr::pull() == 100)
  expect_true("sample3" %in% names(cdmSampled3))

  cdmSampled4 <- cdmSample(cdm = cdm, n = 100, seed = 1234, name = "sample4")
  expect_true(cdmSampled4$person |> dplyr::tally() |> dplyr::pull() == 100)
  expect_true("sample4" %in% names(cdmSampled4))

  expect_true(identical(
    cdmSampled1$person |> dplyr::collect() |> dplyr::arrange(.data$person_id),
    cdmSampled3$person |> dplyr::collect() |> dplyr::arrange(.data$person_id)
  ))

  expect_false(identical(
    cdmSampled1$person |> dplyr::collect() |> dplyr::arrange(.data$person_id),
    cdmSampled4$person |> dplyr::collect() |> dplyr::arrange(.data$person_id)
  ))

  idsToSubsetTo <- cdm$person %>%
    dplyr::select(person_id) %>%
    head(10) %>%
    dplyr::pull() %>%
    sort() %>%
    as.integer()

  cdm10 <- cdmSubset(cdm, idsToSubsetTo)

  expect_equal(
    dplyr::tally(cdm10$person) %>%
    dplyr::pull(.data$n) %>%
    as.integer(),
    10L)

  expect_equal(
    cdm10$person %>% dplyr::select(person_id) %>% dplyr::arrange(person_id) %>% dplyr::pull() %>% sort,
    idsToSubsetTo
  )

  # cdmSubsetCohort ----
  cdmGiBleed <- cdmSubsetCohort(cdm, cohortTable = "cohort")

  idsInConditionTable <- cdmGiBleed$condition_occurrence %>%
    dplyr::distinct(.data$person_id) %>%
    dplyr::pull()

  idsInCohort <- cdmGiBleed$cohort %>%
    dplyr::distinct(.data$subject_id) %>%
    dplyr::pull() %>%
    as.integer() # could be int64 type

  expect_true(all(idsInConditionTable %in% idsInCohort))

  # cdmFlatten ----
  flatCdm <- cdmFlatten(cdm10) %>%
    dplyr::collect()

  expect_s3_class(flatCdm, "data.frame")

  if (dbms(con) == "duckdb") {
    expect_true(nrow(flatCdm) > 0)
    expect_true(all(c("drug_exposure", "condition_occurrence") %in% flatCdm$domain))
  }

  flatCdm2 <- cdmFlatten(cdm10,
                         c("condition_occurrence",
                           "drug_exposure",
                           "procedure_occurrence",
                           "measurement",
                           "visit_occurrence",
                           "death",
                           "observation"),
                           includeConceptName = FALSE) %>%
    dplyr::collect()

  expect_s3_class(flatCdm2, "data.frame")

}

dbtype = "postgres"
for (dbtype in dbToTest) {
  test_that(glue::glue("{dbtype} - sample database"), {
    if (!(dbtype %in% ciTestDbs)) skip_on_ci()
    if (dbtype != "duckdb") skip_on_cran() else skip_if_not_installed("duckdb")
    con <- get_connection(dbtype)
    cdmSchema <- get_cdm_schema(dbtype)
    writeSchema <- get_write_schema(dbtype)
    skip_if(any(writeSchema == "") || any(cdmSchema == "") || is.null(con))
    test_cdmSubset(con, cdmSchema, writeSchema)
    disconnect(con)
  })
}

Try the CDMConnector package in your browser

Any scripts or data that you put into this service are public.

CDMConnector documentation built on April 4, 2025, 4:42 a.m.