tests/testthat/test-cdmTrimVocabulary.R

test_that("cdmTrimVocabulary trims vocabulary tables", {
  skip_if_not_installed("duckdb")

  con <- local_eunomia_con()
  cdm <- cdmFromCon(con, "main", "main", cdmName = "test")

  # count rows before trimming
  concept_before <- cdm$concept |> dplyr::tally() |> dplyr::pull("n")
  ancestor_before <- cdm$concept_ancestor |> dplyr::tally() |> dplyr::pull("n")

  cdm_trimmed <- cdmTrimVocabulary(cdm)

  # concept table should be smaller

  concept_after <- cdm_trimmed$concept |> dplyr::collect() |> nrow()
  expect_true(concept_after < concept_before)
  expect_true(concept_after > 0)

  # concept_ancestor should be smaller
  ancestor_after <- cdm_trimmed$concept_ancestor |> dplyr::collect() |> nrow()
  expect_true(ancestor_after <= ancestor_before)

  # concept_synonym should be empty
  synonym_count <- cdm_trimmed$concept_synonym |> dplyr::collect() |> nrow()
  expect_equal(synonym_count, 0L)

  # graph completeness: all concept_ids in concept_relationship exist in concept
  if ("concept_relationship" %in% names(cdm_trimmed)) {
    concept_ids <- cdm_trimmed$concept |>
      dplyr::select("concept_id") |>
      dplyr::collect() |>
      dplyr::pull("concept_id")

    cr <- cdm_trimmed$concept_relationship |> dplyr::collect()
    if (nrow(cr) > 0) {
      expect_true(all(cr$concept_id_1 %in% concept_ids))
      expect_true(all(cr$concept_id_2 %in% concept_ids))
    }
  }

  # all concept_ids in concept_ancestor exist in concept
  ca <- cdm_trimmed$concept_ancestor |> dplyr::collect()
  if (nrow(ca) > 0) {
    expect_true(all(ca$ancestor_concept_id %in% concept_ids))
    expect_true(all(ca$descendant_concept_id %in% concept_ids))
  }
})

Try the CDMConnector package in your browser

Any scripts or data that you put into this service are public.

CDMConnector documentation built on April 3, 2026, 9:09 a.m.