# tests/testthat/test-summariseCodeUse.R

# End-to-end checks of summariseCodeUse() against the Eunomia example
# database (duckdb). The counts reported in the summarised result are compared
# with counts computed directly from the CDM tables, overall and stratified by
# year, sex and age group, with and without a date range. Also covers the
# countBy options, several clinical domains, the empty-result case and input
# validation errors.
test_that("summarise code use - eunomia", {
  skip_on_cran()

  con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir())
  cdm <- CDMConnector::cdmFromCon(con, cdmSchema = "main", writeSchema = "main")

  acetiminophen <- c(1125315,  1127433, 40229134,
                     40231925, 40162522, 19133768,  1127078)
  poliovirus_vaccine <- c(40213160)
  cs <- list(acetiminophen = acetiminophen,
             poliovirus_vaccine = poliovirus_vaccine)

  # snapshot the source tables so we can check that none are left behind
  startNames <- CDMConnector::listSourceTables(cdm)

  results <- summariseCodeUse(cs,
                              cdm = cdm,
                              byYear = TRUE,
                              bySex = TRUE,
                              ageGroup = list(c(0,17),
                                              c(18,65),
                                              c(66, 100)))
  endNames <- CDMConnector::listSourceTables(cdm)
  expect_true(length(setdiff(endNames, startNames)) == 0)

  expect_no_error(results_no_by_concept <- summariseCodeUse(cs,
                                                            cdm = cdm,
                                                            byYear = TRUE,
                                                            bySex = TRUE,
                                                            byConcept = FALSE))
  expect_true(all(results_no_by_concept |> dplyr::pull("group_level") |> unique() == c("acetiminophen","poliovirus_vaccine")))
  expect_true(all(results_no_by_concept |>
                    dplyr::filter(group_level == "acetiminophen", strata_level == "overall") |>
                    dplyr::pull("estimate_value") == c("14205", "2679")))

  # min cell counts:
  # after suppression the 1909 estimates for acetiminophen are all "-"
  expect_true(
    all(
      omopgenerics::suppress(results) |>
        dplyr::filter(
          variable_name == "overall",
          strata_level == "1909",
          group_level == "acetiminophen"
        ) |>
        dplyr::pull("estimate_value") == "-"
    ))

  # check is a summarised result
  expect_true(inherits(results, "summarised_result"))
  expect_equal(omopgenerics::resultColumns(),
               colnames(results))

  # overall record count
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "overall" &
                                strata_level == "overall" &
                                group_level == "acetiminophen" &
                                estimate_name == "record_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in%  acetiminophen) |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # overall person count
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "overall" &
                                strata_level == "overall" &
                                group_level == "acetiminophen" &
                                estimate_name == "person_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric()  ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in% acetiminophen) |>
                dplyr::select("person_id") |>
                dplyr::distinct() |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # by year
  # overall record count
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "year" &
                                strata_level == "2008" &
                                group_level == "acetiminophen" &
                                estimate_name == "record_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric()  ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in% acetiminophen) |>
                dplyr::filter(year(drug_exposure_start_date) == 2008) |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # overall person count
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "year" &
                                strata_level == "2008" &
                                group_level == "acetiminophen" &
                                estimate_name == "person_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in% acetiminophen) |>
                dplyr::filter(year(drug_exposure_start_date) == 2008) |>
                dplyr::select("person_id") |>
                dplyr::distinct() |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # by age group and sex
  # overall record count
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "sex" &
                                strata_level == "Male" &
                                group_level == "acetiminophen" &
                                estimate_name == "record_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in% acetiminophen) |>
                PatientProfiles::addSex() |>
                dplyr::filter(sex == "Male") |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # age bounds are numeric so the comparison does not depend on the database
  # implicitly casting a string to a number
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "age_group &&& sex" &
                                strata_level == "18 to 65 &&& Male" &
                                group_level == "acetiminophen" &
                                estimate_name == "record_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in% acetiminophen) |>
                PatientProfiles::addAge(indexDate = "drug_exposure_start_date") |>
                PatientProfiles::addSex() |>
                dplyr::filter(sex == "Male" &
                                age >= 18 &
                                age <= 65) |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # overall person count
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "age_group &&& sex" &
                                strata_level == "18 to 65 &&& Male" &
                                group_level == "acetiminophen" &
                                estimate_name == "person_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in% acetiminophen) |>
                PatientProfiles::addAge(indexDate = "drug_exposure_start_date") |>
                PatientProfiles::addSex() |>
                dplyr::filter(sex == "Male" &
                                age >= 18 &
                                age <= 65) |>
                dplyr::select("person_id") |>
                dplyr::distinct() |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # Check date range
  results <- summariseCodeUse(cs,
                              cdm = cdm,
                              byYear = TRUE,
                              bySex = TRUE,
                              ageGroup = list(c(0,17),
                                              c(18,65),
                                              c(66, 100)),
                              dateRange = as.Date(c("2010-01-01","2015-01-01")))
  # the requested date range is recorded in the result settings
  expect_equal(results |>
                 omopgenerics::settings() |>
                 dplyr::select("date_range_start", "date_range_end") |>
                 as.character(),
               c("2010-01-01","2015-01-01"))

  # overall record count
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "overall" &
                                strata_level == "overall" &
                                group_level == "acetiminophen" &
                                estimate_name == "record_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$drug_exposure |>
                dplyr::filter(drug_exposure_start_date >= as.Date("2010-01-01"),
                              drug_exposure_start_date <= as.Date("2015-01-01")) |>
                dplyr::filter(drug_concept_id %in%  acetiminophen) |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # overall person count
  # same bounds as the dateRange argument and the record count check above
  expect_true(results |>
                dplyr::filter(group_name == "codelist_name" &
                                strata_name == "overall" &
                                strata_level == "overall" &
                                group_level == "acetiminophen" &
                                estimate_name == "person_count",
                              variable_name == "overall") |>
                dplyr::pull("estimate_value") |>
                as.numeric()  ==
                cdm$drug_exposure |>
                dplyr::filter(drug_concept_id %in% acetiminophen) |>
                dplyr::filter(drug_exposure_start_date >= as.Date("2010-01-01"),
                              drug_exposure_start_date <= as.Date("2015-01-01")) |>
                dplyr::select("person_id") |>
                dplyr::distinct() |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # by year
  # year strata reported for this run span 2010 to 2014
  expect_true(
    results |>
      dplyr::filter(strata_name == "year") |>
      dplyr::pull("strata_level") |>
      unique() |>
      as.numeric() |> max() == 2014)
  expect_true(
    results |>
      dplyr::filter(strata_name == "year") |>
      dplyr::pull("strata_level") |>
      unique() |>
      as.numeric() |> min() == 2010)

  # countBy = "person" returns only person counts
  results <- summariseCodeUse(list("acetiminophen" = acetiminophen),
                              cdm = cdm, countBy = "person",
                              byYear = FALSE,
                              bySex = FALSE,
                              ageGroup = NULL)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "person_count")) > 0)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "record_count")) == 0)

  # countBy = "record" returns only record counts
  results <- summariseCodeUse(list("acetiminophen" = acetiminophen),
                              cdm = cdm, countBy = "record",
                              byYear = FALSE,
                              bySex = FALSE,
                              ageGroup = NULL)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "person_count")) == 0)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "record_count")) > 0)

  # domains covered
  # condition
  expect_true(nrow(summariseCodeUse(list(cs= c(4112343)),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # visit
  expect_true(nrow(summariseCodeUse(list(cs= c(9201)),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # drug
  expect_true(nrow(summariseCodeUse(list(cs= c(40213160)),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # measurement
  expect_true(nrow(summariseCodeUse(list(cs= c(3006322)),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # procedure and condition
  expect_true(nrow(summariseCodeUse(list(cs= c(4107731,4112343)),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # no records
  # a message is emitted and an empty result is returned
  expect_message(results <- summariseCodeUse(list(cs= c(999999)),
                                             cdm = cdm,
                                             byYear = FALSE,
                                             bySex = FALSE,
                                             ageGroup = NULL))
  expect_true(nrow(results) == 0)

  # expected errors
  # codelist is not a list
  expect_error(summariseCodeUse("not a concept",
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = NULL))
  expect_error(summariseCodeUse("123",
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = NULL))
  expect_error(summariseCodeUse(list("123"), # not named
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = NULL))
  # cdm is not a cdm reference
  expect_error(summariseCodeUse(list(a = 123),
                                cdm = "not a cdm",
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = NULL))
  # byYear / bySex are not logical
  expect_error(summariseCodeUse(list(a = 123),
                                cdm = cdm,
                                byYear = "Maybe",
                                bySex = FALSE,
                                ageGroup = NULL))
  expect_error(summariseCodeUse(list(a = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = "Maybe",
                                ageGroup = NULL))
  # ageGroup: not a list, upper bound below lower bound, overlapping groups
  expect_error(summariseCodeUse(list(a = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = 25))
  expect_error(summariseCodeUse(list(a = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = list(c(18,17))))
  expect_error(summariseCodeUse(list(a = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = list(c(0,17),
                                                c(15,20))))
  # dateRange is not a date vector
  expect_error(summariseCodeUse(list(a = 123),
                                cdm = cdm,
                                dateRange = c("a","b")))
  CDMConnector::cdmDisconnect(cdm)
})

# End-to-end checks of summariseCohortCodeUse() against the Eunomia example
# database (duckdb). A pharyngitis concept cohort is generated and code use is
# summarised for cohort members with timing "any" and "entry"; the "entry"
# counts are compared with counts computed directly from the CDM tables. Also
# covers multiple cohorts in one table, an empty cohort and input validation
# errors.
test_that("summarise cohort code use - eunomia", {
  skip_on_cran()

  con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir())
  cdm <- CDMConnector::cdmFromCon(cdmName = "cdm", con, cdmSchema = "main", writeSchema = "main")

  pharyngitis <- c(4112343)

  cdm <- CDMConnector::generateConceptCohortSet(cdm = cdm,
                                                conceptSet = list(pharyngitis = pharyngitis),
                                                name = "pharyngitis",
                                                end = "observation_period_end_date",
                                                overwrite = TRUE)

  # any
  results_all <- summariseCodeUse(list(cs = 4134304),
                                  cdm = cdm)
  results_cohort <- summariseCohortCodeUse(list(cs = 4134304),
                                           cdm = cdm,
                                           cohortTable = "pharyngitis",
                                           timing = "any")
  expect_no_error(summariseCohortCodeUse(list(cs = 4134304),
                                         cdm = cdm,
                                         cohortTable = "pharyngitis",
                                         timing = "any",
                                         byConcept = FALSE))
  expect_true(inherits(results_cohort, "summarised_result"))
  expect_true(all(c("result_id", "result_type", "package_name", "package_version", "timing") %in%
                    colnames(omopgenerics::settings(results_cohort))))

  # fewer people have the code within the cohort than in the whole database
  expect_true(results_cohort |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "overall" &
                                strata_level == "overall" &
                                estimate_name == "person_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() <
                results_all |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "overall" &
                                strata_level == "overall" &
                                estimate_name == "person_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric())

  # at entry - everyone in the cohort should have the code
  results_cohort <- summariseCohortCodeUse(list(pharyngitis = pharyngitis),
                                           cdm = cdm,
                                           cohortTable = "pharyngitis",
                                           timing = "entry")
  # the comparison is wrapped in an expectation so that it is actually checked
  expect_equal(results_cohort |>
                 dplyr::filter(variable_name == "overall" &
                                 strata_name == "overall" &
                                 strata_level == "overall" &
                                 estimate_name == "person_count") |>
                 dplyr::pull("estimate_value") |>
                 as.numeric(),
               CDMConnector::cohortCount(cdm$pharyngitis) |>
                 dplyr::pull("number_subjects") |>
                 as.numeric())

  # 260139
  # on index
  # number of distinct cohort subjects with a 260139 condition record starting
  # on their cohort start date
  index_260139 <- cdm$pharyngitis |>
    dplyr::left_join(cdm$condition_occurrence,
                     by=c("subject_id"="person_id")) |>
    dplyr::filter(condition_start_date == cohort_start_date) |>
    dplyr::filter(condition_concept_id == 260139) |>
    dplyr::select("subject_id") |>
    dplyr::distinct() |>
    dplyr::count() |>
    dplyr::pull()

  results_cohort_260139 <- summariseCohortCodeUse(list(cs = 260139),
                                                  cdm = cdm,
                                                  cohortTable = "pharyngitis",
                                                  timing = "entry")
  expect_equal(results_cohort_260139 |>
                 dplyr::filter(variable_name == "overall" &
                                 strata_name == "overall" &
                                 strata_level == "overall" &
                                 estimate_name == "person_count") |>
                 dplyr::pull("estimate_value") |>
                 as.numeric(), index_260139)

  # 260139 or 19133873 or 1127433
  # on index
  # number of condition and drug records (not distinct subjects) starting on
  # the cohort start date, hence union_all without distinct
  index_260139_19133873_1127433 <- dplyr::union_all(
    cdm$pharyngitis |>
      dplyr::left_join(cdm$condition_occurrence,
                       by=c("subject_id"="person_id")) |>
      dplyr::filter(condition_start_date == cohort_start_date) |>
      dplyr::filter(condition_concept_id == 260139) |>
      dplyr::select("subject_id"),
    cdm$pharyngitis |>
      dplyr::left_join(cdm$drug_exposure,
                       by=c("subject_id"="person_id")) |>
      dplyr::filter(drug_exposure_start_date == cohort_start_date) |>
      dplyr::filter(drug_concept_id %in% c(19133873,1127433)) |>
      dplyr::select("subject_id")) |>
    dplyr::count() |>
    dplyr::pull()

  results_cohort_260139_19133873_1127433<- summariseCohortCodeUse(list(cs = c(260139,19133873,1127433)),
                                                                  cdm = cdm,
                                                                  cohortTable = "pharyngitis",
                                                                  timing = "entry")
  expect_equal(results_cohort_260139_19133873_1127433 |>
                 dplyr::filter(variable_name == "overall" &
                                 strata_name == "overall" &
                                 strata_level == "overall" &
                                 estimate_name == "record_count") |>
                 dplyr::pull("estimate_value") |>
                 as.numeric(),
               index_260139_19133873_1127433)

  # the "Acute bronchitis" concept-level person count matches the 260139
  # subject count computed above
  expect_equal(results_cohort_260139_19133873_1127433 |>
                 dplyr::filter(stringr::str_detect(variable_name, "Acute bronchitis")) |>
                 dplyr::filter(strata_name == "overall" &
                                 strata_level == "overall" &
                                 estimate_name == "person_count") |>
                 dplyr::pull("estimate_value") |>
                 as.numeric(),
               index_260139)

  # multiple cohorts
  cdm <- CDMConnector::generateConceptCohortSet(cdm = cdm,
                                                conceptSet = list(a = 260139,
                                                                  b = 1127433 ),
                                                name = "cohorts",
                                                end = "observation_period_end_date",
                                                overwrite = TRUE)

  results_cohort_mult <- summariseCohortCodeUse(list(cs = c(260139,19133873,1127433)),
                                                cdm = cdm,
                                                cohortTable = "cohorts",
                                                timing = "entry")
  # one result row per cohort
  expect_true(nrow(results_cohort_mult |>
                     dplyr::filter(stringr::str_detect(variable_name, "Acute bronchitis")) |>
                     dplyr::filter(strata_name == "overall" &
                                     strata_level == "overall" &
                                     estimate_name == "person_count")) == 2)

  expect_equal(results_cohort_mult |>
                 dplyr::filter(stringr::str_detect(variable_name, "Acute bronchitis")) |>
                 dplyr::filter(strata_name == "overall" &
                                 strata_level == "overall" &
                                 estimate_name == "person_count") |>
                 visOmopResults::splitGroup() |>
                 dplyr::pull("cohort_name"),
               c("a", "b"))

  # empty cohort - no results
  # filtering on cohort_definition_id 99 is used to empty the cohort table
  cdm$pharyngitis <-  cdm$pharyngitis |>
    dplyr::filter(cohort_definition_id == 99)
  expect_true(nrow(summariseCohortCodeUse(list(cs = 4134304),
                                          cdm = cdm,
                                          cohortTable = "pharyngitis",
                                          timing = "any")) == 0)

  # expected errors
  # invalid codelist and/or a cohort table that does not exist
  expect_error(summariseCohortCodeUse(4134304,
                                      cdm = cdm,
                                      cohortTable = "not_a_cohort",
                                      timing = "any"))
  expect_error(summariseCohortCodeUse(list(4134304),
                                      cdm = cdm,
                                      cohortTable = "not_a_cohort",
                                      timing = "any"))
  expect_error(summariseCohortCodeUse(list(cs = 4134304),
                                      cdm = cdm,
                                      cohortTable = "not_a_cohort",
                                      timing = "any"))
  # timing must be a single supported option
  expect_error(summariseCohortCodeUse(list(cs = 4134304),
                                      cdm = cdm,
                                      cohortTable = "pharyngitis",
                                      timing = "not_a_option"))
  expect_error(summariseCohortCodeUse(list(cs = 4134304),
                                      cdm = cdm,
                                      cohortTable = "pharyngitis",
                                      timing = c("any", "entry")))

  CDMConnector::cdmDisconnect(cdm)

})

test_that("summarise code use - redshift", {

  testthat::skip_if(Sys.getenv("CDM5_REDSHIFT_DBNAME") == "")

  db <-  DBI::dbConnect(RPostgres::Redshift(),
                        dbname   = Sys.getenv("CDM5_REDSHIFT_DBNAME"),
                        host     = Sys.getenv("CDM5_REDSHIFT_HOST"),
                        port     = Sys.getenv("CDM5_REDSHIFT_PORT"),
                        user     = Sys.getenv("CDM5_REDSHIFT_USER"),
                        password = Sys.getenv("CDM5_REDSHIFT_PASSWORD"))

  cdm <- CDMConnector::cdmFromCon(cdmName = "cdm",
                                  con = db,
                                  cdmSchema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"),
                                  writeSchema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA"),
                                  cdmVersion = "5.3")

  asthma <- list(asthma = c(317009, 257581))

  results <- summariseCodeUse(asthma,
                              cdm = cdm,
                              byYear = TRUE,
                              bySex = TRUE,
                              ageGroup = list(c(0,17),
                                              c(18,65),
                                              c(66, 100)))
  # column names
  expect_true(inherits(results, "summarised_result"))

  # overall record count
  expect_true(results |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "overall" &
                                strata_level == "overall",
                              estimate_name == "record_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$condition_occurrence |>
                dplyr::filter(condition_concept_id %in%  !!asthma[[1]]) |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # overall person count
  expect_true(results |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "overall" &
                                strata_level == "overall" &
                                estimate_name == "person_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$condition_occurrence |>
                dplyr::filter(condition_concept_id %in% !!asthma[[1]]) |>
                dplyr::select("person_id") |>
                dplyr::distinct() |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # by year
  # overall record count
  expect_true(results |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "year" &
                                strata_level == "2008",
                              estimate_name == "record_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$condition_occurrence |>
                dplyr::filter(condition_concept_id %in% !!asthma[[1]]) |>
                dplyr::filter(year(condition_start_date) == 2008) |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # overall person count
  expect_true(results |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "year" &
                                strata_level == "2008",
                              estimate_name == "person_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$condition_occurrence |>
                dplyr::filter(condition_concept_id %in% !!asthma[[1]]) |>
                dplyr::filter(year(condition_start_date) == 2008) |>
                dplyr::select("person_id") |>
                dplyr::distinct() |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # by age group and sex
  # overall record count
  expect_true(results |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "sex" &
                                strata_level == "Male",
                              estimate_name == "record_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$condition_occurrence |>
                dplyr::filter(condition_concept_id %in% !!asthma[[1]]) |>
                PatientProfiles::addSex() |>
                dplyr::filter(sex == "Male") |>
                dplyr::tally() |>
                dplyr::pull("n"))

  expect_true(results |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "age_group &&& sex" &
                                strata_level == "18 to 65 &&& Male",
                              estimate_name == "record_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$condition_occurrence |>
                dplyr::filter(condition_concept_id %in% !!asthma[[1]]) |>
                PatientProfiles::addAge(indexDate = "condition_start_date") |>
                PatientProfiles::addSex() |>
                dplyr::filter(sex == "Male" &
                                age >= "18" &
                                age <= "65") |>
                dplyr::tally() |>
                dplyr::pull("n"))

  # overall person count
  expect_true(results |>
                dplyr::filter(variable_name == "overall" &
                                strata_name == "age_group &&& sex" &
                                strata_level == "18 to 65 &&& Male",
                              estimate_name == "person_count") |>
                dplyr::pull("estimate_value") |>
                as.numeric() ==
                cdm$condition_occurrence |>
                dplyr::filter(condition_concept_id %in% !!asthma[[1]]) |>
                PatientProfiles::addAge(indexDate = "condition_start_date") |>
                PatientProfiles::addSex() |>
                dplyr::filter(sex == "Male" &
                                age >= "18" &
                                age <= "65") |>
                dplyr::select("person_id") |>
                dplyr::distinct() |>
                dplyr::tally() |>
                dplyr::pull("n"))




  results <- summariseCodeUse(asthma,
                              cdm = cdm, countBy = "person",
                              byYear = FALSE,
                              bySex = FALSE,
                              ageGroup = NULL)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "person_count")) > 0)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "record_count")) == 0)

  results <- summariseCodeUse(asthma,
                              cdm = cdm, countBy = "record",
                              byYear = FALSE,
                              bySex = FALSE,
                              ageGroup = NULL)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "person_count")) == 0)
  expect_true(nrow(results |>
                     dplyr::filter(estimate_name == "record_count")) > 0)


  # domains covered

  # condition
  expect_true(nrow(summariseCodeUse(list(cs = c(317009)),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # visit
  expect_true(nrow(summariseCodeUse(list(cs = 9201),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # drug
  expect_true(nrow(summariseCodeUse(list(cs = 19071493),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # measurement
  expect_true(nrow(summariseCodeUse(list(cs = 2212542),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # procedure and condition
  expect_true(nrow(summariseCodeUse(list(cs = c(4261206,317009)),
                                    cdm = cdm,
                                    byYear = FALSE,
                                    bySex = FALSE,
                                    ageGroup = NULL))>1)

  # no records
  expect_message(results <- summariseCodeUse(list(cs = c(999999)),
                                             cdm = cdm,
                                             byYear = FALSE,
                                             bySex = FALSE,
                                             ageGroup = NULL))
  expect_true(nrow(results) == 0)



  # expected errors
  expect_error(summariseCodeUse(list(cs = "not a concept"),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = NULL))
  expect_error(summariseCodeUse(list(cs = 123),
                                cdm = "not a cdm",
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = NULL))
  expect_error(summariseCodeUse(list(cs = 123),
                                cdm = cdm,
                                byYear = "Maybe",
                                bySex = FALSE,
                                ageGroup = NULL))
  expect_error(summariseCodeUse(list(cs = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = "Maybe",
                                ageGroup = NULL))
  expect_error(summariseCodeUse(list(cs = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = 25))
  expect_error(summariseCodeUse(list(cs = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = list(c(18,17))))
  expect_error(summariseCodeUse(list(cs = 123),
                                cdm = cdm,
                                byYear = FALSE,
                                bySex = FALSE,
                                ageGroup = list(c(0,17),
                                                c(15,20))))


  CDMConnector::cdmDisconnect(cdm)


})

# summariseCodeUse() when every drug source concept id is missing: the
# concept-level rows are expected to carry the string "NA" for both the source
# concept name and the source concept id.
test_that("summarise code use - eunomia source concept id NA", {
  skip_on_cran()

  # Connect to the database found at CDMConnector::eunomiaDir().
  con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir())
  cdm <- CDMConnector::cdmFromCon(con, cdmSchema = "main", writeSchema = "main")

  # Replace the source concept id column of drug_exposure with missing values.
  cdm$drug_exposure <- dplyr::mutate(
    cdm$drug_exposure,
    drug_source_concept_id = NA_character_
  )

  cs <- list(
    acetiminophen = c(
      1125315, 1127433, 40229134,
      40231925, 40162522, 19133768, 1127078
    )
  )
  results <- summariseCodeUse(cs, cdm = cdm)

  # Keep only rows that are not the "overall" summary, with the additional
  # columns split out so they can be inspected directly.
  concept_rows <- results |>
    omopgenerics::splitAdditional() |>
    dplyr::filter(variable_name != "overall")

  source_names <- dplyr::pull(concept_rows, "source_concept_name")
  expect_true(all(source_names == "NA"))

  source_ids <- dplyr::pull(concept_rows, "source_concept_id")
  expect_true(all(source_ids == "NA"))

  CDMConnector::cdmDisconnect(cdm)
})

# summariseCohortCodeUse() when every condition source concept id is missing:
# the concept-level rows are expected to carry the string "NA" for both the
# source concept name and the source concept id.
test_that("summarise cohort code use - eunomia source concept id NA", {
  skip_on_cran()

  # Connect to the database found at CDMConnector::eunomiaDir().
  con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir())
  cdm <- CDMConnector::cdmFromCon(con, cdmSchema = "main", writeSchema = "main")

  # Replace the source concept id column of condition_occurrence with
  # missing values before the cohort is generated.
  cdm$condition_occurrence <- dplyr::mutate(
    cdm$condition_occurrence,
    condition_source_concept_id = NA_character_
  )

  # Build the "pharyngitis" cohort table that the summary is restricted to.
  cdm <- CDMConnector::generateConceptCohortSet(
    cdm = cdm,
    conceptSet = list(pharyngitis = c(4112343)),
    name = "pharyngitis",
    end = "observation_period_end_date",
    overwrite = TRUE
  )

  results_cohort <- summariseCohortCodeUse(
    list(cs = 4134304),
    cdm = cdm,
    cohortTable = "pharyngitis",
    timing = "any"
  )

  # Keep only rows that are not the "overall" summary, with the additional
  # columns split out so they can be inspected directly.
  concept_rows <- results_cohort |>
    omopgenerics::splitAdditional() |>
    dplyr::filter(variable_name != "overall")

  source_names <- dplyr::pull(concept_rows, "source_concept_name")
  expect_true(all(source_names == "NA"))

  source_ids <- dplyr::pull(concept_rows, "source_concept_id")
  expect_true(all(source_ids == "NA"))

  CDMConnector::cdmDisconnect(cdm)

})

# summariseCohortCodeUse() called with a codelist that contains no concepts:
# the result must still be a summarised_result object, and it must have no
# rows.
test_that("empty cohort", {
  skip_on_cran()

  # Connect to the database found at CDMConnector::eunomiaDir().
  con <- DBI::dbConnect(
    duckdb::duckdb(),
    dbdir = CDMConnector::eunomiaDir()
  )
  cdm <- CDMConnector::cdmFromCon(
    con,
    cdmSchema = "main",
    writeSchema = "main",
    cdmName = "test"
  )

  # Two concept-based cohorts stored in the "cohorts" table.
  cdm <- CDMConnector::generateConceptCohortSet(
    cdm = cdm,
    conceptSet = list(a = 260139, b = 1127433),
    name = "cohorts",
    end = "observation_period_end_date",
    overwrite = TRUE
  )

  # numeric(0) is the explicit spelling of a zero-length numeric codelist.
  results_cohort_mult <- summariseCohortCodeUse(
    list(cs = numeric(0)),
    cdm = cdm,
    cohortTable = "cohorts",
    timing = "entry"
  )

  # Dedicated expectations report the actual class / row count on failure,
  # unlike expect_true() wrapped around a logical condition.
  expect_s3_class(results_cohort_mult, "summarised_result")
  expect_equal(nrow(results_cohort_mult), 0)

  CDMConnector::cdmDisconnect(cdm)

})
# oxford-pharmacoepi/CodelistGenerator documentation built on June 9, 2025, 9:38 a.m.