tests/testthat/test-summariseMissingData.R

test_that("summariseMissingData() works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()


  # Check all tables work ----
  expect_true(inherits(summariseMissingData(cdm, "drug_exposure"), "summarised_result"))
  expect_no_error(y <- summariseMissingData(cdm, "observation_period"))
  checkResultType(y, "summarise_missing_data")
  expect_no_error(x <- summariseMissingData(cdm, "visit_occurrence"))
  expect_no_error(summariseMissingData(cdm, "condition_occurrence"))
  expect_no_error(summariseMissingData(cdm, "drug_exposure"))

  expect_no_error(summariseMissingData(cdm, "procedure_occurrence", interval = "years"))
  expect_warning(summariseMissingData(cdm, "device_exposure"))
  expect_no_error(z <- summariseMissingData(cdm, "measurement"))
  expect_no_error(s <- summariseMissingData(cdm, "observation"))

  expect_warning(de <- summariseMissingData(cdm, "death"))
  checkResultType(de, "summarise_missing_data")
  expect_warning(p <- summariseMissingData(cdm, "person", ageGroup = list(c(0, 50))))
  expect_true(omopgenerics::settings(p)$strata == "")

  expect_no_error(all <- summariseMissingData(cdm, c("observation_period", "visit_occurrence", "measurement")))
  expect_equal(all, dplyr::bind_rows(y, x, z))
  expect_equal(summariseMissingData(cdm, "observation"), summariseMissingData(cdm, "observation", col = colnames(cdm[["observation"]])))
  x <- summariseMissingData(cdm, "procedure_occurrence", col = "procedure_date")

  expect_equal(summariseMissingData(cdm, c("procedure_occurrence", "observation"), col = "procedure_date"), dplyr::bind_rows(x, s))
  y <- summariseMissingData(cdm, "observation", col = "observation_date")
  expect_equal(summariseMissingData(cdm, c("procedure_occurrence", "observation"), col = c("procedure_date", "observation_date")), dplyr::bind_rows(x, y))

  # Check inputs ----
  expect_true(summariseMissingData(cdm, "procedure_occurrence", col = "person_id") |>
    dplyr::select(estimate_value) |>
    dplyr::mutate(estimate_value = as.numeric(estimate_value)) |>
    dplyr::summarise(sum = sum(estimate_value)) |>
    dplyr::pull() == 0)

  expect_true(summariseMissingData(cdm, "procedure_occurrence", col = "person_id", sex = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |>
    dplyr::distinct(.data$strata_level) |>
    dplyr::tally() |>
    dplyr::pull() == 9)

  expect_true(summariseMissingData(cdm, "procedure_occurrence", col = "person_id", ageGroup = list(c(0, 50))) |>
    dplyr::distinct(.data$strata_level) |>
    dplyr::tally() |>
    dplyr::pull() == 3)

  cdm$procedure_occurrence <- cdm$procedure_occurrence |>
    dplyr::mutate(procedure_concept_id = NA_integer_) |>
    dplyr::compute(name = "procedure_occurrence", temporary = FALSE)

  expect_warning(summariseMissingData(cdm, "procedure_occurrence", col = "procedure_concept_id", ageGroup = list(c(0, 50))))
  expect_no_error(summariseMissingData(cdm, "procedure_occurrence", col = "procedure_concept_id", ageGroup = list(c(0, 50)), sample = 100))
})

test_that("dateRange argument works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  expect_no_error(summariseMissingData(cdm, "condition_occurrence", dateRange = as.Date(c("2012-01-01", "2018-01-01"))))
  expect_message(x <- summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", "2025-01-01"))))
  observationRange <- cdm$observation_period |>
    dplyr::summarise(
      minobs = min(.data$observation_period_start_date, na.rm = TRUE),
      maxobs = max(.data$observation_period_end_date, na.rm = TRUE)
    )
  expect_no_error(y <- summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", observationRange |> dplyr::pull("maxobs")))))
  expect_equal(x, y, ignore_attr = TRUE)
  expect_false(settings(x)$study_period_end == settings(y)$study_period_end)
  expect_error(summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2015-01-01", "2014-01-01"))))
  expect_warning(z <- summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2020-01-01", "2021-01-01"))))
  expect_equal(z, omopgenerics::emptySummarisedResult(), ignore_attr = TRUE)
  expect_equal(summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", NA))), y, ignore_attr = TRUE)
  checkResultType(z, "summarise_missing_data")
  expect_equal(colnames(settings(z)), colnames(settings(x)))
  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("tableMissingData() works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  # Check that works ----
  expect_no_error(x <- tableMissingData(summariseMissingData(cdm, "condition_occurrence")))
  expect_true(inherits(x, "gt_tbl"))
  expect_no_error(y <- tableMissingData(summariseMissingData(cdm, c(
    "observation_period",
    "measurement"
  ))))
  expect_true(inherits(y, "gt_tbl"))
  expect_warning(t <- summariseMissingData(cdm, "death"))
  expect_warning(inherits(tableMissingData(t), "gt_tbl"))

  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("col not present in table", {
  skip_on_cran()
  # Load mock database ----
  # Load mock database
  cdm <- omopgenerics::cdmFromTables(
    tables = list(
      person = dplyr::tibble(
        person_id = as.integer(1:4),
        gender_concept_id = c(8507L, 8532L, 8532L, 8507L),
        year_of_birth = 2010L,
        month_of_birth = 1L,
        day_of_birth = 1L,
        race_concept_id = 0L,
        ethnicity_concept_id = 0L
      ),
      observation_period = dplyr::tibble(
        observation_period_id = as.integer(1:8),
        person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(),
        observation_period_start_date = as.Date(c(
          "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10",
          "2020-03-01", "2020-04-10", "2020-03-10"
        )),
        observation_period_end_date = as.Date(c(
          "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27",
          "2020-03-09", "2020-05-08", "2020-12-10"
        )),
        period_type_concept_id = 0L
      )
    ),
    cdmName = "mock data"
  )
  cdm <- CDMConnector::copyCdmTo(
    con = connection(), cdm = cdm, schema = schema()
  )

  expect_no_error(expect_message(summariseMissingData(cdm, "person", col = NULL)))

  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("no tables created", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  startNames <- CDMConnector::listSourceTables(cdm)

  results <- summariseMissingData(
    cdm = cdm,
    omopTableName = c("drug_exposure", "condition_occurrence"),
    interval = "years",
    sex = TRUE,
    ageGroup = list(
      c(0, 17),
      c(18, 65),
      c(66, 100)
    ),
    dateRange = as.Date(c("2012-01-01", "2018-01-01")),
    sample = 100
  )

  endNames <- CDMConnector::listSourceTables(cdm)

  expect_true(length(setdiff(endNames, startNames)) == 0)


  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("interval argument works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()
  expect_no_error(y <- summariseMissingData(
    cdm = cdm,
    omopTableName = "drug_exposure",
    interval = "years"
  ))

  expect_no_error(o <- summariseMissingData(
    omopTableName = "drug_exposure",
    cdm = cdm,
    interval = "overall"
  ))
  expect_no_error(q <- summariseMissingData(
    omopTableName = "drug_exposure",
    cdm = cdm,
    interval = "quarters"
  ))
  expect_no_error(m <- summariseMissingData(
    omopTableName = "drug_exposure",
    cdm = cdm,
    interval = "months"
  ))



  m_quarters <- m |>
    omopgenerics::splitAdditional() |>
    omopgenerics::pivotEstimates() |>
    dplyr::filter(time_interval != "overall") |>
    dplyr::mutate(
      start_date = as.Date(sub(" to .*", "", time_interval)),
      quarter_start = lubridate::quarter(start_date, type = "date_first"),
      quarter_end = lubridate::quarter(start_date, type = "date_last"),
      quarter = paste(quarter_start, "to", quarter_end)
    ) |>
    dplyr::select(!c("time_interval", "start_date", "quarter_start", "quarter_end")) |>
    dplyr::group_by(quarter, variable_name) |>
    dplyr::summarise(na_count = sum(na_count), .groups = "drop") |>
    dplyr::rename("time_interval" = quarter) |>
    dplyr::arrange(time_interval)

  q_quarters <- q |>
    omopgenerics::splitAdditional() |>
    omopgenerics::pivotEstimates() |>
    dplyr::filter(time_interval != "overall") |>
    dplyr::select(time_interval, variable_name, na_count) |>
    dplyr::arrange(time_interval)

  expect_equal(m_quarters |>
    sortTibble(), q_quarters |> sortTibble())

  m_year <- m |>
    omopgenerics::splitAdditional() |>
    dplyr::filter(time_interval != "overall") |>
    dplyr::mutate(
      # Extract the start date
      start_date = clock::date_parse(stringr::str_extract(time_interval, "^\\d{4}-\\d{2}-\\d{2}")),
      # Convert start_date to a year-month-day object and extract the year
      year = clock::get_year(clock::as_year_month_day(start_date))
    ) |>
    omopgenerics::pivotEstimates() |>
    dplyr::group_by(year, variable_name) |>
    dplyr::summarise(
      na_count = sum(na_count),
      .groups = "drop"
    ) |>
    dplyr::arrange(year)
  y_year <- y |>
    omopgenerics::splitAdditional() |>
    dplyr::filter(time_interval != "overall") |>
    dplyr::mutate(
      # Extract the start date
      start_date = clock::date_parse(stringr::str_extract(time_interval, "^\\d{4}-\\d{2}-\\d{2}")),
      # Convert start_date to a year-month-day object and extract the year
      year = clock::get_year(clock::as_year_month_day(start_date))
    ) |>
    omopgenerics::pivotEstimates() |>
    dplyr::select(year, variable_name, na_count) |>
    dplyr::arrange(year)

  expect_equal(m_year |> sortTibble(), y_year |> sortTibble())

  o <- o |>
    omopgenerics::splitAdditional() |>
    omopgenerics::pivotEstimates() |>
    dplyr::select(variable_name, na_count)

  expect_equal(y_year |> dplyr::group_by(variable_name) |> dplyr::summarise(na_count = sum(na_count), .groups = "drop") |> sortTibble(), o |> sortTibble())


  q_year <- q |>
    omopgenerics::splitAdditional() |>
    dplyr::filter(time_interval != "overall") |>
    dplyr::mutate(
      # Extract the start date
      start_date = clock::date_parse(stringr::str_extract(time_interval, "^\\d{4}-\\d{2}-\\d{2}")),
      # Convert start_date to a year-month-day object and extract the year
      year = clock::get_year(clock::as_year_month_day(start_date))
    ) |>
    omopgenerics::pivotEstimates() |>
    dplyr::group_by(year, variable_name) |>
    dplyr::summarise(
      na_count = sum(na_count),
      .groups = "drop"
    ) |>
    dplyr::arrange(year)

  expect_equal(q_year |> sortTibble(), y_year |> sortTibble())

  expect_no_error(x <- summariseMissingData(cdm, "drug_exposure", sex = TRUE, interval = "years"))
  expect_true(x |> dplyr::distinct(.data$additional_level) |> dplyr::tally() > 1)
  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("summariseMissingData() works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()


  # Check all tables work ----
  expect_true(inherits(summariseMissingData(cdm, "drug_exposure"), "summarised_result"))
  expect_no_error(y <- summariseMissingData(cdm, "observation_period"))
  checkResultType(y, "summarise_missing_data")
  expect_no_error(x <- summariseMissingData(cdm, "visit_occurrence"))
  expect_no_error(summariseMissingData(cdm, "condition_occurrence"))
  expect_no_error(summariseMissingData(cdm, "drug_exposure"))

  expect_no_error(summariseMissingData(cdm, "procedure_occurrence", interval = "years"))
  expect_warning(summariseMissingData(cdm, "device_exposure"))
  expect_no_error(z <- summariseMissingData(cdm, "measurement"))
  expect_no_error(s <- summariseMissingData(cdm, "observation"))

  expect_warning(de <- summariseMissingData(cdm, "death"))
  checkResultType(de, "summarise_missing_data")
  expect_warning(p <- summariseMissingData(cdm, "person", ageGroup = list(c(0, 50))))
  expect_true(omopgenerics::settings(p)$strata == "")

  expect_no_error(all <- summariseMissingData(cdm, c("observation_period", "visit_occurrence", "measurement")))
  expect_equal(all, dplyr::bind_rows(y, x, z))
  expect_equal(summariseMissingData(cdm, "observation"), summariseMissingData(cdm, "observation", col = colnames(cdm[["observation"]])))
  x <- summariseMissingData(cdm, "procedure_occurrence", col = "procedure_date")

  expect_equal(summariseMissingData(cdm, c("procedure_occurrence", "observation"), col = "procedure_date"), dplyr::bind_rows(x, s))
  y <- summariseMissingData(cdm, "observation", col = "observation_date")
  expect_equal(summariseMissingData(cdm, c("procedure_occurrence", "observation"), col = c("procedure_date", "observation_date")), dplyr::bind_rows(x, y))

  # Check inputs ----
  expect_true(summariseMissingData(cdm, "procedure_occurrence", col = "person_id") |>
    dplyr::select(estimate_value) |>
    dplyr::mutate(estimate_value = as.numeric(estimate_value)) |>
    dplyr::summarise(sum = sum(estimate_value)) |>
    dplyr::pull() == 0)

  expect_true(summariseMissingData(cdm, "procedure_occurrence", col = "person_id", sex = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |>
    dplyr::distinct(.data$strata_level) |>
    dplyr::tally() |>
    dplyr::pull() == 9)

  expect_true(summariseMissingData(cdm, "procedure_occurrence", col = "person_id", ageGroup = list(c(0, 50))) |>
    dplyr::distinct(.data$strata_level) |>
    dplyr::tally() |>
    dplyr::pull() == 3)

  cdm$procedure_occurrence <- cdm$procedure_occurrence |>
    dplyr::mutate(procedure_concept_id = NA_integer_) |>
    dplyr::compute(name = "procedure_occurrence", temporary = FALSE)

  expect_warning(summariseMissingData(cdm, "procedure_occurrence", col = "procedure_concept_id", ageGroup = list(c(0, 50))))
  expect_no_error(summariseMissingData(cdm, "procedure_occurrence", col = "procedure_concept_id", ageGroup = list(c(0, 50)), sample = 100))
})

test_that("dateRange argument works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  expect_no_error(summariseMissingData(cdm, "condition_occurrence", dateRange = as.Date(c("2012-01-01", "2018-01-01"))))
  expect_message(x <- summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", "2025-01-01"))))
  observationRange <- cdm$observation_period |>
    dplyr::summarise(
      minobs = min(.data$observation_period_start_date, na.rm = TRUE),
      maxobs = max(.data$observation_period_end_date, na.rm = TRUE)
    )
  expect_no_error(y <- summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", observationRange |> dplyr::pull("maxobs")))))
  expect_equal(x, y, ignore_attr = TRUE)
  expect_false(settings(x)$study_period_end == settings(y)$study_period_end)
  expect_error(summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2015-01-01", "2014-01-01"))))
  expect_warning(z <- summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2020-01-01", "2021-01-01"))))
  expect_equal(z, omopgenerics::emptySummarisedResult(), ignore_attr = TRUE)
  expect_equal(summariseMissingData(cdm, "drug_exposure", dateRange = as.Date(c("2012-01-01", NA))), y, ignore_attr = TRUE)
  checkResultType(z, "summarise_missing_data")
  expect_equal(colnames(settings(z)), colnames(settings(x)))
  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("tableMissingData() works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  # Check that works ----
  expect_no_error(x <- tableMissingData(summariseMissingData(cdm, "condition_occurrence")))
  expect_true(inherits(x, "gt_tbl"))
  expect_no_error(y <- tableMissingData(summariseMissingData(cdm, c(
    "observation_period",
    "measurement"
  ))))
  expect_true(inherits(y, "gt_tbl"))
  expect_warning(t <- summariseMissingData(cdm, "death"))
  expect_warning(inherits(tableMissingData(t), "gt_tbl"))

  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("col not present in table", {
  skip_on_cran()
  # Load mock database ----
  # Load mock database
  cdm <- omopgenerics::cdmFromTables(
    tables = list(
      person = dplyr::tibble(
        person_id = as.integer(1:4),
        gender_concept_id = c(8507L, 8532L, 8532L, 8507L),
        year_of_birth = 2010L,
        month_of_birth = 1L,
        day_of_birth = 1L,
        race_concept_id = 0L,
        ethnicity_concept_id = 0L
      ),
      observation_period = dplyr::tibble(
        observation_period_id = as.integer(1:8),
        person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(),
        observation_period_start_date = as.Date(c(
          "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10",
          "2020-03-01", "2020-04-10", "2020-03-10"
        )),
        observation_period_end_date = as.Date(c(
          "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27",
          "2020-03-09", "2020-05-08", "2020-12-10"
        )),
        period_type_concept_id = 0L
      )
    ),
    cdmName = "mock data"
  )
  cdm <- CDMConnector::copyCdmTo(
    con = connection(), cdm = cdm, schema = schema()
  )

  expect_no_error(expect_message(summariseMissingData(cdm, "person", col = NULL)))

  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("no tables created", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  startNames <- CDMConnector::listSourceTables(cdm)

  results <- summariseMissingData(
    cdm = cdm,
    omopTableName = c("drug_exposure", "condition_occurrence"),
    interval = "years",
    sex = TRUE,
    ageGroup = list(
      c(0, 17),
      c(18, 65),
      c(66, 100)
    ),
    dateRange = as.Date(c("2012-01-01", "2018-01-01")),
    sample = 100
  )

  endNames <- CDMConnector::listSourceTables(cdm)

  expect_true(length(setdiff(endNames, startNames)) == 0)


  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("zero count argument works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()
  cdm$person <- cdm$person |>
    dplyr::mutate(person_id = dplyr::if_else(.data$person_id == 6, 0, .data$person_id))
  expect_equal(summariseMissingData(cdm, omopTableName = "person") |>
    dplyr::filter(variable_name == "person_id" & estimate_name == "zero_count") |>
    dplyr::pull(.data$estimate_value) |> as.integer(), 1L)


  columns <- cdm$drug_exposure |> colnames()
  columns_zero <- columns[grepl("_id$", columns)]
  expect_equal(summariseMissingData(cdm, "drug_exposure") |>
    dplyr::filter(estimate_name == "zero_count") |>
    dplyr::distinct(variable_name) |>
    dplyr::pull() |> sort(), columns_zero |> sort())


  PatientProfiles::mockDisconnect(cdm = cdm)
})

Try the OmopSketch package in your browser

Any scripts or data that you put into this service are public.

OmopSketch documentation built on June 8, 2025, 1:36 p.m.