tests/testthat/test-estimatePrevalence.R

test_that("mock db: check output format", {
  cdm <- mockIncidencePrevalence() %>%
    generateDenominatorCohortSet(name = "denominator")

  prev <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years"
  )
  my_settings <- settings(prev)
  expect_true(nrow(my_settings) > 0)

  expect_identical(
    colnames(prev),
    c(
      "result_id", "cdm_name", "group_name",
      "group_level", "strata_name", "strata_level",
      "variable_name", "variable_level", "estimate_name",
      "estimate_type", "estimate_value", "additional_name",
      "additional_level"
    )
  )
  expect_true(all(c(
    "result_id", "result_type", "group", "additional",
    "package_name", "package_version", "min_cell_count",
    "analysis_type",
    "analysis_complete_database_intervals", "analysis_full_contribution",
    "analysis_level",
    "denominator_age_group", "denominator_sex",
    "denominator_days_prior_observation", "denominator_start_date",
    "denominator_end_date", "denominator_target_cohort_name",
    "denominator_time_at_risk"
  ) %in% colnames(settings(prev))))


  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: checks on working example", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2000-01-01"),
    observation_period_end_date = as.Date("2012-06-01")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2008-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2008-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  prev <- estimatePrevalence(
    cdm = cdm, type = "period",
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = c(
      "weeks", "months", "quarters",
      "years", "overall"
    )
  )
  expect_true(nrow(prev) >= 1)

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: working examples 2", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2010-01-01"),
    observation_period_end_date = as.Date("2012-06-01")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )
  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator"
  )

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = c("months", "years")
  )
  expect_true(nrow(prev) >= 1)

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = c("months", "years")
  )
  expect_true(nrow(prev) >= 1)

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check minimum counts", {
  skip_on_cran()
  # 20 people
  personTable <- dplyr::tibble(
    person_id = as.integer(c(1:20)),
    gender_concept_id = as.integer(rep(8507, 20)),
    year_of_birth = as.integer(rep(2000, 20)),
    month_of_birth = as.integer(rep(01, 20)),
    day_of_birth = as.integer(rep(01, 20))
  )
  observationPeriodTable <- dplyr::bind_rows(
    dplyr::tibble(
      observation_period_id = as.integer(c(1:17)),
      person_id = as.integer(c(1:17)),
      observation_period_start_date = rep(as.Date("2000-01-01"), 17),
      observation_period_end_date = rep(as.Date("2000-01-31"), 17)
    ),
    dplyr::tibble(
      observation_period_id = as.integer(c(18:20)),
      person_id = as.integer(c(18:20)),
      observation_period_start_date = rep(as.Date("2000-01-01"), 3),
      observation_period_end_date = rep(as.Date("2012-06-01"), 3)
    )
  )

  outcomeTable <-
    dplyr::bind_rows(
      # 17 in first period
      dplyr::tibble(
        cohort_definition_id = as.integer(rep(1, 17)),
        subject_id = as.integer(c(1:17)),
        cohort_start_date = rep(
          as.Date("2000-01-02"), 17
        ),
        cohort_end_date = rep(
          as.Date("2000-01-03"), 17
        )
      ),
      # three in second
      dplyr::tibble(
        cohort_definition_id = as.integer(rep(1, 3)),
        subject_id = as.integer(c(18:20)),
        cohort_start_date = rep(
          as.Date("2000-02-02"), 3
        ),
        cohort_end_date = rep(
          as.Date("2000-02-03"), 3
        )
      )
    )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  prev <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "months"
  )
  prev_est <- prev |>
    dplyr::filter(estimate_name == "outcome_count")
  expect_true(prev_est$estimate_value[1] == "17")
  expect_true(prev_est$estimate_value[2] == "3")
  expect_true(prev_est$estimate_value[3] == "0")
  prev_est <- prev |>
    dplyr::filter(estimate_name == "denominator_count")
  expect_true(prev_est$estimate_value[1] == "20")
  expect_true(prev_est$estimate_value[2] == "3")
  expect_true(prev_est$estimate_value[3] == "3")
  prev_est <- prev |>
    dplyr::filter(estimate_name == "prevalence")
  expect_true(!is.na(prev_est$estimate_value[1]))
  expect_true(!is.na(prev_est$estimate_value[2]))
  expect_true(!is.na(prev_est$estimate_value[3]))
  prev_est <- prev |>
    dplyr::filter(estimate_name == "prevalence_95CI_lower")
  expect_true(!is.na(prev_est$estimate_value[1]))
  prev_est <- prev |>
    dplyr::filter(estimate_name == "prevalence_95CI_upper")
  expect_true(!is.na(prev_est$estimate_value[1]))

  # suppress results
  prev <- omopgenerics::suppress(prev, minCellCount = 5)
  prev_est <- prev |>
    dplyr::filter(estimate_name == "outcome_count")
  expect_true(prev_est$estimate_value[1] == "17")
  expect_true(prev_est$estimate_value[2] == "-")
  expect_true(prev_est$estimate_value[3] == "0") # don't suppress zero
  prev_est <- prev |>
    dplyr::filter(estimate_name == "denominator_count")
  expect_true(prev_est$estimate_value[1] == "20")
  expect_true(prev_est$estimate_value[2] == "-")
  expect_true(prev_est$estimate_value[3] == "-")
  prev_est <- prev |>
    dplyr::filter(estimate_name == "prevalence")
  expect_true(prev_est$estimate_value[1] != "-")
  expect_true(prev_est$estimate_value[2] == "-")
  expect_true(prev_est$estimate_value[3] == "0")
  prev_est <- prev |>
    dplyr::filter(estimate_name == "prevalence_95CI_lower")
  expect_true(!is.na(prev_est$estimate_value[1]))
  expect_true(prev_est$estimate_value[2] == "-")
  expect_true(prev_est$estimate_value[3] == "0")
  prev_est <- prev |>
    dplyr::filter(estimate_name == "prevalence_95CI_upper")
  expect_true(prev_est$estimate_value[1] != "-")
  expect_true(prev_est$estimate_value[2] == "-")
  expect_equal(as.numeric(prev_est$estimate_value[3]), 0.56, tolerance = 0.1)

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check study time periods", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2010-01-01"),
    observation_period_end_date = as.Date("2010-12-31")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = "months"
  )

  # we expect 12 months of which the last in December
  # the last month should also be included
  # as the person goes up to the last day of the month
  expect_true(nrow(prev |>
    dplyr::filter(
      estimate_name == "outcome_count"
    )) == 12)


  # overall period
  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "overall"
  )
  # no overall for point
  expect_error(estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = "overall"
  ))
  # just one row
  expect_true(nrow(prev |>
    dplyr::filter(estimate_name == "outcome_count")) == 1)

  omopgenerics::cdmDisconnect(cdm)

  # should return empty if no study days
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2010-01-01"),
    observation_period_end_date = as.Date("2010-11-15")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    fullContribution = TRUE,
    interval = "weeks"
  )
  expect_true(nrow(prev |>
    dplyr::filter(estimate_name == "outcome_count")) == 45)

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    fullContribution = TRUE,
    interval = "months"
  )
  expect_true(nrow(prev |>
    dplyr::filter(estimate_name == "outcome_count")) == 10)

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    fullContribution = TRUE,
    interval = "years"
  )
  expect_true(nrow(prev |>
    dplyr::filter(estimate_name == "outcome_count")) == 0)

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check fullContribution requirement", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = as.integer(c(1, 2, 3)),
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = as.integer(c(1, 2, 3)),
    person_id = as.integer(c(1, 2, 3)),
    observation_period_start_date = c(
      as.Date("2010-01-01"),
      as.Date("2010-01-01"),
      as.Date("2012-04-01")
    ),
    observation_period_end_date = c(
      as.Date("2011-06-01"),
      as.Date("2012-06-01"),
      as.Date("2012-06-01")
    )
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )
  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator"
  )

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "years",
    fullContribution = FALSE,
    completeDatabaseIntervals = FALSE
  )
  expect_true(all(prev |>
    dplyr::filter(
      estimate_name == "denominator_count"
    ) |>
    dplyr::pull("estimate_value") == "2"))

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "years",
    fullContribution = TRUE,
    completeDatabaseIntervals = FALSE
  )
  expect_true(all(prev |>
    dplyr::filter(
      estimate_name == "denominator_count"
    ) |>
    dplyr::pull("estimate_value") == c("2", "1", "1")))
  expect_true(prev |>
    omopgenerics::filterSettings(result_type == "prevalence_attrition") |>
    dplyr::filter(strata_level == "Do not satisfy full contribution requirement for an interval") |>
    dplyr::filter(variable_name == "excluded_subjects") |>
    dplyr::pull("estimate_value") == "1")

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check periods follow calendar dates", {
  skip_on_cran()
  # check that even if study_start_date is during a period
  # periods still follow calendar dates
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2010-06-05"),
    observation_period_end_date = as.Date("2013-06-15")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2011-01-31"),
      as.Date("2011-02-01"),
      as.Date("2011-03-01")
    ),
    cohort_end_date = c(
      as.Date("2011-01-31"),
      as.Date("2011-02-01"),
      as.Date("2011-03-01")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  # if completeDatabaseIntervals is TRUE we should go from 2010 to 2013
  # but if FALSE we should go from 2011 to 2012
  # for yearly incidence
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator"
  )
  prev1 <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "years",
    fullContribution = FALSE,
    completeDatabaseIntervals = FALSE
  )
  expect_true(nrow(prev1 |>
    dplyr::filter(
      estimate_name == "denominator_count"
    )) == 4)
  expect_true(all(clock::get_year(
    prev1 |> omopgenerics::splitAdditional() |>
      dplyr::filter(
        estimate_name == "denominator_count"
      ) |>
      dplyr::pull("prevalence_start_date") %>%
      as.Date()
  ) ==
    c("2010", "2011", "2012", "2013")))

  prev2 <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "years",
    fullContribution = FALSE,
    completeDatabaseIntervals = TRUE
  )
  expect_true(all(clock::get_year(
    prev2 |> omopgenerics::splitAdditional() |>
      dplyr::filter(
        estimate_name == "denominator_count"
      ) |>
      dplyr::pull("prevalence_start_date") |>
      as.Date()
  ) ==
    c("2011", "2012")))

  # for months
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator",
    cohortDateRange = c(as.Date("2011-01-15"), as.Date(NA))
  )

  # where we expect the study to start on 2011-01-15
  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "months",
    fullContribution = FALSE,
    completeDatabaseIntervals = FALSE
  )

  expect_true(prev |> omopgenerics::splitAdditional() |>
    dplyr::filter(
      estimate_name == "denominator_count"
    ) |>
    head(1) |>
    dplyr::pull("prevalence_start_date") ==
    "2011-01-15")
  # where we expect the study to start the next month
  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "months",
    fullContribution = FALSE,
    completeDatabaseIntervals = TRUE
  )
  expect_true(prev |> omopgenerics::splitAdditional() |>
    dplyr::filter(
      estimate_name == "denominator_count"
    ) |>
    head(1) |>
    dplyr::pull("prevalence_start_date") ==
    "2011-02-01")

  # for overall
  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "overall",
    fullContribution = FALSE,
    completeDatabaseIntervals = FALSE
  )
  expect_true(prev |> omopgenerics::splitAdditional() |>
    dplyr::filter(
      estimate_name == "denominator_count"
    ) |>
    head(1) |>
    dplyr::pull("prevalence_start_date") ==
    "2011-01-15")
  expect_true(prev |> omopgenerics::splitAdditional() |>
    dplyr::filter(
      estimate_name == "denominator_count"
    ) |>
    head(1) |>
    dplyr::pull("prevalence_end_date") ==
    "2013-06-15")

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check multiple outcome ids", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = as.integer(c(1, 2)),
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = c(1L, 2L),
    person_id = c(1L, 2L),
    observation_period_start_date = as.Date("2011-01-01"),
    observation_period_end_date = as.Date("2012-06-01")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = c(1L, 2L), # two different outcome ids
    subject_id = c(1L, 2L),
    cohort_start_date = c(
      as.Date("2011-02-05")
    ),
    cohort_end_date = c(
      as.Date("2011-02-05")
    )
  )
  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator"
  )

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    outcomeCohortId = c(1, 2),
    type = "period",
    interval = "years"
  )

  expect_true(all(prev |>
    dplyr::filter(
      estimate_name == "outcome_count"
    ) |>
    dplyr::pull("estimate_value") == "1"))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: multiple denominator inputs", {
  skip_on_cran()
  cdm <- mockIncidencePrevalence(sampleSize = 6000,
                                 maxOutcomeDays = 5,
                                 maxOutcomes = 6)
  cdm <- generateDenominatorCohortSet(
    cdm = cdm,
    name = "denominator_1",
    cohortDateRange = c(as.Date("2010-01-01"),
                        as.Date("2020-12-31")),
    ageGroup = list(c(25, 50)),
    sex = "Both")
  cdm <- generateDenominatorCohortSet(
    cdm = cdm,
    name = "denominator_2",
    cohortDateRange = c(as.Date("2010-01-01"),
                        as.Date("2020-12-31")),
    ageGroup = list(c(0, 50),
                    c(25, 30),
                    c(25, 50),
                    c(25, 80),
                    c(55, 100)
    ) ,
    sex =  c("Both", "Male", "Female"))

  prev_1 <- estimatePeriodPrevalence(cdm,
                                     denominatorTable = "denominator_1",
                                     outcomeTable = "outcome")
  prev_2 <- estimatePeriodPrevalence(cdm,
                                     denominatorTable = "denominator_2",
                                     outcomeTable = "outcome")

  expect_identical(
    tablePrevalence(prev_1,
                    type = "tibble"),
    tablePrevalence(prev_2 |>
                      omopgenerics::filterSettings(denominator_age_group == "25 to 50",
                                                   denominator_sex == "Both"),
                    type = "tibble")
  )

  omopgenerics::cdmDisconnect(cdm)

})

test_that("mock db: some empty result sets", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = c(1L, 2L),
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = c(1L, 2L),
    person_id = c(1L, 2L),
    observation_period_start_date = as.Date("2012-02-01"),
    observation_period_end_date = as.Date("2012-06-01")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = c(1L, 2L), # two different outcome ids
    subject_id = c(1L, 2L),
    cohort_start_date = c(
      as.Date("2012-02-05")
    ),
    cohort_end_date = c(
      as.Date("2012-02-05")
    )
  )
  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator"
  )

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = "years"
  )
  expect_true(nrow(prev %>%
    omopgenerics::filterSettings(result_type == "prevalence")) == 0)

  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "period",
    interval = c("months", "years")
  )
  expect_true(nrow(prev %>%
    omopgenerics::filterSettings(result_type == "prevalence")) > 0)

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check expected errors", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2010-01-01"),
    observation_period_end_date = as.Date("2012-06-01")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2010-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )
  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  expect_error(estimatePrevalence(
    cdm = "a",
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    outcomeCohortId = 1,
    denominatorCohortId = 1
  ))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check user point prevalence function", {
  skip_on_cran()
  cdm <- mockIncidencePrevalence()

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  prev <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome"
  )
  prev_point <- estimatePointPrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome"
  )

  expect_true(all(names(prev) == names(prev_point)))
  expect_true(all(names(prev) ==
    names(prev_point)))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check user period prevalence function", {
  skip_on_cran()
  cdm <- mockIncidencePrevalence()

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  prev <- estimatePrevalence(
    cdm = cdm,
    type = "period",
    denominatorTable = "denominator",
    outcomeTable = "outcome"
  )
  prev_period <- estimatePeriodPrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome"
  )

  expect_true(all(names(prev) == names(prev_period)))
  expect_true(all(names(prev) ==
    names(prev_period)))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: multiple observation periods", {
  skip_on_cran()
  # create data for hypothetical people to test
  personTable <- dplyr::tibble(
    person_id = c(1L, 2L),
    gender_concept_id = c(8507L, 8507L),
    year_of_birth = c(1998L, 1976L),
    month_of_birth = c(02L, 06L),
    day_of_birth = c(12L, 01L)
  )

  # three observation periods for 1 person
  # and a couple of consecutive events lost to washout
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = c(1L, 2L, 3L, 4L),
    person_id = c(1L, 1L, 1L, 2L),
    observation_period_start_date = c(
      as.Date("2005-04-01"),
      as.Date("2009-04-10"),
      as.Date("2010-08-20"),
      as.Date("2012-01-01")
    ),
    observation_period_end_date = c(
      as.Date("2005-11-29"),
      as.Date("2010-01-02"),
      as.Date("2011-12-11"),
      as.Date("2015-06-01")
    )
  )

  conditionX <- dplyr::tibble(
    cohort_definition_id = c(1L, 1L, 1L),
    subject_id = c(1L, 1L, 2L),
    cohort_start_date = c(
      as.Date("2005-04-01"),
      as.Date("2009-06-10"),
      as.Date("2013-01-01")
    ),
    cohort_end_date = c(
      as.Date("2005-11-29"),
      as.Date("2010-01-02"),
      as.Date("2015-01-01")
    )
  )

  outcomeTable <- dplyr::tibble(
    cohort_definition_id = as.integer(c(1, 1, 1, 1, 1, 1, 1)),
    subject_id = as.integer(c(1, 1, 1, 1, 1, 1, 2)),
    cohort_start_date = c(
      as.Date("2005-08-09"),
      as.Date("2005-08-10"),
      as.Date("2005-08-11"),
      as.Date("2009-11-11"),
      as.Date("2009-11-21"),
      as.Date("2010-12-21"),
      as.Date("2014-04-04")
    ),
    cohort_end_date = c(
      as.Date("2005-08-09"),
      as.Date("2005-08-10"),
      as.Date("2005-08-11"),
      as.Date("2009-11-11"),
      as.Date("2009-11-21"),
      as.Date("2010-12-21"),
      as.Date("2014-04-04")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    targetCohortTable = conditionX,
    outcomeTable = outcomeTable
  )

  cdm <- generateTargetDenominatorCohortSet(
    cdm = cdm, name = "denominator",
    targetCohortTable = "target",
    targetCohortId = 1
  )
  ppe <- estimatePeriodPrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    fullContribution = FALSE,
    completeDatabaseIntervals = FALSE
  )
  # nobody should appear in 2006
  expect_true(ppe %>%
    omopgenerics::splitAdditional() |>
    dplyr::filter(
      prevalence_start_date == "2006-01-01",
      estimate_name == "denominator_count"
    ) %>%
    dplyr::pull("estimate_value") == 0)
  expect_true(ppe %>%
    omopgenerics::splitAdditional() |>
    dplyr::filter(
      prevalence_start_date == "2006-01-01",
      estimate_name == "outcome_count"
    ) %>%
    dplyr::pull("estimate_value") == 0)

  # one person with an event in 2005
  expect_true(ppe %>%
    omopgenerics::filterSettings(result_type == "prevalence") %>%
    omopgenerics::splitAdditional() |>
    dplyr::filter(
      clock::get_year(prevalence_start_date %>%
        as.Date()) == "2005",
      estimate_name == "denominator_count"
    ) %>%
    dplyr::pull("estimate_value") == "1")
  expect_true(ppe %>%
    omopgenerics::filterSettings(result_type == "prevalence") %>%
    omopgenerics::splitAdditional() |>
    dplyr::filter(
      clock::get_year(prevalence_start_date %>%
        as.Date()) == "2005",
      estimate_name == "outcome_count"
    ) %>%
    dplyr::pull("estimate_value") == "1")


  # as for point prevalence, we would expect no positive n_cases at default
  ppo <- estimatePointPrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "months"
  )

  expect_true(sum(as.numeric(ppo %>%
    omopgenerics::splitAdditional() |>
    dplyr::filter(estimate_name == "outcome_count") %>%
    dplyr::pull("estimate_value"))) == 0)

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check confidence intervals", {
  skip_on_cran()
  cdm <- mockIncidencePrevalence(sampleSize = 1000)
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator"
  )
  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = "years"
  )

  pkg_est <- prev %>%
    omopgenerics::filterSettings(result_type == "prevalence") %>%
    dplyr::select(
      "estimate_name",
      "estimate_value", "additional_level"
    ) |>
    tidyr::pivot_wider(
      names_from = "estimate_name",
      values_from = "estimate_value"
    ) |>
    dplyr::filter(denominator_count > 1)

  # compare our wilson CIs with those from binom
  pkg_est <- cbind(
    pkg_est,
    binom::binom.confint(as.integer(pkg_est$outcome_count),
      as.integer(pkg_est$denominator_count),
      conf.level = 0.95,
      method = "wilson"
    )
  )

  expect_equal(as.numeric(pkg_est$prevalence_95CI_lower),
    pkg_est$lower,
    tolerance = 1e-2
  )
  expect_equal(as.numeric(pkg_est$prevalence_95CI_upper),
    pkg_est$upper,
    tolerance = 1e-2
  )

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check attrition", {
  skip_on_cran()
  cdm <- mockIncidencePrevalence(sampleSize = 1000)
  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator",
    sex = c("Male", "Female")
  )
  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = "years"
  )

  # for female cohort we should have a row for those excluded for not being male
  expect_true(nrow(prev |>
    omopgenerics::filterSettings(
      result_type == "prevalence_attrition",
      denominator_sex == "Female"
    ) |>
    dplyr::filter(strata_level == "Not Female")) > 0)

  # for male, the opposite
  expect_true(nrow(prev |>
    omopgenerics::filterSettings(
      result_type == "prevalence_attrition",
      denominator_sex == "Male"
    ) |>
    dplyr::filter(strata_level == "Not Male")) > 0)

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check attrition with complete database intervals", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = c(1L, 2L, 3L),
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = c(1L, 2L, 3L),
    person_id = c(1L, 2L, 3L),
    observation_period_start_date = c(
      as.Date("2000-06-01"),
      as.Date("2000-06-01"),
      as.Date("2000-06-01")
    ),
    observation_period_end_date = c(
      as.Date("2011-07-01"),
      as.Date("2012-06-01"),
      as.Date("2000-06-15")
    )
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2008-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2008-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "denominator"
  )
  prev <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = "years",
    completeDatabaseIntervals = TRUE
  )

  expect_true(prev |>
    omopgenerics::filterSettings(result_type == "prevalence_attrition") |>
    dplyr::filter(strata_level == "Not observed during the complete database interval") |>
    dplyr::filter(variable_name == "excluded_subjects") |>
    dplyr::pull("estimate_value") == "1")

  # check min cell suppression
  prev2 <- estimatePrevalence(cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    type = "point",
    interval = "years",
    completeDatabaseIntervals = TRUE
  ) |> omopgenerics::suppress(5)

  expect_true(prev2 |>
    omopgenerics::filterSettings(result_type == "prevalence_attrition") |>
    dplyr::filter(strata_level == "Not observed during the complete database interval") |>
    dplyr::filter(variable_name == "excluded_subjects") |>
    dplyr::pull("estimate_value") == "-")

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check compute permanent", {
  skip_on_cran()

  # using permanent (no prefix)
  cdm <- mockIncidencePrevalence(sampleSize = 1000)

  cdm <- generateDenominatorCohortSet(
    cdm = cdm, name = "dpop"
  )
  prev <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "dpop",
    outcomeTable = "outcome",
    interval = "years"
  )
  # no temp tables created by dbplyr
  expect_false(any(stringr::str_starts(
    CDMConnector::listTables(attr(attr(cdm, "cdm_source"), "dbcon"),
      schema = attr(attr(cdm, "cdm_source"), "write_schema")
    ),
    "dbplyr_"
  )))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: if missing cohort attributes", {
  # missing cohort_set
  cdm <- mockIncidencePrevalence()
  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
  attr(cdm$outcome, "cohort_set") <- NULL
  expect_error(estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years"
  ))
  omopgenerics::cdmDisconnect(cdm)

  # missing cohort_count
  cdm <- mockIncidencePrevalence()
  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
  attr(cdm$outcome, "cohort_attrition") <- NULL
  expect_error(estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years"
  ))
  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: test empty outcome table works", {
  skip_on_cran()

  cdm <- mockIncidencePrevalence(sampleSize = 1000)

  cdm[["outcome"]] <- cdm[["outcome"]] %>%
    dplyr::filter(cohort_definition_id == 33)

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  expect_no_error(estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years"
  ))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: prevalence using strata vars", {
  cdm <- mockIncidencePrevalence(
    sampleSize = 1000,
    outPre = 0.7
  )

  cdm <- generateDenominatorCohortSet(
    cdm = cdm,
    name = "denominator"
  )

  prev_orig <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years"
  )

  cdm$denominator <- cdm$denominator %>%
    dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1990,
      "first", "second"
    )) %>%
    dplyr::compute(
      temporary = FALSE,
      name = "denominator"
    )

  prev <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    strata = list(c("my_strata"))
  )

  expect_true(all(c("overall", "first", "second") %in%
    unique(prev |> dplyr::pull("strata_level"))))

  # original without strata should be the same as "Overall" strata
  prev1 <- prev_orig |>
    omopgenerics::filterSettings(result_type == "prevalence")
  attr(prev1, "settings") <- NULL
  prev2 <- prev |>
    omopgenerics::filterSettings(result_type == "prevalence") %>%
    dplyr::filter(strata_level == "overall")
  attr(prev2, "settings") <- NULL
  expect_equal(prev1, prev2)

  cdm$denominator <- cdm$denominator %>%
    dplyr::mutate(my_strata2 = dplyr::if_else(month(cohort_start_date) < 7,
      "a", "b"
    )) %>%
    dplyr::compute(
      temporary = FALSE,
      name = "denominator"
    )

  prev2 <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    strata = list(c("my_strata", "my_strata2"))
  )

  expect_true(all(c(
    "overall", "first &&& a",
    "first &&& b",
    "second &&& a",
    "second &&& b"
  ) %in%
    unique(prev2 |> dplyr::pull("strata_level"))))

  prev3 <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    strata = list(
      c("my_strata"),
      c("my_strata2"),
      c("my_strata", "my_strata2")
    )
  )

  expect_true(all(c(
    "overall",
    "first",
    "second",
    "first &&& a",
    "first &&& b",
    "second &&& a",
    "second &&& b"
  ) %in%
    unique(prev3 |> dplyr::pull("strata_level"))))



  # without overall strata
  prev4 <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    strata = list(
      c("my_strata"),
      c("my_strata2"),
      c("my_strata", "my_strata2")
    ),
    includeOverallStrata = FALSE
  )
  expect_true(all(c(
    "first",
    "second",
    "first &&& a",
    "first &&& b",
    "second &&& a",
    "second &&& b"
  ) %in%
    unique(prev4 |> dplyr::pull("strata_level"))))
  expect_false(c("overall") %in%
    unique(prev4 |> dplyr::pull("strata_level")))

  expect_error(estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    strata = list(c("not_a_col"))
  ))

  expect_error(estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    strata = list(c("my_strata", "not_a_col"))
  ))

  expect_error(estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = "years",
    strata = list(c("my_strata"), c("not_a_col"))
  ))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: cohort names for cohortId args", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2010-01-28"),
    observation_period_end_date = as.Date("2012-12-31")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2010-01-28")
    ),
    cohort_end_date = c(
      as.Date("2010-01-28")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  pre1 <- estimatePrevalence(cdm, "target", "outcome")
  pre2 <- estimatePrevalence(cdm, "target", "outcome", 1, 1)
  pre3 <- estimatePrevalence(cdm, "target", "outcome", "cohort_1", "cohort_1")

  expect_true(all.equal(pre1, pre2))
  expect_true(all.equal(pre2, pre3))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: empty denominator", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2010-01-28"),
    observation_period_end_date = as.Date("2012-12-31")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2010-01-28")
    ),
    cohort_end_date = c(
      as.Date("2010-01-28")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  attr(cdm$target, "cohort_set") <- dplyr::union_all(
    attr(cdm$target, "cohort_set"),
    dplyr::tibble(
      cohort_definition_id = 2,
      cohort_name = "cohort_2"
    ),
    copy = TRUE
  )

  expect_error(estimatePrevalence(cdm, "target", "outcome", 2))

  omopgenerics::cdmDisconnect(cdm)
})

test_that("mock db: check local cdm", {
  skip_on_cran()
  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = as.Date("2000-01-01"),
    observation_period_end_date = as.Date("2012-06-01")
  )
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2008-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    ),
    cohort_end_date = c(
      as.Date("2008-02-05"),
      as.Date("2010-02-08"),
      as.Date("2010-02-20")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )

  cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")

  cdm <- cdm |> dplyr::collect()

  expect_no_error(prev <- estimatePrevalence(
    cdm = cdm,
    denominatorTable = "denominator",
    outcomeTable = "outcome",
    interval = c("months", "years")
  ))

  omopgenerics::cdmDisconnect(cdm)

})

test_that("mock db: period prevalence at the record level", {

  skip_on_cran()

  personTable <- dplyr::tibble(
    person_id = c(1L, 2L, 3L),
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  # one person with two spans of time in the denominator
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = c(1L, 1L, 2L, 3L),
    person_id = c(1L, 1L, 2L, 3L),
    observation_period_start_date = c(
      as.Date("2000-06-01"),
      as.Date("2010-06-01"),
      as.Date("2000-06-01"),
      as.Date("2000-06-01")
    ),
    observation_period_end_date = c(
      as.Date("2005-07-01"),
      as.Date("2012-07-01"),
      as.Date("2012-06-01"),
      as.Date("2000-06-15")
    )
  )

  # one outcome during their first entry
  # another in their second
  outcomeTable <- dplyr::tibble(
    cohort_definition_id = 1L,
    subject_id = 1L,
    cohort_start_date = c(
      as.Date("2003-02-05"),
      as.Date("2010-09-01")
    ),
    cohort_end_date = c(
      as.Date("2003-02-06"),
      as.Date("2010-09-02")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable,
    outcomeTable = outcomeTable
  )
  cdm <- generateDenominatorCohortSet(cdm, "denom")

  # person level results
  prev_person_level <- estimatePeriodPrevalence(
    cdm = cdm,
    denominatorTable = "denom",
    outcomeTable = "outcome",
    interval = "overall",
    level = "person")
  expect_true(prev_person_level |>
    dplyr::filter(estimate_name == "denominator_count") |>
    dplyr::pull("estimate_value") == "3")
  expect_true(prev_person_level |>
    dplyr::filter(estimate_name == "outcome_count") |>
    dplyr::pull("estimate_value") == "1")
  expect_true(all(settings(prev_person_level) |>
    dplyr::pull("analysis_level") == "person"))

  # record level results
  prev_record_level <- estimatePeriodPrevalence(
    cdm = cdm,
    denominatorTable = "denom",
    outcomeTable = "outcome",
    interval = "overall",
    level = "record")
  expect_true(prev_record_level |>
    dplyr::filter(estimate_name == "denominator_count") |>
    dplyr::pull("estimate_value") == "4")
  expect_true(prev_record_level |>
    dplyr::filter(estimate_name == "outcome_count") |>
    dplyr::pull("estimate_value") == "2")
  expect_true(all(settings(prev_record_level) |>
                    dplyr::pull("analysis_level") == "record"))

  # check with strata
  cdm$denom <- cdm$denom |>
    dplyr::mutate(group = "a")
  prev_record_level_strata <- estimatePeriodPrevalence(
    cdm = cdm,
    denominatorTable = "denom",
    outcomeTable = "outcome",
    interval = "overall",
    strata = "group",
    level = "record")
  expect_true(all(prev_record_level_strata |>
                dplyr::filter(estimate_name == "denominator_count") |>
                dplyr::pull("estimate_value") == "4"))
  expect_true(all(prev_record_level_strata |>
                dplyr::filter(estimate_name == "outcome_count") |>
                dplyr::pull("estimate_value") == "2"))
  expect_true(all(settings(prev_record_level_strata) |>
                    dplyr::pull("analysis_level") == "record"))


  # expected error
  expect_error(estimatePeriodPrevalence(
    cdm = cdm,
    denominatorTable = "denom",
    outcomeTable = "outcome",
    interval = "overall",
    strata = "group",
    level = "something else"))

  omopgenerics::cdmDisconnect(cdm)

})

test_that("mock db: period prevalence at the record level - more examples", {

  skip_on_cran()


  personTable <- dplyr::tibble(
    person_id = 1L,
    gender_concept_id = 8507L,
    year_of_birth = 2000L,
    month_of_birth = 01L,
    day_of_birth = 01L
  )
  # one person with two spans of time in the denominator
  observationPeriodTable <- dplyr::tibble(
    observation_period_id = 1L,
    person_id = 1L,
    observation_period_start_date = c(
      as.Date("2000-06-01")
    ),
    observation_period_end_date = c(
      as.Date("2020-07-01")
    )
  )

  cdm <- mockIncidencePrevalence(
    personTable = personTable,
    observationPeriodTable = observationPeriodTable
  )

  pregnancy_table <- dplyr::tribble(
    ~cohort_definition_id,   ~subject_id,   ~cohort_start_date,      ~cohort_end_date,
    99,                       1,             as.Date("2014-01-01"),   as.Date("2014-09-05")
  )

  cdm <- CDMConnector::insertTable(
    cdm = cdm,
    name = "pregnancy_table",
    table = pregnancy_table,
    overwrite = TRUE,
    temporary = FALSE
  )

  cohort_table <- dplyr::tribble(
    ~cohort_definition_id,   ~subject_id,   ~cohort_start_date,      ~cohort_end_date,
    1,                       1,             as.Date("2014-01-01"),   as.Date("2014-01-30"),
    1,                       1,             as.Date("2014-03-14"),   as.Date("2014-04-30")
  )

  cdm <- CDMConnector::insertTable(
    cdm = cdm,
    name = "cohort_table",
    table = cohort_table,
    overwrite = TRUE,
    temporary = FALSE
  )

  cdm$cohort_table <- cdm$cohort_table |>
    omopgenerics::newCohortTable() |>
    dplyr::compute(name = "cohort_table", temporary = FALSE, overwrite = TRUE)

  cdm$pregnancy_table <- cdm$pregnancy_table |>
    omopgenerics::newCohortTable() |>
    dplyr::compute(name = "pregnancy_table", temporary = FALSE, overwrite = TRUE)

  starts <- seq(1, 20 * 7, 7)
  stops <- seq(7, 20 * 7, 7)

  timesAtRisk <- lapply(seq_len(length(starts)), function(i) {
    c(
      starts[i],
      stops[i]
    )
  })

  cdm <- cdm |>
    generateTargetDenominatorCohortSet(
      name = "denom",
      targetCohortTable = "pregnancy_table",
      timeAtRisk = timesAtRisk,
      requirementsAtEntry = FALSE,
      requirementInteractions = FALSE
    )

  prev <- cdm |>
    estimatePeriodPrevalence(
      denominatorTable = "denom",
      outcomeTable = "cohort_table",
      interval = "overall",
      level = "record",
      completeDatabaseIntervals = FALSE
    )

  # should appear in all denominators
  expect_true(all(prev |>
    dplyr::filter(estimate_name == "denominator_count") |>
    dplyr::pull("estimate_value") == "1"))

  # tar 1 to 7 - they have an outcome
  expect_true(prev |>
                  omopgenerics::filterSettings(denominator_time_at_risk == "1 to 7") |>
                    dplyr::filter(estimate_name == "outcome_count") |>
                    dplyr::pull("estimate_value") == "1")

  # tar 36 to 42 - they do not have an outcome
  expect_true(prev |>
                omopgenerics::filterSettings(denominator_time_at_risk == "36 to 42") |>
                dplyr::filter(estimate_name == "outcome_count") |>
                dplyr::pull("estimate_value") == "0")


  # check with strata
  cdm$denom <- cdm$denom |>
    dplyr::mutate(group = "a")
  prev_2 <- cdm |>
    estimatePeriodPrevalence(
      denominatorTable = "denom",
      outcomeTable = "cohort_table",
      interval = "overall",
      level = "record",
      strata = "group",
      completeDatabaseIntervals = FALSE
    )
  # should appear in all denominators
  expect_true(all(prev_2 |>
                    dplyr::filter(estimate_name == "denominator_count") |>
                    dplyr::pull("estimate_value") == "1"))

  # tar 1 to 7 - they have an outcome
  expect_true(all(prev_2 |>
                omopgenerics::filterSettings(denominator_time_at_risk == "1 to 7") |>
                dplyr::filter(estimate_name == "outcome_count") |>
                dplyr::pull("estimate_value") == "1"))

  # tar 36 to 42 - they do not have an outcome
  expect_true(all(prev_2 |>
                omopgenerics::filterSettings(denominator_time_at_risk == "36 to 42") |>
                dplyr::filter(estimate_name == "outcome_count") |>
                dplyr::pull("estimate_value") == "0"))


  omopgenerics::cdmDisconnect(cdm = cdm)

})

Try the IncidencePrevalence package in your browser

Any scripts or data that you put into this service are public.

IncidencePrevalence documentation built on Aug. 8, 2025, 6:38 p.m.