Nothing
test_that("mock db: check output format", {
cdm <- mockIncidencePrevalence()
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months"
)
my_settings <- settings(inc)
expect_gt(nrow(my_settings), 0)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months"
)
expect_true(inherits(inc, "summarised_result"))
expect_identical(
sort(colnames(settings(inc))),
sort(c(
"result_id", "result_type",
"package_name", "package_version",
"group", "strata", "additional", "min_cell_count",
"analysis_complete_database_intervals",
"analysis_censor_cohort_name",
"analysis_outcome_washout", "analysis_repeated_events",
"denominator_age_group", "denominator_sex",
"denominator_days_prior_observation", "denominator_start_date",
"denominator_end_date",
"denominator_requirements_at_entry",
"denominator_target_cohort_name",
"denominator_time_at_risk"
))
)
expect_identical(
colnames(inc),
c(
"result_id", "cdm_name", "group_name",
"group_level", "strata_name", "strata_level",
"variable_name", "variable_level", "estimate_name",
"estimate_type", "estimate_value", "additional_name",
"additional_level"
)
)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: checks on working example", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2000-01-01"),
observation_period_end_date = as.Date("2012-06-01")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
),
cohort_end_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("years", "overall")
)
expect_gte(nrow(inc), 1)
# reconnect
cdmReconn <- CDMConnector::cdmFromCon(
con = attr(attr(cdm, "cdm_source"), "dbcon"),
cohortTables = c("denominator", "outcome"),
writeSchema = "main", cdmSchema = "main", cdmName = "mock"
)
inc_recon <- estimateIncidence(
cdm = cdmReconn,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("years", "overall")
)
expect_identical(inc, inc_recon)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check working example 2", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-01"),
observation_period_end_date = as.Date("2012-06-01")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
),
cohort_end_date = c(
as.Date("2010-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = FALSE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(inc |>
dplyr::filter(
estimate_name == "outcome_count"
) |>
dplyr::pull("estimate_value"))) == 1)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 2,
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(inc |>
dplyr::filter(
estimate_name == "outcome_count"
) |>
dplyr::pull("estimate_value"))) == 3)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 10,
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(inc |>
dplyr::filter(
estimate_name == "outcome_count"
) |>
dplyr::pull("estimate_value"))) == 2)
# even if repeatedEvents = TRUE,
# if outcomeWashout=NULL (all of history)
# then it won<U+00B4>t be possible to have any recurrent events
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = Inf,
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(inc |>
dplyr::filter(
estimate_name == "outcome_count"
) |>
dplyr::pull("estimate_value"))) == 1)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = Inf,
interval = "weeks",
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(inc |>
dplyr::filter(
estimate_name == "outcome_count"
) |>
dplyr::pull("estimate_value"))) == 1)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check study periods", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-15"),
observation_period_end_date = as.Date("2010-12-15")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
),
cohort_end_date = c(
as.Date("2010-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
outcomeWashout = 0,
repeatedEvents = TRUE,
completeDatabaseIntervals = FALSE
)
# we expect 12 months of which the last in december
# the last month should also be included
# as the person goes up to the last day of the month
expect_true(nrow(inc |>
dplyr::filter(estimate_name == "outcome_count")) == 12)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 0,
interval = "months",
repeatedEvents = TRUE,
completeDatabaseIntervals = TRUE
)
# now with completeDatabaseIntervals is TRUE
# we expect 10 months of which the last in november
expect_true(nrow(inc |>
dplyr::filter(estimate_name == "outcome_count")) == 10)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check overall", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2005-01-15"),
as.Date("2005-01-15")
),
observation_period_end_date = c(
as.Date("2007-05-01"),
as.Date("2011-06-15")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = c(1L, 2L, 2L),
cohort_start_date = c(
as.Date("2006-02-05"),
as.Date("2006-02-05"),
as.Date("2010-02-05")
),
cohort_end_date = c(
as.Date("2006-02-05"),
as.Date("2006-02-05"),
as.Date("2010-02-05")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
cohortDateRange = c(as.Date("2007-01-01"), as.Date(NA))
)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "overall",
repeatedEvents = FALSE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
# we expect one row with the overall results
# with two people
# one person had the event before the study period
# (but washout was 0 so was included)
# one person had the event during the study period
expect_true(nrow(inc |>
dplyr::filter(estimate_name == "outcome_count")) == 1)
expect_true(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "2")
expect_true(all(inc |>
omopgenerics::filterSettings(result_type == "incidence") |>
omopgenerics::splitAdditional() |>
dplyr::pull("incidence_start_date") == as.Date("2007-01-01")))
expect_true(all(inc |>
omopgenerics::filterSettings(result_type == "incidence") |>
omopgenerics::splitAdditional() |>
dplyr::pull("incidence_end_date") == as.Date("2010-02-05"))) # date of first event
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "overall",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
expect_true(nrow(inc |>
omopgenerics::filterSettings(result_type == "incidence") |>
dplyr::filter(estimate_name == "outcome_count")) == 1)
expect_true(all(inc |>
omopgenerics::filterSettings(result_type == "incidence") |>
omopgenerics::splitAdditional() |>
dplyr::pull("incidence_start_date") == as.Date("2007-01-01")))
expect_true(all(inc |>
omopgenerics::filterSettings(result_type == "incidence") |>
omopgenerics::splitAdditional() |>
dplyr::pull("incidence_end_date") == as.Date("2011-06-15"))) # date of end of obs
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check person days", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(2000L, 1999L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2007-01-01"),
as.Date("2007-01-01")
),
observation_period_end_date = c(
as.Date("2022-12-31"),
as.Date("2022-10-05")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2021-06-27")),
cohort_end_date = c(as.Date("2021-06-27"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
ageGroup = list(c(20, 30))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = FALSE,
interval = c("Years"),
completeDatabaseIntervals = FALSE
)
# in 2019 we expect person 2 to contribute from 1st july to end of December
expect_true(inc |>
dplyr::filter(estimate_name == "person_days") |>
head(1) |>
dplyr::pull("estimate_value") ==
as.numeric(difftime(
as.Date("2019-12-31"),
as.Date("2019-07-01")
)) + 1)
# in 2020 we expect person 2 to contribute all year
# and person 1 from 1st January to end of December
expect_true(inc |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value") ==
(as.numeric(difftime(
as.Date("2020-12-31"),
as.Date("2020-07-01")
)) + 1) +
(as.numeric(difftime(
as.Date("2020-12-31"),
as.Date("2020-01-01")
) + 1)))
# in 2021 we expect person 2 to contribute all year
# and person 1 from 1st January up to 27th june (date of their outcome)
expect_true(inc |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::filter(dplyr::row_number() == 3) |>
dplyr::pull("estimate_value") ==
(as.numeric(difftime(
as.Date("2021-12-31"),
as.Date("2021-01-01")
)) + 1) +
(as.numeric(difftime(
as.Date("2021-06-27"),
as.Date("2021-01-01")
) + 1)))
# in 2022 we expect person 2 to contribute all year
# (person 1 is out- they have had an event)
expect_true(inc |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::filter(dplyr::row_number() == 4) |>
dplyr::pull("estimate_value") ==
(as.numeric(difftime(
as.Date("2021-10-05"),
as.Date("2021-01-01")
)) + 1))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check periods follow calendar dates", {
skip_on_cran()
# check that even if startDate as during a period
# periods still follow calendar dates
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-01"),
observation_period_end_date = as.Date("2012-12-31")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-03-01"),
as.Date("2011-01-31"),
as.Date("2011-02-01"),
as.Date("2011-03-01")
),
cohort_end_date = c(
as.Date("2010-03-01"),
as.Date("2011-01-31"),
as.Date("2011-02-01"),
as.Date("2011-03-01")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
# startDate during a year (with year as interval)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator", cohortDateRange = c(as.Date("2010-02-01"), as.Date(NA))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 0,
interval = c("Years"),
completeDatabaseIntervals = FALSE
)
expect_true(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value") == "1")
expect_true(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value") == "3")
# startDate during a month (with month as interval)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
cohortDateRange = c(as.Date("2011-01-15"), as.Date(NA))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 0,
interval = c("months"),
completeDatabaseIntervals = FALSE
)
expect_true(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value") == "1")
expect_true(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value") == "1")
expect_true(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::filter(dplyr::row_number() == 3) |>
dplyr::pull("estimate_value") == "1")
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check washout windows", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-01"),
observation_period_end_date = as.Date("2012-12-31")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-06-01"),
# more than six months since the last event
as.Date("2011-01-13"),
# two days since the end of the last event
as.Date("2011-01-16"),
# one day since the end of the last event
as.Date("2011-01-18")
),
cohort_end_date = c(
as.Date("2010-06-02"),
as.Date("2011-01-14"),
as.Date("2011-01-17"),
as.Date("2011-01-19")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
incW0 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
# expect all events if we have zero days washout
expect_true(sum(as.numeric(incW0 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 4)
incW1 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 1L,
completeDatabaseIntervals = FALSE
)
# expect three events if we have one days washout
expect_true(sum(as.numeric(incW1 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 3)
incW2 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 2,
completeDatabaseIntervals = FALSE
)
# expect two events if we have two days washout
expect_true(sum(as.numeric(incW2 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 2)
incW365 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 365,
completeDatabaseIntervals = FALSE
)
# expect one event if we have 365 days washout
expect_true(sum(as.numeric(incW365 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 1)
incInf <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = Inf,
completeDatabaseIntervals = FALSE
)
# expect one event if we have NULL (all history washout)
expect_true(sum(as.numeric(incInf |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 1)
# but, we will have move days when using the 365 day washout
# as the person came back to contribute more time at risk
expect_lt(sum(as.numeric(incInf |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))), sum(as.numeric(incW365 |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))))
omopgenerics::cdmDisconnect(cdm)
# never satisfy criteria in study period
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2009-01-01"),
observation_period_end_date = as.Date("2012-12-31")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2009-12-31")
),
cohort_end_date = c(
as.Date("2010-06-02")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm,
name = "denominator",
cohortDateRange = as.Date(c("2010-01-01", NA))
)
incW365 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 36500,
completeDatabaseIntervals = FALSE
)
expect_true(nrow(incW365 |>
omopgenerics::filterSettings(result_type == "incidence")) == 0)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check events overlapping with start of a period", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(2000L, 1999L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2000-01-21"),
as.Date("2007-01-01")
),
observation_period_end_date = c(
as.Date("2022-12-31"),
as.Date("2022-12-31")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2020-06-27")),
cohort_end_date = c(as.Date("2020-07-19"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
ageGroup = list(c(20, 30))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = TRUE,
interval = c("Years")
)
expect_true(all(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == 1))
omopgenerics::cdmDisconnect(cdm)
# another example
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(2000L, 1999L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2000-01-21"),
as.Date("2007-01-01")
),
observation_period_end_date = c(
as.Date("2022-12-31"),
as.Date("2022-12-31")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L),
subject_id = c(1L, 1L),
cohort_start_date = c(as.Date("2020-06-27"), as.Date("2020-07-30")),
cohort_end_date = c(as.Date("2020-07-19"), as.Date("2020-08-20"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
ageGroup = list(c(20, 30))
)
inc2 <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = TRUE,
interval = c("Years")
)
expect_true(all(inc2 |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == 1))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: compare results from months and years", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = rep(8507L, 2),
year_of_birth = rep(2000L, 2),
month_of_birth = rep(01L, 2),
day_of_birth = rep(01L, 2)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2010-01-01"),
as.Date("2010-01-01")
),
observation_period_end_date = c(
as.Date("2012-01-01"),
as.Date("2012-01-01")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2011-07-01")
),
cohort_end_date = c(
as.Date("2011-07-01")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator", cohortDateRange = c(as.Date("2010-01-01"), as.Date("2011-12-31"))
)
incMonths <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("months")
)
incYears <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("years")
)
# consistent results for months and years
expect_true(sum(as.numeric(incMonths |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) ==
sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))))
expect_true(sum(as.numeric(incMonths |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))) ==
sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))))
expect_equal(
sum(as.numeric(incMonths |>
dplyr::filter(estimate_name == "person_years") |>
dplyr::pull("estimate_value"))),
sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "person_years") |>
dplyr::pull("estimate_value")))
)
omopgenerics::cdmDisconnect(cdm)
cdm <- mockIncidencePrevalence(sampleSize = 1000)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
cohortDateRange = c(as.Date("2010-01-01"), as.Date("2011-12-31"))
)
incWeeks <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("weeks"),
completeDatabaseIntervals = FALSE
)
incQuarters <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("quarters"),
completeDatabaseIntervals = FALSE
)
incMonths <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("months"),
completeDatabaseIntervals = FALSE
)
incYears <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("years"),
completeDatabaseIntervals = FALSE
)
# consistent results for months and years
expect_identical(sum(as.numeric(incWeeks |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))), sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))))
expect_identical(sum(as.numeric(incQuarters |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))), sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))))
expect_identical(sum(as.numeric(incMonths |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))), sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))))
expect_identical(sum(as.numeric(incWeeks |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))), sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))))
expect_identical(sum(as.numeric(incQuarters |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))), sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))))
expect_identical(sum(as.numeric(incMonths |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))), sum(as.numeric(incYears |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::pull("estimate_value"))))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: multiple denominator inputs", {
cdm <- mockIncidencePrevalence(sampleSize = 6000,
maxOutcomeDays = 5,
maxOutcomes = 6)
cdm <- generateDenominatorCohortSet(
cdm = cdm,
name = "denominator_1",
cohortDateRange = c(as.Date("2010-01-01"),
as.Date("2020-12-31")),
ageGroup = list(c(25, 50)),
sex = "Both")
cdm <- generateDenominatorCohortSet(
cdm = cdm,
name = "denominator_2",
cohortDateRange = c(as.Date("2010-01-01"),
as.Date("2020-12-31")),
ageGroup = list(c(0, 50),
c(25, 30),
c(25, 50),
c(25, 80),
c(45, 100)),
sex = c("Both", "Male", "Female"))
inc_1 <- estimateIncidence(cdm,
denominatorTable = "denominator_1",
outcomeTable = "outcome")
inc_2 <- estimateIncidence(cdm,
denominatorTable = "denominator_2",
outcomeTable = "outcome")
expect_identical(
tableIncidence(inc_1,
type = "tibble"),
tableIncidence(inc_2 |>
omopgenerics::filterSettings(denominator_age_group == "25 to 50",
denominator_sex == "Both"),
type = "tibble")
)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check entry and event on same day", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-28"),
observation_period_end_date = as.Date("2012-12-31")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-01-28")
),
cohort_end_date = c(
as.Date("2010-01-28")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
incWithoutRep <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = FALSE,
outcomeWashout = Inf,
interval = "years",
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(incWithoutRep |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == "1")
incWithRep <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = Inf,
interval = "years",
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(incWithRep |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == "1")
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: cohort start overlaps with the outcome", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2019-05-09"),
as.Date("2019-01-01")
),
observation_period_end_date = c(
as.Date("2022-05-19"),
as.Date("2021-12-31")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2019-05-09")),
cohort_end_date = c(as.Date("2022-05-19"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
cohortDateRange = as.Date(c(
"2021-01-01",
"2021-12-31"
))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 180,
repeatedEvents = TRUE,
interval = c("Years")
)
expect_true(all(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "1"))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check outcome in previous obeservation period", {
skip_on_cran()
# 1) with outcome starting and ending before observation period start
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L, 3L),
person_id = c(1L, 1L, 2L),
observation_period_start_date = c(
as.Date("2000-01-01"),
as.Date("2010-01-01"),
as.Date("2000-01-01")
),
observation_period_end_date = c(
as.Date("2005-12-31"),
as.Date("2020-12-31"),
as.Date("2020-12-31")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L),
subject_id = c(1L, 1L),
cohort_start_date = as.Date(c("2000-01-01", "2018-01-01")),
cohort_end_date = as.Date(c("2005-12-31", "2019-01-01"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
cohortDateRange = c(as.Date("2011-01-01"), as.Date("2020-01-01"))
)
# with rep events - should have both people
incRep <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 0,
repeatedEvents = TRUE,
interval = c("Years")
)
expect_true(all(incRep |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "2"))
# with inf wash out- should only have 1 person
incNoRep <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = TRUE,
interval = c("Years")
)
expect_true(all(incNoRep |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "1"))
# with 5 year wash out- should have 2 people at the start of the study period
incNoRep2 <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 1825,
repeatedEvents = TRUE,
interval = c("Years")
)
expect_true(max(as.numeric(incNoRep2 |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value"))) == 2)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check minimum counts", {
skip_on_cran()
# 20 people
personTable <- dplyr::tibble(
person_id = c(1:20),
gender_concept_id = rep(8507L, 20),
year_of_birth = rep(2000L, 20),
month_of_birth = rep(01L, 20),
day_of_birth = rep(01L, 20)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1:20),
person_id = c(1:20),
observation_period_start_date = rep(as.Date("2000-01-01"), 20),
observation_period_end_date = rep(as.Date("2012-06-01"), 20)
)
outcomeTable <-
dplyr::bind_rows(
# 17 in first period
dplyr::tibble(
cohort_definition_id = as.integer(rep(1, 17)),
subject_id = as.integer(c(1:17)),
cohort_start_date = rep(
as.Date("2000-01-02"), 17
),
cohort_end_date = rep(
as.Date("2000-01-03"), 17
)
),
# three in second
dplyr::tibble(
cohort_definition_id = as.integer(rep(1, 3)),
subject_id = as.integer(c(18:20)),
cohort_start_date = rep(
as.Date("2000-02-02"), 3
),
cohort_end_date = rep(
as.Date("2000-02-03"), 3
)
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
repeatedEvents = FALSE,
completeDatabaseIntervals = FALSE
)
expect_identical(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value"), c("20", "3"))
expect_identical(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"), c("17", "3"))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "person_years") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "person_years") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value")))
expect_false(is.na(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value")))
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
repeatedEvents = FALSE,
completeDatabaseIntervals = FALSE
) |> omopgenerics::suppress(5)
expect_identical(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value"), c("20", "-"))
expect_identical(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"), c("17", "-"))
expect_false(inc |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value") == "-")
expect_true(inc |>
dplyr::filter(estimate_name == "person_days") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value") == "-")
expect_false(inc |>
dplyr::filter(estimate_name == "person_years") |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("estimate_value") == "-")
expect_true(inc |>
dplyr::filter(estimate_name == "person_years") |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("estimate_value") == "-")
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: multiple overlapping outcomes", {
# technically overlapping outcomes are not allowed
# check this edge case, but validation might also not allow this
skip_on_cran()
# two people
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2020-04-29"),
as.Date("2019-01-01")
),
observation_period_end_date = c(
as.Date("2020-12-31"),
as.Date("2021-12-31")
)
)
# two outcomes for person one
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L),
subject_id = c(1L, 1L),
cohort_start_date = c(as.Date("2020-04-29"), as.Date("2020-11-10")),
cohort_end_date = c(as.Date("2020-05-17"), as.Date("2020-12-17"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 180,
repeatedEvents = TRUE,
interval = "overall"
)
expect_true(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "2")
expect_true(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value") == "1")
omopgenerics::cdmDisconnect(cdm)
# three
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2020-04-29"),
as.Date("2019-01-01")
),
observation_period_end_date = c(
as.Date("2020-12-31"),
as.Date("2021-12-31")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L),
subject_id = c(1L, 1L, 1L),
cohort_start_date = c(
as.Date("2020-04-29"),
as.Date("2020-11-08"),
as.Date("2020-11-10")
),
cohort_end_date = c(
as.Date("2020-05-17"),
as.Date("2020-11-09"),
as.Date("2020-12-17")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 180,
repeatedEvents = TRUE,
interval = "overall"
)
expect_true(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "2")
expect_true(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value") == "1")
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: cohort before period start ending after period", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1990L, 1990L),
month_of_birth = c(01L, 01L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2000-07-31"),
as.Date("2000-07-31")
),
observation_period_end_date = c(
as.Date("2020-01-01"),
as.Date("2010-01-01")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L),
subject_id = c(1L, 2L),
cohort_start_date = c(
as.Date("2000-08-02"),
as.Date("2001-06-01")
),
cohort_end_date = c(
as.Date("2020-01-01"),
as.Date("2001-07-01")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator", cohortDateRange = c(as.Date("2001-01-01"), as.Date("2001-12-31"))
)
# regardless of washout we expect one event
# with only one participant
# person 1s outcome starts before period and ends after
# no washout
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 0,
repeatedEvents = FALSE,
interval = c("Years"),
completeDatabaseIntervals = FALSE
)
expect_true(all(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value") == "1"))
# washout
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = FALSE,
interval = c("Years"),
completeDatabaseIntervals = FALSE
)
expect_true(all(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value") == "1"))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check full period requirement - year", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2020-05-09"),
as.Date("2020-03-01")
),
observation_period_end_date = c(
as.Date("2020-06-06"),
as.Date("2021-06-06")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2020-05-28")),
cohort_end_date = c(as.Date("2020-05-29"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
ageGroup = list(c(20, 30))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = TRUE,
completeDatabaseIntervals = TRUE,
interval = c("Years")
)
expect_true(nrow(inc |>
omopgenerics::filterSettings(result_type == "incidence")) == 0)
omopgenerics::cdmDisconnect(cdm)
# edge case first day to last of the year
# still expect this to work
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2020-05-09"),
as.Date("2020-01-01")
),
observation_period_end_date = c(
as.Date("2020-12-31"),
as.Date("2020-12-31")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2020-05-29")),
cohort_end_date = c(as.Date("2020-05-29"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
ageGroup = list(c(20, 30))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = TRUE,
interval = c("Years")
)
expect_true(nrow(inc |>
omopgenerics::filterSettings(result_type == "incidence") |>
dplyr::filter(estimate_name == "outcome_count")) ==
1)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check full period requirement - month", {
skip_on_cran()
# expected to work
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2020-04-28"),
as.Date("2020-01-01")
),
observation_period_end_date = c(
as.Date("2020-06-06"),
as.Date("2020-06-06")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2020-04-28")),
cohort_end_date = c(as.Date("2020-05-19"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
ageGroup = list(c(20, 30))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = TRUE,
interval = c("Months")
)
expect_gte(nrow(inc), 1)
omopgenerics::cdmDisconnect(cdm)
# edge case first day to last of the month
# still expect this to work
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2020-04-28"),
as.Date("2020-01-01")
),
observation_period_end_date = c(
as.Date("2020-04-29"),
as.Date("2020-01-31")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2020-04-28")),
cohort_end_date = c(as.Date("2020-04-28"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
ageGroup = list(c(20, 30))
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = Inf,
repeatedEvents = TRUE,
interval = c("Months")
)
expect_gte(nrow(inc |>
omopgenerics::filterSettings(result_type == "incidence")), 1)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check completeDatabaseIntervals", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2019-05-09"),
as.Date("2019-02-02")
),
observation_period_end_date = c(
as.Date("2022-06-01"),
as.Date("2021-06-06")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2020-04-28")),
cohort_end_date = c(as.Date("2020-05-19"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator"
)
# full periods required TRUE
# repetitive events TRUE
# - we expect to start in 2020 (both start during 2019)
# - we expect to go up to 2021 (id 2 end date is in 2022)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("Years"),
outcomeWashout = 0,
repeatedEvents = TRUE,
completeDatabaseIntervals = TRUE
)
expect_true(nrow(inc |>
dplyr::filter(estimate_name == "outcome_count")) == 2)
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2020")
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2021")
# repetitive events FALSE
# - now we expect only to use 2020 (id 2 obs end is in 21)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("Years"),
repeatedEvents = FALSE,
completeDatabaseIntervals = TRUE
)
expect_true(nrow(inc |>
dplyr::filter(estimate_name == "outcome_count")) == 1)
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2020")
# full periods required FALSE
# repetitive events TRUE
# - we expect to start in 2019
# - we expect to go up to 2022
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeWashout = 0,
interval = c("Years"),
repeatedEvents = TRUE,
completeDatabaseIntervals = FALSE
)
expect_true(nrow(inc |>
dplyr::filter(estimate_name == "outcome_count")) == 4)
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2019")
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2020")
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 3) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2021")
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 4) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2022")
# repetitive events FALSE
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("Years"),
repeatedEvents = FALSE,
completeDatabaseIntervals = FALSE
)
expect_true(nrow(inc |>
dplyr::filter(estimate_name == "outcome_count")) == 3)
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2019")
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 2) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2020")
expect_true(clock::get_year(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(dplyr::row_number() == 3) |>
dplyr::pull("incidence_start_date") |>
as.Date()) == "2021")
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check insufficient study days", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8532L),
year_of_birth = c(1995L, 1995L),
month_of_birth = c(07L, 07L),
day_of_birth = c(01L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2019-05-09"),
as.Date("2019-02-02")
),
observation_period_end_date = c(
as.Date("2019-06-01"),
as.Date("2019-06-06")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2019-06-01")),
cohort_end_date = c(as.Date("2019-06-01"))
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator"
)
# we have less than a year of follow up
# so we should return an empty tibble if full periods are required
# and we<U+00B4>re looking for yearly incidence
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("Years"),
repeatedEvents = TRUE,
completeDatabaseIntervals = TRUE
)
expect_true(nrow(inc |>
omopgenerics::filterSettings(result_type == "incidence")) == 0)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check with and without study start and end date", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L, 3L, 4L, 5L, 6L),
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 07L,
day_of_birth = 01L
)
# one person leaving before 2010
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L, 3L, 4L, 5L, 6L),
person_id = c(1L, 2L, 3L, 4L, 5L, 6L),
observation_period_start_date = c(
rep(as.Date("2007-01-01"), 5),
as.Date("2010-06-01")
),
observation_period_end_date = c(
rep(as.Date("2022-12-31"), 4),
as.Date("2009-06-01"),
as.Date("2010-11-01")
)
)
outcomeTable <- dplyr::bind_rows(
# 1 event before obs start ending after obs end
dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(as.Date("2007-01-01")),
cohort_end_date = c(as.Date("2022-12-31"))
),
# 2 multiple events
dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 2L,
cohort_start_date = c(as.Date("2008-06-01")),
cohort_end_date = c(as.Date("2008-10-01"))
),
dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 2L,
cohort_start_date = c(as.Date("2008-11-01")),
cohort_end_date = c(as.Date("2010-10-14"))
),
dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 2L,
cohort_start_date = c(as.Date("2010-12-01")),
cohort_end_date = c(as.Date("2011-06-18"))
),
dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 2L,
cohort_start_date = c(as.Date("2011-06-19")),
cohort_end_date = c(as.Date("2012-12-31"))
),
# 3 multiple events into the period
dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 3L,
cohort_start_date = c(as.Date("2009-06-01")),
cohort_end_date = c(as.Date("2010-02-01"))
),
dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 3L,
cohort_start_date = c(as.Date("2010-06-01")),
cohort_end_date = c(as.Date("2022-12-31"))
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
# no study period required
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator1")
# study period
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator2",
cohortDateRange = c(as.Date("2009-01-01"), as.Date("2011-01-01"))
)
# no washout, repetitive events
inc1A <- estimateIncidence(cdm,
denominatorTable = "denominator1",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
inc2A <- estimateIncidence(cdm,
denominatorTable = "denominator2",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
# given the settings above we would expect the same results for 2010
expect_identical(inc1A |>
dplyr::filter(estimate_name == "denominator_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"), inc2A |>
dplyr::filter(estimate_name == "denominator_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"))
expect_identical(inc1A |>
dplyr::filter(estimate_name == "person_days") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"), inc2A |>
dplyr::filter(estimate_name == "person_days") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"))
expect_identical(inc1A |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"), inc2A |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"))
# 365 washout, repetitive events
inc1B <- estimateIncidence(cdm,
denominatorTable = "denominator1",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 365,
completeDatabaseIntervals = FALSE
)
inc2B <- estimateIncidence(cdm,
denominatorTable = "denominator2",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 365,
completeDatabaseIntervals = FALSE
)
# given the settings above we would expect the same results for 2010
expect_identical(inc1B |>
dplyr::filter(estimate_name == "denominator_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"), inc2B |>
dplyr::filter(estimate_name == "denominator_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"))
expect_identical(inc1B |>
dplyr::filter(estimate_name == "person_days") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"), inc2B |>
dplyr::filter(estimate_name == "person_days") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"))
expect_identical(inc1B |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"), inc2B |>
dplyr::filter(estimate_name == "outcome_count") |>
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) |>
dplyr::pull("estimate_value"))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check study start and end date 1000", {
skip_on_cran()
# with one outcome per person
cdm <- mockIncidencePrevalence(
sampleSize = 1000
)
# no study period required
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator1")
# study period
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator2",
cohortDateRange = c(as.Date("2009-01-01"), as.Date("2011-01-01"))
)
# no washout, repetitive events
inc1A <- estimateIncidence(cdm,
denominatorTable = "denominator1",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
inc2A <- estimateIncidence(cdm,
denominatorTable = "denominator2",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
expect_true(inc1A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "denominator_count") %>%
dplyr::pull("estimate_value") ==
inc2A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "denominator_count") %>%
dplyr::pull("estimate_value"))
expect_true(inc1A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "person_days") %>%
dplyr::pull("estimate_value") ==
inc2A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "person_days") %>%
dplyr::pull("estimate_value"))
expect_true(inc1A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value") ==
inc2A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))
# 365 washout, repetitive events
inc1B <- estimateIncidence(cdm,
denominatorTable = "denominator1",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 365,
completeDatabaseIntervals = FALSE
)
inc2B <- estimateIncidence(cdm,
denominatorTable = "denominator2",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 365,
completeDatabaseIntervals = FALSE
)
expect_true(inc1B |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "incidence_100000_pys") %>%
dplyr::pull("estimate_value") ==
inc2B |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "incidence_100000_pys") %>%
dplyr::pull("estimate_value"))
omopgenerics::cdmDisconnect(cdm)
# with multiple outcomes per person
cdm <- mockIncidencePrevalence(
sampleSize = 1000
)
# no study period required
cdm <- generateDenominatorCohortSet(
cdm = cdm,
name = "denominator1"
)
# study period
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator2",
cohortDateRange = c(as.Date("2009-01-01"), as.Date("2011-01-01"))
)
# no washout, repetitive events
inc1A <- estimateIncidence(cdm,
denominatorTable = "denominator1",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
inc2A <- estimateIncidence(cdm,
denominatorTable = "denominator2",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
expect_true(inc1A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "incidence_100000_pys") %>%
dplyr::pull("estimate_value") ==
inc2A |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "incidence_100000_pys") %>%
dplyr::pull("estimate_value"))
# 365 washout, repetitive events
inc1B <- estimateIncidence(cdm,
denominatorTable = "denominator1",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 365,
completeDatabaseIntervals = FALSE
)
inc2B <- estimateIncidence(cdm,
denominatorTable = "denominator2",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 365,
completeDatabaseIntervals = FALSE
)
expect_true(inc1B |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "incidence_100000_pys") %>%
dplyr::pull("estimate_value") ==
inc2B |>
omopgenerics::filterSettings(result_type == "incidence") %>%
omopgenerics::splitAdditional() |>
dplyr::filter(clock::get_year(incidence_start_date |>
as.Date()) == 2010) %>%
dplyr::filter(estimate_name == "incidence_100000_pys") %>%
dplyr::pull("estimate_value"))
omopgenerics::cdmDisconnect(cdm)
})
test_that("expected errors with mock", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-01"),
observation_period_end_date = as.Date("2012-06-01")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
),
cohort_end_date = c(
as.Date("2010-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
# not a cdm reference
expect_error(estimateIncidence(
cdm = "a",
denominatorTable = "denominator",
outcomeTable = "outcome"
))
# wrong type
expect_error(estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
denominatorCohortId = "1",
outcomeTable = "outcome",
outcomeCohortId = 1
))
# no study pop
expect_error(estimateIncidence(cdm,
outcomeTable = "outcome",
interval = c("months"),
denominatorTable = "denominator1"
))
expect_error(estimateIncidence(cdm,
outcomeTable = "outcome",
interval = c("months"),
denominatorTable = "denominator",
denominatorCohortId = 999
))
# outcome definition id doesn't exist in cohort set
expect_error(estimateIncidence(cdm,
outcomeTable = "outcome",
interval = c("months"),
denominatorTable = "denominator",
outcomeCohortId = 11
))
# cohortId wrong format
expect_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeCohortId = TRUE
))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: multiple observation periods", {
skip_on_cran()
# create data for hypothetical people to test
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = c(8507L, 8507L),
year_of_birth = c(1998L, 1976L),
month_of_birth = c(02L, 06L),
day_of_birth = c(12L, 01L)
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L, 3L),
person_id = c(1L, 1L, 2L),
observation_period_start_date = c(
as.Date("2005-04-01"),
as.Date("2009-04-10"),
as.Date("2010-12-11")
),
observation_period_end_date = c(
as.Date("2005-11-29"),
as.Date("2016-01-02"),
as.Date("2015-06-01")
)
)
conditionX <- dplyr::tibble(
cohort_definition_id = c(1L, 1L),
subject_id = c(1L, 2L),
cohort_start_date = c(
as.Date("2005-07-19"),
as.Date("2010-12-11")
),
cohort_end_date = c(
as.Date("2005-07-19"),
as.Date("2015-06-01")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L, 1L),
subject_id = c(1L, 1L, 2L, 2L),
cohort_start_date = c(
as.Date("2005-08-09"),
as.Date("2010-01-11"),
as.Date("2010-12-21"),
as.Date("2014-04-04")
),
cohort_end_date = c(
as.Date("2005-08-09"),
as.Date("2010-01-11"),
as.Date("2010-12-21"),
as.Date("2014-04-04")
)
)
# should only pick up one of the four observation periods,
# as the inclusion of the cohorts is only well defined for one
# (entry event in the observation period)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
targetCohortTable = conditionX,
outcomeTable = outcomeTable
)
cdm <- generateTargetDenominatorCohortSet(
cdm = cdm, name = "denominator",
targetCohortTable = "target",
targetCohortId = 1
)
incW0 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
# expect all events if we have zero days washout
expect_true(sum(as.numeric(incW0 %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 2)
omopgenerics::cdmDisconnect(cdm)
# Change the inclusion so that both patients have valid observation periods. Now 1 should have two, and 2 one.
# Should capture the final part of the first observation period, and the initial part of the second for person 1
conditionX <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L),
subject_id = c(1L, 1L, 2L),
cohort_start_date = c(
as.Date("2005-07-19"),
as.Date("2009-04-10"),
as.Date("2010-12-11")
),
cohort_end_date = c(
as.Date("2005-08-11"),
as.Date("2015-01-02"),
as.Date("2011-12-11")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
targetCohortTable = conditionX,
outcomeTable = outcomeTable
)
cdm <- generateTargetDenominatorCohortSet(
cdm = cdm, name = "denominator",
targetCohortTable = "target",
targetCohortId = 1
)
incW10 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 10,
completeDatabaseIntervals = FALSE
)
# expect all events if we have ten days washout
expect_true(sum(as.numeric(incW10 %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 3)
omopgenerics::cdmDisconnect(cdm)
# try event not counted for outcome but counted for washout as denominator (before observ period)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L, 1L, 1L),
subject_id = c(1L, 1L, 1L, 2L, 2L),
cohort_start_date = c(
as.Date("2005-07-11"),
as.Date("2005-08-09"),
as.Date("2010-01-11"),
as.Date("2010-12-21"),
as.Date("2014-04-04")
),
cohort_end_date = c(
as.Date("2005-07-11"),
as.Date("2005-08-09"),
as.Date("2010-01-11"),
as.Date("2010-12-21"),
as.Date("2014-04-04")
)
)
# now we would expect same number of events, but three less days in the denominator
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
targetCohortTable = conditionX,
outcomeTable = outcomeTable
)
cdm <- generateTargetDenominatorCohortSet(
cdm = cdm, name = "denominator",
targetCohortTable = "target",
targetCohortId = 1
)
inc_PreWashout <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 10,
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(inc_PreWashout %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 3)
expect_true(sum(as.numeric(inc_PreWashout %>%
dplyr::filter(estimate_name == "person_days") %>%
dplyr::pull("estimate_value"))) ==
as.numeric(difftime(as.Date("2005-08-11"), as.Date("2005-07-19"))) +
1 - 2 + as.numeric(difftime(
as.Date("2015-01-02"),
as.Date("2009-04-10")
)) + 1 - 10 +
as.numeric(difftime(as.Date("2011-12-11"), as.Date("2010-12-11"))) +
1 - 10 - 3)
omopgenerics::cdmDisconnect(cdm)
# multiple events in one of the observation periods of person 1
conditionX <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L),
subject_id = c(1L, 1L, 2L),
cohort_start_date = c(
as.Date("2005-06-19"),
as.Date("2009-04-10"),
as.Date("2010-12-11")
),
cohort_end_date = c(
as.Date("2005-08-11"),
as.Date("2015-01-02"),
as.Date("2011-12-11")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
targetCohortTable = conditionX,
outcomeTable = outcomeTable
)
cdm <- generateTargetDenominatorCohortSet(
cdm = cdm, name = "denominator",
targetCohortTable = "target",
targetCohortId = 1
)
inc_Mult1_W0 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = FALSE
)
inc_Mult1_W30 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 30,
completeDatabaseIntervals = FALSE
)
# we should have 4 events with washout 0, but 3 events with washout 30
expect_true(sum(as.numeric(inc_Mult1_W0 %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 4)
expect_true(sum(as.numeric(inc_Mult1_W30 %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 3)
expect_true(sum(as.numeric(inc_Mult1_W0 %>%
dplyr::filter(estimate_name == "person_days") %>%
dplyr::pull("estimate_value"))) ==
as.numeric(difftime(
as.Date("2005-08-11"),
as.Date("2005-06-19")
)) + 1 +
as.numeric(difftime(
as.Date("2015-01-02"),
as.Date("2009-04-10")
)) + 1 +
as.numeric(difftime(
as.Date("2011-12-11"),
as.Date("2010-12-11")
)) + 1)
expect_true(sum(as.numeric(inc_Mult1_W30 %>%
dplyr::filter(estimate_name == "person_days") %>%
dplyr::pull("estimate_value"))) ==
as.numeric(difftime(
as.Date("2005-08-11"),
as.Date("2005-06-19")
)) - 30 +
as.numeric(difftime(
as.Date("2015-01-02"),
as.Date("2009-04-10")
)) +
1 - 30 + as.numeric(difftime(
as.Date("2011-12-11"),
as.Date("2010-12-11")
)) + 1 - 30)
omopgenerics::cdmDisconnect(cdm)
# The first event of person 1 will not be included in the observation period
# but should also influence the second event with the washout
conditionX <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L),
subject_id = c(1L, 1L, 2L),
cohort_start_date = c(
as.Date("2005-07-19"),
as.Date("2009-04-10"),
as.Date("2010-12-11")
),
cohort_end_date = c(
as.Date("2005-08-11"),
as.Date("2015-01-02"),
as.Date("2011-12-11")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
targetCohortTable = conditionX,
outcomeTable = outcomeTable
)
cdm <- generateTargetDenominatorCohortSet(
cdm = cdm, name = "denominator",
targetCohortTable = "target",
targetCohortId = 1
)
inc_PreWashEv <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 30,
completeDatabaseIntervals = FALSE
)
# we should have 2 events with washout 30
expect_true(sum(as.numeric(inc_PreWashEv %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 2)
expect_true(sum(as.numeric(inc_PreWashEv %>%
dplyr::filter(estimate_name == "person_days") %>%
dplyr::pull("estimate_value"))) ==
as.numeric(difftime(
as.Date("2005-08-11"),
as.Date("2005-07-19")
)) - 30 + 7 +
as.numeric(difftime(
as.Date("2015-01-02"),
as.Date("2009-04-10")
)) + 1 - 30 +
as.numeric(difftime(
as.Date("2011-12-11"),
as.Date("2010-12-11")
)) + 1 - 30)
omopgenerics::cdmDisconnect(cdm)
# three observation periods for 1 person and a
# couple of consecutive events lost to washout
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L, 3L, 4L),
person_id = c(1L, 1L, 1L, 2L),
observation_period_start_date = c(
as.Date("2005-04-01"),
as.Date("2009-04-10"),
as.Date("2010-08-20"),
as.Date("2010-01-01")
),
observation_period_end_date = c(
as.Date("2005-11-29"),
as.Date("2010-01-02"),
as.Date("2011-12-11"),
as.Date("2015-06-01")
)
)
conditionX <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L, 1L),
subject_id = c(1L, 1L, 1L, 2L),
cohort_start_date = c(
as.Date("2005-04-01"),
as.Date("2009-06-10"),
as.Date("2010-08-20"),
as.Date("2010-01-01")
),
cohort_end_date = c(
as.Date("2005-11-29"),
as.Date("2010-01-02"),
as.Date("2011-10-11"),
as.Date("2015-06-01")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L),
subject_id = c(1L, 1L, 1L, 1L, 1L, 1L, 2L),
cohort_start_date = c(
as.Date("2005-08-09"),
as.Date("2005-08-10"),
as.Date("2005-08-11"),
as.Date("2009-11-11"),
as.Date("2009-11-21"),
as.Date("2010-12-21"),
as.Date("2014-04-04")
),
cohort_end_date = c(
as.Date("2005-08-09"),
as.Date("2005-08-10"),
as.Date("2005-08-11"),
as.Date("2009-11-11"),
as.Date("2009-11-21"),
as.Date("2010-12-21"),
as.Date("2014-04-04")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
targetCohortTable = conditionX,
outcomeTable = outcomeTable
)
cdm <- generateTargetDenominatorCohortSet(
cdm = cdm, name = "denominator",
targetCohortTable = "target",
targetCohortId = 1
)
inc_3op <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = TRUE,
outcomeWashout = 1L,
completeDatabaseIntervals = FALSE
)
# we should have 5 events with washout 1
expect_true(sum(as.numeric(inc_3op %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 5)
# try repeated events FALSE.
inc_repev <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
repeatedEvents = FALSE,
outcomeWashout = 1L,
completeDatabaseIntervals = FALSE
)
expect_true(sum(as.numeric(inc_repev %>%
dplyr::filter(estimate_name == "outcome_count") %>%
dplyr::pull("estimate_value"))) == 2)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check confidence intervals", {
skip_on_cran()
cdm <- mockIncidencePrevalence(
sampleSize = 1000
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
cohortDateRange = c(as.Date("2008-01-01"), as.Date("2011-01-01"))
)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "years",
repeatedEvents = TRUE,
outcomeWashout = 0,
completeDatabaseIntervals = TRUE
)
pkg_est <- inc %>%
omopgenerics::filterSettings(result_type == "incidence") |>
dplyr::select(
"estimate_name",
"estimate_value", "additional_level"
) |>
tidyr::pivot_wider(
names_from = "estimate_name",
values_from = "estimate_value"
) |>
dplyr::filter(denominator_count > 1)
expect_equal(as.numeric(pkg_est$incidence_100000_pys_95CI_lower),
epitools::pois.exact(
as.numeric(pkg_est$outcome_count),
as.numeric(pkg_est$person_years)
)$lower * 100000,
tolerance = 1e-2
)
expect_equal(as.numeric(pkg_est$incidence_100000_pys_95CI_upper),
epitools::pois.exact(
as.numeric(pkg_est$outcome_count),
as.numeric(pkg_est$person_years)
)$upper * 100000,
tolerance = 1e-2
)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check attrition", {
skip_on_cran()
cdm <- mockIncidencePrevalence(sampleSize = 1000)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
sex = c("Male", "Female")
)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "years"
)
# for female cohort we should have a row for those excluded for not being male
expect_gt(nrow(inc |>
omopgenerics::filterSettings(
result_type == "incidence_attrition",
denominator_sex == "Female"
) |>
dplyr::filter(strata_level == "Not Female")), 0)
# for male, the opposite
expect_gt(nrow(inc |>
omopgenerics::filterSettings(
result_type == "incidence_attrition",
denominator_sex == "Male"
) |>
dplyr::filter(strata_level == "Not Male")), 0)
omopgenerics::cdmDisconnect(cdm)
# check obscuring counts
cdm <- mockIncidencePrevalence(sampleSize = 4)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator",
sex = c("Male", "Female")
)
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "years"
) |> omopgenerics::suppress(5)
expect_true(inc |>
omopgenerics::filterSettings(
result_type == "incidence_attrition",
denominator_sex == "Male"
) |>
dplyr::filter(strata_level == "Not Male") |>
dplyr::filter(variable_name == "excluded_subjects") |>
dplyr::pull("estimate_value") == "-")
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check attrition with complete database intervals", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2000-06-01"),
as.Date("2000-06-01")
),
observation_period_end_date = c(
as.Date("2011-07-01"),
as.Date("2000-07-01")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
),
cohort_end_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "years"
)
expect_true(inc |>
omopgenerics::filterSettings(result_type == "incidence_attrition") |>
dplyr::filter(strata_level == "Not observed during the complete database interval") |>
dplyr::filter(variable_name == "excluded_subjects") |>
dplyr::pull("estimate_value") == "1")
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check compute permanent", {
skip_on_cran()
# using temp
cdm <- mockIncidencePrevalence(sampleSize = 1000)
attr(cdm, "write_schema") <- "main"
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "overall"
)
# we have no temp tables created by dbplyr
expect_false(any(stringr::str_starts(
CDMConnector::listTables(attr(attr(cdm, "cdm_source"), "dbcon")),
"dbplyr_"
)))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: if missing cohort attributes", {
skip_on_cran()
# missing cohort_set
cdm <- mockIncidencePrevalence()
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
attr(cdm$outcome, "cohort_set") <- NULL
expect_error(estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "overall"
))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: empty outcome cohort", {
skip_on_cran()
cdm <- mockIncidencePrevalence(sampleSize = 200)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
cdm$outcome <- cdm$outcome %>% dplyr::filter(cohort_definition_id == 99)
expect_no_error(inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months"
))
expect_true(sum(as.numeric(inc |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 0)
# make sure we also have a confidence interval even in the case of an empty outcome cohort
expect_true(all(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys") |>
dplyr::pull("estimate_value") == "0"))
expect_true(all(as.numeric(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |>
dplyr::pull("estimate_value")) == 0))
expect_true(all(as.numeric(inc |>
dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |>
dplyr::pull("estimate_value")) > 0))
tidyInc <- inc |>
asIncidenceResult()
# compare our wilson CIs with those from epitools
epi_ci <- epitools::binom.wilson(
as.numeric(tidyInc$outcome_count),
as.numeric(tidyInc$denominator_count)
)
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: incidence using strata vars", {
skip_on_cran()
cdm <- mockIncidencePrevalence(
sampleSize = 2000L,
outPre = 0.2
)
cdm <- generateDenominatorCohortSet(
cdm = cdm,
name = "denominator",
cohortDateRange = as.Date(
c("1993-01-01", "1998-01-01")
)
)
inc_orig <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months"
)
cdm$denominator <- cdm$denominator %>%
dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1995L,
"first", "second"
)) %>%
dplyr::compute(
name = "denominator",
temporary = FALSE
)
inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
strata = list(c("my_strata"))
) |>
omopgenerics::filterSettings(result_type == "incidence")
expect_true(all(c("overall", "my_strata") %in% unique(inc$strata_name)))
expect_true(all(c("overall", "first", "second") %in% unique(inc$strata_level)))
org <- inc_orig |>
omopgenerics::filterSettings(result_type == "incidence")
attr(org, "settings") <- NULL
attr(inc, "settings") <- NULL
# original without strata should be the same as "Overall" strata
expect_identical(org, inc |> dplyr::filter(strata_level == "overall"))
cdm$denominator <- cdm$denominator %>%
dplyr::mutate(my_strata2 = dplyr::if_else(month(cohort_start_date) < 7,
"a", "b"
)) %>%
dplyr::compute(
name = "denominator",
temporary = FALSE
)
inc2 <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
strata = list(c("my_strata", "my_strata2"))
)
expect_true(all(c("overall", "my_strata &&& my_strata2") %in%
unique(inc2$strata_name)))
expect_true(all(c(
"overall",
"first &&& a",
"second &&& a",
"first &&& b",
"second &&& b"
) %in%
unique(inc2$strata_level)))
inc3 <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
strata = list(
c("my_strata"),
c("my_strata2"),
c("my_strata", "my_strata2")
)
)
expect_true(all(c(
"overall",
"my_strata",
"my_strata2",
"my_strata &&& my_strata2"
) %in%
unique(inc3$strata_name)))
expect_true(all(c(
"overall",
"first", "second",
"a", "b",
"first &&& a",
"second &&& a",
"first &&& b",
"second &&& b"
) %in%
unique(inc3$strata_level)))
# without overall strata
inc4 <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
strata = list(
c("my_strata"),
c("my_strata2"),
c("my_strata", "my_strata2")
),
includeOverallStrata = FALSE
)
expect_true(all(c(
"my_strata",
"my_strata2",
"my_strata &&& my_strata2"
) %in%
unique(inc4$strata_name)))
expect_true(all(c(
"first", "second",
"a", "b",
"first &&& a",
"second &&& a",
"first &&& b",
"second &&& b"
) %in%
unique(inc4$strata_level)))
expect_error(estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
strata = list(c("not_a_col"))
))
expect_error(estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
strata = list(c("my_strata", "not_a_col"))
))
expect_error(estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = "months",
strata = list(c("my_strata"), c("not_a_col"))
))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: multiple outcome cohort id", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = c(1L, 2L),
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 2L),
observation_period_start_date = c(
as.Date("2005-01-15"),
as.Date("2005-01-15")
),
observation_period_end_date = c(
as.Date("2007-05-01"),
as.Date("2011-06-15")
)
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = c(1L, 1L, 2L),
subject_id = c(1L, 2L, 2L),
cohort_start_date = c(
as.Date("2006-02-05"),
as.Date("2006-02-05"),
as.Date("2010-02-05")
),
cohort_end_date = c(
as.Date("2006-02-05"),
as.Date("2006-02-05"),
as.Date("2010-02-05")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator"
)
inc_all_outcome <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeCohortId = c(1L, 2L),
interval = "overall"
)
inc_all_outcome_1 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeCohortId = 1L,
interval = "overall"
)
inc_all_outcome_2 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
outcomeCohortId = 2,
interval = "overall"
)
expect_identical(inc_all_outcome %>%
omopgenerics::filterGroup(outcome_cohort_name == "cohort_1") |>
dplyr::pull("estimate_value"), inc_all_outcome_1 |>
dplyr::pull("estimate_value"))
expect_identical(inc_all_outcome %>%
omopgenerics::filterGroup(outcome_cohort_name == "cohort_2") |>
dplyr::pull("estimate_value"), inc_all_outcome_2 |>
dplyr::pull("estimate_value"))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: cohort names for cohortId args", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-28"),
observation_period_end_date = as.Date("2012-12-31")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-01-28")
),
cohort_end_date = c(
as.Date("2010-01-28")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
inc1 <- estimateIncidence(cdm, "target", "outcome")
inc2 <- estimateIncidence(cdm, denominatorTable = "target", outcomeTable = "outcome",
denominatorCohortId = 1, outcomeCohortId = 1)
inc3 <- estimateIncidence(cdm, denominatorTable = "target", outcomeTable = "outcome",
denominatorCohortId = "cohort_1", outcomeCohortId = "cohort_1")
expect_true(all.equal(inc1, inc2))
expect_true(all.equal(inc2, inc3))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: empty denominator", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2010-01-28"),
observation_period_end_date = as.Date("2012-12-31")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2010-01-28")
),
cohort_end_date = c(
as.Date("2010-01-28")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
attr(cdm$target, "cohort_set") <- dplyr::union_all(
attr(cdm$target, "cohort_set"),
dplyr::tibble(
cohort_definition_id = 2,
cohort_name = "cohort_2"
),
copy = TRUE
)
expect_error(estimateIncidence(cdm, "target", "outcome", 2))
omopgenerics::cdmDisconnect(cdm)
})
test_that("mock db: check local cdm", {
skip_on_cran()
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 2000L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("2000-01-01"),
observation_period_end_date = as.Date("2012-06-01")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
),
cohort_end_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable
)
cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator")
cdm <- cdm |> dplyr::collect()
expect_no_error(inc <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
interval = c("years", "overall")
))
})
test_that("mock db: censor cohort", {
skip_on_cran()
# if appearing in censor cohort before denominator, the person should be
# excluded
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 1990L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = 1L,
person_id = 1L,
observation_period_start_date = as.Date("1990-01-01"),
observation_period_end_date = as.Date("2012-06-01")
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
),
cohort_end_date = c(
as.Date("2008-02-05"),
as.Date("2010-02-08"),
as.Date("2010-02-20")
)
)
censorCohort <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("1995-01-01")
),
cohort_end_date = c(
as.Date("1995-01-01")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable,
censorTable = censorCohort
)
cdm <- generateDenominatorCohortSet(cdm = cdm,
name = "denominator",
cohortDateRange = c(as.Date("2000-01-01"),
as.Date(NA)))
# using censor cohort should mean person excluded before start
inc <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "censor")
expect_true(all(inc |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "0"))
# if appearing in censor cohort after denominator but before outcome, they
# should contribute time at risk until censor and then no more time and no
# outcome
# censor cohort after 1st outcome
# so should be excluded from then (if analysis is allowing repetitive events)
censorCohort2 <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2009-01-01")
),
cohort_end_date = c(
as.Date("2009-01-01")
)
)
cdm <- omopgenerics::insertTable(cdm, "my_censor_cohort_2", censorCohort2)
cdm$my_censor_cohort_2 <- omopgenerics::newCohortTable(cdm$my_censor_cohort_2)
inc_2 <- estimateIncidence(cdm,
interval = "overall",
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_2",
outcomeWashout = 0,
repeatedEvents = TRUE)
expect_true(all(sum(as.numeric(inc_2 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 1))
# if appearing in the censor cohort on the day of the outcome, they
# contribute the outcome but are censored from then on
censorCohort3 <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2008-02-05")
),
cohort_end_date = c(
as.Date("2008-02-05")
)
)
cdm <- omopgenerics::insertTable(cdm, "my_censor_cohort_3", censorCohort3)
cdm$my_censor_cohort_3 <- omopgenerics::newCohortTable(cdm$my_censor_cohort_3)
inc_3 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_3",
outcomeWashout = 0,
repeatedEvents = TRUE)
expect_true(all(sum(as.numeric(inc_2 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value"))) == 1))
# multiple denominators - censor cohort should apply with each
# check censor cohort id argument
# if multiple cohorts in censor cohort table, one must be chosen
censorCohort5 <- dplyr::tibble(
cohort_definition_id = c(1L, 2L),
subject_id = c(1L,1L),
cohort_start_date = c(
as.Date("2008-02-05"),
as.Date("2009-02-05")
),
cohort_end_date = c(
as.Date("2008-02-05"),
as.Date("2009-02-05")
)
)
cdm <- omopgenerics::insertTable(cdm, "my_censor_cohort_5", censorCohort5)
cdm$my_censor_cohort_5 <- omopgenerics::newCohortTable(cdm$my_censor_cohort_5)
# need to specify one cohort if multiple exist
expect_no_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_5",
censorCohortId = "cohort_1")) # can use name
expect_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_5",
censorCohortId = c(1,2)))
expect_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_5",
censorCohortId = c(1,2)))
expect_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_5",
censorCohortId = NULL))
# censor cohort must be one record per person
censorCohort6 <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("1995-01-01"),
as.Date("1998-01-01")
),
cohort_end_date = c(
as.Date("1995-01-01"),
as.Date("1998-01-01")
)
)
cdm <- omopgenerics::insertTable(cdm, "my_censor_cohort_6", censorCohort6)
cdm$my_censor_cohort_6 <- omopgenerics::newCohortTable(cdm$my_censor_cohort_6)
expect_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_6"))
# validate input
expect_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "not_a_table",
outcomeWashout = 0,
repeatedEvents = TRUE))
expect_error(estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "my_censor_cohort_3",
censorCohortId = 99)) # not in cohort table
# multiple observation periods
# should look back
personTable <- dplyr::tibble(
person_id = 1L,
gender_concept_id = 8507L,
year_of_birth = 1990L,
month_of_birth = 01L,
day_of_birth = 01L
)
observationPeriodTable <- dplyr::tibble(
observation_period_id = c(1L, 2L),
person_id = c(1L, 1L),
observation_period_start_date = c(as.Date("2000-01-01"),
as.Date("2002-01-01")),
observation_period_end_date = c(as.Date("2001-06-01"),
as.Date("2003-06-01"))
)
outcomeTable <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2002-02-05")
),
cohort_end_date = c(
as.Date("2002-02-05")
)
)
censorCohort <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = c(
as.Date("2000-04-01")
),
cohort_end_date = c(
as.Date("2000-04-01")
)
)
cdm <- mockIncidencePrevalence(
personTable = personTable,
observationPeriodTable = observationPeriodTable,
outcomeTable = outcomeTable,
censorTable = censorCohort
)
cdm <- generateDenominatorCohortSet(cdm = cdm,
name = "denominator")
inc_1 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
completeDatabaseIntervals = FALSE)
expect_true(all(inc_1 |>
dplyr::filter(estimate_name == "denominator_count") |>
dplyr::pull("estimate_value") == "1"))
expect_true(any(inc_1 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value") == "1"))
# using censor cohort should mean person excluded before start
inc_2 <- estimateIncidence(cdm,
denominatorTable = "denominator",
outcomeTable = "outcome",
censorTable = "censor",
completeDatabaseIntervals = FALSE)
expect_true(inc_2 |>
dplyr::filter(estimate_name == "outcome_count") |>
dplyr::pull("estimate_value") == "0")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.