Nothing
test_generate_concept_cohort_set <- function(con, cdm_schema, write_schema) {
skip_if_not_installed("CirceR")
# withr::local_options("CDMConnector.cohort_as_temp" = FALSE) # temp cohort tables are not implemented yet
cdm <- cdmFromCon(
con = con,
cdmName = "cdm",
cdmSchema = cdm_schema,
writeSchema = write_schema
)
# check that we have records. Need the eunomia gibleed data for this.
cdm$condition_occurrence %>%
dplyr::filter(condition_concept_id == 192671L) %>%
dplyr::count() %>%
dplyr::pull("n") %>%
expect_gt(10)
# default (no descendants) ----
# debugonce(generateConceptCohortSet)
cdm <- generateConceptCohortSet(
cdm = cdm,
conceptSet = list(gibleed = 192671L),
name = "gibleed",
overwrite = TRUE
)
cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>%
dplyr::filter(cohort_name %in% c("gibleed_default", "GiBleed_default")) %>%
dplyr::mutate(cohort_definition_id = 1L)
# TODO add gibleed data to spark test server
# if (dbms(con) == "spark") cohort$json <- stringr::str_replace_all(cohort$json, "192671", "40481087")[[1]]
stopifnot(nrow(cohort) == 1)
cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE)
expected <- dplyr::collect(cdm$gibleed2) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
actual <- dplyr::collect(cdm$gibleed) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
expect_true(nrow(expected) > 0)
expect_true(nrow(actual) == nrow(expected))
# setdiff(unique(expected$subject_id), unique(actual$subject_id))
# setdiff(unique(actual$subject_id), unique(expected$subject_id))
expect_setequal(unique(expected$subject_id), unique(actual$subject_id))
# remove attributes since they are a bit different
# TODO should the attributes be the exact same? probably. but it will take time to implement that.
attr(actual, 'cohort_attrition') <- attr(expected, 'cohort_attrition') <- NULL
attr(actual, 'cohort_set') <- attr(expected, 'cohort_set') <- NULL
attr(actual, 'cohort_codelist') <- attr(expected, 'cohort_codelist') <- NULL
expect_equal(actual, expected)
expect_error({
# should be fail fast case
generateConceptCohortSet(
cdm = cdm,
conceptSet = list(gibleed = 192671L),
name = "gibleed",
overwrite = FALSE
)
}, "gibleed already exists in the CDM")
# bind both cohorts
cdm <- bind(cdm$gibleed, cdm$gibleed2, name = "new_gibleed")
expect_true("new_gibleed" %in% names(cdm))
expect_true(inherits(x = cdm$new_gibleed, what = "cohort_table"))
expect_identical(
settings(cdm$new_gibleed),
settings(cdm$gibleed) |>
dplyr::bind_rows(
settings(cdm$gibleed2) |> dplyr::mutate("cohort_definition_id" = 2L)
)
)
cdm <- generateConceptCohortSet(cdm,
conceptSet = list(gibleed = 192671L), name = "gibleed3",
requiredObservation = c(2, 2),
overwrite = TRUE
)
cdm <- generateConceptCohortSet(cdm,
conceptSet = list(gibleed = 192671L), name = "gibleed4",
requiredObservation = c(2, 200),
overwrite = TRUE
)
expect_identical(settings(cdm$gibleed4)$limit, "first")
expect_identical(settings(cdm$gibleed4)$end, "observation_period_end_date")
expect_true(settings(cdm$gibleed4)$prior_observation == 2)
expect_true(settings(cdm$gibleed4)$future_observation == 200)
expect_true({
cohortCount(cdm$gibleed3)$number_records >= cohortCount(cdm$gibleed4)$number_records
})
# default (with descendants) ----
# if (rlang::is_installed("Capr")) {
# if (FALSE) { # TODO: capr concept generation failing on sql server
# # we need Capr to include descendants
# cdm <- generateConceptCohortSet(
# cdm = cdm,
# conceptSet = list(gibleed = Capr::cs(Capr::descendants(192671), name = "gibleed")),
# name = "gibleed",
# overwrite = TRUE
# )
#
# cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>%
# dplyr::filter(cohort_name %in% c("gibleed_default_with_descendants", "GiBleed_default_with_descendants")) %>%
# dplyr::mutate(cohort_definition_id = 1L)
#
# stopifnot(nrow(cohort) == 1)
#
# cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE)
#
# expected <- dplyr::collect(cdm$gibleed2) %>%
# dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
# dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
#
# actual <- dplyr::collect(cdm$gibleed) %>%
# dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
# dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
#
# setdiff(unique(expected$subject_id), unique(actual$subject_id))
# setdiff(unique(actual$subject_id), unique(expected$subject_id))
# expect_true(nrow(expected) > 0)
# expect_true(nrow(actual) == nrow(expected))
#
# # note cohort table should be the same
# # but some attributes might differ (e.g. cohort attrition)
# expect_setequal(unique(expected$subject_id), unique(actual$subject_id))
# expect_equal(cohortCount(cdm$gibleed),
# cohortCount(cdm$gibleed2))
# }
# use omopgenerics conceptSetExpression to include descendants
# Capr::cs(Capr::descendants(192671), name = "gibleed")
conceptSet <- omopgenerics::newConceptSetExpression(
list("gibleed" = dplyr::tibble(
"concept_id" = 192671,
"excluded" = FALSE,
"descendants" = TRUE,
"mapped" = FALSE
))
)
cdm <- generateConceptCohortSet(
cdm = cdm,
conceptSet = conceptSet,
name = "gibleed",
overwrite = TRUE
)
cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>%
dplyr::filter(cohort_name %in% c("gibleed_default_with_descendants", "GiBleed_default_with_descendants")) %>%
dplyr::mutate(cohort_definition_id = 1L)
stopifnot(nrow(cohort) == 1)
cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE)
expected <- dplyr::collect(cdm$gibleed2) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
actual <- dplyr::collect(cdm$gibleed) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
# setdiff(unique(expected$subject_id), unique(actual$subject_id))
# setdiff(unique(actual$subject_id), unique(expected$subject_id))
expect_equal(sort(unique(expected$subject_id)), sort(unique(actual$subject_id)))
expect_true(nrow(expected) > 0)
expect_true(nrow(actual) == nrow(expected))
# note cohort table should be the same
# but some attributes might differ (e.g. cohort attrition)
expect_setequal(unique(expected$subject_id), unique(actual$subject_id))
expect_equal(cohortCount(cdm$gibleed),
cohortCount(cdm$gibleed2))
# all occurrences (no descendants) ----
cdm <- generateConceptCohortSet(
cdm = cdm,
conceptSet = list(gibleed = 192671L),
name = "gibleed",
limit = "all",
overwrite = TRUE
)
cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>%
dplyr::filter(cohort_name %in% c("gibleed_all", "GiBleed_all")) %>%
dplyr::mutate(cohort_definition_id = 1L)
stopifnot(nrow(cohort) == 1)
cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE)
expect_equal(
as.integer(dplyr::pull(dplyr::tally(cdm$gibleed2), "n")),
as.integer(dplyr::pull(dplyr::tally(cdm$gibleed), "n"))
)
expected <- dplyr::collect(cdm$gibleed2) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
actual <- dplyr::collect(cdm$gibleed) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
# setdiff(unique(expected$subject_id), unique(actual$subject_id))
# setdiff(unique(actual$subject_id), unique(expected$subject_id))
expect_true(nrow(expected) > 0)
expect_true(nrow(actual) == nrow(expected))
expect_setequal(unique(expected$subject_id), unique(actual$subject_id))
attr(actual, 'cohort_attrition') <- attr(expected, 'cohort_attrition') <- NULL
attr(actual, 'cohort_set') <- attr(expected, 'cohort_set') <- NULL
attr(actual, 'cohort_codelist') <- attr(expected, 'cohort_codelist') <- NULL
expect_equal(actual, expected)
# all occurrences (no descendants) fixed end date ----
cdm <- generateConceptCohortSet(
cdm = cdm,
conceptSet = list(gibleed = 192671L),
name = "gibleed",
limit = "all",
end = "observation_period_end_date",
overwrite = TRUE
)
cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>%
dplyr::filter(cohort_name %in% c("gibleed_all")) %>%
dplyr::mutate(cohort_definition_id = 1L)
stopifnot(nrow(cohort) == 1)
cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE)
expect_equal(
as.integer(dplyr::pull(dplyr::tally(cdm$gibleed2), "n")),
as.integer(dplyr::pull(dplyr::tally(cdm$gibleed), "n"))
)
expected <- dplyr::collect(cdm$gibleed2) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
actual <- dplyr::collect(cdm$gibleed) %>%
dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>%
dplyr::mutate_if(~ "integer64" %in% class(.), as.integer)
# setdiff(unique(expected$subject_id), unique(actual$subject_id))
# setdiff(unique(actual$subject_id), unique(expected$subject_id))
expect_true(nrow(expected) > 0)
expect_true(nrow(actual) == nrow(expected))
expect_setequal(unique(expected$subject_id), unique(actual$subject_id))
attr(actual, 'cohort_attrition') <- attr(expected, 'cohort_attrition') <- NULL
attr(actual, 'cohort_set') <- attr(expected, 'cohort_set') <- NULL
attr(actual, 'cohort_codelist') <- attr(expected, 'cohort_codelist') <- NULL
expect_equal(actual, expected)
# multiple cohort generation ------
cdm <- generateConceptCohortSet(
cdm = cdm,
conceptSet = list("acetaminophen_1" = 1127433,
"acetaminophen_2" = 1127433),
name = "acetaminophen",
limit = "all",
end = "event_end_date",
overwrite = TRUE
)
# should have two identical cohorts
expect_equal(length(cohortCount(cdm$acetaminophen) %>%
dplyr::select("number_records") |>
dplyr::distinct() |>
dplyr::pull()), 1)
# cohort generation with a cohort subset ------
# create our main cohort of interest
cdm <- generateConceptCohortSet(
cdm = cdm,
conceptSet = list(gibleed_1 = 192671L, gibleed_2 = 4112343),
name = "gibleed_exp",
overwrite = TRUE
)
start_person_count <- cdm$person %>% dplyr::tally() %>% dplyr::pull("n")
cdm <- generateConceptCohortSet(cdm = cdm,
name = "gibleed_medications",
conceptSet = list("diclofenac" = 1124300,
"acetaminophen" = 1127433),
subsetCohort = "gibleed_exp",
overwrite = TRUE)
# we should still have our original cdm
end_person_count <- cdm$person %>% dplyr::tally() %>% dplyr::pull("n")
expect_true(start_person_count == end_person_count)
expect_true(nrow(cdm$gibleed_medications %>%
dplyr::select("subject_id") %>%
dplyr::distinct() %>%
dplyr::anti_join(cdm$gibleed_exp %>%
dplyr::select("subject_id") %>%
dplyr::distinct(),
by = "subject_id") %>%
dplyr::collect()) == 0)
# specifying cohort ids
cdm <- generateConceptCohortSet(cdm = cdm,
name = "gibleed_medications2",
conceptSet = list("diclofenac" = 1124300,
"acetaminophen" = 1127433),
subsetCohort = "gibleed_exp",
subsetCohortId = 1,
overwrite = TRUE)
expect_true(nrow(cdm$gibleed_medications2 %>%
dplyr::select("subject_id") %>%
dplyr::distinct() %>%
dplyr::anti_join(cdm$gibleed_exp %>%
dplyr::filter(cohort_definition_id == 1L) %>%
dplyr::select("subject_id") %>%
dplyr::distinct(),
by = "subject_id") %>%
dplyr::collect()) == 0)
# expected errors
expect_error(
generateConceptCohortSet(cdm = cdm,
name = "gibleed_medications2",
conceptSet = list("diclofenac" = 1124300,
"acetaminophen" = 1127433),
subsetCohort = "not_a_table",
subsetCohortId = 1,
overwrite = TRUE)
)
expect_error(
generateConceptCohortSet(cdm = cdm,
name = "gibleed_medications2",
conceptSet = list("diclofenac" = 1124300,
"acetaminophen" = 1127433),
subsetCohort = "gibleed_exp",
subsetCohortId = c(99,100,101), # these cohort ids not in cohort table
overwrite = TRUE)
)
# clean up
dropSourceTable(cdm, dplyr::contains("gibleed"))
}
# dbtype = "duckdb"
for (dbtype in dbToTest) {
test_that(glue::glue("{dbtype} - generateConceptCohortSet"), {
if (!(dbtype %in% ciTestDbs)) skip_on_ci()
if (dbtype != "duckdb") skip_on_cran() else skip_if_not_installed("duckdb")
skip_if_not_installed("CirceR")
con <- get_connection(dbtype)
cdm_schema <- get_cdm_schema(dbtype)
write_schema <- get_write_schema(dbtype)
skip_if(any(write_schema == "") || any(cdm_schema == "") || is.null(con))
test_generate_concept_cohort_set(con, cdm_schema, write_schema)
disconnect(con)
})
}
test_that("missing domains produce warning", {
skip_on_cran()
skip_if_not_installed("duckdb")
skip_if_not("duckdb" %in% dbToTest)
con <- DBI::dbConnect(duckdb::duckdb(eunomiaDir()))
cdm <- cdmFromCon(
con = con, cdmName = "eunomia", cdmSchema = "main", writeSchema = "main"
) %>%
cdmSelect(-drug_exposure)
expect_warning({
cdm <- generateConceptCohortSet(cdm, name = "celecoxib",
conceptSet = list(celecoxib = 1118084))
})
DBI::dbDisconnect(con, shutdown = TRUE)
})
test_that("Regimen domain does not cause error", {
skip_on_cran()
skip_if_not("duckdb" %in% dbToTest)
skip_if_not_installed("duckdb")
con <- DBI::dbConnect(duckdb::duckdb(eunomiaDir()))
# create a fake concept with domain "Regimen"
DBI::dbExecute(con, "UPDATE main.concept SET domain_id = 'Regimen' WHERE concept_id = 19129655")
cdm <- cdmFromCon(
con = con, cdmName = "eunomia", cdmSchema = "main", writeSchema = "main"
)
concept_set <- list(drug_1 = c(1127433, 19129655), drug_2 = 19129655, drug_3 = 1127433)
expect_no_error({
cdm <- generateConceptCohortSet(cdm = cdm,
name = "cohort",
conceptSet = concept_set,
overwrite = TRUE)
})
expect_s3_class(cdm$cohort, "cohort_table")
DBI::dbDisconnect(con, shutdown = TRUE)
})
test_that("Eunomia", {
skip_on_cran()
skip_if_not_installed("duckdb")
skip_if_not("duckdb" %in% dbToTest)
skip_if_not(eunomiaIsAvailable())
# edge case with overlaps (issue 420)
db <- DBI::dbConnect(duckdb::duckdb(), eunomiaDir())
cdm <- cdmFromCon(
con = db,
cdmSchema = "main",
writeSchema = "main"
)
expect_no_error(cdm <- cdm %>%
generateConceptCohortSet(conceptSet = list("acetaminophen" = c(1125315,
1127078,
1127433,
40229134,
40231925,
40162522,
19133768)),
limit = "all",
end = "event_end_date",
name = "acetaminophen",
overwrite = TRUE))
# behaviour with concepts not in vocab
# this works even though 1 is not in the concept table
expect_no_error(cdm <- generateConceptCohortSet(
cdm = cdm,
name = "ankle_sprain",
conceptSet = list("ankle_sprain" = c(81151, 1)),
end = "event_end_date",
limit = "all",
overwrite = TRUE
))
expect_true(settings(cdm$ankle_sprain) |>
dplyr::pull("cohort_name") == "ankle_sprain")
expect_warning(cdm <- generateConceptCohortSet(
cdm = cdm,
name = "ankle_sprain",
conceptSet = list("ankle_sprain" = 1),
end = "event_end_date",
limit = "all",
overwrite = TRUE
), "None of the input concept IDs found for the cdm reference")
expect_true(settings(cdm$ankle_sprain) |>
dplyr::pull("cohort_name") == "ankle_sprain")
# we should have ankle_sprain2 as an empty cohort in our set but don't
expect_no_error(cdm <- generateConceptCohortSet(
cdm = cdm,
name = "ankle_sprain",
conceptSet = list("ankle_sprain" = 81151,
"ankle_sprain2" = 1),
end = "event_end_date",
limit = "all",
overwrite = TRUE
))
expect_true(all(sort(settings(cdm$ankle_sprain) |>
dplyr::pull("cohort_name")) ==
c("ankle_sprain", "ankle_sprain2")))
})
test_that("invalid cdm records are ignored in generateConceptCohortSet", {
skip_if_not_installed("duckdb")
cdm <- cdmFromTables(
tables = list(
"person" = dplyr::tibble(
person_id = 1L, gender_concept_id = 0L, year_of_birth = 1900L,
race_concept_id = 0L, ethnicity_concept_id = 0L
),
"observation_period" = dplyr::tibble(
observation_period_id = 1L, person_id = 1L,
observation_period_start_date = as.Date("1900-01-01"),
observation_period_end_date = as.Date("2000-01-01"),
period_type_concept_id = 0L
),
"drug_exposure" = dplyr::tibble(
drug_exposure_id = 1L, person_id = 1L, drug_concept_id = 1L,
drug_exposure_start_date = as.Date(c("1950-01-01", "1951-01-01")),
drug_exposure_end_date = as.Date(c("1945-01-01", "1952-01-01")),
drug_type_concept_id = 0L
),
"concept" = dplyr::tibble(
concept_id = 1L, concept_name = "my_drug", domain_id = "Drug",
vocabulary_id = "vocab", concept_class_id = "0", concept_code = "0",
valid_start_date = as.Date("1900-01-01"), valid_end_date = as.Date("2030-01-01"),
standard_concept = "S",
invalid_reason = ""
)
),
cdmName = "test"
)
con <- DBI::dbConnect(duckdb::duckdb())
cdm <- copyCdmTo(con, cdm = cdm, schema = "main")
cdm <- generateConceptCohortSet(cdm = cdm,
conceptSet = list(custom = 1),
name = "my_cohort",
end = "event_end_date")
actual <- dplyr::collect(cdm$my_cohort) %>%
dplyr::tibble()
# names(attributes(actual))
# remove cohort attributes
attr(actual, "cohort_set") <- NULL
attr(actual, "cohort_attrition") <- NULL
attr(actual, "cohort_codelist") <- NULL
expected <- dplyr::tibble(
cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = as.Date("1951-01-01"),
cohort_end_date = as.Date("1952-01-01"),
)
expect_equal(actual, expected)
})
test_that("attrition columns are correct", {
skip_if_not("duckdb" %in% dbToTest)
con <- DBI::dbConnect(duckdb::duckdb(), eunomiaDir())
cdm <- cdmFromCon(con, "main", "main")
cdm <- generateConceptCohortSet(cdm,
conceptSet = list(acetaminophen = 1127433),
name = "cohort1")
cohort_set <- readCohortSet(system.file("cohorts1", package = "CDMConnector"))[1,]
cdm <- generateCohortSet(cdm,
cohortSet = cohort_set,
name = "cohort2")
expected_colnames <- c("cohort_definition_id", "number_records", "number_subjects",
"reason_id", "reason", "excluded_records", "excluded_subjects")
expect_equal(expected_colnames, colnames(attrition(cdm$cohort1)))
expect_equal(expected_colnames, colnames(attrition(cdm$cohort2)))
DBI::dbDisconnect(con, shutdown = T)
})
test_that("attrition columns are correct", {
skip_if_not_installed("Capr")
skip_if_not("duckdb" %in% dbToTest)
skip_on_cran()
con <- DBI::dbConnect(duckdb::duckdb(), eunomiaDir())
cdm <- cdmFromCon(con, "main", "main")
cohort_set <- readCohortSet(system.file("cohorts1", package = "CDMConnector"))
cdm <- generateCohortSet(cdm,
cohortSet = cohort_set,
name = "cohort")
expected_colnames <- c("cohort_definition_id", "number_records", "number_subjects",
"reason_id", "reason", "excluded_records", "excluded_subjects")
expect_equal(expected_colnames, colnames(attrition(cdm$cohort)))
DBI::dbDisconnect(con, shutdown = T)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.