# Chunk options applied to every code chunk in this vignette.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  eval = TRUE,
  message = FALSE,
  warning = FALSE
)

# The examples below run against the Eunomia example dataset.
# NOTE(review): requireEunomia() presumably fetches the dataset when it is
# missing -- confirm against the CDMConnector documentation.
library(CDMConnector)
requireEunomia()
Let's first create a cdm reference to the Eunomia synthetic data.
# Packages used throughout the vignette.
library(CDMConnector)
library(CodelistGenerator)
library(PatientProfiles)
library(CohortConstructor)
library(dplyr)

# Open a DuckDB connection to the Eunomia database file.
con <- DBI::dbConnect(
  drv = duckdb::duckdb(),
  dbdir = eunomiaDir()
)

# Build the cdm reference. The cdm is read from the "main" schema, and the
# write schema is also "main", used with the "my_study_" prefix.
cdm <- cdmFromCon(
  con = con,
  cdmSchema = "main",
  writeSchema = "main",
  writePrefix = "my_study_"
)
A way of defining base cohorts is to identify clinical records with codes from some pre-specified concept list. Here for example we'll first find codes for five drug ingredients: acetaminophen, amoxicillin, diclofenac, simvastatin and warfarin. We use the getDrugIngredientCodes()
function from the package CodelistGenerator to obtain the codes for these drugs.
# Look up the ingredient-level codes for the five drugs of interest.
drug_codes <- cdm |>
  getDrugIngredientCodes(
    name = c(
      "acetaminophen",
      "amoxicillin",
      "diclofenac",
      "simvastatin",
      "warfarin"
    )
  )

# Print the resulting codelist.
drug_codes
Now we have our codes of interest, we'll make cohorts for each of these where cohort exit is defined as the event end date (which for these will be their drug exposure end date).
# One cohort per drug codelist; each cohort entry ends on the event end date.
cdm$drugs <- cdm |>
  conceptCohort(
    conceptSet = drug_codes,
    name = "drugs",
    exit = "event_end_date"
  )

# Inspect the cohort settings, counts and attrition.
cdm$drugs |> settings()
cdm$drugs |> cohortCount()
cdm$drugs |> attrition()
This creates a cohort where individuals are defined by their exposure to the specified drugs, and their cohort duration is determined by the exposure end date.
Next, let's create a cohort for individuals with bronchitis. We define a set of codes representing bronchitis and use the conceptCohort()
function to create the cohort. Here, the cohort exit is defined by the event start date (i.e., event_start_date
). We set table = "condition_occurrence"
so that the records for the provided concepts will be searched only in the condition_occurrence table. We then set subsetCohort = "drugs"
to restrict the cohort creation to individuals already in the drugs
cohort. Additionally, we use subsetCohortId = 1
to include only subjects from the cohort 1 (which corresponds to individuals who have been exposed to warfarin).
# Concept ids that represent bronchitis.
bronchitis_codes <- list(
  bronchitis = c(260139, 256451, 4232302)
)

# Bronchitis cohort:
# - records are searched only in the condition_occurrence table,
# - each entry exits on its event start date,
# - only subjects in cohort 1 of the "drugs" cohort table are considered.
cdm$bronchitis <- cdm |>
  conceptCohort(
    conceptSet = bronchitis_codes,
    name = "bronchitis",
    exit = "event_start_date",
    table = "condition_occurrence",
    subsetCohort = "drugs",
    subsetCohortId = 1
  )

cdm$bronchitis |> cohortCount()
cdm$bronchitis |> attrition()
When some records in the cohort overlap, the cohort start date will be set to the earliest start date. If we set overlap = "merge"
, the cohort end date will be set to the latest end date of the overlapping records.
# Overlapping records are merged: the cohort end date becomes the latest end
# date among the overlapping records.
cdm$drugs_merge <- conceptCohort(
  cdm,
  conceptSet = drug_codes,
  name = "drugs_merge",
  overlap = "merge"
)

attrition(cdm$drugs_merge)
Alternatively, if we set overlap = "extend"
, the cohort end date will be extended by summing the durations of each overlapping record.
# Overlapping records are extended: the cohort end date is pushed out by the
# summed durations of the overlapping records.
cdm$drugs_extend <- conceptCohort(
  cdm,
  conceptSet = drug_codes,
  name = "drugs_extend",
  overlap = "extend"
)

attrition(cdm$drugs_extend)
To create a cohort from a concept set and include records outside of the observation period, we can set inObservation = FALSE
. If we also want to search for the given concepts in the source concept_id fields, rather than only the standard concept_id fields, we can set useSourceFields = TRUE
.
# Celecoxib cohort that
# - keeps records outside of the observation period (inObservation = FALSE),
# - also searches the source concept_id fields (useSourceFields = TRUE).
cdm$celecoxib <- cdm |>
  conceptCohort(
    conceptSet = list(celecoxib = 44923712),
    name = "celecoxib",
    useSourceFields = TRUE,
    inObservation = FALSE
  )

glimpse(cdm$celecoxib)
One base cohort we can create is based around patient demographics. Here for example we create a cohort where people enter on their 18th birthday and leave on the day before their 66th birthday.
# Demographics-based cohort covering ages 18 to 65.
cdm$working_age_cohort <- demographicsCohort(
  cdm = cdm,
  name = "working_age_cohort",
  ageRange = c(18, 65)
)

cdm$working_age_cohort |> settings()
cdm$working_age_cohort |> cohortCount()
cdm$working_age_cohort |> attrition()
We can also add an additional requirement of only people of working age with sex "female".
# Same age range as the working-age cohort, restricted to sex "Female".
cdm$female_working_age_cohort <- demographicsCohort(
  cdm = cdm,
  name = "female_working_age_cohort",
  sex = "Female",
  ageRange = c(18, 65)
)

cdm$female_working_age_cohort |> settings()
cdm$female_working_age_cohort |> cohortCount()
cdm$female_working_age_cohort |> attrition()
We can also use this function to create cohorts for different combinations of age groups and sex.
# Cohorts for combinations of three age bands and two sexes.
cdm$age_sex_cohorts <- demographicsCohort(
  cdm = cdm,
  name = "age_sex_cohorts",
  sex = c("Female", "Male"),
  ageRange = list(
    c(0, 17),
    c(18, 65),
    c(66, 120)
  )
)

cdm$age_sex_cohorts |> settings()
cdm$age_sex_cohorts |> cohortCount()
cdm$age_sex_cohorts |> attrition()
We can also specify the minimum number of days of prior observation required.
# Working-age cohorts with two prior-observation requirements
# (0 days and 365 days).
cdm$working_age_cohort_0_365 <- demographicsCohort(
  cdm = cdm,
  name = "working_age_cohort_0_365",
  ageRange = c(18, 65),
  minPriorObservation = c(0, 365)
)

cdm$working_age_cohort_0_365 |> settings()
cdm$working_age_cohort_0_365 |> cohortCount()
cdm$working_age_cohort_0_365 |> attrition()
# Start from the package's mock cdm for the measurement examples.
cdm <- mockCohortConstructor(con = NULL)

# Append the three concepts the BMI example relies on: the BMI measurement,
# its unit, and the "Normal" value concept.
cdm$concept <- dplyr::union_all(
  cdm$concept,
  dplyr::tibble(
    concept_id = c(4245997, 9531, 4069590),
    concept_name = c(
      "Body mass index",
      "kilogram per square meter",
      "Normal"
    ),
    domain_id = "Measurement",
    vocabulary_id = c("SNOMED", "UCUM", "SNOMED"),
    standard_concept = "S",
    concept_class_id = c("Observable Entity", "Unit", "Qualifier Value"),
    concept_code = NA,
    valid_start_date = NA,
    valid_end_date = NA,
    invalid_reason = NA
  )
)

# Six BMI records: some carry a numeric value with a unit, others only the
# "Normal" value concept.
cdm$measurement <- dplyr::tibble(
  measurement_id = seq_len(6L),
  person_id = c(1, 1, 3, 1, 3, 2),
  measurement_concept_id = rep(4245997, 6),
  measurement_date = as.Date(
    c(
      "2009-07-01", "2000-12-11", "1999-09-08",
      "2015-02-19", "2016-08-22", "1965-03-10"
    )
  ),
  measurement_type_concept_id = NA,
  value_as_number = c(18, 36, 0, 29, 0, 25),
  value_as_concept_id = c(0, 0, 4069590, 4069590, 4069590, 0),
  unit_concept_id = c(9531, 9531, 0, 9531, 0, 9531)
)

# Copy the edited mock cdm into a fresh DuckDB database, schema "main".
cdm <- CDMConnector::copyCdmTo(
  con = DBI::dbConnect(duckdb::duckdb()),
  cdm = cdm,
  schema = "main"
)
Another base cohort we can create is based around patient measurements. Here for example we create a cohort of patients who have a normal BMI (BMI between 18 and 25). To do this you must first identify the measurement you want to look at (in this case BMI (concept id = 4245997)), the unit of measurement (kg per square-meter (concept id = 9531)) and 'normal' value concept (concept id = 4069590). The value concept is included for the cases where the exact BMI measurement is not specified, but the BMI category (i.e. normal, overweight, obese etc) is. This means that if a record matches the value concept OR has a normal BMI score then it is included in the cohort.
# Normal-BMI cohort: a BMI record (concept 4245997) qualifies when it carries
# the "Normal" value concept (4069590) or a numeric value between 18 and 25
# in unit 9531.
cdm$cohort <- measurementCohort(
  cdm = cdm,
  name = "cohort",
  conceptSet = list(bmi_normal = 4245997),
  valueAsNumber = list("9531" = c(18, 25)),
  valueAsConcept = 4069590
)

cdm$cohort |> attrition()
cdm$cohort |> settings()
cdm$cohort
As you can see in the above code, the concept set is the list of BMI concepts, the concept value is the 'normal' weight concept, and the values are the minimum and maximum BMI scores to be considered.
It is also possible to include records outside of observation by setting the inObservation
argument to FALSE.
# Same normal-BMI definition, but records outside of the observation period
# are kept as well (inObservation = FALSE).
cdm$cohort <- measurementCohort(
  cdm = cdm,
  name = "cohort",
  conceptSet = list(bmi_normal = 4245997),
  valueAsNumber = list("9531" = c(18, 25)),
  valueAsConcept = 4069590,
  inObservation = FALSE
)

cdm$cohort |> attrition()
cdm$cohort |> settings()
cdm$cohort
# Release the connections left over from the earlier sections before
# reconnecting. At this point `cdm` still holds the mock database used for the
# measurement examples and `con` still holds the first Eunomia connection.
# Overwriting them without disconnecting would leak both.
cdmDisconnect(cdm)
if (DBI::dbIsValid(con)) {
  DBI::dbDisconnect(con)
}

# Reconnect to the Eunomia database with the same schemas and write prefix
# used at the start of the vignette.
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- cdmFromCon(
  con,
  cdmSchema = "main",
  writeSchema = "main",
  writePrefix = "my_study_"
)

# Rebuild the drugs cohort (exit on the event end date) so that it is
# available on the new cdm reference for the death-cohort examples.
cdm$drugs <- conceptCohort(
  cdm,
  conceptSet = drug_codes,
  exit = "event_end_date",
  name = "drugs"
)
Another base cohort we can make is one with individuals who have died. For this we'll simply be finding those people in the OMOP CDM death table and creating a cohort with them.
# Cohort of people who have a record in the OMOP CDM death table.
cdm$death_cohort <- cdm |>
  deathCohort(name = "death_cohort")
To create a cohort of individuals who have died, but restrict it to those already part of another cohort (e.g., the "drugs" cohort), you can use subsetCohort = "drugs"
. This ensures that only individuals from the "drugs" cohort are included in the death cohort.
# Death cohort restricted to individuals already in the "drugs" cohort.
cdm$death_drugs <- cdm |>
  deathCohort(
    name = "death_drugs",
    subsetCohort = "drugs"
  )
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.