Combining Cohorts

NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")

knitr::opts_chunk$set(
  collapse = TRUE, 
  warning = FALSE, 
  message = FALSE,
  comment = "#>",
  eval = NOT_CRAN
)
library(CDMConnector)
library(dplyr, warn.conflicts = FALSE)

if (Sys.getenv("EUNOMIA_DATA_FOLDER") == ""){
  Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))}
if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))){ dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER"))
  downloadEunomiaData()  
}
library(CohortConstructor)
library(CohortCharacteristics)
library(ggplot2)

For this example we'll use the Eunomia synthetic data from the CDMConnector package.

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- cdmFromCon(con, cdmSchema = "main", 
                    writeSchema = c(prefix = "my_study_", schema = "main"))

Let's start by creating two drug cohorts, one for users of diclofenac and another for users of acetaminophen.

cdm$medications <- conceptCohort(cdm = cdm, 
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433), 
                                 name = "medications")
cohortCount(cdm$medications)

To check whether there is an overlap between records in both cohorts using the function intersectCohorts().

cdm$medintersect <- CohortConstructor::intersectCohorts(
  cohort = cdm$medications,
  name = "medintersect"
)

cohortCount(cdm$medintersect)

There are 6 individuals who had overlapping records in the diclofenac and acetaminophen cohorts.

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- CDMConnector::cdmFromCon(con, cdmSchema = "main", 
                    writeSchema = "main", writePrefix = "my_study_")
cdm$medications <- conceptCohort(cdm = cdm, 
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433), 
                                 name = "medications")

We can choose the number of days between cohort entries using the gap argument.

cdm$medintersect <- CohortConstructor::intersectCohorts(
  cohort = cdm$medications,
  gap = 365,
  name = "medintersect"
)

cohortCount(cdm$medintersect)

There are 94 individuals who had overlapping records (within 365 days) in the diclofenac and acetaminophen cohorts.

We can also combine different cohorts using the function unionCohorts().

cdm$medunion <- CohortConstructor::unionCohorts(
  cohort = cdm$medications,
  name = "medunion"
)

cohortCount(cdm$medunion)

We have now created a new cohort which includes individuals in either the diclofenac cohort or the acetaminophen cohort.

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- cdmFromCon(con, cdmSchema = "main", 
                    writeSchema = c(prefix = "my_study_", schema = "main"))
cdm$medications <- conceptCohort(cdm = cdm, 
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433), 
                                 name = "medications")

You can keep the original cohorts in the new table if you use the argument keepOriginalCohorts = TRUE.

cdm$medunion <- CohortConstructor::unionCohorts(
  cohort = cdm$medications,
  name = "medunion",
  keepOriginalCohorts = TRUE
)

cohortCount(cdm$medunion)

You can also choose the number of days between two subsequent cohort entries to be merged using the gap argument.

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- cdmFromCon(con, cdmSchema = "main", 
                    writeSchema = c(prefix = "my_study_", schema = "main"))
cdm$medications <- conceptCohort(cdm = cdm, 
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433), 
                                 name = "medications")
cdm$medunion <- CohortConstructor::unionCohorts(
  cohort = cdm$medications,
  name = "medunion",
  gap = 365,
  keepOriginalCohorts = TRUE
)

cohortCount(cdm$medunion)


Try the CohortConstructor package in your browser

Any scripts or data that you put into this service are public.

CohortConstructor documentation built on June 8, 2025, 12:49 p.m.