test-manualData.R
In Characterization: Implement Descriptive Studies Using the Common Data Model

context("manual data")

manualData <- file.path(tempdir(), "manual.sqlite")
on.exit(file.remove(manualData), add = TRUE)

manualData2 <- file.path(tempdir(), "manual2.sqlite")
on.exit(file.remove(manualData2), add = TRUE)

test_that("manual data runCharacterizationAnalyses", {
  # this test creates made-up OMOP CDM data
  # and runs runCharacterizationAnalyses on the data
  # to check whether the results are as expected
  connectionDetails <- DatabaseConnector::createConnectionDetails(
    dbms = "sqlite",
    server = manualData
  )
  con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
  schema <- "main"

  # add persons  - aggregate covs (age)
  persons <- data.frame(
    person_id = 1:10,
    gender_concept_id = rep(8532, 10),
    year_of_birth = rep(2000, 10),
    race_concept_id = rep(1, 10),
    ethnicity_concept_id = rep(1, 10),
    location_id = rep(1, 10),
    provider_id = rep(1, 10),
    care_site_id = rep(1, 10),
    person_source_value = 1:10,
    gender_source_value = rep("female", 10),
    race_source_value = rep("na", 10),
    ethnicity_source_value = rep("na", 10)
  )
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "person",
    data = persons
  )

  # observation period
  obs_period <- data.frame(
    observation_period_id = 1:10,
    person_id = 1:10,
    observation_period_start_date = rep("2000-12-31", 10),
    observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
    period_type_concept_id = rep(1, 10)
  )
  obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
  obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "observation_period",
    data = obs_period
  )
  # person 1 has 1 day obs
  # person 2-6 has no events
  # person 7 has diabetes at 10, headache at 12
  # person 8 has diabetes at 13
  # person 9 has headache multiple times
  # person 10 has diabetes at 14
  # add conditions - aggregate covs (conditions)

  condition_era <- data.frame(
    condition_era_id = 1:7,
    person_id = c(7, 7, 8, 9, 9, 9, 10),
    condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
    condition_era_start_date = c(
      "2011-01-01", "2013-04-03", "2016-01-01",
      "2006-01-04", "2014-08-02", "2014-08-04",
      "2013-01-04"
    ),
    condition_era_end_date = c(
      "2011-01-01", "2013-04-03", "2016-01-01",
      "2006-01-04", "2014-08-02", "2014-08-04",
      "2013-01-04"
    ),
    condition_occurrence_count = rep(1, 7)
  )
  condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
  condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)

  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "condition_era",
    data = condition_era
  )

  # add concept
  concept <- data.frame(
    concept_id = c(201820, 378253),
    concept_name = c("diabetes", "hypertension"),
    domain_id = rep(1, 2),
    vocabulary_id = rep(1, 2),
    concept_class_id = c("Condition", "Condition"),
    standard_concept = rep("S", 2),
    concept_code = rep("Snowmed", 2)
    # ,valid_start_date = NULL,
    # valid_end_date = NULL,
    # invalid_reason = NULL
  )
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "concept",
    data = concept
  )

  # add cohort  - tte/dechal/rechal
  cohort <- data.frame(
    subject_id = c(
      1:10,
      7, 8, 10,
      c(3, 6, 7, 8, 10),
      c(7)
    ),
    cohort_definition_id = c(
      rep(1, 10),
      rep(1, 3),
      rep(2, 5),
      2
    ),
    cohort_start_date = c(
      rep("2018-01-01", 10),
      rep("2018-05-01", 3),
      "2018-01-13", "2018-01-03", rep("2018-01-06", 3),
      "2018-05-24"
    ),
    cohort_end_date = c(
      rep("2018-02-01", 10),
      rep("2018-06-01", 3),
      "2018-02-02", "2018-02-04", rep("2018-02-08", 3),
      "2018-06-05"
    )
  )
  cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
  cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "cohort",
    data = cohort
  )

  # create settings and run
  characterizationSettings <- Characterization::createCharacterizationSettings(
    timeToEventSettings = Characterization::createTimeToEventSettings(
      targetIds = 1,
      outcomeIds = 2
    ),
    dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
      targetIds = 1,
      outcomeIds = 2
    ),
    aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
      targetIds = 1,
      outcomeIds = 2,
      minPriorObservation = 365,
      outcomeWashoutDays = 30,
      riskWindowStart = 1,
      riskWindowEnd = 90,
      covariateSettings = FeatureExtraction::createCovariateSettings(
        useDemographicsAge = T,
        useDemographicsGender = T,
        useConditionEraAnyTimePrior = T
      ),
      caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
      casePreTargetDuration = 365 * 5
    )
  )
  Characterization::runCharacterizationAnalyses(
    connectionDetails = connectionDetails,
    targetDatabaseSchema = schema,
    targetTable = "cohort",
    outcomeDatabaseSchema = schema,
    outcomeTable = "cohort",
    cdmDatabaseSchema = schema,
    characterizationSettings = characterizationSettings,
    outputDirectory = file.path(tempdir(), "result"),
    executionPath = file.path(tempdir(), "execution"),
    csvFilePrefix = "c_",
    databaseId = "1",
    incremental = T,
    threads = 1,
    minCharacterizationMean = 0.0001,
    minCellCount = NULL,
    showSubjectId = T
  )

  # check csv results are as expected

  tte <- read.csv(file.path(tempdir(), "result", "c_time_to_event.csv"))

  # check counts - 1-day subsequent missing?
  testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]))
  # subsequent is > 100 days after first drug so not in the 1-day count
  testthat::expect_true(0 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 1-day"]))
  testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 30-day"]))
  testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 30-day"]))
  testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 365-day"]))
  testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 365-day"]))

  # check times
  testthat::expect_true(sum(c(2, 5, 12) %in% tte$time_to_event[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]) == 3)

  # TODO: check in code whether minCellCount < or <=

  dechal <- read.csv(file.path(tempdir(), "result", "c_dechallenge_rechallenge.csv"))
  testthat::expect_true(dechal$num_exposure_eras == 13)
  testthat::expect_true(dechal$num_persons_exposed == 10)
  testthat::expect_true(dechal$num_cases == 6)
  testthat::expect_true(dechal$dechallenge_attempt == 5)
  testthat::expect_true(dechal$dechallenge_success == 5)
  testthat::expect_true(dechal$rechallenge_attempt == 3)

  # one person has a rechal and event stops when second drug exposure stops
  testthat::expect_true(dechal$rechallenge_fail == 1)
  testthat::expect_true(dechal$rechallenge_success == 2)
  testthat::expect_true(dechal$pct_rechallenge_fail == 0.3333333)

  failed <- read.csv(file.path(tempdir(), "result", "c_rechallenge_fail_case_series.csv"))
  testthat::expect_true(nrow(failed) == 1)
  testthat::expect_true(failed$subject_id == 7)
  testthat::expect_true(failed$dechallenge_exposure_end_date_offset == 31)
  testthat::expect_true(failed$dechallenge_outcome_start_date_offset == 5)
  testthat::expect_true(failed$rechallenge_exposure_start_date_offset == 120)
  testthat::expect_true(failed$rechallenge_outcome_start_date_offset == 143)

  # Aggregate covs
  # =======
  counts <- read.csv(file.path(tempdir(), "result", "c_cohort_counts.csv"))
  # when restricted to first exposure 5 people have outcome
  testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 5)
  # target is 9 because 1 has insufficient min prior obs
  testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
    counts$target_cohort_id == 1] == 9)
  # make sure outcome is there a has count of 5
  testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
    counts$target_cohort_id == 2] == 5)

  # Tall should not have first restriction
  testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
    counts$target_cohort_id == 1] == 13)
  testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
    counts$target_cohort_id == 1] == 10)
  # make sure outcome is there a has count of 6 and 5
  testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
    counts$target_cohort_id == 2] == 6)
  testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
    counts$target_cohort_id == 2] == 5)

  covs <- read.csv(file.path(tempdir(), "result", "c_covariates.csv"))

  # checks all females
  testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Cases"] == 1)
  testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Target" &
    covs$target_cohort_id == 1] == 1)
  testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Target" &
    covs$target_cohort_id == 2] == 1)

  ## TODO: check diabetes and hypertensions
  # covs$covariate_id
  # 201820 7,8 and 10 have in history and all are cases
  ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
    covs$cohort_type == "Cases"
  testthat::expect_true(covs$sum_value[ind] == 3)
  testthat::expect_true(covs$average_value[ind] == 3 / 5)

  ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
    covs$cohort_type == "Target"
  testthat::expect_true(covs$sum_value[ind] == 3)
  testthat::expect_equal(covs$average_value[ind], 3 / 9, tolerance = 0.01)

  # 378253 7,9 (9 multiple times) but 9 not a case
  ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
    covs$cohort_type == "Cases"
  testthat::expect_true(covs$sum_value[ind] == 1)
  testthat::expect_true(covs$average_value[ind] == 1 / 5)

  ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
    covs$cohort_type == "Target"
  testthat::expect_true(covs$sum_value[ind] == 2)
  testthat::expect_equal(covs$average_value[ind], 2 / 9, tolerance = 0.01)


  covs_cont <- read.csv(file.path(tempdir(), "result", "c_covariates_continuous.csv"))

  # checks age in years
  testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 18)
  testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 5)
  testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
    covs_cont$target_cohort_id == 1] == 18)
  testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
    covs_cont$target_cohort_id == 1] == 9)
  testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
    covs_cont$target_cohort_id == 2] == 18)
  testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
    covs_cont$target_cohort_id == 2] == 5)
})



test_that("manual data checking exclude count works", {
  # this test creates made-up OMOP CDM data
  # and runs runCharacterizationAnalyses on the data
  # to check whether the results are as expected
  connectionDetails <- DatabaseConnector::createConnectionDetails(
    dbms = "sqlite",
    server = manualData2
  )
  con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
  schema <- "main"

  # add persons  - aggregate covs (age)
  persons <- data.frame(
    person_id = 1:10,
    gender_concept_id = rep(8532, 10),
    year_of_birth = rep(2000, 10),
    race_concept_id = rep(1, 10),
    ethnicity_concept_id = rep(1, 10),
    location_id = rep(1, 10),
    provider_id = rep(1, 10),
    care_site_id = rep(1, 10),
    person_source_value = 1:10,
    gender_source_value = rep("female", 10),
    race_source_value = rep("na", 10),
    ethnicity_source_value = rep("na", 10)
  )
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "person",
    data = persons
  )

  # observation period
  obs_period <- data.frame(
    observation_period_id = 1:10,
    person_id = 1:10,
    observation_period_start_date = rep("2000-12-31", 10),
    observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
    period_type_concept_id = rep(1, 10)
  )
  obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
  obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "observation_period",
    data = obs_period
  )
  # person 1 has 1 day obs
  # person 2-6 has no events
  # person 7 has diabetes at 10, headache at 12
  # person 8 has diabetes at 13
  # person 9 has headache multiple times
  # person 10 has diabetes at 14
  # add conditions - aggregate covs (conditions)

  condition_era <- data.frame(
    condition_era_id = 1:7,
    person_id = c(7, 7, 8, 9, 9, 9, 10),
    condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
    condition_era_start_date = c(
      "2011-01-01", "2013-04-03", "2016-01-01",
      "2006-01-04", "2014-08-02", "2014-08-04",
      "2013-01-04"
    ),
    condition_era_end_date = c(
      "2011-01-01", "2013-04-03", "2016-01-01",
      "2006-01-04", "2014-08-02", "2014-08-04",
      "2013-01-04"
    ),
    condition_occurrence_count = rep(1, 7)
  )
  condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
  condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)

  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "condition_era",
    data = condition_era
  )

  # add concept
  concept <- data.frame(
    concept_id = c(201820, 378253),
    concept_name = c("diabetes", "hypertension"),
    domain_id = rep(1, 2),
    vocabulary_id = rep(1, 2),
    concept_class_id = c("Condition", "Condition"),
    standard_concept = rep("S", 2),
    concept_code = rep("Snowmed", 2)
    # ,valid_start_date = NULL,
    # valid_end_date = NULL,
    # invalid_reason = NULL
  )
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "concept",
    data = concept
  )

  # add cohort  - tte/dechal/rechal
  # person 6 has the outcome just before the exposure
  cohort <- data.frame(
    subject_id = c(
      1:10,
      7, 8, 10,
      c(3, 6, 7, 8, 10),
      c(7),
      6
    ),
    cohort_definition_id = c(
      rep(1, 10),
      rep(1, 3),
      rep(2, 5),
      2,
      2
    ),
    cohort_start_date = c(
      rep("2018-01-01", 10),
      rep("2018-05-01", 3),
      "2018-01-13", "2018-01-03", rep("2018-01-06", 3),
      "2018-05-24",
      "2017-12-29"
    ),
    cohort_end_date = c(
      rep("2018-02-01", 10),
      rep("2018-06-01", 3),
      "2018-02-02", "2018-02-04", rep("2018-02-08", 3),
      "2018-06-05",
      "2017-12-29"
    )
  )
  cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
  cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "cohort",
    data = cohort
  )

  # create settings and run
  characterizationSettings <- Characterization::createCharacterizationSettings(
    timeToEventSettings = Characterization::createTimeToEventSettings(
      targetIds = 1,
      outcomeIds = 2
    ),
    dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
      targetIds = 1,
      outcomeIds = 2
    ),
    aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
      targetIds = 1,
      outcomeIds = 2,
      minPriorObservation = 365,
      outcomeWashoutDays = 30,
      riskWindowStart = 1,
      riskWindowEnd = 90,
      covariateSettings = FeatureExtraction::createCovariateSettings(
        useDemographicsAge = T,
        useDemographicsGender = T,
        useConditionEraAnyTimePrior = T
      ),
      caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
      casePreTargetDuration = 365 * 5
    )
  )
  Characterization::runCharacterizationAnalyses(
    connectionDetails = connectionDetails,
    targetDatabaseSchema = schema,
    targetTable = "cohort",
    outcomeDatabaseSchema = schema,
    outcomeTable = "cohort",
    cdmDatabaseSchema = schema,
    characterizationSettings = characterizationSettings,
    outputDirectory = file.path(tempdir(), "result2"),
    executionPath = file.path(tempdir(), "execution2"),
    csvFilePrefix = "c_",
    databaseId = "1",
    incremental = T,
    threads = 1,
    minCharacterizationMean = 0.0001,
    minCellCount = NULL,
    showSubjectId = T
  )

  # load the cohort counts to make sure the exclude is there
  counts <- read.csv(file.path(tempdir(), "result2", "c_cohort_counts.csv"))
  # when restricted to first exposure 5 people have outcome
  testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 4)
  testthat::expect_true(counts$row_count[counts$cohort_type == "Exclude"] == 1)
})
Any scripts or data that you put into this service are public.
Characterization documentation built on April 4, 2025, 2:02 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Characterization
Implement Descriptive Studies Using the Common Data Model

tests/testthat/test-manualData.R
In Characterization: Implement Descriptive Studies Using the Common Data Model

Try the Characterization package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Characterization Implement Descriptive Studies Using the Common Data Model

tests/testthat/test-manualData.R In Characterization: Implement Descriptive Studies Using the Common Data Model

Try the Characterization package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Characterization
Implement Descriptive Studies Using the Common Data Model

tests/testthat/test-manualData.R
In Characterization: Implement Descriptive Studies Using the Common Data Model