test-runCharacterization.R
In Characterization: Implement Descriptive Studies Using the Common Data Model

# library(Characterization)
# library(testthat)

context("runCharacterizationAnalyses")

test_that("runCharacterizationAnalyses", {
  targetIds <- c(1, 2, 4)
  outcomeIds <- c(3)

  timeToEventSettings1 <- createTimeToEventSettings(
    targetIds = 1,
    outcomeIds = c(3, 4)
  )
  timeToEventSettings2 <- createTimeToEventSettings(
    targetIds = 2,
    outcomeIds = c(3, 4)
  )

  dechallengeRechallengeSettings <- createDechallengeRechallengeSettings(
    targetIds = targetIds,
    outcomeIds = outcomeIds,
    dechallengeStopInterval = 30,
    dechallengeEvaluationWindow = 31
  )

  aggregateCovariateSettings1 <- createAggregateCovariateSettings(
    targetIds = targetIds,
    outcomeIds = outcomeIds,
    riskWindowStart = 1,
    startAnchor = "cohort start",
    riskWindowEnd = 365,
    endAnchor = "cohort start",
    covariateSettings = FeatureExtraction::createCovariateSettings(
      useDemographicsGender = T,
      useDemographicsAge = T,
      useDemographicsRace = T
    )
  )

  aggregateCovariateSettings2 <- createAggregateCovariateSettings(
    targetIds = targetIds,
    outcomeIds = outcomeIds,
    riskWindowStart = 1,
    startAnchor = "cohort start",
    riskWindowEnd = 365,
    endAnchor = "cohort start",
    covariateSettings = FeatureExtraction::createCovariateSettings(
      useDemographicsGender = T,
      useDemographicsAge = T,
      useDemographicsRace = T
    )
  )

  characterizationSettings <- createCharacterizationSettings(
    timeToEventSettings = list(
      timeToEventSettings1,
      timeToEventSettings2
    ),
    dechallengeRechallengeSettings = list(
      dechallengeRechallengeSettings
    ),
    aggregateCovariateSettings = list(
      aggregateCovariateSettings1,
      aggregateCovariateSettings2
    )
  )

  testthat::expect_true(
    class(characterizationSettings) == "characterizationSettings"
  )

  testthat::expect_true(
    length(characterizationSettings$timeToEventSettings) == 2
  )
  testthat::expect_true(
    length(characterizationSettings$dechallengeRechallengeSettings) == 1
  )
  testthat::expect_true(
    length(characterizationSettings$aggregateCovariateSettings) == 2
  )

  tempFile <- tempfile(fileext = ".json")
  on.exit(unlink(tempFile))
  saveLoc <- saveCharacterizationSettings(
    settings = characterizationSettings,
    fileName = tempFile
  )

  testthat::expect_true(file.exists(tempFile))

  loadedSettings <- loadCharacterizationSettings(
    fileName = tempFile
  )

  # In R, empty arrays are automatically of type 'logical.' When loading JSON
  # they are currently automatically of type 'list'. Neither is right or wrong,
  # so ignoring distinction:
  convertEmptyListToEmptyLogical <- function(object) {
    if (is.list(object)) {
      if (length(object) == 0) {
        return(vector(mode = "logical", length = 0))
      } else {
        return(lapply(object, convertEmptyListToEmptyLogical))
      }
    } else {
      return(object)
    }
  }
  testthat::expect_equivalent(characterizationSettings, convertEmptyListToEmptyLogical(loadedSettings))

  tempFolder <- tempfile("Characterization")
  on.exit(unlink(tempFolder, recursive = TRUE), add = TRUE)

  runCharacterizationAnalyses(
    connectionDetails = connectionDetails,
    cdmDatabaseSchema = "main",
    targetDatabaseSchema = "main",
    targetTable = "cohort",
    outcomeDatabaseSchema = "main",
    outcomeTable = "cohort",
    characterizationSettings = characterizationSettings,
    outputDirectory = file.path(tempFolder, "result"),
    executionPath = file.path(tempFolder, "execution"),
    csvFilePrefix = "c_",
    databaseId = "1",
    incremental = T,
    minCharacterizationMean = 0.01,
    threads = 1
  )

  testthat::expect_true(
    dir.exists(file.path(tempFolder, "result"))
  )

  # check csv files
  testthat::expect_true(
    length(dir(file.path(tempFolder, "result"))) > 0
  )

  # check cohort details is saved
  testthat::expect_true(
    file.exists(file.path(tempFolder, "result", "c_cohort_details.csv"))
  )
  testthat::expect_true(
    file.exists(file.path(tempFolder, "result", "c_settings.csv"))
  )
  testthat::expect_true(
    file.exists(file.path(tempFolder, "result", "c_analysis_ref.csv"))
  )
  testthat::expect_true(
    file.exists(file.path(tempFolder, "result", "c_covariate_ref.csv"))
  )
  testthat::expect_true(
    file.exists(file.path(tempFolder, "result", "c_covariates.csv"))
  )
  testthat::expect_true(
    file.exists(file.path(tempFolder, "result", "c_covariates_continuous.csv"))
  )

  # no results for dechal due to Eunomia - how to test?
  testthat::expect_false(
    file.exists(file.path(tempFolder, "result", "c_dechallenge_rechallenge.csv"))
  )
  # testthat::expect_true(
  #  file.exists(file.path(tempFolder, "result", "rechallenge_fail_case_series.csv"))
  # )
  testthat::expect_true(
    file.exists(file.path(tempFolder, "result", "c_time_to_event.csv"))
  )

  # make sure both tte runs are in the csv
  tte <- readr::read_csv(
    file = file.path(tempFolder, "result", "c_time_to_event.csv"),
    show_col_types = FALSE
  )
  testthat::expect_equivalent(
    unique(tte$target_cohort_definition_id),
    c(1, 2)
  )
})



manualDataMin <- file.path(tempdir(), "manual_min.sqlite")
on.exit(file.remove(manualDataMin), add = TRUE)

test_that("min cell count works", {
  tempFolder <- tempfile("CharacterizationMin")
  on.exit(unlink(tempFolder, recursive = TRUE), add = TRUE)

  connectionDetails <- DatabaseConnector::createConnectionDetails(
    dbms = "sqlite",
    server = manualDataMin
  )
  con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
  on.exit(DatabaseConnector::disconnect(con))
  schema <- "main"

  # add persons  - aggregate covs (age)
  persons <- data.frame(
    person_id = 1:10,
    gender_concept_id = rep(8532, 10),
    year_of_birth = rep(2000, 10),
    race_concept_id = rep(1, 10),
    ethnicity_concept_id = rep(1, 10),
    location_id = rep(1, 10),
    provider_id = rep(1, 10),
    care_site_id = rep(1, 10),
    person_source_value = 1:10,
    gender_source_value = rep("female", 10),
    race_source_value = rep("na", 10),
    ethnicity_source_value = rep("na", 10)
  )
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "person",
    data = persons
  )

  # observation period
  obs_period <- data.frame(
    observation_period_id = 1:10,
    person_id = 1:10,
    observation_period_start_date = rep("2000-12-31", 10),
    observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
    period_type_concept_id = rep(1, 10)
  )
  obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
  obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "observation_period",
    data = obs_period
  )
  # person 1 has 1 day obs
  # person 2-6 has no events
  # person 7 has diabetes at 10, headache at 12
  # person 8 has diabetes at 13
  # person 9 has headache multiple times
  # person 10 has diabetes at 14
  # add conditions - aggregate covs (conditions)

  condition_era <- data.frame(
    condition_era_id = 1:7,
    person_id = c(7, 7, 8, 9, 9, 9, 10),
    condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
    condition_era_start_date = c(
      "2011-01-01", "2013-04-03", "2016-01-01",
      "2006-01-04", "2014-08-02", "2014-08-04",
      "2013-01-04"
    ),
    condition_era_end_date = c(
      "2011-01-01", "2013-04-03", "2016-01-01",
      "2006-01-04", "2014-08-02", "2014-08-04",
      "2013-01-04"
    ),
    condition_occurrence_count = rep(1, 7)
  )
  condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
  condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)

  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "condition_era",
    data = condition_era
  )

  # add concept
  concept <- data.frame(
    concept_id = c(201820, 378253),
    concept_name = c("diabetes", "hypertension"),
    domain_id = rep(1, 2),
    vocabulary_id = rep(1, 2),
    concept_class_id = c("Condition", "Condition"),
    standard_concept = rep("S", 2),
    concept_code = rep("Snowmed", 2)
    # ,valid_start_date = NULL,
    # valid_end_date = NULL,
    # invalid_reason = NULL
  )
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "concept",
    data = concept
  )

  # add cohort  - tte/dechal/rechal
  cohort <- data.frame(
    subject_id = c(
      1:10,
      7, 8, 10,
      c(3, 6, 7, 8, 10),
      c(7)
    ),
    cohort_definition_id = c(
      rep(1, 10),
      rep(1, 3),
      rep(2, 5),
      2
    ),
    cohort_start_date = c(
      rep("2018-01-01", 10),
      rep("2018-05-01", 3),
      "2018-01-13", "2018-01-03", rep("2018-01-06", 3),
      "2018-05-24"
    ),
    cohort_end_date = c(
      rep("2018-02-01", 10),
      rep("2018-06-01", 3),
      "2018-02-02", "2018-02-04", rep("2018-02-08", 3),
      "2018-06-05"
    )
  )
  cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
  cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
  DatabaseConnector::insertTable(
    connection = con,
    databaseSchema = schema,
    tableName = "cohort",
    data = cohort
  )

  # create settings and run
  characterizationSettings <- Characterization::createCharacterizationSettings(
    timeToEventSettings = Characterization::createTimeToEventSettings(
      targetIds = 1,
      outcomeIds = 2
    ),
    dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
      targetIds = 1,
      outcomeIds = 2
    ),
    aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
      targetIds = 1,
      outcomeIds = 2,
      minPriorObservation = 365,
      outcomeWashoutDays = 30,
      riskWindowStart = 1,
      riskWindowEnd = 90,
      covariateSettings = FeatureExtraction::createCovariateSettings(
        useDemographicsAge = T,
        useDemographicsGender = T,
        useConditionEraAnyTimePrior = T
      ),
      caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
      casePreTargetDuration = 365 * 5
    )
  )

  Characterization::runCharacterizationAnalyses(
    connectionDetails = connectionDetails,
    cdmDatabaseSchema = "main",
    targetDatabaseSchema = "main",
    targetTable = "cohort",
    outcomeDatabaseSchema = "main",
    outcomeTable = "cohort",
    characterizationSettings = characterizationSettings,
    outputDirectory = file.path(tempFolder, "result_mincell"),
    executionPath = file.path(tempFolder, "execution_mincell"),
    csvFilePrefix = "c_",
    databaseId = "1",
    incremental = F,
    minCharacterizationMean = 0.001,
    threads = 1,
    minCellCount = 1000000
  )

  testthat::expect_true(
    file.exists(file.path(tempFolder, "result_mincell", "c_time_to_event.csv"))
  )
  res <- readr::read_csv(file.path(tempFolder, "result_mincell", "c_time_to_event.csv"))
  # all values will be censored to -1 times the minCellCount of 10000000
  testthat::expect_true(sum(res$num_events == -1000000) == length(res$num_events))

  testthat::expect_true(
    file.exists(file.path(tempFolder, "result_mincell", "c_dechallenge_rechallenge.csv"))
  )
  res <- readr::read_csv(file.path(tempFolder, "result_mincell", "c_dechallenge_rechallenge.csv"))
  # all values will be censored to -1 times the minCellCount of 10000000
  testthat::expect_true(sum(res$num_exposure_eras == -1000000) == length(res$num_exposure_eras))
  testthat::expect_true(sum(res$num_cases == -1000000) == length(res$num_cases))
  testthat::expect_true(sum(res$dechallenge_attempt == -1000000) == length(res$dechallenge_attempt))
  testthat::expect_true(sum(res$dechallenge_success == -1000000) == length(res$dechallenge_success))
  testthat::expect_true(sum(res$dechallenge_fail == -1000000) == length(res$dechallenge_fail))

  testthat::expect_true(sum(res$rechallenge_attempt == -1000000) == length(res$rechallenge_attempt))
  testthat::expect_true(sum(res$rechallenge_success == -1000000) == length(res$rechallenge_success))
  testthat::expect_true(sum(res$rechallenge_fail == -1000000) == length(res$rechallenge_fail))

  testthat::expect_true(sum(is.na(res$pct_dechallenge_attempt)) == length(res$pct_dechallenge_attempt))
  testthat::expect_true(sum(is.na(res$pct_dechallenge_success)) == length(res$pct_dechallenge_success))
  testthat::expect_true(sum(is.na(res$pct_dechallenge_fail)) == length(res$pct_dechallenge_fail))

  testthat::expect_true(sum(is.na(res$pct_rechallenge_attempt)) == length(res$pct_rechallenge_attempt))
  testthat::expect_true(sum(is.na(res$pct_rechallenge_success)) == length(res$pct_rechallenge_success))
  testthat::expect_true(sum(is.na(res$pct_rechallenge_fail)) == length(res$pct_rechallenge_fail))


  testthat::expect_true(
    file.exists(file.path(tempFolder, "result_mincell", "c_cohort_counts.csv"))
  )
  res <- readr::read_csv(file.path(tempFolder, "result_mincell", "c_cohort_counts.csv"))
  testthat::expect_true(sum(res$row_count == -1000000) == length(res$row_count))
  testthat::expect_true(sum(res$person_count == -1000000) == length(res$person_count))


  testthat::expect_true(
    file.exists(file.path(tempFolder, "result_mincell", "c_covariates.csv"))
  )
  res <- readr::read_csv(file.path(tempFolder, "result_mincell", "c_covariates.csv"))
  testthat::expect_true(sum(res$sum_value == -1000000) == length(res$sum_value))
  testthat::expect_true(sum(is.na(res$average_value)) == length(res$average_value))

  testthat::expect_true(
    file.exists(file.path(tempFolder, "result_mincell", "c_covariates_continuous.csv"))
  )
  res <- readr::read_csv(file.path(tempFolder, "result_mincell", "c_covariates_continuous.csv"))
  testthat::expect_true(sum(res$count_value == -1000000) == length(res$count_value))
  testthat::expect_true(sum(is.na(res$average_value)) == length(res$average_value))
  testthat::expect_true(sum(is.na(res$p_10_value)) == length(res$p_10_value))
  testthat::expect_true(sum(is.na(res$p_90_value)) == length(res$p_90_value))
  testthat::expect_true(sum(is.na(res$p_25_value)) == length(res$p_25_value))
  testthat::expect_true(sum(is.na(res$p_75_value)) == length(res$p_75_value))
  testthat::expect_true(sum(is.na(res$min_value)) == length(res$min_value))
  testthat::expect_true(sum(is.na(res$max_value)) == length(res$max_value))
})
Any scripts or data that you put into this service are public.
Characterization documentation built on April 4, 2025, 2:02 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Characterization
Implement Descriptive Studies Using the Common Data Model

tests/testthat/test-runCharacterization.R
In Characterization: Implement Descriptive Studies Using the Common Data Model

Try the Characterization package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Characterization Implement Descriptive Studies Using the Common Data Model

tests/testthat/test-runCharacterization.R In Characterization: Implement Descriptive Studies Using the Common Data Model

Try the Characterization package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Characterization
Implement Descriptive Studies Using the Common Data Model

tests/testthat/test-runCharacterization.R
In Characterization: Implement Descriptive Studies Using the Common Data Model