Nothing
context("manual data")
manualData <- file.path(tempdir(), "manual.sqlite")
on.exit(file.remove(manualData), add = TRUE)
manualData2 <- file.path(tempdir(), "manual2.sqlite")
on.exit(file.remove(manualData2), add = TRUE)
test_that("manual data runCharacterizationAnalyses", {
# this test creates made-up OMOP CDM data
# and runs runCharacterizationAnalyses on the data
# to check whether the results are as expected
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = "sqlite",
server = manualData
)
con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
schema <- "main"
# add persons - aggregate covs (age)
persons <- data.frame(
person_id = 1:10,
gender_concept_id = rep(8532, 10),
year_of_birth = rep(2000, 10),
race_concept_id = rep(1, 10),
ethnicity_concept_id = rep(1, 10),
location_id = rep(1, 10),
provider_id = rep(1, 10),
care_site_id = rep(1, 10),
person_source_value = 1:10,
gender_source_value = rep("female", 10),
race_source_value = rep("na", 10),
ethnicity_source_value = rep("na", 10)
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "person",
data = persons
)
# observation period
obs_period <- data.frame(
observation_period_id = 1:10,
person_id = 1:10,
observation_period_start_date = rep("2000-12-31", 10),
observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
period_type_concept_id = rep(1, 10)
)
obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "observation_period",
data = obs_period
)
# person 1 has 1 day obs
# person 2-6 has no events
# person 7 has diabetes at 10, headache at 12
# person 8 has diabetes at 13
# person 9 has headache multiple times
# person 10 has diabetes at 14
# add conditions - aggregate covs (conditions)
condition_era <- data.frame(
condition_era_id = 1:7,
person_id = c(7, 7, 8, 9, 9, 9, 10),
condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
condition_era_start_date = c(
"2011-01-01", "2013-04-03", "2016-01-01",
"2006-01-04", "2014-08-02", "2014-08-04",
"2013-01-04"
),
condition_era_end_date = c(
"2011-01-01", "2013-04-03", "2016-01-01",
"2006-01-04", "2014-08-02", "2014-08-04",
"2013-01-04"
),
condition_occurrence_count = rep(1, 7)
)
condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "condition_era",
data = condition_era
)
# add concept
concept <- data.frame(
concept_id = c(201820, 378253),
concept_name = c("diabetes", "hypertension"),
domain_id = rep(1, 2),
vocabulary_id = rep(1, 2),
concept_class_id = c("Condition", "Condition"),
standard_concept = rep("S", 2),
concept_code = rep("Snowmed", 2)
# ,valid_start_date = NULL,
# valid_end_date = NULL,
# invalid_reason = NULL
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "concept",
data = concept
)
# add cohort - tte/dechal/rechal
cohort <- data.frame(
subject_id = c(
1:10,
7, 8, 10,
c(3, 6, 7, 8, 10),
c(7)
),
cohort_definition_id = c(
rep(1, 10),
rep(1, 3),
rep(2, 5),
2
),
cohort_start_date = c(
rep("2018-01-01", 10),
rep("2018-05-01", 3),
"2018-01-13", "2018-01-03", rep("2018-01-06", 3),
"2018-05-24"
),
cohort_end_date = c(
rep("2018-02-01", 10),
rep("2018-06-01", 3),
"2018-02-02", "2018-02-04", rep("2018-02-08", 3),
"2018-06-05"
)
)
cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "cohort",
data = cohort
)
# create settings and run
characterizationSettings <- Characterization::createCharacterizationSettings(
timeToEventSettings = Characterization::createTimeToEventSettings(
targetIds = 1,
outcomeIds = 2
),
dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
targetIds = 1,
outcomeIds = 2
),
aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
targetIds = 1,
outcomeIds = 2,
minPriorObservation = 365,
outcomeWashoutDays = 30,
riskWindowStart = 1,
riskWindowEnd = 90,
covariateSettings = FeatureExtraction::createCovariateSettings(
useDemographicsAge = T,
useDemographicsGender = T,
useConditionEraAnyTimePrior = T
),
caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
casePreTargetDuration = 365 * 5
)
)
Characterization::runCharacterizationAnalyses(
connectionDetails = connectionDetails,
targetDatabaseSchema = schema,
targetTable = "cohort",
outcomeDatabaseSchema = schema,
outcomeTable = "cohort",
cdmDatabaseSchema = schema,
characterizationSettings = characterizationSettings,
outputDirectory = file.path(tempdir(), "result"),
executionPath = file.path(tempdir(), "execution"),
csvFilePrefix = "c_",
databaseId = "1",
incremental = T,
threads = 1,
minCharacterizationMean = 0.0001,
minCellCount = NULL,
showSubjectId = T
)
# check csv results are as expected
tte <- read.csv(file.path(tempdir(), "result", "c_time_to_event.csv"))
# check counts - 1-day subsequent missing?
testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]))
# subsequent is > 100 days after first drug so not in the 1-day count
testthat::expect_true(0 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 1-day"]))
testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 30-day"]))
testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 30-day"]))
testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 365-day"]))
testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 365-day"]))
# check times
testthat::expect_true(sum(c(2, 5, 12) %in% tte$time_to_event[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]) == 3)
# TODO: check in code whether minCellCount < or <=
dechal <- read.csv(file.path(tempdir(), "result", "c_dechallenge_rechallenge.csv"))
testthat::expect_true(dechal$num_exposure_eras == 13)
testthat::expect_true(dechal$num_persons_exposed == 10)
testthat::expect_true(dechal$num_cases == 6)
testthat::expect_true(dechal$dechallenge_attempt == 5)
testthat::expect_true(dechal$dechallenge_success == 5)
testthat::expect_true(dechal$rechallenge_attempt == 3)
# one person has a rechal and event stops when second drug exposure stops
testthat::expect_true(dechal$rechallenge_fail == 1)
testthat::expect_true(dechal$rechallenge_success == 2)
testthat::expect_true(dechal$pct_rechallenge_fail == 0.3333333)
failed <- read.csv(file.path(tempdir(), "result", "c_rechallenge_fail_case_series.csv"))
testthat::expect_true(nrow(failed) == 1)
testthat::expect_true(failed$subject_id == 7)
testthat::expect_true(failed$dechallenge_exposure_end_date_offset == 31)
testthat::expect_true(failed$dechallenge_outcome_start_date_offset == 5)
testthat::expect_true(failed$rechallenge_exposure_start_date_offset == 120)
testthat::expect_true(failed$rechallenge_outcome_start_date_offset == 143)
# Aggregate covs
# =======
counts <- read.csv(file.path(tempdir(), "result", "c_cohort_counts.csv"))
# when restricted to first exposure 5 people have outcome
testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 5)
# target is 9 because 1 has insufficient min prior obs
testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
counts$target_cohort_id == 1] == 9)
# make sure outcome is there a has count of 5
testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
counts$target_cohort_id == 2] == 5)
# Tall should not have first restriction
testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
counts$target_cohort_id == 1] == 13)
testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
counts$target_cohort_id == 1] == 10)
# make sure outcome is there a has count of 6 and 5
testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
counts$target_cohort_id == 2] == 6)
testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
counts$target_cohort_id == 2] == 5)
covs <- read.csv(file.path(tempdir(), "result", "c_covariates.csv"))
# checks all females
testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Cases"] == 1)
testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Target" &
covs$target_cohort_id == 1] == 1)
testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Target" &
covs$target_cohort_id == 2] == 1)
## TODO: check diabetes and hypertensions
# covs$covariate_id
# 201820 7,8 and 10 have in history and all are cases
ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
covs$cohort_type == "Cases"
testthat::expect_true(covs$sum_value[ind] == 3)
testthat::expect_true(covs$average_value[ind] == 3 / 5)
ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
covs$cohort_type == "Target"
testthat::expect_true(covs$sum_value[ind] == 3)
testthat::expect_equal(covs$average_value[ind], 3 / 9, tolerance = 0.01)
# 378253 7,9 (9 multiple times) but 9 not a case
ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
covs$cohort_type == "Cases"
testthat::expect_true(covs$sum_value[ind] == 1)
testthat::expect_true(covs$average_value[ind] == 1 / 5)
ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
covs$cohort_type == "Target"
testthat::expect_true(covs$sum_value[ind] == 2)
testthat::expect_equal(covs$average_value[ind], 2 / 9, tolerance = 0.01)
covs_cont <- read.csv(file.path(tempdir(), "result", "c_covariates_continuous.csv"))
# checks age in years
testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 18)
testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 5)
testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
covs_cont$target_cohort_id == 1] == 18)
testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
covs_cont$target_cohort_id == 1] == 9)
testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
covs_cont$target_cohort_id == 2] == 18)
testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
covs_cont$target_cohort_id == 2] == 5)
})
test_that("manual data checking exclude count works", {
# this test creates made-up OMOP CDM data
# and runs runCharacterizationAnalyses on the data
# to check whether the results are as expected
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = "sqlite",
server = manualData2
)
con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
schema <- "main"
# add persons - aggregate covs (age)
persons <- data.frame(
person_id = 1:10,
gender_concept_id = rep(8532, 10),
year_of_birth = rep(2000, 10),
race_concept_id = rep(1, 10),
ethnicity_concept_id = rep(1, 10),
location_id = rep(1, 10),
provider_id = rep(1, 10),
care_site_id = rep(1, 10),
person_source_value = 1:10,
gender_source_value = rep("female", 10),
race_source_value = rep("na", 10),
ethnicity_source_value = rep("na", 10)
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "person",
data = persons
)
# observation period
obs_period <- data.frame(
observation_period_id = 1:10,
person_id = 1:10,
observation_period_start_date = rep("2000-12-31", 10),
observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
period_type_concept_id = rep(1, 10)
)
obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "observation_period",
data = obs_period
)
# person 1 has 1 day obs
# person 2-6 has no events
# person 7 has diabetes at 10, headache at 12
# person 8 has diabetes at 13
# person 9 has headache multiple times
# person 10 has diabetes at 14
# add conditions - aggregate covs (conditions)
condition_era <- data.frame(
condition_era_id = 1:7,
person_id = c(7, 7, 8, 9, 9, 9, 10),
condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
condition_era_start_date = c(
"2011-01-01", "2013-04-03", "2016-01-01",
"2006-01-04", "2014-08-02", "2014-08-04",
"2013-01-04"
),
condition_era_end_date = c(
"2011-01-01", "2013-04-03", "2016-01-01",
"2006-01-04", "2014-08-02", "2014-08-04",
"2013-01-04"
),
condition_occurrence_count = rep(1, 7)
)
condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "condition_era",
data = condition_era
)
# add concept
concept <- data.frame(
concept_id = c(201820, 378253),
concept_name = c("diabetes", "hypertension"),
domain_id = rep(1, 2),
vocabulary_id = rep(1, 2),
concept_class_id = c("Condition", "Condition"),
standard_concept = rep("S", 2),
concept_code = rep("Snowmed", 2)
# ,valid_start_date = NULL,
# valid_end_date = NULL,
# invalid_reason = NULL
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "concept",
data = concept
)
# add cohort - tte/dechal/rechal
# person 6 has the outcome just before the exposure
cohort <- data.frame(
subject_id = c(
1:10,
7, 8, 10,
c(3, 6, 7, 8, 10),
c(7),
6
),
cohort_definition_id = c(
rep(1, 10),
rep(1, 3),
rep(2, 5),
2,
2
),
cohort_start_date = c(
rep("2018-01-01", 10),
rep("2018-05-01", 3),
"2018-01-13", "2018-01-03", rep("2018-01-06", 3),
"2018-05-24",
"2017-12-29"
),
cohort_end_date = c(
rep("2018-02-01", 10),
rep("2018-06-01", 3),
"2018-02-02", "2018-02-04", rep("2018-02-08", 3),
"2018-06-05",
"2017-12-29"
)
)
cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = "cohort",
data = cohort
)
# create settings and run
characterizationSettings <- Characterization::createCharacterizationSettings(
timeToEventSettings = Characterization::createTimeToEventSettings(
targetIds = 1,
outcomeIds = 2
),
dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
targetIds = 1,
outcomeIds = 2
),
aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
targetIds = 1,
outcomeIds = 2,
minPriorObservation = 365,
outcomeWashoutDays = 30,
riskWindowStart = 1,
riskWindowEnd = 90,
covariateSettings = FeatureExtraction::createCovariateSettings(
useDemographicsAge = T,
useDemographicsGender = T,
useConditionEraAnyTimePrior = T
),
caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
casePreTargetDuration = 365 * 5
)
)
Characterization::runCharacterizationAnalyses(
connectionDetails = connectionDetails,
targetDatabaseSchema = schema,
targetTable = "cohort",
outcomeDatabaseSchema = schema,
outcomeTable = "cohort",
cdmDatabaseSchema = schema,
characterizationSettings = characterizationSettings,
outputDirectory = file.path(tempdir(), "result2"),
executionPath = file.path(tempdir(), "execution2"),
csvFilePrefix = "c_",
databaseId = "1",
incremental = T,
threads = 1,
minCharacterizationMean = 0.0001,
minCellCount = NULL,
showSubjectId = T
)
# load the cohort counts to make sure the exclude is there
counts <- read.csv(file.path(tempdir(), "result2", "c_cohort_counts.csv"))
# when restricted to first exposure 5 people have outcome
testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 4)
testthat::expect_true(counts$row_count[counts$cohort_type == "Exclude"] == 1)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.