tests/testthat/setup.R

dbToTest <- Sys.getenv("DB_TO_TEST", "duckdb")

writeSchema <- function() {
  prefix <- paste0("mdiag_", paste0(sample(letters, 4), collapse = ""), "_")
  switch(dbToTest,
         "duckdb" = c(schema = "main", prefix = prefix),
         "sql server" = c(catalog = "ohdsi", schema = "dbo", prefix = prefix),
         "redshift" = c(schema = "resultsv281", prefix = prefix)
  )
}
connection <- function() {
  switch(dbToTest,
         "duckdb" = DBI::dbConnect(duckdb::duckdb(), ":memory:"),
         "sql server" = DBI::dbConnect(
           odbc::odbc(),
           Driver = "ODBC Driver 18 for SQL Server",
           Server = Sys.getenv("CDM5_SQL_SERVER_SERVER"),
           Database = Sys.getenv("CDM5_SQL_SERVER_CDM_DATABASE"),
           UID = Sys.getenv("CDM5_SQL_SERVER_USER"),
           PWD = Sys.getenv("CDM5_SQL_SERVER_PASSWORD"),
           TrustServerCertificate = "yes",
           Port = 1433
         ),
         "redshift" = DBI::dbConnect(
           RPostgres::Redshift(),
           dbname = Sys.getenv("CDM5_REDSHIFT_DBNAME"),
           port = Sys.getenv("CDM5_REDSHIFT_PORT"),
           host = Sys.getenv("CDM5_REDSHIFT_HOST"),
           user = Sys.getenv("CDM5_REDSHIFT_USER"),
           password = Sys.getenv("CDM5_REDSHIFT_PASSWORD")
         )
  )
}
copyCdm <- function(cdm) {
  if (dbToTest != "local") {
    src <- CDMConnector::dbSource(con = connection(), writeSchema = writeSchema())
    cdm <- omopgenerics::insertCdmTo(cdm = cdm, to = src)
  }
  return(cdm)
}
testMockCdm <- function() {

  cdm <- omopgenerics::emptyCdmReference(cdmName = "test_mock")
  set.seed(111)

  # person
  birthRange = as.Date(c("1950-01-01", "2000-12-31"))
  proportionFemale = 0.5
  person_id <- seq_len(100)
  dob <- sample(seq(birthRange[1], birthRange[2], by = "day"),
                length(person_id), replace = TRUE)
  gender <- sample(c(8532, 8507), length(person_id), prob = c(proportionFemale, 1 - proportionFemale), TRUE)
  person <- dplyr::tibble(
    person_id = person_id,
    gender_concept_id = as.integer(gender),
    year_of_birth = as.integer(lubridate::year(dob)),
    month_of_birth = as.integer(lubridate::month(dob)),
    day_of_birth = as.integer(lubridate::day(dob)),
    race_concept_id = as.integer(NA),
    ethnicity_concept_id = as.integer(NA),
    location_id = as.integer(NA),
    provider_id = as.integer(NA),
    care_site_id = as.integer(NA),
    person_source_value = NA_character_,
    gender_source_value = NA_character_,
    gender_source_concept_id = as.integer(NA),
    race_source_value = NA_character_,
    race_source_concept_id = as.integer(NA),
    ethnicity_source_value = NA_character_,
    ethnicity_source_concept_id = as.integer(NA)
  )
  cdm <- omopgenerics::insertTable(cdm = cdm, name = "person", table = person)

  # observation period
  dob <- cdm$person |>
    dplyr::mutate(
      year_of_birth1 = as.character(as.integer(.data$year_of_birth)),
      month_of_birth1 = as.character(as.integer(.data$month_of_birth)),
      day_of_birth1 = as.character(as.integer(.data$day_of_birth))
    ) |>
    dplyr::mutate(dob := as.Date(
      paste0(.data$year_of_birth1, "-", .data$month_of_birth1, "-", .data$day_of_birth1)
    )) |>
    dplyr::select(!c("year_of_birth1", "month_of_birth1", "day_of_birth1")) |>
    dplyr::select("dob") |>
    dplyr::pull()
  start <- dob + floor((as.Date(max(as.Date("2020-01-01"), max(as.Date(dob)))) - dob) * stats::runif(n = length(dob)))
  end <- start + ceiling((as.Date(max(as.Date("2020-01-01"), max(as.Date(dob)))) - start) * stats::runif(n = length(dob)))
  person_id <- dplyr::pull(cdm$person, person_id)
  observationPeriod <- dplyr::tibble(
    observation_period_id = as.integer(person_id),
    person_id = as.integer(person_id),
    observation_period_start_date = as.Date(start),
    observation_period_end_date = as.Date(end),
    period_type_concept_id = NA_integer_
  )
  cdm <- omopgenerics::insertTable(
    cdm = cdm, name = "observation_period", table = observationPeriod
  )

  # concept
  conceptSet <- c(8507, 8532, 3001467, 45875977, 194152, 4092121, 1033535, 4328749, 4267416L, 9529)
  sourceSet  <- c(12, 11, 122, 24442, 444, 45663, 2564, 250603, 45678, 23478273)
  conceptName <- c("Male", "Female", "Alkaline phosphatase.bone [Enzymatic activity/volume] in Serum or Plasma", "PhenX", "Renal agenesis and dysgenesis", "Level of mood", "Minimum Data Set", "High", "Low", "kilogram")
  sourceName  <- c("Male", "Female", "Alkaline phosphatase.bone", "PhenX", "Agenesis and dysgenesis renal", "Mood", "Minimum Data Set", "High", "Low", "kg")
  domain <- c("Gender", "Gender", "Measurement", "Measurement", "Condition", "Observation", "Observation", "Meas Value", "Meas Value", "Unit")
  concept <- dplyr::tibble(
    concept_id = as.integer(c(conceptSet, sourceSet)),
    concept_name = c(conceptName, sourceName),
    domain_id = rep(domain, 2),
    vocabulary_id = NA_character_,
    standard_concept = c(rep("S", length(conceptName)), rep(NA_character_, length(sourceName))),
    concept_class_id = NA_character_,
    concept_code = NA_character_,
    valid_start_date = as.Date(NA),
    valid_end_date = as.Date(NA),
    invalid_reason = NA_character_
  )
  cdm <- omopgenerics::insertTable(
    cdm = cdm, name = "concept", table = concept
  )

  # measurement
  concept_id <- cdm$concept |>
    dplyr::filter(.data$domain_id == "Measurement" & .data$standard_concept == "S") |>
    dplyr::distinct(concept_id) |>
    dplyr::pull()
  source_concept_id <- cdm$concept |>
    dplyr::filter(.data$domain_id == "Measurement" & is.na(.data$standard_concept)) |>
    dplyr::distinct(concept_id) |>
    dplyr::pull()
  concept_count <- length(concept_id)
  recordDates <- obsDate(cdm$observation_period$observation_period_end_date, cdm$observation_period$observation_period_end_date)
  measurement <- dplyr::tibble(
    measurement_id = 1:100L,
    measurement_concept_id = concept_id[1],
    person_id = sample(
      x = cdm$person |> dplyr::pull("person_id"),
      size = 100,
      replace = TRUE
    ),
    measurement_date = recordDates[[1]],
    measurement_type_concept_id = 1L,
    measurement_datetime = as.Date(NA),
    measurement_time = NA_character_,
    operator_concept_id = NA_integer_,
    range_low = as.numeric(NA),
    range_high = as.numeric(NA),
    provider_id = NA_integer_,
    visit_occurrence_id = NA_integer_,
    visit_detail_id = NA_integer_,
    measurement_source_value = NA_character_,
    measurement_source_concept_id = source_concept_id[1],
    unit_source_value = NA_character_,
    value_source_value = NA_character_
  )  |>
    dplyr::mutate(
      unit_concept_id = dplyr::if_else(dplyr::row_number()%%2 == 0, 9529L, NA_integer_),
      value_as_number = dplyr::if_else(dplyr::row_number()<6, NA, seq(from = 5, to = 150, length.out = 100)),
      value_as_concept_id = dplyr::case_when(
        dplyr::row_number()%%3 == 0 ~ 4328749L,
        dplyr::row_number()%%3 == 1 ~ 4267416L,
        dplyr::row_number()%%3 == 2 ~ NA_integer_,
      )
    )
  cdm <- omopgenerics::insertTable(
    cdm = cdm, name = "measurement", table = measurement
  )

  # observation
  concept_id <- cdm$concept |>
    dplyr::filter(.data$domain_id == "Observation" & .data$standard_concept == "S") |>
    dplyr::distinct(concept_id) |>
    dplyr::pull()
  concept_count <- length(concept_id)
  recordDates <- obsDate(cdm$observation_period$observation_period_end_date, cdm$observation_period$observation_period_end_date)
  observation <- dplyr::tibble(
    observation_id = 1:100L,
    observation_concept_id = concept_id[1],
    person_id = sample(
      x = cdm$person |> dplyr::pull("person_id"),
      size = 100,
      replace = TRUE
    ),
    observation_date = recordDates[[1]],
    observation_type_concept_id = 1L,
    observation_datetime = as.Date(NA),
    observation_time = NA,
    operator_concept_id = NA,
    range_low = NA,
    range_high = NA,
    provider_id = NA_integer_,
    visit_occurrence_id = NA_integer_,
    visit_detail_id = NA_integer_,
    observation_source_value = NA_character_,
    observation_source_concept_id = NA_integer_,
    unit_source_value = NA_character_,
    qualifier_source_value = NA_character_,
    value_as_string = NA_character_,
    qualifier_concept_id = NA_integer_
  )  |>
    dplyr::mutate(
      unit_concept_id = dplyr::if_else(dplyr::row_number()%%2 == 0, 9529L, NA_integer_),
      value_as_number = dplyr::if_else(dplyr::row_number()<6, NA, seq(from = 5, to = 150, length.out = 100)),
      value_as_concept_id = dplyr::case_when(
        dplyr::row_number()%%3 == 0 ~ 4328749L,
        dplyr::row_number()%%3 == 1 ~ 4267416L,
        dplyr::row_number()%%3 == 2 ~ NA_integer_,
      )
    )
  cdm <- omopgenerics::insertTable(
    cdm = cdm, name = "observation", table = observation
  )

  # cohort
  my_cohort <- list()
  for (i in 1:2) {
    dates <- obsDate(cdm$observation_period$observation_period_end_date, cdm$observation_period$observation_period_end_date)
    my_cohort[[i]] <- dplyr::tibble(
      cohort_definition_id = i,
      subject_id = sample(
        x = cdm$person |> dplyr::pull("person_id"),
        size = 100,
        replace = TRUE
      )
    ) |>
      dplyr::mutate(
        cohort_start_date = dates[[1]],
        cohort_end_date = dates[[2]]
      )
  }
  my_cohort <- my_cohort |>
    dplyr::bind_rows() |>
    dplyr::arrange(.data$cohort_definition_id,
                   .data$subject_id,
                   .data$cohort_start_date) |>
    dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
    dplyr::mutate(
      next_observation = dplyr::lead(
        x = .data$cohort_start_date,
        n = 1,
        order_by = .data$cohort_start_date
      ),
      cohort_end_date =
        dplyr::if_else(
          .data$cohort_end_date >=
            .data$next_observation &
            !is.na(.data$next_observation),
          .data$next_observation - 1,
          .data$cohort_end_date
        ),
      cohort_end_date = dplyr::if_else(
        .data$cohort_end_date <
          .data$cohort_start_date,
        NA,
        .data$cohort_end_date
      )
    ) |>
    dplyr::ungroup() |>
    dplyr::select(-"next_observation") |>
    stats::na.omit() |>
    dplyr::distinct() |>
    dplyr::inner_join(
      cdm$observation_period |>
        dplyr::select("subject_id" = "person_id", "observation_period_start_date", "observation_period_end_date"),
      by = "subject_id"
    ) |>
    dplyr::filter(
      cohort_start_date >= observation_period_start_date,
      cohort_end_date <= observation_period_end_date
    ) |>
    dplyr::select(!c("observation_period_start_date", "observation_period_end_date"))
  cdm <- omopgenerics::insertTable(
    cdm = cdm, name = "my_cohort", table = my_cohort
  )
  cdm[["my_cohort"]] <- cdm[["my_cohort"]] |>
    omopgenerics::newCohortTable(
      cohortSetRef = dplyr::tibble(cohort_definition_id = 1:2L, cohort_name = paste0("cohort_", 1:2)),
      cohortAttritionRef = NULL
    )
  return(cdm)
}
obsDate <- function(start, end) {
  r1 <- stats::runif(n = length(start))
  start <- start + floor((as.Date(end) - start) * r1)
  r2 <- stats::runif(n = length(start))
  end <- start + ceiling((as.Date(end) - start) * r2)
  end <- pmax(start, end)
  list(start, end)
}
dropCreatedTables <- function(cdm) {
  omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::everything())
  omopgenerics::cdmDisconnect(cdm = cdm)
}

Try the MeasurementDiagnostics package in your browser

Any scripts or data that you put into this service are public.

MeasurementDiagnostics documentation built on Dec. 17, 2025, 5:10 p.m.