R/benchmarkPatientProfiles.R
In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model

Documented in benchmarkPatientProfiles

#' Benchmark intersections and demographics functions for a certain source
#' (cdm).
#'
#' @param cdm A cdm_reference object.
#' @param n Size of the synthetic cohorts used to benchmark.
#' @param iterations Number of iterations to run the benchmark.
#'
#' @return A summarise_result object with the summary statistics.
#' @export
#'
benchmarkPatientProfiles <- function(cdm,
                                     n = 50000,
                                     iterations = 1) {
  rlang::check_installed("CDMConnector", version = "2.0.0")

  # initial check
  cdm <- omopgenerics::validateCdmArgument(cdm = cdm)
  omopgenerics::assertNumeric(n, integerish = TRUE, min = 1, length = 1)
  omopgenerics::assertNumeric(iterations, integerish = TRUE, min = 1, length = 1)

  # create synthetic cdm
  cli::cli_inform(c("i" = "Generating synthetic data"))
  person <- personTable(n = n)
  observationPeriod <- observationPeriodTable(person)
  cohort1 <- indexCohort(observationPeriod, 2* n)
  cohort2 <- targetCohort(observationPeriod, 4 * n)

  cdmLocal <- omopgenerics::cdmFromTables(
    tables = list(person = person, observation_period = observationPeriod),
    cdmName = "synthetic cdm",
    cohortTables = list(cohort1 = cohort1, cohort2 = cohort2),
    cdmVersion = "5.3"
  )

  # insert cdm object
  cdm2 <- omopgenerics::insertCdmTo(cdm = cdmLocal, to = cdm)

  # intersect function
  analysis <- c("count", "flag", "date", "days", "test_result")
  intersectFunction <- function(value, window, prefix) {
    t0 <- Sys.time()
    x <- cdm2$cohort1 |>
      .addIntersect(
        tableName = "cohort2",
        value = value,
        filterVariable = "cohort_definition_id",
        filterId = c(1L, 2L),
        idName = c("var_1", "var_2"),
        window = window,
        indexDate = "cohort_start_date",
        censorDate = NULL,
        targetStartDate = "cohort_start_date",
        targetEndDate = "cohort_end_date",
        inObservation = TRUE,
        order = "first",
        nameStyle = "{id_name}_{window_name}",
        name = omopgenerics::uniqueTableName(prefix = prefix)
      )
    as.numeric(Sys.time() - t0)
  }

  # demographics function
  demographics <- c("age", "sex", "age_group", "prior_observation", "future_observation", "all")
  demographicsFunciton <- function(value, prefix) {
    ageGroups <- list(c(0, 19), c(20, 39), c(40, 59), c(60, 79), c(80, Inf))
    vals <- switch(value,
                   "age" = list(T, F, NULL, F, F),
                   "sex" = list(F, T, NULL, F, F),
                   "age_group" = list(F, F, ageGroups, F, F),
                   "prior_observation" = list(F, F, NULL, T, F),
                   "future_observation" = list(F, F, NULL, F, T),
                   "all" = list(T, T, ageGroups, T, T))
    t0 <- Sys.time()
    x <- cdm2$cohort1 |>
      .addDemographicsQuery(
        indexDate = "cohort_start_date",
        age = vals[[1]],
        ageName = "age",
        ageMissingMonth = 1L,
        ageMissingDay = 1L,
        ageImposeMonth = FALSE,
        ageImposeDay = FALSE,
        ageGroup = vals[[3]],
        missingAgeGroupValue = "none",
        sex = vals[[2]],
        sexName = "sex",
        missingSexValue = "none",
        priorObservation = vals[[4]],
        priorObservationName = "prior_obs",
        priorObservationType = "date",
        futureObservation = vals[[5]],
        futureObservationName = "future_obs",
        futureObservationType = "days",
        dateOfBirth = FALSE,
        dateOfBirthName = NULL
      ) |>
      dplyr::compute(name = omopgenerics::uniqueTableName(prefix = prefix))
    as.numeric(Sys.time() - t0)
  }

  # TODO add index to person_id

  # iterate over
  result <- purrr::map_df(seq_len(iterations), \(it) {
    cli::cli_inform(c("Iteration {.pkg {it}} of {iterations}"))
    prefix <- omopgenerics::tmpPrefix()

    result <- list(
      # intersect one window
      purrr::map_df(analysis, \(x) {
        dplyr::tibble(
          task = glue::glue("Intersect {x} with one window"),
          estimate_value = intersectFunction(x, c(0, 365), prefix)
        )
      }),
      # intersect three windows
      purrr::map_df(analysis, \(x) {
        window <- list(c(-365, -1), c(0, 0), c(1, 365))
        dplyr::tibble(
          task = glue::glue("Intersect {x} with three windows"),
          estimate_value = intersectFunction(x, window, prefix)
        )
      }),
      # demographics
      purrr::map_df(demographics, \(x) {
        dplyr::tibble(
          task = glue::glue("Demographics: {x}"),
          estimate_value = demographicsFunciton(x, prefix)
        )
      })
    ) |>
      dplyr::bind_rows()

    # delete created tables
    omopgenerics::dropSourceTable(cdm = cdm2, name = dplyr::starts_with(prefix))

    result |>
      dplyr::mutate(iteration = .env$it)
  })

  # clean cdm2
  omopgenerics::dropSourceTable(cdm = cdm2, name = c(
    "person", "observation_period", "cohort1", "cohort2"
  ))

  # create result object
  result |>
    dplyr::union_all(
      result |>
        dplyr::group_by(.data$task) |>
        dplyr::summarise(estimate_value = sum(.data$estimate_value)) |>
        dplyr::mutate(iteration = NA_real_)
    ) |>
    omopgenerics::uniteGroup(cols = "task") |>
    omopgenerics::uniteStrata(cols = "iteration") |>
    omopgenerics::uniteAdditional() |>
    dplyr::mutate(
      result_id = 1L,
      cdm_name = omopgenerics::cdmName(cdm),
      variable_name = "overall",
      variable_level = "overall",
      estimate_name = "time_seconds",
      estimate_type = "numeric",
      estimate_value = sprintf("%.2f", .data$estimate_value)
    ) |>
    omopgenerics::newSummarisedResult(settings = dplyr::tibble(
      result_id = 1L,
      package_name = "PatientProfiles",
      package_version = as.character(utils::packageVersion("PatientProfiles")),
      result_type = "benckmark_patientprofiles",
      sample_size = as.character(.env$n)
    ))
}
personTable <- function(n) {
  dplyr::tibble(
    person_id = as.integer(seq_len(.env$n)),
    gender_concept_id = sample(c(8507L, 8532L), size = .env$n, replace = TRUE),
    year_of_birth = sample(1990:2000L, size = .env$n, replace = TRUE),
    month_of_birth = sample(c(NA_integer_, 1:12L), size = .env$n, replace = TRUE),
    day_of_birth = sample(c(NA_integer_, 1:28L), size = .env$n, replace = TRUE, prob = c(20, rep(1, 28))),
    birth_datetime = as.Date(NA),
    race_concept_id = 0L,
    ethnicity_concept_id = 0L,
    location_id = 0L,
    provider_id = 0L,
    care_site_id = 0L,
    person_source_value = NA_character_,
    gender_source_value = NA_character_,
    gender_source_concept_id = 0L,
    race_source_value = NA_character_,
    race_source_concept_id = 0L,
    ethnicity_source_value = NA_character_,
    ethnicity_source_concept_id = 0L
  )
}
observationPeriodTable <- function(person) {
  n <- nrow(person)
  dplyr::tibble(
    observation_period_id = person$person_id,
    person_id = person$person_id,
    observation_period_start_date = as.Date("2001-01-01") + round(3500 * stats::runif(.env$n)),
    observation_period_end_date = .data$observation_period_start_date + 100 + round(5000 * stats::runif(.env$n)),
    period_type_concept_id = 0L
  )
}
indexCohort <- function(observationPeriod, n) {
  id <- as.integer(sample(nrow(observationPeriod), size = n, replace = TRUE))
  observationPeriod[id,] |>
    dplyr::select(
      "person_id",
      "start" = "observation_period_start_date",
      "end" = "observation_period_end_date"
    ) |>
    dplyr::mutate(
      cohort_definition_id = sample(1:4L, size = .env$n, replace = TRUE),
      duration = as.integer(.data$end - .data$start),
      prev_time = 1 + 10 * stats::runif(n = .env$n),
      duration_time = 15 + 100 * stats::runif(n = .env$n),
      end_time = 1 + 10 * stats::runif(n = .env$n)
    ) |>
    dplyr::group_by(.data$cohort_definition_id, .data$person_id) |>
    dplyr::mutate(
      id = dplyr::row_number(),
      end_time = dplyr::if_else(.data$id == max(.data$id), .data$end_time, 0),
      total_time = sum(.data$prev_time) + sum(.data$duration_time) + sum(.data$end_time)
    ) |>
    dplyr::ungroup() |>
    dplyr::mutate(
      prev_time = round(.data$duration * .data$prev_time / .data$total_time) + 1L,
      duration_time = round(.data$duration * .data$duration_time / .data$total_time),
      end_time = round(.data$duration * .data$end_time / .data$total_time),
      total_time = .data$prev_time + .data$duration_time + .data$end_time
    ) |>
    dplyr::group_by(.data$cohort_definition_id, .data$person_id) |>
    dplyr::mutate(
      prev_time = dplyr::coalesce(dplyr::lag(cumsum(.data$total_time), order_by = .data$id), 0) + .data$prev_time
    ) |>
    dplyr::ungroup() |>
    dplyr::mutate(
      cohort_start_date = .data$start + as.integer(.data$prev_time),
      cohort_end_date = .data$cohort_start_date + as.integer(.data$duration_time),
      cohort_start_date = pmin(.data$end, .data$cohort_start_date),
      cohort_end_date = pmin(.data$end, .data$cohort_end_date)
    ) |>
    dplyr::rename(subject_id = "person_id") |>
    dplyr::select(dplyr::all_of(omopgenerics::cohortColumns("cohort")))
}
targetCohort <- function(observationPeriod, n) {
  indexCohort(observationPeriod = observationPeriod, n = n) |>
    dplyr::mutate(test_result = sample(x = c("A", "B", "C"), size = n, replace = TRUE))
}

Any scripts or data that you put into this service are public.

PatientProfiles documentation built on April 3, 2025, 9:47 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

PatientProfiles
Identify Characteristics of Patients in the OMOP Common Data Model

R/benchmarkPatientProfiles.R
In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model

Defines functions targetCohort indexCohort observationPeriodTable personTable benchmarkPatientProfiles

Documented in benchmarkPatientProfiles

Try the PatientProfiles package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PatientProfiles Identify Characteristics of Patients in the OMOP Common Data Model

R/benchmarkPatientProfiles.R In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model

Defines functions targetCohort indexCohort observationPeriodTable personTable benchmarkPatientProfiles

Documented in benchmarkPatientProfiles

Try the PatientProfiles package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PatientProfiles
Identify Characteristics of Patients in the OMOP Common Data Model

R/benchmarkPatientProfiles.R
In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model