inst/doc/a11_benchmark.R

## ----include = FALSE----------------------------------------------------------
NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")

knitr::opts_chunk$set(
collapse = TRUE,
eval = TRUE, 
warning = FALSE, 
message = FALSE,
comment = "#>",
echo = FALSE,
eval = NOT_CRAN
)

## ----include = FALSE----------------------------------------------------------
# library(CDMConnector)
# if (Sys.getenv("EUNOMIA_DATA_FOLDER") == ""){
# Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))}
# if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))){ dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER"))
# downloadEunomiaData()
# }

## -----------------------------------------------------------------------------
# # Packages
# library(visOmopResults)
# library(omopgenerics)
# library(ggplot2)
# library(CohortCharacteristics)
# library(stringr)
# library(dplyr)
# library(tidyr)
# library(gt)
# library(scales)
# library(CohortConstructor)
# library(gt)
# 
# niceOverlapLabels <- function(labels) {
#   new_labels <- gsub("_", " ", gsub(" and.*|cc_", "", labels))
#   return(
#     tibble("Cohort name" = new_labels) |>
#       mutate(
#         "Cohort name" = str_to_sentence(gsub("_", " ", gsub("cc_|atlas_", "", new_labels))),
#         "Cohort name" = case_when(
#           grepl("Asthma", .data[["Cohort name"]]) ~ "Asthma without COPD",
#           grepl("Covid", .data[["Cohort name"]]) ~ gsub("Covid|Covid", "COVID-19", `Cohort name`),
#           grepl("eutropenia", .data[["Cohort name"]]) ~ "Acquired neutropenia or unspecified leukopenia",
#           grepl("Hosp", .data[["Cohort name"]]) ~ "Inpatient hospitalisation",
#           grepl("First", .data[["Cohort name"]]) ~ "First major depression",
#           grepl("fluoro", .data[["Cohort name"]]) ~ "New fluoroquinolone users",
#           grepl("Beta", .data[["Cohort name"]]) ~ "New users of beta blockers nested in essential hypertension",
#           .default = .data[["Cohort name"]]
#         ),
#         "Cohort name" = if_else(
#           grepl("COVID", .data[["Cohort name"]]),
#           gsub(" female", ": female", gsub(" male", ": male", .data[["Cohort name"]])),
#           .data[["Cohort name"]]
#         ),
#         "Cohort name" = if_else(
#           grepl(" to ", .data[["Cohort name"]]),
#           gsub("male ", "male, ", .data[["Cohort name"]]),
#           .data[["Cohort name"]]
#         )
#       )
#   )
# }

## ----echo=TRUE----------------------------------------------------------------
# library(CDMConnector)
# library(CodelistGenerator)
# library(PatientProfiles)
# library(CohortConstructor)
# library(dplyr)
# 
# con <- DBI::dbConnect(duckdb::duckdb(),
#                       dbdir = eunomiaDir())
# cdm <- cdmFromCon(con, cdmSchema = "main", writeSchema = "main",
#                   writePrefix = "my_study_")

## ----echo=TRUE----------------------------------------------------------------
# benchmark_results <- benchmarkCohortConstructor(cdm,
#                runCIRCE = FALSE,
#                runCohortConstructorDefinition = FALSE,
#                runCohortConstructorDomain = TRUE)
# benchmark_results |>
#   glimpse()

## -----------------------------------------------------------------------------
# benchmarkData$omop |>
#   visOmopResults::formatTable() |>
#   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")),
#             locations = cells_column_labels()) |>
#   tab_style(style = list(cell_text(weight = "bold")),
#             locations = cells_body(columns = 1))

## -----------------------------------------------------------------------------
# benchmarkData$details |>
#   visOmopResults::formatTable(groupColumn = "cdm_name") |>
#   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")),
#             locations = cells_column_labels()) |>
#   tab_style(style = list(cell_text(weight = "bold")),
#             locations = cells_body(columns = 1:2))

## ----fig.width=10, fig.height=7-----------------------------------------------
# benchmarkData$comparison |>
#   plotCohortOverlap(uniqueCombinations = FALSE, facet = "cdm_name") +
#   scale_y_discrete(labels = niceOverlapLabels) +
#   theme(
#     legend.text = element_text(size = 10),
#     strip.text = element_text(size = 14),
#     axis.text.x = element_text(size = 12),
#     axis.title.x = element_text(size = 14),
#     axis.title.y = element_text(size = 14)
#   ) +
#   # facet_wrap("cdm_name") +
#   scale_fill_discrete(labels = c("Both", "CIRCE", "CohortConstructor")) +
#   scale_color_discrete(labels = c("Both", "CIRCE", "CohortConstructor"))

## -----------------------------------------------------------------------------
# ## TABLE with same results as the plot below.
# 
# # header_prefix <- "[header]Time by database (minutes)\n[header_level]"
# # benchmarkData$time |>
# #   distinct() |>
# #   filter(!grepl("male|set", msg)) |>
# #   mutate(
# #     time = niceNum((as.numeric(toc) - as.numeric(tic))/60, 2),
# #     Tool = if_else(grepl("cc", msg), "CohortConstructor", "CIRCE"),
# #     "Cohort name" = str_to_sentence(gsub("_", " ", gsub("cc_|atlas_", "", msg)))
# #   ) |>
# #   select(-c("tic", "toc", "msg", "callback_msg")) |>
# #   pivot_wider(names_from = "cdm_name", values_from = "time", names_prefix = header_prefix) |>
# #   select(c("Cohort name", "Tool", paste0(header_prefix, data$time$cdm_name |> unique()))) |>
# #   mutate(
# #     "Cohort name" = case_when(
# #       grepl("Asthma", .data[["Cohort name"]]) ~ "Asthma without COPD",
# #       grepl("Covid", .data[["Cohort name"]]) ~ "COVID-19",
# #       grepl("eutropenia", .data[["Cohort name"]]) ~ "Acquired neutropenia or unspecified leukopenia",
# #       grepl("Hosp", .data[["Cohort name"]]) ~ "Inpatient hospitalisation",
# #       grepl("First", .data[["Cohort name"]]) ~ "First major depression",
# #       grepl("fluoro", .data[["Cohort name"]]) ~ "New fluoroquinolone users",
# #       grepl("Beta", .data[["Cohort name"]]) ~ "New users of beta blockers nested in essential hypertension",
# #       .default = .data[["Cohort name"]]
# #     )
# #   ) |>
# #   arrange(`Cohort name`) |>
# #   gtTable(colsToMergeRows = "all_columns") |>
# #   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")),
# #             locations = cells_column_labels()) |>
# #   tab_style(style = list(cell_text(weight = "bold")),
# #             locations = cells_body(columns = 1:2))

## ----fig.width=10, fig.height=7-----------------------------------------------
# 
# benchmarkData$time_definition |>
#   ggplot(aes(y = `Cohort name`, x = time, colour = Tool, fill = Tool)) +
#   geom_col(position = "dodge", width = 0.6) +
#   xlab("Time (minutes)") +
#   scale_y_discrete(labels = label_wrap(20)) +
#   theme(
#     legend.title = element_blank(),
#     legend.position = "bottom",
#     axis.text.x = element_text(size = 12),
#     legend.text = element_text(size = 12),
#     strip.text = element_text(size = 14),
#     axis.text.y = element_text(size = 12),
#     axis.title.x = element_text(size = 14),
#     axis.title.y = element_text(size = 14)
#   ) +
#   facet_wrap(vars(cdm_name), nrow = 1, scales = "free_x")

## -----------------------------------------------------------------------------
# header_prefix <- "[header]Time by tool (minutes)\n[header_level]"
# benchmarkData$time_domain |>
#   formatTable() |>
#   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")),
#             locations = cells_column_labels()) |>
#   tab_style(style = list(cell_text(weight = "bold")),
#             locations = cells_body(columns = 1))

## -----------------------------------------------------------------------------
# benchmarkData$time_strata |>
#   formatTable() |>
#   tab_style(style = list(cell_fill(color = "#e1e1e1"), cell_text(weight = "bold")),
#             locations = cells_column_labels()) |>
#   tab_style(style = list(cell_text(weight = "bold")),
#             locations = cells_body(columns = 1))

## ----fig.width=10, fig.height=7-----------------------------------------------
# benchmarkData$sql_indexes |>
#   distinct() |>
#   group_by(cdm_name, msg) |>
#   summarise(time = sum(as.numeric(toc) - as.numeric(tic))/60, .groups = "drop") |>
#   mutate(
#     Index = if_else(grepl("No index", msg), "Without SQL index", "With SQL index"),
#     Domains = str_to_sentence(gsub("No index: |Index: | domains| domain", "", msg)),
#     Domains = gsub("procedure ", "procedure, ", Domains)
#   ) |>
#   ggplot(aes(y = Domains, x = time, colour = Index, fill = Index)) +
#   geom_col(position = "dodge", width = 0.6) +
#   xlab("Time (minutes)") +
#   scale_y_discrete(labels = label_wrap(15)) +
#   theme(
#     legend.title=element_blank(),
#     legend.position = "bottom",
#     legend.text = element_text(size = 12),
#     strip.text = element_text(size = 14),
#     # axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 12),
#     axis.text.x = element_text(size = 12),
#     axis.text.y = element_text(size = 12),
#     axis.title.x = element_text(size = 14),
#     axis.title.y = element_text(size = 14)
#   ) +
#   facet_wrap(vars(cdm_name), scales = "free_x")

Try the CohortConstructor package in your browser

Any scripts or data that you put into this service are public.

CohortConstructor documentation built on June 8, 2025, 12:49 p.m.