Nothing
# Copyright 2025 DARWIN EU®
#
# This file is part of IncidencePrevalence
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Run benchmark of incidence and prevalence analyses
#'
#' @param cdm A CDM reference object
#' @param analysisType A string of the following: "all", "only incidence",
#' "only prevalence"
#'
#' @return a tibble with time taken for different analyses
#' @export
#'
#' @examples
#' \donttest{
#' cdm <- mockIncidencePrevalence(
#' sampleSize = 100,
#' earliestObservationStartDate = as.Date("2010-01-01"),
#' latestObservationStartDate = as.Date("2010-01-01"),
#' minDaysToObservationEnd = 364,
#' maxDaysToObservationEnd = 364,
#' outPre = 0.1
#' )
#'
#' timings <- benchmarkIncidencePrevalence(cdm)
#' }
benchmarkIncidencePrevalence <- function(cdm,
analysisType = "all") {
omopgenerics::validateCdmArgument(cdm)
omopgenerics::assertChoice(analysisType, c(
"all", "only incidence",
"only prevalence"
),
msg = "- `analysisType` is not one of the possibilities
('all', 'only incidence'or 'only prevalence')"
)
# will add timings to list
timings <- list()
tictoc::tic()
cdm <- generateDenominatorCohortSet(
cdm = cdm, name = "denominator_typical",
daysPriorObservation = c(0, 180),
sex = c("Both", "Female"),
ageGroup = list(
c(0, 150),
c(10, 70)
)
)
t <- tictoc::toc(quiet = TRUE)
timings[["typical_denominator"]] <- dplyr::tibble(
task = "generating denominator (8 cohorts)",
time_taken_secs = as.numeric(t$toc - t$tic)
)
# create two outcome cohorts
# assume an outcome prevalence of 10%
nSample <- as.integer(ceiling(cdm$denominator_typical %>%
dplyr::count() %>%
dplyr::pull() * 0.1))
# we will create two outcome cohorts
cdm$bench_outcome <- dplyr::union_all(
cdm$person %>%
dplyr::select("person_id") %>%
dplyr::distinct() %>%
dplyr::slice_sample(n = nSample) %>%
dplyr::left_join(cdm$observation_period,
by = c("person_id")
) %>%
dplyr::mutate(cohort_definition_id = 1L),
cdm$person %>%
dplyr::select("person_id") %>%
dplyr::distinct() %>%
dplyr::slice_sample(n = nSample) %>%
dplyr::left_join(cdm$observation_period,
by = c("person_id")
) %>%
dplyr::mutate(cohort_definition_id = 2L)
) %>%
dplyr::select(
"subject_id" = "person_id",
"cohort_definition_id",
"cohort_start_date" = "observation_period_start_date",
"cohort_end_date" = "observation_period_end_date"
) %>%
dplyr::filter(!is.na(.data$cohort_start_date) &
!is.na(.data$cohort_end_date)) %>%
dplyr::compute(
temporary = FALSE,
name = "bench_outcome",
logPrefix = "IncidencePrevalence_benchmarkIncidencePrevalence_outcome_"
) %>%
omopgenerics::newCohortTable()
# calculate prevalence if analysisType is not "only incidence"
if (analysisType != "only incidence") {
# point prevalence
tictoc::tic()
pointPrev <- estimatePointPrevalence(
cdm = cdm,
denominatorTable = "denominator_typical",
outcomeTable = "bench_outcome",
interval = "years"
)
t <- tictoc::toc(quiet = TRUE)
timings[["pointPrev"]] <- dplyr::tibble(
task = paste0(
"yearly point prevalence for two outcomes with eight denominator cohorts"
),
time_taken_secs = as.numeric(t$toc - t$tic)
)
# period prevalence
tictoc::tic()
period_prev <- estimatePeriodPrevalence(
cdm = cdm,
denominatorTable = "denominator_typical",
outcomeTable = "bench_outcome",
interval = "years",
fullContribution = TRUE
)
t <- tictoc::toc(quiet = TRUE)
timings[["period_prev"]] <- dplyr::tibble(
task = paste0(
"yearly period prevalence for two outcomes with eight denominator cohorts"
),
time_taken_secs = as.numeric(t$toc - t$tic)
)
}
# calculate incidence if analysisType is not "only prevalence"
if (analysisType != "only prevalence") {
tictoc::tic()
incTypicalYears <- estimateIncidence(
cdm = cdm,
denominatorTable = "denominator_typical",
outcomeTable = "bench_outcome",
interval = "years"
)
t <- tictoc::toc(quiet = TRUE)
timings[["incTypicalYears"]] <- dplyr::tibble(
task = paste0(
"yearly incidence for two outcomes with eight denominator cohorts"
),
time_taken_secs = as.numeric(t$toc - t$tic)
)
}
# combine results
timings <- dplyr::bind_rows(timings) %>%
dplyr::mutate(time_taken_mins = round(.data$time_taken_secs / 60, 2)) %>%
dplyr::mutate(dbms = attr(attr(cdm, "cdm_source"), "source_type")) %>%
dplyr::mutate(person_n = cdm$person %>%
dplyr::count() %>%
dplyr::pull()) %>%
dplyr::mutate(min_observation_start = cdm$observation_period %>%
dplyr::summarise(
db_min_obs_start =
min(.data$observation_period_start_date,
na.rm = TRUE
)
) %>%
dplyr::pull()) %>%
dplyr::mutate(max_observation_end = cdm$observation_period %>%
dplyr::summarise(
max_observation_end =
max(.data$observation_period_end_date,
na.rm = TRUE
)
) %>%
dplyr::pull())
omopgenerics::dropSourceTable(
cdm = cdm,
name = dplyr::contains("denominator_typical")
)
omopgenerics::dropSourceTable(
cdm = cdm,
name = dplyr::contains("bench_outcome")
)
# as a summarised result
timings <- timings %>%
dplyr::mutate(
result_id = 1L,
cdm_name = omopgenerics::cdmName(cdm),
group_name = "task",
group_level = .data$task,
strata_name = "overall",
strata_level = "overall",
variable_name = "overall",
variable_level = "overall",
estimate_name = "time_taken_minutes",
estimate_type = "numeric",
estimate_value = as.character(.data$time_taken_mins),
additional_name = paste0(
"dbms &&& person_n &&& ",
"min_observation_start &&& ",
"max_observation_end"
),
additional_level = paste0(
.data$dbms, " &&& ",
.data$person_n, " &&& ",
.data$min_observation_start, " &&& ",
.data$max_observation_end
)
) %>%
dplyr::select(dplyr::all_of(
colnames(omopgenerics::emptySummarisedResult())
)) %>%
omopgenerics::newSummarisedResult(settings = dplyr::tibble(
result_id = 1L,
result_type = "IncidecnePrevalence benchmark",
package_name = "IncidencePrevalence",
package_version = as.character(utils::packageVersion("IncidencePrevalence"))
))
return(timings)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.