Nothing
# Copyright 2024 DARWIN EU (C)
#
# This file is part of CohortCharacteristics
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Summarise timing between entries into cohorts in a cohort table
#'
#' @param cohort A cohort table in a cdm reference.
#' @param cohortId A cohort definition id to restrict by. If NULL, all cohorts
#' will be included.
#' @param strata A list of variables to stratify results. These variables
#' must have been added as additional columns in the cohort table.
#' @param restrictToFirstEntry If TRUE only an individual's first entry per
#' cohort will be considered. If FALSE all entries per individual will be
#' considered.
#' @param estimates Summary statistics to use when summarising timing.
#' @param density deprecated.
#'
#' @return A summary of timing between entries into cohorts in the cohort table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(CohortCharacteristics)
#' library(dplyr, warn.conflicts = FALSE)
#'
#' cdm <- mockCohortCharacteristics(numberIndividuals = 100)
#'
#' summariseCohortTiming(cdm$cohort2) |>
#' glimpse()
#'
#' mockDisconnect(cdm)
#' }
#'
summariseCohortTiming <- function(cohort,
cohortId = NULL,
strata = list(),
restrictToFirstEntry = TRUE,
estimates = c("min", "q25", "median", "q75", "max", "density"),
density = lifecycle::deprecated()) {
if (lifecycle::is_present(density)) {
lifecycle::deprecate_soft(
when = "0.3.0", what = "summariseCohortTiming(density = )",
details = "Please include 'density' in the estimates vector instead."
)
if (density) estimates <- unique(c(estimates, "density"))
}
# validate inputs
cohort <- omopgenerics::validateCohortArgument(cohort)
cohortId <- omopgenerics::validateCohortIdArgument(cohortId, cohort)
omopgenerics::assertNumeric(cohortId, null = TRUE)
checkStrata(strata, cohort)
omopgenerics::assertTrue(all(c("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %in% colnames(cohort)))
omopgenerics::assertLogical(restrictToFirstEntry, length = 1)
omopgenerics::assertCharacter(estimates)
timing <- estimates
if (length(timing) == 0) {
return(omopgenerics::emptySummarisedResult())
}
cohort <- cohort |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) |>
PatientProfiles::addCohortName()
if (isTRUE(restrictToFirstEntry)) {
cohort <- cohort |>
dplyr::group_by(.data$subject_id, .data$cohort_definition_id) |>
dplyr::filter(.data$cohort_start_date == min(.data$cohort_start_date, na.rm = TRUE)) |>
dplyr::ungroup()
}
strataCols <- unlist(strata) |> unique()
# should we use addCohortIntersectDate instead to avoid potentially large number of rows?
cohort_timings <- cohort |>
dplyr::rename("cohort_name_reference" = "cohort_name") |>
dplyr::select(dplyr::all_of(c(
strataCols, "cohort_name_reference",
"cohort_start_date", "cohort_end_date",
"subject_id"
))) |>
dplyr::inner_join(
cohort |>
dplyr::rename_with(~ paste0(.x, "_comparator"),
.cols = c(
"cohort_definition_id", "cohort_start_date",
"cohort_end_date", "cohort_name"
)
) |>
dplyr::select(dplyr::all_of(c(
strataCols, "cohort_name_comparator",
"cohort_start_date_comparator", "cohort_end_date_comparator",
"subject_id"
))),
by = c("subject_id", strataCols)
) |>
dplyr::filter(.data$cohort_name_reference != .data$cohort_name_comparator) %>% # to be removed
dplyr::mutate(days_between_cohort_entries = as.integer(!!CDMConnector::datediff(
"cohort_start_date",
"cohort_start_date_comparator",
interval = "day"
))) |>
dplyr::select(!c(
"cohort_start_date", "cohort_end_date", "cohort_start_date_comparator",
"cohort_end_date_comparator"
)) |>
dplyr::collect()
if (nrow(cohort_timings) == 0) {
return(omopgenerics::emptySummarisedResult())
}
result <- cohort_timings |>
PatientProfiles::summariseResult(
group = c("cohort_name_reference", "cohort_name_comparator"),
includeOverallGroup = FALSE,
strata = strata,
includeOverallStrata = TRUE,
variables = "days_between_cohort_entries",
estimates = timing
) |>
dplyr::mutate("cdm_name" = omopgenerics::cdmName(cohort))
# get all combinations
group <- settings(cohort) |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) |>
dplyr::pull("cohort_name") |>
getCohortComp()
strata <- getStratas(cohort, strata)
if ("density" %in% estimates) {
estimates <- c(estimates[estimates != "density"], "density_x", "density_y")
}
combinations <- getCombinations(
group,
strata,
dplyr::tibble(
variable_name = c("number records", "number subjects"),
estimate_name = "count"
) |>
dplyr::union_all(dplyr::tibble(
variable_name = "days_between_cohort_entries",
estimate_name = estimates
))
) |>
dplyr::mutate(order_id = dplyr::row_number())
result <- result |>
dplyr::left_join(
combinations,
by = c(
"group_name", "group_level", "strata_name", "strata_level",
"variable_name", "estimate_name"
)
) |>
dplyr::arrange(.data$order_id, .data$variable_level) |>
dplyr::select(!"order_id")
result <- result |>
omopgenerics::newSummarisedResult(settings = dplyr::tibble(
"result_id" = 1L,
"package_name" = "CohortCharacteristics",
"package_version" = as.character(utils::packageVersion("CohortCharacteristics")),
"result_type" = "summarise_cohort_timing",
"restrict_to_first_entry" = restrictToFirstEntry
))
return(result)
}
getCohortComp <- function(cohortNames) {
tidyr::expand_grid(
cohort_name_reference = cohortNames, cohort_name_comparator = cohortNames
) |>
dplyr::filter(.data$cohort_name_reference != .data$cohort_name_comparator) |>
visOmopResults::uniteGroup(
cols = c("cohort_name_reference", "cohort_name_comparator")
)
}
getStratas <- function(data, strata) {
res <- purrr::map(strata, \(x) {
data |>
dplyr::select(dplyr::all_of(x)) |>
dplyr::distinct() |>
dplyr::collect() |>
dplyr::arrange(dplyr::across(dplyr::everything())) |>
visOmopResults::uniteStrata(cols = x)
}) |>
dplyr::bind_rows()
dplyr::tibble(strata_name = "overall", strata_level = "overall") |>
dplyr::bind_rows(res)
}
getCombinations <- function(...) {
x <- list(...)
opts <- list()
ids <- omopgenerics::uniqueId(length(x), nChar = 1)
for (k in seq_along(x)) {
x[[k]] <- x[[k]] |>
dplyr::mutate(!!ids[k] := dplyr::row_number())
opts[[ids[k]]] <- x[[k]][[ids[k]]]
}
combinations <- do.call(tidyr::expand_grid, opts)
for (k in seq_along(x)) {
combinations <- combinations |>
dplyr::left_join(x[[k]], by = ids[k]) |>
dplyr::select(!dplyr::all_of(ids[k]))
}
return(combinations)
}
sortWindow <- function(window) {
window |>
purrr::imap(\(x, idx) dplyr::tibble(
window_name = idx, first = x[1], second = x[2]
)) |>
dplyr::bind_rows() |>
dplyr::arrange(.data$first, .data$second) |>
dplyr::pull("window_name")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.