Nothing
# Copyright 2024 DARWIN EU (C)
#
# This file is part of PatientProfiles
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
.addIntersect <- function(x,
tableName,
value,
filterVariable = NULL,
filterId = NULL,
idName = NULL,
window = list(c(0, Inf)),
indexDate = "cohort_start_date",
censorDate = NULL,
targetStartDate = startDateColumn(tableName),
targetEndDate = endDateColumn(tableName),
inObservation = TRUE,
order = "first",
allowDuplicates = FALSE,
nameStyle = "{value}_{id_name}_{window_name}",
name = NULL) {
comp <- newTable(name)
if (!is.list(window)) {
window <- list(window)
}
targetStartDate <- eval(targetStartDate)
targetEndDate <- eval(targetEndDate)
cdm <- omopgenerics::cdmReference(x)
# initial checks
personVariable <- checkX(x)
omopgenerics::assertCharacter(tableName, length = 1, na = FALSE)
omopgenerics::assertCharacter(tableName)
checkCdm(cdm, tableName)
personVariableTable <- checkX(cdm[[tableName]])
extraValue <- checkValue(value, cdm[[tableName]], tableName)
filterTbl <- checkFilter(filterVariable, filterId, idName, cdm[[tableName]])
window <- omopgenerics::validateWindowArgument(window)
checkVariableInX(indexDate, x)
checkVariableInX(targetStartDate, cdm[[tableName]], FALSE, "targetStartDate")
checkVariableInX(targetEndDate, cdm[[tableName]], TRUE, "targetEndDate")
omopgenerics::assertChoice(order, choices = c("first", "last"))
checkVariableInX(censorDate, x, TRUE, "censorDate")
omopgenerics::assertLogical(allowDuplicates, length = 1)
if (!is.null(censorDate)) {
checkCensorDate(x, censorDate)
}
if (!is.null(idName)) {
idName <- checkSnakeCase(idName)
}
tablePrefix <- omopgenerics::tmpPrefix()
# define overlapTable that contains the events of interest
overlapTable <- cdm[[tableName]]
if (!is.null(filterTbl)) {
filterTbl <- filterTbl |>
dplyr::rename(!!filterVariable := "id")
filterTblName <- omopgenerics::uniqueTableName(tablePrefix)
cdm <- omopgenerics::insertTable(
cdm = cdm, name = filterTblName, table = filterTbl, overwrite = TRUE
)
overlapTable <- overlapTable |>
dplyr::inner_join(cdm[[filterTblName]], by = filterVariable)
} else {
filterTbl <- dplyr::tibble(id_name = "all")
overlapTable <- dplyr::mutate(overlapTable, "id_name" = "all")
}
values <- list(
"id_name" = filterTbl$id_name,
"window_name" = names(window),
"value" = value
)
assertNameStyle(nameStyle, values)
x <- warnOverwriteColumns(x = x, nameStyle = nameStyle, values = values)
# columns that will be added
newCols <- expand.grid(
value = value,
id_name = filterTbl$id_name,
window_name = names(window)
) |>
dplyr::as_tibble() |>
dplyr::mutate(colnam = as.character(glue::glue(
nameStyle,
value = .data$value,
id_name = .data$id_name,
window_name = .data$window_name
))) |>
dplyr::mutate(colnam = checkSnakeCase(.data$colnam, verbose = F)) |>
dplyr::inner_join(
dplyr::tibble(
window_name = names(window),
w1 = purrr::flatten_dbl(purrr::map(window, \(x) x[1])),
w2 = purrr::flatten_dbl(purrr::map(window, \(x) x[2]))
),
by = "window_name"
)
nameStyle <- stringr::str_replace(nameStyle, "\\{value\\}", "\\{.value\\}")
overlapTable <- overlapTable |>
dplyr::select(
!!personVariable := dplyr::all_of(personVariableTable),
"start_date" = dplyr::all_of(targetStartDate),
"end_date" = dplyr::all_of(targetEndDate %||% targetStartDate),
dplyr::all_of(extraValue),
"id_name"
) |>
dplyr::mutate(end_date = dplyr::coalesce(.data$end_date, .data$start_date))
result <- x |>
dplyr::select(
dplyr::all_of(personVariable),
"index_date" = dplyr::all_of(indexDate),
"censor_date" = dplyr::any_of(censorDate)
) |>
dplyr::distinct()
if (any(value %in% c("count", "flag"))) {
idsObs <- omopgenerics::uniqueId(n = 2, exclude = colnames(x))
qInObservation <- newCols$colnam |>
rlang::set_names() |>
purrr::map(\(col) {
w1 <- newCols$w1[newCols$colnam == col]
w2 <- newCols$w2[newCols$colnam == col]
if (is.infinite(w1)) {
if (is.infinite(w2)) {
res <- NULL
} else {
res <- 'dplyr::if_else(.data${idsObs[1]} <= {sprintf("%.0f", w2)}, .data[["{col}"]], NA)'
}
} else if (is.infinite(w2)) {
res <- 'dplyr::if_else(.data${idsObs[2]} >= {sprintf("%.0f", w1)}, .data[["{col}"]], NA)'
} else {
res <- 'dplyr::if_else(.data${idsObs[1]} <= {sprintf("%.0f", w2)} & .data${idsObs[2]} >= {sprintf("%.0f", w1)}, .data[["{col}"]], NA)'
}
glue::glue(res)
}) |>
unlist() |>
rlang::parse_exprs()
if (length(qInObservation) > 0) {
renamePersonId <- rlang::set_names("person_id", personVariable)
renameDates <- rlang::set_names(
c("observation_period_start_date", "observation_period_end_date"), idsObs
)
individualsInObservation <- x |>
dplyr::select(dplyr::all_of(c(personVariable, indexDate))) |>
dplyr::distinct() |>
dplyr::inner_join(
cdm$observation_period |>
dplyr::select(dplyr::all_of(c(renamePersonId, renameDates))),
by = personVariable
) |>
dplyr::filter(
.data[[indexDate]] >= .data[[idsObs[1]]] &
.data[[indexDate]] <= .data[[idsObs[2]]]
) %>%
dplyr::mutate(
!!idsObs[1] := as.integer(!!CDMConnector::datediff(start = indexDate, end = idsObs[1])),
!!idsObs[2] := as.integer(!!CDMConnector::datediff(start = indexDate, end = idsObs[2]))
) |>
dplyr::compute(name = omopgenerics::uniqueTableName(prefix = tablePrefix))
}
}
if (isTRUE(inObservation)) {
sel <- c("person_id", "observation_period_start_date", "observation_period_end_date") |>
rlang::set_names(c(personVariable, "start_obs", "end_obs"))
result <- result |>
dplyr::inner_join(
cdm$observation_period |>
dplyr::select(dplyr::all_of(sel)),
by = personVariable
) |>
dplyr::filter(
.data$start_obs <= .data$index_date & .data$index_date <= .data$end_obs
)
}
result <- result |>
dplyr::inner_join(overlapTable, by = personVariable)
if (!is.null(censorDate)) {
result <- result |>
dplyr::filter(.data$start_date <= .data$censor_date)
}
if (isTRUE(inObservation)) {
result <- result |>
dplyr::filter(
.data$start_obs <= .data$end_date & .data$start_date <= .data$end_obs
)
}
result <- result %>%
dplyr::mutate(
"start" = !!CDMConnector::datediff("index_date", "start_date"),
"end" = !!CDMConnector::datediff("index_date", "end_date")
) |>
dplyr::select(!dplyr::any_of(c(
"censor_date", "start_date", "end_date", "start_obs", "end_obs"
))) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
resultCountFlag <- NULL
resultDateTimeOther <- NULL
# Start loop for different windows
for (i in seq_along(window)) {
win <- window[[i]]
if (is.infinite(win[1])) {
if (is.infinite(win[2])) {
resultW <- result
} else {
resultW <- result |> dplyr::filter(.data$start <= !!win[2])
}
} else {
if (is.infinite(win[2])) {
resultW <- result |> dplyr::filter(.data$end >= !!win[1])
} else {
resultW <- result |>
dplyr::filter(.data$end >= !!win[1] & .data$start <= !!win[2])
}
}
resultW <- resultW |>
dplyr::select(-"end") |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
# add count or flag
if ("count" %in% value | "flag" %in% value) {
if (identical("flag", value)) {
resultCF <- resultW |>
dplyr::distinct(.data[[personVariable]], .data$index_date, .data$id_name) |>
dplyr::mutate(flag = 1)
} else {
resultCF <- resultW |>
dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id_name) |>
dplyr::summarise(count = as.numeric(dplyr::n()), .groups = "drop")
if ("flag" %in% value) {
resultCF <- resultCF |> dplyr::mutate(flag = 1)
}
if (!("count" %in% value)) {
resultCF <- resultCF |> dplyr::select(-"count")
}
}
resultCF <- resultCF |>
dplyr::mutate("window_name" = !!tolower(names(window)[i]))
if (i == 1) {
resultCountFlag <- resultCF |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
} else {
resultCountFlag <- resultCountFlag |>
dplyr::union_all(resultCF) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
}
}
# add date, time or other
if (length(value[!(value %in% c("count", "flag"))]) > 0) {
if (length(extraValue) > 0) {
resultDTO <- resultW |>
dplyr::select(dplyr::all_of(c(personVariable, "index_date", "id_name", extraValue, "days" = "start"))) |>
dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id_name)
if (order == "first") {
resultDTO <- resultDTO |>
dplyr::filter(.data$days == min(.data$days, na.rm = TRUE))
} else {
resultDTO <- resultDTO |>
dplyr::filter(.data$days == max(.data$days, na.rm = TRUE))
}
if ("date" %in% value) {
resultDTO <- resultDTO %>%
dplyr::mutate(date = as.Date(!!CDMConnector::dateadd("index_date", "days")))
}
} else {
resultDTO <- resultW |>
dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id_name)
if (order == "first") {
resultDTO <- resultDTO |>
dplyr::summarise(
days = min(.data$start, na.rm = TRUE), .groups = "drop"
)
} else {
resultDTO <- resultDTO |>
dplyr::summarise(
days = max(.data$start, na.rm = TRUE), .groups = "drop"
)
}
if ("date" %in% value) {
resultDTO <- resultDTO %>%
dplyr::mutate(date = as.Date(!!CDMConnector::dateadd("index_date", "days")))
}
}
resultDTO <- resultDTO |>
dplyr::mutate("window_name" = !!tolower(names(window)[i]))
if (!("days" %in% value)) {
resultDTO <- dplyr::select(resultDTO, -"days")
}
if (i == 1) {
resultDateTimeOther <- resultDTO |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
} else {
resultDateTimeOther <- resultDateTimeOther |>
dplyr::union_all(resultDTO) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
}
}
}
if (any(c("flag", "count") %in% value)) {
values <- value[value %in% c("count", "flag")]
resultCountFlagPivot <- resultCountFlag |>
tidyr::pivot_wider(
names_from = c("id_name", "window_name"),
values_from = dplyr::any_of(values),
names_glue = nameStyle,
values_fill = 0
) |>
dplyr::rename(!!indexDate := "index_date") |>
dplyr::rename_all(tolower) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
newColCountFlag <- colnames(resultCountFlagPivot)
newColCountFlag <- newColCountFlag[newColCountFlag %in% newCols$colnam]
x <- x |>
dplyr::left_join(
resultCountFlagPivot,
by = c(personVariable, indexDate)
) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
x <- x |>
dplyr::mutate(dplyr::across(
dplyr::all_of(newColCountFlag), ~ dplyr::if_else(is.na(.x), 0, .x)
)) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
}
if (length(value[!(value %in% c("count", "flag"))]) > 0) {
values <- value[!(value %in% c("count", "flag"))]
if (length(extraValue) > 0 & !allowDuplicates) {
duplicates <- resultDateTimeOther |>
dplyr::select(
dplyr::all_of(personVariable), "index_date",
dplyr::all_of(extraValue), "id_name", "window_name"
) |>
dplyr::group_by(dplyr::across(dplyr::all_of(c(
personVariable, "index_date", "id_name", "window_name"
)))) |>
dplyr::filter(dplyr::n() > 1) |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::pull()
if (duplicates > 0) {
cli::cli_abort(c(
x = "There are {duplicates} row{?s} in {.strong {tableName}} with same
{.var {c(personVariable, filterVariable, targetStartDate)}}, solve
duplications or swicth {.pkg allowDuplicates} to TRUE.",
i = "NOTE that `allowDuplicates = TRUE` can have different and
unpredictable behavior depending on the cdm_source."
))
}
}
resultDateTimeOther <- resultDateTimeOther |>
dplyr::select(
dplyr::all_of(personVariable), "index_date", dplyr::all_of(values),
"id_name", "window_name"
) |>
tidyr::pivot_wider(
names_from = c("id_name", "window_name"),
values_from = dplyr::all_of(values),
names_glue = nameStyle
) |>
dplyr::rename(!!indexDate := "index_date") |>
dplyr::rename_all(tolower)
x <- x |>
dplyr::left_join(
resultDateTimeOther, by = c(personVariable, indexDate)
) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
}
# missing columns
createMissingCols <- newCols |>
dplyr::filter(!.data$colnam %in% colnames(x)) |>
dplyr::pull("colnam") |>
rlang::set_names() |>
purrr::map_chr(\(x) {
val <- as.character(newCols$value[newCols$colnam == x])
switch(val,
flag = "0",
count = "0",
days = "as.numeric(NA)",
date = "as.Date(NA)",
"as.character(NA)")
}) |>
rlang::parse_exprs()
if (length(createMissingCols) > 0) {
x <- x |>
dplyr::mutate(!!!createMissingCols) |>
dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
}
if (any(value %in% c("count", "flag"))) {
if (length(qInObservation) > 0) {
x <- x |>
dplyr::left_join(individualsInObservation, by = c(personVariable, indexDate)) |>
dplyr::mutate(!!!qInObservation) |>
dplyr::select(!dplyr::all_of(idsObs))
}
}
x <- x |>
dplyr::compute(name = comp$name, temporary = comp$temporary)
omopgenerics::dropSourceTable(
cdm = cdm, name = dplyr::starts_with(tablePrefix)
)
return(x)
}
filterWindow <- function(w1, w2, col) {
}
#' Get the name of the start date column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the start date column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' startDateColumn("condition_occurrence")
#' }
#'
startDateColumn <- function(tableName) {
if (tableName %in% omopgenerics::omopTables()) {
col <- omopgenerics::omopColumns(table = tableName, field = "start_date")
} else {
col <- "cohort_start_date"
}
return(col)
}
#' Get the name of the end date column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the end date column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' endDateColumn("condition_occurrence")
#' }
#'
endDateColumn <- function(tableName) {
if (tableName %in% omopgenerics::omopTables()) {
col <- omopgenerics::omopColumns(table = tableName, field = "end_date")
} else {
col <- "cohort_end_date"
}
return(col)
}
#' Get the name of the standard concept_id column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the concept_id column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' standardConceptIdColumn("condition_occurrence")
#' }
#'
standardConceptIdColumn <- function(tableName) {
if (tableName %in% omopgenerics::omopTables()) {
col <- omopgenerics::omopColumns(table = tableName, field = "standard_concept")
} else {
col <- "cohort_definition_id"
}
return(col)
}
#' Get the name of the source concept_id column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the source_concept_id column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' sourceConceptIdColumn("condition_occurrence")
#' }
#'
sourceConceptIdColumn <- function(tableName) {
if (tableName %in% omopgenerics::omopTables()) {
col <- omopgenerics::omopColumns(table = tableName, field = "source_concept")
} else {
col <- NA_character_
}
return(col)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.