R/addIntersect.R
In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model

Documented in endDateColumn sourceConceptIdColumn standardConceptIdColumn startDateColumn

# Copyright 2024 DARWIN EU (C)
#
# This file is part of PatientProfiles
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

.addIntersect <- function(x,
                          tableName,
                          value,
                          filterVariable = NULL,
                          filterId = NULL,
                          idName = NULL,
                          window = list(c(0, Inf)),
                          indexDate = "cohort_start_date",
                          censorDate = NULL,
                          targetStartDate = startDateColumn(tableName),
                          targetEndDate = endDateColumn(tableName),
                          inObservation = TRUE,
                          order = "first",
                          allowDuplicates = FALSE,
                          nameStyle = "{value}_{id_name}_{window_name}",
                          name = NULL) {
  comp <- newTable(name)
  if (!is.list(window)) {
    window <- list(window)
  }

  targetStartDate <- eval(targetStartDate)
  targetEndDate <- eval(targetEndDate)

  cdm <- omopgenerics::cdmReference(x)
  # initial checks
  personVariable <- checkX(x)
  omopgenerics::assertCharacter(tableName, length = 1, na = FALSE)
  omopgenerics::assertCharacter(tableName)
  checkCdm(cdm, tableName)
  personVariableTable <- checkX(cdm[[tableName]])
  extraValue <- checkValue(value, cdm[[tableName]], tableName)
  filterTbl <- checkFilter(filterVariable, filterId, idName, cdm[[tableName]])
  window <- omopgenerics::validateWindowArgument(window)
  checkVariableInX(indexDate, x)
  checkVariableInX(targetStartDate, cdm[[tableName]], FALSE, "targetStartDate")
  checkVariableInX(targetEndDate, cdm[[tableName]], TRUE, "targetEndDate")
  omopgenerics::assertChoice(order, choices = c("first", "last"))
  checkVariableInX(censorDate, x, TRUE, "censorDate")
  omopgenerics::assertLogical(allowDuplicates, length = 1)

  if (!is.null(censorDate)) {
    checkCensorDate(x, censorDate)
  }
  if (!is.null(idName)) {
    idName <- checkSnakeCase(idName)
  }

  tablePrefix <- omopgenerics::tmpPrefix()

  # define overlapTable that contains the events of interest
  overlapTable <- cdm[[tableName]]
  if (!is.null(filterTbl)) {
    filterTbl <- filterTbl |>
      dplyr::rename(!!filterVariable := "id")
    filterTblName <- omopgenerics::uniqueTableName(tablePrefix)
    cdm <- omopgenerics::insertTable(
      cdm = cdm, name = filterTblName, table = filterTbl, overwrite = TRUE
    )
    overlapTable <- overlapTable |>
      dplyr::inner_join(cdm[[filterTblName]], by = filterVariable)
  } else {
    filterTbl <- dplyr::tibble(id_name = "all")
    overlapTable <- dplyr::mutate(overlapTable, "id_name" = "all")
  }

  values <- list(
    "id_name" = filterTbl$id_name,
    "window_name" = names(window),
    "value" = value
  )
  assertNameStyle(nameStyle, values)
  x <- warnOverwriteColumns(x = x, nameStyle = nameStyle, values = values)

  # columns that will be added
  newCols <- expand.grid(
    value = value,
    id_name = filterTbl$id_name,
    window_name = names(window)
  ) |>
    dplyr::as_tibble() |>
    dplyr::mutate(colnam = as.character(glue::glue(
      nameStyle,
      value = .data$value,
      id_name = .data$id_name,
      window_name = .data$window_name
    ))) |>
    dplyr::mutate(colnam = checkSnakeCase(.data$colnam, verbose = F)) |>
    dplyr::inner_join(
      dplyr::tibble(
        window_name = names(window),
        w1 = purrr::flatten_dbl(purrr::map(window, \(x) x[1])),
        w2 = purrr::flatten_dbl(purrr::map(window, \(x) x[2]))
      ),
      by = "window_name"
    )

  nameStyle <- stringr::str_replace(nameStyle, "\\{value\\}", "\\{.value\\}")

  overlapTable <- overlapTable |>
    dplyr::select(
      !!personVariable := dplyr::all_of(personVariableTable),
      "start_date" = dplyr::all_of(targetStartDate),
      "end_date" = dplyr::all_of(targetEndDate %||% targetStartDate),
      dplyr::all_of(extraValue),
      "id_name"
    ) |>
    dplyr::mutate(end_date = dplyr::coalesce(.data$end_date, .data$start_date))

  result <- x |>
    dplyr::select(
      dplyr::all_of(personVariable),
      "index_date" = dplyr::all_of(indexDate),
      "censor_date" = dplyr::any_of(censorDate)
    ) |>
    dplyr::distinct()

  if (any(value %in% c("count", "flag"))) {
    idsObs <- omopgenerics::uniqueId(n = 2, exclude = colnames(x))
    qInObservation <- newCols$colnam |>
      rlang::set_names() |>
      purrr::map(\(col) {
        w1 <- newCols$w1[newCols$colnam == col]
        w2 <- newCols$w2[newCols$colnam == col]
        if (is.infinite(w1)) {
          if (is.infinite(w2)) {
            res <- NULL
          } else {
            res <- 'dplyr::if_else(.data${idsObs[1]} <= {sprintf("%.0f", w2)}, .data[["{col}"]], NA)'
          }
        } else if (is.infinite(w2)) {
          res <- 'dplyr::if_else(.data${idsObs[2]} >= {sprintf("%.0f", w1)}, .data[["{col}"]], NA)'
        } else {
          res <- 'dplyr::if_else(.data${idsObs[1]} <= {sprintf("%.0f", w2)} & .data${idsObs[2]} >= {sprintf("%.0f", w1)}, .data[["{col}"]], NA)'
        }
        glue::glue(res)
      }) |>
      unlist() |>
      rlang::parse_exprs()
    if (length(qInObservation) > 0) {
      renamePersonId <- rlang::set_names("person_id", personVariable)
      renameDates <- rlang::set_names(
        c("observation_period_start_date", "observation_period_end_date"), idsObs
      )
      individualsInObservation <- x |>
        dplyr::select(dplyr::all_of(c(personVariable, indexDate))) |>
        dplyr::distinct() |>
        dplyr::inner_join(
          cdm$observation_period |>
            dplyr::select(dplyr::all_of(c(renamePersonId, renameDates))),
          by = personVariable
        ) |>
        dplyr::filter(
          .data[[indexDate]] >= .data[[idsObs[1]]] &
            .data[[indexDate]] <= .data[[idsObs[2]]]
        ) %>%
        dplyr::mutate(
          !!idsObs[1] := as.integer(!!CDMConnector::datediff(start = indexDate, end = idsObs[1])),
          !!idsObs[2] := as.integer(!!CDMConnector::datediff(start = indexDate, end = idsObs[2]))
        ) |>
        dplyr::compute(name = omopgenerics::uniqueTableName(prefix = tablePrefix))
    }
  }

  if (isTRUE(inObservation)) {
    sel <- c("person_id", "observation_period_start_date", "observation_period_end_date") |>
      rlang::set_names(c(personVariable, "start_obs", "end_obs"))
    result <- result |>
      dplyr::inner_join(
        cdm$observation_period |>
          dplyr::select(dplyr::all_of(sel)),
        by = personVariable
      ) |>
      dplyr::filter(
        .data$start_obs <= .data$index_date & .data$index_date <= .data$end_obs
      )
  }

  result <- result |>
    dplyr::inner_join(overlapTable, by = personVariable)

  if (!is.null(censorDate)) {
    result <- result |>
      dplyr::filter(.data$start_date <= .data$censor_date)
  }

  if (isTRUE(inObservation)) {
    result <- result |>
      dplyr::filter(
        .data$start_obs <= .data$end_date & .data$start_date <= .data$end_obs
      )
  }

  result <- result %>%
    dplyr::mutate(
      "start" = !!CDMConnector::datediff("index_date", "start_date"),
      "end" = !!CDMConnector::datediff("index_date", "end_date")
    ) |>
    dplyr::select(!dplyr::any_of(c(
      "censor_date", "start_date", "end_date", "start_obs", "end_obs"
    ))) |>
    dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))

  resultCountFlag <- NULL
  resultDateTimeOther <- NULL
  # Start loop for different windows

  for (i in seq_along(window)) {
    win <- window[[i]]
    if (is.infinite(win[1])) {
      if (is.infinite(win[2])) {
        resultW <- result
      } else {
        resultW <- result |> dplyr::filter(.data$start <= !!win[2])
      }
    } else {
      if (is.infinite(win[2])) {
        resultW <- result |> dplyr::filter(.data$end >= !!win[1])
      } else {
        resultW <- result |>
          dplyr::filter(.data$end >= !!win[1] & .data$start <= !!win[2])
      }
    }

    resultW <- resultW |>
      dplyr::select(-"end") |>
      dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))

    # add count or flag
    if ("count" %in% value | "flag" %in% value) {
      if (identical("flag", value)) {
        resultCF <- resultW |>
          dplyr::distinct(.data[[personVariable]], .data$index_date, .data$id_name) |>
          dplyr::mutate(flag = 1)
      } else {
        resultCF <- resultW |>
          dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id_name) |>
          dplyr::summarise(count = as.numeric(dplyr::n()), .groups = "drop")
        if ("flag" %in% value) {
          resultCF <- resultCF |> dplyr::mutate(flag = 1)
        }
        if (!("count" %in% value)) {
          resultCF <- resultCF |> dplyr::select(-"count")
        }
      }
      resultCF <- resultCF |>
        dplyr::mutate("window_name" = !!tolower(names(window)[i]))

      if (i == 1) {
        resultCountFlag <- resultCF |>
          dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
      } else {
        resultCountFlag <- resultCountFlag |>
          dplyr::union_all(resultCF) |>
          dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
      }
    }
    # add date, time or other
    if (length(value[!(value %in% c("count", "flag"))]) > 0) {
      if (length(extraValue) > 0) {
        resultDTO <- resultW |>
          dplyr::select(dplyr::all_of(c(personVariable, "index_date", "id_name", extraValue, "days" = "start"))) |>
          dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id_name)
        if (order == "first") {
          resultDTO <- resultDTO |>
            dplyr::filter(.data$days == min(.data$days, na.rm = TRUE))
        } else {
          resultDTO <- resultDTO |>
            dplyr::filter(.data$days == max(.data$days, na.rm = TRUE))
        }
        if ("date" %in% value) {
          resultDTO <- resultDTO %>%
            dplyr::mutate(date = as.Date(!!CDMConnector::dateadd("index_date", "days")))
        }
      } else {
        resultDTO <- resultW |>
          dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id_name)
        if (order == "first") {
          resultDTO <- resultDTO |>
            dplyr::summarise(
              days = min(.data$start, na.rm = TRUE), .groups = "drop"
            )
        } else {
          resultDTO <- resultDTO |>
            dplyr::summarise(
              days = max(.data$start, na.rm = TRUE), .groups = "drop"
            )
        }
        if ("date" %in% value) {
          resultDTO <- resultDTO %>%
            dplyr::mutate(date = as.Date(!!CDMConnector::dateadd("index_date", "days")))
        }
      }

      resultDTO <- resultDTO |>
        dplyr::mutate("window_name" = !!tolower(names(window)[i]))
      if (!("days" %in% value)) {
        resultDTO <- dplyr::select(resultDTO, -"days")
      }
      if (i == 1) {
        resultDateTimeOther <- resultDTO |>
          dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
      } else {
        resultDateTimeOther <- resultDateTimeOther |>
          dplyr::union_all(resultDTO) |>
          dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
      }
    }
  }

  if (any(c("flag", "count") %in% value)) {
    values <- value[value %in% c("count", "flag")]
    resultCountFlagPivot <- resultCountFlag |>
      tidyr::pivot_wider(
        names_from = c("id_name", "window_name"),
        values_from = dplyr::any_of(values),
        names_glue = nameStyle,
        values_fill = 0
      ) |>
      dplyr::rename(!!indexDate := "index_date") |>
      dplyr::rename_all(tolower) |>
      dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))

    newColCountFlag <- colnames(resultCountFlagPivot)
    newColCountFlag <- newColCountFlag[newColCountFlag %in% newCols$colnam]

    x <- x |>
      dplyr::left_join(
        resultCountFlagPivot,
        by = c(personVariable, indexDate)
      ) |>
      dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))

    x <- x |>
      dplyr::mutate(dplyr::across(
        dplyr::all_of(newColCountFlag), ~ dplyr::if_else(is.na(.x), 0, .x)
      )) |>
      dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
  }

  if (length(value[!(value %in% c("count", "flag"))]) > 0) {
    values <- value[!(value %in% c("count", "flag"))]

    if (length(extraValue) > 0 & !allowDuplicates) {
      duplicates <- resultDateTimeOther |>
        dplyr::select(
          dplyr::all_of(personVariable), "index_date",
          dplyr::all_of(extraValue), "id_name", "window_name"
        ) |>
        dplyr::group_by(dplyr::across(dplyr::all_of(c(
          personVariable, "index_date", "id_name", "window_name"
        )))) |>
        dplyr::filter(dplyr::n() > 1) |>
        dplyr::ungroup() |>
        dplyr::tally() |>
        dplyr::pull()
      if (duplicates > 0) {
        cli::cli_abort(c(
          x = "There are {duplicates} row{?s} in {.strong {tableName}} with same
          {.var {c(personVariable, filterVariable, targetStartDate)}}, solve
          duplications or swicth {.pkg allowDuplicates} to TRUE.",
          i = "NOTE that `allowDuplicates = TRUE` can have different and
          unpredictable behavior depending on the cdm_source."
        ))
      }
    }

    resultDateTimeOther <- resultDateTimeOther |>
      dplyr::select(
        dplyr::all_of(personVariable), "index_date", dplyr::all_of(values),
        "id_name", "window_name"
      ) |>
      tidyr::pivot_wider(
        names_from = c("id_name", "window_name"),
        values_from = dplyr::all_of(values),
        names_glue = nameStyle
      ) |>
      dplyr::rename(!!indexDate := "index_date") |>
      dplyr::rename_all(tolower)

    x <- x |>
      dplyr::left_join(
        resultDateTimeOther, by = c(personVariable, indexDate)
      ) |>
      dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
  }

  # missing columns
  createMissingCols <- newCols |>
    dplyr::filter(!.data$colnam %in% colnames(x)) |>
    dplyr::pull("colnam") |>
    rlang::set_names() |>
    purrr::map_chr(\(x) {
      val <- as.character(newCols$value[newCols$colnam == x])
      switch(val,
             flag = "0",
             count = "0",
             days = "as.numeric(NA)",
             date = "as.Date(NA)",
             "as.character(NA)")
    }) |>
    rlang::parse_exprs()
  if (length(createMissingCols) > 0) {
    x <- x |>
      dplyr::mutate(!!!createMissingCols) |>
      dplyr::compute(name = omopgenerics::uniqueTableName(tablePrefix))
  }

  if (any(value %in% c("count", "flag"))) {
    if (length(qInObservation) > 0) {
      x <- x |>
        dplyr::left_join(individualsInObservation, by = c(personVariable, indexDate)) |>
        dplyr::mutate(!!!qInObservation) |>
        dplyr::select(!dplyr::all_of(idsObs))
    }
  }

  x <- x |>
    dplyr::compute(name = comp$name, temporary = comp$temporary)

  omopgenerics::dropSourceTable(
    cdm = cdm, name = dplyr::starts_with(tablePrefix)
  )

  return(x)
}
filterWindow <- function(w1, w2, col) {

}

#' Get the name of the start date column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the start date column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' startDateColumn("condition_occurrence")
#' }
#'
startDateColumn <- function(tableName) {
  if (tableName %in% omopgenerics::omopTables()) {
    col <- omopgenerics::omopColumns(table = tableName, field = "start_date")
  } else {
    col <- "cohort_start_date"
  }
  return(col)
}

#' Get the name of the end date column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the end date column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' endDateColumn("condition_occurrence")
#' }
#'
endDateColumn <- function(tableName) {
  if (tableName %in% omopgenerics::omopTables()) {
    col <- omopgenerics::omopColumns(table = tableName, field = "end_date")
  } else {
    col <- "cohort_end_date"
  }
  return(col)
}

#' Get the name of the standard concept_id column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the concept_id column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' standardConceptIdColumn("condition_occurrence")
#' }
#'
standardConceptIdColumn <- function(tableName) {
  if (tableName %in% omopgenerics::omopTables()) {
    col <- omopgenerics::omopColumns(table = tableName, field = "standard_concept")
  } else {
    col <- "cohort_definition_id"
  }
  return(col)
}

#' Get the name of the source concept_id column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the source_concept_id column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' sourceConceptIdColumn("condition_occurrence")
#' }
#'
sourceConceptIdColumn <- function(tableName) {
  if (tableName %in% omopgenerics::omopTables()) {
    col <- omopgenerics::omopColumns(table = tableName, field = "source_concept")
  } else {
    col <- NA_character_
  }
  return(col)
}
Any scripts or data that you put into this service are public.
PatientProfiles documentation built on June 28, 2025, 1:09 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PatientProfiles
Identify Characteristics of Patients in the OMOP Common Data Model

R/addIntersect.R
In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model

Defines functions sourceConceptIdColumn standardConceptIdColumn endDateColumn startDateColumn filterWindow .addIntersect

Documented in endDateColumn sourceConceptIdColumn standardConceptIdColumn startDateColumn

Try the PatientProfiles package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PatientProfiles Identify Characteristics of Patients in the OMOP Common Data Model

R/addIntersect.R In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model

Defines functions sourceConceptIdColumn standardConceptIdColumn endDateColumn startDateColumn filterWindow .addIntersect

Documented in endDateColumn sourceConceptIdColumn standardConceptIdColumn startDateColumn

Try the PatientProfiles package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PatientProfiles
Identify Characteristics of Patients in the OMOP Common Data Model

R/addIntersect.R
In PatientProfiles: Identify Characteristics of Patients in the OMOP Common Data Model