R/addIntersect.R

Defines functions .addIntersect

# Copyright 2024 DARWIN EU (C)
#
# This file is part of PatientProfiles
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

.addIntersect <- function(x,
                          tableName,
                          value,
                          filterVariable = NULL,
                          filterId = NULL,
                          idName = NULL,
                          window = list(c(0, Inf)),
                          indexDate = "cohort_start_date",
                          censorDate = NULL,
                          targetStartDate = startDateColumn(tableName),
                          targetEndDate = endDateColumn(tableName),
                          inObservation = TRUE,
                          order = "first",
                          nameStyle = "{value}_{id_name}_{window_name}",
                          name = NULL) {
  comp <- newTable(name)
  if (!is.list(window)) {
    window <- list(window)
  }

  targetStartDate <- eval(targetStartDate)
  targetEndDate <- eval(targetEndDate)

  cdm <- omopgenerics::cdmReference(x)
  # initial checks
  personVariable <- checkX(x)
  omopgenerics::assertCharacter(tableName, length = 1, na = FALSE)
  omopgenerics::assertCharacter(tableName)
  checkCdm(cdm, tableName)
  personVariableTable <- checkX(cdm[[tableName]])
  extraValue <- checkValue(value, cdm[[tableName]], tableName)
  filterTbl <- checkFilter(filterVariable, filterId, idName, cdm[[tableName]])
  window <- omopgenerics::validateWindowArgument(window)
  checkVariableInX(indexDate, x)
  checkVariableInX(targetStartDate, cdm[[tableName]], FALSE, "targetStartDate")
  checkVariableInX(targetEndDate, cdm[[tableName]], TRUE, "targetEndDate")
  omopgenerics::assertChoice(order, choices = c("first", "last"))
  checkVariableInX(censorDate, x, TRUE, "censorDate")

  if (!is.null(censorDate)) {
    checkCensorDate(x, censorDate)
  }
  if (!is.null(idName)) {
    idName <- checkSnakeCase(idName)
  }

  tablePrefix <- omopgenerics::tmpPrefix()

  # define overlapTable that contains the events of interest
  overlapTable <- cdm[[tableName]]
  if (!is.null(filterTbl)) {
    overlapTable <- overlapTable %>%
      dplyr::filter(.data[[filterVariable]] %in% .env$filterId)
  } else {
    filterVariable <- "id"
    filterTbl <- dplyr::tibble("id" = 1, "id_name" = "all")
    overlapTable <- dplyr::mutate(overlapTable, "id" = 1)
  }

  values <- list(
    "id_name" = filterTbl$id_name,
    "window_name" = names(window),
    "value" = value
  )
  assertNameStyle(nameStyle, values)
  x <- warnOverwriteColumns(x = x, nameStyle = nameStyle, values = values)

  # columns that will be added
  newCols <- expand.grid(
    value = value,
    id_name = filterTbl$id_name,
    window_name = names(window)
  ) %>%
    dplyr::as_tibble() %>%
    dplyr::mutate(colnam = as.character(glue::glue(
      nameStyle,
      value = .data$value,
      id_name = .data$id_name,
      window_name = .data$window_name
    ))) %>%
    dplyr::mutate(colnam = checkSnakeCase(.data$colnam, verbose = F))

  overlapTable <- overlapTable %>%
    dplyr::select(
      !!personVariable := dplyr::all_of(personVariableTable),
      "id" = dplyr::all_of(filterVariable),
      "start_date" = dplyr::all_of(targetStartDate),
      "end_date" = dplyr::all_of(targetEndDate %||% targetStartDate),
      dplyr::all_of(extraValue)
    ) %>%
    dplyr::mutate(end_date = dplyr::if_else(
      is.na(.data$end_date), .data$start_date, .data$end_date
    ))

  result <- x |>
    dplyr::select(
      dplyr::all_of(personVariable),
      "index_date" = dplyr::all_of(indexDate),
      "censor_date" = dplyr::any_of(censorDate)
    ) |>
    dplyr::distinct()

  if (isTRUE(inObservation)) {
    result <- result |>
      addDemographics(
        indexDate = "index_date",
        age = FALSE,
        sex = FALSE,
        priorObservation = TRUE,
        priorObservationName = "start_obs",
        priorObservationType = "date",
        futureObservation = TRUE,
        futureObservationName = "end_obs",
        futureObservationType = "date",
        name = omopgenerics::uniqueTableName(tablePrefix)
      )
  }

  result <- result |>
    dplyr::inner_join(overlapTable, by = personVariable)

  if (!is.null(censorDate)) {
    result <- result |>
      dplyr::filter(.data$start_date <= .data$censor_date)
  }

  if (isTRUE(inObservation)) {
    result <- result |>
      dplyr::filter(
        .data$start_obs <= .data$end_date & .data$start_date <= .data$end_obs
      )
  }

  result <- result %>%
    dplyr::mutate(
      "start" = !!CDMConnector::datediff("index_date", "start_date"),
      "end" = !!CDMConnector::datediff("index_date", "end_date")
    ) |>
    dplyr::select(!dplyr::any_of(c(
      "censor_date", "start_date", "end_date", "start_obs", "end_obs"
    ))) |>
    dplyr::compute(
      name = omopgenerics::uniqueTableName(tablePrefix),
      temporary = FALSE,
      overwrite = TRUE
    )

  resultCountFlag <- NULL
  resultDateTimeOther <- NULL
  # Start loop for different windows

  for (i in seq_along(window)) {
    win <- window[[i]]
    if (is.infinite(win[1])) {
      if (is.infinite(win[2])) {
        resultW <- result
      } else {
        resultW <- result %>% dplyr::filter(.data$start <= !!win[2])
      }
    } else {
      if (is.infinite(win[2])) {
        resultW <- result %>% dplyr::filter(.data$end >= !!win[1])
      } else {
        resultW <- result %>%
          dplyr::filter(.data$end >= !!win[1] & .data$start <= !!win[2])
      }
    }

    resultW <- resultW %>%
      dplyr::select(-"end") |>
      dplyr::compute(
        name = omopgenerics::uniqueTableName(tablePrefix),
        temporary = FALSE,
        overwrite = TRUE
      )

    filterTblName <- omopgenerics::uniqueTableName(tablePrefix)
    cdm <- omopgenerics::insertTable(
      cdm = cdm, name = filterTblName, table = filterTbl, overwrite = TRUE
    )

    # add count or flag
    if ("count" %in% value | "flag" %in% value) {
      resultCF <- resultW %>%
        dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id) %>%
        dplyr::summarise(count = dplyr::n(), .groups = "drop") %>%
        dplyr::left_join(cdm[[filterTblName]], by = "id") %>%
        dplyr::select(-"id") %>%
        dplyr::mutate("window_name" = !!tolower(names(window)[i]))
      if ("flag" %in% value) {
        resultCF <- resultCF %>% dplyr::mutate(flag = 1)
      }
      if (!("count" %in% value)) {
        resultCF <- resultCF |> dplyr::select(-"count")
      }

      if (i == 1) {
        resultCountFlag <- resultCF %>%
          dplyr::compute(
            name = omopgenerics::uniqueTableName(tablePrefix),
            temporary = FALSE,
            overwrite = TRUE
          )
      } else {
        resultCountFlag <- resultCountFlag |>
          dplyr::union_all(resultCF) |>
          dplyr::compute(
            name = omopgenerics::uniqueTableName(tablePrefix),
            temporary = FALSE,
            overwrite = TRUE
          )
      }
    }
    # add date, time or other
    if (length(value[!(value %in% c("count", "flag"))]) > 0) {
      resultDTO <- resultW %>%
        dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id)
      if (order == "first") {
        resultDTO <- resultDTO %>%
          dplyr::summarise(
            days = min(.data$start, na.rm = TRUE), .groups = "drop"
          )
      } else {
        resultDTO <- resultDTO %>%
          dplyr::summarise(
            days = max(.data$start, na.rm = TRUE), .groups = "drop"
          )
      }
      resultDTO <- resultDTO %>%
        dplyr::right_join(
          resultW %>%
            dplyr::select(dplyr::all_of(c(personVariable, "index_date", "id"))) %>%
            dplyr::distinct(),
          by = c(personVariable, "index_date", "id")
        )
      if ("date" %in% value) {
        resultDTO <- resultDTO %>%
          dplyr::mutate(date = as.Date(!!CDMConnector::dateadd("index_date", "days")))
      }
      if (length(extraValue) > 0) {
        resultDTO <- resultDTO %>%
          dplyr::left_join(
            resultW %>%
              dplyr::select(
                dplyr::all_of(personVariable), "index_date", "id",
                "days" = "start", dplyr::all_of(extraValue)
              ) %>%
              dplyr::inner_join(
                resultDTO %>%
                  dplyr::select(dplyr::all_of(
                    c(personVariable, "index_date", "id", "days")
                  )),
                by = c(personVariable, "index_date", "id", "days")
              ) %>%
              dplyr::group_by(.data[[personVariable]], .data$index_date, .data$id) %>%
              dplyr::summarise(
                dplyr::across(
                  dplyr::all_of(extraValue), ~ str_flatten(.x, collapse = "; ")
                ),
                .groups = "drop"
              ),
            by = c(personVariable, "index_date", "id")
          )
      }

      resultDTO <- resultDTO %>%
        dplyr::left_join(cdm[[filterTblName]], by = "id") %>%
        dplyr::select(-"id") %>%
        dplyr::mutate("window_name" = !!tolower(names(window)[i]))
      if (!("days" %in% value)) {
        resultDTO <- dplyr::select(resultDTO, -"days")
      }
      if (i == 1) {
        resultDateTimeOther <- resultDTO %>%
          dplyr::compute(
            name = omopgenerics::uniqueTableName(tablePrefix),
            temporary = FALSE,
            overwrite = TRUE
          )
      } else {
        resultDateTimeOther <- resultDateTimeOther |>
          dplyr::union_all(resultDTO) |>
          dplyr::compute(
            name = omopgenerics::uniqueTableName(tablePrefix),
            temporary = FALSE,
            overwrite = TRUE
          )
      }
    }
  }

  if (any(c("flag", "count") %in% value)) {
    resultCountFlagPivot <- resultCountFlag %>%
      tidyr::pivot_longer(
        dplyr::any_of(c("count", "flag")),
        names_to = "value",
        values_to = "values"
      ) %>%
      tidyr::pivot_wider(
        names_from = c("value", "id_name", "window_name"),
        values_from = "values",
        names_glue = nameStyle,
        values_fill = 0
      ) %>%
      dplyr::rename(!!indexDate := "index_date") %>%
      dplyr::rename_all(tolower) |>
      dplyr::compute(
        name = omopgenerics::uniqueTableName(tablePrefix),
        temporary = FALSE,
        overwrite = TRUE
      )

    newColCountFlag <- colnames(resultCountFlagPivot)
    newColCountFlag <- newColCountFlag[newColCountFlag %in% newCols$colnam]

    x <- x %>%
      dplyr::left_join(
        resultCountFlagPivot,
        by = c(personVariable, indexDate)
      ) |>
      dplyr::compute(
        name = omopgenerics::uniqueTableName(tablePrefix),
        temporary = FALSE,
        overwrite = TRUE
      )

    x <- x %>%
      dplyr::mutate(dplyr::across(
        dplyr::all_of(newColCountFlag), ~ dplyr::if_else(is.na(.x), 0, .x)
      )) |>
      dplyr::compute(
        name = omopgenerics::uniqueTableName(tablePrefix),
        temporary = FALSE,
        overwrite = TRUE
      )
  }

  if (length(value[!(value %in% c("count", "flag"))]) > 0) {
    values <- value[!(value %in% c("count", "flag"))]
    for (val in values) {
      resultDateTimeOtherX <- resultDateTimeOther %>%
        dplyr::select(
          dplyr::all_of(personVariable), "index_date", dplyr::all_of(val),
          "id_name", "window_name"
        ) %>%
        tidyr::pivot_longer(
          dplyr::all_of(val),
          names_to = "value",
          values_to = "values"
        ) %>%
        tidyr::pivot_wider(
          names_from = c("value", "id_name", "window_name"),
          values_from = "values",
          names_glue = nameStyle
        ) %>%
        dplyr::rename(!!indexDate := "index_date") %>%
        dplyr::rename_all(tolower)

      x <- x %>%
        dplyr::left_join(
          resultDateTimeOtherX,
          by = c(personVariable, indexDate)
        )
    }

    x <- x %>%
      dplyr::compute(
        name = omopgenerics::uniqueTableName(tablePrefix),
        temporary = FALSE,
        overwrite = TRUE
      )
  }

  # missing columns
  newCols <- newCols %>%
    dplyr::filter(!.data$colnam %in% colnames(x))

  for (val in as.character(unique(newCols$value))) {
    cols <- newCols$colnam[newCols$value == val]
    valk <- switch(val,
      flag = 0,
      count = 0,
      days = as.numeric(NA),
      date = as.Date(NA),
      as.character(NA)
    )

    id <- paste0("id_", paste0(sample(letters, 5), collapse = ""))

    newTib <- dplyr::tibble(!!id := 1)
    newTib[, cols] <- valk
    tmpName <- omopgenerics::uniqueTableName(tablePrefix)
    cdm <- omopgenerics::insertTable(cdm = cdm, name = tmpName, table = newTib)

    x <- x |>
      dplyr::mutate(!!id := 1) |>
      dplyr::inner_join(cdm[[tmpName]], by = id) |>
      dplyr::select(!dplyr::all_of(id)) |>
      dplyr::compute(
        name = omopgenerics::uniqueTableName(tablePrefix),
        temporary = FALSE,
        overwrite = TRUE
      )
  }

  if (any(value %in% c("count", "flag"))) {
    for (k in seq_along(window)) {
      tmpName <- "tmp_col_12345"
      cols <- newCols |>
        dplyr::filter(
          .data$window_name == names(window)[k] &
            .data$value %in% c("count", "flag")
        ) |>
        dplyr::pull("colnam")
      x <- x |>
        addInObservation(
          indexDate = indexDate,
          window = window[[k]],
          completeInterval = F,
          nameStyle = tmpName,
          name = omopgenerics::uniqueTableName(tablePrefix)
        ) |>
        dplyr::mutate(dplyr::across(
          .cols = dplyr::all_of(cols),
          .fns = ~ dplyr::if_else(tmp_col_12345 == 0, as.numeric(NA), .)
        )) |>
        dplyr::select(!dplyr::all_of(tmpName))
    }
  }

  x <- x |> dplyr::compute(name = comp$name, temporary = comp$temporary)

  omopgenerics::dropTable(
    cdm = cdm, name = dplyr::starts_with(tablePrefix)
  )

  return(x)
}

#' Get the name of the start date column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the start date column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' startDateColumn("condition_occurrence")
#' }
#'
startDateColumn <- function(tableName) {
  if (tableName %in% namesTable$table_name) {
    return(namesTable$start_date_name[namesTable$table_name == tableName])
  } else {
    return("cohort_start_date")
  }
}

#' Get the name of the end date column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the end date column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' endDateColumn("condition_occurrence")
#' }
#'
endDateColumn <- function(tableName) {
  if (tableName %in% namesTable$table_name) {
    return(namesTable$end_date_name[namesTable$table_name == tableName])
  } else {
    return("cohort_end_date")
  }
}

#' Get the name of the standard concept_id column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the concept_id column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' standardConceptIdColumn("condition_occurrence")
#' }
#'
standardConceptIdColumn <- function(tableName) {
  if (tableName %in% namesTable$table_name) {
    return(namesTable$concept_id_name[namesTable$table_name == tableName])
  } else {
    return("cohort_definition_id")
  }
}

#' Get the name of the source concept_id column for a certain table in the cdm
#'
#' @param tableName Name of the table.
#'
#' @return Name of the source_concept_id column in that table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(PatientProfiles)
#' sourceConceptIdColumn("condition_occurrence")
#' }
#'
sourceConceptIdColumn <- function(tableName) {
  if (tableName %in% namesTable$table_name) {
    return(namesTable$source_concept_id_name[namesTable$table_name == tableName])
  } else {
    return(as.character(NA))
  }
}

Try the PatientProfiles package in your browser

Any scripts or data that you put into this service are public.

PatientProfiles documentation built on Oct. 30, 2024, 9:13 a.m.