R/getDrugIngredientCodes.R
In CodelistGenerator: Identify Relevant Clinical Codes and Evaluate Their Use

Documented in getDrugIngredientCodes

# Copyright 2025 DARWIN EU®
#
# This file is part of CodelistGenerator
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Get descendant codes of drug ingredients
#'
#' @inheritParams cdmDoc
#' @param name Names of ingredients of interest. For example, c("acetaminophen",
#' "codeine"), would result in a list of length two with the descendant
#' concepts for these two particular drug ingredients. Users can also specify the
#' concept ID instead of the name (e.g., c(1125315, 42948451)) using a numeric vector.
#' @inheritParams nameStyleDoc
#' @inheritParams doseFormDoc
#' @inheritParams doseUnitDoc
#' @inheritParams routeCategoryDoc
#' @inheritParams ingredientRangeDoc
#' @inheritParams typeNarrowDoc
#'
#' @return Concepts with their format based on the type argument.
#' @export
#'
#' @examples
#' \donttest{
#' library(CodelistGenerator)
#' cdm <- mockVocabRef()
#' getDrugIngredientCodes(cdm = cdm,
#'                        name = "Adalimumab",
#'                        nameStyle = "{concept_name}")
#'}
getDrugIngredientCodes <- function(cdm,
                                   name = NULL,
                                   nameStyle = "{concept_code}_{concept_name}",
                                   doseForm = NULL,
                                   doseUnit = NULL,
                                   routeCategory = NULL,
                                   ingredientRange = c(1, Inf),
                                   type = "codelist") {

  # Initial checks
  omopgenerics::validateCdmArgument(cdm)
  nameStyle <- checkNameStyle(nameStyle)
  omopgenerics::assertChoice(type, length = 1,
                             choices = c("codelist",
                                         "concept_set_expression",
                                         "codelist_with_details"))
  omopgenerics::assertNumeric(ingredientRange, length = 2, min = 0)
  omopgenerics::assertTrue(ingredientRange[1] <= ingredientRange[2])
  omopgenerics::assertCharacter(doseForm, null = TRUE)
  omopgenerics::assertCharacter(doseUnit, null = TRUE)
  omopgenerics::assertCharacter(routeCategory, null = TRUE)

  if(type == "concept_set_expression"){
    if(!is.null(doseForm)){
      cli::cli_abort("Dose forms not yet supported for concept set expressions")
    }
    if(!is.null(doseUnit)){
      cli::cli_abort("Dose units not yet supported for concept set expressions")
    }
    if(!is.null(routeCategory)){
      cli::cli_abort("Route categorises not yet supported for concept set expressions")
    }
  }

  if(!is.null(name)){
    if(is.character(name)){
      omopgenerics::assertCharacter(name)
    }else if(is.numeric(name)){
      omopgenerics::assertNumeric(name)
    }else{
      cli::cli_abort("Argument `name` must be either a character vector with the ingredients' names, a
                                numerical vector with the ingredients' concepts ID or NULL")
    }
  }
  ingredientConcepts <- cdm$concept |>
    dplyr::filter(.data$standard_concept == "S",
                  .data$concept_class_id == "Ingredient") |>
    dplyr::select("concept_id", "concept_name", "concept_code") |>
    dplyr::collect()

  if (!is.null(name)){
    ingredientConcepts <- ingredientConcepts |>
      filterIngredientConcepts(name)

    if(nrow(ingredientConcepts) == 0) {
      if(type == "codelist"){
        return(omopgenerics::emptyCodelist())
      } else if(type == "codelist_with_details"){
        return(omopgenerics::emptyCodelistWithDetails())
      } else {
        # note, currently no empty concept set expression function
        return(omopgenerics::newConceptSetExpression(list()))
      }
    }
  }

  # simple case can just return concept set expression with descendants
  if(all(type == "concept_set_expression",
         is.null(doseForm),
         is.null(doseUnit),
         is.null(routeCategory),
         ingredientRange == c(1, Inf))){
    cse <- ingredientConcepts |>
      dplyr::mutate(name = glue::glue(.env$nameStyle)) |>
      dplyr::select("concept_id",
                    "name") |>
      dplyr::mutate("excluded" = FALSE,
                    "descendants" = TRUE,
                    "mapped" = FALSE)
    cse <- split(
      x = cse |>
        dplyr::select(!"name"),
      f = as.factor(cse$name),
      drop = TRUE
    ) |>
      omopgenerics::newConceptSetExpression()
    return(cse)
  }

  # to avoid potential memory problems will batch
  if (nrow(ingredientConcepts) > 0) {
    ingredientCodes <- fetchBatchedDescendants(cdm,
                                               codes = ingredientConcepts$concept_id,
                                               ingredientRange = ingredientRange,
                                               batchSize = 500,
                                               doseForm = doseForm
    ) |>
      dplyr::filter(.data$standard_concept == "S")
  }

  if (nrow(ingredientCodes) == 0) {
    cli::cli_warn("No descendant codes found")
    return(invisible(list()))
  }


  ingredientCodes <- ingredientCodes  |>
    dplyr::select("concept_id", "concept_name",
                  "domain_id", "vocabulary_id",
                  "standard_concept",
                  "ancestor_concept_id") |>
    # split different ancestors into multiple cols
    tidyr::separate_wider_delim(
      cols = "ancestor_concept_id",
      delim = ";",
      names_sep = "",
      too_few = "align_start"
    )

  ingredientCodes <- ingredientCodes |>
    # one row per concept + ancestor
    tidyr::pivot_longer(cols = !c("concept_id", "concept_name",
                                  "domain_id", "vocabulary_id",
                                  "standard_concept"),
                        names_to = NULL,
                        values_to = "ancestor_concept_id",
                        values_drop_na = TRUE
    )

  ingredientCodes <- ingredientCodes |>
    dplyr::left_join(
      ingredientConcepts |>
        dplyr::mutate("concept_name" = omopgenerics::toSnakeCase(.data$concept_name)) |>
        dplyr::mutate("concept_id" = as.character(.data$concept_id)) |>
        dplyr::mutate("name" = glue::glue(.env$nameStyle)) |>
        dplyr::select("concept_id", "name"),
      by = c("ancestor_concept_id" = "concept_id")
    )

  if(type == "codelist"){
    ingredientCodes <- split(
      x = ingredientCodes$concept_id,
      f = as.factor(ingredientCodes$name),
      drop = TRUE
    ) |>
      omopgenerics::newCodelist()
  }else if(type == "codelist_with_details"){
    ingredientCodes <- split(
      x = ingredientCodes,
      f = as.factor(ingredientCodes$name),
      drop = TRUE
    ) |>
      purrr::map(~dplyr::select(., -"name")) |>
      omopgenerics::newCodelistWithDetails()
  } else {
    ingredientCodes <- split(
      x = ingredientCodes|>
        dplyr::select("concept_id") |>
        dplyr::mutate("excluded" = FALSE,
                      "descendants" = FALSE,
                      "mapped" = FALSE),
      f = as.factor(ingredientCodes$name),
      drop = TRUE
    )|>
      omopgenerics::newConceptSetExpression()
  }

  if(!is.null(routeCategory)){
    ingredientCodes <- subsetOnRouteCategory(ingredientCodes,
                                             cdm = cdm,
                                             routeCategory = routeCategory)
  }

  if(!is.null(doseUnit)){
    ingredientCodes <- subsetOnDoseUnit(ingredientCodes,
                                        cdm = cdm,
                                        doseUnit = doseUnit)
  }

  return(ingredientCodes)
}

filterIngredientConcepts <- function(ingredientConcepts, name){
  if(is.character(name)){
    ingredientConcepts <- ingredientConcepts |>
      dplyr::filter(tidyWords(.data$concept_name, message = FALSE) %in%
                      tidyWords(.env$name, message = FALSE))
  }else if(is.numeric(name)){
    ingredientConcepts <- ingredientConcepts |>
      dplyr::filter(.data$concept_id %in% .env$name)
  }
  if(nrow(ingredientConcepts) == 0){
    cli::cli_warn("- No matching Ingredient codes found")
  } else if(nrow(ingredientConcepts) < length(name)){
    missingIngredientConcepts <-  dplyr::tibble(concept_name = tidyWords(.env$name)) |>
      dplyr::anti_join(ingredientConcepts |>
                         dplyr::mutate(concept_name = tidyWords(.data$concept_name)),
                       by = "concept_name") |>
      dplyr::pull("concept_name")
    cli::cli_warn("- No matching Ingredient codes found for {missingIngredientConcepts}")
  }
  ingredientConcepts
}

fetchBatchedDescendants <- function(cdm,
                                    codes,
                                    batchSize,
                                    ingredientRange = c(0, Inf),
                                    doseForm) {
  codeBatches <- split(
    codes,
    ceiling(seq_along(codes) / batchSize)
  )

  descendants <- vector("list", length(codeBatches))

  cli::cli_progress_bar(
    total = length(descendants),
    format = " -- getting descendants {cli::pb_bar} {cli::pb_current} of {cli::pb_total} batched groups"
  )

  for (i in seq_along(descendants)) {
    cli::cli_progress_update()
    x <- getDescendants(
      cdm = cdm,
      conceptId = codeBatches[[i]],
      withAncestor = TRUE
    )

    x <- list("codes" = x) |> newCodelistWithDetails()

    if(ingredientRange[1] != 1 | ingredientRange[2]!= Inf){
    x <- subsetOnIngredientRange(x,
                                 cdm = cdm,
                                 ingredientRange = ingredientRange,
                                 negate = FALSE)
    }

    if(!is.null(doseForm)){
      x <- subsetOnDoseForm(x,
                            cdm = cdm,
                            doseForm = doseForm,
                            negate = FALSE)
    }

    if(length(x$codes) == 0){
      x$codes <- dplyr::tibble(
        "ancestor_concept_id" = character(),
        "concept_id" = integer(),
        "concept_name" = character(),
        "domain_id" = character(),
        "vocabulary_id" = character(),
        "standard_concept" = character()
      )
    }
    descendants[[i]] <- x$codes
  }

  cli::cli_progress_done()
  descendants <- dplyr::bind_rows(descendants)

  return(descendants)
}

checkNameStyle <- function(nameStyle){
  x <- nameStyle %in% c("{concept_code}", "{concept_id}", "{concept_name}",
                        "{concept_code}_{concept_id}", "{concept_code}_{concept_name}", "{concept_id}_{concept_code}",
                        "{concept_id}_{concept_name}", "{concept_name}_{concept_code}", "{concept_name}_{concept_id}",
                        "{concept_code}_{concept_id}_{concept_name}", "{concept_code}_{concept_name}_{concept_id}",
                        "{concept_id}_{concept_code}_{concept_name}", "{concept_id}_{concept_name}_{concept_code}",
                        "{concept_name}_{concept_code}_{concept_id}", "{concept_name}_{concept_id}_{concept_code}")
  if(isFALSE(x)){
    cli::cli_abort("nameStyle {nameStyle} is not supported. nameStyle argument must be a choice between {{concept_name}}, {{concept_id}}, or {{concept_code}},
                   or any combination of the three separated by `_` (i.e., {{concept_id}}_{{concept_name}}.")}

  return(nameStyle)
}

Any scripts or data that you put into this service are public.

CodelistGenerator documentation built on Dec. 17, 2025, 5:06 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CodelistGenerator
Identify Relevant Clinical Codes and Evaluate Their Use

R/getDrugIngredientCodes.R
In CodelistGenerator: Identify Relevant Clinical Codes and Evaluate Their Use

Defines functions checkNameStyle fetchBatchedDescendants filterIngredientConcepts getDrugIngredientCodes

Documented in getDrugIngredientCodes

Try the CodelistGenerator package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CodelistGenerator Identify Relevant Clinical Codes and Evaluate Their Use

R/getDrugIngredientCodes.R In CodelistGenerator: Identify Relevant Clinical Codes and Evaluate Their Use

Defines functions checkNameStyle fetchBatchedDescendants filterIngredientConcepts getDrugIngredientCodes

Documented in getDrugIngredientCodes

Try the CodelistGenerator package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CodelistGenerator
Identify Relevant Clinical Codes and Evaluate Their Use

R/getDrugIngredientCodes.R
In CodelistGenerator: Identify Relevant Clinical Codes and Evaluate Their Use