Nothing
# Copyright 2024 DARWIN EU®
#
# This file is part of CodelistGenerator
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Get concept ids from a provided path to json files
#'
#' @param path Path to a file or folder containing JSONs of concept sets
#' @param cdm A cdm reference created with CDMConnector
#' @param type Can be "codelist", "codelist_with_details", or
#' "concept_set_expression"
#'
#' @return Named list with concept_ids for each concept set
#' @export
#'
#' @examples
#' \donttest{
#' cdm <- mockVocabRef("database")
#' x <- codesFromConceptSet(cdm = cdm,
#' path = system.file(package = "CodelistGenerator",
#' "concepts_for_mock"))
#' x
#' CDMConnector::cdmDisconnect(cdm)
#' }
codesFromConceptSet <- function(path,
cdm,
type = c("codelist")) {
# initial checks
checkInputs(path = path, cdm = cdm)
if (dir.exists(path)) {
conceptSets <- dplyr::tibble(concept_set_path = list.files(
path = path,
full.names = TRUE
))
} else {
conceptSets <- dplyr::tibble(concept_set_path = .env$path)
}
conceptSets <- conceptSets %>%
dplyr::filter(tools::file_ext(.data$concept_set_path) == "json") %>%
dplyr::mutate(
concept_set_name =
tools::file_path_sans_ext(basename(.data$concept_set_path))
) %>%
dplyr::mutate(cohort_definition_id = dplyr::row_number())
if (conceptSets %>% nrow() == 0) {
cli::cli_abort(glue::glue("No 'json' file found in {path}"))
}
# first part: read jsons
tryCatch(
expr = conceptList <- readConceptSet(conceptSets),
error = function(e) {
cli::cli_abort(
"The json file is not a properly formated OMOP concept set."
)
}
)
if(type == "concept_set_expression"){
conceptList <- conceptList |>
dplyr::select("concept_id",
"excluded" = "is_excluded",
"descendants" = "include_descendants",
"mapped" = "include_mapped",
"cohort_name")
conceptList <- split(
conceptList,
conceptList[, c("cohort_name")]
)
for(j in seq_along(conceptList)){
conceptList[[j]] <- conceptList[[j]] |>
dplyr::select(!"cohort_name")
}
conceptList <- omopgenerics::newConceptSetExpression(conceptList)
return(conceptList)
}
if(any(conceptList$include_mapped == TRUE)){
exc <- paste(conceptList %>%
dplyr::filter(.data$include_mapped == TRUE) %>%
dplyr::pull("cohort_name"), collapse = "; ")
cli::cli_abort(
glue::glue("Mapped as TRUE not supported (found in {exc})"))
}
# second part: produce output list
if(nrow(conceptList) == 0){
cli::cli_abort("No concepts found in concept set")
}
conceptListTable <- omopgenerics::uniqueTableName()
cdm <- omopgenerics::insertTable(cdm = cdm,
name = conceptListTable,
table = conceptList,
overwrite = TRUE,
temporary = FALSE)
conceptFinalList <- formatConceptList(cdm = cdm,
conceptListTable = conceptListTable)
if(type == "codelist"){
conceptFinalList <- omopgenerics::newCodelist(conceptFinalList)
}
if(type == "codelist_with_details"){
conceptFinalList <- addDetails(conceptList = conceptFinalList,
cdm = cdm)
conceptFinalList <- omopgenerics::newCodelistWithDetails(conceptFinalList)
}
# return list
return(conceptFinalList)
}
#' Get concept ids from a provided path to cohort json files
#'
#' @param path Path to a file or folder containing JSONs of cohort definitions
#' @param cdm A cdm reference created with CDMConnector
#' @param type Can be "codelist", "codelist_with_details", or
#' "concept_set_expression"
#'
#' @return Named list with concept_ids for each concept set
#' @export
#'
codesFromCohort <- function(path,
cdm,
type = c("codelist")) {
# initial checks
checkInputs(path = path, cdm = cdm)
# list jsons
files <- listJsonFromPath(path)
# obtain codelistTibble
codelistTibble <- NULL
unknown <- 1
for (k in seq_along(files)) {
codelistTibble <- codelistTibble %>%
dplyr::union_all(extractCodes(files[k], unknown))
}
if(is.null(codelistTibble)){
cli::cli_abort("No codes found")
}
if(type == "concept_set_expression"){
codelistTibble <- codelistTibble |>
dplyr::mutate(include_mapped = FALSE) |>
dplyr::select("concept_id",
"excluded" = "is_excluded",
"descendants" = "include_descendants",
"mapped" = "include_mapped",
"codelist_name")
codelistTibble <- split(
codelistTibble,
codelistTibble[, c("codelist_name")]
)
for(j in seq_along(codelistTibble)){
codelistTibble[[j]] <- codelistTibble[[j]] |>
dplyr::select(!"codelist_name")
}
codelistTibble <- omopgenerics::newConceptSetExpression(codelistTibble)
return(codelistTibble)
}
codelistTable <- omopgenerics::uniqueTableName()
cdm <- omopgenerics::insertTable(cdm = cdm,
name = codelistTable,
table = codelistTibble,
overwrite = TRUE,
temporary = FALSE)
# obtain descendants
codelistTibble <- appendDescendants(cdm, codelistTable) |>
dplyr::collect(name = codelistTable,
overwrite = TRUE,
temporary = FALSE)
CDMConnector::dropTable(cdm = cdm, name = codelistTable)
cdm[[codelistTable]] <- NULL
# exclude
codelistTibble <- excludeCodes(codelistTibble)
# split into list
codelist <- tibbleToList(codelistTibble)
if(type == "codelist"){
codelist <- omopgenerics::newCodelist(codelist)
}
if(type == "codelist_with_details"){
codelist <- addDetails(conceptList = codelist,
cdm = cdm)
codelist <- omopgenerics::newCodelistWithDetails(codelist)
}
# return
return(codelist)
}
listJsonFromPath <- function(path) {
if (file.info(path)[["isdir"]]) {
files <- list.files(path, full.names = TRUE)
} else {
files <- path
}
files <- files[tools::file_ext(files) == "json"]
if (length(files) == 0) {
cli::cli_abort(paste0("No json files find in ", path))
}
return(files)
}
extractCodes <- function(file, unknown) {
json <- RJSONIO::fromJSON(file)[["ConceptSets"]]
codelistTibble <- NULL
for (k in seq_along(json)) {
name <- json[[k]][["name"]]
if (is.null(name)) {
name <- paste0("unkown concept set ", unknown)
unknown <- unknown + 1
}
#else if (name %in% names(codes)) {
# basefile <- basename(file)
# name <- paste0(name, " (", substr(basefile, 1, nchar(basefile) - 5), ")")
#}
concepts <- json[[k]][["expression"]][["items"]]
conceptId <- NULL
includeDescendants <- NULL
isExcluded <- NULL
for (j in seq_along(concepts)) {
if(!is.null(concepts[[j]][["includeMapped"]]) &&
concepts[[j]][["includeMapped"]] == TRUE ){
cli::cli_abort(
glue::glue("Mapped as TRUE not supported (found in {name})"))
}
conceptId <- c(conceptId, concepts[[j]][["concept"]][["CONCEPT_ID"]])
exc <- concepts[[j]][["isExcluded"]]
isExcluded <- c(
isExcluded, ifelse(is.null(exc), FALSE, exc)
)
incD <- concepts[[j]][["includeDescendants"]]
includeDescendants <- c(
includeDescendants, ifelse(is.null(incD), FALSE, incD)
)
}
codelistTibble <- codelistTibble %>%
dplyr::union_all(dplyr::tibble(
codelist_name = name, concept_id = conceptId,
include_descendants = includeDescendants, is_excluded = isExcluded
) %>%
dplyr::mutate(filename = file))
}
return(codelistTibble)
}
appendDescendants <- function(cdm, codelistTable) {
cdm[[codelistTable]] %>%
dplyr::mutate(concept_id = as.integer(.data$concept_id)) %>%
dplyr::filter(.data$include_descendants == TRUE) %>%
dplyr::rename("ancestor_concept_id" = "concept_id") %>%
dplyr::inner_join(
cdm[["concept_ancestor"]] %>%
dplyr::select("ancestor_concept_id", "descendant_concept_id") %>%
dplyr::mutate(ancestor_concept_id = as.integer(.data$ancestor_concept_id)),
by = "ancestor_concept_id"
) %>%
dplyr::select(-"ancestor_concept_id") %>%
dplyr::rename("concept_id" = "descendant_concept_id") %>%
dplyr::union_all(
cdm[[codelistTable]] %>%
dplyr::filter(.data$include_descendants == "FALSE")
) %>%
dplyr::select(-"include_descendants")
}
excludeCodes <- function(codelistTibble) {
codelistTibble %>%
dplyr::filter(.data$is_excluded == FALSE) %>%
dplyr::select(-"is_excluded") %>%
dplyr::anti_join(
codelistTibble %>%
dplyr::filter(.data$is_excluded == TRUE),
by = c("codelist_name", "concept_id")
)
}
tibbleToList <- function(codelistTibble) {
codelistTibble <- codelistTibble %>%
dplyr::mutate(nam = paste0(.data$codelist_name, "; ",
.data$filename))
nam <- unique(codelistTibble$nam)
codelist <- lapply(nam, function(x) {
codelistTibble %>%
dplyr::filter(.data$nam == .env$x) %>%
dplyr::pull("concept_id") %>%
unique()
})
names(codelist) <- nam
# check if we have any concept sets with the same name but different definitions
# keep first for each name
cs_names <- stringr::str_extract(names(codelist), "^[^;]*")
cs_names_unique <- unique(cs_names)
codelist_dedup <- list()
for(i in seq_along(cs_names_unique)){
same_name_cs <- codelist[which(cs_names_unique[i] == cs_names)]
check_consistent <- all(sapply(same_name_cs, identical, same_name_cs[[1]]))
if(isFALSE(check_consistent)){
cli::cli_abort(message = "Different definitions for concept set {cs_names_unique[i]} found")
}
# keep first
codelist_dedup[[cs_names_unique[i]]] <- same_name_cs[[1]]
}
return(codelist_dedup)
}
addDetails <- function(conceptList, cdm){
if(length(conceptList)==0){
return(conceptList)
}
# will accept either a list or tibble
# will return the same type as the input
inputIsTbl <- inherits(conceptList, "tbl_df")
if(isFALSE(inputIsTbl)){
for(i in seq_along(conceptList)){
conceptList[[i]] <- dplyr::tibble(concept_id = conceptList[[i]],
concept_set = names(conceptList)[i])
}
conceptList <- dplyr::bind_rows(unclass(conceptList))
}
tableConceptList <- omopgenerics::uniqueTableName()
cdm <- omopgenerics::insertTable(cdm = cdm,
table = conceptList,
name = tableConceptList,
overwrite = TRUE,
temporary = FALSE)
conceptList <- cdm[[tableConceptList]] %>%
dplyr::left_join(cdm[["concept"]] %>%
dplyr::select("concept_id", "concept_name",
"domain_id", "vocabulary_id",
"standard_concept"),
by = "concept_id") %>%
dplyr::mutate(
standard_concept = ifelse(is.na(.data$standard_concept),
"non-standard", .data$standard_concept
)
) %>%
dplyr::mutate(
standard_concept = ifelse(.data$standard_concept == "C",
"classification", .data$standard_concept
)
) %>%
dplyr::mutate(
standard_concept = ifelse(.data$standard_concept == "S",
"standard", .data$standard_concept
)
) %>%
dplyr::collect()
CDMConnector::dropTable(cdm = cdm, name = tableConceptList)
cdm[[tableConceptList]] <- NULL
if(isFALSE(inputIsTbl)){
conceptList <- split(
x = conceptList %>% dplyr::select(!"concept_set"),
f = as.factor(conceptList$concept_set)
)
}
for(i in seq_along(conceptList)){
conceptList[[i]] <- conceptList[[i]] %>%
dplyr::arrange(.data$concept_name)
}
return(conceptList)
}
#' Put concept ids from all cohorts of interest in the required list format
#'
#' @param conceptList table with all the concept ids read, with their respective
#' cohort, exclusion and descendant information
#' @param cdm A cdm reference created with CDMConnector
#'
#' @return list of concept_ids and respective cohort_definition_ids of interest
#' @noRd
formatConceptList <- function(cdm, conceptListTable) {
conceptList <- cdm[[conceptListTable]] %>%
dplyr::filter(.data$include_descendants == FALSE) %>%
dplyr::union_all(
cdm[["concept_ancestor"]] %>%
dplyr::select(
"concept_id" = "ancestor_concept_id",
"descendant_concept_id"
) %>%
dplyr::mutate(concept_id = as.integer(.data$concept_id)) %>%
dplyr::right_join(
cdm[[conceptListTable]] %>%
dplyr::mutate(concept_id = as.integer(.data$concept_id)) %>%
dplyr::filter(.data$include_descendants == TRUE),
by = "concept_id"
) %>%
dplyr::mutate(descendant_concept_id = dplyr::if_else(
# in case concept is not in ancestor table
# (even though it should be)
is.na(.data$descendant_concept_id),
.data$concept_id, .data$descendant_concept_id)) %>%
dplyr::select(-"concept_id") %>%
dplyr::rename("concept_id" = "descendant_concept_id")
) %>%
dplyr::select(-"include_descendants") %>%
dplyr::collect()
# eliminate the ones that is_excluded = TRUE
conceptList <- conceptList %>%
dplyr::filter(.data$is_excluded == FALSE) %>%
dplyr::select("cohort_name", "concept_id") %>%
dplyr::anti_join(
conceptList %>%
dplyr::filter(.data$is_excluded == TRUE),
by = c("cohort_name","concept_id")
)
conceptFinalList <- list()
for(n in conceptList[["cohort_name"]] %>% unique()) {
conceptFinalList[[n]] <- conceptList %>%
dplyr::filter(.data$cohort_name == n) %>%
dplyr::select("concept_id") %>%
dplyr::pull()
}
return(conceptFinalList)
}
#' Get concept ids and information from a list of json files provided
#'
#' @param conceptSets paths and names of all jsons to be read
#'
#' @return raw table with all the concept_ids read, and their information
#' @noRd
readConceptSet <- function(conceptSets) {
names <- c("CONCEPT_CLASS_ID",
"CONCEPT_CODE",
"CONCEPT_ID",
"CONCEPT_NAME",
"DOMAIN_ID",
"INVALID_REASON",
"INVALID_REASON_CAPTION",
"STANDARD_CONCEPT",
"STANDARD_CONCEPT_CAPTION",
"VOCABULARY_ID",
"isExcluded",
"includeMapped",
"includeDescendants")
for (k in 1:nrow(conceptSets)) {
conceptSetName <- conceptSets$concept_set_name[k]
conceptSet <- RJSONIO::fromJSON(conceptSets$concept_set_path[k])
conceptSet <- lapply(conceptSet$items, function(x) {
x <- append(x, x[["concept"]])
x[["concept"]] <- NULL
return(x)
}) %>%
dplyr::bind_rows() %>%
dplyr::mutate(
cohort_name = conceptSetName
)
# Add columns missing from the read file with default values
conceptSet[setdiff(names, names(conceptSet))] <- as.character(NA)
conceptSet <- conceptSet %>%
dplyr::mutate(
isExcluded = ifelse(is.na(.data$isExcluded), FALSE, .data$isExcluded),
includeMapped = ifelse(
is.na(.data$includeMapped), FALSE, .data$includeMapped
),
includeDescendants = ifelse(
is.na(.data$includeDescendants), FALSE, .data$includeDescendants
)
)
if (k == 1) {
conceptList <- conceptSet
} else {
conceptList <- dplyr::bind_rows(conceptList, conceptSet)
}
}
conceptList <- conceptList %>%
dplyr::select(
"cohort_name",
"concept_id" = "CONCEPT_ID",
"is_excluded" = "isExcluded",
"include_descendants" = "includeDescendants",
"include_mapped" = "includeMapped"
)
return(conceptList)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.