# @file CohortDefinition
#
# Copyright 2019 Observational Health Data Sciences and Informatics
#
# This file is part of ROhdsiWebApi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Get the cohort definition expression for a cohort Id from WebAPI
#'
#' @details
#' Get the cohort definition expression for a cohort Id from WebAPI
#'
#' @param cohortId An integer indicating which cohort definition to fetch.
#' @param baseUrl The base URL for the WebApi instance, for example:
#' "http://server.org:80/WebAPI".
#' @return
#' Get the cohort definition expression for a cohort Id from WebAPI
#'
#' @examples
#' \dontrun{
#' # Get the cohort definition expression for a cohort Id from WebAPI:
#'
#' getCohortSpecification(baseUrl = "http://server.org:80/WebAPI", cohortId = 282)
#' }
#'
#'
#' @export
getCohortSpecification <- function(baseUrl, cohortId){
.checkBaseUrl(baseUrl)
url <- sprintf("%1s/cohortdefinition/%2s", baseUrl, cohortId)
result <- .getApiResponseParse(url)
cohortMetaInformation <- result$parsed
cohortMetaInformation$expression <- NULL
cohortMetaInformation$expressionType <- NULL
cohortMetaInformation <- cohortMetaInformation %>%
.convertNullToNARecursive() %>%
as.data.frame() %>%
dplyr::mutate(
createdDate = lubridate::ymd_hm(createdDate, tz = Sys.timezone()),
modifiedDate = lubridate::ymd_hm(modifiedDate, tz = Sys.timezone())
)
simplify <- jsonlite::fromJSON(txt = result$parsed$expression,
simplifyVector = TRUE,
simplifyDataFrame = TRUE,
flatten = FALSE
)
noSimplify <- jsonlite::fromJSON(txt = result$parsed$expression,
simplifyVector = FALSE,
simplifyDataFrame = FALSE,
flatten = FALSE
)
#ConceptSets
conceptSetInfo <- simplify$ConceptSets %>% dplyr::select(id, name)
conceptSets <- simplify$ConceptSets$expression$items
for (i in (1:length(conceptSets))) {
conceptSets[[i]] <- jsonlite::flatten(conceptSets[[i]]) %>%
dplyr::mutate(
conceptSetId = conceptSetInfo$id[[i]],
name = .formatName(conceptSetInfo$name[[i]]),
nameOriginal = conceptSetInfo$name[[i]]
)
names(conceptSets[[i]]) <- stringr::str_replace_all(
names(conceptSets[[i]]),
pattern = 'concept\\.',
replacement = ''
) %>%
snakecase::to_lower_camel_case()
}
conceptSets <- dplyr::bind_rows(conceptSets) %>%
dplyr::select(conceptSetId, name, conceptId, conceptId, conceptCode,
conceptName, domainId, vocabularyId, standardConcept,
isExcluded, includeDescendants, includeMapped,
standardConceptCaption, nameOriginal)
#dont see a reason to parse anything else other than concept sets
parsed <- noSimplify
parsed$ConceptSets <- conceptSets
parsed$metaData <- cohortMetaInformation
list(
native = result$native,
parsed = parsed
)
}
#' Get cohort generation status for a cohort definition Id and CDM database source.
#'
#' @details
#' Get cohort generation status for a cohort definition Id and CDM sources.
#'
#' @param baseUrl The base URL for the WebApi instance, for example:
#' "http://server.org:80/WebAPI".
#' @param cohortId An integer for the cohort id
#'
#' @return
#' A data frame of cohort generation status, start times, and execution durations
#' for one cohort definition id and cdm database source combination.
#' The parsed object is filtered to generations that were complete, valid and not canceled.
#'
#'
#' @export
#'
getCohortGenerationStatus <- function(baseUrl, cohortId, sourceKeys) {
.checkBaseUrl(baseUrl)
url <- sprintf("%1s/cohortdefinition/%2s/info", baseUrl, cohortId)
json <- .getApiResponseParse(url)
if (length(json$parsed) == 0) {
return(NULL)
}
configuration <- StudyManagement::getCdmSources(baseUrl = baseUrl) %>%
dplyr::filter(sourceKey %in% sourceKeys)
json$parsed <- json$parsed %>%
dplyr::mutate(
startTime = .millisecondsToDate(startTime)
) %>%
dplyr::filter(isValid == TRUE &
isCanceled == FALSE &
status == 'COMPLETE' &
!is.null(personCount) &
!is.na(personCount) &
canceled == FALSE
) %>%
jsonlite::flatten() %>%
dplyr::as_tibble()
names(json$parsed) <- stringr::str_replace_all(
names(json$parsed),
pattern = 'id\\.',
replacement = ''
) %>%
snakecase::to_lower_camel_case()
json$parsed <- json$parsed %>%
dplyr::inner_join(y = configuration, by = 'sourceId')
result <- list(
native = json$native,
parsed = json$parsed
)
result
}
#' Get cohort inclusion rule counts for events and persons
#'
#' @details
#' Obtains the inclusion rules from a cohort definition and summarizes the event and person counts per rule
#'
#' @param baseUrl The base URL for the WebApi instance, for example:
#' "http://server.org:80/WebAPI".
#' @param cohortId The Atlas cohort definition id for the cohort
#' @param sourceKey The sourceKey in WebAPI, as defined in the Configuration page.
#'
#'
#' @export
getCohortInclusionRulesAndCounts <- function(baseUrl,
cohortId,
sourceKey) {
url <- sprintf("%1s/cohortdefinition/%2s/info", baseUrl, cohortId)
response <- .getApiResponseParse(url)
if (length(response$parsed) == 0) {
return(NULL)
}
inclusionRules <- function(baseUrl, cohortId, sourceKey, mode) {
url <- sprintf("%s/cohortdefinition/%d/report/%s?mode=%d",
baseUrl, cohortId, sourceKey, mode)
json <- .getApiResponseParse(url)
json$parsed$treemapData <- .convertNullToNARecursive(RJSONIO::fromJSON(json$parsed$treemapData))
json$parsed$summary <- data.frame(json$parsed$summary, stringsAsFactors = FALSE)
json$parsed$summary$baseCount <- as.integer(json$parsed$summary$baseCount)
json$parsed$summary$finalCount <- as.integer(json$parsed$summary$finalCount)
json$parsed$summary$lostCount <- as.integer(json$parsed$summary$lostCount)
json$parsed$summary$percentMatched <- (as.numeric(sub("%", "", json$parsed$summary$percentMatched)))/100
json$parsed$inclusionRuleStats$countSatisfying <- as.integer(json$parsed$inclusionRuleStats$countSatisfying)
json$parsed$inclusionRuleStats$percentExcluded <- (as.numeric(sub("%", "", json$parsed$inclusionRuleStats$percentExcluded)))/100
json$parsed$inclusionRuleStats$percentSatisfying <- (as.numeric(sub("%", "", json$parsed$inclusionRuleStats$percentSatisfying)))/100
if (length(json$parsed$treemapData$children) == 0) {
json$parsed$treemapData <- NULL
} else {
# call recursive flattenTree, store results in result
treeMapResult <- list(name = c(), size = c())
treeMapResult <- .flattenTree(json$parsed$treemapData, treeMapResult)
json$parsed$treemapData <- dplyr::tibble(bits = treeMapResult$name, size = treeMapResult$size)
json$parsed$treemapData$size <- as.integer(json$parsed$treemapData$size)
json$parsed$treemapData$SatisfiedNumber = stringr::str_count(string = json$parsed$treemapData$bits, pattern = '1')
json$parsed$treemapData$SatisfiedRules = stringr::str_locate_all(string = json$parsed$treemapData$bits, pattern = '1') %>% paste()
}
#output
list(summary = json$parsed$summary,
inclusionRuleStats = json$parsed$inclusionRuleStats,
treeMapResult = json$parsed$treemapData
)
}
result <- list()
result[['byPerson']] <-
inclusionRules(baseUrl = baseUrl, cohortId = cohortId, sourceKey = sourceKey, mode = 1)
result[['byEvent']] <-
inclusionRules(baseUrl = baseUrl, cohortId = cohortId, sourceKey = sourceKey, mode = 0)
result
}
#' Get cohort information for a list of cohorts and source keys
#'
#' @details
#' Obtains the inclusion rules from a cohort definition and summarizes the event and person counts per rule
#'
#' @param baseUrl The base URL for the WebApi instance, for example:
#' "http://server.org:80/WebAPI".
#' @param cohortIds A list of Atlas cohort definition id for the WebApi
#' @param sourceKeys A list of source keys in WebAPI, as defined in the Configuration page.
#'
#'
#' @export
getCohorts <- function(baseUrl,
cohortIds,
sourceKeys) {
.checkBaseUrl(baseUrl)
result <- list()
for (i in (1:length(cohortIds))) {#i = 1
a <- list()
cohortId <- cohortIds[i]
writeLines(paste0('Downloading for cohort id = ', cohortId))
writeLines(paste0(' Downloading specifications'))
a[['specification']] <- StudyManagement::getCohortSpecification(baseUrl, cohortId)
writeLines(paste0(' Downloading results'))
writeLines(paste0(' Downloading generation status'))
a[['generationStatus']] <- StudyManagement::getCohortGenerationStatus(baseUrl, cohortId, sourceKeys)
writeLines(paste0(' Downloading inclusion rule counts'))
for (j in (1:length(sourceKeys))) {#j =1
sourceKey <- sourceKeys[j]
if (cohortId %in% a$generationStatus$parsed$cohortDefinitionId &
sourceKey %in% a$generationStatus$parsed$sourceKey) {
writeLines(paste0(' found for ', sourceKey))
a[['inclusionRuleCounts']][[sourceKey]] <- StudyManagement::getCohortInclusionRulesAndCounts(baseUrl, cohortId, sourceKey)
} else {
writeLines(paste0(' did not find for ', sourceKey))
a[['inclusionRuleCounts']][[sourceKey]] <- NULL
}
}
#a[['definitionSql']] <- getCohortDefinitionSql(baseUrl, cohortId)
result[[paste0('id_', cohortId)]] <- a
}
combinations <- list()
count <- 0
for (i in (1:length(cohortIds))) {#i = 1
for (j in (1:length(sourceKeys))) {#j = 1
sourceKey <- sourceKeys[j]
cohortId <- cohortIds[i]
for (mode in c('byPerson', 'byEvent')) {
count <- count + 1
combinations[[count]] <- data.frame(
sourceKey = sourceKey,
cohortId = cohortId,
mode = mode,
stringsAsFactors = FALSE
)
}
}
}
combinations <- dplyr::bind_rows(combinations)
inclusionRuleStatsLong <- list()
summaryLong <- list()
treeMapResultLong <- list()
for (i in (1:nrow(combinations))) {#i = 1
combination <- combinations[i,]
inclusionRuleStats <-
result[[paste0('id_', combination$cohortId)]]$inclusionRuleCounts[[combination$sourceKey]][[combination$mode]]$inclusionRuleStats
if (!is.null(inclusionRuleStats)) {
inclusionRuleStatsLong[[i]] <- data.frame(
sourceKey = rep(combination$sourceKey, length(inclusionRuleStats$id)),
cohortDefinitionId = rep(combination$cohortId, length(inclusionRuleStats$id)),
mode = rep(combination$mode, length(inclusionRuleStats$id)),
id = inclusionRuleStats$id,
name = inclusionRuleStats$name,
countSatisfying = inclusionRuleStats$countSatisfying,
percentExcluded = inclusionRuleStats$percentExcluded,
percentSatisfying = inclusionRuleStats$percentSatisfying,
stringsAsFactors = FALSE
)
}
# %>%
# dplyr::mutate(
# countSatisfying = scales::label_number(countSatisfying,big.mark = ","),
# percentExcluded = scales::label_percent(percentExcluded,big.mark = ",", accuracy = 0.1),
# percentSatisfying = scales::label_percent(percentSatisfying, big.mark = ",", accuracy = 0.1)
# )
summary <-
result[[paste0('id_', combination$cohortId)]]$inclusionRuleCounts[[combination$sourceKey]][[combination$mode]]$summary
if (!is.null(summary)) {
summaryLong[[i]] <- data.frame(
sourceKey = combination$sourceKey,
cohortDefinitionId = combination$cohortId,
mode = combination$mode,
baseCount = summary$baseCount,
finalCount = summary$finalCount,
lostCount = summary$lostCount,
percentMatched = summary$percentMatched,
stringsAsFactors = FALSE
)
}
# %>% dplyr::mutate(
#
# baseCount = scales::label_number(baseCount, big.mark = ","),
# finalCount = scales::label_number(finalCount, big.mark = ","),
# lostCount = scales::label_number(lostCount, big.mark = ","),
# percentMatched = scales::label_percent(percentMatched, big.mark = ",", accuracy = 0.1)
treeMapResult <-
result[[paste0('id_', combination$cohortId)]]$inclusionRuleCounts[[combination$sourceKey]][[combination$mode]]$treeMapResult
if (!is.null(treeMapResult)) {
treeMapResultLong[[i]] <- data.frame(
sourceKey = rep(combination$sourceKey, length(treeMapResult$bits)),
cohortDefinitionId = rep(combination$cohortId, length(treeMapResult$bits)),
mode = rep(combination$mode, length(treeMapResult$bits)),
SatisfiedNumber = treeMapResult$SatisfiedNumber,
SatisfiedRules = treeMapResult$SatisfiedRules,
stringsAsFactors = FALSE
)
}
}
combinations2 <- combinations %>% dplyr::select(sourceKey, cohortId) %>% unique()
generationStatusLong <- list()
for (i in (1:nrow(combinations2))) {#i = 1
combination <- combinations2[i,]
generationStatusLong[[i]] <-
result[[paste0('id_', combination$cohortId)]]$generationStatus$parsed
}
combinations3 <- combinations %>% dplyr::select(cohortId) %>% unique()
metaDataLong <- list()
for (i in (1:nrow(combinations3))) {#i = 1
combination <- combinations3[i,]
temp <- result[[paste0('id_', combination)]]$specification$parsed$metaData
metaDataLong[[i]] <- lapply(temp, as.character)
metaDataLong[[i]]$nameFormatted <- .formatName(metaDataLong[[i]]$name)
}
result$all$inclusionRuleStats <- dplyr::bind_rows(inclusionRuleStatsLong) %>% tidyr::as_tibble()
result$all$summary <- dplyr::bind_rows(summaryLong) %>% tidyr::as_tibble()
result$all$treeMapResult <- dplyr::bind_rows(treeMapResultLong) %>% tidyr::as_tibble()
result$all$generationStatus <- dplyr::bind_rows(generationStatusLong) %>% tidyr::as_tibble()
result$all$metaData <- dplyr::bind_rows(metaDataLong) %>%
tidyr::as_tibble() %>%
dplyr::mutate(cohortDefinitionId = as.integer(id))
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.