R/WebApiCohortDefinition.R

Defines functions getCohortSpecification getCohortGenerationStatus getCohortInclusionRulesAndCounts getCohorts

Documented in getCohortGenerationStatus getCohortInclusionRulesAndCounts getCohorts getCohortSpecification

# @file CohortDefinition
#
# Copyright 2019 Observational Health Data Sciences and Informatics
#
# This file is part of ROhdsiWebApi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Get the cohort definition expression for a cohort Id from WebAPI
#'
#' @details
#' Get the cohort definition expression for a cohort Id from WebAPI
#'
#' @param cohortId       An integer indicating which cohort definition to fetch.
#' @param baseUrl        The base URL for the WebApi instance, for example:
#'                       "http://server.org:80/WebAPI".
#' @return
#' Get the cohort definition expression for a cohort Id from WebAPI
#'
#' @examples
#' \dontrun{
#' # Get the cohort definition expression for a cohort Id from WebAPI:
#'
#' getCohortSpecification(baseUrl = "http://server.org:80/WebAPI", cohortId = 282)
#' }
#'
#'
#' @export
getCohortSpecification <- function(baseUrl, cohortId){
  .checkBaseUrl(baseUrl)

  url <- sprintf("%1s/cohortdefinition/%2s", baseUrl, cohortId)
  result <- .getApiResponseParse(url)

  cohortMetaInformation <-  result$parsed
  cohortMetaInformation$expression <- NULL
  cohortMetaInformation$expressionType <- NULL
  cohortMetaInformation <- cohortMetaInformation %>%
                          .convertNullToNARecursive() %>%
                          as.data.frame() %>%
                          dplyr::mutate(
                                        createdDate = lubridate::ymd_hm(createdDate, tz = Sys.timezone()),
                                        modifiedDate = lubridate::ymd_hm(modifiedDate, tz = Sys.timezone())
                                        )

  simplify <- jsonlite::fromJSON(txt = result$parsed$expression,
                                                 simplifyVector = TRUE,
                                                 simplifyDataFrame = TRUE,
                                                 flatten = FALSE
                                                 )
  noSimplify <- jsonlite::fromJSON(txt = result$parsed$expression,
                                                          simplifyVector = FALSE,
                                                          simplifyDataFrame = FALSE,
                                                          flatten = FALSE
  )

  #ConceptSets
  conceptSetInfo <- simplify$ConceptSets %>% dplyr::select(id, name)
  conceptSets <- simplify$ConceptSets$expression$items
  for (i in (1:length(conceptSets))) {
    conceptSets[[i]] <- jsonlite::flatten(conceptSets[[i]]) %>%
                        dplyr::mutate(
                          conceptSetId = conceptSetInfo$id[[i]],
                          name = .formatName(conceptSetInfo$name[[i]]),
                          nameOriginal = conceptSetInfo$name[[i]]
                        )
    names(conceptSets[[i]]) <- stringr::str_replace_all(
                                                  names(conceptSets[[i]]),
                                                  pattern = 'concept\\.',
                                                  replacement = ''
                                                ) %>%
                        snakecase::to_lower_camel_case()
  }
  conceptSets <- dplyr::bind_rows(conceptSets) %>%
                dplyr::select(conceptSetId, name, conceptId, conceptId, conceptCode,
                              conceptName, domainId, vocabularyId, standardConcept,
                              isExcluded, includeDescendants, includeMapped,
                              standardConceptCaption, nameOriginal)


  #dont see a reason to parse anything else other than concept sets
  parsed <- noSimplify
  parsed$ConceptSets <- conceptSets
  parsed$metaData <- cohortMetaInformation

 list(
    native = result$native,
    parsed = parsed
  )
}
#' Get cohort generation status for a cohort definition Id and CDM database source.
#'
#' @details
#' Get cohort generation status for a cohort definition Id and CDM sources.
#'
#' @param baseUrl         The base URL for the WebApi instance, for example:
#'                        "http://server.org:80/WebAPI".
#' @param cohortId        An integer for the cohort id
#'
#' @return
#' A data frame of cohort generation status, start times, and execution durations
#' for one cohort definition id and cdm database source combination.
#' The parsed object is filtered to generations that were complete, valid and not canceled.
#'
#'
#' @export
#'
getCohortGenerationStatus <- function(baseUrl, cohortId, sourceKeys) {
  .checkBaseUrl(baseUrl)
  url <- sprintf("%1s/cohortdefinition/%2s/info", baseUrl, cohortId)
  json <- .getApiResponseParse(url)

  if (length(json$parsed) == 0) {
    return(NULL)
  }

  configuration <- StudyManagement::getCdmSources(baseUrl = baseUrl) %>%
                    dplyr::filter(sourceKey %in% sourceKeys)

  json$parsed <- json$parsed %>%
                      dplyr::mutate(
                              startTime = .millisecondsToDate(startTime)
                      ) %>%
                      dplyr::filter(isValid == TRUE &
                                      isCanceled == FALSE &
                                      status == 'COMPLETE' &
                                      !is.null(personCount) &
                                      !is.na(personCount) &
                                      canceled == FALSE
                      ) %>%
                      jsonlite::flatten() %>%
                      dplyr::as_tibble()

  names(json$parsed) <- stringr::str_replace_all(
                                      names(json$parsed),
                                      pattern = 'id\\.',
                                      replacement = ''
                                    ) %>%
                                      snakecase::to_lower_camel_case()

  json$parsed <- json$parsed %>%
                      dplyr::inner_join(y = configuration, by = 'sourceId')

  result <- list(
    native = json$native,
    parsed = json$parsed
  )
  result
}


#' Get cohort inclusion rule counts for events and persons
#'
#' @details
#' Obtains the inclusion rules from a cohort definition and summarizes the event and person counts per rule
#'
#' @param baseUrl         The base URL for the WebApi instance, for example:
#'                        "http://server.org:80/WebAPI".
#' @param cohortId        The Atlas cohort definition id for the cohort
#' @param sourceKey       The sourceKey in WebAPI, as defined in the Configuration page.
#'
#'
#' @export
getCohortInclusionRulesAndCounts <- function(baseUrl,
                                             cohortId,
                                             sourceKey) {

  url <- sprintf("%1s/cohortdefinition/%2s/info", baseUrl, cohortId)
  response <- .getApiResponseParse(url)
  if (length(response$parsed) == 0) {
    return(NULL)
  }

  inclusionRules <- function(baseUrl, cohortId, sourceKey, mode) {
    url <- sprintf("%s/cohortdefinition/%d/report/%s?mode=%d",
                   baseUrl, cohortId, sourceKey, mode)
    json <- .getApiResponseParse(url)

    json$parsed$treemapData <- .convertNullToNARecursive(RJSONIO::fromJSON(json$parsed$treemapData))
    json$parsed$summary <- data.frame(json$parsed$summary, stringsAsFactors = FALSE)


    json$parsed$summary$baseCount <- as.integer(json$parsed$summary$baseCount)
    json$parsed$summary$finalCount <- as.integer(json$parsed$summary$finalCount)
    json$parsed$summary$lostCount <- as.integer(json$parsed$summary$lostCount)
    json$parsed$summary$percentMatched <- (as.numeric(sub("%", "", json$parsed$summary$percentMatched)))/100

    json$parsed$inclusionRuleStats$countSatisfying <- as.integer(json$parsed$inclusionRuleStats$countSatisfying)
    json$parsed$inclusionRuleStats$percentExcluded <- (as.numeric(sub("%", "", json$parsed$inclusionRuleStats$percentExcluded)))/100
    json$parsed$inclusionRuleStats$percentSatisfying <- (as.numeric(sub("%", "", json$parsed$inclusionRuleStats$percentSatisfying)))/100

    if (length(json$parsed$treemapData$children) == 0) {
      json$parsed$treemapData <- NULL
    } else {
      # call recursive flattenTree, store results in result
      treeMapResult <- list(name = c(), size = c())
      treeMapResult <- .flattenTree(json$parsed$treemapData, treeMapResult)
      json$parsed$treemapData <- dplyr::tibble(bits = treeMapResult$name, size = treeMapResult$size)
      json$parsed$treemapData$size <- as.integer(json$parsed$treemapData$size)
      json$parsed$treemapData$SatisfiedNumber = stringr::str_count(string = json$parsed$treemapData$bits, pattern = '1')
      json$parsed$treemapData$SatisfiedRules = stringr::str_locate_all(string = json$parsed$treemapData$bits, pattern = '1') %>% paste()
    }
    #output
    list(summary = json$parsed$summary,
         inclusionRuleStats = json$parsed$inclusionRuleStats,
         treeMapResult = json$parsed$treemapData
        )
  }
  result <- list()
  result[['byPerson']] <-
    inclusionRules(baseUrl = baseUrl, cohortId = cohortId, sourceKey = sourceKey, mode = 1)
  result[['byEvent']] <-
    inclusionRules(baseUrl = baseUrl, cohortId = cohortId, sourceKey = sourceKey, mode = 0)
result
}


#' Get cohort information for a list of cohorts and source keys
#'
#' @details
#' Obtains the inclusion rules from a cohort definition and summarizes the event and person counts per rule
#'
#' @param baseUrl         The base URL for the WebApi instance, for example:
#'                        "http://server.org:80/WebAPI".
#' @param cohortIds        A list of Atlas cohort definition id for the WebApi
#' @param sourceKeys    A list of source keys in WebAPI, as defined in the Configuration page.
#'
#'
#' @export
getCohorts <- function(baseUrl,
                            cohortIds,
                            sourceKeys) {
  .checkBaseUrl(baseUrl)
  result <- list()
  for (i in (1:length(cohortIds))) {#i = 1
    a <- list()
    cohortId <- cohortIds[i]
    writeLines(paste0('Downloading for cohort id = ', cohortId))
    writeLines(paste0('     Downloading specifications'))
    a[['specification']] <- StudyManagement::getCohortSpecification(baseUrl, cohortId)
    writeLines(paste0('     Downloading results'))
    writeLines(paste0('          Downloading generation status'))
    a[['generationStatus']] <- StudyManagement::getCohortGenerationStatus(baseUrl, cohortId, sourceKeys)
    writeLines(paste0('          Downloading inclusion rule counts'))
    for (j in (1:length(sourceKeys))) {#j =1
      sourceKey <- sourceKeys[j]
      if (cohortId %in% a$generationStatus$parsed$cohortDefinitionId &
            sourceKey %in% a$generationStatus$parsed$sourceKey) {
          writeLines(paste0('               found for ', sourceKey))
          a[['inclusionRuleCounts']][[sourceKey]] <- StudyManagement::getCohortInclusionRulesAndCounts(baseUrl, cohortId, sourceKey)
      } else {
        writeLines(paste0('               did not find for ', sourceKey))
        a[['inclusionRuleCounts']][[sourceKey]] <- NULL
        }
    }
    #a[['definitionSql']] <- getCohortDefinitionSql(baseUrl, cohortId)

    result[[paste0('id_', cohortId)]] <- a
  }

  combinations <- list()
  count <- 0
  for (i in (1:length(cohortIds))) {#i = 1
    for (j in (1:length(sourceKeys))) {#j = 1
      sourceKey <- sourceKeys[j]
      cohortId <- cohortIds[i]

      for (mode in c('byPerson', 'byEvent')) {
        count <- count + 1
        combinations[[count]] <- data.frame(
          sourceKey = sourceKey,
          cohortId = cohortId,
          mode = mode,
          stringsAsFactors = FALSE
        )
      }
    }
  }
  combinations <- dplyr::bind_rows(combinations)

  inclusionRuleStatsLong <- list()
  summaryLong <- list()
  treeMapResultLong <- list()
  for (i in (1:nrow(combinations))) {#i = 1

    combination <- combinations[i,]

    inclusionRuleStats <-
      result[[paste0('id_', combination$cohortId)]]$inclusionRuleCounts[[combination$sourceKey]][[combination$mode]]$inclusionRuleStats

    if (!is.null(inclusionRuleStats)) {
        inclusionRuleStatsLong[[i]] <- data.frame(
          sourceKey = rep(combination$sourceKey, length(inclusionRuleStats$id)),
          cohortDefinitionId = rep(combination$cohortId, length(inclusionRuleStats$id)),
          mode = rep(combination$mode, length(inclusionRuleStats$id)),
          id = inclusionRuleStats$id,
          name = inclusionRuleStats$name,
          countSatisfying = inclusionRuleStats$countSatisfying,
          percentExcluded = inclusionRuleStats$percentExcluded,
          percentSatisfying = inclusionRuleStats$percentSatisfying,
          stringsAsFactors = FALSE
        )
    }
    # %>%
    #   dplyr::mutate(
    # countSatisfying = scales::label_number(countSatisfying,big.mark = ","),
    # percentExcluded = scales::label_percent(percentExcluded,big.mark = ",", accuracy = 0.1),
    # percentSatisfying = scales::label_percent(percentSatisfying, big.mark = ",", accuracy = 0.1)
    #   )

    summary <-
      result[[paste0('id_', combination$cohortId)]]$inclusionRuleCounts[[combination$sourceKey]][[combination$mode]]$summary
    if (!is.null(summary)) {
        summaryLong[[i]] <- data.frame(
          sourceKey = combination$sourceKey,
          cohortDefinitionId = combination$cohortId,
          mode = combination$mode,
          baseCount = summary$baseCount,
          finalCount = summary$finalCount,
          lostCount = summary$lostCount,
          percentMatched = summary$percentMatched,
          stringsAsFactors = FALSE
          )
    }
      # %>% dplyr::mutate(
      #
      # baseCount = scales::label_number(baseCount, big.mark = ","),
      # finalCount = scales::label_number(finalCount, big.mark = ","),
      # lostCount = scales::label_number(lostCount, big.mark = ","),
      # percentMatched = scales::label_percent(percentMatched, big.mark = ",", accuracy = 0.1)

    treeMapResult <-
      result[[paste0('id_', combination$cohortId)]]$inclusionRuleCounts[[combination$sourceKey]][[combination$mode]]$treeMapResult
    if (!is.null(treeMapResult)) {
      treeMapResultLong[[i]] <- data.frame(
        sourceKey = rep(combination$sourceKey, length(treeMapResult$bits)),
        cohortDefinitionId = rep(combination$cohortId, length(treeMapResult$bits)),
        mode = rep(combination$mode, length(treeMapResult$bits)),
        SatisfiedNumber = treeMapResult$SatisfiedNumber,
        SatisfiedRules = treeMapResult$SatisfiedRules,
        stringsAsFactors = FALSE
      )
    }
  }


  combinations2 <- combinations %>% dplyr::select(sourceKey, cohortId) %>% unique()
  generationStatusLong <- list()
  for (i in (1:nrow(combinations2))) {#i = 1
    combination <- combinations2[i,]
    generationStatusLong[[i]] <-
      result[[paste0('id_', combination$cohortId)]]$generationStatus$parsed
  }

  combinations3 <- combinations %>% dplyr::select(cohortId) %>% unique()
  metaDataLong <- list()
  for (i in (1:nrow(combinations3))) {#i = 1
    combination <- combinations3[i,]
    temp <- result[[paste0('id_', combination)]]$specification$parsed$metaData
    metaDataLong[[i]] <- lapply(temp, as.character)
    metaDataLong[[i]]$nameFormatted <- .formatName(metaDataLong[[i]]$name)
  }

  result$all$inclusionRuleStats <- dplyr::bind_rows(inclusionRuleStatsLong) %>% tidyr::as_tibble()
  result$all$summary <- dplyr::bind_rows(summaryLong) %>% tidyr::as_tibble()
  result$all$treeMapResult <- dplyr::bind_rows(treeMapResultLong) %>% tidyr::as_tibble()
  result$all$generationStatus <- dplyr::bind_rows(generationStatusLong) %>% tidyr::as_tibble()
  result$all$metaData <- dplyr::bind_rows(metaDataLong) %>%
                        tidyr::as_tibble() %>%
                        dplyr::mutate(cohortDefinitionId = as.integer(id))

  result
}
gowthamrao/StudyManagement documentation built on March 9, 2020, 10:48 p.m.