R/webApiCohortCharacterization.R

Defines functions getCharacterizationSpecification getCharacterizationGenerationIds getCharacterizationResults getCharacterization getCharacterizations

Documented in getCharacterization getCharacterizationGenerationIds getCharacterizationResults getCharacterizations getCharacterizationSpecification

# @file CohortCharacterization
#
# Copyright 2019 Observational Health Data Sciences and Informatics
#
# This file is part of ROhdsiWebApi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Get Cohort Characterization Specification
#'
#' @param baseUrl              The base URL for the WebApi instance, for example:
#'                             "http://server.org:80/WebAPI".
#' @param characterizationId   The id of the cohort characterization in Atlas
#' @param generationIds         (OPTIONAL) Used to specify the id of a particular generation of a cohort
#'                             characterization. By default, the latest execution is retrieved
#'
#'
#' @export
getCharacterizationSpecification <- function(baseUrl,
                                             characterizationId,
                                             generationId = NULL) {
  .checkBaseUrl(baseUrl)
  if (is.null(generationId)) {
    url <- sprintf("%s/cohort-characterization/%d/design", baseUrl, characterizationId)
  } else {
    url <- sprintf("%s/cohort-characterization/generation/%d/design", baseUrl, generationId)
  }

  #json$parsed$cohorts$expression <- NULL #Why is there no cohort expression?

  ##cohorts and feature analyses are returned as data.frame
  json <- .getApiResponseParse(url)
  #if cohorts is a list, and not data.frame - then specification will return NULL
  if (!is.data.frame(json$parsed$cohorts)) {
    json$parsed <- NULL
  }

  # are there parameters? if not, make them null
  if (!is.data.frame(json$parsed$parameters)) {
    json$parsed$parameters <- NULL
  }

  # are there stratas?
  if (is.data.frame(json$parsed$stratas)) {
    json$parsed$stratas <- jsonlite::flatten(json$parsed$stratas)
  } else {
    json$parsed$stratas <- NULL
  }

  # are there strata concepts?
  if (is.list(json$parsed$strataConceptSets) & length(json$parsed$strataConceptSets) > 0) {
    json$parsed$strataConceptSets <- dplyr::bind_rows(json$parsed$stratas)
  } else {
    json$parsed$stratas <- NULL
  }

#################
  list(
      native = json$native,
      parsed = json$parsed
    )
}


#' Get generation ids for a Cohort Characterization ID
#' Latest generation id's are flagged
#'
#' @param baseUrl              The base URL for the WebApi instance, for example:
#'                             "http://server.org:80/WebAPI".
#' @param characterizationId   The id of the cohort characterization in Atlas
#' @param sourceKey            (Optional) A list of sourceKey in WebAPI, as defined in the Configuration page.
#'
#' @export
getCharacterizationGenerationIds <- function(baseUrl, characterizationId, sourceKey = NULL) {
  .checkBaseUrl(baseUrl)

 cdmSources <- StudyManagement::getCdmSources(baseUrl = baseUrl)

  url <- sprintf("%s/cohort-characterization/%d/generation", baseUrl, characterizationId)
  results <- .getApiResponseParse(url)
  results$parsed <- results$parsed %>%
                    dplyr::filter(status == 'COMPLETED') %>%
                    dplyr::bind_rows() %>%
                    dplyr::inner_join(y = (cdmSources %>% dplyr::select(sourceKey))
                                      , by = 'sourceKey'
                    ) %>%
                    dplyr::mutate(startTime = .millisecondsToDate(startTime)) %>%
                    dplyr::mutate(endTime = .millisecondsToDate(endTime))

  if (!is.null(sourceKey)) {
    results$parsed <- results$parsed %>% dplyr::inner_join(y = cdmSources,by = 'sourceKey')
  }

  sourceKeyMax <- results$parsed %>%
    dplyr::group_by(sourceKey) %>%
    dplyr::summarise(id = max(id)) %>%
    dplyr::mutate('latestIdGroup' = TRUE)

  overallMax <- results$parsed %>%
    dplyr::summarise(id = max(id)) %>%
    dplyr::mutate('latestId' = TRUE)

  results$parsed <- results$parsed %>%
    dplyr::left_join(sourceKeyMax, by = c('id', 'sourceKey')) %>%
    dplyr::left_join(overallMax, by = 'id')

  list(
    native = results$native,
    parsed = results$parsed
  )
}

#' Get Cohort Characterization Results
#'
#' @param baseUrl              The base URL for the WebApi instance, for example:
#'                             "http://server.org:80/WebAPI".
#' @param generationId         Used to specify the id of a particular generation of a cohort
#'                             characterization. By default, the latest execution is retrieved
#'
#' @importFrom magrittr "%>%"
#'
#' @export
getCharacterizationResults <- function(baseUrl,
                                       generationId,
                                       characterizationId) {
    .checkBaseUrl(baseUrl)

    resultUrl <- sprintf("%s/cohort-characterization/generation/%d/result", baseUrl, generationId)
    result <- .getApiResponseParse(resultUrl)

    result$parsed <- result$parsed %>%
      dplyr::mutate(generationId = generationId, characterizationId = characterizationId) %>%
      dplyr::select(characterizationId,
                    generationId,
                    id,
                    sourceKey,
                    cohortId,
                    strataId,
                    strataName,
                    timeWindow,
                    analysisId,
                    analysisName,
                    covariateId,
                    covariateName,
                    conceptId,
                    conceptName,
                    faType,
                    resultType,
                    count,
                    avg,
                    stdDev,
                    max,
                    median,
                    min,
                    p10,
                    p25,
                    p75,
                    p90,
                    distance,
                    proportion) %>%
      tidyr::as_tibble()
    result
}


#' Get Cohort Characterization Information
#'
#' @param baseUrl              The base URL for the WebApi instance, for example:
#'                             "http://server.org:80/WebAPI".
#' @param characterizationId   The id of the cohort characterization in Atlas
#' @param cdmDatabaseSchemas   (Optional) A list of CDM Database schemas (fully specified) for a CDM instance in WebAPI, as defined in the
#'                              Configuration page. Source Key will be derived from cdmDatabases.
#' @param generationIds       (Optional) A list CDM generation ids.
#'
#' @importFrom magrittr "%>%"
#'
#' @export
getCharacterization <- function(baseUrl,
                                characterizationId,
                                generationIds = NULL) {
  .checkBaseUrl(baseUrl)
  results <- list()
  writeLines(paste0('Downloading Characterization for characterization id = ', characterizationId))

  writeLines(paste0('    Downloading Specification'))
  results[['specification']] <-
    StudyManagement::getCharacterizationSpecification(baseUrl = baseUrl,
                                                      characterizationId = characterizationId,
                                                      generationId = NULL
                                                      )
  writeLines(paste0('    Downloading Generation information'))
  generationIdsDf <-
    StudyManagement::getCharacterizationGenerationIds(baseUrl = baseUrl,
                                                       characterizationId = characterizationId
                                                       )[['parsed']] %>%
                      dplyr::rename(generationId = id) %>%
                      dplyr::select(id = generationId, sourceKey, latestIdGroup, latestId)


  if (!is.null(generationIds)) {
    generationIds <- dplyr::intersect(generationIds, generationIdsDf$id)
  } else {generationIds <- generationIdsDf %>% dplyr::pull(id)
  }

  resultsLong <- list()
  for (i in (1:length(generationIds))) {#i = 1
    generationId <- generationIds[[i]]
    writeLines(paste0('        Downloading characterization results for generation id = ', generationId))
    writeLines(paste0('            Downloading specifications for generation id '))
    results[['results']][[paste0('generationId_', generationId)]] <-
      StudyManagement::getCharacterizationResults(baseUrl = baseUrl,
                                                  generationId = generationId,
                                                  characterizationId = characterizationId)
    writeLines(paste0('            Downloading results'))
    results[['results']][[paste0('generationId_', generationId)]][['specification']] <-
      StudyManagement::getCharacterizationSpecification(baseUrl = baseUrl,
                                                        characterizationId = characterizationId,
                                                        generationId = generationId
                                                        )
    resultsLong[[i]] <- results[['results']][[paste0('generationId_', generationId)]]$parsed
  }
  results[['results']][['all']] <- dplyr::bind_rows(resultsLong) %>% tidyr::as_tibble()
  results
}





#' Get Cohort Characterization Information for set of characterization ids
#'
#' @param baseUrl              The base URL for the WebApi instance, for example:
#'                             "http://server.org:80/WebAPI".
#' @param characterizationIds   A list of ids of cohort characterization in Atlas
#'
#' @export
getCharacterizations <- function(baseUrl,
                                characterizationIds) {
  result <- list()
  for (i in (1:length(characterizationIds))) {#i = 1
    characterizationId <- characterizationIds[i]
    result[[paste0('id_', characterizationId)]] <-
      StudyManagement::getCharacterization(baseUrl = baseUrl,
                                           characterizationId = characterizationId)
  }
  result
}
gowthamrao/StudyManagement documentation built on March 9, 2020, 10:48 p.m.