R/Private.R

Defines functions .convertNullToNARecursive .isVectorContained matchTwoVectors .flattenTree .millisecondsToDate .checkBaseUrl .formatName .getValidSourceKeys .getApiResponseParse .typeFieldsInComparativeEffectivnessModel .checkErrorsIdFields .getDataFromMemoryOrRDMS

# @file general
#
# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of R OHDSI webApi package.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#private function recursively converts variables that are NULL to NA
.convertNullToNARecursive <- function(x) {
  nullToNARecursive <- function(x, fn)
  {
    if (is.list(x)) {
      lapply(x, nullToNARecursive, fn)
    } else if (is.data.frame(x)) {
      x[is.null(x)] <- NA
    } else {
      fn(x)
    }
  }
  nullToNARecursive(x, function(x) if (is.null(x)) NA else x)
}


#compares a reference vector of objects to a reference vector. If different it will return the difference.
.isVectorContained = function(vectorToCompare = c(), vectorReference = c()){
  x = vector(length = length(vectorToCompare))
  for (i in 1:length(vectorToCompare)) {
      x[i] = vectorToCompare[i] %in% vectorReference
      if (length(which(vectorToCompare[i] %in% vectorReference)) == 0) {
        vectorReference } else {
          vectorReference = vectorReference[-match(vectorToCompare[i], vectorReference)]
        }
  }
  if (all(x == T)) {
    TRUE
  } else {
    return(vectorToCompare[!x])
  }
}

#' @export
matchTwoVectors = function(vectorToCompare = c(), vectorReference = c()){
  output <- list()
  for (i in 1:length(vectorToCompare)) {#i = 1
    toCompare = vectorToCompare[[i]]
    matchedReference = vectorReference[stringr::str_detect(vectorToCompare[[i]],vectorReference)]
    # next steps checks if there is more than one match, e.g. covariate vs covariate_balance
    # choose the match with the longest length
    matchedReference = matchedReference[nchar(matchedReference)==max(nchar(matchedReference))]

    output[[i]] <- data.frame(
      vectorToCompare = toCompare,
      vectorReference = matchedReference,
      stringsAsFactors = FALSE
    )
  }
  dplyr::bind_rows(output)
}

# recursively flattens tree based structure.
.flattenTree <- function(node, accumulated) {
  if (is.null(node$children)) {
    accumulated$name <- c(accumulated$name, node$name);
    accumulated$size <- c(accumulated$size, node$size);
    return(accumulated)
  } else {
    for (child in node$children) {
      accumulated <- .flattenTree(child, accumulated)
    }
    return(accumulated)
  }
}

# converts time in integer/milliseconds to date-time with timezone.
# assumption is that the system timezone = time zone of the local server running webApi.
.millisecondsToDate <- function(milliseconds) {
  sec <- milliseconds/1000
  as.POSIXct(sec, origin = "1970-01-01", tz = Sys.timezone())
}

# checks if url conforms with expected structure for base url
.checkBaseUrl <- function(baseUrl){

  valid_chars <- rex::rex(except_some_of(".", "/", " ", "-"))

  baseUrlRegEx <- rex::rex(
    start,

    # protocol identifier (optional) + //
    group(list("http", maybe("s")), "://"),

    # user:pass authentication (optional)
    maybe(non_spaces,
         maybe(":", zero_or_more(non_space)),
          "@"),

    #host name
    group(zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),

    #domain name
    zero_or_more(".", zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),

    #TLD identifier
    group(".", valid_chars %>% at_least(2)),

    # server port number (required)
    zero_or_more(":", digit %>% between(2, 5)),

    # resource path (optional)
    maybe("/", non_space %>% zero_or_more()),

    end
  )
  success <- as.logical(grepl(baseUrlRegEx, baseUrl))
  if (!success) {
    stop("Base URL not valid, should be like http://server.org:80/WebAPI")
  }
}

# formats string/name
.formatName <- function(name) {
  gsub("_", " ", gsub("\\[(.*?)\\]_", "", gsub(" ", "_", name)))
}

# get valid source keys
.getValidSourceKeys <- function(baseUrl,sourceKeys){
  .checkBaseUrl(baseUrl)
  cdmSources <- StudyManagement::getCdmSources(baseUrl)
  cdmSources <- dplyr::filter(cdmSources, toupper(sourceKey) %in% toupper(sourceKeys))
  cdmSources <- dplyr::select(cdmSources, sourceKey)
  dplyr::pull(cdmSources, sourceKey)
}


# Parse API to native (json) and parsed (r-friendly format)
.getApiResponseParse <- function(url){#url <- baseUrl
  .checkBaseUrl(baseUrl)
  getUrl <- httr::GET(url)

  if (httr::http_type(getUrl) != "application/json") {
    stop(paste0(url, " API for did not return json"), call. = FALSE)
  } else {

    native <- httr::content(getUrl, as = 'text', type = "application/json", encoding = 'UTF-8' )

    if (stringr::str_detect(string = native, pattern = "An exception ocurred")) {
      stop(paste0(url, " API call returned an Exception error"), call. = FALSE)
    } else {
    parsed <- jsonlite::fromJSON(txt = native, simplifyVector = TRUE, simplifyDataFrame = TRUE)
    }
    result <- list(
      native = native,
      parsed = parsed
    )
    result

  }
}



.typeFieldsInComparativeEffectivnessModel <- function() {
  fieldsInteger <- c('targetId', 'comparatorId', 'outcomeId', 'analysisId')
  fieldsCharacter <- c('databaseId')

  integer <- data.frame(type = 'integer',field = fieldsInteger, stringsAsFactors = FALSE) %>% tidyr::as_tibble()
  character <- data.frame(type = 'character',field = fieldsCharacter, stringsAsFactors = FALSE) %>% tidyr::as_tibble()

  dplyr::bind_rows(integer, character) %>% dplyr::distinct()
}

.checkErrorsIdFields <- function(idFields) {

  typeIdFieldsInComparativeEffectivnessModel(idFields = idFields$field)

  errorMessage <- checkmate::makeAssertCollection()

  for (i in (1:length(idFields))) {#i = 1

  if (idFields[i] %in% integer) {
      checkmate::assertCount(x = get(idFields[i]), add = errorMessage)
    } else if (idFields[i] %in% character) {
      checkmate::assertCharacter(x = get(idFields[i]), add = errorMessage)
    } else {
      checkmate::assertChoice(x = idFields[i],
                              choices = c(integer, character),
                              null.ok = FALSE,
                              add = errorMessage)
    }
  }
  checkmate::reportAssertions(errorMessage)
}



.getDataFromMemoryOrRDMS <- function(args) {

  idFields <- names(environmentArgs) %>% stringr::str_subset(pattern = "Id$") #ends with 'Id'
  .typeFieldsInComparativeEffectivnessModel(idFields = idFields)

  errorMessage <- checkmate::makeAssertCollection()

  if (is.null(connection) & is.null(dataTable)) {
    stop(glue::glue("No connection to RDMS provided to retrieve data from {dataTable}
                     No data frame called {dataTable} provided in function call.
                     No data."
                    )
        )
  }

  if (is.null(connection)) {
    result <- get(dataTable) %>%
      dplyr::filter(targetId %in% !!targetId,
                    comparatorId %in% !!comparatorId,
                    outcomeId %in% !!outcomeId,
                    databaseId %in% !!databaseId,
                    analysisId %in% !!analysisId
      )
  } else {
    sql <- glue::glue("SELECT * FROM {SqlRender::camelCaseToSnakeCase(dataTable)}
                    where target_id = {targetId} and
                          comparator_id = {comparatorId} and
                          outcome_id = {outcomeId} and
                          database_id = {databaseId} and
                          analysis_id = {analysisId}")
    sql <- SqlRender::translate(sql, targetDialect = connection@dbms)$sql
    result <- DatabaseConnector::querySql(connection, sql)
    colnames(result) <- SqlRender::snakeCaseToCamelCase(colnames(get(result)))
  }
  result
}


# .checkErrorsPrimaryKey <- function(targetId,
#                                    comparatorId,
#                                    outcomeId,
#                                    databaseId,
#                                    analysisId) {
#   errorMessage <- checkmate::makeAssertCollection()
#
#   checkmate::assertCount(x = targetId, add = errorMessage)
#   checkmate::assertCount(x = comparatorId,, add = errorMessage)
#   checkmate::assertCount(x = outcomeId, add = errorMessage)
#   checkmate::assertCount(x = analysisId, add = errorMessage)
#   checkmate::assertCharacter(x = databaseId, add = errorMessage)
#   checkmate::reportAssertions(errorMessage)
# }
#
# .getDataFromMemoryOrRDMS <- function(targetId,
#                                      comparatorId,
#                                      outcomeId,
#                                      databaseId,
#                                      analysisId,
#                                      connection = NULL,
#                                      dataTable = NULL) {
#
#   if (is.null(connection) & is.null(dataTable)) {
#     stop(glue::glue("No connection to RDMS provided to retrieve data from {dataTable}
#                      No data frame called {dataTable} provided in function call.
#                      No data."
#                     )
#         )
#   }
#
#   if (is.null(connection)) {
#     result <- get(dataTable) %>%
#       dplyr::filter(targetId %in% !!targetId,
#                     comparatorId %in% !!comparatorId,
#                     outcomeId %in% !!outcomeId,
#                     databaseId %in% !!databaseId,
#                     analysisId %in% !!analysisId
#       )
#   } else {
#     sql <- glue::glue("SELECT * FROM {SqlRender::camelCaseToSnakeCase(dataTable)}
#                     where target_id = {targetId} and
#                           comparator_id = {comparatorId} and
#                           outcome_id = {outcomeId} and
#                           database_id = {databaseId} and
#                           analysis_id = {analysisId}")
#     sql <- SqlRender::translate(sql, targetDialect = connection@dbms)$sql
#     result <- DatabaseConnector::querySql(connection, sql)
#     colnames(result) <- SqlRender::snakeCaseToCamelCase(colnames(get(result)))
#   }
#   result
# }
gowthamrao/StudyManagement documentation built on March 9, 2020, 10:48 p.m.