# Copyright 2022 Observational Health Data Sciences and Informatics
#
# This file is part of PheValuator
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Export the PheValuator results to CSV files
#'
#' @param outputFolder Name of the folder where all the outputs will be written to.
#' @param exportFolder Name of the folder where all the exported files will be written to.
#' @param pheValuatorAnalysisList A list of objects of type \code{pheValuatorAnalysis} as created using
#' the \code{\link{createPheValuatorAnalysis}} function.
#' @param packageName The name of the package containing the cohort definitions. Can be left NULL if
#' \code{baseUrl} and \code{cohortSetReference} have been specified.
#' @param cohortToCreateFile The location of the cohortToCreate file within the package. Is ignored if \code{cohortSetReference} is specified.
#' @param baseUrl The base URL for the WebApi instance, for example:
#' "http://server.org:80/WebAPI". Can be left NULL if no cohort meta-data needs to be exported.
#' @param cohortSetReference A data frame with four columns, as described in the details. Can be left NULL if
#' \code{packageName} and \code{cohortToCreateFile} have been specified.
#' @param minCellCount The minimum cell count for fields contains person counts or fractions.
#' @param databaseId ID of the database
#' @param databaseName Name of the database
#' @param databaseDescription Description of the database
#'
#' @details
#' Currently three ways of executing this function are supported, which determines how the meta-data on the cohorts is retrieved:
#'
#' \enumerate{
#' \item Embedded in a study package, assuming the cohort definitions are stored in that package using the \code{ROhdsiWebApi::insertCohortDefinitionSetInPackage}. In this case, the \code{packageName} and \code{cohortToCreateFile} should be populated.
#' \item Using a WebApi interface to retrieve the cohort definitions. In this case, the \code{baseUrl} and \code{cohortSetReference} should be populated.
#' \item Skipping meta-data on the cohorts altogether. In this case, only the \code{cohortSetReference} needs to be populated.
#' }
#'
#' The \code{cohortSetReference} argument must be a data frame with the following columns:
#' \describe{
#' \item{atlasId}{The cohort ID in ATLAS.}
#' \item{atlasName}{The full name of the cohort. This will be shown in the Shiny app.}
#' \item{cohortId}{The cohort ID to use in the package. Usually the same as the cohort ID in ATLAS.}
#' \item{name}{A short name for the cohort, to use to create file names. do not use special characters.}
#' }
#' @export
exportPheValuatorResults <- function(outputFolder,
exportFolder = file.path(outputFolder, "export"),
pheValuatorAnalysisList,
packageName = NULL,
cohortToCreateFile = "settings/CohortsToCreate.csv",
baseUrl = NULL,
cohortSetReference = NULL,
minCellCount = 5,
databaseId = NULL,
databaseName = NULL,
databaseDescription = NULL
) {
if (!file.exists(exportFolder)) {
dir.create(exportFolder)
}
cohortIds <- extractCohortIds(pheValuatorAnalysisList)
if (is.null(packageName)) {
cohorts <- loadCohortsFromWebApi(baseUrl = baseUrl,
cohortSetReference = cohortSetReference,
cohortIds = cohortIds)
} else {
cohorts <- loadCohortsFromPackage(packageName = packageName,
cohortToCreateFile = cohortToCreateFile,
cohortIds = cohortIds)
}
writeToCsv(cohorts, file.path(exportFolder, "cohort.csv"))
ParallelLogger::logInfo("Saving database metadata")
database <- tibble::tibble(databaseId = databaseId,
databaseName = databaseName,
description = databaseDescription)
writeToCsv(database, file.path(exportFolder, "database.csv"))
ParallelLogger::logInfo("Saving analysis specifications")
tempFileName <- tempfile()
pheValuatorAnalysisToRow <- function(pheValuatorAnalysis) {
ParallelLogger::saveSettingsToJson(pheValuatorAnalysis, tempFileName)
row <- tibble::tibble(analysisId = pheValuatorAnalysis$analysisId,
description = pheValuatorAnalysis$description,
xSpecCohortId = pheValuatorAnalysis$createEvaluationCohortArgs$xSpecCohortId,
xSensCohortId = pheValuatorAnalysis$createEvaluationCohortArgs$xSensCohortId,
prevalenceCohortId = pheValuatorAnalysis$createEvaluationCohortArgs$prevalenceCohortId,
mainPopulationCohortId = pheValuatorAnalysis$createEvaluationCohortArgs$mainPopulationCohortId,
phenotypeCohortId = pheValuatorAnalysis$testPhenotypeAlgorithmArgs$phenotypeCohortId,
definition = readChar(tempFileName, file.info(tempFileName)$size))
return(row)
}
pheValuatorAnalysis <- lapply(pheValuatorAnalysisList, pheValuatorAnalysisToRow)
pheValuatorAnalysis <- dplyr::bind_rows(pheValuatorAnalysis)
pheValuatorAnalysis <- unique(pheValuatorAnalysis)
unlink(tempFileName)
writeToCsv(pheValuatorAnalysis, file.path(exportFolder, "phevaluator_analysis.csv"))
ParallelLogger::logInfo("Saving PheValuator results")
referenceTable <- readRDS(file.path(outputFolder, "reference.rds"))
analysisSummary <- summarizePheValuatorAnalyses(referenceTable, outputFolder)
results <- analysisSummary[, c("analysisId", "cutPoint", "truePositives", "falsePositives", "trueNegatives", "falseNegatives")]
results$databaseId <- rep(databaseId, nrow(results))
if (nrow(results) > 0) {
results <- enforceMinCellValue(results, fieldName = "truePositives", minValues = minCellCount)
results <- enforceMinCellValue(results, fieldName = "falsePositives", minValues = minCellCount)
results <- enforceMinCellValue(results, fieldName = "trueNegatives", minValues = minCellCount)
results <- enforceMinCellValue(results, fieldName = "falseNegatives", minValues = minCellCount)
}
writeToCsv(results, file.path(exportFolder, "phevaluator_result.csv"))
# Add all to zip file -------------------------------------------------------------------------------
ParallelLogger::logInfo("Adding results to zip file")
zipName <- file.path(exportFolder, paste0("PheValuatorResults_", databaseId, ".zip"))
files <- list.files(exportFolder, pattern = ".*\\.csv$")
oldWd <- setwd(exportFolder)
on.exit(setwd(oldWd), add = TRUE)
DatabaseConnector::createZipFile(zipFile = zipName, files = files)
ParallelLogger::logInfo("Results are ready for sharing at:", zipName)
}
writeToCsv <- function(data, fileName, incremental = FALSE, ...) {
colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data))
readr::write_csv(data, fileName)
}
enforceMinCellValue <- function(data, fieldName, minValues, silent = FALSE) {
toCensor <- as.vector(!is.na(data[, fieldName]) & data[, fieldName] < minValues & data[, fieldName] != 0)
if (!silent) {
percent <- round(100 * sum(toCensor)/nrow(data), 1)
ParallelLogger::logInfo(" censoring ",
sum(toCensor),
" values (",
percent,
"%) from ",
fieldName,
" because value below minimum")
}
if (length(minValues) == 1) {
data[toCensor, fieldName] <- -minValues
} else {
data[toCensor, fieldName] <- -minValues[toCensor]
}
return(data)
}
loadCohortsFromPackage <- function(packageName, cohortToCreateFile, cohortIds) {
pathToCsv <- system.file(cohortToCreateFile, package = packageName)
cohorts <- readr::read_csv(pathToCsv, col_types = readr::cols())
cohorts$atlasId <- NULL
if (!is.null(cohortIds)) {
cohorts <- cohorts[cohorts$cohortId %in% cohortIds, ]
}
if ("atlasName" %in% colnames(cohorts)) {
cohorts <- dplyr::rename(cohorts, cohortName = "name", cohortFullName = "atlasName")
} else {
cohorts <- dplyr::rename(cohorts, cohortName = "name", cohortFullName = "fullName")
}
getSql <- function(name) {
pathToSql <- system.file("sql", "sql_server", paste0(name, ".sql"), package = packageName)
sql <- readChar(pathToSql, file.info(pathToSql)$size)
return(sql)
}
cohorts$sql <- sapply(cohorts$cohortName, getSql)
getJson <- function(name) {
pathToJson <- system.file("cohorts", paste0(name, ".json"), package = packageName)
json <- readChar(pathToJson, file.info(pathToJson)$size)
return(json)
}
cohorts$json <- sapply(cohorts$cohortName, getJson)
return(cohorts)
}
loadCohortsFromWebApi <- function(baseUrl,
cohortSetReference,
cohortIds = NULL,
generateStats = TRUE) {
cohorts <- cohortSetReference
if (!is.null(cohortIds)) {
cohorts <- cohorts[cohorts$cohortId %in% cohortIds, ]
}
cohorts <- dplyr::rename(cohorts, cohortName = "name", cohortFullName = "atlasName")
ParallelLogger::logInfo("Retrieving cohort definitions from WebAPI")
if (!is.null(baseUrl)) {
if (!isInstalled("ROhdsiWebApi")) {
stop(" ROhdsiWebApi is required but is not installed. Please install it first")
}
for (i in 1:nrow(cohorts)) {
ParallelLogger::logInfo("- Retrieving definitions for cohort ", cohorts$cohortFullName[i])
cohortExpression <- ROhdsiWebApi::getCohortDefinitionExpression(definitionId = cohorts$atlasId[i],
baseUrl = baseUrl)
cohorts$json[i] <- cohortExpression$expression
cohorts$sql[i] <- ROhdsiWebApi::getCohortDefinitionSql(definitionId = cohorts$atlasId[i],
baseUrl = baseUrl,
generateStats = generateStats)
}
}
cohorts$atlasId <- NULL
return(cohorts)
}
isInstalled <- function(pkg, version = 0) {
installedVersion <- tryCatch(utils::packageVersion(pkg), error = function(e) NA)
return(!is.na(installedVersion) && installedVersion >= version)
}
extractCohortIds <- function(pheValuatorAnalysisList) {
cohortIds <- c()
for (i in 1:length(pheValuatorAnalysisList)) {
if (is.list(pheValuatorAnalysisList[[i]]) &&
length(pheValuatorAnalysisList[[i]]) > 0) {
cohortIds <- c(cohortIds, extractCohortIds(pheValuatorAnalysisList[[i]]))
} else if (grepl("CohortId$", names(pheValuatorAnalysisList)[i])[[1]]) {
cohortIds <- c(cohortIds, pheValuatorAnalysisList[[i]])
}
}
cohortIds <- unique(cohortIds)
cohortIds <- cohortIds[cohortIds != 0]
return(cohortIds)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.