# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of CovCoagEmaPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Execute the Study
#'
#' @details
#' This function executes the CovCoagEmaPrediction Study.
#'
#' @param connectionDetails An object of type \code{connectionDetails} as created using the
#' \code{\link[DatabaseConnector]{createConnectionDetails}} function in the
#' DatabaseConnector package.
#' @param cdmDatabaseSchema Schema name where your patient-level data in OMOP CDM format resides.
#' Note that for SQL Server, this should include both the database and
#' schema name, for example 'cdm_data.dbo'.
#' @param cdmDatabaseName Shareable name of the database
#' @param cohortDatabaseSchema Schema name where intermediate data can be stored. You will need to have
#' write priviliges in this schema. Note that for SQL Server, this should
#' include both the database and schema name, for example 'cdm_data.dbo'.
#' @param cohortTable The name of the table that will be created in the work database schema.
#' This table will hold the target population cohorts used in this
#' study.
#' @param oracleTempSchema Should be used in Oracle to specify a schema where the user has write
#' priviliges for storing temporary tables.
#' @param outputFolder Name of local folder to place results; make sure to use forward slashes
#' (/). Do not use a folder on a network drive since this greatly impacts
#' performance.
#' @param createProtocol Creates a protocol based on the analyses specification
#' @param createCohorts Create the cohortTable table with the target population and outcome cohorts?
#' @param runDiagnostic Runs a diagnostic of the T, O and tar settings for the cdmDatabaseSchema - can be used to check whether to change
#' settings or whether the prediction may not do well.
#' @param viewDiagnostic Opens a shiny app with the diagnostic results (run after runDiagnostic completes)
#' @param runAnalyses Run the model development
#' @param createResultsDoc Create a document containing the results of each prediction
#' @param createValidationPackage Create a package for sharing the models
#' @param saveModelsToJson Whether to save the models as json for validation (not all models are supported)
#' @param skeletonVersion The version of the validation skeleton to use
#' @param analysesToValidate A vector of analysis ids (e.g., c(1,3,10)) specifying which analysese to export into validation package. Default is NULL and all are exported.
#' @param packageResults Should results be packaged for later sharing?
#' @param minCellCount The minimum number of subjects contributing to a count before it can be included
#' in packaged results.
#' @param createShiny Create a shiny app with the results
#' @param createJournalDocument Do you want to create a template journal document populated with results?
#' @param analysisIdDocument Which Analysis_id do you want to create the document for?
#' @param onlyFetchData Only fetch data for the analyses without fitting models. Setting this flag will overwrite your input provided to the runAnalyses and createCohorts parameters.
#' @param sampleSize The number of patients in the target cohort to sample (if NULL uses all patients)
#' @param verbosity Sets the level of the verbosity. If the log level is at or higher in priority than the logger threshold, a message will print. The levels are:
#' \itemize{
#' \item{DEBUG}{Highest verbosity showing all debug statements}
#' \item{TRACE}{Showing information about start and end of steps}
#' \item{INFO}{Show informative information (Default)}
#' \item{WARN}{Show warning messages}
#' \item{ERROR}{Show error messages}
#' \item{FATAL}{Be silent except for fatal errors}
#' }
#' @param cdmVersion The version of the common data model
#'
#' @examples
#' \dontrun{
#' connectionDetails <- createConnectionDetails(dbms = "postgresql",
#' user = "joe",
#' password = "secret",
#' server = "myserver")
#'
#' execute(connectionDetails,
#' cdmDatabaseSchema = "cdm_data",
#' cdmDatabaseName = 'shareable name of the database'
#' cohortDatabaseSchema = "study_results",
#' cohortTable = "cohort",
#' oracleTempSchema = NULL,
#' outputFolder = "c:/temp/study_results",
#' createProtocol = T,
#' createCohorts = T,
#' runDiagnostic = F,
#' viewDiagnostic = F,
#' runAnalyses = T,
#' createResultsDoc = T,
#' createValidationPackage = T,
#' saveModelsToJson = T,
#' skeletonVersion = 'v1.0.1',
#' packageResults = F,
#' minCellCount = 5,
#' createShiny = F,
#' sampleSize = 10000,
#' verbosity = "INFO",
#' cdmVersion = 5)
#' }
#'
#' @export
execute <- function(connectionDetails,
cdmDatabaseSchema,
cdmDatabaseName = 'friendly database name',
cohortDatabaseSchema = cdmDatabaseSchema,
cohortTable = "cohort",
oracleTempSchema = cohortDatabaseSchema,
outputFolder,
createProtocol = F,
createCohorts = F,
runDiagnostic = F,
viewDiagnostic = F,
runAnalyses = F,
createResultsDoc = F,
createValidationPackage = F,
saveModelsToJson = T,
skeletonVersion = 'v0.0.1',
analysesToValidate = NULL,
packageResults = F,
minCellCount= 5,
createShiny = F,
createJournalDocument = F,
analysisIdDocument = 1,
onlyFetchData = F,
sampleSize = NULL,
verbosity = "INFO",
cdmVersion = 5) {
if (!file.exists(outputFolder))
dir.create(outputFolder, recursive = TRUE)
ParallelLogger::addDefaultFileLogger(file.path(outputFolder, "log.txt"))
if(createProtocol){
ensure_installed('officer')
createPlpProtocol(outputFolder)
}
if (createCohorts || onlyFetchData) {
ParallelLogger::logInfo("Creating cohorts")
createCohorts(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
oracleTempSchema = oracleTempSchema,
outputFolder = outputFolder)
}
if(runDiagnostic){
ParallelLogger::logInfo(paste0("Creating diagnostic results for ",cdmDatabaseName))
predictionAnalysisListFile <- system.file("settings",
"predictionAnalysisList.json",
package = "CovCoagEmaPrediction")
predictionAnalysisList <- PatientLevelPrediction::loadPredictionAnalysisList(predictionAnalysisListFile)
# extract settings
#sampleSize = predictionAnalysisList$maxSampleSize
if(!is.null(predictionAnalysisList$maxSampleSize)){
warning('sampleSize is now specified in execute() - ignoring json settings')
}
cohortIds= predictionAnalysisList$cohortIds
cohortNames = predictionAnalysisList$cohortNames
outcomeIds = predictionAnalysisList$outcomeIds
outcomeNames = predictionAnalysisList$outcomeNames
tars <- do.call(rbind, lapply(predictionAnalysisList$modelAnalysisList$populationSettings, function(x){
c(x$riskWindowStart, x$startAnchor,
x$riskWindowEnd, x$endAnchor)}))
riskWindowStart = tars[,1]
startAnchor = tars[,2]
riskWindowEnd = tars[,3]
endAnchor = tars[,4]
# run diagnostic
for(i in 1:length(cohortIds)){
cohortId <- cohortIds[i]
cohortName <- cohortNames[i]
ParallelLogger::logInfo(paste0("Target Cohort: ", cohortName, ' generating'))
diag <- tryCatch({PatientLevelPrediction::diagnostic(cdmDatabaseName = cdmDatabaseName,
connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
oracleTempSchema = oracleTempSchema,
cohortId = cohortId,
cohortName = cohortName,
outcomeIds = outcomeIds,
outcomeNames = outcomeNames,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
outcomeDatabaseSchema = cohortDatabaseSchema,
outcomeTable = cohortTable,
cdmVersion = cdmVersion,
outputFolder = file.path(outputFolder, 'diagnostics'),
sampleSize = sampleSize,
minCellCount = minCellCount,
riskWindowStart = as.double(riskWindowStart),
startAnchor = startAnchor,
riskWindowEnd = as.double(riskWindowEnd),
endAnchor = endAnchor)},
error = function(err) {
# error handler picks up where error was generated
ParallelLogger::logError(paste("Diagnostic error: ",err))
return(NULL)
})
}
}
if(viewDiagnostic){
ParallelLogger::logInfo(paste0("Loading diagnostic shiny app"))
checkDiagnosticResults <- dir.exists(file.path(outputFolder, 'diagnostics'))
checkShinyViewer <- dir.exists(system.file("shiny", "DiagnosticsExplorer", package = "PatientLevelPrediction"))
if(!checkDiagnosticResults){
warning('No diagnosstic results found, please execute with runDiagnostic first')
} else if(!checkShinyViewer){
warning('No DiagnosticsExplorer shiny app found in your PatientLevelPrediction library - try updating PatientLevelPrediction')
} else{
ensure_installed("shiny")
ensure_installed("shinydashboard")
ensure_installed("DT")
ensure_installed("VennDiagram")
ensure_installed("htmltools")
ensure_installed("shinyWidgets")
shinyDirectory <- system.file("shiny", "DiagnosticsExplorer", package = "PatientLevelPrediction")
shinySettings <- list(dataFolder = file.path(outputFolder, 'diagnostics'))
.GlobalEnv$shinySettings <- shinySettings
on.exit(rm(shinySettings, envir = .GlobalEnv))
shiny::runApp(shinyDirectory)
}
}
if(runAnalyses || onlyFetchData){
if(onlyFetchData) {
ParallelLogger::logInfo("Only fetching data and not running predictions")
} else {
ParallelLogger::logInfo("Running predictions")
}
predictionAnalysisListFile <- system.file("settings",
"predictionAnalysisList.json",
package = "CovCoagEmaPrediction")
predictionAnalysisList <- PatientLevelPrediction::loadPredictionAnalysisList(predictionAnalysisListFile)
predictionAnalysisList$connectionDetails = connectionDetails
predictionAnalysisList$cdmDatabaseSchema = cdmDatabaseSchema
predictionAnalysisList$cdmDatabaseName = cdmDatabaseName
predictionAnalysisList$oracleTempSchema = oracleTempSchema
predictionAnalysisList$cohortDatabaseSchema = cohortDatabaseSchema
predictionAnalysisList$cohortTable = cohortTable
predictionAnalysisList$outcomeDatabaseSchema = cohortDatabaseSchema
predictionAnalysisList$outcomeTable = cohortTable
predictionAnalysisList$cdmVersion = cdmVersion
predictionAnalysisList$outputFolder = outputFolder
predictionAnalysisList$verbosity = verbosity
predictionAnalysisList$onlyFetchData = onlyFetchData
if(!is.null(predictionAnalysisList$maxSampleSize)){
warning('sampleSize is now specified in execute() - ignoring json settings')
}
predictionAnalysisList$maxSampleSize <- sampleSize
# make backwards compatible:
if(class(predictionAnalysisList$modelAnalysisList$covariateSettings[[1]])=='covariateSettings'){
predictionAnalysisList$modelAnalysisList$covariateSettings <- predictionAnalysisList$modelAnalysisList$covariateSettings
}else{
predictionAnalysisList$modelAnalysisList$covariateSettings <- evaluateCovariateSettings(covariateSettings = predictionAnalysisList$modelAnalysisList$covariateSettings,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable)
}
result <- do.call(PatientLevelPrediction::runPlpAnalyses, predictionAnalysisList)
}
if (packageResults) {
ensure_installed("OhdsiSharing")
ParallelLogger::logInfo("Packaging results")
packageResults(outputFolder = outputFolder,
minCellCount = minCellCount)
}
if(createResultsDoc){
ensure_installed("officer")
ensure_installed("gridExtra")
ensure_installed("grDevices")
createMultiPlpReport(analysisLocation=outputFolder,
protocolLocation = file.path(outputFolder,'protocol.docx'),
includeModels = F)
}
if(createValidationPackage){
ensure_installed("Hydra")
if(!is_installed("Hydra", version = '0.0.8')){
warning('Hydra need to be updated to use custom cohort covariates')
}
if(!saveModelsToJson){
ParallelLogger::logInfo('Creating validation using non-JSON models')
createValidationPackage(modelFolder = outputFolder,
outputFolder = outputFolder,
minCellCount = minCellCount,
databaseName = cdmDatabaseName,
analysisIds = analysesToValidate,
skeletonVersion = skeletonVersion)}
else{
ParallelLogger::logInfo('Creating validation using models saved to JSON files')
tryCatch({
createValidationPackageJson(devPackageName = 'CovCoagEmaPrediction',
devDatabaseName = cdmDatabaseName,
analysisLocation = outputFolder,
analysisIds = analysesToValidate,
outputFolder = outputFolder,
packageName = 'CovCoagEmaPredictionValidation',
description = 'validating models in CovCoagEmaPrediction',
skeletonVersion = skeletonVersion,
createdBy = 'anonymous',
organizationName = 'none')
}, error = function(e){ParallelLogger::logError(e)})
}
}
if (createShiny) {
ensure_installed("plotly")
ensure_installed("ggplot2")
ensure_installed("reshape2")
ensure_installed("DT")
ensure_installed("htmltools")
ensure_installed("shinydashboard")
ensure_installed("shinyWidgets")
ensure_installed("shinycssloaders")
ensure_installed("shiny")
populateShinyApp(outputDirectory = file.path(outputFolder, 'ShinyApp'),
resultDirectory = outputFolder,
minCellCount = minCellCount,
databaseName = cdmDatabaseName)
}
if(createJournalDocument){
ensure_installed("Hydra")
predictionAnalysisListFile <- system.file("settings",
"predictionAnalysisList.json",
package = "CovCoagEmaPrediction")
jsonSettings <- tryCatch({Hydra::loadSpecifications(file=predictionAnalysisListFile)},
error=function(cond) {
stop('Issue with json file...')
})
pn <- jsonlite::fromJSON(jsonSettings)
createJournalDocument(resultDirectory = outputFolder,
analysisId = analysisIdDocument,
includeValidation = T,
cohortIds = pn$cohortDefinitions$id,
cohortNames = pn$cohortDefinitions$name)
}
invisible(NULL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.