# Copyright 2018 Observational Health Data Sciences and Informatics
#
# This file is part of CoDImputationOnlyDeathPop
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Create the exposure and outcome cohorts
#'
#' @details
#' This function will create the exposure and outcome cohorts following the definitions included in
#' this package. Once the cohorts have been created, the counts returned by the package's
#' \code{GetCounts.sql} are labelled with the cohort names and written to
#' \code{CohortCounts.csv} in \code{outputFolder}.
#'
#' @param connectionDetails    An object of type \code{connectionDetails} as created using the
#'                             \code{\link[DatabaseConnector]{createConnectionDetails}} function in the
#'                             DatabaseConnector package.
#' @param cdmDatabaseSchema    Schema name where your patient-level data in OMOP CDM format resides.
#'                             Note that for SQL Server, this should include both the database and
#'                             schema name, for example 'cdm_data.dbo'.
#' @param cohortDatabaseSchema Schema name where intermediate data can be stored. You will need to have
#'                             write privileges in this schema. Note that for SQL Server, this should
#'                             include both the database and schema name, for example 'cdm_data.dbo'.
#' @param cohortTable          The name of the table that will be created in the work database schema.
#'                             This table will hold the exposure and outcome cohorts used in this
#'                             study.
#' @param oracleTempSchema     Should be used in Oracle to specify a schema where the user has write
#'                             privileges for storing temporary tables.
#' @param outputFolder         Name of local folder to place results; make sure to use forward slashes
#'                             (/). The folder is created (including missing parent folders) when it
#'                             does not exist yet.
#'
#' @return
#' Called for its side effects (cohort table, \code{CohortCounts.csv}); invisibly returns
#' \code{TRUE}.
#'
#' @export
createCohorts <- function(connectionDetails,
                          cdmDatabaseSchema,
                          cohortDatabaseSchema,
                          cohortTable = "cohort",
                          oracleTempSchema,
                          outputFolder) {
  # recursive = TRUE so that a nested results path can be created in one call.
  if (!file.exists(outputFolder)) {
    dir.create(outputFolder, recursive = TRUE)
  }

  conn <- DatabaseConnector::connect(connectionDetails)
  # Close the connection on every exit path, including errors raised while
  # creating or counting the cohorts.
  on.exit(DatabaseConnector::disconnect(conn), add = TRUE)

  .createCohorts(connection = conn,
                 cdmDatabaseSchema = cdmDatabaseSchema,
                 cohortDatabaseSchema = cohortDatabaseSchema,
                 cohortTable = cohortTable,
                 oracleTempSchema = oracleTempSchema,
                 outputFolder = outputFolder)

  # Check number of subjects per cohort:
  ParallelLogger::logInfo("Counting cohorts")
  sql <- SqlRender::loadRenderTranslateSql("GetCounts.sql",
                                           "CoDImputationOnlyDeathPop",
                                           dbms = connectionDetails$dbms,
                                           oracleTempSchema = oracleTempSchema,
                                           cdm_database_schema = cdmDatabaseSchema,
                                           work_database_schema = cohortDatabaseSchema,
                                           study_cohort_table = cohortTable)
  counts <- DatabaseConnector::querySql(conn, sql)
  # Column names come back in snake case; the rest of the package uses camelCase.
  colnames(counts) <- SqlRender::snakeCaseToCamelCase(colnames(counts))
  counts <- addCohortNames(counts)
  utils::write.csv(counts, file.path(outputFolder, "CohortCounts.csv"), row.names = FALSE)

  # The disconnect call used to be the last expression; keep returning invisibly.
  invisible(TRUE)
}
# Adds a cohort-name column to `data` by looking up each cohort id in the package's
# settings/CohortsToCreate.csv (columns `cohortId` and `name`).
#
# data:           data frame containing a cohort id column.
# IdColumnName:   name of the cohort id column in `data`.
# nameColumnName: name to give the added cohort-name column.
#
# Returns the merged data frame with the name column moved to sit directly after the id
# column. Rows whose id is not in the settings file are kept (all.x = TRUE).
addCohortNames <- function(data, IdColumnName = "cohortDefinitionId", nameColumnName = "cohortName") {
  settingsFile <- system.file("settings", "CohortsToCreate.csv", package = "CoDImputationOnlyDeathPop")
  cohortSettings <- utils::read.csv(settingsFile)

  # Build the id -> name lookup, sorted by id and keeping the first name seen per id.
  lookup <- data.frame(cohortId = c(cohortSettings$cohortId),
                       cohortName = c(as.character(cohortSettings$name)))
  lookup <- lookup[order(lookup$cohortId), ]
  lookup <- lookup[!duplicated(lookup$cohortId), ]
  names(lookup)[1:2] <- c(IdColumnName, nameColumnName)

  merged <- merge(data, lookup, all.x = TRUE)

  # merge() appends the name column at the end; move it next to the id column
  # unless it is already there.
  idPos <- which(colnames(merged) == IdColumnName)
  lastPos <- ncol(merged)
  if (idPos < lastPos - 1) {
    newOrder <- c(1:idPos, lastPos, (idPos + 1):(lastPos - 1))
    merged <- merged[, newOrder]
  }
  return(merged)
}
# Creates the study cohort table and instantiates every cohort listed in the package's
# settings/CohortsToCreate.csv.
#
# connection:               open database connection; its "dbms" attribute selects the SQL
#                           dialect the package SQL is translated to.
# cdmDatabaseSchema:        schema holding the CDM data.
# vocabularyDatabaseSchema: schema holding the vocabulary tables (defaults to the CDM schema).
# cohortDatabaseSchema:     schema in which the cohort table is created.
# cohortTable:              name of the cohort table.
# oracleTempSchema:         temp schema passed through to SqlRender.
# outputFolder:             not used by this function; kept so existing callers keep working.
.createCohorts <- function(connection,
                           cdmDatabaseSchema,
                           vocabularyDatabaseSchema = cdmDatabaseSchema,
                           cohortDatabaseSchema,
                           cohortTable,
                           oracleTempSchema,
                           outputFolder) {
  dbms <- attr(connection, "dbms")

  # Create study cohort table structure:
  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql",
                                           packageName = "CoDImputationOnlyDeathPop",
                                           dbms = dbms,
                                           oracleTempSchema = oracleTempSchema,
                                           cohort_database_schema = cohortDatabaseSchema,
                                           cohort_table = cohortTable)
  DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE)

  # Instantiate cohorts:
  pathToCsv <- system.file("settings", "CohortsToCreate.csv", package = "CoDImputationOnlyDeathPop")
  cohortsToCreate <- utils::read.csv(pathToCsv)
  # seq_len() rather than 1:nrow(): with zero rows 1:0 would iterate over c(1, 0).
  for (i in seq_len(nrow(cohortsToCreate))) {
    writeLines(paste("Creating cohort:", cohortsToCreate$name[i]))
    # Each cohort definition lives in a package SQL file named after the cohort.
    sql <- SqlRender::loadRenderTranslateSql(sqlFilename = paste0(cohortsToCreate$name[i], ".sql"),
                                             packageName = "CoDImputationOnlyDeathPop",
                                             dbms = dbms,
                                             oracleTempSchema = oracleTempSchema,
                                             cdm_database_schema = cdmDatabaseSchema,
                                             vocabulary_database_schema = vocabularyDatabaseSchema,
                                             target_database_schema = cohortDatabaseSchema,
                                             target_cohort_table = cohortTable,
                                             target_cohort_id = cohortsToCreate$cohortId[i])
    DatabaseConnector::executeSql(connection, sql)
  }
}
# Unregisters every ParallelLogger logger whose name equals `type`.
#
# type: logger name to match exactly.
#
# Always returns NULL.
clearLoggerType <- function(type = "PLP log") {
  registered <- ParallelLogger::getLoggers()
  loggerNames <- unlist(lapply(registered, function(logger) logger$name))

  # Unregister by name, once per matching entry.
  matches <- which(loggerNames == type)
  lapply(matches, function(idx) {
    ParallelLogger::unregisterLogger(loggerNames[idx])
  })

  NULL
}
#' Creates the target population and outcome summary characteristics
#'
#' @details
#' This will create the patient characteristic table. The data are extracted with
#' \code{PatientLevelPrediction::getPlpData} (gender covariate only), a study population is
#' built with \code{PatientLevelPrediction::createStudyPopulation} (risk window from day 1 to
#' day 365, \code{minTimeAtRisk = 364}, subjects with a prior outcome removed) and the table is
#' produced by \code{PatientLevelPrediction::getPlpTable} with \code{longTermStartDays = -9999}.
#'
#' @param connectionDetails      The connections details for connecting to the CDM
#' @param cdmDatabaseSchema      The schema holding the CDM data
#' @param cohortDatabaseSchema   The schema holding the cohort table (used for both the target
#'                               and the outcome cohorts)
#' @param cohortTable            The name of the cohort table (used for both the target and the
#'                               outcome cohorts)
#' @param targetId               The cohort definition id of the target population
#' @param outcomeId              The cohort definition id of the outcome
#' @param tempCohortTable        The name of the temporary table used to hold the cohort
#'
#' @return
#' A dataframe with the characteristics
#'
#' @export
getTable1 <- function(connectionDetails,
                      cdmDatabaseSchema,
                      cohortDatabaseSchema,
                      cohortTable,
                      targetId,
                      outcomeId,
                      tempCohortTable = '#temp_cohort') {
  # TRUE/FALSE are spelled out throughout: T and F are ordinary variables that can be reassigned.
  covariateSettings <- FeatureExtraction::createCovariateSettings(useDemographicsGender = TRUE)

  # Target and outcome cohorts are both read from the same schema/table.
  plpData <- PatientLevelPrediction::getPlpData(connectionDetails,
                                                cdmDatabaseSchema = cdmDatabaseSchema,
                                                cohortId = targetId, outcomeIds = outcomeId,
                                                cohortDatabaseSchema = cohortDatabaseSchema,
                                                outcomeDatabaseSchema = cohortDatabaseSchema,
                                                cohortTable = cohortTable,
                                                outcomeTable = cohortTable,
                                                covariateSettings = covariateSettings)

  population <- PatientLevelPrediction::createStudyPopulation(plpData = plpData,
                                                              outcomeId = outcomeId,
                                                              binary = TRUE,
                                                              includeAllOutcomes = TRUE,
                                                              requireTimeAtRisk = TRUE,
                                                              minTimeAtRisk = 364,
                                                              riskWindowStart = 1,
                                                              riskWindowEnd = 365,
                                                              removeSubjectsWithPriorOutcome = TRUE)

  table1 <- PatientLevelPrediction::getPlpTable(cdmDatabaseSchema = cdmDatabaseSchema,
                                                longTermStartDays = -9999,
                                                population = population,
                                                connectionDetails = connectionDetails,
                                                cohortTable = tempCohortTable)
  return(table1)
}
#==========================
# Example of implementing an existing model in the PredictionComparison repository
#==========================
#' Checks the plp package is installed sufficiently for the network study and does other checks if needed
#'
#' @details
#' This will check that the network study dependencies work. It delegates to
#' \code{PatientLevelPrediction::checkPlpInstallation} with the Python check switched off.
#'
#' @param connectionDetails      The connections details for connecting to the CDM
#'
#' @return
#' A number (a value other than 1 means an issue with the install)
#'
#' @export
checkInstall <- function(connectionDetails = NULL) {
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  result <- PatientLevelPrediction::checkPlpInstallation(connectionDetails = connectionDetails,
                                                         python = FALSE)
  return(result)
}
#' Transport trained PLP models into the validation package
#'
#' @details
#' This will transport PLP models into a validation package. Every entry of \code{analysesDir}
#' whose name contains \code{Analysis_} is expected to hold a \code{plpResult} folder; each result
#' is loaded and passed to \code{PatientLevelPrediction::transportPlp}, which writes to
#' \code{<outputDir>/<analysis name>/plpResult}. Nothing is done when no such entry exists.
#'
#' @param analysesDir  The directory containing folders with PLP models
#' @param minCellCount The min cell count when transporting the PLP model evaluation results
#' @param databaseName The name of the database as a string
#' @param outputDir    The location to save the transported models (defaults to inst/plp_models)
#'
#' @return
#' The models will now be in the package. The function itself returns \code{NULL} invisibly.
#'
#' @export
transportPlpModels <- function(analysesDir,
                               minCellCount = 5,
                               databaseName = 'sharable name of development data',
                               outputDir
){
  if (missing(outputDir)) {
    outputDir <- 'inst/plp_models'
  }

  files <- dir(analysesDir, recursive = FALSE, full.names = FALSE)
  files <- files[grep('Analysis_', files)]

  filesIn <- file.path(analysesDir, files, 'plpResult')
  filesOut <- file.path(outputDir, files, 'plpResult')

  # seq_along() rather than 1:length(): with no analysis folders 1:0 would iterate
  # over c(1, 0) and try to load a result from NA.
  for (i in seq_along(filesIn)) {
    plpResult <- PatientLevelPrediction::loadPlpResult(filesIn[i])
    PatientLevelPrediction::transportPlp(plpResult,
                                         modelName = files[i], dataName = databaseName,
                                         outputFolder = filesOut[i],
                                         n = minCellCount,
                                         includeEvaluationStatistics = TRUE,
                                         includeThresholdSummary = TRUE, includeDemographicSummary = TRUE,
                                         includeCalibrationSummary = TRUE, includePredictionDistribution = TRUE,
                                         includeCovariateSummary = TRUE, save = TRUE)
  }
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.