# @file UploadPlpDbResults.R
#
# Copyright 2021 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Create the results tables to store PatientLevelPrediction models and results into a database
#' @description
#' This function executes a large set of SQL statements to create tables that can store models and results
#'
#' @details
#' This function can be used to create (or delete) PatientLevelPrediction result tables
#'
#' @param conn A connection to a database created by using the
#' function \code{connect} in the
#' \code{DatabaseConnector} package.
#' @param resultSchema The name of the database schema that the result tables will be created.
#' @param targetDialect The database management system being used
#' @param deleteExistingTables If true any existing tables matching the PatientLevelPrediction result tables names will be deleted
#' @param createTables If true the PatientLevelPrediction result tables will be created
#' @param stringAppendToTables A string that appends to the PatientLevelPrediction result tables
#' @param tempEmulationSchema The temp schema used when the database management system is oracle
#' @param testFile (used for testing) The location of an sql file with the table creation code
#'
#' @return
#' Returns NULL but creates the required tables into the specified database schema.
#'
#' @export
createPlpResultTables <- function(conn,
                                  resultSchema,
                                  targetDialect = 'postgresql',
                                  deleteExistingTables = TRUE,
                                  createTables = TRUE,
                                  stringAppendToTables = '',
                                  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                                  testFile = NULL){

  if(deleteExistingTables){
    ParallelLogger::logInfo('Deleting existing tables')

    # Tables are listed child-first so the drops do not violate foreign keys
    tables <- c(
      "CALIBRATION_SUMMARY",
      "COVARIATE_SUMMARY",
      "DEMOGRAPHIC_SUMMARY",
      "EVALUATION_STATISTICS",
      "PREDICTION_DISTRIBUTION",
      "THRESHOLD_SUMMARY",
      "ATTRITION", #new
      "DIAGNOSTICS", #new
      "RECALIBRATIONS", #new
      "RESULTS",
      "STUDY_MODELS",
      "MODELS",
      "MODEL_DESIGNS",
      "MODEL_SETTINGS",
      "COVARIATE_SETTINGS",
      "POPULATION_SETTINGS",
      "FEATURE_ENGINEERING_SETTINGS",
      "SPLIT_SETTINGS",
      "PLP_DATA_SETTINGS", #new
      "SAMPLE_SETTINGS",
      "TIDY_COVARIATES_SETTINGS", #new
      "TARS",
      "STUDIES",
      "COHORTS",
      "DATABASE_DETAILS",
      "RESEARCHERS"
    )

    # Apply the (normalized, upper-cased) table-name prefix if one was requested
    if(stringAppendToTables != ''){
      tables <- paste0(toupper(gsub('_','',gsub(' ','', stringAppendToTables))), '_', tables)
    }

    alltables <- DatabaseConnector::getTableNames(connection = conn,
                                                  databaseSchema = resultSchema)

    for(tb in tables){
      if(tb %in% alltables){
        # Empty the table first, then drop it
        for(sqlTemplate in c('TRUNCATE TABLE @my_schema.@table',
                             'DROP TABLE @my_schema.@table')){
          sql <- SqlRender::render(sqlTemplate,
                                   my_schema = resultSchema,
                                   table = tb)
          sql <- SqlRender::translate(sql,
                                      targetDialect = targetDialect,
                                      tempEmulationSchema = tempEmulationSchema)
          DatabaseConnector::executeSql(conn, sql)
        }
      }
    }
  }

  if(createTables){
    ParallelLogger::logInfo('Creating PLP results tables')

    if(stringAppendToTables != ''){
      stringAppendToTables <- paste0(toupper(gsub('_','',gsub(' ','', stringAppendToTables))), '_')
    }

    if(is.null(testFile)){
      # Normal path: use the table-creation SQL shipped inside the package
      renderedSql <- SqlRender::loadRenderTranslateSql(sqlFilename = "PlpResultTables.sql",
                                                       packageName = "PatientLevelPrediction",
                                                       dbms = targetDialect,
                                                       tempEmulationSchema = tempEmulationSchema,
                                                       my_schema = resultSchema,
                                                       string_to_append = stringAppendToTables
      )
    } else {
      # Testing path: read the table-creation SQL from the supplied file
      sql <- readChar(testFile, file.info(testFile)$size)
      renderedSql <- SqlRender::render(sql = sql[1],
                                       my_schema = resultSchema,
                                       string_to_append = stringAppendToTables)
      renderedSql <- SqlRender::translate(sql = renderedSql,
                                          targetDialect = targetDialect,
                                          tempEmulationSchema = tempEmulationSchema)
    }

    DatabaseConnector::executeSql(conn, renderedSql)
  }
}
# could add cohortDatabaseSchema and cohortTable as inputs below, plus database table
#' Populate the PatientLevelPrediction results tables
#' @description
#' This function formats and uploads results that have been generated via an ATLAS prediction package into a database
#'
#' @details
#' This function can be used to upload PatientLevelPrediction results into a database
#'
#' @param conn A connection to a database created by using the
#' function \code{connect} in the
#' \code{DatabaseConnector} package.
#' @param resultSchema (string) The name of the database schema with the result tables.
#' @param stringAppendToTables (string) A string that appends to the PatientLevelPrediction result tables
#' @param targetDialect (string) The database management system being used
#' @param tempEmulationSchema (string) The temp schema used when the database management system is oracle
#' @param packageName (string) The name of the ATLAS R package used to generate the results (this is used to extract cohort jsons)
#' @param studyJsonList (list) A list of lists per cohort with the cohort_name, cohort_id and cohort_json
#' @param studyName (string) A reference study name
#' @param studyDescription (string) A description of the study
#' @param researcherName (string) Name of the researcher who developed the study
#' @param researcherEmail (string) Email of the researcher who developed the study
#' @param researcherOrg (string) Organisation of the researcher who developed the study
#' @param databaseName (string) name of the database used to develop the model/s
#' @param databaseAcronym (string) acronym of the database used to develop the model/s
#' @param databaseVersion (int) Version of the database used to develop the model/s
#' @param databaseDescription (string) Description of the database used to develop the model/s
#' @param databaseType (string) Type of the database used to develop the model/s (e.g., claims)
#' @param valDatabases (list) A named list with details of the external validation databases. Needs to contain: name, description, version, type.
#' @param resultLocation (string) location of directory where the main package results were saved
#' @param resultPattern (string) A string to match to select models of interest
#' @param validationLocation (string) location of directory where the validation package results were saved
#' @param addInternalValidation (boolean) Whether the internal validation results should be uploaded
#' @param addExternalValidation (boolean) Whether the external validation results should be uploaded
#' @param gsubVal (string) Remove patterns from the result name
#' @param removePattern (string) Restrict to result names with this pattern
#'
#' @return
#' Returns NULL but uploads all the results in resultLocation to the PatientLevelPrediction result tables in resultSchema
#'
#' @export
populatePlpResultTables <- function(conn,
                                    resultSchema,
                                    stringAppendToTables = '',
                                    targetDialect = 'postgresql',
                                    tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                                    packageName,
                                    studyJsonList,
                                    studyName = '',
                                    studyDescription = '',
                                    researcherName = '',
                                    researcherEmail = '',
                                    researcherOrg = '',
                                    databaseName = NULL,
                                    databaseAcronym = NULL,
                                    databaseVersion = 1,
                                    databaseDescription = NULL,
                                    databaseType = NULL,
                                    valDatabases = list(ccae = list(name = 'CCAE',
                                                                    description = '',
                                                                    version = 1,
                                                                    type = 'US Claims')),
                                    resultLocation = NULL,
                                    resultPattern = '',
                                    validationLocation = file.path(resultLocation,'Validation'),
                                    addInternalValidation = TRUE,
                                    addExternalValidation = TRUE,
                                    gsubVal = NULL,
                                    removePattern = NULL
){
  ensure_installed("jsonlite")

  # input checks: exactly one of packageName / studyJsonList must be supplied;
  # it determines how cohort jsons are located later (addCohort type argument)
  ##TODO
  if(base::missing(packageName)){
    if(base::missing(studyJsonList)){
      stop('Need either packageName or studyJsonList')
    }else{
      if(is.null(studyJsonList)){
        stop('studyJsonList needs to be non-null')
      }
      cohortType <- 'list'
      jsonInput <- studyJsonList
    }
  } else{
    if(is.null(packageName)){
      stop('packageName needs to be non-null')
    }
    cohortType <- 'package'
    jsonInput <- packageName
  }

  # normalize the table prefix once (upper-case, strip spaces/underscores, add '_')
  if(stringAppendToTables != ''){
    stringAppendToTables <- paste0(toupper(gsub('_','',gsub(' ','', stringAppendToTables))), '_')
  }

  # insert (or look up) the study, researcher and development database rows
  studyId <- addStudy(conn = conn,
                      resultSchema = resultSchema,
                      targetDialect = targetDialect,
                      studyName = studyName,
                      studyDescription = studyDescription,
                      stringAppendToTables = stringAppendToTables,
                      tempEmulationSchema = tempEmulationSchema)
  ParallelLogger::logInfo(paste0('studyId: ', studyId))

  researcherId <- addResearcher(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                researcherName = researcherName,
                                researcherEmail = researcherEmail,
                                researcherOrg = researcherOrg,
                                stringAppendToTables = stringAppendToTables,
                                tempEmulationSchema = tempEmulationSchema)
  ParallelLogger::logInfo(paste0('researcherId: ', researcherId))

  dbId <- addDatabase(conn = conn,
                      resultSchema = resultSchema,
                      targetDialect = targetDialect,
                      databaseName = databaseName,
                      databaseAcronym = databaseAcronym,
                      databaseVersion = databaseVersion,
                      databaseDescription = databaseDescription,
                      databaseType = databaseType,
                      stringAppendToTables = stringAppendToTables,
                      tempEmulationSchema = tempEmulationSchema)
  ParallelLogger::logInfo(paste0('dbId: ', dbId))

  # find the per-model result folders; exclude loose csv/txt files
  # (dots escaped so only genuine '.csv'/'.txt' names match)
  mdls <- dir(resultLocation, pattern = resultPattern)
  removeMdls <- union(grep('\\.csv', mdls), grep('\\.txt', mdls))
  if(length(removeMdls)>0){
    mdls <- mdls[-removeMdls]
  }

  # remove pattern - guarded: with zero matches, mdls[-integer(0)] would drop ALL models
  if(!is.null(removePattern)){
    removeInd <- grep(removePattern, mdls)
    if(length(removeInd)>0){
      mdls <- mdls[-removeInd]
    }
  }

  for(modelRes in mdls){
    ParallelLogger::logInfo(paste0('Adding results for model @ ', modelRes))

    # TODO edit csv here
    mdl <- tryCatch(
      {PatientLevelPrediction::loadPlpResult(file.path(resultLocation, modelRes, 'plpResult'))},
      error = function(e){ParallelLogger::logInfo(e);return(NULL)}
    )

    if(!is.null(mdl)){
      # add TAR
      tarId <- addTar(conn = conn,
                      resultSchema = resultSchema,
                      targetDialect = targetDialect,
                      startDay = mdl$model$settings$populationSettings$riskWindowStart,
                      startAnchor = mdl$model$settings$populationSettings$startAnchor,
                      endDay = mdl$model$settings$populationSettings$riskWindowEnd,
                      endAnchor = mdl$model$settings$populationSettings$endAnchor,
                      stringAppendToTables = stringAppendToTables,
                      tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('tarId: ', tarId))

      # target and outcome cohorts
      tId <- addCohort(conn = conn,
                       resultSchema = resultSchema,
                       targetDialect = targetDialect,
                       jsonInput = jsonInput, type = cohortType,
                       cohortId = mdl$model$trainDetails$cohortId,
                       stringAppendToTables = stringAppendToTables,
                       tempEmulationSchema = tempEmulationSchema)
      ParallelLogger::logInfo(paste0('tId: ', tId))

      oId <- addCohort(conn = conn,
                       resultSchema = resultSchema,
                       targetDialect = targetDialect,
                       jsonInput = jsonInput, type = cohortType,
                       cohortId = mdl$model$trainDetails$outcomeId,
                       stringAppendToTables = stringAppendToTables,
                       tempEmulationSchema = tempEmulationSchema)
      ParallelLogger::logInfo(paste0('oId: ', oId))

      # settings tables (each addX returns the row id for the design record)
      popSetId <- addPopulationSetting(conn = conn,
                                       resultSchema = resultSchema,
                                       targetDialect = targetDialect,
                                       json = mdl$model$settings$populationSettings,
                                       stringAppendToTables = stringAppendToTables,
                                       tempEmulationSchema = tempEmulationSchema)
      ParallelLogger::logInfo(paste0('popSetId: ', popSetId))

      covSetId <- addCovariateSetting(conn = conn,
                                      resultSchema = resultSchema,
                                      targetDialect = targetDialect,
                                      json = mdl$model$settings$covariateSettings,
                                      stringAppendToTables = stringAppendToTables,
                                      tempEmulationSchema = tempEmulationSchema)
      ParallelLogger::logInfo(paste0('covSetId: ', covSetId))

      modSetId <- addModelSetting(conn = conn,
                                  resultSchema = resultSchema,
                                  targetDialect = targetDialect,
                                  modelType = mdl$model$settings$modelSettings$model,
                                  json = mdl$model$settings$modelSettings,
                                  stringAppendToTables = stringAppendToTables,
                                  tempEmulationSchema = tempEmulationSchema)
      ParallelLogger::logInfo(paste0('modSetId: ', modSetId))

      # NEW: add plp_data_settings
      plpDataSetId <- addPlpDataSetting(
        conn = conn,
        resultSchema = resultSchema,
        targetDialect = targetDialect,
        json = mdl$model$settings$plpDataSettings,
        stringAppendToTables = stringAppendToTables,
        tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('plpDataSetId: ', plpDataSetId))

      # NEW: add FE_settings
      FESetId <- addFESetting(
        conn = conn,
        resultSchema = resultSchema,
        targetDialect = targetDialect,
        json = mdl$model$settings$featureEngineering,
        stringAppendToTables = stringAppendToTables,
        tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('FESetId: ', FESetId))

      # NEW: add sample_settings
      sampleSetId <- addSampleSetting(
        conn = conn,
        resultSchema = resultSchema,
        targetDialect = targetDialect,
        json = mdl$model$settings$sampleSettings,
        stringAppendToTables = stringAppendToTables,
        tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('sampleSetId: ', sampleSetId))

      # NEW: add tidy_covariate_settings
      tidySetId <- addTidySetting(
        conn = conn,
        resultSchema = resultSchema,
        targetDialect = targetDialect,
        json = mdl$model$settings$tidyCovariates,
        stringAppendToTables = stringAppendToTables,
        tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('tidySetId: ', tidySetId))

      # this is now split setting - update this function
      splitId <- addSplitSettings(
        conn = conn,
        resultSchema = resultSchema,
        targetDialect = targetDialect,
        json = mdl$model$settings$splitSettings,
        stringAppendToTables = stringAppendToTables,
        tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('splitId: ', splitId))

      # combine the setting ids into a model design record
      modelDesignId <- addModelDesign( # need to create
        conn = conn,
        resultSchema = resultSchema,
        targetDialect = targetDialect,
        targetId = tId,
        outcomeId = oId,
        tarId = tarId,
        plpDataSettingId = plpDataSetId,
        populationSettingId = popSetId,
        modelSettingId = modSetId,
        covariateSettingId = covSetId,
        sampleSettingId = sampleSetId,
        splitSettingId = splitId, # changed from trainingId
        featureEngineeringSettingId = FESetId,
        tidyCovariatesSettingId = tidySetId,
        researcherId = researcherId,
        stringAppendToTables = stringAppendToTables,
        tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('modelDesignId: ', modelDesignId))

      # the trained model itself
      modelId <- addModel(
        conn = conn,
        resultSchema = resultSchema,
        targetDialect = targetDialect,
        analysisId = mdl$model$trainDetails$analysisId, # trainDetails
        modelDesignId = modelDesignId,
        researcherId = researcherId,
        databaseId = dbId,
        hyperParamSearch = mdl$model$trainDetails$hyperParamSearch, #mdl$trainDetails$hyperParamSearch
        plpModelFile = " ",
        executionDateTime = format(mdl$executionSummary$ExecutionDateTime, format="%Y-%m-%d"), #mdl$trainDetails$trainingDate
        trainingTime = mdl$model$trainDetails$trainingTime, #mdl$trainDetails$trainingTime
        intercept = ifelse(is.list(mdl$model), mdl$model$model$coefficients[1], 0),
        requireDenseMatrix = mdl$model$settings$requireDenseMatrix,
        stringAppendToTables = stringAppendToTables,
        tempEmulationSchema = tempEmulationSchema
      )
      ParallelLogger::logInfo(paste0('modelId: ', modelId))

      # link modelId and studyId
      addStudiesModel(conn = conn,
                      resultSchema = resultSchema,
                      targetDialect = targetDialect,
                      studyId = studyId,
                      modelId = modelId,
                      stringAppendToTables = stringAppendToTables,
                      tempEmulationSchema = tempEmulationSchema)

      # add internal validation
      if(addInternalValidation){
        ParallelLogger::logInfo('Adding internal validation results')
        ##if exists
        if(!is.null(mdl)){
          resultId <- addResult(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                modelId = modelId,
                                researcherId = researcherId,
                                databaseId = dbId,
                                targetId = tId,
                                outcomeId = oId,
                                tarId = tarId,
                                restrictPlpDataSettingId = plpDataSetId,
                                populationSettingId = popSetId,
                                executionDateTime = format(mdl$executionSummary$ExecutionDateTime, format="%Y-%m-%d"),
                                plpVersion = mdl$executionSummary$PackageVersion$packageVersion,
                                stringAppendToTables = stringAppendToTables,
                                tempEmulationSchema = tempEmulationSchema)
          ParallelLogger::logInfo(paste0('resultId: ', resultId))

          # add attrition
          if(!is.null(mdl$model$trainDetails$attrition)){
            addAttrition(
              conn = conn, resultSchema = resultSchema, targetDialect = targetDialect,
              resultId = resultId,
              attrition = mdl$model$trainDetails$attrition,
              overWriteIfExists = TRUE,
              stringAppendToTables = stringAppendToTables,
              tempEmulationSchema = tempEmulationSchema
            )
          }

          # add evaluation
          if(!is.null(mdl$performanceEvaluation)){
            addEvaluation(conn = conn, resultSchema = resultSchema, targetDialect = targetDialect,
                          resultId = resultId,
                          performanceEvaluation = mdl$performanceEvaluation,
                          overWriteIfExists = TRUE,
                          stringAppendToTables = stringAppendToTables,
                          tempEmulationSchema = tempEmulationSchema)
          }

          if(!is.null(mdl$covariateSummary)){
            addCovariateSummary(conn = conn, resultSchema = resultSchema, targetDialect = targetDialect,
                                resultId = resultId,
                                covariateSummary = mdl$covariateSummary,
                                restrictToIncluded = TRUE,
                                overWriteIfExists = TRUE,
                                stringAppendToTables = stringAppendToTables,
                                tempEmulationSchema = tempEmulationSchema)
          }
        }
      }

      # add validation results for this model
      if(addExternalValidation){
        ParallelLogger::logInfo('Adding external validation results')

        if(is.null(validationLocation)){
          validationLocation <- file.path(resultLocation, 'validation')
        }

        valDbs <- dir(validationLocation)
        # restrict to the databases with info
        valDbs <- valDbs[valDbs %in% names(valDatabases)]

        if(length(valDbs)>0){
          valDbs <- valDbs[!valDbs %in% c('plplog.txt')]

          for(valDb in valDbs){
            # get valDbId
            valDbId <- addDatabase(conn = conn,
                                   resultSchema = resultSchema,
                                   targetDialect = targetDialect,
                                   databaseName = valDatabases[[valDb]]$name,
                                   databaseAcronym = valDb,
                                   databaseVersion = valDatabases[[valDb]]$version,
                                   databaseDescription = valDatabases[[valDb]]$description,
                                   databaseType = valDatabases[[valDb]]$type,
                                   stringAppendToTables = stringAppendToTables,
                                   tempEmulationSchema = tempEmulationSchema)

            validationResults <- as.character(dir(file.path(validationLocation, valDb)))
            validationResults <- validationResults[validationResults != 'CohortCounts.csv']
            valMods <- data.frame(validationResults = validationResults)

            # optionally clean up the validation result names for matching
            if(!is.null(gsubVal)){
              valModsEdit <- valMods$validationResults
              for(i in seq_len(nrow(gsubVal))){
                valModsEdit <- gsub(x = valModsEdit, pattern = gsubVal[i,1], replacement = gsubVal[i,2])
              }
              valMods$validationResultsEdit <- valModsEdit
            }else{
              valMods$validationResultsEdit <- valMods$validationResults
            }

            # remove pattern (already guarded against zero matches here)
            if(!is.null(removePattern)){
              if(length(grep(removePattern, valMods$validationResultsEdit))>0){
                valMods <- valMods[-grep(removePattern, valMods$validationResultsEdit),]
              }
            }

            # restrict to the current analysis
            ParallelLogger::logInfo(paste0('restricting to ', modelRes))
            valMods <- valMods[grep(modelRes, valMods$validationResultsEdit),]

            if(nrow(valMods)>0){
              # load each result
              for(valInd in seq_len(nrow(valMods))){
                ParallelLogger::logInfo(paste0('Loading validation at:', file.path(validationLocation, valDb, valMods$validationResults[valInd], 'validationResult' )))
                vmdl <- tryCatch(
                  {PatientLevelPrediction::loadPlpResult(file.path(validationLocation, valDb, valMods$validationResults[valInd], 'validationResult' ))},
                  error = function(e){ParallelLogger::logInfo(e); return(NULL)}
                )

                if(!is.null(vmdl)){
                  tId <- addCohort(conn = conn,
                                   resultSchema = resultSchema,
                                   targetDialect = targetDialect,
                                   jsonInput = jsonInput, type = cohortType,
                                   # fall back to the prediction's cohortId when validationDetails lacks it
                                   cohortId = ifelse(
                                     !is.null(vmdl$model$validationDetails$cohortId),
                                     vmdl$model$validationDetails$cohortId,
                                     vmdl$prediction$cohortId[1]
                                   ),
                                   stringAppendToTables = stringAppendToTables,
                                   tempEmulationSchema = tempEmulationSchema)

                  oId <- addCohort(conn = conn,
                                   resultSchema = resultSchema,
                                   targetDialect = targetDialect,
                                   jsonInput = jsonInput, type = cohortType,
                                   cohortId = vmdl$model$validationDetails$outcomeId,
                                   stringAppendToTables = stringAppendToTables,
                                   tempEmulationSchema = tempEmulationSchema)

                  # get tarId (added)
                  tarId <- addTar(
                    conn = conn,
                    resultSchema = resultSchema,
                    targetDialect = targetDialect,
                    startDay = vmdl$model$validationDetails$populationSettings$riskWindowStart,
                    startAnchor = vmdl$model$validationDetails$populationSettings$startAnchor,
                    endDay = vmdl$model$validationDetails$populationSettings$riskWindowEnd,
                    endAnchor = vmdl$model$validationDetails$populationSettings$endAnchor,
                    stringAppendToTables = stringAppendToTables,
                    tempEmulationSchema = tempEmulationSchema
                  )

                  # popSetId (added)
                  popSetId <- addPopulationSetting(
                    conn = conn,
                    resultSchema = resultSchema,
                    targetDialect = targetDialect,
                    json = vmdl$model$validationDetails$populationSettings,
                    stringAppendToTables = stringAppendToTables,
                    tempEmulationSchema = tempEmulationSchema
                  )

                  # NOTE: if/else (not ifelse) - the settings are lists and
                  # ifelse() would silently return only the first element
                  plpDataSetId <- addPlpDataSetting(
                    conn = conn,
                    resultSchema = resultSchema,
                    targetDialect = targetDialect,
                    stringAppendToTables = stringAppendToTables,
                    tempEmulationSchema = tempEmulationSchema,
                    json = if(!is.null(vmdl$model$validationDetails$plpDataSettings)){
                      vmdl$model$validationDetails$plpDataSettings
                    } else {
                      vmdl$model$settings$plpDataSettings
                    }
                  )

                  # add result
                  resultId <- addResult(conn = conn,
                                        resultSchema = resultSchema,
                                        targetDialect = targetDialect,
                                        modelId = modelId,
                                        researcherId = researcherId,
                                        databaseId = valDbId,
                                        targetId = tId,
                                        outcomeId = oId,
                                        tarId = tarId,
                                        restrictPlpDataSettingId = plpDataSetId,
                                        populationSettingId = popSetId,
                                        executionDateTime = format(vmdl$executionSummary$ExecutionDateTime, format="%Y-%m-%d"),
                                        plpVersion = vmdl$executionSummary$PackageVersion$packageVersion,
                                        stringAppendToTables = stringAppendToTables,
                                        tempEmulationSchema = tempEmulationSchema)

                  # add attrition
                  if(!is.null(vmdl$model$validationDetails$attrition)){
                    addAttrition(
                      conn = conn, resultSchema = resultSchema, targetDialect = targetDialect,
                      resultId = resultId,
                      attrition = vmdl$model$validationDetails$attrition,
                      overWriteIfExists = TRUE,
                      stringAppendToTables = stringAppendToTables,
                      tempEmulationSchema = tempEmulationSchema
                    )
                  }

                  # add performance
                  if(!is.null(vmdl$performanceEvaluation)){
                    addEvaluation(conn = conn, resultSchema = resultSchema, targetDialect = targetDialect,
                                  resultId = resultId,
                                  performanceEvaluation = vmdl$performanceEvaluation,
                                  overWriteIfExists = TRUE,
                                  stringAppendToTables = stringAppendToTables,
                                  tempEmulationSchema = tempEmulationSchema)
                  }

                  if(!is.null(vmdl$covariateSummary)){
                    addCovariateSummary(conn = conn, resultSchema = resultSchema, targetDialect = targetDialect,
                                        resultId = resultId,
                                        covariateSummary = vmdl$covariateSummary,
                                        restrictToIncluded = TRUE,
                                        overWriteIfExists = TRUE,
                                        stringAppendToTables = stringAppendToTables,
                                        tempEmulationSchema = tempEmulationSchema)
                  }
                }
              } # end val per database
            } # end if val exists
          } # val database
        }
      } #externalVal
    } #model not null
  } # per model
} #end funct
#======================
# HELPER FUNCTIONS
#======================
enc <- function(x){
  # Wrap a value in single quotes so it can be used as a SQL string literal.
  paste0("'", x, "'")
}
cleanNum <- function(x){
  # Replace non-finite entries (Inf, -Inf, NaN) with NA in every numeric or
  # integer column of a data.frame; other columns are left untouched.
  #
  # @param x A data.frame.
  # @return The same data.frame with non-finite numeric entries set to NA.
  #
  # seq_len handles the 0-column case (1:ncol(x) would yield c(1, 0)), and
  # class(...)[1] keeps one type per column even when a column carries
  # multiple classes (unlist(lapply(..., class)) would misalign the indices).
  colClasses <- vapply(seq_len(ncol(x)), function(i) class(x[, i])[1], character(1))
  numericCols <- which(colClasses %in% c("numeric", "integer"))
  for(id in numericCols){
    okVals <- is.finite(x[, id])
    if(sum(okVals) != length(okVals)){
      x[!okVals, id] <- NA
    }
  }
  return(x)
}
checkTable <- function(conn,
                       resultSchema,
                       stringAppendToTables = '',
                       targetDialect,
                       tableName,
                       columnNames,
                       values,
                       tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # Query a result table for rows where every supplied column equals the
  # corresponding (already-quoted) value; returns the matching rows as a
  # data.frame with camelCase column names.
  whereClause <- paste0(paste0(columnNames, " = ", values), collapse = " and ")
  renderedSql <- SqlRender::render(
    "SELECT * from @my_schema.@string_to_append@table where @input_vals;",
    my_schema = resultSchema,
    table = tableName,
    input_vals = whereClause,
    string_to_append = stringAppendToTables
  )
  translatedSql <- SqlRender::translate(
    renderedSql,
    targetDialect = targetDialect,
    tempEmulationSchema = tempEmulationSchema
  )
  DatabaseConnector::querySql(conn, translatedSql, snakeCaseToCamelCase = TRUE)
}
checkJson <- function(conn,
                      resultSchema,
                      stringAppendToTables = '',
                      targetDialect,
                      tableName,
                      jsonColumnName,
                      id,
                      json,
                      tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # Scan a result table for a row whose json column exactly equals `json`
  # and return that row's id column value; NULL when no row matches.
  renderedSql <- SqlRender::render(
    "SELECT * from @my_schema.@string_to_append@table;",
    my_schema = resultSchema,
    table = tableName,
    string_to_append = stringAppendToTables
  )
  translatedSql <- SqlRender::translate(
    renderedSql,
    targetDialect = targetDialect,
    tempEmulationSchema = tempEmulationSchema
  )
  existing <- DatabaseConnector::querySql(conn, translatedSql, snakeCaseToCamelCase = TRUE)

  matchedId <- NULL
  if(nrow(existing) > 0){
    isMatch <- existing[, jsonColumnName] == json
    if(sum(isMatch) > 0){
      # first matching row wins
      matchedId <- existing[isMatch, id][1]
    }
  }
  return(matchedId)
}
# gets the column names in camelCase of a table
getColumnNames <- function(conn, resultSchema, targetDialect, tableName, stringAppendToTables = '',
                           tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # Select a single row from the table and return its (camelCase) column names.
  renderedSql <- SqlRender::render(
    "select top 1 * from @my_schema.@string_to_append@table;",
    my_schema = resultSchema,
    table = tableName,
    string_to_append = stringAppendToTables
  )
  translatedSql <- SqlRender::translate(
    renderedSql,
    targetDialect = targetDialect,
    tempEmulationSchema = tempEmulationSchema
  )
  oneRow <- DatabaseConnector::querySql(connection = conn, sql = translatedSql, snakeCaseToCamelCase = TRUE)
  colnames(oneRow)
}
# True/False check whether results exist in table
checkResultExists <- function(conn, resultSchema, targetDialect,
                              snakeCaseToCamelCase,
                              tableName,
                              resultId,
                              tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # NOTE(review): the snakeCaseToCamelCase parameter is accepted but never
  # used - the query below always converts to camelCase; confirm with callers
  # before removing it.
  renderedSql <- SqlRender::render(
    "select * from @my_schema.@table where result_id = @result_id;",
    my_schema = resultSchema,
    table = tableName,
    result_id = resultId
  )
  translatedSql <- SqlRender::translate(
    renderedSql,
    targetDialect = targetDialect,
    tempEmulationSchema = tempEmulationSchema
  )
  matchingRows <- DatabaseConnector::querySql(connection = conn, sql = translatedSql, snakeCaseToCamelCase = TRUE)
  nrow(matchingRows) > 0
}
#======================
# end helpers
addStudy <- function(conn, resultSchema, targetDialect,
                     studyName, studyDescription,
                     stringAppendToTables = '',
                     tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # Insert the study into the studies table if it is not already present
  # (matched on name + description) and return its study_id.
  lookup <- function(){
    checkTable(conn = conn,
               resultSchema = resultSchema,
               stringAppendToTables = stringAppendToTables,
               targetDialect = targetDialect,
               tableName = 'studies',
               columnNames = c('study_name', 'study_description'),
               values = c(paste0("'", studyName, "'"),
                          paste0("'", studyDescription, "'")),
               tempEmulationSchema = tempEmulationSchema)
  }

  existing <- lookup()
  if(nrow(existing) > 0){
    ParallelLogger::logInfo('Study already exists')
  } else {
    ParallelLogger::logInfo(paste0('Adding new study: ', studyName ))
    insertSql <- SqlRender::render(
      "INSERT INTO @my_schema.@string_to_appendstudies(study_name, study_description)
VALUES ('@name','@desc');",
      my_schema = resultSchema,
      name = studyName,
      desc = studyDescription,
      string_to_append = stringAppendToTables
    )
    insertSql <- SqlRender::translate(insertSql, targetDialect = targetDialect,
                                      tempEmulationSchema = tempEmulationSchema)
    DatabaseConnector::executeSql(conn, insertSql)
    # re-query to pick up the id generated by the insert
    existing <- lookup()
  }
  return(existing$studyId[1])
}
addStudiesModel <- function(conn,
                            resultSchema,
                            targetDialect,
                            studyId,
                            modelId,
                            stringAppendToTables,
                            tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # Link a study to a model in the study_models junction table (no-op when
  # the link already exists); invisibly returns the study id of the link row.
  lookup <- function(){
    checkTable(conn = conn,
               resultSchema = resultSchema,
               stringAppendToTables = stringAppendToTables,
               targetDialect = targetDialect,
               tableName = 'study_models',
               columnNames = c('study_id', 'model_id'),
               values = c(studyId, modelId),
               tempEmulationSchema = tempEmulationSchema)
  }

  link <- lookup()
  if(nrow(link) > 0){
    ParallelLogger::logInfo('Study and model already linked')
  } else {
    ParallelLogger::logInfo(paste0('Adding link between study: ', studyId, ' and model: ', modelId ))
    insertSql <- SqlRender::render(
      "INSERT INTO @my_schema.@string_to_appendstudy_models(study_id, model_id)
VALUES ('@studyid','@modelid');",
      my_schema = resultSchema,
      studyid = studyId,
      modelid = modelId,
      string_to_append = stringAppendToTables
    )
    insertSql <- SqlRender::translate(sql = insertSql, targetDialect = targetDialect,
                                      tempEmulationSchema = tempEmulationSchema)
    DatabaseConnector::executeSql(conn, insertSql)
    link <- lookup()
  }
  return(invisible(link$studyId[1]))
}
addResearcher <- function(conn, resultSchema, targetDialect,
                          stringAppendToTables = '',
                          researcherName,
                          researcherEmail,
                          researcherOrg,
                          tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # Insert the researcher into the researchers table if not already present
  # (matched on name + email + affiliation) and return the researcher_id.
  lookup <- function(){
    checkTable(conn = conn,
               resultSchema = resultSchema,
               stringAppendToTables = stringAppendToTables,
               targetDialect = targetDialect,
               tableName = 'researchers',
               columnNames = c('researcher_name', 'researcher_email', 'researcher_affiliation'),
               values = c(paste0("'", researcherName, "'"),
                          paste0("'", researcherEmail, "'"),
                          paste0("'", researcherOrg, "'")),
               tempEmulationSchema = tempEmulationSchema)
  }

  existing <- lookup()
  if(nrow(existing) > 0){
    ParallelLogger::logInfo('Researcher already exists')
  } else {
    ParallelLogger::logInfo(paste0('Adding Researcher: ', researcherName ))
    insertSql <- SqlRender::render(
      "INSERT INTO @my_schema.@string_to_appendresearchers(researcher_name, researcher_email, researcher_affiliation)
VALUES ('@name','@email', '@org');",
      my_schema = resultSchema,
      name = researcherName,
      email = researcherEmail,
      org = researcherOrg,
      string_to_append = stringAppendToTables
    )
    insertSql <- SqlRender::translate(sql = insertSql, targetDialect = targetDialect,
                                      tempEmulationSchema = tempEmulationSchema)
    DatabaseConnector::executeSql(conn, insertSql)
    existing <- lookup()
  }
  return(existing$researcherId[1])
}
addDatabase <- function(conn, resultSchema, targetDialect,
                        stringAppendToTables = '',
                        databaseName,
                        databaseAcronym,
                        databaseVersion = 1,
                        databaseDescription,
                        databaseType,
                        tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # Insert the database metadata into database_details if an identical row is
  # not already present, then return the database_id.
  lookup <- function(){
    checkTable(conn = conn,
               resultSchema = resultSchema,
               stringAppendToTables = stringAppendToTables,
               targetDialect = targetDialect,
               tableName = 'database_details',
               columnNames = c('database_name', 'database_acronym',
                               'database_version',
                               'database_description', 'database_type'),
               values = c(paste0("'", databaseName, "'"),
                          paste0("'", databaseAcronym, "'"),
                          databaseVersion,
                          paste0("'", databaseDescription, "'"),
                          paste0("'", databaseType, "'")),
               tempEmulationSchema = tempEmulationSchema)
  }

  existing <- lookup()
  if(nrow(existing) > 0){
    ParallelLogger::logInfo(paste0('Database ', databaseName ,' already exists'))
  } else {
    insertSql <- SqlRender::render(
      "INSERT INTO @my_schema.@string_to_appenddatabase_details(database_name, database_acronym,
database_version,
database_description, database_type)
VALUES ('@dbName','@db', @version, '@desc', '@type');",
      my_schema = resultSchema,
      dbName = databaseName,
      db = databaseAcronym,
      version = databaseVersion,
      desc = databaseDescription,
      type = databaseType,
      string_to_append = stringAppendToTables
    )
    insertSql <- SqlRender::translate(insertSql, targetDialect = targetDialect,
                                      tempEmulationSchema = tempEmulationSchema)
    DatabaseConnector::executeSql(conn, insertSql)
    existing <- lookup()
  }
  return(existing$databaseId[1])
}
# Insert a time-at-risk (TAR) definition into the tars table if an identical
# definition does not already exist, and return its id.
#
# startDay/endDay       integer day offsets for the start/end of the risk window
# startAnchor/endAnchor anchor strings (e.g. 'cohort start') for the offsets
#
# Returns the tar_id of the matching (existing or newly inserted) row.
addTar <- function(conn, resultSchema, targetDialect,
stringAppendToTables = '',
startDay,
startAnchor,
endDay,
endAnchor,
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
# check whether this exact TAR definition already exists
result <- checkTable(conn = conn,
resultSchema = resultSchema,
stringAppendToTables = stringAppendToTables,
targetDialect = targetDialect,
tableName = 'tars',
columnNames = c('tar_start_day', 'tar_start_anchor',
'tar_end_day', 'tar_end_anchor'),
values = c(startDay,
paste0("'",startAnchor,"'"),
endDay,
paste0("'",endAnchor,"'")),
tempEmulationSchema = tempEmulationSchema
)
if(nrow(result)==0){
ParallelLogger::logInfo('Adding TAR')
# tars - id 1
sql <- "INSERT INTO @my_schema.@string_to_appendtars(tar_start_day, tar_start_anchor,
tar_end_day, tar_end_anchor)
VALUES (@tar_start_day, @tar_start_anchor, @tar_end_day, @tar_end_anchor);"
sql <- SqlRender::render(sql,
my_schema = resultSchema,
tar_start_day = startDay,
tar_start_anchor = paste0("'",startAnchor,"'"),
tar_end_day = endDay,
tar_end_anchor = paste0("'",endAnchor,"'"),
string_to_append = stringAppendToTables)
sql <- SqlRender::translate(sql, targetDialect = targetDialect,
tempEmulationSchema = tempEmulationSchema)
DatabaseConnector::executeSql(conn, sql)
#getId of new
result <- checkTable(conn = conn,
resultSchema = resultSchema,
stringAppendToTables = stringAppendToTables,
targetDialect = targetDialect,
tableName = 'tars',
columnNames = c('tar_start_day', 'tar_start_anchor',
'tar_end_day', 'tar_end_anchor'),
values = c(startDay,
paste0("'",startAnchor,"'"),
endDay,
paste0("'",endAnchor,"'")),
tempEmulationSchema = tempEmulationSchema
)
} else {
ParallelLogger::logInfo('TAR exists')
}
# [1] guards against multiple matching rows: return the first id
return(result$tarId[1])
}
# Extract one cohort's json and details from a list of cohort specifications.
#
# jsonList A list whose elements each contain cohort_id, cohort_name and
#          cohort_json entries.
# cohortId The cohort_id of the cohort to extract.
#
# Returns a list with:
#   json           the cohort json for the requested cohort
#   cohortTocreate a data.frame with cohortName, cohortId and webApiCohortId
#
# Stops with an informative error if cohortId is not present in jsonList
# (previously this produced a cryptic "subscript out of bounds" from
# jsonList[[NA]]).
getCohortFromList <- function(jsonList, cohortId){
  #cohort_name, cohort_id and cohort_json
  ParallelLogger::logInfo(paste0('Adding cohorts from input list'))
  # index of the first entry matching the requested cohortId
  id <- which(unlist(lapply(jsonList, function(x){x$cohort_id == cohortId})))[1]
  if(is.na(id)){
    stop(paste0('cohortId ', cohortId, ' not found in jsonList'))
  }
  json <- jsonList[[id]]$cohort_json
  details <- data.frame(
    cohortName = jsonList[[id]]$cohort_name,
    cohortId = jsonList[[id]]$cohort_id,
    webApiCohortId = jsonList[[id]]$cohort_id
  )
  return(list(json = json,
              cohortTocreate = details))
}
# this can be simplified now we use cohort id as the json file name:
# Extract one cohort's json and details from an installed study package.
#
# packageName The name of the installed study package; must contain a
#             Cohorts.csv file and a cohorts/ directory of json definitions.
# cohortId    The cohortId (as listed in Cohorts.csv) of the cohort to extract.
#
# Returns a list with:
#   json           the cohort json read from cohorts/<cohortId>.json
#   cohortTocreate the matching row(s) of Cohorts.csv
getCohortFromPackage <- function(packageName, cohortId){
ParallelLogger::logInfo(paste0('Adding cohorts from ', packageName))
# check packageName
if(!dir.exists(system.file(package = packageName))){
stop('Package path not found - set pckPath input to the location of the study package you executed')
} else {
ParallelLogger::logInfo(paste0('Extracting cohort ',cohortId,' json from ', packageName))
# check required files:
cohortToCreateLoc <- system.file('Cohorts.csv', # updated for new skeleton
package = packageName)
if(!file.exists(cohortToCreateLoc)){
stop('No Cohorts.csv in package')
}
if(!dir.exists(file.path(system.file(package = packageName), 'cohorts'))){
stop('No cohorts in package')
}
}
# add the cohorts and store the map atlas_id, cohort_id, cohort_name
cohortsToCreate <- utils::read.csv(cohortToCreateLoc)
cohortTocreate <- cohortsToCreate[cohortsToCreate$cohortId == cohortId,]
# the json file is named after the cohort id (new package skeleton convention)
jsonFileName <- file.path(system.file(package = packageName), 'cohorts', paste0(cohortTocreate$cohortId, '.json'))
# read the whole file as a single character string
json <- readChar(jsonFileName, file.info(jsonFileName)$size)
return(list(json = json,
cohortTocreate = cohortTocreate))
}
# adds json from package unless json is specified
# Insert a cohort definition into the cohorts table if an identical definition
# does not already exist, and return its id.
#
# jsonInput When type == 'package' the name of the study package to extract
#           the cohort from; otherwise a list of cohort specifications
#           (see getCohortFromList).
# cohortId  The id of the cohort to add.
#
# Returns the cohortId of the matching (existing or newly inserted) row.
addCohort <- function(conn, resultSchema, targetDialect,
                      stringAppendToTables = '',
                      jsonInput, type = 'package',
                      cohortId,
                      tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  if(type == 'package'){
    object <- getCohortFromPackage(packageName = jsonInput, cohortId)
  } else{
    object <- getCohortFromList(jsonList = jsonInput, cohortId)
  }
  json <- object$json
  cohortTocreate <- object$cohortTocreate
  # make sure the json has been converted to a character string;
  # is.character() is safer than class(x) != 'character' since class()
  # can return a vector of length > 1
  if(!is.character(json)){
    ParallelLogger::logInfo('converting json to character')
    json <- jsonlite::serializeJSON(json, digits = 23)
  }
  # reduce the size to save
  json <- substr(json, 1, 4000) # TESTING - FIX THIS [TODO]
  # check whether a cohort with this name/atlas id is already in the table:
  result <- checkTable(conn = conn,
                       resultSchema = resultSchema,
                       stringAppendToTables = stringAppendToTables,
                       targetDialect = targetDialect,
                       tableName = 'cohorts',
                       columnNames = c('cohort_name', 'atlas_id'),
                       values = c(paste0("'",cohortTocreate$cohortName[1],"'"), cohortTocreate$cohortId[1]),
                       tempEmulationSchema = tempEmulationSchema
  )
  # jsonExists is TRUE when an existing row also has an identical json
  # definition (renamed from the misleading 'addNew')
  jsonExists <- FALSE
  if(nrow(result) > 0){
    jsonExists <- json %in% result$cohortJson
    ParallelLogger::logInfo(paste0('json in jsons:', jsonExists))
  }
  if(jsonExists){
    ParallelLogger::logInfo(paste0('Cohort ',cohortTocreate$cohortName,' exists in result database with id', result$cohortId))
  } else{
    ParallelLogger::logInfo(paste0('Adding cohort ',cohortTocreate$cohortName[1]))
    data <- data.frame(cohortName = cohortTocreate$cohortName,
                       atlasId = cohortTocreate$cohortId,
                       cohortJson = json)
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables, 'cohorts'),
                                   data = data,
                                   dropTableIfExists = FALSE,
                                   createTable = FALSE,
                                   tempTable = FALSE,
                                   progressBar = TRUE,
                                   camelCaseToSnakeCase = TRUE,
                                   tempEmulationSchema = tempEmulationSchema
    )
    # re-query and keep only the row(s) with the json we just inserted so
    # the auto-generated id can be returned
    result <- checkTable(conn = conn,
                         resultSchema = resultSchema,
                         stringAppendToTables = stringAppendToTables,
                         targetDialect = targetDialect,
                         tableName = 'cohorts',
                         columnNames = c('cohort_name', 'atlas_id'),
                         values = c(paste0("'",cohortTocreate$cohortName,"'"), cohortTocreate$cohortId),
                         tempEmulationSchema = tempEmulationSchema
    )
    jsonInd <- result$cohortJson %in% json
    result <- result[jsonInd,]
  }
  return(result$cohortId[1])
}
# Insert a population settings json into the population_settings table if it
# does not already exist, and return its id.
#
# json The population settings, either already serialized (character) or an
#      R object that will be serialized with jsonlite::serializeJSON.
#
# Returns the populationSettingId of the matching (existing or new) row.
addPopulationSetting <- function(conn, resultSchema, targetDialect,
                                 stringAppendToTables = '',
                                 json,
                                 tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # make sure the json has been converted to a character string;
  # is.character() is safer than class(x) != 'character' since class()
  # can return a vector of length > 1
  if(!is.character(json)){
    json <- as.character(jsonlite::serializeJSON(json, digits = 23))
  }
  jsonId <- checkJson(conn = conn,
                      resultSchema = resultSchema,
                      stringAppendToTables = stringAppendToTables,
                      targetDialect = targetDialect,
                      tableName = 'population_settings',
                      jsonColumnName = 'populationSettingsJson',
                      id = 'populationSettingId',
                      json = json,
                      tempEmulationSchema = tempEmulationSchema)
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new population settings')
    data <- data.frame(populationSettingsJson = json)
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables, 'population_settings'),
                                   data = data,
                                   dropTableIfExists = FALSE,
                                   createTable = FALSE,
                                   tempTable = FALSE,
                                   progressBar = TRUE,
                                   camelCaseToSnakeCase = TRUE,
                                   tempEmulationSchema = tempEmulationSchema
    )
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(conn = conn,
                        resultSchema = resultSchema,
                        stringAppendToTables = stringAppendToTables,
                        targetDialect = targetDialect,
                        tableName = 'population_settings',
                        jsonColumnName = 'populationSettingsJson',
                        id = 'populationSettingId',
                        json = json,
                        tempEmulationSchema = tempEmulationSchema)
  } else{
    ParallelLogger::logInfo('Population settings exists')
  }
  return(jsonId)
}
# Insert a covariate settings json into the covariate_settings table if it
# does not already exist, and return its id.
#
# json The covariate settings: either already serialized (character), a
#      single covariateSettings object, or a list of covariateSettings.
#
# Returns the covariateSettingId of the matching (existing or new) row.
addCovariateSetting <- function(conn, resultSchema, targetDialect,
                                stringAppendToTables = '',
                                json,
                                tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # make sure the json has been converted to a character string
  if(!is.character(json)){
    # serializeJSON created character strings that were too long for redshift,
    # so a compact toJSON conversion is used instead:
    #json <- as.character(jsonlite::serializeJSON(json, digits = 23))
    # wrap a single covariateSettings object in a list so the conversion
    # below is uniform (inherits() is safer than class(x) == '...')
    if(inherits(json, 'covariateSettings')){
      json <- list(json)
    }
    # copy each setting's attributes into explicit values so they survive
    # the json conversion
    json <- lapply(json, addAttributes)
    #convert
    json <- jsonlite::toJSON(
      x = json,
      pretty = TRUE,
      digits = 23,
      auto_unbox = TRUE,
      null = "null"
    )
    json <- as.character(json) # now convert to character
    # debug-level log instead of the stray print() left over from development
    ParallelLogger::logDebug(paste0('covariate settings json length: ', nchar(json)))
  }
  jsonId <- checkJson(conn = conn,
                      resultSchema = resultSchema,
                      stringAppendToTables = stringAppendToTables,
                      targetDialect = targetDialect,
                      tableName = 'covariate_settings',
                      jsonColumnName = 'covariateSettingsJson',
                      id = 'covariateSettingId',
                      json = json,
                      tempEmulationSchema = tempEmulationSchema)
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new covariate settings')
    data <- data.frame(covariateSettingsJson = json)
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables, 'covariate_settings'),
                                   data = data,
                                   dropTableIfExists = FALSE,
                                   createTable = FALSE,
                                   tempTable = FALSE,
                                   progressBar = TRUE,
                                   camelCaseToSnakeCase = TRUE,
                                   tempEmulationSchema = tempEmulationSchema
    )
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(conn = conn,
                        resultSchema = resultSchema,
                        stringAppendToTables = stringAppendToTables,
                        targetDialect = targetDialect,
                        tableName = 'covariate_settings',
                        jsonColumnName = 'covariateSettingsJson',
                        id = 'covariateSettingId',
                        json = json,
                        tempEmulationSchema = tempEmulationSchema)
  } else{
    ParallelLogger::logInfo('Covariate setting exists')
  }
  return(jsonId)
}
# Insert a model settings json into the model_settings table if it does not
# already exist, and return its id.
#
# modelType A string describing the model type (stored alongside the json).
# json      The model settings, character or an R object to serialize.
#
# Returns the modelSettingId of the matching (existing or new) row.
addModelSetting <- function(conn, resultSchema, targetDialect,
                            stringAppendToTables = '',
                            modelType, json,
                            tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # make sure the json has been converted to a character string;
  # is.character() is safer than class(x) != 'character'
  if(!is.character(json)){
    json <- as.character(jsonlite::serializeJSON(json, digits = 23))
  }
  jsonId <- checkJson(conn = conn,
                      resultSchema = resultSchema,
                      stringAppendToTables = stringAppendToTables,
                      targetDialect = targetDialect,
                      tableName = 'model_settings',
                      jsonColumnName = 'modelSettingsJson',
                      id = 'modelSettingId',
                      json = json,
                      tempEmulationSchema = tempEmulationSchema)
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new model settings')
    data <- data.frame(modelType = modelType,
                       modelSettingsJson = json)
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables, 'model_settings'),
                                   data = data,
                                   dropTableIfExists = FALSE,
                                   createTable = FALSE,
                                   tempTable = FALSE,
                                   progressBar = TRUE,
                                   camelCaseToSnakeCase = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(conn = conn,
                        resultSchema = resultSchema,
                        stringAppendToTables = stringAppendToTables,
                        targetDialect = targetDialect,
                        tableName = 'model_settings',
                        jsonColumnName = 'modelSettingsJson',
                        id = 'modelSettingId',
                        json = json,
                        tempEmulationSchema = tempEmulationSchema)
  } else{
    ParallelLogger::logInfo('Model setting exists')
  }
  return(jsonId)
}
# Insert a tidy-covariates settings json into the tidy_covariates_settings
# table if it does not already exist, and return its id.
#
# json The tidy covariates settings, character or an R object; non-character
#      input is reduced in size before serialization.
#
# Returns the tidyCovariatesSettingId of the matching (existing or new) row.
addTidySetting <- function(
  conn,
  resultSchema,
  targetDialect,
  stringAppendToTables = '',
  json,
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # is.character() is safer than class(x) != 'character'
  if(!is.character(json)){
    # modify to make smaller but keep the key parts:
    # drop the (potentially huge) deleted-covariate id vector and keep only
    # the normalisation factors that actually rescale (maxValue != 1)
    json$deletedInfrequentCovariateIds <- c()
    json$normFactors <- json$normFactors %>% dplyr::filter(.data$maxValue !=1)
    json <- as.character(jsonlite::serializeJSON(json, digits = 23))
  }
  jsonId <- checkJson(conn = conn,
                      resultSchema = resultSchema,
                      stringAppendToTables = stringAppendToTables,
                      targetDialect = targetDialect,
                      tableName = 'tidy_covariates_settings',
                      jsonColumnName = 'tidyCovariatesSettingsJson',
                      id = 'tidyCovariatesSettingId',
                      json = json,
                      tempEmulationSchema = tempEmulationSchema)
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new tidy covariates settings')
    data <- data.frame(
      tidyCovariatesSettingsJson = json
    )
    DatabaseConnector::insertTable(
      connection = conn,
      databaseSchema = resultSchema,
      tableName = paste0(stringAppendToTables, 'tidy_covariates_settings'),
      data = data,
      dropTableIfExists = FALSE,
      createTable = FALSE,
      tempTable = FALSE,
      progressBar = TRUE,
      camelCaseToSnakeCase = TRUE,
      tempEmulationSchema = tempEmulationSchema
    )
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(
      conn = conn,
      resultSchema = resultSchema,
      stringAppendToTables = stringAppendToTables,
      targetDialect = targetDialect,
      tableName = 'tidy_covariates_settings',
      jsonColumnName = 'tidyCovariatesSettingsJson',
      id = 'tidyCovariatesSettingId',
      json = json,
      tempEmulationSchema = tempEmulationSchema
    )
  } else{
    ParallelLogger::logInfo('tidy covariates setting exists')
  }
  return(jsonId)
}
# Insert a sample settings json into the sample_settings table if it does not
# already exist, and return its id.
#
# json The sample settings, character or an R object to serialize.
#
# Returns the sampleSettingId of the matching (existing or new) row.
addSampleSetting <- function(
  conn,
  resultSchema,
  targetDialect,
  stringAppendToTables = '',
  json,
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # is.character() is safer than class(x) != 'character'
  if(!is.character(json)){
    json <- as.character(jsonlite::serializeJSON(json, digits = 23))
  }
  jsonId <- checkJson(
    conn = conn,
    resultSchema = resultSchema,
    stringAppendToTables = stringAppendToTables,
    targetDialect = targetDialect,
    tableName = 'sample_settings',
    jsonColumnName = 'sampleSettingsJson',
    id = 'sampleSettingId',
    json = json,
    tempEmulationSchema = tempEmulationSchema
  )
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new sample settings')
    data <- data.frame(
      sampleSettingsJson = json
    )
    DatabaseConnector::insertTable(
      connection = conn,
      databaseSchema = resultSchema,
      tableName = paste0(stringAppendToTables, 'sample_settings'),
      data = data,
      dropTableIfExists = FALSE,
      createTable = FALSE,
      tempTable = FALSE,
      progressBar = TRUE,
      camelCaseToSnakeCase = TRUE,
      tempEmulationSchema = tempEmulationSchema
    )
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(
      conn = conn,
      resultSchema = resultSchema,
      stringAppendToTables = stringAppendToTables,
      targetDialect = targetDialect,
      tableName = 'sample_settings',
      jsonColumnName = 'sampleSettingsJson',
      id = 'sampleSettingId',
      json = json,
      tempEmulationSchema = tempEmulationSchema
    )
  } else{
    ParallelLogger::logInfo('sample setting exists')
  }
  return(jsonId)
}
# Insert a plp data settings json into the plp_data_settings table if it does
# not already exist, and return its id.
#
# json The plp data settings, character or an R object to serialize.
#
# Returns the plpDataSettingId of the matching (existing or new) row.
addPlpDataSetting <- function(
  conn,
  resultSchema,
  targetDialect,
  stringAppendToTables = '',
  json,
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # is.character() is safer than class(x) != 'character'
  if(!is.character(json)){
    json <- as.character(jsonlite::serializeJSON(json, digits = 23))
  }
  jsonId <- checkJson(conn = conn,
                      resultSchema = resultSchema,
                      stringAppendToTables = stringAppendToTables,
                      targetDialect = targetDialect,
                      tableName = 'plp_data_settings',
                      jsonColumnName = 'plpDataSettingsJson',
                      id = 'plpDataSettingId',
                      json = json,
                      tempEmulationSchema = tempEmulationSchema)
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new plp data settings')
    data <- data.frame(
      plpDataSettingsJson = json
    )
    DatabaseConnector::insertTable(
      connection = conn,
      databaseSchema = resultSchema,
      tableName = paste0(stringAppendToTables, 'plp_data_settings'),
      data = data,
      dropTableIfExists = FALSE,
      createTable = FALSE,
      tempTable = FALSE,
      progressBar = TRUE,
      camelCaseToSnakeCase = TRUE,
      tempEmulationSchema = tempEmulationSchema
    )
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(
      conn = conn,
      resultSchema = resultSchema,
      stringAppendToTables = stringAppendToTables,
      targetDialect = targetDialect,
      tableName = 'plp_data_settings',
      jsonColumnName = 'plpDataSettingsJson',
      id = 'plpDataSettingId',
      json = json,
      tempEmulationSchema = tempEmulationSchema
    )
  } else{
    # fixed copy-paste log message (previously said 'Split setting exists')
    ParallelLogger::logInfo('Plp data setting exists')
  }
  return(jsonId)
}
# Insert a feature engineering settings json into the
# feature_engineering_settings table if it does not already exist, and return
# its id.
#
# json The feature engineering settings, character or an R object to serialize.
#
# Returns the featureEngineeringSettingId of the matching (existing or new) row.
addFESetting <- function(
  conn,
  resultSchema,
  targetDialect,
  stringAppendToTables = '',
  json,
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # is.character() is safer than class(x) != 'character'
  if(!is.character(json)){
    json <- as.character(jsonlite::serializeJSON(json, digits = 23))
  }
  jsonId <- checkJson(
    conn = conn,
    resultSchema = resultSchema,
    stringAppendToTables = stringAppendToTables,
    targetDialect = targetDialect,
    tableName = 'feature_engineering_settings',
    jsonColumnName = 'featureEngineeringSettingsJson',
    id = 'featureEngineeringSettingId',
    json = json,
    tempEmulationSchema = tempEmulationSchema
  )
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new feature_engineering settings')
    data <- data.frame(
      featureEngineeringSettingsJson = json
    )
    DatabaseConnector::insertTable(
      connection = conn,
      databaseSchema = resultSchema,
      tableName = paste0(stringAppendToTables, 'feature_engineering_settings'),
      data = data,
      dropTableIfExists = FALSE,
      createTable = FALSE,
      tempTable = FALSE,
      progressBar = TRUE,
      camelCaseToSnakeCase = TRUE,
      tempEmulationSchema = tempEmulationSchema
    )
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(
      conn = conn,
      resultSchema = resultSchema,
      stringAppendToTables = stringAppendToTables,
      targetDialect = targetDialect,
      tableName = 'feature_engineering_settings',
      jsonColumnName = 'featureEngineeringSettingsJson',
      id = 'featureEngineeringSettingId',
      json = json,
      tempEmulationSchema = tempEmulationSchema
    )
  } else{
    ParallelLogger::logInfo('feature engineering setting exists')
  }
  return(jsonId)
}
# Insert a split settings json into the split_settings table if it does not
# already exist, and return its id.
#
# json The split settings, character or an R object to serialize.
#
# Returns the splitSettingId of the matching (existing or new) row.
addSplitSettings <- function(
  conn,
  resultSchema,
  targetDialect,
  stringAppendToTables = '',
  json,
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # is.character() is safer than class(x) != 'character'
  if(!is.character(json)){
    json <- as.character(jsonlite::serializeJSON(json, digits = 23))
  }
  jsonId <- checkJson(
    conn = conn,
    resultSchema = resultSchema,
    stringAppendToTables = stringAppendToTables,
    targetDialect = targetDialect,
    tableName = 'split_settings',
    jsonColumnName = 'splitSettingsJson',
    id = 'splitSettingId',
    json = json,
    tempEmulationSchema = tempEmulationSchema
  )
  if(is.null(jsonId)){
    ParallelLogger::logInfo('Adding new split settings')
    data <- data.frame(
      splitSettingsJson = json
    )
    DatabaseConnector::insertTable(
      connection = conn,
      databaseSchema = resultSchema,
      tableName = paste0(stringAppendToTables, 'split_settings'),
      data = data,
      dropTableIfExists = FALSE,
      createTable = FALSE,
      tempTable = FALSE,
      progressBar = TRUE,
      camelCaseToSnakeCase = TRUE,
      tempEmulationSchema = tempEmulationSchema
    )
    # re-query to pick up the auto-generated id of the new row
    jsonId <- checkJson(
      conn = conn,
      resultSchema = resultSchema,
      stringAppendToTables = stringAppendToTables,
      targetDialect = targetDialect,
      tableName = 'split_settings',
      jsonColumnName = 'splitSettingsJson',
      id = 'splitSettingId',
      json = json,
      tempEmulationSchema = tempEmulationSchema
    )
  } else{
    ParallelLogger::logInfo('Split setting exists')
  }
  return(jsonId)
}
# Insert a model design (the combination of cohorts and settings ids that
# defines a model) into the model_designs table if it does not already exist,
# and return its id.
#
# All *Id arguments are required (non-NULL) ids previously inserted via the
# corresponding add* functions.
#
# Returns the modelDesignId of the matching (existing or new) row.
addModelDesign <- function(
  conn,
  resultSchema, targetDialect,
  stringAppendToTables = '',  # fixed: was self-referencing its own default
  targetId,
  outcomeId,
  tarId,
  plpDataSettingId,
  populationSettingId,
  modelSettingId,
  covariateSettingId,
  sampleSettingId,
  splitSettingId,
  featureEngineeringSettingId,
  tidyCovariatesSettingId,
  researcherId,
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  # fail fast if any required id is missing (replaces 12 copy-pasted checks)
  requiredIds <- list(
    targetId = targetId,
    outcomeId = outcomeId,
    tarId = tarId,
    plpDataSettingId = plpDataSettingId,
    populationSettingId = populationSettingId,
    modelSettingId = modelSettingId,
    covariateSettingId = covariateSettingId,
    sampleSettingId = sampleSettingId,
    splitSettingId = splitSettingId,
    featureEngineeringSettingId = featureEngineeringSettingId,
    tidyCovariatesSettingId = tidyCovariatesSettingId,
    researcherId = researcherId
  )
  for(idName in names(requiredIds)){
    if(is.null(requiredIds[[idName]])){
      stop(paste0(idName, ' is null'))
    }
  }
  # process json to make it ordered...
  # TODO
  # check whether this exact design already exists
  result <- checkTable(
    conn = conn,
    resultSchema = resultSchema,
    stringAppendToTables = stringAppendToTables,
    targetDialect = targetDialect,
    tableName = 'model_designs',
    columnNames = c(
      'target_id',
      'outcome_id',
      'tar_id',
      'plp_data_setting_id',
      'population_setting_id',
      'model_setting_id',
      'covariate_setting_id',
      'sample_setting_id',
      'split_setting_id',
      'feature_engineering_setting_id',
      'tidy_covariates_setting_id',
      'researcher_id'
    ),
    values = c(
      targetId,
      outcomeId,
      tarId,
      plpDataSettingId,
      populationSettingId,
      modelSettingId,
      covariateSettingId,
      sampleSettingId,
      splitSettingId,
      featureEngineeringSettingId,
      tidyCovariatesSettingId,
      researcherId
    ),
    tempEmulationSchema = tempEmulationSchema
  )
  if(nrow(result)==0){
    # not found: insert a new row (the id column is auto-generated)
    sql <- "INSERT INTO @my_schema.@string_to_appendmodel_designs(
    target_id,
    outcome_id,
    tar_id,
    plp_data_setting_id,
    population_setting_id,
    model_setting_id,
    covariate_setting_id,
    sample_setting_id,
    split_setting_id,
    feature_engineering_setting_id,
    tidy_covariates_setting_id,
    researcher_id
    ) VALUES
  (
    @target_id,
    @outcome_id,
    @tar_id,
    @plp_data_setting_id,
    @population_setting_id,
    @model_setting_id,
    @covariate_setting_id,
    @sample_setting_id,
    @split_setting_id,
    @feature_engineering_setting_id,
    @tidy_covariates_setting_id,
    @researcher_id
  )"
    sql <- SqlRender::render(
      sql,
      my_schema = resultSchema,
      target_id = targetId,
      outcome_id = outcomeId,
      tar_id = tarId,
      plp_data_setting_id = plpDataSettingId,
      population_setting_id = populationSettingId,
      model_setting_id = modelSettingId,
      covariate_setting_id = covariateSettingId,
      sample_setting_id = sampleSettingId,
      split_setting_id = splitSettingId,
      feature_engineering_setting_id = featureEngineeringSettingId,
      tidy_covariates_setting_id = tidyCovariatesSettingId,
      researcher_id = researcherId,
      string_to_append = stringAppendToTables
    )
    sql <- SqlRender::translate(sql, targetDialect = targetDialect,
                                tempEmulationSchema = tempEmulationSchema)
    DatabaseConnector::executeSql(conn, sql)
    # re-query to pick up the auto-generated id of the new row
    result <- checkTable(conn = conn,
                         resultSchema = resultSchema,
                         stringAppendToTables = stringAppendToTables,
                         targetDialect = targetDialect,
                         tableName = 'model_designs',
                         columnNames = c(
                           'target_id',
                           'outcome_id',
                           'tar_id',
                           'plp_data_setting_id',
                           'population_setting_id',
                           'model_setting_id',
                           'covariate_setting_id',
                           'sample_setting_id',
                           'split_setting_id',
                           'feature_engineering_setting_id',
                           'tidy_covariates_setting_id',
                           'researcher_id'),
                         values = c(targetId,
                                    outcomeId,
                                    tarId,
                                    plpDataSettingId,
                                    populationSettingId,
                                    modelSettingId,
                                    covariateSettingId,
                                    sampleSettingId,
                                    splitSettingId,
                                    featureEngineeringSettingId,
                                    tidyCovariatesSettingId,
                                    researcherId),
                         tempEmulationSchema = tempEmulationSchema
    )
  }
  return(result$modelDesignId[1])
}
# Insert a trained model entry into the models table if an identical entry
# does not already exist, and return its id.
#
# analysisId        the analysis identifier string
# modelDesignId     id of the model design (see addModelDesign)
# researcherId      id of the researcher (see addResearcher)
# databaseId        id of the development database (see addDatabase)
# hyperParamSearch  hyper-parameter search results; serialized to json if not
#                   already a character string, '' when NULL
# plpModelFile      location of the saved plp model
# executionDateTime when the model was trained
# trainingTime      how long training took
# intercept         the model intercept
# requireDenseMatrix whether the model needs a dense covariate matrix
#
# Returns the modelId of the matching (existing or newly inserted) row.
addModel <- function(
  conn,
  resultSchema,
  targetDialect,
  stringAppendToTables = '',  # fixed: was self-referencing its own default
  analysisId,
  modelDesignId,
  researcherId,
  databaseId,
  hyperParamSearch,
  plpModelFile,
  executionDateTime,
  trainingTime,
  intercept,
  requireDenseMatrix,
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
){
  if(is.null(analysisId)){
    stop('analysisId is null')
  }
  if(is.null(modelDesignId)){
    # fixed error message (previously said 'modelName is null')
    stop('modelDesignId is null')
  }
  if(is.null(researcherId)){
    stop('researcherId is null')
  }
  if(is.null(databaseId)){
    stop('databaseId is null')
  }
  if(is.null(plpModelFile)){
    stop('plpModelFile is null')
  }
  if(is.null(executionDateTime)){
    stop('executionDateTime is null')
  }
  if(is.null(intercept)){
    stop('intercept is null')
  }
  # serialize the hyper-parameter search to a json string;
  # is.character() is safer than class(x) != 'character'
  if(!is.null(hyperParamSearch)){
    if(!is.character(hyperParamSearch)){
      hyperParamSearch <- as.character(jsonlite::serializeJSON(hyperParamSearch, digits = 23))
    }
  } else{
    hyperParamSearch <- ''
  }
  # process json to make it ordered...
  # TODO
  # check whether an identical model entry already exists
  result <- checkTable(
    conn = conn,
    resultSchema = resultSchema,
    stringAppendToTables = stringAppendToTables,
    targetDialect = targetDialect,
    tableName = 'models',
    columnNames = c(
      'analysis_id',
      'model_design_id',
      'researcher_id',
      'database_id',
      'hyper_param_search',
      'plp_model_file',
      'execution_date_time',
      'training_time',
      'intercept',
      'require_dense_matrix'
    ),
    values = c(
      enc(analysisId),
      modelDesignId,
      researcherId,
      databaseId,
      enc(hyperParamSearch),
      enc(plpModelFile),
      enc(executionDateTime),
      enc(trainingTime),
      intercept,
      ifelse(requireDenseMatrix, "'T'", "'F'")
    ),
    tempEmulationSchema = tempEmulationSchema
  )
  if(nrow(result)==0){
    # not found: insert a new row (the id column is auto-generated)
    sql <- "INSERT INTO @my_schema.@string_to_appendmodels(
    analysis_id,
    model_design_id,
    researcher_id,
    database_id,
    hyper_param_search,
    plp_model_file,
    execution_date_time,
    training_time,
    intercept,
    require_dense_matrix
  ) VALUES
  ('@analysis_id',
    '@model_design_id',
    @researcher_id,
    @database_id,
    '@hyper_param_search',
    '@plp_model_file',
    '@execution_date_time',
    '@training_time',
    @intercept,
    '@require_dense_matrix'
  )"
    sql <- SqlRender::render(
      sql,
      my_schema = resultSchema,
      analysis_id = analysisId,
      model_design_id = modelDesignId,
      researcher_id = researcherId,
      database_id = databaseId,
      hyper_param_search = hyperParamSearch,
      plp_model_file = plpModelFile,
      execution_date_time = executionDateTime,
      training_time = trainingTime,
      intercept = intercept,
      require_dense_matrix = ifelse(requireDenseMatrix, 'T', 'F'),
      string_to_append = stringAppendToTables
    )
    sql <- SqlRender::translate(
      sql,
      targetDialect = targetDialect,
      tempEmulationSchema = tempEmulationSchema
    )
    DatabaseConnector::executeSql(conn, sql)
    # re-query to pick up the auto-generated id of the new row
    result <- checkTable(
      conn = conn,
      resultSchema = resultSchema,
      stringAppendToTables = stringAppendToTables,
      targetDialect = targetDialect,
      tableName = 'models',
      columnNames = c(
        'analysis_id',
        'model_design_id',
        'researcher_id',
        'database_id',
        'hyper_param_search',
        'plp_model_file',
        'execution_date_time',
        'training_time',
        'intercept',
        'require_dense_matrix'
      ),
      values = c(
        enc(analysisId),
        modelDesignId,
        researcherId,
        databaseId,
        enc(hyperParamSearch),
        enc(plpModelFile),
        enc(executionDateTime),
        enc(trainingTime),
        intercept,
        ifelse(requireDenseMatrix, "'T'", "'F'")
      ),
      tempEmulationSchema = tempEmulationSchema
    )
  }
  return(result$modelId[1])
}
# Insert a result entry (linking a model, database, cohorts and settings) into
# the results table if an identical entry does not already exist, and return
# its id.
#
# modelId/researcherId/databaseId/targetId/outcomeId/tarId/
# restrictPlpDataSettingId/populationSettingId
#                   ids previously inserted via the corresponding add* helpers
# executionDateTime when the result was generated
# plpVersion        the PatientLevelPrediction package version used
#
# Returns the resultId of the matching (existing or newly inserted) row.
addResult <- function(conn, resultSchema, targetDialect,
stringAppendToTables = '',
modelId,
researcherId,
databaseId,
targetId,
outcomeId,
tarId,
restrictPlpDataSettingId,
populationSettingId,
executionDateTime,
plpVersion,
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
# check whether an identical result entry already exists
result <- checkTable(conn = conn,
resultSchema = resultSchema,
stringAppendToTables = stringAppendToTables,
targetDialect = targetDialect,
tableName = 'results',
columnNames = c('model_id',
'researcher_id',
'database_id',
'target_id',
'outcome_id',
'tar_id',
'plp_data_setting_id',
'population_setting_id',
'execution_date_time',
'plp_version'),
values = c(modelId,
researcherId,
databaseId,
targetId,
outcomeId,
tarId,
restrictPlpDataSettingId,
populationSettingId,
enc(executionDateTime),
enc(plpVersion)),
tempEmulationSchema = tempEmulationSchema
)
if(nrow(result)==0){
# not found: insert a new row (the id column is auto-generated)
sql <- "INSERT INTO @my_schema.@string_to_appendresults (
model_id,
researcher_id,
database_id,
target_id,
outcome_id,
tar_id,
plp_data_setting_id,
population_setting_id,
execution_date_time,
plp_version
)
VALUES (@model_id, @researcher_id, @database_id, @target_id, @outcome_id, @tar_id,
@plp_data_setting_id, @population_setting_id, '@execution_date_time', '@plp_version')"
sql <- SqlRender::render(sql,
my_schema = resultSchema,
model_id = modelId,
researcher_id = researcherId,
database_id = databaseId,
target_id = targetId,
outcome_id = outcomeId,
tar_id = tarId,
plp_data_setting_id = restrictPlpDataSettingId,
population_setting_id = populationSettingId,
execution_date_time = executionDateTime,
plp_version = plpVersion,
string_to_append = stringAppendToTables)
sql <- SqlRender::translate(sql, targetDialect = targetDialect,
tempEmulationSchema = tempEmulationSchema)
DatabaseConnector::executeSql(conn, sql)
# re-query to pick up the auto-generated id of the new row
result <- checkTable(conn = conn,
resultSchema = resultSchema,
stringAppendToTables = stringAppendToTables,
targetDialect = targetDialect,
tableName = 'results',
columnNames = c('model_id',
'researcher_id',
'database_id',
'target_id',
'outcome_id',
'tar_id',
'plp_data_setting_id',
'population_setting_id',
'execution_date_time',
'plp_version'),
values = c(modelId,
researcherId,
databaseId,
targetId,
outcomeId,
tarId,
restrictPlpDataSettingId,
populationSettingId,
enc(executionDateTime),
enc(plpVersion)),
tempEmulationSchema = tempEmulationSchema
)
}
# [1] guards against multiple matching rows: return the first id
return(result$resultId[1])
}
# attrition
# Insert the attrition table for a result into the database.
#
# @param conn                 DatabaseConnector connection to the result database.
# @param resultSchema         Schema containing the PatientLevelPrediction result tables.
# @param targetDialect        SQL dialect of the database management system.
# @param stringAppendToTables Prefix appended to the result table names.
# @param resultId             id of the results row these rows belong to.
# @param attrition            Data frame with the attrition counts (may be NULL).
# @param overWriteIfExists    If TRUE, delete and replace any existing rows for resultId.
# @param tempEmulationSchema  Temp schema used when the dbms is oracle.
# @return invisible(NULL); NULL immediately when attrition is NULL.
addAttrition <- function(
  conn, resultSchema, targetDialect,
  stringAppendToTables = '',
  resultId,
  attrition,
  overWriteIfExists = TRUE,  # TRUE/FALSE rather than T/F (T is reassignable)
  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){

  value <- attrition
  if(is.null(value)){
    return(NULL)
  }

  # lower-case the first character of each column name (camelCase convention)
  firstLower <- function(x) {
    substr(x, 1, 1) <- tolower(substr(x, 1, 1))
    return(x)
  }
  colnames(value) <- sapply(colnames(value), firstLower)
  value$resultId <- resultId

  # get column names and check all present in object
  # (pass stringAppendToTables separately, consistent with the other add* helpers)
  columnNames <- getColumnNames(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                stringAppendToTables = stringAppendToTables,
                                tableName = 'attrition',
                                tempEmulationSchema = tempEmulationSchema)
  # every database column must be available in the data; all() avoids the
  # false positive sum(%in%)==length gives when column names are duplicated
  isValid <- all(columnNames %in% colnames(value))
  exists <- checkResultExists(conn = conn,
                              resultSchema = resultSchema,
                              targetDialect = targetDialect,
                              tableName = paste0(stringAppendToTables,'attrition'),
                              resultId = resultId,
                              tempEmulationSchema = tempEmulationSchema)

  if(isValid && (!exists || overWriteIfExists)){
    # remove any existing rows for this result before re-inserting
    if(exists){
      sql <- "delete from @result_schema.@table_name where result_id = @result_id;"
      sql <- SqlRender::render(sql,
                               result_id = resultId,
                               result_schema = resultSchema,
                               table_name = paste0(stringAppendToTables,'attrition'))
      sql <- SqlRender::translate(sql,
                                  targetDialect = targetDialect,
                                  tempEmulationSchema = tempEmulationSchema)
      DatabaseConnector::executeSql(conn, sql)
    }

    # add
    ParallelLogger::logInfo(paste0('Inserting attrition for result ',resultId))
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables,'attrition'),
                                   data = value[,columnNames],
                                   dropTableIfExists = FALSE, createTable = FALSE, tempTable = FALSE,
                                   bulkLoad = FALSE, camelCaseToSnakeCase = TRUE, progressBar = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
  }

  return(invisible(NULL))
}
# evals
# Insert all performance-evaluation components for a result, logging and
# continuing past any component that fails to upload.
#
# @param conn                  DatabaseConnector connection to the result database.
# @param resultSchema          Schema containing the PatientLevelPrediction result tables.
# @param targetDialect         SQL dialect of the database management system.
# @param stringAppendToTables  Prefix appended to the result table names.
# @param resultId              id of the results row the evaluation belongs to.
# @param performanceEvaluation List with the evaluation components
#                              (predictionDistribution, thresholdSummary, ...).
# @param overWriteIfExists     If TRUE, replace existing rows for resultId.
# @param tempEmulationSchema   Temp schema used when the dbms is oracle.
# @return invisible(NULL)
addEvaluation <- function(conn, resultSchema, targetDialect,
                          stringAppendToTables = '',
                          resultId,
                          performanceEvaluation,
                          overWriteIfExists = TRUE,
                          tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
  # Each component inserter shares the same signature, so iterate over them
  # instead of repeating the tryCatch boilerplate five times. Names are used
  # in the log messages and the order matches the original sequence.
  inserters <- list(
    PredictionDistribution = addPredictionDistribution,
    ThresholdSummary = addThresholdSummary,
    EvaluationStatistics = addEvaluationStatistics,
    CalibrationSummary = addCalibrationSummary,
    DemographicSummary = addDemographicSummary
  )

  for(componentName in names(inserters)){
    ParallelLogger::logInfo(paste0('Adding ', componentName))
    # A failure in one component is logged but must not stop the others.
    tryCatch({
      inserters[[componentName]](conn = conn,
                                 resultSchema = resultSchema,
                                 targetDialect = targetDialect,
                                 stringAppendToTables = stringAppendToTables,
                                 resultId = resultId,
                                 performanceEvaluation = performanceEvaluation,
                                 overWriteIfExists = overWriteIfExists,
                                 tempEmulationSchema = tempEmulationSchema)},
      error = function(e){ParallelLogger::logError(e)})
  }

  return(invisible(NULL))
}
# Insert the prediction-distribution component of a performance evaluation.
#
# @param conn                  DatabaseConnector connection to the result database.
# @param resultSchema          Schema containing the PatientLevelPrediction result tables.
# @param targetDialect         SQL dialect of the database management system.
# @param stringAppendToTables  Prefix appended to the result table names.
# @param resultId              id of the results row these rows belong to.
# @param performanceEvaluation List whose predictionDistribution element is inserted.
# @param overWriteIfExists     If TRUE, replace existing rows for resultId.
# @param tempEmulationSchema   Temp schema used when the dbms is oracle.
# @return invisible(NULL); NULL immediately when the component is missing.
addPredictionDistribution <- function(conn, resultSchema, targetDialect,
                                      stringAppendToTables = '',
                                      resultId,
                                      performanceEvaluation,
                                      overWriteIfExists = TRUE,
                                      tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
  value <- performanceEvaluation$predictionDistribution
  if(is.null(value)){
    return(NULL)
  }

  # lower-case the first character of each column name (camelCase convention)
  firstLower <- function(x) {
    substr(x, 1, 1) <- tolower(substr(x, 1, 1))
    return(x)
  }
  colnames(value) <- sapply(colnames(value), firstLower)

  # 'class' is a reserved-feeling name; the database column is class_label
  if('class' %in% colnames(value)){
    colnames(value)[colnames(value)=='class'] <- 'classLabel'
  }
  value$resultId <- resultId

  # get column names and check all present in object
  # (pass stringAppendToTables separately, consistent with the other add* helpers)
  columnNames <- getColumnNames(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                stringAppendToTables = stringAppendToTables,
                                tableName = 'prediction_distribution',
                                tempEmulationSchema = tempEmulationSchema)
  # every database column must be available in the data
  isValid <- all(columnNames %in% colnames(value))
  exists <- checkResultExists(conn = conn,
                              resultSchema = resultSchema,
                              targetDialect = targetDialect,
                              tableName = paste0(stringAppendToTables,'prediction_distribution'),
                              resultId = resultId,
                              tempEmulationSchema = tempEmulationSchema)

  if(isValid && (!exists || overWriteIfExists)){
    # remove any existing rows for this result before re-inserting
    if(exists){
      sql <- "delete from @result_schema.@table_name where result_id = @result_id;"
      sql <- SqlRender::render(sql,
                               result_id = resultId,
                               result_schema = resultSchema,
                               table_name = paste0(stringAppendToTables,'prediction_distribution'))
      sql <- SqlRender::translate(sql,
                                  targetDialect = targetDialect,
                                  tempEmulationSchema = tempEmulationSchema)
      DatabaseConnector::executeSql(conn, sql)
    }

    # add
    ParallelLogger::logInfo(paste0('Inserting predictionDistribution for result ',resultId))
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables,'prediction_distribution'),
                                   data = value[,columnNames],
                                   dropTableIfExists = FALSE, createTable = FALSE, tempTable = FALSE,
                                   bulkLoad = FALSE, camelCaseToSnakeCase = TRUE, progressBar = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
  }

  return(invisible(NULL))
}
# Insert the threshold-summary component of a performance evaluation.
#
# @param conn                  DatabaseConnector connection to the result database.
# @param resultSchema          Schema containing the PatientLevelPrediction result tables.
# @param targetDialect         SQL dialect of the database management system.
# @param stringAppendToTables  Prefix appended to the result table names.
# @param resultId              id of the results row these rows belong to.
# @param performanceEvaluation List whose thresholdSummary element is inserted.
# @param overWriteIfExists     If TRUE, replace existing rows for resultId.
# @param tempEmulationSchema   Temp schema used when the dbms is oracle.
# @return invisible(NULL); NULL immediately when the component is missing.
addThresholdSummary <- function(conn, resultSchema, targetDialect,
                                stringAppendToTables = '',
                                resultId,
                                performanceEvaluation,
                                overWriteIfExists = TRUE,
                                tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
  value <- performanceEvaluation$thresholdSummary
  if(is.null(value)){
    return(NULL)
  }

  # sanitize numerical columns (e.g. non-finite values) before upload
  value <- cleanNum(value)

  # lower-case the first character of each column name (camelCase convention)
  firstLower <- function(x) {
    substr(x, 1, 1) <- tolower(substr(x, 1, 1))
    return(x)
  }
  colnames(value) <- sapply(colnames(value), firstLower)
  value$resultId <- resultId

  # get column names and check all present in object
  columnNames <- getColumnNames(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                stringAppendToTables = stringAppendToTables,
                                tableName = 'threshold_summary',
                                tempEmulationSchema = tempEmulationSchema)
  # every database column must be available in the data
  isValid <- all(columnNames %in% colnames(value))
  exists <- checkResultExists(conn = conn,
                              resultSchema = resultSchema,
                              targetDialect = targetDialect,
                              tableName = paste0(stringAppendToTables,'threshold_summary'),
                              resultId = resultId,
                              tempEmulationSchema = tempEmulationSchema)

  if(isValid && (!exists || overWriteIfExists)){
    # remove any existing rows for this result before re-inserting
    if(exists){
      sql <- "delete from @result_schema.@table_name where result_id = @result_id;"
      sql <- SqlRender::render(sql,
                               result_schema = resultSchema,
                               result_id = resultId,
                               table_name = paste0(stringAppendToTables,'threshold_summary'))
      sql <- SqlRender::translate(sql,
                                  targetDialect = targetDialect,
                                  tempEmulationSchema = tempEmulationSchema)
      DatabaseConnector::executeSql(conn, sql)
    }

    # add
    ParallelLogger::logInfo(paste0('Inserting thresholdSummary for result ',resultId))
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables,'threshold_summary'),
                                   data = value[,columnNames],
                                   dropTableIfExists = FALSE, createTable = FALSE, tempTable = FALSE,
                                   bulkLoad = FALSE, camelCaseToSnakeCase = TRUE, progressBar = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
  }

  return(invisible(NULL))
}
# Insert the calibration-summary component of a performance evaluation.
#
# @param conn                  DatabaseConnector connection to the result database.
# @param resultSchema          Schema containing the PatientLevelPrediction result tables.
# @param targetDialect         SQL dialect of the database management system.
# @param stringAppendToTables  Prefix appended to the result table names.
# @param resultId              id of the results row these rows belong to.
# @param performanceEvaluation List whose calibrationSummary element is inserted.
# @param overWriteIfExists     If TRUE, replace existing rows for resultId.
# @param tempEmulationSchema   Temp schema used when the dbms is oracle.
# @return invisible(NULL); NULL immediately when the component is missing.
addCalibrationSummary <- function(conn, resultSchema, targetDialect,
                                  stringAppendToTables = '',
                                  resultId,
                                  performanceEvaluation,
                                  overWriteIfExists = TRUE,
                                  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
  value <- performanceEvaluation$calibrationSummary
  if(is.null(value)){
    return(NULL)
  }

  # sanitize numerical columns (e.g. non-finite values) before upload
  value <- cleanNum(value)

  # lower-case the first character of each column name (camelCase convention)
  firstLower <- function(x) {
    substr(x, 1, 1) <- tolower(substr(x, 1, 1))
    return(x)
  }
  colnames(value) <- sapply(colnames(value), firstLower)
  value$resultId <- resultId

  # get column names and check all present in object
  columnNames <- getColumnNames(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                stringAppendToTables = stringAppendToTables,
                                tableName = 'calibration_summary',
                                tempEmulationSchema = tempEmulationSchema)
  # every database column must be available in the data
  isValid <- all(columnNames %in% colnames(value))
  exists <- checkResultExists(conn = conn,
                              resultSchema = resultSchema,
                              targetDialect = targetDialect,
                              tableName = paste0(stringAppendToTables,'calibration_summary'),
                              resultId = resultId,
                              tempEmulationSchema = tempEmulationSchema)

  if(isValid && (!exists || overWriteIfExists)){
    # remove any existing rows for this result before re-inserting
    if(exists){
      sql <- "delete from @result_schema.@table_name where result_id = @result_id;"
      sql <- SqlRender::render(sql,
                               result_schema = resultSchema,
                               result_id = resultId,
                               table_name = paste0(stringAppendToTables,'calibration_summary'))
      sql <- SqlRender::translate(sql,
                                  targetDialect = targetDialect,
                                  tempEmulationSchema = tempEmulationSchema)
      DatabaseConnector::executeSql(conn, sql)
    }

    # add
    ParallelLogger::logInfo(paste0('Inserting calibrationSummary for result ',resultId))
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables,'calibration_summary'),
                                   data = value[,columnNames],
                                   dropTableIfExists = FALSE, createTable = FALSE, tempTable = FALSE,
                                   bulkLoad = FALSE, camelCaseToSnakeCase = TRUE, progressBar = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
  }

  return(invisible(NULL))
}
# Insert the evaluation-statistics component of a performance evaluation.
#
# @param conn                  DatabaseConnector connection to the result database.
# @param resultSchema          Schema containing the PatientLevelPrediction result tables.
# @param targetDialect         SQL dialect of the database management system.
# @param stringAppendToTables  Prefix appended to the result table names.
# @param resultId              id of the results row these rows belong to.
# @param performanceEvaluation List whose evaluationStatistics element is inserted.
# @param overWriteIfExists     If TRUE, replace existing rows for resultId.
# @param tempEmulationSchema   Temp schema used when the dbms is oracle.
# @return invisible(NULL); NULL immediately when the component is missing.
addEvaluationStatistics <- function(conn, resultSchema, targetDialect,
                                    stringAppendToTables = '',
                                    resultId,
                                    performanceEvaluation,
                                    overWriteIfExists = TRUE,
                                    tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
  # Guard the input component BEFORE building the data frame: data.frame()
  # never returns NULL, so the original is.null(value) check after
  # construction could never fire.
  if(is.null(performanceEvaluation$evaluationStatistics)){
    return(NULL)
  }

  value <- data.frame(
    evaluation = unlist(performanceEvaluation$evaluationStatistics$evaluation),
    metric = unlist(performanceEvaluation$evaluationStatistics$metric),
    value = as.numeric(unlist(performanceEvaluation$evaluationStatistics$value))
  )

  # lower-case the first character of each column name (camelCase convention)
  firstLower <- function(x) {
    substr(x, 1, 1) <- tolower(substr(x, 1, 1))
    return(x)
  }
  colnames(value) <- sapply(colnames(value), firstLower)
  value$resultId <- resultId

  # get column names and check all present in object
  columnNames <- getColumnNames(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                stringAppendToTables = stringAppendToTables,
                                tableName = 'evaluation_statistics',
                                tempEmulationSchema = tempEmulationSchema)
  # every database column must be available in the data
  isValid <- all(columnNames %in% colnames(value))
  exists <- checkResultExists(conn = conn,
                              resultSchema = resultSchema,
                              targetDialect = targetDialect,
                              tableName = paste0(stringAppendToTables,'evaluation_statistics'),
                              resultId = resultId,
                              tempEmulationSchema = tempEmulationSchema)

  if(isValid && (!exists || overWriteIfExists)){
    # remove any existing rows for this result before re-inserting
    if(exists){
      sql <- "delete from @result_schema.@table_name where result_id = @result_id;"
      sql <- SqlRender::render(sql,
                               result_schema = resultSchema,
                               result_id = resultId,
                               table_name = paste0(stringAppendToTables,'evaluation_statistics'))
      sql <- SqlRender::translate(sql,
                                  targetDialect = targetDialect,
                                  tempEmulationSchema = tempEmulationSchema)
      DatabaseConnector::executeSql(conn, sql)
    }

    # add
    ParallelLogger::logInfo(paste0('Inserting evaluationSummary for result ',resultId))
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables,'evaluation_statistics'),
                                   data = value[,columnNames],
                                   dropTableIfExists = FALSE, createTable = FALSE, tempTable = FALSE,
                                   bulkLoad = FALSE, camelCaseToSnakeCase = TRUE, progressBar = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
  }

  return(invisible(NULL))
}
# Insert the demographic-summary component of a performance evaluation.
#
# @param conn                  DatabaseConnector connection to the result database.
# @param resultSchema          Schema containing the PatientLevelPrediction result tables.
# @param targetDialect         SQL dialect of the database management system.
# @param stringAppendToTables  Prefix appended to the result table names.
# @param resultId              id of the results row these rows belong to.
# @param performanceEvaluation List whose demographicSummary element is inserted.
# @param overWriteIfExists     If TRUE, replace existing rows for resultId.
# @param tempEmulationSchema   Temp schema used when the dbms is oracle.
# @return invisible(NULL); NULL immediately when the component is missing.
addDemographicSummary <- function(conn, resultSchema, targetDialect,
                                  stringAppendToTables = '',
                                  resultId,
                                  performanceEvaluation,
                                  overWriteIfExists = TRUE,
                                  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
  value <- performanceEvaluation$demographicSummary
  if(is.null(value)){
    return(NULL)
  }

  # lower-case the first character of each column name (camelCase convention)
  firstLower <- function(x) {
    substr(x, 1, 1) <- tolower(substr(x, 1, 1))
    return(x)
  }
  colnames(value) <- sapply(colnames(value), firstLower)

  # intentionally disabled rename, kept for reference:
  #if(sum(colnames(value)=="p50PredictedProbability")>0){
  #  colnames(value)[colnames(value)=="p50PredictedProbability"] <- 'medianPredictedProbability'
  #}
  value$resultId <- resultId

  # get column names and check all present in object
  columnNames <- getColumnNames(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                stringAppendToTables = stringAppendToTables,
                                tableName = 'demographic_summary',
                                tempEmulationSchema = tempEmulationSchema)
  # every database column must be available in the data
  isValid <- all(columnNames %in% colnames(value))
  exists <- checkResultExists(conn = conn,
                              resultSchema = resultSchema,
                              targetDialect = targetDialect,
                              tableName = paste0(stringAppendToTables,'demographic_summary'),
                              resultId = resultId,
                              tempEmulationSchema = tempEmulationSchema)

  if(isValid && (!exists || overWriteIfExists)){
    # remove any existing rows for this result before re-inserting
    if(exists){
      sql <- "delete from @result_schema.@table_name where result_id = @result_id;"
      sql <- SqlRender::render(sql,
                               result_schema = resultSchema,
                               result_id = resultId,
                               table_name = paste0(stringAppendToTables,'demographic_summary'))
      sql <- SqlRender::translate(sql,
                                  targetDialect = targetDialect,
                                  tempEmulationSchema = tempEmulationSchema)
      DatabaseConnector::executeSql(conn, sql)
    }

    # add
    ParallelLogger::logInfo(paste0('Inserting demographicSummary for result ',resultId))
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables,'demographic_summary'),
                                   data = value[,columnNames],
                                   dropTableIfExists = FALSE, createTable = FALSE, tempTable = FALSE,
                                   bulkLoad = FALSE, camelCaseToSnakeCase = TRUE, progressBar = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
  }

  return(invisible(NULL))
}
# Insert the covariate summary for a result into the database.
#
# @param conn                 DatabaseConnector connection to the result database.
# @param resultSchema         Schema containing the PatientLevelPrediction result tables.
# @param targetDialect        SQL dialect of the database management system.
# @param stringAppendToTables Prefix appended to the result table names.
# @param resultId             id of the results row these rows belong to.
# @param covariateSummary     Data frame with per-covariate statistics (may be NULL).
# @param restrictToIncluded   If TRUE, only upload covariates with a non-zero,
#                             non-missing covariateValue (i.e. those in the model).
# @param overWriteIfExists    If TRUE, replace existing rows for resultId.
# @param tempEmulationSchema  Temp schema used when the dbms is oracle.
# @return invisible(NULL); NULL immediately when covariateSummary is NULL.
addCovariateSummary <- function(conn, resultSchema, targetDialect,
                                stringAppendToTables = '',
                                resultId,
                                covariateSummary,
                                restrictToIncluded = TRUE,
                                overWriteIfExists = TRUE,
                                tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")){
  value <- covariateSummary
  if(is.null(value)){
    return(NULL)
  }

  # lower-case the first character of each column name (camelCase convention)
  firstLower <- function(x) {
    substr(x, 1, 1) <- tolower(substr(x, 1, 1))
    return(x)
  }
  colnames(value) <- sapply(colnames(value), firstLower)
  value$resultId <- resultId

  # remove underscores so names map cleanly onto the snake_case columns later
  colnames(value) <- gsub('_','', colnames(value))

  if(restrictToIncluded){
    ParallelLogger::logInfo('Restricting to covariates included in model')
    value <- value[value$covariateValue!=0 & !is.na(value$covariateValue),]
  }

  # get column names and check all present in object
  columnNames <- getColumnNames(conn = conn,
                                resultSchema = resultSchema,
                                targetDialect = targetDialect,
                                stringAppendToTables = stringAppendToTables,
                                tableName = 'covariate_summary',
                                tempEmulationSchema = tempEmulationSchema)
  # every database column must be available in the data
  isValid <- all(columnNames %in% colnames(value))
  exists <- checkResultExists(conn = conn,
                              resultSchema = resultSchema,
                              targetDialect = targetDialect,
                              tableName = paste0(stringAppendToTables,'covariate_summary'),
                              resultId = resultId,
                              tempEmulationSchema = tempEmulationSchema)

  if(isValid && (!exists || overWriteIfExists)){
    # remove any existing rows for this result before re-inserting
    if(exists){
      ParallelLogger::logTrace('Removing existing covariateSummary')
      sql <- "delete from @result_schema.@table_name where result_id = @result_id;"
      sql <- SqlRender::render(sql,
                               result_schema = resultSchema,
                               result_id = resultId,
                               table_name = paste0(stringAppendToTables,'covariate_summary'))
      sql <- SqlRender::translate(sql,
                                  targetDialect = targetDialect,
                                  tempEmulationSchema = tempEmulationSchema)
      DatabaseConnector::executeSql(conn, sql)
    }

    # add
    ParallelLogger::logInfo(paste0('Inserting covariateSummary for result ',resultId))
    DatabaseConnector::insertTable(connection = conn,
                                   databaseSchema = resultSchema,
                                   tableName = paste0(stringAppendToTables,'covariate_summary'),
                                   data = value[,columnNames],
                                   dropTableIfExists = FALSE, createTable = FALSE, tempTable = FALSE,
                                   bulkLoad = FALSE, camelCaseToSnakeCase = TRUE, progressBar = TRUE,
                                   tempEmulationSchema = tempEmulationSchema)
  }

  return(invisible(NULL))
}
# NOTE: removed stray non-code text ("Add the following code to your website. /
# For more information on customizing the embed code, read Embedding Snippets.")
# — website-embed boilerplate accidentally captured with the source; it is not
# valid R and would break parsing of this file.