# this files contains the objects used in the tests:
if (Sys.getenv("GITHUB_ACTIONS") == "true") {
# Download the PostreSQL driver ---------------------------
# If DATABASECONNECTOR_JAR_FOLDER exists, assume driver has been downloaded
jarFolder <- Sys.getenv("DATABASECONNECTOR_JAR_FOLDER", unset = "")
if (jarFolder == "") {
tempJarFolder <- tempfile("jdbcDrivers")
dir.create(tempJarFolder)
Sys.setenv("DATABASECONNECTOR_JAR_FOLDER" = tempJarFolder)
DatabaseConnector::downloadJdbcDrivers("postgresql")
}
}
if (rlang::is_installed("curl")) {
internet <- curl::has_internet()
message("Internet: ", internet)
} else {
internet <- FALSE
message("Internet: ", internet)
}
saveLoc <- tempfile("saveLoc")
dir.create(saveLoc)
if (internet && rlang::is_installed("Eunomia")) {
# PLPDATA
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
Eunomia::createCohorts(connectionDetails)
outcomeId <- 3 # GIbleed
databaseDetails <- createDatabaseDetails(
connectionDetails = connectionDetails,
cdmDatabaseSchema = "main",
cdmDatabaseName = "main",
cohortDatabaseSchema = "main",
cohortTable = "cohort",
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
targetId = 1,
outcomeIds = outcomeId,
cdmVersion = 5
)
covariateSettings <- FeatureExtraction::createCovariateSettings(
useDemographicsAge = TRUE,
useDemographicsGender = TRUE,
useConditionOccurrenceAnyTimePrior = TRUE
)
plpData <- getPlpData(
databaseDetails = databaseDetails,
covariateSettings = covariateSettings,
restrictPlpDataSettings = createRestrictPlpDataSettings()
)
# POPULATION
populationSettings <- createStudyPopulationSettings(
firstExposureOnly = FALSE,
washoutPeriod = 0,
removeSubjectsWithPriorOutcome = FALSE,
priorOutcomeLookback = 99999,
requireTimeAtRisk = TRUE,
minTimeAtRisk = 10,
riskWindowStart = 0,
startAnchor = "cohort start",
riskWindowEnd = 365,
endAnchor = "cohort start"
)
# MODEL SETTINGS
lrSet <- setLassoLogisticRegression(seed = 42)
# RUNPLP - LASSO LR
plpResult <- runPlp(
plpData = plpData,
outcomeId = outcomeId,
analysisId = "Test",
analysisName = "Testing analysis",
populationSettings = populationSettings,
splitSettings = createDefaultSplitSetting(splitSeed = 12),
preprocessSettings = createPreprocessSettings(),
modelSettings = lrSet,
logSettings = createLogSettings(verbosity = "ERROR"),
executeSettings = createDefaultExecuteSettings(),
saveDirectory = saveLoc
)
# now diagnose
diagnoseResult <- diagnosePlp(
plpData = plpData,
outcomeId = outcomeId,
analysisId = "Test",
populationSettings = populationSettings,
splitSettings = createDefaultSplitSetting(splitSeed = 12),
saveDirectory = saveLoc,
modelSettings = lrSet,
logSettings = createLogSettings(
verbosity = "DEBUG",
timeStamp = TRUE,
logName = "diagnosePlp Log"
),
preprocessSettings = createPreprocessSettings(),
sampleSettings = NULL,
featureEngineeringSettings = NULL
)
#
population <- createStudyPopulation(
plpData = plpData,
outcomeId = outcomeId,
populationSettings = populationSettings
)
createSplitData <- function(plpData, population, split = "train") {
data <- PatientLevelPrediction::splitData(
plpData = plpData,
population = population,
splitSettings = PatientLevelPrediction::createDefaultSplitSetting(splitSeed = 12)
)
if (split == "train") {
return(data$Train)
} else {
return(data$Test)
}
}
trainData <- createSplitData(plpData, population)
testData <- createSplitData(plpData, population, split = "test")
# reduced Data to only use n most important features (as decided by LR)
reduceData <- function(data, n = 20) {
covariates <- plpResult$model$covariateImportance %>%
dplyr::slice_max(order_by = abs(.data$covariateValue), n = n, with_ties = FALSE) %>%
dplyr::pull(.data$covariateId)
reducedData <- list(
labels = data$labels,
folds = data$folds,
covariateData = Andromeda::andromeda(
analysisRef = data$covariateData$analysisRef
)
)
reducedData$covariateData$covariates <- trainData$covariateData$covariates %>%
dplyr::filter(.data$covariateId %in% covariates)
reducedData$covariateData$covariateRef <- trainData$covariateData$covariateRef %>%
dplyr::filter(.data$covariateId %in% covariates)
attributes(reducedData$covariateData)$metaData <- attributes(trainData$covariateData)$metaData
class(reducedData$covariateData) <- class(trainData$covariateData)
attributes(reducedData)$metaData <- attributes(trainData)$metaData
class(reducedData) <- class(data)
return(reducedData)
}
tinyTrainData <- reduceData(trainData)
tinyTestData <- reduceData(testData)
tinyPlpData <- createTinyPlpData(plpData, plpResult)
nanoData <- createTinyPlpData(plpData, plpResult, n = 2)
tinyResults <- runPlp(
plpData = nanoData,
populationSettings = populationSettings,
outcomeId = outcomeId,
analysisId = "tinyFit",
executeSettings = createExecuteSettings(
runSplitData = TRUE,
runSampleData = FALSE,
runFeatureEngineering = FALSE,
runPreprocessData = TRUE,
runModelDevelopment = TRUE,
runCovariateSummary = FALSE
),
saveDirectory = file.path(saveLoc, "tinyResults")
)
}
withr::defer(
{
if (Sys.getenv("GITHUB_ACTIONS") == "true") {
# Remove the JDBC driver folder
jarFolder <- Sys.getenv("DATABASECONNECTOR_JAR_FOLDER", unset = "")
if (jarFolder != "") {
unlink(jarFolder, recursive = TRUE)
}
}
unlink(saveLoc, recursive = TRUE)
if (internet && rlang::is_installed("Eunomia")) {
unlink(connectionDetails$server())
}
},
envir = teardown_env()
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.