# @file VignetteDataFetch.R
#
# Copyright 2021 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This code should be used to fetch the data that is used in the vignettes.
library(SqlRender)
library(DatabaseConnector)
library(PatientLevelPrediction)
setwd("s:/temp")
options(fftempdir = "s:/FFtemp")
pw <- NULL
dbms <- "sql server"
user <- NULL
server <- "RNDUSRDHIT07.jnj.com"
cdmDatabaseSchema <- "cdm_truven_mdcd.dbo"
resultsDatabaseSchema <- "scratch.dbo"
port <- NULL
dbms <- "postgresql"
server <- "localhost/ohdsi"
user <- "postgres"
pw <- "F1r3starter"
cdmDatabaseSchema <- "cdm4_sim"
resultsDatabaseSchema <- "scratch"
port <- NULL
pw <- NULL
dbms <- "pdw"
user <- NULL
server <- "JRDUSAPSCTL01"
cdmDatabaseSchema <- "cdm_truven_mdcd_v5.dbo"
resultsDatabaseSchema <- "scratch.dbo"
oracleTempSchema <- NULL
port <- 17001
cdmVersion <- "5"
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = dbms,
server = server,
user = user,
password = pw,
port = port)
sql <- SqlRender::loadRenderTranslateSql("HospitalizationCohorts.sql",
packageName = "PatientLevelPrediction",
dbms = dbms,
cdmDatabaseSchema = cdmDatabaseSchema,
resultsDatabaseSchema = resultsDatabaseSchema,
post_time = 30,
pre_time = 365)
connection <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(connection, sql)
# Check number of subjects per cohort:
sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @resultsDatabaseSchema.rehospitalization GROUP BY cohort_definition_id"
sql <- SqlRender::renderSql(sql, resultsDatabaseSchema = resultsDatabaseSchema)$sql
sql <- SqlRender::translateSql(sql, targetDialect = connectionDetails$dbms)$sql
DatabaseConnector::querySql(connection, sql)
DatabaseConnector::disconnect(connection)
covariateSettings <- createCovariateSettings(useCovariateDemographics = TRUE,
useCovariateDemographicsGender = TRUE,
useCovariateDemographicsRace = TRUE,
useCovariateDemographicsEthnicity = TRUE,
useCovariateDemographicsAge = TRUE,
useCovariateDemographicsYear = TRUE,
useCovariateDemographicsMonth = TRUE,
useCovariateConditionOccurrence = TRUE,
useCovariateConditionOccurrence365d = TRUE,
useCovariateConditionOccurrence30d = TRUE,
useCovariateConditionOccurrenceInpt180d = TRUE,
useCovariateConditionEra = TRUE,
useCovariateConditionEraEver = TRUE,
useCovariateConditionEraOverlap = TRUE,
useCovariateConditionGroup = TRUE,
useCovariateConditionGroupMeddra = TRUE,
useCovariateConditionGroupSnomed = TRUE,
useCovariateDrugExposure = TRUE,
useCovariateDrugExposure365d = TRUE,
useCovariateDrugExposure30d = TRUE,
useCovariateDrugEra = TRUE,
useCovariateDrugEra365d = TRUE,
useCovariateDrugEra30d = TRUE,
useCovariateDrugEraOverlap = TRUE,
useCovariateDrugEraEver = TRUE,
useCovariateDrugGroup = TRUE,
useCovariateProcedureOccurrence = TRUE,
useCovariateProcedureOccurrence365d = TRUE,
useCovariateProcedureOccurrence30d = TRUE,
useCovariateProcedureGroup = TRUE,
useCovariateObservation = TRUE,
useCovariateObservation365d = TRUE,
useCovariateObservation30d = TRUE,
useCovariateObservationCount365d = TRUE,
useCovariateMeasurement = TRUE,
useCovariateMeasurement365d = TRUE,
useCovariateMeasurement30d = TRUE,
useCovariateMeasurementCount365d = TRUE,
useCovariateMeasurementBelow = TRUE,
useCovariateMeasurementAbove = TRUE,
useCovariateConceptCounts = TRUE,
useCovariateRiskScores = TRUE,
useCovariateRiskScoresCharlson = TRUE,
useCovariateRiskScoresDCSI = TRUE,
useCovariateRiskScoresCHADS2 = TRUE,
useCovariateRiskScoresCHADS2VASc = TRUE,
useCovariateInteractionYear = FALSE,
useCovariateInteractionMonth = FALSE,
excludedCovariateConceptIds = c(),
includedCovariateConceptIds = c(),
deleteCovariatesSmallCount = 100)
plpData <- getDbPlpData(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
oracleTempSchema = oracleTempSchema,
cohortDatabaseSchema = resultsDatabaseSchema,
cohortTable = "rehospitalization",
cohortIds = 1,
washoutWindow = 183,
useCohortEndDate = TRUE,
windowPersistence = 0,
covariateSettings = covariateSettings,
outcomeDatabaseSchema = resultsDatabaseSchema,
outcomeTable = "rehospitalization",
outcomeIds = 2,
firstOutcomeOnly = FALSE,
cdmVersion = cdmVersion)
savePlpData(plpData, "s:/temp/PlpVignette/plpData")
# plpData <- loadPlpData('s:/temp/PlpVignette/plpData')
means <- computeCovariateMeans(plpData = plpData, outcomeId = 2)
saveRDS(means, "s:/temp/PlpVignette/means.rds")
# plotCovariateDifferenceOfTopVariables(means)
parts <- splitData(plpData, c(0.75, 0.25))
savePlpData(parts[[1]], "s:/temp/PlpVignette/plpData_train")
savePlpData(parts[[2]], "s:/temp/PlpVignette/plpData_test")
# parts <- list(); parts[[1]] <- loadPlpData('s:/temp/PlpVignette/plpData_train'); parts[[2]] <-
# loadPlpData('s:/temp/PlpVignette/plpData_test')
model <- fitPredictiveModel(parts[[1]],
modelType = "logistic",
prior = createPrior("laplace",
exclude = c(0),
useCrossValidation = TRUE),
control = createControl(noiseLevel = "quiet",
cvType = "auto",
startingVariance = 0.001,
tolerance = 1e-07,
cvRepetitions = 10,
seed = 123,
threads = 30))
saveRDS(model, file = "s:/temp/PlpVignette/model.rds")
# model <- readRDS('s:/temp/PlpVignette/model.rds')
prediction <- predictProbabilities(model, parts[[2]])
saveRDS(prediction, file = "s:/temp/PlpVignette/prediction.rds")
# prediction <- readRDS('s:/temp/PlpVignette/prediction.rds')
computeAuc(prediction, parts[[2]])
plotRoc(prediction, parts[[2]])
plotCalibration(prediction, parts[[2]], numberOfStrata = 10)
modelDetails <- getModelDetails(model, parts[[2]])
head(modelDetails)
#### Datafetch for custom covariate builders #####
createLooCovariateSettings <- function(useLengthOfObs = TRUE) {
covariateSettings <- list(useLengthOfObs = useLengthOfObs)
attr(covariateSettings, "fun") <- "getDbLooCovariateData"
class(covariateSettings) <- "covariateSettings"
return(covariateSettings)
}
getDbLooCovariateData <- function(connection,
oracleTempSchema = NULL,
cdmDatabaseSchema,
cdmVersion = "4",
cohortTempTable = "cohort_person",
rowIdField = "subject_id",
covariateSettings) {
writeLines("Constructing length of observation covariates")
if (covariateSettings$useLengthOfObs == FALSE) {
return(NULL)
}
# Temp table names must start with a '#' in SQL Server, our source dialect:
if (substr(cohortTempTable, 1, 1) != "#") {
cohortTempTable <- paste("#", cohortTempTable, sep = "")
}
# Some SQL to construct the covariate:
sql <- paste("SELECT @row_id_field AS row_id, 1 AS covariate_id,",
"DATEDIFF(DAY, cohort_start_date, observation_period_start_date)",
"AS covariate_value",
"FROM @cohort_temp_table c",
"INNER JOIN @cdm_database_schema.observation_period op",
"ON op.person_id = c.subject_id",
"WHERE cohort_start_date >= observation_period_start_date",
"AND cohort_start_date <= observation_period_end_date")
sql <- SqlRender::renderSql(sql,
cohort_temp_table = cohortTempTable,
row_id_field = rowIdField,
cdm_database_schema = cdmDatabaseSchema)$sql
sql <- SqlRender::translateSql(sql, targetDialect = attr(connection, "dbms"))$sql
# Retrieve the covariate:
covariates <- DatabaseConnector::querySql.ffdf(connection, sql)
# Convert colum names to camelCase:
colnames(covariates) <- SqlRender::snakeCaseToCamelCase(colnames(covariates))
# Construct covariate reference:
covariateRef <- data.frame(covariateId = 1,
covariateName = "Length of observation",
analysisId = 1,
conceptId = 0)
covariateRef <- ff::as.ffdf(covariateRef)
metaData <- list(sql = sql, call = match.call())
result <- list(covariates = covariates, covariateRef = covariateRef, metaData = metaData)
class(result) <- "covariateData"
return(result)
}
looCovariateSettings <- createLooCovariateSettings(useLengthOfObs = TRUE)
plpData <- getDbPlpData(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = resultsDatabaseSchema,
cohortTable = "mschuemi_stroke",
cohortIds = 1,
useCohortEndDate = TRUE,
windowPersistence = 0,
covariateSettings = looCovariateSettings,
outcomeDatabaseSchema = resultsDatabaseSchema,
outcomeTable = "mschuemi_stroke",
outcomeIds = 2,
firstOutcomeOnly = TRUE,
cdmVersion = cdmVersion)
covariateSettings <- createCovariateSettings(useCovariateDemographics = TRUE,
useCovariateDemographicsGender = TRUE,
useCovariateDemographicsRace = TRUE,
useCovariateDemographicsEthnicity = TRUE,
useCovariateDemographicsAge = TRUE,
useCovariateDemographicsYear = TRUE,
useCovariateDemographicsMonth = TRUE)
looCovariateSettings <- createLooCovariateSettings(useLengthOfObs = TRUE)
covariateSettingsList <- list(covariateSettings, looCovariateSettings)
plpData <- getDbPlpData(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = resultsDatabaseSchema,
cohortTable = "mschuemi_stroke",
cohortIds = 1,
useCohortEndDate = TRUE,
windowPersistence = 0,
covariateSettings = covariateSettingsList,
outcomeDatabaseSchema = resultsDatabaseSchema,
outcomeTable = "mschuemi_stroke",
outcomeIds = 2,
firstOutcomeOnly = TRUE,
cdmVersion = cdmVersion)
covariateSettings <- createHdpsCovariateSettings(useCovariateCohortIdIs1 = FALSE,
useCovariateDemographics = TRUE,
useCovariateDemographicsGender = TRUE,
useCovariateDemographicsRace = TRUE,
useCovariateDemographicsEthnicity = TRUE,
useCovariateDemographicsAge = TRUE,
useCovariateDemographicsYear = TRUE,
useCovariateDemographicsMonth = TRUE,
useCovariateConditionOccurrence = TRUE,
useCovariate3DigitIcd9Inpatient180d = TRUE,
useCovariate3DigitIcd9Inpatient180dMedF = TRUE,
useCovariate3DigitIcd9Inpatient180d75F = TRUE,
useCovariate3DigitIcd9Ambulatory180d = TRUE,
useCovariate3DigitIcd9Ambulatory180dMedF = TRUE,
useCovariate3DigitIcd9Ambulatory180d75F = TRUE,
useCovariateDrugExposure = TRUE,
useCovariateIngredientExposure180d = TRUE,
useCovariateIngredientExposure180dMedF = TRUE,
useCovariateIngredientExposure180d75F = TRUE,
useCovariateProcedureOccurrence = TRUE,
useCovariateProcedureOccurrenceInpatient180d = TRUE,
useCovariateProcedureOccurrenceInpatient180dMedF = TRUE,
useCovariateProcedureOccurrenceInpatient180d75F = TRUE,
useCovariateProcedureOccurrenceAmbulatory180d = TRUE,
useCovariateProcedureOccurrenceAmbulatory180dMedF = TRUE,
useCovariateProcedureOccurrenceAmbulatory180d75F = TRUE,
excludedCovariateConceptIds = c(),
includedCovariateConceptIds = c(),
deleteCovariatesSmallCount = 100)
#### Datafetch for cohort attribute covariate builder #####
library(SqlRender)
library(DatabaseConnector)
library(PatientLevelPrediction)
setwd("s:/temp")
options(fftempdir = "s:/FFtemp")
pw <- NULL
dbms <- "sql server"
user <- NULL
server <- "RNDUSRDHIT07.jnj.com"
cdmDatabaseSchema <- "cdm_truven_mdcd.dbo"
resultsDatabaseSchema <- "scratch.dbo"
port <- NULL
dbms <- "postgresql"
server <- "localhost/ohdsi"
user <- "postgres"
pw <- "F1r3starter"
cdmDatabaseSchema <- "cdm4_sim"
resultsDatabaseSchema <- "scratch"
port <- NULL
pw <- NULL
dbms <- "pdw"
user <- NULL
server <- "JRDUSAPSCTL01"
cdmDatabaseSchema <- "cdm_truven_mdcd_v5.dbo"
cohortDatabaseSchema <- "scratch.dbo"
oracleTempSchema <- NULL
port <- 17001
cdmVersion <- "5"
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = dbms,
server = server,
user = user,
password = pw,
port = port)
connection <- DatabaseConnector::connect(connectionDetails)
# Build cohorts:
sql <- SqlRender::loadRenderTranslateSql("HospitalizationCohorts.sql",
packageName = "PatientLevelPrediction",
dbms = dbms,
cdmDatabaseSchema = cdmDatabaseSchema,
resultsDatabaseSchema = cohortDatabaseSchema,
post_time = 30,
pre_time = 365)
DatabaseConnector::executeSql(connection, sql)
# Build cohort attributes:
sql <- SqlRender::loadRenderTranslateSql("LengthOfObsCohortAttr.sql",
packageName = "PatientLevelPrediction",
dbms = dbms,
cdm_database_schema = cdmDatabaseSchema,
cohort_database_schema = cohortDatabaseSchema,
cohort_table = "rehospitalization",
cohort_attribute_table = "loo_cohort_attribute",
attribute_definition_table = "loo_attribute_definition",
cohort_definition_ids = c(1, 2))
DatabaseConnector::executeSql(connection, sql)
querySql(connection, "SELECT TOP 100 * FROM scratch.dbo.loo_cohort_attribute")
looCovariateSettings <- createCohortAttrCovariateSettings(attrDatabaseSchema = cohortDatabaseSchema,
cohortAttrTable = "loo_cohort_attribute",
attrDefinitionTable = "loo_attribute_definition",
includeAttrIds = c())
plpData <- getDbPlpData(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = "rehospitalization",
cohortIds = 1,
useCohortEndDate = TRUE,
windowPersistence = 0,
covariateSettings = looCovariateSettings,
outcomeDatabaseSchema = cohortDatabaseSchema,
outcomeTable = "rehospitalization",
outcomeIds = 2,
firstOutcomeOnly = TRUE,
cdmVersion = cdmVersion)
summary(plpData)
plpData$covariates
sql <- "DROP TABLE @cohort_database_schema.rehospitalization"
sql <- SqlRender::renderSql(sql, cohort_database_schema = cohortDatabaseSchema)$sql
sql <- SqlRender::translateSql(sql, targetDialect = attr(connection, "dbms"))$sql
looCovariateSettings <- createCohortAttrCovariateSettings(attrDatabaseSchema = cohortDatabaseSchema,
cohortAttrTable = "loo_cohort_attribute",
attrDefinitionTable = "loo_attribute_definition",
includeAttrIds = c())
covariateSettingsList <- list(looCovariateSettings, looCovariateSettings)
plpData <- getDbPlpData(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = "rehospitalization",
cohortIds = 1,
useCohortEndDate = TRUE,
windowPersistence = 0,
covariateSettings = covariateSettingsList,
outcomeDatabaseSchema = cohortDatabaseSchema,
outcomeTable = "rehospitalization",
outcomeIds = 2,
firstOutcomeOnly = TRUE,
cdmVersion = cdmVersion)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.