# Copyright 2018 Observational Health Data Sciences and Informatics
#
# This file is part of DistributedRegressionEval
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Create the PLP objects
#'
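#' @details
#' Loads the plpData object previously saved in the \code{plpData} subfolder of
#' \code{outputFolder} and, for every outcome in that object, builds a dense data frame holding
#' the known predictors for that outcome plus age, a binary outcome indicator (\code{y}) and the
#' survival time (\code{time}). Each data frame is written to \code{outputFolder} as
#' \code{data_o<outcomeId>.rds}.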
#'
#' @param connectionDetails An object of type \code{connectionDetails} as created using the
#' \code{\link[DatabaseConnector]{createConnectionDetails}} function in the
#' DatabaseConnector package.
#' @param cdmDatabaseSchema Schema name where your patient-level data in OMOP CDM format resides.
#' Note that for SQL Server, this should include both the database and
#' schema name, for example 'cdm_data.dbo'.
#' @param cohortDatabaseSchema Schema name where intermediate data can be stored. You will need to have
#' write privileges in this schema. Note that for SQL Server, this should
#' include both the database and schema name, for example 'cdm_data.dbo'.
#' @param cohortTable The name of the table that will be created in the work database schema.
#' This table will hold the exposure and outcome cohorts used in this
#' study.
#' @param oracleTempSchema Should be used in Oracle to specify a schema where the user has write
#' privileges for storing temporary tables.
#' @param outputFolder Name of local folder to place results; make sure to use forward slashes
#' (/)
#'
#' @export
createPlpObjects <- function(connectionDetails,
                             cdmDatabaseSchema,
                             cohortDatabaseSchema,
                             cohortTable = "cohort",
                             oracleTempSchema,
                             outputFolder) {
  # NOTE: the database extraction step below is currently commented out, so connectionDetails,
  # cdmDatabaseSchema, cohortDatabaseSchema, cohortTable and oracleTempSchema are not used by this
  # function. It expects a plpData object to already exist at file.path(outputFolder, "plpData").
  #
  # Returns NULL invisibly; the function is called for its side effect of writing one
  # data_o<outcomeId>.rds file per outcome into outputFolder.

  # system.file() returns "" when the file cannot be found; fail early with a clear message
  # instead of letting read.csv() fail on an empty path.
  pathToCsv <- system.file("settings", "KnownPredictors.csv", package = "DistributedRegressionEval")
  if (!nzchar(pathToCsv)) {
    stop("Cannot find settings/KnownPredictors.csv in package DistributedRegressionEval",
         call. = FALSE)
  }
  knownPredictors <- read.csv(pathToCsv)

  # covariateSettings <- FeatureExtraction::createCovariateSettings(useDemographicsAge = TRUE,
  #                                                                 useDemographicsGender = TRUE,
  #                                                                 useConditionGroupEraLongTerm = TRUE,
  #                                                                 longTermStartDays = -365,
  #                                                                 endDays = 0,
  #                                                                 includedCovariateConceptIds = unique(knownPredictors$conceptId))
  # plpData <- PatientLevelPrediction::getPlpData(connectionDetails = connectionDetails,
  #                                               cdmDatabaseSchema = cdmDatabaseSchema,
  #                                               oracleTempSchema = oracleTempSchema,
  #                                               cohortDatabaseSchema = cohortDatabaseSchema,
  #                                               cohortTable = cohortTable,
  #                                               cohortId = 102,
  #                                               washoutPeriod = 365,
  #                                               covariateSettings = covariateSettings,
  #                                               outcomeDatabaseSchema = cohortDatabaseSchema,
  #                                               outcomeTable = cohortTable,
  #                                               outcomeIds = c(3, 4, 5, 6),
  #                                               firstExposureOnly = TRUE)
  #
  # PatientLevelPrediction::savePlpData(plpData, file.path(outputFolder, "plpData"))
  plpData <- PatientLevelPrediction::loadPlpData(file.path(outputFolder, "plpData"))
  covariateData <- FeatureExtraction::tidyCovariateData(covariates = plpData$covariates,
                                                        covariateRef = plpData$covariateRef,
                                                        populationSize = plpData$metaData$populationSize,
                                                        minFraction = 0,
                                                        normalize = FALSE,
                                                        removeRedundancy = TRUE)
  # NOTE(review): as.ram is applied twice here; this looks redundant but is kept as-is -- confirm
  # against the ff version in use before simplifying.
  covariates <- ff::as.ram(ff::as.ram(covariateData$covariates))
  covariateRef <- ff::as.ram(plpData$covariateRef)

  # The outcome with the highest person count is used as a fallback ('safe') outcome when a study
  # population cannot be created for some other outcome.
  s <- summary(plpData)
  outcomeCounts <- s$outcomeCounts
  outcomeCounts <- outcomeCounts[order(-outcomeCounts$personCount), ]
  safeOutcomeId <- outcomeCounts$outcomeId[1]

  # Both study-population calls share the same time-at-risk settings; only the outcome and the two
  # outcome-handling flags differ.
  createPopulation <- function(outcomeId, includeAllOutcomes, removeSubjectsWithPriorOutcome) {
    PatientLevelPrediction::createStudyPopulation(plpData,
                                                  outcomeId = outcomeId,
                                                  includeAllOutcomes = includeAllOutcomes,
                                                  requireTimeAtRisk = TRUE,
                                                  minTimeAtRisk = 365,
                                                  riskWindowStart = 1,
                                                  addExposureDaysToStart = FALSE,
                                                  riskWindowEnd = 366,
                                                  addExposureDaysToEnd = FALSE,
                                                  removeSubjectsWithPriorOutcome = removeSubjectsWithPriorOutcome)
  }

  for (outcomeId in s$metaData$outcomeIds) {
    population <- createPopulation(outcomeId,
                                   includeAllOutcomes = TRUE,
                                   removeSubjectsWithPriorOutcome = TRUE)
    if (is.null(population)) {
      # population is set to null if there's no one with the outcome. Create study population with
      # 'safe' outcome, then set outcomeCount to 0:
      population <- createPopulation(safeOutcomeId,
                                     includeAllOutcomes = FALSE,
                                     removeSubjectsWithPriorOutcome = FALSE)
      if (is.null(population)) {
        stop("Unable to create a study population for outcome ", outcomeId,
             " or for fallback outcome ", safeOutcomeId,
             call. = FALSE)
      }
      population$outcomeCount <- 0
    }

    # Columns of the output: the known predictors for this outcome, plus age.
    covariateIds <- covariateRef$covariateId[covariateRef$conceptId %in% knownPredictors$conceptId[knownPredictors$outcomeId == outcomeId]]
    covariateIds <- c(covariateIds, 1002)  # add age
    covariateSubset <- covariates[covariates$covariateId %in% covariateIds & covariates$rowId %in% population$rowId, ]

    # Sparse to dense: one row per person in the population, one column per covariate. A two-column
    # index matrix fills all cells in one assignment and is a no-op when the subset is empty.
    ncovars <- length(covariateIds)
    nrows <- nrow(population)
    m <- matrix(0, nrows, ncovars)
    rowIs <- match(covariateSubset$rowId, population$rowId)
    columnIs <- match(covariateSubset$covariateId, covariateIds)
    m[cbind(rowIs, columnIs)] <- covariateSubset$covariateValue

    data <- as.data.frame(m)
    # Column names: covariate name with everything up to the last ': ' removed and spaces replaced
    # by underscores.
    columnNames <- as.character(covariateRef$covariateName[match(covariateIds, covariateRef$covariateId)])
    columnNames <- gsub(" ", "_", gsub(".*: ", "", columnNames))
    colnames(data) <- columnNames
    data$y <- as.integer(population$outcomeCount != 0)
    data$time <- population$survivalTime

    fileName <- file.path(outputFolder, paste0("data_o", outcomeId, ".rds"))
    saveRDS(data, fileName)
  }
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.