R/CohortDiagnostics.R

Defines functions runCohortDiagnostics

Documented in runCohortDiagnostics

# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of examplePackage
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Execute the cohort diagnostics
#'
#' @details
#' This function executes the cohort diagnostics. It creates \code{outputFolder} if needed,
#' registers a file logger (\code{log.txt}) and an error report logger (\code{errorReportR.txt})
#' in that folder for the duration of the call, optionally instantiates the cohorts defined in
#' \code{packageName}, and then calls \code{CohortDiagnostics::runCohortDiagnostics}, exporting
#' to the \code{diagnosticsExport} subfolder of \code{outputFolder}.
#'
#' @param packageName                         Study package name. This package has the cohort
#'                                            specifications.
#' @param connectionDetails                   An object of type \code{connectionDetails} as created
#'                                            using the
#'                                            \code{\link[DatabaseConnector]{createConnectionDetails}}
#'                                            function in the DatabaseConnector package.
#' @param cdmDatabaseSchema                   Schema name where your patient-level data in OMOP CDM
#'                                            format resides. Note that for SQL Server, this should
#'                                            include both the database and schema name, for example
#'                                            'cdm_data.dbo'.
#' @param cohortDatabaseSchema                Schema name where intermediate data can be stored. You
#'                                            will need to have write privileges in this schema. Note
#'                                            that for SQL Server, this should include both the
#'                                            database and schema name, for example 'cdm_data.dbo'.
#' @param cohortTable                         The name of the table that will be created in the work
#'                                            database schema. This table will hold the exposure and
#'                                            outcome cohorts used in this study.
#' @param oracleTempSchema                    Should be used in Oracle to specify a schema where the
#'                                            user has write privileges for storing temporary tables.
#' @param outputFolder                        Name of local folder to place results; make sure to use
#'                                            forward slashes (/). Do not use a folder on a network
#'                                            drive since this greatly impacts performance.
#' @param databaseId                          A short string for identifying the database (e.g.
#'                                            'Synpuf').
#' @param databaseName                        The full name of the database (e.g. 'Medicare Claims
#'                                            Synthetic Public Use Files (SynPUFs)').
#' @param databaseDescription                 A short description (several sentences) of the database.
#' @param createCohorts                       Create the cohortTable table with the exposure and
#'                                            outcome cohorts?
#' @param runInclusionStatistics              Generate and export statistics on the cohort inclusion
#'                                            rules?
#' @param runIncludedSourceConcepts           Generate and export the source concepts included in the
#'                                            cohorts?
#' @param runOrphanConcepts                   Generate and export potential orphan concepts?
#' @param runTimeDistributions                Generate and export cohort time distributions?
#' @param runBreakdownIndexEvents             Generate and export the breakdown of index events?
#' @param runIncidenceRates                   Generate and export the cohort incidence rates?
#' @param runCohortOverlap                    Generate and export the cohort overlap?
#' @param runCohortCharacterization           Generate and export the cohort characterization?
#' @param runTemporalCohortCharacterization   Generate and export the temporal cohort characterization?
#' @param minCellCount                        The minimum number of subjects contributing to a count
#'                                            before it can be included in packaged results.
#' @param incremental                         Do you want to run this in incremental mode?
#' @param incrementalFolder                   Location to store incremental files
#'
#' @return
#' The value returned by \code{CohortDiagnostics::runCohortDiagnostics}. The function is mainly
#' called for its side effects (files written under \code{outputFolder}).
#'
#' @export
runCohortDiagnostics <- function(packageName = "examplePackage",
                                 connectionDetails,
                                 cdmDatabaseSchema,
                                 cohortDatabaseSchema = cdmDatabaseSchema,
                                 cohortTable = "cohort",
                                 oracleTempSchema = cohortDatabaseSchema,
                                 outputFolder,
                                 databaseId = "Unknown",
                                 databaseName = "Unknown",
                                 databaseDescription = "Unknown",
                                 createCohorts = TRUE,
                                 runInclusionStatistics = TRUE,
                                 runIncludedSourceConcepts = TRUE,
                                 runOrphanConcepts = TRUE,
                                 runTimeDistributions = TRUE,
                                 runBreakdownIndexEvents = TRUE,
                                 runIncidenceRates = TRUE,
                                 runCohortOverlap = TRUE,
                                 runCohortCharacterization = TRUE,
                                 runTemporalCohortCharacterization = TRUE,
                                 incremental = TRUE,
                                 incrementalFolder = file.path(outputFolder,
                                                               "incrementalFolder"),
                                 minCellCount = 5) {
  # dir.exists() (rather than file.exists()) so that a regular file with the
  # same name is not mistaken for an existing output directory.
  if (!dir.exists(outputFolder)) {
    dir.create(outputFolder, recursive = TRUE)
  }

  # Register each cleanup right after its logger is added, so a failure while
  # adding the second logger still unregisters the first. add = TRUE keeps any
  # previously registered exit handlers intact.
  ParallelLogger::addDefaultFileLogger(file.path(outputFolder, "log.txt"))
  on.exit(ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE), add = TRUE)
  ParallelLogger::addDefaultErrorReportLogger(file.path(outputFolder, "errorReportR.txt"))
  on.exit(ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE), add = TRUE)

  # Shared by cohort instantiation (writer) and the diagnostics run (reader).
  inclusionStatisticsFolder <- file.path(outputFolder, "inclusionStatisticsFolder")

  if (createCohorts) {
    ParallelLogger::logInfo("Creating cohorts")
    connection <- DatabaseConnector::connect(connectionDetails)
    # `finally` guarantees the connection is closed even when cohort
    # instantiation fails, and closes it before the diagnostics step starts
    # (which opens its own connection from connectionDetails).
    tryCatch(
      CohortDiagnostics::instantiateCohortSet(connection = connection,
                                              cdmDatabaseSchema = cdmDatabaseSchema,
                                              cohortDatabaseSchema = cohortDatabaseSchema,
                                              oracleTempSchema = oracleTempSchema,
                                              cohortTable = cohortTable,
                                              packageName = packageName,
                                              generateInclusionStats = runInclusionStatistics,
                                              inclusionStatisticsFolder = inclusionStatisticsFolder,
                                              createCohortTable = TRUE,
                                              incremental = incremental,
                                              incrementalFolder = incrementalFolder),
      finally = DatabaseConnector::disconnect(connection)
    )
  }

  ParallelLogger::logInfo("Running study diagnostics")
  # Note: the wrapped function's argument is named `runIncidenceRate`
  # (singular), while this wrapper exposes `runIncidenceRates`.
  CohortDiagnostics::runCohortDiagnostics(packageName = packageName,
                                          connectionDetails = connectionDetails,
                                          cdmDatabaseSchema = cdmDatabaseSchema,
                                          oracleTempSchema = oracleTempSchema,
                                          cohortDatabaseSchema = cohortDatabaseSchema,
                                          cohortTable = cohortTable,
                                          inclusionStatisticsFolder = inclusionStatisticsFolder,
                                          exportFolder = file.path(outputFolder,
                                                                   "diagnosticsExport"),
                                          databaseId = databaseId,
                                          databaseName = databaseName,
                                          databaseDescription = databaseDescription,
                                          runInclusionStatistics = runInclusionStatistics,
                                          runIncludedSourceConcepts = runIncludedSourceConcepts,
                                          runOrphanConcepts = runOrphanConcepts,
                                          runTimeDistributions = runTimeDistributions,
                                          runBreakdownIndexEvents = runBreakdownIndexEvents,
                                          runIncidenceRate = runIncidenceRates,
                                          runCohortOverlap = runCohortOverlap,
                                          runCohortCharacterization = runCohortCharacterization,
                                          runTemporalCohortCharacterization = runTemporalCohortCharacterization,
                                          incremental = incremental,
                                          incrementalFolder = incrementalFolder,
                                          minCellCount = minCellCount)
}
ohdsi-studies/RcriCohortDiagnostics documentation built on April 2, 2021, 11:53 a.m.