# @file Database.R
#
# Copyright 2024 Observational Health Data Sciences and Informatics
#
# This file is part of Characterization
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Create an sqlite database connection
#' @description
#' This function creates the connection details for an sqlite database
#'
#' @details
#' This function creates the folder for an sqlite database and returns a connection details object for it
#'
#' @param sqliteLocation    The location of the sqlite database
#' @family Database
#'
#' @examples
#'
#' charResultDbCD <- createSqliteDatabase()
#'
#'
#' @return
#' Returns the connection details object for the sqlite database
#'
#' @export
createSqliteDatabase <- function(
    sqliteLocation = tempdir()
    ) {
  sqliteLocation <- file.path(
    sqliteLocation,
    "sqliteCharacterization"
  )

  if (!dir.exists(sqliteLocation)) {
    dir.create(
      path = sqliteLocation,
      recursive = TRUE
    )
  }

  connectionDetails <- DatabaseConnector::createConnectionDetails(
    dbms = "sqlite",
    server = file.path(sqliteLocation, "sqlite.sqlite")
  )

  return(connectionDetails)
}
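
# A minimal sketch (illustrative only, not part of the package API) of how the
# returned connection details can be used with DatabaseConnector; the tempdir()
# location and the table listing are assumptions for illustration:
#   charResultDbCD <- createSqliteDatabase(sqliteLocation = tempdir())
#   con <- DatabaseConnector::connect(connectionDetails = charResultDbCD)
#   DatabaseConnector::getTableNames(connection = con, databaseSchema = "main")
#   DatabaseConnector::disconnect(con)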

#' Upload the results into a result database
#' @description
#' This function uploads results in csv format into a result database
#'
#' @details
#' Calls the ResultModelManager uploadResults function to upload the csv files
#'
#' @param connectionDetails    The connection details to the result database
#' @param schema               The schema for the result database
#' @param resultsFolder        The folder containing the csv results
#' @param tablePrefix          A prefix to prepend to the result table names for the characterization results
#' @param csvTablePrefix       The prefix added to the csv results - default is 'c_'
#' @family Database
#' @return
#' Returns NULL (invisibly) after the csv results have been uploaded into the database
#'
#' @examples
#'
#' # generate results into resultsFolder
#' conDet <- exampleOmopConnectionDetails()
#'
#' drSet <- createDechallengeRechallengeSettings(
#'   targetIds = c(1,2),
#'   outcomeIds = 3
#' )
#'
#' cSet <- createCharacterizationSettings(
#'   dechallengeRechallengeSettings = drSet
#' )
#'
#' runCharacterizationAnalyses(
#'   connectionDetails = conDet,
#'   targetDatabaseSchema = 'main',
#'   targetTable = 'cohort',
#'   outcomeDatabaseSchema = 'main',
#'   outcomeTable = 'cohort',
#'   cdmDatabaseSchema = 'main',
#'   characterizationSettings = cSet,
#'   outputDirectory = tempdir()
#' )
#'
#' # create sqlite database
#' charResultDbCD <- createSqliteDatabase()
#'
#' # create database results tables
#' createCharacterizationTables(
#'   connectionDetails = charResultDbCD,
#'   resultSchema = 'main'
#' )
#'
#' # insert results
#' insertResultsToDatabase(
#'   connectionDetails = charResultDbCD,
#'   schema = 'main',
#'   resultsFolder = tempdir()
#' )
#'
#'
#' @export
insertResultsToDatabase <- function(
    connectionDetails,
    schema,
    resultsFolder,
    tablePrefix = "",
    csvTablePrefix = "c_") {
  specLoc <- system.file("settings", "resultsDataModelSpecification.csv",
    package = "Characterization"
  )
  specs <- utils::read.csv(specLoc)
  colnames(specs) <- SqlRender::snakeCaseToCamelCase(colnames(specs))
  specs$tableName <- paste0(csvTablePrefix, specs$tableName)
  ResultModelManager::uploadResults(
    connectionDetails = connectionDetails,
    schema = schema,
    resultsFolder = resultsFolder,
    tablePrefix = tablePrefix,
    specifications = specs,
    purgeSiteDataBeforeUploading = FALSE
  )

  return(invisible(NULL))
}

## TODO add this into the csv exporting
# Censors small counts: for any row where one of the minCellCountColumns falls
# below minCellCount, the values in those columns are replaced with -1
removeMinCell <- function(
    data,
    minCellCount = 0,
    minCellCountColumns = list()) {
  for (columns in minCellCountColumns) {
    ind <- apply(
      X = data[, columns, drop = FALSE],
      MARGIN = 1,
      FUN = function(x) sum(x < minCellCount) > 0
    )

    if (sum(ind) > 0) {
      ParallelLogger::logInfo(
        paste0(
          "Removing values less than ",
          minCellCount,
          " from ",
          paste(columns, collapse = " and ")
        )
      )
      data[ind, columns] <- -1
    }
  }
  return(data)
}
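
# A minimal sketch (illustrative only, not exported) of how removeMinCell
# censors small counts; the column names and values below are assumptions:
#   exampleData <- data.frame(targetId = c(1, 2), personCount = c(4, 25))
#   removeMinCell(
#     data = exampleData,
#     minCellCount = 5,
#     minCellCountColumns = list("personCount")
#   )
#   # the first row's personCount (4 < 5) is replaced with -1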


#' Create the results tables to store characterization results into a database
#' @description
#' This function executes a large set of SQL statements to create tables that can store results
#'
#' @details
#' This function can be used to create (or delete) Characterization result tables
#'
#' @param connectionDetails            The connectionDetails to a database created by using the
#'                                     function \code{createConnectionDetails} in the
#'                                     \code{DatabaseConnector} package.
#' @param resultSchema                 The name of the database schema in which the result tables will be created.
#' @param targetDialect                The database management system being used
#' @param deleteExistingTables         If TRUE, any existing tables matching the Characterization result table names will be deleted
#' @param createTables                 If TRUE, the Characterization result tables will be created
#' @param tablePrefix                  A string prefixed to the Characterization result table names
#' @param tempEmulationSchema          The temp emulation schema used when the database management system is Oracle
#'
#' @family Database
#'
#' @return
#' Returns NULL but creates the required tables in the specified database schema.
#'
#' @examples
#' # create sqlite database
#' charResultDbCD <- createSqliteDatabase()
#'
#' # create database results tables
#' createCharacterizationTables(
#'   connectionDetails = charResultDbCD,
#'   resultSchema = 'main'
#' )
#'
#' @export
createCharacterizationTables <- function(
    connectionDetails,
    resultSchema,
    targetDialect = "postgresql",
    deleteExistingTables = TRUE,
    createTables = TRUE,
    tablePrefix = "c_",
    tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) {
  errorMessages <- checkmate::makeAssertCollection()
  .checkTablePrefix(
    tablePrefix = tablePrefix,
    errorMessages = errorMessages
  )
  checkmate::reportAssertions(errorMessages)

  conn <- DatabaseConnector::connect(connectionDetails = connectionDetails)
  on.exit(DatabaseConnector::disconnect(conn))

  alltables <- tolower(
    DatabaseConnector::getTableNames(
      connection = conn,
      databaseSchema = resultSchema
    )
  )
  tables <- getResultTables()
  tables <- paste0(tablePrefix, tables)

  # do not create the tables if they all already exist
  if (sum(tables %in% alltables) == length(tables) && !deleteExistingTables) {
    message("All tables exist so no need to recreate")
    createTables <- FALSE
  }

  if (deleteExistingTables) {
    message("Deleting existing tables")

    for (tb in tables) {
      if (tb %in% alltables) {
        sql <- "DELETE FROM @my_schema.@table"
        sql <- SqlRender::render(
          sql = sql,
          my_schema = resultSchema,
          table = tb
        )
        sql <- SqlRender::translate(
          sql = sql,
          targetDialect = targetDialect,
          tempEmulationSchema = tempEmulationSchema
        )
        DatabaseConnector::executeSql(
          connection = conn,
          sql = sql
        )

        sql <- "DROP TABLE @my_schema.@table"
        sql <- SqlRender::render(
          sql = sql,
          my_schema = resultSchema,
          table = tb
        )
        sql <- SqlRender::translate(
          sql = sql,
          targetDialect = targetDialect,
          tempEmulationSchema = tempEmulationSchema
        )
        DatabaseConnector::executeSql(
          connection = conn,
          sql = sql
        )
      }
    }
  }

  if (createTables) {
    ParallelLogger::logInfo("Creating characterization results tables")
    renderedSql <- SqlRender::loadRenderTranslateSql(
      sqlFilename = "ResultTables.sql",
      packageName = "Characterization",
      dbms = targetDialect,
      tempEmulationSchema = tempEmulationSchema,
      my_schema = resultSchema,
      table_prefix = tablePrefix
    )

    DatabaseConnector::executeSql(
      connection = conn,
      sql = renderedSql
    )

    # migrate the data model to the latest version
    migrateDataModel(
      connectionDetails = connectionDetails,
      connection = conn,
      databaseSchema = resultSchema,
      tablePrefix = tablePrefix
    )
  }
}
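
# A minimal sketch (assumed inputs, illustrative only) of recreating the result
# tables in an sqlite results database using the documented parameters:
#   charResultDbCD <- createSqliteDatabase(sqliteLocation = tempdir())
#   createCharacterizationTables(
#     connectionDetails = charResultDbCD,
#     resultSchema = "main",
#     targetDialect = "sqlite",
#     deleteExistingTables = TRUE,
#     createTables = TRUE,
#     tablePrefix = "c_"
#   )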


migrateDataModel <- function(
    connectionDetails,
    connection,
    databaseSchema,
    tablePrefix = ""
    ) {
  ParallelLogger::logInfo("Migrating data set")
  migrator <- getDataMigrator(
    connectionDetails = connectionDetails,
    databaseSchema = databaseSchema,
    tablePrefix = tablePrefix
  )
  migrator$executeMigrations()
  migrator$finalize()

  ParallelLogger::logInfo("Updating version number")
  updateVersionSql <- SqlRender::loadRenderTranslateSql("UpdateVersionNumber.sql",
    packageName = utils::packageName(),
    database_schema = databaseSchema,
    table_prefix = tablePrefix,
    dbms = connectionDetails$dbms
  )

  if (missing(connection)) {
    connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
    on.exit(DatabaseConnector::disconnect(connection))
  }
  DatabaseConnector::executeSql(connection, updateVersionSql)
}


getDataMigrator <- function(connectionDetails, databaseSchema, tablePrefix = "") {
  ResultModelManager::DataMigrationManager$new(
    connectionDetails = connectionDetails,
    databaseSchema = databaseSchema,
    tablePrefix = tablePrefix,
    migrationPath = "migrations",
    packageName = utils::packageName()
  )
}
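
# A minimal sketch (illustrative only) of running the schema migrations by hand,
# mirroring what migrateDataModel does; the connection details are an assumption:
#   migrator <- getDataMigrator(
#     connectionDetails = charResultDbCD,
#     databaseSchema = "main",
#     tablePrefix = "c_"
#   )
#   migrator$executeMigrations()
#   migrator$finalize()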

getResultTables <- function() {
  return(
    unique(
      c(
        readr::read_csv(
          file = system.file(
            "settings",
            "resultsDataModelSpecification.csv",
            package = "Characterization"
          ),
          show_col_types = FALSE
        )$table_name,
        "migration", "package_version"
      )
    )
  )
}



# Removes scientific notation for any columns that are
# formatted as doubles. Based on this GitHub issue:
# https://github.com/tidyverse/readr/issues/671#issuecomment-300567232
formatDouble <- function(x, scientific = FALSE, ...) {
  doubleCols <- vapply(x, is.double, logical(1))
  x[doubleCols] <- lapply(x[doubleCols], format, scientific = scientific, ...)

  return(x)
}
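
# A minimal sketch (illustrative only) of the effect of formatDouble before
# writing a csv; the data.frame contents below are assumptions:
#   x <- data.frame(id = 1L, rate = 0.0000001234)
#   formatDouble(x)$rate
#   # returns something like "0.0000001234" instead of "1.234e-07";
#   # the integer id column is left untouched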
