# @file Database.R
#
# Copyright 2024 Observational Health Data Sciences and Informatics
#
# This file is part of Characterization
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Create an sqlite database connection
#' @description
#' This function creates the connection details for an sqlite database
#'
#' @details
#' This function creates the folder for an sqlite database and returns a connection details object for it
#'
#' @param sqliteLocation    The location of the sqlite database
#' @family Database
#'
#' @examples
#'
#' charResultDbCD <- createSqliteDatabase()
#'
#'
#' @return
#' Returns the connection details object for the sqlite database
#'
#' @export
createSqliteDatabase <- function(
    sqliteLocation = tempdir()
    ) {
  sqliteLocation <- file.path(
    sqliteLocation,
    "sqliteCharacterization"
  )

  if (!dir.exists(sqliteLocation)) {
    dir.create(
      path = sqliteLocation,
      recursive = TRUE
    )
  }

  connectionDetails <- DatabaseConnector::createConnectionDetails(
    dbms = "sqlite",
    server = file.path(sqliteLocation, "sqlite.sqlite")
  )

  return(connectionDetails)
}
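
# A minimal sketch (illustrative only, not part of the package API) of how the
# returned connection details can be used with DatabaseConnector; the tempdir()
# location and the table listing are assumptions for illustration:
#   charResultDbCD <- createSqliteDatabase(sqliteLocation = tempdir())
#   con <- DatabaseConnector::connect(connectionDetails = charResultDbCD)
#   DatabaseConnector::getTableNames(connection = con, databaseSchema = "main")
#   DatabaseConnector::disconnect(con)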

#' Upload the results into a result database
#' @description
#' This function uploads results in csv format into a result database
#'
#' @details
#' Calls the ResultModelManager uploadResults function to upload the csv files
#'
#' @param connectionDetails    The connection details to the result database
#' @param schema               The schema for the result database
#' @param resultsFolder        The folder containing the csv results
#' @param tablePrefix          A prefix to prepend to the result table names for the characterization results
#' @param csvTablePrefix       The prefix added to the csv results - default is 'c_'
#' @family Database
#' @return
#' Returns NULL (invisibly) after the csv results have been uploaded into the database
#'
#' @examples
#'
#' # generate results into resultsFolder
#' conDet <- exampleOmopConnectionDetails()
#'
#' drSet <- createDechallengeRechallengeSettings(
#'   targetIds = c(1,2),
#'   outcomeIds = 3
#' )
#'
#' cSet <- createCharacterizationSettings(
#'   dechallengeRechallengeSettings = drSet
#' )
#'
#' runCharacterizationAnalyses(
#'   connectionDetails = conDet,
#'   targetDatabaseSchema = 'main',
#'   targetTable = 'cohort',
#'   outcomeDatabaseSchema = 'main',
#'   outcomeTable = 'cohort',
#'   cdmDatabaseSchema = 'main',
#'   characterizationSettings = cSet,
#'   outputDirectory = tempdir()
#' )
#'
#' # create sqlite database
#' charResultDbCD <- createSqliteDatabase()
#'
#' # create database results tables
#' createCharacterizationTables(
#'   connectionDetails = charResultDbCD,
#'   resultSchema = 'main'
#' )
#'
#' # insert results
#' insertResultsToDatabase(
#'   connectionDetails = charResultDbCD,
#'   schema = 'main',
#'   resultsFolder = tempdir()
#' )
#'
#'
#' @export
insertResultsToDatabase <- function(
    connectionDetails,
    schema,
    resultsFolder,
    tablePrefix = "",
    csvTablePrefix = "c_") {
  specLoc <- system.file("settings", "resultsDataModelSpecification.csv",
    package = "Characterization"
  )
  specs <- utils::read.csv(specLoc)
  colnames(specs) <- SqlRender::snakeCaseToCamelCase(colnames(specs))
  specs$tableName <- paste0(csvTablePrefix, specs$tableName)
  ResultModelManager::uploadResults(
    connectionDetails = connectionDetails,
    schema = schema,
    resultsFolder = resultsFolder,
    tablePrefix = tablePrefix,
    specifications = specs,
    purgeSiteDataBeforeUploading = FALSE
  )

  return(invisible(NULL))
}

## TODO add this into the csv exporting
# Censors small counts: for any row where one of the minCellCountColumns falls
# below minCellCount, the values in those columns are replaced with -1
removeMinCell <- function(
    data,
    minCellCount = 0,
    minCellCountColumns = list()) {
  for (columns in minCellCountColumns) {
    ind <- apply(
      X = data[, columns, drop = FALSE],
      MARGIN = 1,
      FUN = function(x) sum(x < minCellCount) > 0
    )

    if (sum(ind) > 0) {
      ParallelLogger::logInfo(
        paste0(
          "Removing values less than ",
          minCellCount,
          " from ",
          paste(columns, collapse = " and ")
        )
      )
      data[ind, columns] <- -1
    }
  }
  return(data)
}
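
# A minimal sketch (illustrative only, not exported) of how removeMinCell
# censors small counts; the column names and values below are assumptions:
#   exampleData <- data.frame(targetId = c(1, 2), personCount = c(4, 25))
#   removeMinCell(
#     data = exampleData,
#     minCellCount = 5,
#     minCellCountColumns = list("personCount")
#   )
#   # the first row's personCount (4 < 5) is replaced with -1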


#' Create the results tables to store characterization results into a database
#' @description
#' This function executes a large set of SQL statements to create tables that can store results
#'
#' @details
#' This function can be used to create (or delete) Characterization result tables
#'
#' @param connectionDetails            The connectionDetails to a database created by using the
#'                                     function \code{createConnectionDetails} in the
#'                                     \code{DatabaseConnector} package.
#' @param resultSchema                 The name of the database schema in which the result tables will be created.
#' @param targetDialect                The database management system being used
#' @param deleteExistingTables         If TRUE, any existing tables matching the Characterization result table names will be deleted
#' @param createTables                 If TRUE, the Characterization result tables will be created
#' @param tablePrefix                  A string prefixed to the Characterization result table names
#' @param tempEmulationSchema          The temp emulation schema used when the database management system is Oracle
#'
#' @family Database
#'
#' @return
#' Returns NULL but creates the required tables in the specified database schema.
#'
#' @examples
#' # create sqlite database
#' charResultDbCD <- createSqliteDatabase()
#'
#' # create database results tables
#' createCharacterizationTables(
#'   connectionDetails = charResultDbCD,
#'   resultSchema = 'main'
#' )
#'
#' @export
createCharacterizationTables <- function(
    connectionDetails,
    resultSchema,
    targetDialect = "postgresql",
    deleteExistingTables = TRUE,
    createTables = TRUE,
    tablePrefix = "c_",
    tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) {
  errorMessages <- checkmate::makeAssertCollection()
  .checkTablePrefix(
    tablePrefix = tablePrefix,
    errorMessages = errorMessages
  )
  checkmate::reportAssertions(errorMessages)

  conn <- DatabaseConnector::connect(connectionDetails = connectionDetails)
  on.exit(DatabaseConnector::disconnect(conn))

  alltables <- tolower(
    DatabaseConnector::getTableNames(
      connection = conn,
      databaseSchema = resultSchema
    )
  )
  tables <- getResultTables()
  tables <- paste0(tablePrefix, tables)

  # do not create the tables if they all already exist
  if (sum(tables %in% alltables) == length(tables) && !deleteExistingTables) {
    message("All tables exist so no need to recreate")
    createTables <- FALSE
  }

  if (deleteExistingTables) {
    message("Deleting existing tables")

    for (tb in tables) {
      if (tb %in% alltables) {
        sql <- "DELETE FROM @my_schema.@table"
        sql <- SqlRender::render(
          sql = sql,
          my_schema = resultSchema,
          table = tb
        )
        sql <- SqlRender::translate(
          sql = sql,
          targetDialect = targetDialect,
          tempEmulationSchema = tempEmulationSchema
        )
        DatabaseConnector::executeSql(
          connection = conn,
          sql = sql
        )

        sql <- "DROP TABLE @my_schema.@table"
        sql <- SqlRender::render(
          sql = sql,
          my_schema = resultSchema,
          table = tb
        )
        sql <- SqlRender::translate(
          sql = sql,
          targetDialect = targetDialect,
          tempEmulationSchema = tempEmulationSchema
        )
        DatabaseConnector::executeSql(
          connection = conn,
          sql = sql
        )
      }
    }
  }

  if (createTables) {
    ParallelLogger::logInfo("Creating characterization results tables")
    renderedSql <- SqlRender::loadRenderTranslateSql(
      sqlFilename = "ResultTables.sql",
      packageName = "Characterization",
      dbms = targetDialect,
      tempEmulationSchema = tempEmulationSchema,
      my_schema = resultSchema,
      table_prefix = tablePrefix
    )

    DatabaseConnector::executeSql(
      connection = conn,
      sql = renderedSql
    )

    # migrate the data model to the latest version
    migrateDataModel(
      connectionDetails = connectionDetails,
      connection = conn,
      databaseSchema = resultSchema,
      tablePrefix = tablePrefix
    )
  }
}
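
# A minimal sketch (assumed inputs, illustrative only) of recreating the result
# tables in an sqlite results database using the documented parameters:
#   charResultDbCD <- createSqliteDatabase(sqliteLocation = tempdir())
#   createCharacterizationTables(
#     connectionDetails = charResultDbCD,
#     resultSchema = "main",
#     targetDialect = "sqlite",
#     deleteExistingTables = TRUE,
#     createTables = TRUE,
#     tablePrefix = "c_"
#   )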


migrateDataModel <- function(
    connectionDetails,
    connection,
    databaseSchema,
    tablePrefix = ""
    ) {
  ParallelLogger::logInfo("Migrating data set")
  migrator <- getDataMigrator(
    connectionDetails = connectionDetails,
    databaseSchema = databaseSchema,
    tablePrefix = tablePrefix
  )
  migrator$executeMigrations()
  migrator$finalize()

  ParallelLogger::logInfo("Updating version number")
  updateVersionSql <- SqlRender::loadRenderTranslateSql("UpdateVersionNumber.sql",
    packageName = utils::packageName(),
    database_schema = databaseSchema,
    table_prefix = tablePrefix,
    dbms = connectionDetails$dbms
  )

  if (missing(connection)) {
    connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
    on.exit(DatabaseConnector::disconnect(connection))
  }
  DatabaseConnector::executeSql(connection, updateVersionSql)
}


getDataMigrator <- function(connectionDetails, databaseSchema, tablePrefix = "") {
  ResultModelManager::DataMigrationManager$new(
    connectionDetails = connectionDetails,
    databaseSchema = databaseSchema,
    tablePrefix = tablePrefix,
    migrationPath = "migrations",
    packageName = utils::packageName()
  )
}
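
# A minimal sketch (illustrative only) of running the schema migrations by hand,
# mirroring what migrateDataModel does; the connection details are an assumption:
#   migrator <- getDataMigrator(
#     connectionDetails = charResultDbCD,
#     databaseSchema = "main",
#     tablePrefix = "c_"
#   )
#   migrator$executeMigrations()
#   migrator$finalize()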

getResultTables <- function() {
  return(
    unique(
      c(
        readr::read_csv(
          file = system.file(
            "settings",
            "resultsDataModelSpecification.csv",
            package = "Characterization"
          ),
          show_col_types = FALSE
        )$table_name,
        "migration", "package_version"
      )
    )
  )
}



# Removes scientific notation for any columns that are
# formatted as doubles. Based on this GitHub issue:
# https://github.com/tidyverse/readr/issues/671#issuecomment-300567232
formatDouble <- function(x, scientific = FALSE, ...) {
  doubleCols <- vapply(x, is.double, logical(1))
  x[doubleCols] <- lapply(x[doubleCols], format, scientific = scientific, ...)

  return(x)
}
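
# A minimal sketch (illustrative only) of the effect of formatDouble before
# writing a csv; the data.frame contents below are assumptions:
#   x <- data.frame(id = 1L, rate = 0.0000001234)
#   formatDouble(x)$rate
#   # returns something like "0.0000001234" instead of "1.234e-07";
#   # the integer id column is left untouched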
