R/createDdl.R

Defines functions createAsciiHeader createForeignKeys createPrimaryKeys createDdl

Documented in createDdl createForeignKeys createPrimaryKeys

# Copyright 2019 Observational Health Data Sciences and Informatics
#
# This file is part of CdmDdlBase
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Create the OHDSI-SQL Common Data Model DDL code
#'
#' The createDdl, createForeignKeys, and createPrimaryKeys functions each return a character string
#' containing their respective DDL SQL code in OHDSQL dialect for a specific CDM version.
#' The SQL they generate needs to be rendered and translated before it can be executed.
#'
#' The DDL SQL code is created from two csv files that detail the OMOP CDM Specifications
#' (one describing the tables, one describing the fields).
#' These files also form the basis of the CDM documentation and the Data Quality
#' Dashboard.
#'
#' @param cdmVersion The version of the CDM you are creating, e.g. 5.3, 5.4.
#'   Must be a single character string that is one of the supported versions.
#' @return A character string containing the OHDSQL DDL
#' @importFrom utils read.csv
#' @export
#' @examples
#'\dontrun{
#' ddl <- createDdl("5.4")
#' pk <- createPrimaryKeys("5.4")
#' fk <- createForeignKeys("5.4")
#'}
createDdl <- function(cdmVersion){
  # argument checks
  stopifnot(is.character(cdmVersion), length(cdmVersion) == 1, cdmVersion %in% listSupportedVersions())

  cdmTableCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Table_Level.csv")), package = "CommonDataModel", mustWork = TRUE)
  cdmFieldCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Field_Level.csv")), package = "CommonDataModel", mustWork = TRUE)

  tableSpecs <- read.csv(cdmTableCsvLoc, stringsAsFactors = FALSE)
  cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)

  # Returns the SQL fragments (hint, CREATE TABLE line, one line per column, separator)
  # for a single table. Columns are handled by position, so the closing parenthesis is
  # always attached to the last row of the specification.
  tableSql <- function(tableName) {
    # which() drops NA comparisons, matching the row selection subset() would make
    fields <- cdmSpecs[which(cdmSpecs$cdmTableName == tableName), , drop = FALSE]
    fieldNames <- fields$cdmFieldName
    nFields <- length(fieldNames)

    if ("person_id" %in% fieldNames) {
      hint <- "\n\n--HINT DISTRIBUTE ON KEY (person_id)\n"
    } else {
      hint <- "\n\n--HINT DISTRIBUTE ON RANDOM\n"
    }

    # "offset" is double-quoted in the generated SQL; every other name is emitted as is
    quotedNames <- fieldNames
    isOffset <- fieldNames %in% "offset"
    quotedNames[isOffset] <- paste0('"', fieldNames[isOffset], '"')

    # only fields explicitly marked "Yes" are NOT NULL
    nullableSql <- rep(" NULL", nFields)
    nullableSql[fields$isRequired %in% "Yes"] <- " NOT NULL"

    # every column line ends with a comma except the last, which closes the statement
    closingSql <- rep(",", nFields)
    closingSql[nFields] <- " );"

    # sprintf() is vectorised and yields character(0) for a table without fields
    fieldSql <- sprintf("\n\t\t\t%s %s%s%s", quotedNames, fields$cdmDatatype, nullableSql, closingSql)

    c(hint, paste0("CREATE TABLE @cdmDatabaseSchema.", tableName, " ("), fieldSql, "")
  }

  header <- paste0("--@targetDialect CDM DDL Specification for OMOP Common Data Model ", cdmVersion)
  tableSqlParts <- unlist(lapply(tableSpecs$cdmTableName, tableSql), use.names = FALSE)

  return(paste0(c(header, tableSqlParts), collapse = ""))
}


#' @describeIn createDdl createPrimaryKeys Returns a string containing the OHDSQL for creation of primary keys in the OMOP CDM.
#' @return A string containing the OHDSQL for creation of primary keys in the OMOP CDM.
#' @export
createPrimaryKeys <- function(cdmVersion){
  # argument checks
  stopifnot(is.character(cdmVersion), length(cdmVersion) == 1, cdmVersion %in% listSupportedVersions())

  cdmFieldCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Field_Level.csv")), package = "CommonDataModel", mustWork = TRUE)
  cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)

  # keep only the rows flagged as primary keys; which() drops NA flags
  primaryKeys <- cdmSpecs[which(cdmSpecs$isPrimaryKey == "Yes"), , drop = FALSE]

  header <- paste0("--@targetDialect CDM Primary Key Constraints for OMOP Common Data Model ", cdmVersion, "\n")

  # One ALTER TABLE statement per primary key row, in specification order.
  # Working row-wise (instead of looking rows up again by field name) guarantees
  # exactly one statement per row even if two tables use the same key field name.
  # sprintf() returns character(0) when there are no primary keys.
  pkSql <- sprintf("\nALTER TABLE @cdmDatabaseSchema.%s ADD CONSTRAINT xpk_%s PRIMARY KEY NONCLUSTERED (%s);\n",
                   primaryKeys$cdmTableName,
                   primaryKeys$cdmTableName,
                   primaryKeys$cdmFieldName)

  return(paste0(c(header, pkSql), collapse = ""))
}

#' @describeIn createDdl createForeignKeys Returns a string containing the OHDSQL for creation of foreign keys in the OMOP CDM.
#' @return A string containing the OHDSQL for creation of foreign keys in the OMOP CDM.
#' @export
createForeignKeys <- function(cdmVersion){
  # argument checks
  stopifnot(is.character(cdmVersion), length(cdmVersion) == 1, cdmVersion %in% listSupportedVersions())

  cdmFieldCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Field_Level.csv")), package = "CommonDataModel", mustWork = TRUE)
  cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)

  # keep only the rows flagged as foreign keys; which() drops NA flags
  foreignKeys <- cdmSpecs[which(cdmSpecs$isForeignKey == "Yes"), , drop = FALSE]

  header <- paste0("--@targetDialect CDM Foreign Key Constraints for OMOP Common Data Model ", cdmVersion, "\n")

  # One ALTER TABLE statement per foreign key row, in specification order.
  # Working row-wise avoids building a "<table>_<field>" lookup key, which is
  # ambiguous because both table and field names contain underscores.
  # sprintf() returns character(0) when there are no foreign keys.
  fkSql <- sprintf("\nALTER TABLE @cdmDatabaseSchema.%s ADD CONSTRAINT fpk_%s_%s FOREIGN KEY (%s) REFERENCES @cdmDatabaseSchema.%s (%s);\n",
                   foreignKeys$cdmTableName,
                   foreignKeys$cdmTableName,
                   foreignKeys$cdmFieldName,
                   foreignKeys$cdmFieldName,
                   foreignKeys$fkTableName,
                   foreignKeys$fkFieldName)

  return(paste0(c(header, fkSql), collapse = ""))
}



# A helper function that returns a single character string with the OMOP CDM ascii art
# for a given major and minor cdm version. The string consists of 7 text rows, each
# terminated by a newline.
# major and minor must each be a single number between 0 and 99.
# example: cat(createAsciiHeader(5, 3))
createAsciiHeader <- function(major, minor) {

  stopifnot(is.numeric(major), is.numeric(minor), length(major) == 1, length(minor) == 1)
  stopifnot(major %in% 0:99, minor %in% 0:99)

  # An inner function that returns an ascii art matrix (7 rows) for any number between 0 and 99
  numberMatrix <- function(num){
    stopifnot(is.numeric(num), num %in% 0:99)

    # An inner function that returns a 7x7 matrix of number ascii art for the number 0 through 9
    # for the number 1 a 7x5 matrix is returned because 1 is narrower than other numbers.
    singleDigit <- function(num) {
      # the ten digits side by side: 7 rows of 70 characters, stored row after row
      nums <- c('  ###     #    #####  ##### #      ####### ##### ####### #####  #####  #   #   ##   #     ##     ##    # #      #     ##    # #     ##     ##     # # #         #      ##    # #      #          #  #     ##     ##     #   #    #####  ##### #    # ###### ######    #    #####  #######     #   #   #            ########      ##     #  #    #     #      # #   #    #   #      #     #     # #     ##     #  #    #     ##     #  ###   ##### ####### #####      #  #####  #####   #     #####  ##### ')
      numsMatrix <- matrix(data = strsplit(nums, character(0))[[1]], nrow = 7, byrow = TRUE)
      # each digit occupies 7 consecutive columns
      cols <- seq(num * 7 + 1, num * 7 + 7, by = 1)
      out <- numsMatrix[1:7, cols]
      # the number 1 is narrower than the other numbers
      if (num == 1) out <- out[1:7, 2:6]
      out
    }

    if (num < 10) {
      return(singleDigit(num))
    } else {
      # two digits separated by a single blank column
      gap <- matrix(rep(" ", 7), nrow = 7)
      return(cbind(singleDigit(floor(num / 10)), gap, singleDigit(num %% 10)))
    }
  }

  # NOTE: the continuation lines of this literal are part of the string, so their
  # leading whitespace must not be changed. Each row starts with a "." and a newline
  # that act as row separators and are stripped below.
  omop <- c('.
  ####### #     # ####### ######      #####  ######  #     #          .
  #     # ##   ## #     # #     #    #     # #     # ##   ##    #    #.
  #     # # # # # #     # #     #    #       #     # # # # #    #    #.
  #     # #  #  # #     # ######     #       #     # #  #  #    #    #.
  #     # #     # #     # #          #       #     # #     #    #    #.
  #     # #     # #     # #          #     # #     # #     #     #  # .
  ####### #     # ####### #           #####  ######  #     #      ##  ')

  # convert to matrix and remove the first two columns (the "." separator and the newline)
  omop <- matrix(strsplit(omop, character(0))[[1]], nrow = 7, byrow = TRUE)
  omop <- omop[, c(-1, -2)]

  # the version separator: 4 blank rows followed by a 3x3 block of "#"
  dot <- matrix(c(rep(" ", 3 * 4), rep("#", 3 * 3)), nrow = 7, byrow = TRUE)
  space <- matrix(rep(" ", 7), nrow = 7)
  # a 7x1 column of newlines that terminates every row
  newline <- matrix(rep("\n", 7), nrow = 7)

  headerMatrix <- cbind(omop, space, numberMatrix(major), space, dot, space, numberMatrix(minor), newline)

  # t() makes the column-major flattening walk the matrix row by row
  header <- paste(t(headerMatrix), collapse = "")
  return(header)
}

Try the CommonDataModel package in your browser

Any scripts or data that you put into this service are public.

CommonDataModel documentation built on Oct. 2, 2024, 1:08 a.m.