Nothing
################################################################################
## The functions
## initEnvironment
## disconnectDB
## genericQuery
##
## A set of internal helper functions are included too
##
## History:
## -----------------------------------------------------------------------------
## Date Programmer Note
## ---------- -------------------- ------------------------------------------
## 2020-12-04 Bo Larsen Initial version
################################################################################
################################################################################
## Published functions
################################################################################
#' Initialize the environment.
#'
#' Open or create a SEND database and return a token for the open database
#' connection.
#'
#' If the function is executed with parameter \code{dbCreate=FALSE} (default),
#' a connection to the specified database is opened. Dependent of the type of
#' database (parameter \code{dbType}), a login using specified user credentials
#' (parameters \code{dbUser} and \code{dbPwd}) may be done.\cr
#' The database must contain a set of tables representing the SEND domains
#' compliant with SEND IG version 3.0 and/on 3.1.\cr
#'
#' If the function is executed with parameter \code{dbCreate=TRUE}, an empty
#' database is created and opened. This is only supported for a SQLite database,
#' i.e. parameter \code{dbType='sqlite'}. The SEND domain tables may then be
#' created by execution of the function \code{\link{dbCreateSchema}}.
#'
#' Besides the open database connection, a set of CDISC SEND controlled
#' terminology values are imported. If parameter \code{ctFile} is specified with
#' a path to an Excel file containing a CDISC SEND ct version downloaded from
#' \url{https://evs.nci.nih.gov/ftp1/CDISC/SEND/}, the content
#' from this file is imported and used by some of the package's functions.
#' Else a set of CDISC SEND CT values which are included in the packages is
#' used by the package's functions. It's the newest CDISC SEND CT version at the
#' time of the build of the current version of the package which is included.
#'
#' @param dbType Mandatory, character\cr
#' The type of database, valid values (case insensitive):
#' \itemize{
#' \item 'sqlite'
#' \item 'oracle'
#' }
#' @param dbPath Mandatory, character\cr
#' The path to the database (path to file or another kind of db reference)
#' @param dbCreate Mandatory, boolean\cr
#' If \code{TRUE}, a new database is to be created - this is only valid for
#' \code{dbType} 'sqlite'
#' @param dbUser Mandatory, character - if login credentials are required for the
#' specific db type\cr
#' The user name to be used for login to database.
#' @param dbPwd Mandatory, character - if login credentials are required for the
#' specific db type\cr
#' The password to be used for login to database.
#' @param dbSchema Optional, character\cr
#' The table owner of the SEND table in the specific database.\cr
#' This parameter is only relevant to specify if it is necessary to prefix
#' table names with schema in SQL statements i the database.
#' @param ctFile Optional, character.\cr
#' Name (full path) of CDISC CT file in Excel xls format to be imported.
#' Only relevant to use if another CDISC CT version than the version
#' included in packages is wanted.\cr
#'
#' @return The function returns a token which is a data structure describing
#' the open database connection. This token must be given as input parameter
#' to all functions accessing the actual database.
#'
#' @export
#'
#' @examples
#'
#' \dontrun{
#' db <- initEnvironment(dbType='sqlite',
#' dbPath='//servername/SendData/db/send.db',
#' ctFile='//servername/SendData/metadata/SEND_Terminology_2019-12-27.xls')
#' db <- initEnvironment(dbType='oracle',
#' dbPath='dbserver:1521/send_db',
#' dbUser='ME',
#' dbPwd='mypassword',
#' dbSchema = 'send',
#' ctFile='//servername/SendData/metadata/SEND_Terminology_2019-12-27.xls')
#' }
#'
#'
################################################################################
# Import some functions from used packages to used by all functions in the
# package:
#' @importFrom data.table .N
#' @importFrom data.table .SD
#' @importFrom data.table %like%
#' @importFrom data.table :=
#' @importFrom stats na.omit
#' @importFrom magrittr %>%
#'
initEnvironment<-function(dbType=NULL,
dbPath=NULL,
dbCreate=FALSE,
dbUser=NULL,
dbPwd=NULL,
dbSchema=NULL,
ctFile=NULL) {
## Evaluate database specification parameters
# dbType
errMsg<-paste0('Parameter dbType must be one of: ', paste(sapply(validDbTypes[,c('db_type')], paste0), collapse=','))
if (is.null(dbType) | isTRUE(is.na(dbType)) | isTRUE(dbType==''))
stop(errMsg)
else if (nrow(dbProperties<-validDbTypes[db_type==tolower(dbType)]) == 0)
stop(errMsg)
dbType<-tolower(dbType)
errMsg<-'Parameter %s must be a non-empty string'
# dbPath
if (is.null(dbPath) | isTRUE(is.na(dbPath)) | isTRUE(dbPath==''))
stop(sprintf(errMsg, 'dbPath'))
# dbUSer and dbPwd
if (as.logical(dbProperties[,c('req_credentials')])) {
if (is.null(dbUser) | isTRUE(is.na(dbUser)) | isTRUE(dbUser==''))
stop(sprintf(errMsg, 'dbUser'))
if (is.null(dbPwd) | isTRUE(is.na(dbPwd)) | isTRUE(dbPwd==''))
stop(sprintf(errMsg,'dbPwd'))
}
# dbSchema - set to "" if no value are specified
if (is.null(dbSchema) | isTRUE(is.na(dbSchema)) | isTRUE(dbSchema==''))
dbSchema<-""
## Verifying specified CT file - and import
ctDataFile <- prepareCtMetadata(ctFile)
# dbCreate
if (isTRUE(is.na(dbCreate)) | isFALSE(typeof(dbCreate) == 'logical'))
stop('Parameter dbCreate must be TRUE or FALSE')
if (dbCreate & dbType != 'sqlite')
stop('Parameter dbCreate=TRUE only allowed for SQLite database')
## Check for existence of package for the specified db type
##
if (!requireNamespace(as.character(dbProperties[,c('package_name')]), quietly = TRUE))
stop(sprintf('Package "%s" needed for access to a(n) %s database. Please install it.',
as.character(dbProperties[,c('package_name')]),
as.character(dbProperties[,c('db_type')])),
call. = FALSE)
## if file based db type - check for existence of db file
if (dbType == 'sqlite') {
dbFileExists <-file.exists(dbPath)
if (!dbFileExists & !dbCreate)
stop(paste0('The database file ' , dbPath, ' could not be found'))
if (dbCreate & dbFileExists)
stop(paste0('Cannot create a new database because the database file ' , dbPath, ' exists.'))
}
## Connect to the database
# - execute the function specific for the db type to create the connections
if (as.logical(dbProperties[,c('req_credentials')]))
dbInputParams<-paste0('(dbPath, dbUser, dbPwd)')
else
dbInputParams<-paste0('(dbPath)')
dbHandle<-eval(parse(text=paste0('connectDB_',dbProperties[,c('db_type')],dbInputParams)))
## Verify existence of function specific for db type to execute a generic query
genericQueryName <- paste0('genericQuery_', dbProperties[,c('db_type')])
if (!exists(genericQueryName))
stop(sprintf('A function named %s is missing', genericQueryName))
## Verify existence of function specific for db type to execute disconnect
# from database
disconnectDBName <- paste0('disconnectDB_', dbProperties[,c('db_type')])
if (!exists(disconnectDBName))
stop(sprintf('A function named %s is missing', disconnectDBName))
## Verify existence of function specific for db type to verify existence of
# specific table in database
dbExistsTableName <- paste0('dbExistsTable_', dbProperties[,c('db_type')])
if (!exists(dbExistsTableName))
stop(sprintf('A function named %s is missing', dbExistsTableName))
## Verify existence of function specific for db type to list columns in a
# specific table in database
dbListFieldsName <- paste0('dbListFields_', dbProperties[,c('db_type')])
if (!exists(dbListFieldsName))
stop(sprintf('A function named %s is missing', dbListFieldsName))
## Return a db token to be included in all calls to database related functions.
# It includes:
# - The temporary RData file containing imported CDISC CT data
# - Handle to the open database connection
# - Database schema to select data from
# - Pointers to function specific for the database type:
# - execution of a generic SQL query
# - disconnect from database
list(ctDataFile = ctDataFile,
dbType = dbType,
dbHandle = dbHandle,
dbSchema = dbSchema,
genericQuery = get(genericQueryName),
disconnectDB = get(disconnectDBName),
dbExistsTable = get(dbExistsTableName),
dbListFieldsTable = get(dbListFieldsName)
)
}
################################################################################
#' Disconnect from the open database.
#'
#' Close database session and disconnect from open database.
#'
#' @param dbToken Mandatory\cr
#' Token for the open database connection (see \code{\link{initEnvironment}}).
#' @return No return value, called for side effects\cr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' disconnectDB()
#' }
disconnectDB <- function(dbToken) {
# Execute the disconnect function specific for the current database type.
dbToken$disconnectDB(dbToken$dbHandle)
}
################################################################################
#' Execute database query and returns fetched rows.
#'
#' The function executes a SQL select statements in the database and returns
#' the fetched set of rows as a data.table.
#'
#' @param dbToken Mandatory\cr
#' Token for the open database connection (see \code{\link{initEnvironment}}).
#' @param queryString Mandatory, character.\cr
#' The select statement to execute
#' @param queryParams Optional, character.\cr
#' A variable with values for bind variable referenced in the where clause of
#' the select statement
#'
#' @return Data.table with the set of fetched rows
#'
#' @export
#'
#' @examples
#'
#' \dontrun{
#' genericQuery(dbToken,
#' 'select studyid, tsseq, tsgrpid, tsparmcd, tsval from ts')
#' genericQuery(dbToken,
#' 'select studyid, tsval from ts where tsprmcd = "SDESIGN" and studyid in (:1)',
#' list("1234546","222333","444555"))
#' }
genericQuery <- function(dbToken, queryString, queryParams=NULL) {
# Prepare the given select statement by adding potential schema name
# to all tables and execute the genericQuery function specific for the
# actual db type
# ## ADD POSIBILITY FOR MULTIPLE QUERY PARAMS
dbToken$genericQuery(dbToken$dbHandle,
selectStmtAddSchema(dbToken$dbSchema,
queryString),
queryParams)
}
################################################################################
#' Get labels for columns in a data.table
#'
#' @param table Mandatory\cr
#' The data.table to get column labels for
#'
#' @return A named vector with each column/label pair. If a column have no
#' defined label, the label is 'na'
#' @export
#'
#' @examples
#'
#' \dontrun{
#' colLabels = getTabColLabels(controlAnimalsAll)
#' }
getTabColLabels <- function(table) {
# Extract column names from input table and add sequence to ensure the output is sorted in the same order
tabCols <- data.table::setnames(data.table::as.data.table(colnames(table), keep.rownames=T), 'V1', 'COLUMN_NAME')
tabCols$seq <- seq.int(nrow(tabCols))
# Extract the labels for the set of columns from the lists of
# - SEND IG columns
# - additional sendigR specific columns
# where column names are equal
dt <- data.table::setorder(
data.table::merge.data.table(tabCols,
data.table::rbindlist(list(unique(sendIGcolumns[,list(COLUMN_NAME, LABEL)]),
additionalColumns[is.na(REGEXP)]),
use.names = TRUE, fill = TRUE),
by='COLUMN_NAME',
all.x = TRUE),'seq')[,list(COLUMN_NAME,
LABEL = ifelse(is.na(LABEL),
'na',
LABEL))]
# For columns where an input column does not match any column/label pair,
# check if they match any of the column/label pairs defined by regular expression
naCols <- dt[LABEL == 'na', which=TRUE]
if (length(naCols) > 0) {
regexCols <- additionalColumns[REGEXP == 'Y']
if (length(regexCols) > 0)
for (i in 1:length(naCols)) {
for (j in 1:nrow(regexCols)) {
if (grepl(paste0('^',regexCols[j]$COLUMN_NAME,'$'), dt[naCols[i]]$COLUMN_NAME)) {
dt[naCols[i]]$LABEL <- regexCols[j]$LABEL
break
}
}
}
}
stats::setNames(dt$LABEL, dt$COLUMN_NAME)
}
################################################################################
## Helper functions
################################################################################
################################################################################
## Check if specified table exists in database
# Returns boolean
dbExistsTable <- function(dbToken, table) {
if (dbToken$dbSchema == '')
dbToken$dbExistsTable(dbToken$dbHandle, table)
else
dbToken$dbExistsTable(dbToken$dbHandle, dbToken$dbSchema ,table)
}
################################################################################
## Return list of columns in specified database table
dbListFields<- function(dbToken, table) {
if (dbToken$dbSchema == '')
dbToken$dbListFields(dbToken$dbHandle, table)
else
dbToken$dbListFields(dbToken$dbHandle, dbToken$dbSchema, table)
}
################################################################################
## Prepare the SEND CT metadata to be used in functions which checks data
## against specific CDISC code lists.
# If a CT file is specified
# - import code list and values from CT file
# Else use the CT code lists and value metadata included in package
# Save code lists and value in temporary Rdata files
# Return the R data file name
prepareCtMetadata<-function(ctFile) {
if (!is.null(ctFile) & isFALSE(is.na(ctFile)) & isFALSE(ctFile==''))
{
# Check file is XLS and exists
if (! tolower(xfun::file_ext(ctFile)) == 'xls')
stop(paste0('The ctFile ' , ctFile, ' is not an XLS file'))
if (!file.exists(ctFile)) {
stop(paste0('The ctFile ' , ctFile, 'could not be found'))
}
# Import content from worksheet named SEND<sep>Terminology<something>
# - include relevant columns and all rows
ctSheets<-readxl::excel_sheets(ctFile)
ctAll<-data.table::as.data.table(readxl::read_xls(ctFile,
sheet=ctSheets[grepl('send[_ ]terminology',
tolower(ctSheets) )]))[,c("Code", "Codelist Code", "CDISC Submission Value")]
data.table::setnames(ctAll, c("Codelist Code","CDISC Submission Value"),
c("CodelistCode","CDISCSubmissionValue"))
# Extract all code list names
CDISCctCodeLists = ctAll[is.na(CodelistCode), c('Code', 'CDISCSubmissionValue')]
data.table::setnames(CDISCctCodeLists, c("Code","CDISCSubmissionValue"),
c("CodelistCode","CodeList"))
# Extract all code list values
CDISCctCodeValues = ctAll[!is.na(CodelistCode), c('CodelistCode','CDISCSubmissionValue')]
}
# Save the all CDISC CT code lists and values in a temporary RData file
CDISCctFile = tempfile('CDISCct', fileext='.RData')
save(CDISCctCodeLists, CDISCctCodeValues, file = CDISCctFile)
# Return file name with saved data
CDISCctFile
}
################################################################################
## Extract values for specific code lists from extracted CDISC CT code lists/values
getCTCodListValues<-function(dbToken, pCodeList=NULL) {
if (is.null(pCodeList) | isTRUE(is.na(pCodeList)) | isTRUE(pCodeList=='')) {
stop('Input parameter codeList must have assigned a code list name')
}
# load the CDISC ct data from temporary RData file
load(dbToken$ctDataFile)
# Check if the requested code list exists
if (!toupper(pCodeList) %in% CDISCctCodeLists$CodeList) {
stop('The specified code list does not exist in the CDISC terminolgy file')
}
# Extract and return a character vector with all value for the requested code list
data.table::merge.data.table(CDISCctCodeLists[CodeList==toupper(pCodeList), c('CodelistCode')],
CDISCctCodeValues[!is.na(CodelistCode)],
by = 'CodelistCode')$CDISCSubmissionValue;
}
################################################################################
## Take a SQL statement as input
# return the statement modified in this way:
# - substitute line shifts (\n) with space
# - substitute multiple consecutive spaces with one space
# - substitute '==' with '='
# - if the global defined db schame names is not empty -
# - add the schema plus '.' in front of all table names ,
# i.e. names following a 'from ' or 'join ' except
# subqueries (starting with '(')
selectStmtAddSchema <- function(dbSchema, stmt) {
vSchema <- ifelse(dbSchema=='','',paste0(dbSchema,'.'))
stmt %>%
stringr::str_replace_all('\n', ' ') %>%
stringr::str_replace_all(' +', ' ') %>%
stringr::str_replace_all('=+', '=') %>%
stringr::str_replace_all(stringr::regex('(from |join )([^(])', ignore_case = TRUE),
paste0('\\1', vSchema, '\\2'))
}
################################################################################
# Prepare a set of final set rows to be returned from a data extraction
# function (i.e. the function calling this function).
# - merge potential pairs of xxx_MSG columns.x/y originated from both
# - the data table given as input to the calling extraction function
# - generated by extraction function
# respectively
# - set the column order to be
# 1 - the columns from the data table given as input to the calling
# extraction function excluding potential xxx_MSG columns.
# 2 - the columns added by the calling extraction function
# 3 - the potential xxx_MSG columns in alphabetically order
#
# Parameters:
# dt - the data table to process
# srcCols - the list of columns (vector) in the data table given as input to the calling
# function. May be empty.
# addCols - the list of columns (vector) added by the calling function
prepareFinalResults <- function(dt, srcCols, addCols) {
if (exists('..colList'))
rm('..colList')
# Get list of message columns which may be included from from two merged
# tables (i.e. both a _MSG.x and _MSG.y column included)
mergedMsgColList <- names(dt)[names(dt) %like% 'MSG.x$']
# Statement to merge content of a given message column which exists in both tables
# - merge the _MSG column from each of the merged tables into one column
# - non-empty messages are separated by '|'
# - exclude the original _MSG columns after the merge
mergeMsgColStmt <- "dt[,`:=` (msgCol=ifelse(!is.na(msgCol.x) & !is.na(msgCol.y), paste(msgCol.y, msgCol.x, sep='|'), DescTools::Coalesce(msgCol.x, msgCol.y)))][, `:=` (msgCol.x=NULL,msgCol.y=NULL)]"
# Execute merge for each of message column included from two tables
# - remove the last '.x' from message column name
for (msgCol in mergedMsgColList)
eval(str2expression(gsub('msgCol', gsub('.x$','',msgCol), mergeMsgColStmt)))
if (length(srcCols) == 1 & srcCols[1] == '')
# Only columns added by calling function included
colList <- addCols
else
# Add the columns specified in addCols to the list of columns specified in
# srcCols - excluding any potential MSG columns
colList <- append(srcCols[!srcCols %like% 'MSG$'], addCols)
# Get list of potential message columns
# - sort names alphabetically
msgColList <- sort(names(dt)[names(dt) %like% 'MSG$'])
if (length(msgColList) > 0)
# Add message column list
colList <- append(colList, msgColList)
# Include specified columns in correct order and return
eval(str2expression("data.table::setcolorder(dt[,..colList], colList)"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.