#### Creating DB from list of data frames
#############################################################################
#' Creates GADS data base.
#'
#' Function to create a relational data base.
#'
#' Primary keys need to be unique in each data frame. Foreign keys need to have the same names as primary keys they are referencing too. All three lists need to be named in the exact same way and order.
#'
#'@param dfList Named list of data frames per level
#'@param primeKeys Named character vector of one or more primary Keys (identification variables).
#'@param filePath Path of db file (including name) to be created, has to end on db.
#'
#'@return Creates a data base in the given path, returns a list of logicals corresponding to data input list.
#'
#'@examples
#'# Create in memory data base.
#' createDB(dfList = datList, pkList = pkList, fkList = fkList, filePath = ":memory:")
#'
createDB <- function(allList, filePath) {
stopifnot(all(c("dfList", "labelList", "pkList", "fkList") %in% names(allList)))
dfList <- allList$dfList
labelList <- allList$labelList
pkList <- allList$pkList
fkList <- allList$fkList
# 1) Create queries for actual data tables
dtQueries <- Map(writeQ_create, df = dfList, primeKey = pkList, foreignKey = fkList, df_name = names(dfList))
# Create query for data table with merging information (meta data table)
metaQuery <- writeQ_mergeOrder(dfMergeOrder = names(dfList))
# Create query for data table with variable, value and missing labels (relation to other data tables?)
labelDT_name <- "Labels"
labelQuery <- writeQ_create(df = labelList, df_name = labelDT_name,
primeKey = c("varName", "value", "data_table"), foreignKey = NULL)
# all queries into one object
createQueries <- c(metaQuery, labelQuery, dtQueries)
# 2) Create empty Data base
init_DB(filePath)
# Establish Connection, disconnect when function exits
con <- dbConnect(RSQLite::SQLite(), dbname = filePath)
on.exit(dbDisconnect(con))
# 3) Execute "create Queries"
lapply(createQueries, dbExecute_safe, conn = con)
# 4) fill data base tables with data
# normale data tables
lapply(seq_along(dfList), function(i)
dbWriteTable(conn = con, name = names(dfList)[i], value = dfList[[i]], append = TRUE))
# label data table
dbWriteTable(conn = con, name = labelDT_name, value = labelList, append = TRUE)
return()
}
# 01a) Create Queries actual data tables ---------------------------------------------------------
## write query for data tables (without foreign keys)
writeQ_create <- function(df, primeKey, foreignKey, df_name) {
# write string for variable definitions
varDefinitions <- write_varDef(df)
# write partial query for primary definition
pkDefinition <- write_primeKey(primeKey)
# write partial query for foreign Key definition if fk is defined
if(is.null(foreignKey$References)) {
fkDefinition <- ""
} else {
fkDefinition <- write_foreignKey(foreignKey)
}
# write query including create, variable definitions and primary key
paste("CREATE TABLE", paste(df_name), "(",
varDefinitions,
pkDefinition,
fkDefinition,
");")
}
# variable definitions
write_varDef <- function(df) {
varList <- vector(mode = "character", length = ncol(df))
# determine type of all variables
for(i in seq(ncol(df))) {
if(is.double(df[, i]) || is.integer(df[, i])) varType = "REAL"
else if(is.character(df[, i])) varType = "TEXT"
else if(is.factor(df[, i])) varType = "TEXT"
else stop("invalid variable type")
# write syntax per variable
varList[i] <- paste(names(df)[i], varType, ",")
}
# paste all together
paste(varList, collapse = " ")
}
# primary key definition
write_primeKey <- function(primeKey) {
pk <- paste(primeKey, collapse = ", ")
paste("PRIMARY KEY (", pk, ")")
}
# foreign key definition
write_foreignKey <- function(foreignKey) {
ref <- foreignKey$References
fk <- paste(foreignKey$Keys, collapse = ", ")
paste("FOREIGN KEY (", fk,")", "REFERENCES", ref, "(", fk,")")
}
# 01b) Create Queries meta information data tables ---------------------------------------------------------
## query which saves the order of data frame merging
writeQ_mergeOrder <- function(dfMergeOrder) {
createQ <- "CREATE TABLE metaInformation ( mergeOrder TEXT );"
insertQ <- paste("INSERT INTO metainformation (mergeOrder)",
"VALUES ( '", paste(dfMergeOrder, collapse = " "),"' );")
c(createQ, insertQ)
}
# 02) Create Empty Database ---------------------------------------------------------
## create DB
init_DB <- function(filePath) {
# check path / file
check_filePath(filePath)
# create DB, throws an error if sqlite3 not in Path!
shell(cmd = paste("sqlite3", filePath, ".databases", sep = " "), mustWork = TRUE)
}
## check path for db
check_filePath <- function(filePath){
if(identical(filePath, ":memory:")) {
message("filePath points to work memory")
return()
}
lastSL <- rev(unlist(gregexpr("/", filePath)))[[1]]
lastDot <- rev(unlist(gregexpr("\\.", filePath)))[[1]]
# divide string
directory <- substr(filePath, 1, lastSL)
fileFormat <- substr(filePath, lastDot, nchar(filePath))
# check directory
if(!dir.exists(directory)) stop(paste(directory, "is not an existing directory"))
# check file name
if(file.exists(filePath)) stop(paste(filePath, "is an existing data base"))
if(!identical(fileFormat, ".db")) stop("Filename does not end on .db")
return()
}
# 03) Execute Create Queries ---------------------------------------------------------
# safe version of dbExecute with verbose error
dbExecute_safe <- function(conn, statement) {
check <- try(dbExecute(conn = conn, statement = statement))
if(class(check) == "try-error") {
stop(paste("Error while trying to execute the following query", statement))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.