R/05_DB_pulls.R

#### Creating data frames from GADS DB
#############################################################################
#' Creates GADS data frame.
#'
#' Function to get a data frame from GADS DB.
#'
#' ...
#'
#'@param filePath Path of the existing db file.

#'@return Returns a data frame.
#'
#'@examples
#'# # See vignette.
#'
#'
getDF <- function(filePath, vSelect = NULL) {
  # 1) check input
  check_dbPath(dbPath = filePath)
  # check names and sort to data tables
  varList <- prep_vSelect(vSelect = vSelect, filePath = filePath)

  # Establish Connection, disconnect when function exits
  con <- dbConnect(RSQLite::SQLite(), dbname = filePath)
  on.exit(dbDisconnect(con))

  # 2) get names/structure/mergeorder for data base
  keyList <- dbKeys(filePath, includeMeta = FALSE)
  mergeOrder <- get_mergeOrder(con)

  # 3) create query
  pullQ <- writeQ_pull(mergeOrder = mergeOrder, fkList = keyList$fkList, varList = varList)

  # 4) execute query
  df <- dbGet_safe(conn = con, statement = pullQ)

  df
}

### 1) Input check ---------------------------------------------------------
prep_vSelect <- function(vSelect, filePath) {
  allNames <- dbNames(filePath = filePath)

  # check if all names anywhere in data set
  allNames_vec <- unique(unlist(allNames))
  check_vSelect(vSelect, allNames_vec)

  # create List with data table attribution
  varList <- lapply(allNames, order_vSelect, vSelect = vSelect)

  varList
}

check_vSelect <- function(vSelect, allNames_vec) {
  missVars <- !vSelect %in% allNames_vec
  if(any(missVars)) {
    vars <- vSelect[missVars]
    vars <- paste(vars, collapse = ", ")
    stop(paste(vars, "are not in the data base"))
  }
}

order_vSelect <- function(allNames_df, vSelect) {
  if(is.null(vSelect)) return(allNames_df)
  allNames_df[allNames_df %in% vSelect]
}



### 2) Get Meta-Information from DB ---------------------------------------------------------
get_mergeOrder <- function(con) {
  q <- "SELECT * FROM metaInformation;"
  mergeOrder <- dbGetQuery(conn = con, statement = q)
  # restore original format
  mO <- unlist(strsplit(unlist(mergeOrder), " "))
  names(mO) <- NULL
  mO[mO != ""]
}


### 3) Create Pull Query ---------------------------------------------------------
writeQ_pull <- function(varList, mergeOrder, fkList) {
  ljoins <- write_LJoins(mergeOrder = mergeOrder, fkList = fkList)
  selVars <- write_SEL(varList = varList)
  # put together query
  paste("SELECT DISTINCT", selVars ,
        "FROM",
        mergeOrder[1],
        ljoins, ";")
}
# part of query for left joins
write_LJoins <- function(mergeOrder, fkList) {
  joins <- vector("character")
  for(i in 2:length(mergeOrder)) {
    keyName <- fkList[[mergeOrder[i]]]$Keys
    joins[i-1] <- paste("LEFT JOIN", mergeOrder[i],
                        "using (", paste(keyName, collapse = ", "), " )", sep = " ")
  }
  paste(joins, collapse = " ")
}
# part of query for variable selection
write_SEL <- function(varList = NULL) {
  # default: selects all variables
  if(is.null(varList)) return(" * ")

  # otherwise:
  varVec <- unlist(Map(write_varNames, df_name = names(varList), vars = varList))
  varVec <- paste(varVec, collapse = " ")
  # remove last comma
  varVec <- substr(x = varVec, start = 1, stop = nchar(varVec) - 2)
  varVec
}
# create variables names for single data frame
write_varNames <- function(df_name, vars) {
  paste(df_name, ".", vars, ", ", sep = "")
  # paste(vars, ", ", sep = "")
}

# right know duplicated variables in data set!!!



# 04) Execute Queries ---------------------------------------------------------
# safe version of dbExecute with verbose error
dbGet_safe <- function(conn, statement) {
  check <- try(dbGetQuery(conn = conn, statement = statement))
  if(class(check) == "try-error") {
    stop(paste("Error while trying to execute the following query", statement))
  }
  check
}
b-becker/eatGADS documentation built on May 24, 2019, 8:47 p.m.