R/database-utilities-creating.R

Defines functions populate_lookup_tables generate_sql_columns generate_fk_columns generate_index_name order_tables_by_creation_order get_table_creation_order determine_fk_dependencies check_if_rules_exist update_rules_and_order get_fk_referenced_table generate_table_creation_query create_table create_schema create_all_tables_from_configurations

Documented in create_all_tables_from_configurations populate_lookup_tables

# Database utilities for creating tables and schemas
#' @include load-configurations.R

#' @title Creates tables based on yaml configuration files
#'
#' @param drop_first A boolean for dropping tables before creating
#'
#' @description  Calls \code{run_query} and \code{get_table_schema_names}
#' 
#' @return Boolean depending on existance of table
#' 
#' @export
#' 
create_all_tables_from_configurations <- function (drop_first = FALSE) {
  
  # Drop the tables first if specified
  if (drop_first) {
    lapply(database_tables, function (x) drop_table(get_full_table_name(x)))
  }
  
  # Order tables before creating them
  database_tables_ordered <- order_tables_by_creation_order(database_tables)
  
  # Create each table
  tables_created_successfully <- sapply(database_tables_ordered, function (db_tab) {
    return (create_table(db_tab))
  })
  
  # Return whether all were created successfully or not
  return (all(tables_created_successfully))
}

# Create a new schema
create_schema <- function (schema_name, conn = default_conn_to_mysql_server()) {
  if (substitute(conn) == "default_conn_to_mysql_server()") {
    on.exit(close_conn_to_mysql_server(conn))
  }
  q <- paste0("CREATE SCHEMA ", schema_name, ";")
  out <- try(run_query(q, conn), silent = TRUE)
  query_errored <- class(out) == "try-error"
  if (query_errored) {
    warning(out[1])
  }
  return (!query_errored && check_if_schema_exists(schema_name, conn))
}

# Will be a method for creating a table from a btclearn_table object
create_table <- function (database_table, conn = default_conn_to_mysql_server()) {
  if (substitute(conn) == "default_conn_to_mysql_server()") {
    on.exit(close_conn_to_mysql_server(conn))
  }
  
  # Check if the table already exists
  full_table_name <- get_full_table_name(database_table)
  table_exists <- check_if_table_exists(full_table_name, conn)
  if (table_exists) {
    warning("Table '", full_table_name, "' already exists, not overwriting")
    return (FALSE)
  }
  
  # Check if the database/schema exists -- if it doesn't, then create it
  schema_name   <- database_table@database_name
  schema_exists <- check_if_schema_exists(schema_name, conn)
  if (!schema_exists) {
    schema_exists <- create_schema(schema_name, conn)
  }
  if (!schema_exists) {
    warning("Could not create schema: ", schema_name)
    return (FALSE)
  }
  
  # Genrate the query required to build a table and run it
  table_creation_query  <- generate_table_creation_query(database_table)
  run_query(table_creation_query, conn)
  
  # Verify that the table now exists
  table_exists <- check_if_table_exists(full_table_name, conn)
  if (!table_exists) {
    warning("Table '", full_table_name, "' was not created successfully")
    return (FALSE)
  }
  
  return (table_exists)
}

# Will be a method for generating the SQL that creates the table
generate_table_creation_query <- function (database_table) {
  
  # Extract out some relevant information about the table
  schema_name <- database_table@database_name
  table_name  <- database_table@table_name
  pk_name     <- database_table@primary_key
  fk_names    <- database_table@foreign_keys
  table_cols  <- database_table@columns
  
  # Generate the beginning of the query
  use_statement    <- paste0("USE ", schema_name, ";") 
  create_statement <- paste0("CREATE TABLE ", table_name, " (\n",
                             generate_sql_columns(table_cols, pk_name, fk_names),
                             ");")
  
  # Return both the USE statement and the CREATE statement
  return (list(use_statement = use_statement, 
               create_statement = create_statement))
}

# Get the referenced table from the SQL foreign key syntax
get_fk_referenced_table <- function (fk_sql) {
  paren_start <- stringr::str_locate(fk_sql, "[(]")[1, 1]
  referenced_table <- gsub(" ", "", substr(fk_sql, start = 1, stop = paren_start - 1))
  return (referenced_table)
}

# Update rules and order
update_rules_and_order <- function (must_be_created_before_rules, table_creation_order) {
  # Find which tables have their rules satisfied and place them in the ordering
  for (i in 1:length(must_be_created_before_rules)) {
    rules <-  must_be_created_before_rules[[i]] 
    tab   <- names(must_be_created_before_rules)[i]
    if (identical(rules, character(0))) {
      table_creation_order <- unique(c(table_creation_order, tab))
      for (j in 1:length(must_be_created_before_rules)) {
        rm_ind <- which(must_be_created_before_rules[[j]] == tab)
        if (length(rm_ind) > 0) {
          must_be_created_before_rules[[j]] <- must_be_created_before_rules[[j]][-rm_ind]
        }
      }
    }
  }
  return (list(must_be_created_before_rules = must_be_created_before_rules,
               table_creation_order = table_creation_order))
}

# Check if ordering rules exist in a list of tables
check_if_rules_exist <- function (must_be_created_before_rules) {
  return (!all(sapply(must_be_created_before_rules, function (x) identical(x, character(0)))))  
}

# Figure out which tables have foreign key dependencies
determine_fk_dependencies <- function (database_tables) {
  
  # For each table to be created, figure out its referenced tables
  db <- unique(sapply(database_tables, function (x) x@database_name))
  must_be_created_before_rules <- lapply(database_tables, function (db_tab) {
    tables_to_be_created_before <- if (check_if_table_has_foreign_key(db_tab)) {
      paste0(db, ".", sapply(db_tab@foreign_keys, get_fk_referenced_table))
    } else {
      character(0)
    }
    names(tables_to_be_created_before) <- NULL
    return (tables_to_be_created_before)
  })
  names(must_be_created_before_rules) <- sapply(database_tables, get_full_table_name)
  return (must_be_created_before_rules)
}

# Figure out the order to create tables within a database/schemam
# returning the indices of order to create
get_table_creation_order <- function (database_tables) {
  
  # Figure out which rules apply to this database
  must_be_created_before_rules <- determine_fk_dependencies(database_tables)
  
  # Create the order of tables, removing from the rules list after assignment
  max_checks <- 1000; n_checks   <- 0
  table_creation_order <- c()
  rules_exist_in_list  <- check_if_rules_exist(must_be_created_before_rules)
  while (rules_exist_in_list) {
    
    # Iteratively update the rules and order
    rules_and_order <- update_rules_and_order(must_be_created_before_rules,
                                              table_creation_order)
    must_be_created_before_rules <- rules_and_order$must_be_created_before_rules
    table_creation_order <- rules_and_order$table_creation_order
    
    # Check if all rules are satisfied and add the remaining tables if so
    rules_exist_in_list  <- check_if_rules_exist(must_be_created_before_rules)
    if (!rules_exist_in_list) {
      missing_tables <-
        names(must_be_created_before_rules)[!(names(must_be_created_before_rules)
                                              %in% table_creation_order)]
      table_creation_order <- unique(c(table_creation_order, missing_tables))
    }
    
    # Stop if this will never figure out the order
    n_checks <- n_checks + 1
    if (n_checks == max_checks) {
      stop("Cannot determine the correct order to create database tables")
    }
  }
  return (match(table_creation_order, names(must_be_created_before_rules)))
}

# Order tables by creation order
order_tables_by_creation_order <- function (database_tables) {
  table_creation_order    <- get_table_creation_order(database_tables)
  database_tables_ordered <- database_tables[table_creation_order]
  return (database_tables_ordered)
}

# Use to generate an INDEX name
generate_index_name <- function (col_name, reference_table_name) {
  return (paste0(col_name, "_", reference_table_name, "_index"))
}

# Generates foreign key SQL table create columns
generate_fk_columns <- function (fk_names) {
  fk_sql_code <- sapply(1:length(fk_names), function (i) {
    fk_name <- names(fk_names)[i]
    fk_sql  <- fk_names[[i]]
    index_name <- generate_index_name(fk_name, get_fk_referenced_table(fk_sql))
    return (paste0("  INDEX ", index_name, " (", fk_name, "),\n",
                   "  FOREIGN KEY(", fk_name, ") REFERENCES ", fk_sql))
  })
  return (paste0(fk_sql_code, collapse = ",\n"))
}

# Use to generate the SQL for a table's columns when creating that table
generate_sql_columns <- function (table_cols, pk_name, fk_names) {
  sql_code <- ""
  for (i in 1:length(table_cols)) {
    col_name <- names(table_cols)[i]
    col_type <- table_cols[[i]]
    sql_code <- paste0(sql_code, "  ", col_name, " ", col_type,
                       ifelse(col_name == pk_name, " AUTO_INCREMENT", ""),
                       ",\n")
  }
  sql_code <- paste0(sql_code, "  PRIMARY KEY(", pk_name, ")")
  if (length(fk_names) > 0) {
    sql_code <- paste0(sql_code, ",\n",
                       generate_fk_columns(fk_names))
    
  }
  sql_code <- paste0(sql_code, "\n")
  return (sql_code)
}

#' @title Create or update lookup tables
#' 
#' @description This function will check for already existing lookup tables -- if
#' it finds them, then it will append any new lookups to those tables, if it 
#' doesn't find any then it will create new lookup tables. Note that this
#' function needs to be updated for each lookup table created. 
#'  
#' @return A list of the lookup tables that were created
#'
#' @export
#'  
populate_lookup_tables <- function () {
  
  # Define the data source of lookup tables
  all_tickers   <- unique(unlist(c(names(gdax_config@tickers),
                                   names(cryptowatch_config@tickers))))
  all_exchanges <- unique(unlist(c(names(cryptowatch_config@exchanges))))
  
  # Find which tickers need to be added and add rows for them
  existing_lookup_tickers <- get_table(ticker_lookup)$ticker_name
  tickers_to_add <- all_tickers[which(!(all_tickers %in% existing_lookup_tickers))]
  for (ticker in tickers_to_add) {
    q <- paste0("INSERT INTO ", ticker_lookup, "(ticker_name)\n",
                "VALUES ('", ticker, "');")
    run_query(q)
  }
  
  # Do the same for exchanges
  existing_lookup_exchanges <- get_table(exchange_lookup)$exchange_name
  exchanges_to_add <- all_exchanges[which(!(all_exchanges %in% existing_lookup_exchanges))]
  for (exchange in exchanges_to_add) {
    q <- paste0("INSERT INTO ", exchange_lookup, "(exchange_name)\n",
                "VALUES ('", exchange, "');")
    run_query(q)
  }
  
  # Grab the lookup tables and return them
  return (list(ticker_lookup = get_table(ticker_lookup),
               exchange_lookup = get_table(exchange_lookup)))
}
kyleengel/btclearn documentation built on June 7, 2018, 12:26 a.m.