R/dbManagement.R

#' initializeLocalDatabase
#' 
#' Initialize database
#' @param path path of database
#' @param overwrite overwrite existing database
#' @return database
#' @export
initializeLocalDatabase = function(path = ".", overwrite = FALSE) {
  
  db = paste0(path, "/mlrRandomBotDatabase.db")
  
  if (file.exists(db) & overwrite) {
    unlink(db, force = TRUE)
  } 
  
  src = src_sqlite(db, create = !file.exists(db))
  
  if(!db_has_table(src$con, "run.table")){
    run.table = data.frame(run.id = integer(0L), 
      task.id = integer(0L), 
      setup.id = integer(0L),
      flow.id = integer(0L),
      flow.name = character(0L),
      flow.version = character(0L),
      flow.source = character(0L),
      learner.name = character(0L),
      data.name = character(0L),
      upload.time = character(0L),
      measure.name = character(0L), 
      measure.value = numeric(0L),
      stringsAsFactors = FALSE)
    copy_to(src, run.table, temporary = FALSE)
  }
  if(!db_has_table(src$con, "hyperpar.table")){
    hyperpar.table = data.frame(run.id = integer(0L), 
      hyperpar.name = character(0L), 
      hyperpar.value = character(0L),
      stringsAsFactors = FALSE)
    copy_to(src, hyperpar.table, temporary = FALSE)
  }
  if(!db_has_table(src$con, "meta.table")){
    meta.table = data.frame(task.id = integer(0L),
      data.id = integer(0L),
      name = character(0L),
      majority.class.size = integer(0L),
      max.nominal.att.distinct.values = integer(0L),
      minority.class.size = integer(0L),
      number.of.classes = integer(0L),
      number.of.features = integer(0L),
      number.of.instances = integer(0L),
      number.of.instances.with.missing.values = integer(0L),
      number.of.missing.values = integer(0L),
      number.of.numeric.features = integer(0L),
      number.of.symbolic.features = integer(0L),
      stringsAsFactors = FALSE)
    copy_to(src, meta.table, temporary = FALSE)
  }
  if(!db_has_table(src$con, "runtime.table")){
    runtime.table = data.frame(run.id = integer(0L),
      run.time = numeric(0L),
      sci.mark = numeric(0L),
      stringsAsFactors = FALSE)
    copy_to(src, runtime.table, temporary = FALSE)
  }
  if(!db_has_table(src$con, "reference.table")){
    reference.table = data.frame(run.id = integer(0L), 
      task.id = integer(0L), 
      setup.id = integer(0L),
      flow.id = integer(0L),
      flow.name = character(0L),
      flow.version = character(0L),
      flow.source = character(0L),
      learner.name = character(0L),
      data.name = character(0L),
      upload.time = character(0L),
      measure.name = character(0L), 
      measure.value = numeric(0L),
      stringsAsFactors = FALSE)
    copy_to(src, reference.table, temporary = FALSE)
  }
  
  return(src)
}

#' updateRunTable
#' 
#' Save run results to db
#' @param run.tag 
#' @param local.db 
#' @export
updateRunTable = function(run.tag, local.db){
  run.ids = local.db %>% tbl(sql("SELECT DISTINCT [run.id] FROM [run.table]")) %>% collect(n = Inf)
  df = getRunTable(run.tag = run.tag, excl.run.ids = run.ids$run.id)
  if(!is.null(df)){
    db_insert_into(local.db$con, "run.table", df)
  }
}

#' updateReferenceTable
#' 
#' Save reference run results to db
#' @param run.tag 
#' @param local.db
#' @export 
updateReferenceTable = function(run.tag, local.db){
  run.ids = local.db %>% tbl(sql("SELECT DISTINCT [run.id] FROM [reference.table]")) %>% collect(n = Inf)
  df = getRunTable(run.tag = run.tag, excl.run.ids = run.ids$run.id)
  if(!is.null(df)){
    db_insert_into(local.db$con, "reference.table", df)
  }
}

#' updateHyperparTable
#' 
#' Save hyperparameters for run to db
#' @param run.tag 
#' @param local.db 
#' @export
updateHyperparTable = function(run.tag, local.db){
  run.ids = local.db %>% tbl(sql("SELECT DISTINCT [run.id] FROM [hyperpar.table]")) %>% collect(n = Inf) 
  df = getHyperparTable(run.tag = run.tag, excl.run.ids = run.ids$run.id)
  if(!is.null(df)){
    db_insert_into(local.db$con, "hyperpar.table", df)
  }
}

#' updateMetaTable
#' 
#' Save meta data for task to db
#' @param task.tag 
#' @param local.db 
#' @export
updateMetaTable = function(task.tag, local.db){
  task.ids = local.db %>% tbl(sql("SELECT DISTINCT [task.id] FROM [meta.table]")) %>% collect(n = Inf)
  df = getMetaFeaturesTable(task.tag = task.tag)
  df = df[df$task.id %in% setdiff(df$task.id, task.ids),]
  if(!is.null(df)){
    db_insert_into(local.db$con, "meta.table", df)
  }
}

#' updateRunTimeTable
#' 
#' Save user times to db
#' @param local.db 
#' @export
updateRunTimeTable = function(local.db){
  qry_sql = paste0("SELECT DISTINCT a.[run.id] FROM [run.table] As a ",
    "LEFT JOIN [runtime.table] As b ON a.[run.id] = b.[run.id] ",
    "WHERE b.[run.id] IS NULL")
  run.ids = local.db %>% tbl(sql(qry_sql)) %>% collect(n = Inf)
  run.chunks = split(run.ids$run.id, ceiling(seq_along(run.ids$run.id)/100))
  
  #run.chunks = run.chunks[1:10] #just for testing: can be switched off
  
  for(i in seq_along(run.chunks)){
    print(paste("Chunk", i, "of", length(run.chunks)))
    df = getRunTime(run.chunks[[i]])
    if(!is.null(df)){
      db_insert_into(local.db$con, "runtime.table", df)
    }
    df = NULL
  }
}


#' updateLocalDatabase
#' 
#' Check each tables of the local database for update requirement
#' @param path path of database
#' @param run.tag tag for OMLRun
#' @param task.tag tag for OMLTask
#' @return database
#' 
#' @export
updateLocalDatabase = function(path = ".", run.tag = "mlrRandomBot", task.tag = "study_14") {
  
  local.db = initializeLocalDatabase(path = path)
  updateRunTable(run.tag = run.tag, local.db)
  updateHyperparTable(run.tag = run.tag, local.db)
  updateMetaTable(task.tag = task.tag, local.db)
  updateRunTimeTable(local.db)
}
ja-thomas/OMLbots documentation built on May 18, 2019, 7:15 a.m.