R/create_model.R

#' Extract hyperparameters from a model
#'
#' Reads the layer and optimizer configuration of a keras model and returns the
#' hyperparameters used to create it (dropout, layer size, number of LSTM
#' layers, optimizer, learning rate, ...).
#'
#' @param model A keras model.
#' @return Named list of hyperparameters.
#' @export
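#' @examples
#' \dontrun{
#' # A minimal sketch (assumes a working keras installation): build a small
#' # model with this package and read its hyperparameters back.
#' model <- create_model_lstm_cnn(maxlen = 30, layers.lstm = 1,
#'                                layer.size = 16, use.cudnn = FALSE)
#' get_hyper_param(model)
#' }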
get_hyper_param <- function(model){
  layers.lstm <- 0
  use.cudnn <- FALSE
  bidirectional <- FALSE 
  use.codon.cnn <- FALSE
  learning.rate <- keras::k_eval(model$optimizer$lr)
  solver <- stringr::str_to_lower(model$optimizer$get_config()[["name"]])
  
  layerList <- keras::get_config(model)[["layers"]]
  for (i in seq_along(layerList)){
    layer_class_name <- layerList[[i]]$class_name
    
    if (layer_class_name == "Conv1D"){
      use.codon.cnn <- TRUE
    }
    
    if (layer_class_name == "MaxPooling1D"){
    }
    
    if (layer_class_name == "BatchNormalization") {
    }
    
    if (layer_class_name == "CuDNNLSTM"){
      layers.lstm <- layer.lstm + 1
      use.cudnn <- TRUE
      layer.size <- layerList[[i]]$config$units
      recurrent_dropout <- 0  
      dropout <- 0    
    }
    
    if (layer_class_name == "LSTM"){
      layers.lstm <- layer.lstm + 1
      layer.size <- layerList[[i]]$config$units
      recurrent_dropout <- layerList[[i]]$config$recurrent_dropout  
      dropout <- layerList[[i]]$config$dropout    
    }
    
    if (layer_class_name == "Bidirectional"){
      bidirectional <- TRUE 
    }
    
    if (layer_class_name == "Dense"){
    }
    
    if (layer_class_name == "Activation"){
    }
  }
  
  list(HP_DROPOUT = dropout, 
       HP_RECURRENT_DROPOUT = recurrent_dropout, 
       HP_LAYER.SIZE =  layer.size, 
       HP_OPTIMIZER = solver, 
       HP_USE.CUDNN = use.cudnn, 
       HP_NUM_LAYERS = layers.lstm,
       HP_LEARNING.RATE = learning.rate, 
       HP_USE.CODON.CNN = use.codon.cnn, 
       HP_BIDIRECTIONAL = bidirectional 
  )
}

#' @title Creates LSTM/CNN network
#'
#' @description
#' Creates a network consisting of an arbitrary number of LSTM layers (> 0) and an
#' optional CNN layer at the beginning. The last layer is a dense layer with softmax
#' activation.
#' @param maxlen Length of the predictor sequence.
#' @param dropout Fraction of the units to drop for inputs.
#' @param recurrent_dropout Fraction of the units to drop for the recurrent state.
#' @param layer.size Number of cells per network layer.
#' @param layers.lstm Number of LSTM layers.
#' @param solver Optimization method; options are "adam", "adagrad", "rmsprop" or "sgd".
#' @param use.codon.cnn If TRUE, the first layer is a CNN layer with kernel size 3 to mimic codons (experimental).
#' @param learning.rate Learning rate for the optimizer.
#' @param use.cudnn If TRUE, layer_cudnn_lstm() is used instead of layer_lstm(); requires a GPU with cuDNN support.
#' @param use.multiple.gpus If TRUE, the model is wrapped with multi_gpu_model() using gpu.num GPUs.
#' @param gpu.num Number of GPUs to use; only relevant if use.multiple.gpus is TRUE.
#' @param merge.on.cpu TRUE by default; FALSE is recommended if the server supports NVLink. Only relevant if use.multiple.gpus is TRUE.
#' @param bidirectional Whether to use a bidirectional wrapper for the LSTM layers.
#' @param num_targets Number of possible predictions; determines the number of neurons in the dense layer.
#' @param vocabulary.size Number of unique characters in the vocabulary.
#' @param compile Whether to compile the model (attach optimizer, loss and metrics).
#' @return A keras model.
#' @export
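#' @examples
#' \dontrun{
#' # A minimal sketch; assumes keras/TensorFlow are installed and no
#' # cuDNN-capable GPU is available (hence use.cudnn = FALSE).
#' model <- create_model_lstm_cnn(
#'   maxlen = 50,
#'   layer.size = 64,
#'   layers.lstm = 2,
#'   use.codon.cnn = TRUE,
#'   use.cudnn = FALSE
#' )
#' }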
create_model_lstm_cnn <- function(
  maxlen = 50,
  dropout = 0,
  recurrent_dropout = 0,
  layer.size = 128,
  layers.lstm = 2,
  solver = "adam",
  use.codon.cnn = FALSE,
  learning.rate = 0.001,
  use.cudnn = TRUE,
  use.multiple.gpus = FALSE,
  merge.on.cpu = TRUE,
  gpu.num = 2,
  num_targets = 4,
  vocabulary.size = 4,
  bidirectional = FALSE,
  compile = TRUE){
  
  stopifnot(maxlen > 0)
  stopifnot(dropout <= 1 & dropout >= 0)
  stopifnot(recurrent_dropout <= 1 & recurrent_dropout >= 0)
  stopifnot(layer.size > 0)
  stopifnot(layers.lstm > 0)
  stopifnot(solver %in% c("adam", "adagrad", "rmsprop", "sgd"))
  
  if (use.cudnn & (dropout > 0 | recurrent_dropout > 0)){
    warning("Dropout is not supported by cuDNN and will be ignored")
  }
  
  if (use.multiple.gpus) {
    # init template model under a CPU device scope
    with(tensorflow::tf$device("/cpu:0"), {
      model <- keras::keras_model_sequential()
    })
  } else {
    model <- keras::keras_model_sequential()
  }
  
  if (use.codon.cnn) {
    model %>% 
      keras::layer_conv_1d(
        # kernel size 3: three characters represent a codon
        kernel_size = 3,
        padding = "same",
        activation = "relu",
        filters = 81,
        input_shape = c(maxlen, vocabulary.size)
      )  %>%
      keras::layer_max_pooling_1d(pool_size = 3)  %>%
      keras::layer_batch_normalization(momentum = .8)
  }
  
  # following layers
  if (use.cudnn) {
    if (layers.lstm > 1){
      if (bidirectional){
        for (i in 1:(layers.lstm - 1)) {
          model %>%
            keras::bidirectional(
              input_shape = c(maxlen, vocabulary.size),
              keras::layer_cudnn_lstm(
                units = layer.size,
                return_sequences = TRUE
              ) 
            )
        } 
        
      } else {
        for (i in 1:(layers.lstm - 1)) {
          model %>%
            keras::layer_cudnn_lstm(
              layer.size,
              input_shape = c(maxlen, vocabulary.size),
              return_sequences = TRUE
            )
        } 
      }
    }   
    # last LSTM layer
    if (bidirectional){
      model %>%
        keras::bidirectional(
          input_shape = c(maxlen, vocabulary.size),
          keras::layer_cudnn_lstm(units = layer.size)
        )
    } else {
      model %>% keras::layer_cudnn_lstm(layer.size, input_shape = c(maxlen, vocabulary.size))
    }
    
  } else {
    # non-cudnn
    if (layers.lstm > 1){
      if (bidirectional){
        for (i in 1:(layers.lstm - 1)) {
          model %>%
            keras::bidirectional(
              input_shape = c(maxlen, vocabulary.size),
              keras::layer_lstm(
                units = layer.size,
                return_sequences = TRUE,
                dropout = dropout,
                recurrent_dropout = recurrent_dropout
              )
            )
        } 
      } else {
        for (i in 1:(layers.lstm - 1)) {
          model %>%
            keras::layer_lstm(
              layer.size,
              input_shape = c(maxlen, vocabulary.size),
              return_sequences = TRUE
              
            )
        } 
      }
    }  
    # last LSTM layer
    if (bidirectional){
      model %>%
        keras::bidirectional(
          input_shape = c(maxlen, vocabulary.size),
          keras::layer_lstm(units = layer.size, dropout = dropout, recurrent_dropout = recurrent_dropout)
        )
    } else {
      model %>%
        keras::layer_lstm(layer.size, input_shape = c(maxlen, vocabulary.size),
                          dropout = dropout, recurrent_dropout = recurrent_dropout)
    }
  }
  
  
  model %>% keras::layer_dense(num_targets) %>% 
    keras::layer_activation("softmax")
  
  # print model layout to screen, should be done before multi_gpu_model 
  summary(model)
  
  if (use.multiple.gpus) {
    model <- keras::multi_gpu_model(model,
                                    gpus = gpu.num,
                                    cpu_merge = merge.on.cpu)
  }
  
  # choose optimization method
  if (solver == "adam"){
    optimizer <-  keras::optimizer_adam(lr = learning.rate)
  }
  if (solver == "adagrad"){
    optimizer <- keras::optimizer_adagrad(lr = learning.rate)
  }
  if (solver == "rmsprop"){
    optimizer <- keras::optimizer_rmsprop(lr = learning.rate)
  }
  if (solver == "sgd"){
    optimizer <- keras::optimizer_sgd(lr = learning.rate)
  }
  
  if (compile) {
    model %>% keras::compile(loss = "categorical_crossentropy",
                             optimizer = optimizer, metrics = c("acc"))
  }
  
  return(model)
}
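
# A minimal, commented-out sketch of fitting create_model_lstm_cnn on random
# data shaped (samples, maxlen, vocabulary.size); for illustration only, in the
# style of the example block at the end of this file.
# model <- create_model_lstm_cnn(maxlen = 50, use.cudnn = FALSE)
# x <- array(stats::runif(20 * 50 * 4), dim = c(20, 50, 4))
# y <- keras::to_categorical(sample(0:3, 20, replace = TRUE), num_classes = 4)
# history <- model %>% keras::fit(x, y, batch_size = 4, epochs = 1)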

create_model_wavenet <- function(){
  # Placeholder: not yet implemented.
}

#######

# Like create_model_lstm_cnn, but builds a model with two separate inputs (e.g.
# the sequence before and after a target position). Both branches share the
# architecture of create_model_lstm_cnn and are concatenated before the final
# dense/softmax layer. Arguments are as in create_model_lstm_cnn.
create_model_lstm_cnn_target_middle <- function(
  maxlen = 50,
  dropout = 0,
  recurrent_dropout = 0,
  layer.size = 128,
  layers.lstm = 2,
  solver = "adam",
  use.codon.cnn = FALSE,
  learning.rate = 0.001,
  use.cudnn = TRUE,
  use.multiple.gpus = FALSE,
  merge.on.cpu = TRUE,
  gpu.num = 2,
  num_targets = 4,
  vocabulary.size = 4,
  bidirectional = FALSE,
  compile = TRUE){
  
  input_tensor_1 <- keras::layer_input(c(maxlen, vocabulary.size))
  
  if (use.codon.cnn) {
    output_tensor_1 <- input_tensor_1 %>%
      keras::layer_conv_1d(
        # kernel size 3: three characters represent a codon
        kernel_size = 3,
        padding = "same",
        activation = "relu",
        filters = 81
      )  %>%
      keras::layer_max_pooling_1d(pool_size = 3)  %>%
      keras::layer_batch_normalization(momentum = .8)
  } else {
    output_tensor_1 <- input_tensor_1
  }
  
  # following layers
  if (use.cudnn) {
    if (layers.lstm > 1){
      if (bidirectional){
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_1 <- output_tensor_1 %>%
            keras::bidirectional(
              input_shape = c(maxlen, vocabulary.size),
              keras::layer_cudnn_lstm(
                units = layer.size,
                return_sequences = TRUE
              )
            )
        }
        
      } else {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_1 <- output_tensor_1 %>%
            keras::layer_cudnn_lstm(
              layer.size,
              input_shape = c(maxlen, vocabulary.size),
              return_sequences = TRUE
            )
        }
      }
    }
    # last LSTM layer
    if (bidirectional){
      output_tensor_1 <- output_tensor_1 %>%
        keras::bidirectional(
          input_shape = c(maxlen, vocabulary.size),
          keras::layer_cudnn_lstm(units = layer.size)
        )
    } else {
      output_tensor_1 <- output_tensor_1 %>%
        keras::layer_cudnn_lstm(layer.size, input_shape = c(maxlen, vocabulary.size))
    }
    
  } else {
    # non-cudnn
    if (layers.lstm > 1){
      if (bidirectional){
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_1 <- output_tensor_1 %>%
            keras::bidirectional(
              input_shape = c(maxlen, vocabulary.size),
              keras::layer_lstm(
                units = layer.size,
                return_sequences = TRUE,
                dropout = dropout,
                recurrent_dropout = recurrent_dropout
              )
            )
        }
      } else {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_1 <- output_tensor_1 %>%
            keras::layer_lstm(
              layer.size,
              input_shape = c(maxlen, vocabulary.size),
              return_sequences = TRUE
              
            )
        }
      }
    }
    # last LSTM layer
    if (bidirectional){
      output_tensor_1 <- output_tensor_1 %>%
        keras::bidirectional(
          input_shape = c(maxlen, vocabulary.size),
          keras::layer_lstm(units = layer.size, dropout = dropout, recurrent_dropout = recurrent_dropout)
        )
    } else {
      output_tensor_1 <- output_tensor_1 %>%
        keras::layer_lstm(layer.size, input_shape = c(maxlen, vocabulary.size),
                          dropout = dropout, recurrent_dropout = recurrent_dropout)
    }
  }
  
  input_tensor_2 <- keras::layer_input(c(maxlen, vocabulary.size))
  
  if (use.codon.cnn) {
    output_tensor_2 <- input_tensor_2 %>%
      keras::layer_conv_1d(
        # kernel size 3: three characters represent a codon
        kernel_size = 3,
        padding = "same",
        activation = "relu",
        filters = 81
      )  %>%
      keras::layer_max_pooling_1d(pool_size = 3)  %>%
      keras::layer_batch_normalization(momentum = .8)
  } else {
    output_tensor_2 <- input_tensor_2
  }
  
  # following layers
  if (use.cudnn) {
    if (layers.lstm > 1){
      if (bidirectional){
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_2 <- output_tensor_2 %>%
            keras::bidirectional(
              input_shape = c(maxlen, vocabulary.size),
              keras::layer_cudnn_lstm(
                units = layer.size,
                return_sequences = TRUE
              )
            )
        }
        
      } else {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_2 <- output_tensor_2 %>%
            keras::layer_cudnn_lstm(
              layer.size,
              input_shape = c(maxlen, vocabulary.size),
              return_sequences = TRUE
            )
        }
      }
    }
    # last LSTM layer
    if (bidirectional){
      output_tensor_2 <- output_tensor_2 %>%
        keras::bidirectional(
          input_shape = c(maxlen, vocabulary.size),
          keras::layer_cudnn_lstm(units = layer.size)
        )
    } else {
      output_tensor_2 <- output_tensor_2 %>%
        keras::layer_cudnn_lstm(layer.size, input_shape = c(maxlen, vocabulary.size))
    }
    
  } else {
    # non-cudnn
    if (layers.lstm > 1){
      if (bidirectional){
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_2 <- output_tensor_2 %>%
            keras::bidirectional(
              input_shape = c(maxlen, vocabulary.size),
              keras::layer_lstm(
                units = layer.size,
                return_sequences = TRUE,
                dropout = dropout,
                recurrent_dropout = recurrent_dropout
              )
            )
        }
      } else {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_2 <- output_tensor_2 %>%
            keras::layer_lstm(
              layer.size,
              input_shape = c(maxlen, vocabulary.size),
              return_sequences = TRUE
              
            )
        }
      }
    }
    # last LSTM layer
    if (bidirectional){
      output_tensor_2 <- output_tensor_2 %>%
        keras::bidirectional(
          input_shape = c(maxlen, vocabulary.size),
          keras::layer_lstm(units = layer.size, dropout = dropout, recurrent_dropout = recurrent_dropout)
        )
    } else {
      output_tensor_2 <- output_tensor_2 %>%
        keras::layer_lstm(layer.size, input_shape = c(maxlen, vocabulary.size),
                          dropout = dropout, recurrent_dropout = recurrent_dropout)
    }
  }
  
  output_tensor <- keras::layer_concatenate(list(output_tensor_1, output_tensor_2))
  
  output_tensor <- output_tensor %>%
    keras::layer_dense(num_targets) %>%
    keras::layer_activation("softmax")
  
  # print model layout to screen, should be done before multi_gpu_model
  model <- keras::keras_model(inputs = list(input_tensor_1, input_tensor_2), outputs = output_tensor)
  summary(model)
  
  if (use.multiple.gpus) {
    model <- keras::multi_gpu_model(model,
                                    gpus = gpu.num,
                                    cpu_merge = merge.on.cpu)
  }
  
  # choose optimization method
  if (solver == "adam"){
    optimizer <- keras::optimizer_adam(lr = learning.rate)
  }
  if (solver == "adagrad"){
    optimizer <- keras::optimizer_adagrad(lr = learning.rate)
  }
  if (solver == "rmsprop"){
    optimizer <- keras::optimizer_rmsprop(lr = learning.rate)
  }
  if (solver == "sgd"){
    optimizer <- keras::optimizer_sgd(lr = learning.rate)
  }
  
  if (compile) {
    model %>% keras::compile(loss = "categorical_crossentropy",
                             optimizer = optimizer, metrics = c("acc"))
  }
  model
}


# dummyGen <- function(batch.size = 2, maxlen = 7, voc_size = 4){
#   n <- batch.size
#   function(){
#     i1 <- array(runif(batch.size * maxlen * voc_size), dim = c(batch.size, maxlen, voc_size))
#     i2 <- array(runif(batch.size * maxlen * voc_size), dim = c(batch.size, maxlen, voc_size))
#     out <- array(runif(batch.size * voc_size), dim = c(batch.size, voc_size))
#     return(list(inputs = list(i1, i2), targets = out))
#   }
# }
# 
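# # A model to pair with the generator above: a minimal sketch assuming no
# # cuDNN-capable GPU and the generator's default dimensions.
# model <- create_model_lstm_cnn_target_middle(maxlen = 7, vocabulary.size = 4,
#                                              use.cudnn = FALSE)
#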
# gen <- dummyGen()
# gen.val <- dummyGen()
# 
# history <-
#   model %>% keras::fit_generator(
#     generator = gen,
#     validation_data = gen.val,
#     validation_steps = 3,
#     steps_per_epoch = 10,
#     epochs = 3
#   )