R/create_model_lstm_cnn.R

#' @title Create LSTM/CNN network
#'
#' @description Creates a network consisting of an arbitrary number of CNN, LSTM and dense layers.
#' Last layer is a dense layer.
#' 
#' @param maxlen Length of predictor sequence.
#' @param dropout_lstm Fraction of the units to drop for the linear transformation of the inputs.
#' @param recurrent_dropout_lstm Fraction of the units to drop for the linear transformation of the recurrent state.
#' @param layer_lstm Number of cells per network layer. Can be a scalar or vector.
#' @param layer_dense Vector specifying number of neurons per dense layer after last LSTM or CNN layer (if no LSTM used).
#' @param dropout_dense Dropout rates between dense layers. No dropout if `NULL`.
#' @param solver Optimization method, options are `"adam", "adagrad", "rmsprop"` or `"sgd"`.
#' @param learning_rate Learning rate for optimizer.
#' @param bidirectional Use a bidirectional wrapper for LSTM layers.
#' @param vocabulary_size Number of unique characters in the vocabulary.
#' @param stateful Boolean. Whether to use stateful LSTM layer.
#' @param batch_size Number of samples that are used for one network update. Only used if \code{stateful = TRUE}.
#' @param compile Whether to compile the model.
#' @param kernel_size Kernel size of the 1d convolutional layers. For multiple layers, assign a vector (e.g., `rep(3, 2)` for two layers with kernel size 3).
#' @param filters Number of filters. For multiple layers, assign a vector.
#' @param strides Stride values. For multiple layers, assign a vector.
#' @param pool_size Integer, size of the max pooling windows. For multiple layers, assign a vector.
#' @param padding Padding of CNN layers, e.g. `"same", "valid"` or `"causal"`.
#' @param dilation_rate Integer, the dilation rate to use for dilated convolution.
#' @param gap Whether to apply global average pooling after last CNN layer.
#' @param use_bias Boolean. Whether to use a bias vector in the CNN layers.
#' @param residual_block Boolean. If `TRUE`, residual connections are used in the CNN (not in the first convolutional layer).
#' @param residual_block_length Integer. Determines how many convolutional layers (or triplets when `size_reduction_1Dconv` is `TRUE`) lie
#' between the legs of a residual connection. E.g., if the length of `kernel_size`/`filters` is 7 and `residual_block_length` is 2, the model contains 1+(7-1)*2 convolutional
#' layers when `size_reduction_1Dconv` is `FALSE` and 1+(7-1)*2*3 convolutional layers when `size_reduction_1Dconv` is `TRUE`.
#' @param size_reduction_1Dconv Boolean. When `TRUE`, each convolution becomes a bottleneck: a convolution with kernel size 1 first reduces the number of filters to 1/4 of
#' the original value, the convolution with the original kernel size is applied with the reduced filter count, and a final convolution
#' with kernel size 1 restores the original number of filters.
#' @param label_input Integer or `NULL`. If not `NULL`, adds additional input layer of \code{label_input} size.
#' @param zero_mask Boolean, whether to apply zero masking before LSTM layer. Only used if model does not use any CNN layers.
#' @param label_smoothing Float in \[0, 1\]. If 0, no smoothing is applied. If > 0, the loss is computed between the predicted
#' labels and a smoothed version of the true labels, where the smoothing squeezes the labels towards 0.5.
#' The closer the argument is to 1, the more the labels get smoothed.
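#' For example, with two classes a one-hot label is mapped to \code{y * (1 - label_smoothing) + label_smoothing/2},
#' so \code{label_smoothing = 0.2} turns \code{c(0, 1)} into \code{c(0.1, 0.9)}.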
#' @param label_noise_matrix Matrix of label noise. Every row stands for one class; the columns give the percentages of labels assigned to that class.
#' For example, if the first class contains 5 percent wrong labels and the second class no noise, then
#' 
#' \code{label_noise_matrix <- matrix(c(0.95, 0.05, 0, 1), nrow = 2, byrow = TRUE)}
#' @param last_layer_activation Activation function of output layer(s). For example `"sigmoid"` or `"softmax"`.
#' @param loss_fn Either `"categorical_crossentropy"` or `"binary_crossentropy"`. If `label_noise_matrix` given, will use custom `"noisy_loss"`.
#' @param num_output_layers Number of output layers.
#' @param auc_metric Whether to add AUC metric.
#' @param f1_metric Whether to add F1 metric.
#' @param bal_acc Whether to add balanced accuracy.
#' @param verbose Boolean. Whether to print the model summary.
#' @param batch_norm_momentum Momentum for the moving mean and the moving variance.
#' @param model_seed Set seed for model parameters in tensorflow if not `NULL`.
#' @param mixed_precision Whether to use mixed precision (https://www.tensorflow.org/guide/mixed_precision).
#' @param mirrored_strategy Whether to use distributed mirrored strategy. If `NULL`, the mirrored strategy is used only if more than one GPU is available.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' create_model_lstm_cnn(
#'   maxlen = 500,
#'   vocabulary_size = 4,
#'   kernel_size = c(8, 8, 8),
#'   filters = c(16, 32, 64),
#'   pool_size = c(3, 3, 3),
#'   layer_lstm = c(32, 64),
#'   layer_dense = c(128, 4),
#'   learning_rate = 0.001)
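#'
#' # An illustrative CNN-only variant: residual blocks and global average
#' # pooling instead of LSTM layers.
#' create_model_lstm_cnn(
#'   maxlen = 500,
#'   vocabulary_size = 4,
#'   kernel_size = c(8, 8, 8),
#'   filters = c(16, 32, 64),
#'   pool_size = c(2, 2, 2),
#'   residual_block = TRUE,
#'   gap = TRUE,
#'   layer_dense = c(4),
#'   learning_rate = 0.001)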
#' 
#' @returns A keras model that stacks CNN, LSTM and dense layers.
#' @export
create_model_lstm_cnn <- function(
    maxlen = 50,
    dropout_lstm = 0,
    recurrent_dropout_lstm = 0,
    layer_lstm = NULL,
    layer_dense = c(4),
    dropout_dense = NULL,
    kernel_size = NULL,
    filters = NULL,
    strides = NULL,
    pool_size = NULL,
    solver = "adam",
    learning_rate = 0.001,
    vocabulary_size = 4,
    bidirectional = FALSE,
    stateful = FALSE,
    batch_size = NULL,
    compile = TRUE,
    padding = "same",
    dilation_rate = NULL,
    gap = FALSE,
    use_bias = TRUE,
    residual_block = FALSE,
    residual_block_length = 1,
    size_reduction_1Dconv = FALSE,
    label_input = NULL,
    zero_mask = FALSE,
    label_smoothing = 0,
    label_noise_matrix = NULL,
    last_layer_activation = "softmax",
    loss_fn = "categorical_crossentropy",
    num_output_layers = 1,
    auc_metric = FALSE,
    f1_metric = FALSE,
    bal_acc = FALSE,
    verbose = TRUE,
    batch_norm_momentum = 0.99,
    model_seed = NULL,
    mixed_precision = FALSE,
    mirrored_strategy = NULL) {
  
  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
  
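  # when more than one GPU is available (or mirrored_strategy = TRUE), rebuild the
  # model inside a MirroredStrategy scope via a recursive call with the flag disabled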
  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
  if (mirrored_strategy) {
    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
    with(mirrored_strategy$scope(), { 
      argg <- as.list(environment())
      argg$mirrored_strategy <- FALSE
      model <- do.call(create_model_lstm_cnn, argg)
    })
    return(model)
  }
  
  layer_dense <- as.integer(layer_dense)
  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
  num_targets <- layer_dense[length(layer_dense)]
  layers.lstm <- length(layer_lstm)
  use.cnn <- !is.null(kernel_size)
  
  if (!is.null(layer_lstm)) {
    stopifnot(length(layer_lstm) == 1 | (length(layer_lstm) ==  layers.lstm))
  }
  
  if (layers.lstm == 0 & !use.cnn) {
    stop("Model does not use LSTM or CNN layers.")
  }
  
  if (is.null(strides)) strides <- rep(1L, length(filters))
  if (is.null(dilation_rate) & use.cnn) dilation_rate <- rep(1L, length(filters))
  
  if (use.cnn) {
    same_length <- (length(kernel_size) == length(filters)) &
      (length(filters) == length(strides)) &
      (length(strides) == length(dilation_rate))
    if (!same_length) {
      stop("kernel_size, filters, dilation_rate and strides must have the same length")
    }
    if (residual_block & (padding != "same")) {
      stop("Padding option must be same when residual block is used.")
    }
  }
  
  stopifnot(maxlen > 0)
  stopifnot(dropout_lstm <= 1 & dropout_lstm >= 0)
  stopifnot(recurrent_dropout_lstm <= 1 & recurrent_dropout_lstm >= 0)
  
  if (length(layer_lstm) == 1) {
    layer_lstm <- rep(layer_lstm, layers.lstm)
  }
  
  if (stateful) {
    input_tensor <- keras::layer_input(batch_shape = c(batch_size, maxlen, vocabulary_size))
  } else {
    input_tensor <- keras::layer_input(shape = c(maxlen, vocabulary_size))
  }
  
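  # cnn layers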
  if (use.cnn) {
    for (i in 1:length(filters)) {
      if (i == 1) {
        output_tensor <- input_tensor %>%
          keras::layer_conv_1d(
            kernel_size = kernel_size[i],
            padding = padding,
            activation = "relu",
            filters = filters[i],
            strides = strides[i],
            dilation_rate = dilation_rate[i],
            input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
            use_bias = use_bias
          )
        if (!is.null(pool_size) && pool_size[i] > 1) {
          output_tensor <- output_tensor %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
        }
        output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
      } else {
        if (residual_block){
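          # residual shortcut: pool so the length matches the main path's output and,
          # if the number of filters changes, project with a kernel-size-1 convolution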
          if ((strides[i] > 1) | (pool_size[i] > 1)) {
            residual_layer <- output_tensor %>% keras::layer_average_pooling_1d(pool_size=strides[i]*pool_size[i])
          } else {
            residual_layer <- output_tensor
          }
          if (filters[i-1] != filters[i]){
            residual_layer <- residual_layer %>%
              keras::layer_conv_1d(
                kernel_size = 1,
                padding = padding,
                activation = "relu",
                filters = filters[i],
                strides = 1,
                dilation_rate = dilation_rate[i],
                input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                use_bias = use_bias
              )
            residual_layer <- residual_layer %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
          }
          if (residual_block_length > 1){
            for (j in 1:(residual_block_length-1)){
              if (size_reduction_1Dconv){
                output_tensor <- output_tensor %>%
                  keras::layer_conv_1d(
                    kernel_size = 1,
                    padding = padding,
                    activation = "relu",
                    filters = filters[i]/4,
                    strides = 1,
                    dilation_rate = dilation_rate[i],
                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                    use_bias = use_bias
                  )
                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
                
                output_tensor <- output_tensor %>%
                  keras::layer_conv_1d(
                    kernel_size = kernel_size[i],
                    padding = padding,
                    activation = "relu",
                    filters = filters[i]/4,
                    strides = 1,
                    dilation_rate = dilation_rate[i],
                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                    use_bias = use_bias
                  )
                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
                
                output_tensor <- output_tensor %>%
                  keras::layer_conv_1d(
                    kernel_size = 1,
                    padding = padding,
                    activation = "relu",
                    filters = filters[i],
                    strides = 1,
                    dilation_rate = dilation_rate[i],
                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                    use_bias = use_bias
                  )
                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
                
              } else {
                output_tensor <- output_tensor %>%
                  keras::layer_conv_1d(
                    kernel_size = kernel_size[i],
                    padding = padding,
                    activation = "relu",
                    filters = filters[i],
                    strides = 1,
                    dilation_rate = dilation_rate[i],
                    input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                    use_bias = use_bias
                  )
                output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
              }
            }
          }
        }
        if (size_reduction_1Dconv){
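          # bottleneck: kernel-size-1 convolution down to filters/4, convolution with
          # the original kernel size, then kernel-size-1 convolution back to full width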
          output_tensor <- output_tensor %>%
            keras::layer_conv_1d(
              kernel_size = 1,
              padding = padding,
              activation = "relu",
              filters = filters[i]/4,
              strides = 1,
              dilation_rate = dilation_rate[i],
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              use_bias = use_bias
            )
          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
          
          output_tensor <- output_tensor %>%
            keras::layer_conv_1d(
              kernel_size = kernel_size[i],
              padding = padding,
              activation = "relu",
              filters = filters[i]/4,
              strides = strides[i],
              dilation_rate = dilation_rate[i],
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              use_bias = use_bias
            )
          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
          
          output_tensor <- output_tensor %>%
            keras::layer_conv_1d(
              kernel_size = 1,
              padding = padding,
              activation = "relu",
              filters = filters[i],
              strides = 1,
              dilation_rate = dilation_rate[i],
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              use_bias = use_bias
            )
          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
          
        } else {
          output_tensor <- output_tensor %>%
            keras::layer_conv_1d(
              kernel_size = kernel_size[i],
              padding = padding,
              activation = "relu",
              filters = filters[i],
              strides = strides[i],
              dilation_rate = dilation_rate[i],
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              use_bias = use_bias
            )
          output_tensor <- output_tensor %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
        }
        if (!is.null(pool_size) && pool_size[i] > 1) {
          output_tensor <- output_tensor %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
        }
        if (residual_block){
          output_tensor <- keras::layer_add(list(output_tensor, residual_layer))
        }
      }
    }
  } else {
    if (zero_mask) {
      output_tensor <- input_tensor %>% keras::layer_masking()
    } else {
      output_tensor <- input_tensor
    }
  }
  # lstm layers
  if (layers.lstm > 0) {
    if (layers.lstm > 1) {
      if (bidirectional) {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor <- output_tensor %>%
            keras::bidirectional(
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              keras::layer_lstm(
                units = layer_lstm[i],
                return_sequences = TRUE,
                dropout = dropout_lstm,
                recurrent_dropout = recurrent_dropout_lstm,
                stateful = stateful,
                recurrent_activation = "sigmoid"
              )
            )
        }
      } else {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor <- output_tensor %>%
            keras::layer_lstm(
              layer_lstm[i],
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              return_sequences = TRUE,
              dropout = dropout_lstm,
              recurrent_dropout = recurrent_dropout_lstm,
              stateful = stateful,
              recurrent_activation = "sigmoid"
            )
        }
      }
    }
    # last LSTM layer
    if (bidirectional) {
      output_tensor <- output_tensor %>%
        keras::bidirectional(
          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
                            stateful = stateful, recurrent_activation = "sigmoid")
        )
    } else {
      output_tensor <- output_tensor %>%
        keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
                          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                          dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm, stateful = stateful,
                          recurrent_activation = "sigmoid")
    }
  }
  
  if (gap) {
    if (layers.lstm != 0) {
      stop("Global average pooling not compatible with using LSTM layer")
    }
    output_tensor <- output_tensor %>% keras::layer_global_average_pooling_1d()
  } else {
    if (layers.lstm == 0) {
      output_tensor <- output_tensor %>% keras::layer_flatten()
    }
  }
  
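  # optional extra input layers for labels, concatenated with the sequence features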
  if (!is.null(label_input)) {
    input_label_list <- list()
    for (i in 1:length(label_input)) {
      if (!stateful) {
        eval(parse(text = paste0("label_input_layer_", as.character(i), "<- keras::layer_input(c(label_input[i]))")))
      } else {
        eval(parse(text = paste0("label_input_layer_", as.character(i), "<- keras::layer_input(batch_shape = c(batch_size, label_input[i]))")))
      }
      input_label_list[[i]] <- eval(parse(text = paste0("label_input_layer_", as.character(i))))
    }
    output_tensor <- keras::layer_concatenate(c(
      input_label_list, output_tensor
    )
    )
  }
  
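  # hidden dense layers (all but the last entry of layer_dense)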
  if (length(layer_dense) > 1) {
    for (i in 1:(length(layer_dense) - 1)) {
      if (!is.null(dropout_dense)) output_tensor <- output_tensor %>% keras::layer_dropout(dropout_dense[i])
      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
    }
  }
  
  if (num_output_layers == 1) {
    if (!is.null(dropout_dense)) output_tensor <- output_tensor %>% keras::layer_dropout(dropout_dense[length(dropout_dense)])
    output_tensor <- output_tensor %>%
      keras::layer_dense(units = num_targets, activation = last_layer_activation, dtype = "float32")
  } else {
    output_list <- list()
    for (i in 1:num_output_layers) {
      layer_name <- paste0("output_", i, "_", num_output_layers)
      if (!is.null(dropout_dense)) {
        output_list[[i]] <- output_tensor %>% keras::layer_dropout(dropout_dense[length(dropout_dense)])
        output_list[[i]] <- output_list[[i]] %>%
          keras::layer_dense(units = num_targets, activation = last_layer_activation, name = layer_name, dtype = "float32")
      } else {
        output_list[[i]] <- output_tensor %>%
          keras::layer_dense(units = num_targets, activation = last_layer_activation, name = layer_name, dtype = "float32")
      }
    }
  }
  
  if (!is.null(label_input)) {
    label_inputs <- list()
    for (i in 1:length(label_input)) {
      eval(parse(text = paste0("label_inputs$label_input_layer_", as.character(i), "<- label_input_layer_", as.character(i))))
    }
    if (num_output_layers == 1) {
      model <- keras::keras_model(inputs = list(label_inputs, input_tensor), outputs = output_tensor)
    } else {
      model <- keras::keras_model(inputs = list(label_inputs, input_tensor), outputs = output_list)
    }
  } else {
    if (num_output_layers == 1) {
      model <- keras::keras_model(inputs = input_tensor, outputs = output_tensor)
    } else {
      model <- keras::keras_model(inputs = input_tensor, outputs = output_list)
    }
  }
  
  if (compile) {
    model <- compile_model(model = model, label_smoothing = label_smoothing, layer_dense = layer_dense,
                           solver = solver, learning_rate = learning_rate, loss_fn = loss_fn, 
                           num_output_layers = num_output_layers, label_noise_matrix = label_noise_matrix,
                           bal_acc = bal_acc, f1_metric = f1_metric, auc_metric = auc_metric)
  }
  
  argg <- c(as.list(environment()))
  model <- add_hparam_list(model, argg)
  
  if (verbose) model$summary()
  return(model)
}


#' @title Create LSTM/CNN network to predict middle part of a sequence
#'
#' @description
#' Creates a network consisting of an arbitrary number of CNN, LSTM and dense layers.
#' The function creates two subnetworks, each consisting of (optional) CNN layers followed by an arbitrary number of LSTM layers. The outputs of the last LSTM layers
#' are concatenated and followed by one or more dense layers. The last layer is a dense layer.
#' The network tries to predict the target in the middle of a sequence. If the input is AACCTAAGG, the input tensors should correspond to x1 = AACC, x2 = GGAA (the second half read backwards) and y = T.
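#' The two input layers receive \code{ceiling(maxlen/2)} and \code{floor(maxlen/2)} positions of the sequence, respectively.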
#' 
#' @inheritParams create_model_lstm_cnn
#' @examplesIf reticulate::py_module_available("tensorflow")
#' create_model_lstm_cnn_target_middle(
#'   maxlen = 500,
#'   vocabulary_size = 4,
#'   kernel_size = c(8, 8, 8),
#'   filters = c(16, 32, 64),
#'   pool_size = c(3, 3, 3),
#'   layer_lstm = c(32, 64),
#'   layer_dense = c(128, 4),
#'   learning_rate = 0.001)
#'  
#' @returns A keras model with two input layers, consisting of LSTM, CNN and dense layers.
#' @export
create_model_lstm_cnn_target_middle <- function(
    maxlen = 50,
    dropout_lstm = 0,
    recurrent_dropout_lstm = 0,
    layer_lstm = 128,
    solver = "adam",
    learning_rate = 0.001,
    vocabulary_size = 4,
    bidirectional = FALSE,
    stateful = FALSE,
    batch_size = NULL,
    padding = "same",
    compile = TRUE,
    layer_dense = NULL,
    kernel_size = NULL,
    filters = NULL,
    pool_size = NULL,
    strides = NULL,
    label_input = NULL,
    zero_mask = FALSE,
    label_smoothing = 0,
    label_noise_matrix = NULL,
    last_layer_activation = "softmax",
    loss_fn = "categorical_crossentropy",
    num_output_layers = 1,
    f1_metric = FALSE,
    auc_metric = FALSE,
    bal_acc = FALSE,
    verbose = TRUE,
    batch_norm_momentum = 0.99,
    model_seed = NULL,
    mixed_precision = FALSE,
    mirrored_strategy = NULL) {
  
  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
  
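  # when more than one GPU is available (or mirrored_strategy = TRUE), rebuild the
  # model inside a MirroredStrategy scope via a recursive call with the flag disabled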
  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
  if (mirrored_strategy) {
    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
    with(mirrored_strategy$scope(), { 
      argg <- as.list(environment())
      argg$mirrored_strategy <- FALSE
      model <- do.call(create_model_lstm_cnn_target_middle, argg)
    })
    return(model)
  }
  
  layer_dense <- as.integer(layer_dense)
  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
  use.cnn <- !is.null(kernel_size)
  num_targets <- layer_dense[length(layer_dense)]
  layers.lstm <- length(layer_lstm)
  
  stopifnot(length(layer_lstm) == 1 | (length(layer_lstm) ==  layers.lstm))
  stopifnot(maxlen > 0)
  stopifnot(dropout_lstm <= 1 & dropout_lstm >= 0)
  stopifnot(recurrent_dropout_lstm <= 1 & recurrent_dropout_lstm >= 0)
  
  if (is.null(strides)) {
    strides <- rep(1L, length(filters))
  }
  
  if (use.cnn) {
    same_length <- (length(kernel_size) == length(filters)) & (length(filters) == length(strides))
    if (!same_length) {
      stop("kernel_size, filters and strides must have the same length")
    }
  }
  
  # length of split sequences
  maxlen_1 <- ceiling(maxlen/2)
  maxlen_2 <- floor(maxlen/2)
  if (stateful) {
    input_tensor_1 <- keras::layer_input(batch_shape = c(batch_size, maxlen_1, vocabulary_size))
  } else {
    input_tensor_1 <- keras::layer_input(shape = c(maxlen_1, vocabulary_size))
  }
  
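  # first subnetwork: processes the first half of the sequence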
  if (use.cnn) {
    for (i in 1:length(filters)) {
      if (i == 1) {
        output_tensor_1 <- input_tensor_1 %>%
          keras::layer_conv_1d(
            kernel_size = kernel_size[i],
            padding = padding,
            activation = "relu",
            filters = filters[i],
            strides = strides[i],
            input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL)
          )
        if (!is.null(pool_size) && pool_size[i] > 1) {
          output_tensor_1 <- output_tensor_1 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
        }
        output_tensor_1 <- output_tensor_1 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
      } else {
        output_tensor_1 <- output_tensor_1 %>%
          keras::layer_conv_1d(
            kernel_size = kernel_size[i],
            padding = padding,
            activation = "relu",
            strides = strides[i],
            filters = filters[i]
          )
        if (!is.null(pool_size) && pool_size[i] > 1) {
          output_tensor_1 <- output_tensor_1 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
        }
        output_tensor_1 <- output_tensor_1 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
      }
    }
  } else {
    if (zero_mask) {
      output_tensor_1 <- input_tensor_1 %>% keras::layer_masking()
    } else {
      output_tensor_1 <- input_tensor_1
    }
  }
  
  # lstm layers
  if (layers.lstm > 0) {
    if (layers.lstm > 1) {
      if (bidirectional) {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_1 <- output_tensor_1 %>%
            keras::bidirectional(
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              keras::layer_lstm(
                units = layer_lstm[i],
                return_sequences = TRUE,
                dropout = dropout_lstm,
                recurrent_dropout = recurrent_dropout_lstm,
                stateful = stateful,
                recurrent_activation = "sigmoid"
              )
            )
        }
      } else {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_1 <- output_tensor_1 %>%
            keras::layer_lstm(
              units = layer_lstm[i],
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              return_sequences = TRUE,
              dropout = dropout_lstm,
              recurrent_dropout = recurrent_dropout_lstm,
              stateful = stateful,
              recurrent_activation = "sigmoid"
            )
        }
      }
    }
    
    # last LSTM layer
    if (bidirectional) {
      output_tensor_1 <- output_tensor_1 %>%
        keras::bidirectional(
          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
                            stateful = stateful, recurrent_activation = "sigmoid")
        )
    } else {
      output_tensor_1 <- output_tensor_1 %>%
        keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
                          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                          dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm, stateful = stateful,
                          recurrent_activation = "sigmoid")
    }
  }
  
  if (stateful) {
    input_tensor_2 <- keras::layer_input(batch_shape = c(batch_size, maxlen_2, vocabulary_size))
  } else {
    input_tensor_2 <- keras::layer_input(shape = c(maxlen_2, vocabulary_size))
  }
  
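  # second subnetwork: processes the second half of the sequence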
  if (use.cnn) {
    for (i in 1:length(filters)) {
      if (i == 1) {
        output_tensor_2 <- input_tensor_2 %>%
          keras::layer_conv_1d(
            kernel_size = kernel_size[i],
            padding = padding,
            activation = "relu",
            filters = filters[i],
            strides = strides[i],
            input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL)
          )
        if (!is.null(pool_size) && pool_size[i] > 1) {
          output_tensor_2 <- output_tensor_2 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
        }
        output_tensor_2 <- output_tensor_2 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
      } else {
        output_tensor_2 <- output_tensor_2 %>%
          keras::layer_conv_1d(
            kernel_size = kernel_size[i],
            padding = padding,
            activation = "relu",
            strides = strides[i],
            filters = filters[i]
          )
        if (!is.null(pool_size) && pool_size[i] > 1) {
          output_tensor_2 <- output_tensor_2 %>% keras::layer_max_pooling_1d(pool_size = pool_size[i])
        }
        output_tensor_2 <- output_tensor_2 %>% keras::layer_batch_normalization(momentum = batch_norm_momentum)
      }
    }
  } else {
    if (zero_mask) {
      output_tensor_2 <- input_tensor_2 %>% keras::layer_masking()
    } else {
      output_tensor_2 <- input_tensor_2
    }
  }
  
  
  # lstm layers
  if (layers.lstm > 0) {
    if (layers.lstm > 1) {
      if (bidirectional) {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_2 <- output_tensor_2 %>%
            keras::bidirectional(
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              keras::layer_lstm(
                units = layer_lstm[i],
                return_sequences = TRUE,
                dropout = dropout_lstm,
                recurrent_dropout = recurrent_dropout_lstm,
                stateful = stateful,
                recurrent_activation = "sigmoid"
              )
            )
        }
      } else {
        for (i in 1:(layers.lstm - 1)) {
          output_tensor_2 <- output_tensor_2 %>%
            keras::layer_lstm(
              units = layer_lstm[i],
              input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
              return_sequences = TRUE,
              dropout = dropout_lstm,
              recurrent_dropout = recurrent_dropout_lstm,
              stateful = stateful,
              recurrent_activation = "sigmoid"
            )
        }
      }
    }
    
    # last LSTM layer
    if (bidirectional) {
      output_tensor_2 <- output_tensor_2 %>%
        keras::bidirectional(
          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
          keras::layer_lstm(units = layer_lstm[length(layer_lstm)], dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm,
                            stateful = stateful, recurrent_activation = "sigmoid")
        )
    } else {
      output_tensor_2 <- output_tensor_2 %>%
        keras::layer_lstm(units = layer_lstm[length(layer_lstm)],
                          input_shape = switch(stateful + 1, c(maxlen, vocabulary_size), NULL),
                          dropout = dropout_lstm, recurrent_dropout = recurrent_dropout_lstm, stateful = stateful,
                          recurrent_activation = "sigmoid")
    }
  }
  
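  # merge the outputs of the two subnetworks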
  output_tensor <- keras::layer_concatenate(list(output_tensor_1, output_tensor_2))
  
  if (layers.lstm == 0) {
    output_tensor <- output_tensor %>% keras::layer_flatten()
  }
  
  if (!is.null(label_input)) {
    input_label_list <- list()
    for (i in 1:length(label_input)) {
      if (!stateful) {
        eval(parse(text = paste0("label_input_layer_", as.character(i), "<- keras::layer_input(c(label_input[i]))")))
      } else {
        eval(parse(text = paste0("label_input_layer_", as.character(i), "<- keras::layer_input(batch_shape = c(batch_size, label_input[i]))")))
      }
      input_label_list[[i]] <- eval(parse(text = paste0("label_input_layer_", as.character(i))))
    }
    output_tensor <- keras::layer_concatenate(c(
      input_label_list, output_tensor
    )
    )
  }
  
  if (length(layer_dense) > 1) {
    for (i in 1:(length(layer_dense) - 1)) {
      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
    }
  }
  
  if (num_output_layers == 1) {
    output_tensor <- output_tensor %>%
      keras::layer_dense(units = num_targets, activation = last_layer_activation, dtype = "float32")
  }  else {
    output_list <- list()
    for (i in 1:num_output_layers) {
      layer_name <- paste0("output_", i, "_", num_output_layers)
      output_list[[i]] <- output_tensor %>%
        keras::layer_dense(units = num_targets, activation = last_layer_activation, name = layer_name, dtype = "float32")
    }
  }
  
  # build the keras model from its input and output tensors
  if (!is.null(label_input)) {
    label_inputs <- list()
    for (i in 1:length(label_input)) {
      eval(parse(text = paste0("label_inputs$label_input_layer_", as.character(i), "<- label_input_layer_", as.character(i))))
    }
    model <- keras::keras_model(inputs = c(label_inputs, input_tensor_1, input_tensor_2), outputs = output_tensor)
  } else {
    model <- keras::keras_model(inputs = list(input_tensor_1, input_tensor_2), outputs = output_tensor)
  }
  
  if (compile) {
    model <- compile_model(model = model, label_smoothing = label_smoothing, layer_dense = layer_dense,
                           solver = solver, learning_rate = learning_rate, loss_fn = loss_fn, 
                           num_output_layers = num_output_layers, label_noise_matrix = label_noise_matrix,
                           bal_acc = bal_acc, f1_metric = f1_metric, auc_metric = auc_metric)
  }
  
  argg <- c(as.list(environment()))
  model <- add_hparam_list(model, argg)
  reticulate::py_set_attr(x = model, name = "hparam", value = model$hparam)
  
  if (verbose) model$summary()
  return(model)
}