R/create_model_utils.R


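# Return the names of all layers of a keras model.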
get_layer_names <- function(model) {
  
  n <- length(model$layers)
  layer_names <- vector("character", n)
  for (i in 1:n) {
    layer_names[i] <- model$layers[[i]]$name
  }
  layer_names
} 

#' Compile model
#' 
#' @inheritParams create_model_lstm_cnn
#' @param model A keras model.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' 
#' model <- create_model_lstm_cnn(layer_lstm = 8, compile = FALSE)
#' model <- compile_model(model = model,
#'                        solver = 'adam',
#'                        learning_rate = 0.01,
#'                        loss_fn = 'categorical_crossentropy')
#' 
#' @returns A compiled keras model.
#' @export
compile_model <- function(model, solver, learning_rate, loss_fn, label_smoothing = 0,
                          num_output_layers = 1, label_noise_matrix = NULL,
                          bal_acc = FALSE, f1_metric = FALSE, auc_metric = FALSE,
                          layer_dense = NULL) {
  
  optimizer <- set_optimizer(solver, learning_rate) 
  if (num_output_layers == 1) num_targets <- model$output_shape[[2]]
  
  # add metrics
  if (loss_fn == "binary_crossentropy") {
    model_metrics <- c(tensorflow::tf$keras$metrics$BinaryAccuracy(name = "acc"))
  } else if (loss_fn == "sparse_categorical_crossentropy") {
    model_metrics <- tensorflow::tf$keras$metrics$SparseCategoricalAccuracy(name = "acc")
  } else {
    model_metrics <- c("acc")
  } 
  
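  # unique temporary directory, passed to the balanced accuracy wrapper below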
  cm_dir <- NULL
  if (num_output_layers == 1) {
    cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
    while (dir.exists(cm_dir)) {
      cm_dir <- file.path(tempdir(), paste(sample(letters, 7), collapse = ""))
    }
    dir.create(cm_dir)
    model$cm_dir <- cm_dir
    
    if (loss_fn == "sparse_categorical_crossentropy") {
      
      if (length(model$outputs) == 1 && length(model$output_shape) == 3) {
        loss_fn <- tensorflow::tf$keras$losses$SparseCategoricalCrossentropy(
          reduction=tensorflow::tf$keras$losses$Reduction$NONE
        )
      }
    }
    
    # loss_fn may already be a loss object (replaced above), so check it is still a string
    if (is.character(loss_fn) && loss_fn == "categorical_crossentropy") {
      
      if (length(model$outputs) == 1 && length(model$output_shape) == 3) {
        loss_fn <- tensorflow::tf$keras$losses$CategoricalCrossentropy(
          label_smoothing=label_smoothing,
          reduction=tensorflow::tf$keras$losses$Reduction$NONE,
          name='categorical_crossentropy'
        )
      }
    }  
    
    if (is.character(loss_fn) &&
        (loss_fn == "categorical_crossentropy" || loss_fn == "sparse_categorical_crossentropy")) {
      
      if (bal_acc) {
        macro_average_cb <- balanced_acc_wrapper(num_targets, cm_dir)
        model_metrics <- c(macro_average_cb, "acc")
      }
      
      if (f1_metric) {
        f1 <- f1_wrapper(num_targets)
        model_metrics <- c(model_metrics, f1)
      }
    }
    
    if (auc_metric) {
      auc <- auc_wrapper(model_output_size = layer_dense[length(layer_dense)],
                         loss = loss_fn)
      model_metrics <- c(model_metrics, auc)
    }
    
  }
  
  if (label_smoothing > 0 && !is.null(label_noise_matrix)) {
    stop("Cannot use label smoothing and label noise at the same time; set label_smoothing = 0 or label_noise_matrix = NULL.")
  }
  
  if (label_smoothing > 0) {
    if (loss_fn == "categorical_crossentropy") {
      smooth_loss <- tensorflow::tf$keras$losses$CategoricalCrossentropy(label_smoothing = label_smoothing, name = "smooth_loss")
    } else if (loss_fn == "binary_crossentropy") {
      smooth_loss <- tensorflow::tf$keras$losses$BinaryCrossentropy(label_smoothing = label_smoothing, name = "smooth_loss")
    } else {
      stop("label_smoothing > 0 requires 'categorical_crossentropy' or 'binary_crossentropy' loss")
    }
    model %>% keras::compile(loss = smooth_loss,
                             optimizer = optimizer, metrics = model_metrics)
  } else if (!is.null(label_noise_matrix)) {
    row_sums <- rowSums(label_noise_matrix)
    if (!all(row_sums == 1)) {
      warning("Rows of label_noise_matrix don't sum to 1")
    }
    noisy_loss <- noisy_loss_wrapper(solve(label_noise_matrix))
    model %>% keras::compile(loss =  noisy_loss,
                             optimizer = optimizer, metrics = model_metrics)
  } else {
    model %>% keras::compile(loss = loss_fn,
                             optimizer = optimizer, metrics = model_metrics)
  }
  
  model
  
}


#' Load checkpoint 
#' 
#' Load checkpoint from directory. Chooses checkpoint based on a given condition:
#' best validation accuracy, best validation loss, last epoch or a specific epoch number.
#' 
#' @inheritParams create_model_lstm_cnn
#' @param cp_path A directory containing checkpoints or a single checkpoint file. 
#' If a directory, choose checkpoint based on `cp_filter` or `ep_index`.
#' @param cp_filter Condition to choose checkpoint if `cp_path` is a directory.
#' Either "acc" for best validation accuracy, "loss" for best validation loss or "last_ep" for last epoch.
#' @param ep_index Load checkpoint from specific epoch number. If not `NULL`, has priority over `cp_filter`.
#' @param compile Whether to load compiled model.
#' @param re_compile Whether to compile model with parameters from `learning_rate`,
#' `solver` and `loss`.  
#' @param add_custom_object Named list of custom objects.
#' @param verbose Whether to print chosen checkpoint path.
#' @param loss Loss function. Only used if model gets compiled.
#' @param margin Margin for contrastive loss, see \link{loss_cl}.
#' @examples
#' \donttest{
#' library(keras)
#' model <- create_model_lstm_cnn(layer_lstm = 8)
#' checkpoint_folder <- tempfile()
#' dir.create(checkpoint_folder)
#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.007-val_loss11.07-val_acc0.6.hdf5'))
#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.019-val_loss8.74-val_acc0.7.hdf5'))
#' keras::save_model_hdf5(model, file.path(checkpoint_folder, 'Ep.025-val_loss0.03-val_acc0.8.hdf5'))
#' model <- load_cp(cp_path = checkpoint_folder, cp_filter = "last_ep")
#' }
#' @returns A keras model loaded from a checkpoint.
#' @export
load_cp <- function(cp_path, cp_filter = "last_ep", ep_index = NULL, compile = FALSE,
                    learning_rate = 0.01, solver = "adam", re_compile = FALSE,
                    loss = "categorical_crossentropy",
                    add_custom_object = NULL, margin = 1,
                    verbose = TRUE, mirrored_strategy = FALSE) {
  
  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
  if (mirrored_strategy) {
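    # load the model inside the distribution scope; the recursive call sets
    # mirrored_strategy = FALSE to avoid infinite recursion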
    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
    with(mirrored_strategy$scope(), { 
      argg <- as.list(environment())
      argg$mirrored_strategy <- FALSE
      model <- do.call(load_cp, argg)
    })
    return(model)
  }
  
  # custom objects to load transformer architecture
  custom_objects <- list(
    "layer_pos_embedding" = layer_pos_embedding_wrapper(),
    "layer_pos_sinusoid" = layer_pos_sinusoid_wrapper(),
    "layer_transformer_block" = layer_transformer_block_wrapper(),
    "layer_euc_dist" = layer_euc_dist_wrapper(),
    "layer_cosine_sim" = layer_cosine_sim_wrapper(),
    "layer_aggregate_time_dist" = layer_aggregate_time_dist_wrapper(),
    "loss_cl_margin___margin_" = loss_cl(margin=margin)
  )
  
  if (!is.null(add_custom_object)) {
    for (i in seq_along(add_custom_object)) {
      custom_objects[[names(add_custom_object)[i]]] <- add_custom_object[[i]]
    }
  }
  
  cp <- get_cp(cp_path = cp_path, cp_filter = cp_filter,
               ep_index = ep_index, verbose = verbose) 
  
  model <- keras::load_model_hdf5(cp, compile = compile, custom_objects = custom_objects)
  
  if (re_compile) {
    optimizer <- set_optimizer(solver, learning_rate)
    model %>% keras::compile(loss = loss,
                             optimizer = optimizer,
                             metrics = model$metrics)
  }
  
  return(model)
  
}

get_cp <- function(cp_path, cp_filter = "last_ep", ep_index = NULL, verbose = TRUE) {
  
  if (!is.null(cp_filter)) {
    stopifnot(cp_filter %in% c("acc", "loss", "last_ep"))
    if (!is.null(ep_index)) {
      cp_filter <- NULL
    }
  } 
  
  is_directory <- dir.exists(cp_path)
  if (is_directory) {
    cps <- list.files(cp_path, full.names = TRUE)
    files_basename <- basename(cps)
    stopifnot(xor(is.null(cp_filter), is.null(ep_index)))
  } else {
    stopifnot(file.exists(cp_path))
    cp <- cp_path
  } 
  
  if (is_directory && !is.null(cp_filter)) {
    
    if (cp_filter == "acc") {
      if (!all(stringr::str_detect(files_basename, "acc"))) {
        stop("No accuracy information in checkpoint names ('acc' string), use other metric.")
      }
      acc_scores <- files_basename %>% stringr::str_extract("acc\\d++\\.\\d++") %>% 
        stringr::str_remove("acc") %>% as.numeric()
      # use later epoch for ties: with ties.method = "first", tied maxima get
      # increasing ranks, so which.max picks the last occurrence
      index <- which.max(rank(acc_scores, ties.method = "first"))
    }
    
    if (cp_filter == "loss") {
      if (!all(stringr::str_detect(files_basename, "loss"))) {
        stop("No loss information in checkpoint names ('loss' string), use other metric.")
      }
      loss_scores <- files_basename %>% stringr::str_extract("loss\\d++\\.\\d++") %>% 
        stringr::str_remove("loss") %>% as.numeric()
      index <- which.min(rank(loss_scores, ties.method = "last"))
    }
    
    if (cp_filter == "last_ep") {
      ep_scores <- files_basename %>% stringr::str_extract("Ep\\.\\d++") %>% 
        stringr::str_remove("Ep\\.") %>% as.numeric()
      index <- which.max(ep_scores)
    }
    
  }
  
  if (is_directory && !is.null(ep_index)) {
    ep_scores <- files_basename %>% stringr::str_extract("Ep\\.\\d++") %>% 
      stringr::str_remove("Ep\\.") %>% as.numeric()
    index <- which(ep_scores == ep_index)
    if (length(index) == 0) stop("No checkpoint found for epoch ", ep_index)
  }
  
  if (is_directory) {
    cp <- cps[index]
  }
  
  if (verbose) {
    cat("Using checkpoint", cp, "\n")
    cat("(Date created:", as.character(file.info(cp)$mtime), ")\n")
  }
  
  return(cp)
  
}
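
# Usage sketch for get_cp (file names follow the
# "Ep.<epoch>-val_loss<loss>-val_acc<acc>.hdf5" pattern parsed above):
# cp_dir <- tempfile(); dir.create(cp_dir)
# file.create(file.path(cp_dir, "Ep.001-val_loss0.90-val_acc0.51.hdf5"))
# file.create(file.path(cp_dir, "Ep.002-val_loss0.40-val_acc0.73.hdf5"))
# get_cp(cp_dir, cp_filter = "acc")  # returns the path of the Ep.002 file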


# Temporary fix for keras metric bugs: evaluating on a dummy batch forces the
# model to populate its metrics; recompiling removes duplicated and loss-named entries.
manage_metrics <- function(model, compile = FALSE) {
  
  dummy_gen <- generator_dummy(model, batch_size = 1)
  z <- dummy_gen()
  x <- z[[1]]
  y <- z[[2]]
  
  if (length(model$metrics) == 0) {
    suppressMessages(
      eval <- model$evaluate(x, y, verbose = 0L)
    )
  }
  
  if (compile) {
    metric_names <- vector("character", length(model$metrics))
    for (i in seq_along(model$metrics)) {
      metric_names[i] <- model$metrics[[i]]$name
    }
    
    duplicated_index <- duplicated(metric_names)
    loss_index <- stringr::str_detect(metric_names, "loss")
    index <- duplicated_index | loss_index
    
    # remove double metrics
    model <- model %>% keras::compile(loss = model$loss,
                                      optimizer = model$optimizer,
                                      metrics = model$metrics[!index])
    suppressMessages(
      eval <- model$evaluate(x, y, verbose = 0L)
    )
  }
  
  return(model)
  
}


#' Get activation functions of output layers
#' 
#' Get activation functions of output layers.
#' 
#' @param model A keras model.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model <- create_model_lstm_cnn(
#'   maxlen = 50,
#'   layer_lstm = 8,
#'   layer_dense = c(64, 2),
#'   verbose = FALSE)
#' get_output_activations(model)
#' 
#' @returns Character vector with names of activation functions of model outputs.
#' @export
get_output_activations <- function(model) {
  
  out_names <- model$output_names
  act_vec <- vector("character", length(out_names))
  count <- 1
  
  for (layer_name in out_names) {
    act_name <- model$get_layer(layer_name)$get_config()$activation
    if (is.null(act_name)) act_name <- "linear"
    act_vec[count] <- act_name
    count <- count + 1
  }
  return(act_vec)
}


set_optimizer <- function(solver = "adam", learning_rate = 0.01) {
  
  stopifnot(solver %in% c("adam", "adagrad", "rmsprop", "sgd"))
  
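  # older keras versions named the learning-rate argument 'lr' instead of 'learning_rate'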
  named_lr <- "lr" %in% names(formals(keras::optimizer_adam))
  if (named_lr) {
    arg_list <- list(lr = learning_rate)
  } else {
    arg_list <- list(learning_rate = learning_rate)
  }
  
  if (solver == "adam")
    keras_optimizer <- do.call(keras::optimizer_adam, arg_list)
  if (solver == "adagrad")
    keras_optimizer <- do.call(keras::optimizer_adagrad, arg_list)
  if (solver == "rmsprop")
    keras_optimizer <- do.call(keras::optimizer_rmsprop, arg_list)
  if (solver == "sgd")
    keras_optimizer <- do.call(keras::optimizer_sgd, arg_list)
  
  return(keras_optimizer)
  
}
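
# Usage sketch:
# opt <- set_optimizer("sgd", learning_rate = 0.001)
# model %>% keras::compile(loss = "categorical_crossentropy", optimizer = opt)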


#' Get solver and learning_rate from model.
#'
#' @returns Keras optimizer.
#' @noRd
get_optimizer <- function(model) {
  solver <- stringr::str_to_lower(model$optimizer$get_config()[["name"]])
  learning_rate <- keras::k_eval(model$optimizer$lr)
  if (solver == "adam") {
    optimizer <-  keras::optimizer_adam(learning_rate)
  }
  if (solver == "adagrad") {
    optimizer <- keras::optimizer_adagrad(learning_rate)
  }
  if (solver == "rmsprop") {
    optimizer <- keras::optimizer_rmsprop(learning_rate)
  }
  if (solver == "sgd") {
    optimizer <- keras::optimizer_sgd(learning_rate)
  }
  return(optimizer)
}

#' Replace input layer
#'
#' Replace first layer of model with new input layer of different shape. Only works for sequential models that
#' use CNN and LSTM layers.
#'
#' @param model A keras model.
#' @param input_shape The new input shape vector (without batch size).
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model_1 <-  create_model_lstm_cnn(
#'   maxlen = 50,
#'   kernel_size = c(10, 10),
#'   filters = c(64, 128),
#'   pool_size = c(2, 2),
#'   layer_lstm = c(32),
#'   verbose = FALSE,
#'   layer_dense = c(64, 2))
#' model <- reshape_input(model_1, input_shape = c(120, 4))
#' model
#' 
#' @returns A keras model with the new input shape.
#' @export
reshape_input <- function(model, input_shape) {
  
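  # feed a new input layer through every layer of the original model after its input layer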
  in_layer <- keras::layer_input(shape = input_shape)
  out_layer <- in_layer
  for (i in 2:length(model$layers)) {
    layer_name <- model$layers[[i]]$name
    out_layer <- out_layer %>% model$get_layer(layer_name)()
  }
  new_model <- tensorflow::tf$keras$Model(in_layer, out_layer)
  return(new_model)
}


#' Check if layer is in model
#' 
#' @returns Error message if model does not contain layer of certain name.
#' @noRd
check_layer_name <- function(model, layer_name) {
  layer_names <- get_layer_names(model)
  if (!(layer_name %in% layer_names)) {
    stop(paste0("Model has no layer named '", layer_name, "'"))
  }
}


#' Remove layers from model and add dense layers
#' 
#' Takes a model as input and removes all layers after a certain layer, specified by the \code{layer_name} argument.
#' Optionally adds dense layers on top of the pruned model. The model can have multiple output layers with separate loss/activation functions.
#' You can freeze all weights of the pruned model by setting \code{freeze_base_model = TRUE}.
#'
#' @inheritParams create_model_lstm_cnn
#' @param layer_name Name of last layer to use from old model.
#' @param model A keras model. 
#' @param dense_layers List of vectors specifying number of units for each dense layer. If this is a list of length > 1, model
#' has multiple output layers.
#' @param shared_dense_layers Vector with number of units for each shared dense layer. These layers are connected on top of the layer in
#' argument `layer_name` and are shared before the model splits into multiple output layers. Don't use if the model has just one output layer
#' (use only `dense_layers`).
#' @param last_activation List of activation functions, one for the last layer of each `dense_layers` entry. Either `"softmax"`, `"sigmoid"` or `"linear"`.
#' @param output_names List of names for each output layer.
#' @param losses List of loss functions, one for each output.
#' @param verbose Whether to print summaries of the original and new model.
#' @param dropout List of vectors with dropout rates for each new dense layer.
#' @param dropout_shared Vectors of dropout rates for dense layer from `shared_dense_layers`.
#' @param freeze_base_model Whether to freeze all weights before new dense layers.
#' @param compile Boolean, whether to compile the new model.
#' @param flatten Whether to add flatten layer before new dense layers.
#' @param learning_rate Learning rate if `compile = TRUE`; if `NULL`, the learning rate of the old model is used.
#' @param global_pooling "max_ch_first" for global max pooling with channel first
#' ([keras docs](https://keras.io/api/layers/pooling_layers/global_average_pooling1d/)),
#' "max_ch_last" for global max pooling with channel last, "average_ch_first" for global average pooling with channel first, 
#' "average_ch_last" for global average pooling with channel last or `NULL` for no global pooling. 
#' "both_ch_first" or "both_ch_last" to combine average and max pooling. "all" for all 4 options at once.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model_1 <- create_model_lstm_cnn(layer_lstm = c(64, 64),
#'                                  maxlen = 50,
#'                                  layer_dense = c(32, 4), 
#'                                  verbose = FALSE)
#' # get name of second to last layer 
#' num_layers <- length(model_1$get_config()$layers)
#' layer_name <- model_1$get_config()$layers[[num_layers-1]]$name
#' # add dense layer with multi outputs and separate loss/activation functions
#' model_2 <- remove_add_layers(model = model_1,
#'                              layer_name = layer_name,
#'                              dense_layers = list(c(32, 16, 1), c(8, 1), c(12, 5)),
#'                              losses = list("binary_crossentropy", "mae",
#'                                            "categorical_crossentropy"),
#'                              last_activation = list("sigmoid", "linear", "softmax"),
#'                              freeze_base_model = TRUE,
#'                              output_names = list("out_1_binary_classification", 
#'                                                  "out_2_regression", 
#'                                                  "out_3_classification")
#' ) 
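#' 
#' # a second, minimal sketch: shared dense layers in front of two output heads
#' # (layer sizes and output names here are arbitrary examples)
#' model_3 <- remove_add_layers(model = model_1,
#'                              layer_name = layer_name,
#'                              shared_dense_layers = c(64),
#'                              dense_layers = list(c(16, 1), c(16, 2)),
#'                              losses = list("binary_crossentropy",
#'                                            "categorical_crossentropy"),
#'                              last_activation = list("sigmoid", "softmax"),
#'                              freeze_base_model = TRUE,
#'                              output_names = list("out_binary", "out_class"))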
#' 
#' @returns A keras model with layers removed from and/or added to some base model. 
#' @export
remove_add_layers <- function(model = NULL,
                              layer_name = NULL,
                              dense_layers = NULL,
                              shared_dense_layers = NULL,
                              last_activation = list("softmax"),
                              output_names = NULL,
                              losses = NULL,
                              verbose = TRUE,
                              dropout = NULL,
                              dropout_shared = NULL,
                              freeze_base_model = FALSE,
                              compile = FALSE,
                              learning_rate = 0.001,
                              solver = "adam",
                              flatten = FALSE,
                              global_pooling = NULL,
                              model_seed = NULL,
                              mixed_precision = FALSE,
                              mirrored_strategy = NULL) {
  
  if (mixed_precision) tensorflow::tf$keras$mixed_precision$set_global_policy("mixed_float16")
  
  if (is.null(mirrored_strategy)) mirrored_strategy <- count_gpu() > 1
  if (mirrored_strategy) {
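    # same strategy-scope recursion as in load_cp above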
    mirrored_strategy <- tensorflow::tf$distribute$MirroredStrategy()
    with(mirrored_strategy$scope(), { 
      argg <- as.list(environment())
      argg$mirrored_strategy <- FALSE
      model <- do.call(remove_add_layers, argg)
    })
    return(model)
  }
  
  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
  if (!is.null(layer_name)) check_layer_name(model, layer_name)
  if (!is.null(shared_dense_layers) && is.null(dense_layers)) {
    stop("You need to specify output layers in dense_layers argument")
  }
  if (!is.null(shared_dense_layers) && length(dense_layers) == 1) {
    stop("If your model has just one output layer, use only dense_layers argument (and set shared_dense_layers = NULL).")
  }
  if (!is.null(global_pooling)) {
    stopifnot(global_pooling %in% c("max_ch_first", "max_ch_last", "average_ch_first", "average_ch_last", "both_ch_first", "both_ch_last", "all"))
  }
  
  if (!is.list(dense_layers)) {
    dense_layers <- list(dense_layers)
  }
  
  if (!is.null(dropout) && !is.list(dropout)) {
    dropout <- list(dropout)
  }
  
  if (is.null(losses)) {
    losses <- list()
    for (i in seq_along(last_activation)) {
      losses[[i]] <- switch(last_activation[[i]],
                            softmax = "categorical_crossentropy",
                            sigmoid = "binary_crossentropy",
                            linear = "mse",
                            stop("Entries of last_activation must be 'softmax', 'sigmoid' or 'linear'"))
    }
  }
  
  if (is.null(output_names)) {
    output_names <- vector("list", length = length(dense_layers))
  }
  
  if (length(dense_layers) != length(last_activation)) {
    stop("Length of dense_layers and last_activation must be the same")
  }
  
  if (length(dense_layers) != length(output_names)) {
    stop("Length of dense_layers and output_names must be the same")
  }
  
  if (!is.null(dropout)) {
    for (i in 1:length(dense_layers)) {
      stopifnot(length(dropout[[i]]) == length(dense_layers[[i]]))
    }
  }
  
  if (verbose) {
    print("Original model: ")
    print(model$summary())
  }
  
  if (!is.null(layer_name)) {
    model_new <- tensorflow::tf$keras$Model(model$input, model$get_layer(layer_name)$output)
    
    if (freeze_base_model) {
      keras::freeze_weights(model_new)
    }
    
    if (!is.null(global_pooling)) {
      if (global_pooling == "max_ch_first") {
        out <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format="channels_first")
      } else if (global_pooling == "max_ch_last") {
        out <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format="channels_last")
      } else if (global_pooling == "average_ch_first") {
        out <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format="channels_first")
      } else if (global_pooling == "average_ch_last") {
        out <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format="channels_last")
      } else if (global_pooling == "both_ch_last") {
        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format="channels_last")
        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format="channels_last")
        out <- keras::layer_concatenate(list(out1, out2))
      } else if (global_pooling == "both_ch_first") {
        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format="channels_first")
        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format="channels_first")
        out <- keras::layer_concatenate(list(out1, out2))
      } else {
        out1 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format="channels_first")
        out2 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format="channels_first")
        out3 <- model_new$output %>% keras::layer_global_average_pooling_1d(data_format="channels_last")
        out4 <- model_new$output %>% keras::layer_global_max_pooling_1d(data_format="channels_last")
        out <- keras::layer_concatenate(list(out1, out2, out3, out4))
      }       
      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
    }
    
    if (flatten) {
      out <- model_new$output %>% keras::layer_flatten()
      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
    }
    
    if (!is.null(shared_dense_layers)) {
      out <- model_new$output 
      for (i in 1:length(shared_dense_layers)) {
        if (!is.null(dropout_shared)) {
          out <- out %>% keras::layer_dropout(dropout_shared[i])
        }
        out <- out %>% keras::layer_dense(shared_dense_layers[i], activation = "relu")
      }
      model_new <- tensorflow::tf$keras$Model(model_new$input, out)
    }
    
    output_list <- list()
    name_dense_index <- 1
    
    if (!is.null(dense_layers[[1]])) {
      for (output_num in 1:length(dense_layers)) {
        for (i in 1:length(dense_layers[[output_num]])) {
          if (i == length(dense_layers[[output_num]])) {
            activation <- last_activation[[output_num]]
            dtype <- "float32"
          } else {
            activation <- "relu"
            dtype <- NULL
          }
          
          if (i == length(dense_layers[[output_num]])) {
            layer_name <- output_names[[output_num]]
          } else {
            layer_name <- paste0("dense_new_", name_dense_index)
            name_dense_index <- name_dense_index + 1
          }
          
          if (is.null(dropout)) {
            if (i == 1) {
              output_list[[output_num]] <- model_new$output %>%
                keras::layer_dense(units = dense_layers[[output_num]][i], activation = activation,
                                   name = layer_name, dtype = dtype)
            } else {
              output_list[[output_num]] <- output_list[[output_num]] %>%
                keras::layer_dense(units = dense_layers[[output_num]][i], activation = activation,
                                   name = layer_name, dtype = dtype)
            }
          } else {
            if (i == 1) {
              output_list[[output_num]] <- model_new$output %>%
                keras::layer_dropout(rate = dropout[[output_num]][i]) %>%
                keras::layer_dense(units = dense_layers[[output_num]][i], activation = activation,
                                   name = layer_name, dtype = dtype)
            } else {
              output_list[[output_num]] <- output_list[[output_num]] %>%
                keras::layer_dropout(rate = dropout[[output_num]][i]) %>%
                keras::layer_dense(units = dense_layers[[output_num]][i], activation = activation,
                                   name = layer_name, dtype = dtype)
            }
          }
        }
      }
      model_new <- tensorflow::tf$keras$Model(model_new$input, output_list)
    }
  }
  
  if (verbose) {
    print("New model: ")
    print(model_new$summary())
    for (i in 1:length(model_new$layers)) {
      cat(model_new$layers[[i]]$name , "trainable:" , model_new$layers[[i]]$trainable, "\n")
    }
  }
  
  if (compile) {
    if (is.null(learning_rate)) {
      learning_rate <- keras::k_eval(model$optimizer$lr)
    }
    
    if (is.null(solver)) {
      solver <- stringr::str_to_lower(model$optimizer$get_config()[["name"]])
    } 
    
    optimizer <- set_optimizer(solver, learning_rate) 
    
    metric_list <- list()
    for (i in 1:length(losses)) {
      metric_list[[i]] <- ifelse(losses[[i]] == "binary_crossentropy", "binary_accuracy", "acc")
    }
    
    model_new %>% keras::compile(loss = losses,
                                 optimizer = optimizer,
                                 metrics = metric_list)
  }
  
  model_new
}


#' Merge two models
#' 
#' Combine two models at certain layers and add dense layer(s) afterwards.
#'
#' @param models List of two models.
#' @param layer_names Vector of length 2 with names of layers to merge.
#' @param freeze_base_model Boolean vector of length 2. Whether to freeze weights of individual models.
#' @inheritParams create_model_lstm_cnn
#' @examplesIf reticulate::py_module_available("tensorflow")
#' model_1 <- create_model_lstm_cnn(layer_lstm = c(64, 64), maxlen = 50, layer_dense = c(32, 4),
#'                                  verbose = FALSE)
#' model_2 <- create_model_lstm_cnn(layer_lstm = c(32), maxlen = 40, 
#'                                  layer_dense = c(8, 2), verbose = FALSE)
#' # get names of second to last layers
#' num_layers_1 <- length(model_1$get_config()$layers)
#' layer_name_1 <- model_1$get_config()$layers[[num_layers_1 - 1]]$name
#' num_layers_2 <- length(model_2$get_config()$layers)
#' layer_name_2 <- model_2$get_config()$layers[[num_layers_2 - 1]]$name
#' # merge models
#' model <- merge_models(models = list(model_1, model_2),
#'                       layer_names = c(layer_name_1, layer_name_2),
#'                       layer_dense = c(6, 2), 
#'                       freeze_base_model = c(FALSE, FALSE)) 
#' 
#' @returns A keras model merging two input models.                        
#' @export
merge_models <- function(models, layer_names, layer_dense, solver = "adam",
                         learning_rate = 0.0001,
                         freeze_base_model = c(FALSE, FALSE),
                         model_seed = NULL) {
  
  if (!is.null(model_seed)) tensorflow::tf$random$set_seed(model_seed)
  
  model_1 <- remove_add_layers(model = models[[1]],
                               layer_name = layer_names[1],
                               dense_layers = NULL,
                               verbose = FALSE,
                               dropout = NULL,
                               freeze_base_model = freeze_base_model[1],
                               compile = FALSE,
                               learning_rate = NULL)
  
  model_2 <- remove_add_layers(model = models[[2]],
                               layer_name = layer_names[2],
                               dense_layers = NULL,
                               verbose = FALSE,
                               dropout = NULL,
                               freeze_base_model = freeze_base_model[2],
                               compile = FALSE,
                               learning_rate = NULL)
  
  # choose optimization method
  optimizer <- set_optimizer(solver, learning_rate) 
  
  output_tensor <- keras::layer_concatenate(list(model_1$output, model_2$output))
  num_targets <- layer_dense[length(layer_dense)]
  
  if (length(layer_dense) > 1) {
    for (i in 1:(length(layer_dense) - 1)) {
      output_tensor <- output_tensor %>% keras::layer_dense(units = layer_dense[i], activation = "relu")
    }
  }
  
  output_tensor <- output_tensor %>%
    keras::layer_dense(units = num_targets, activation = "softmax")
  
  model <- keras::keras_model(inputs = list(model_1$input, model_2$input), outputs = output_tensor)
  model %>% keras::compile(loss = "categorical_crossentropy",
                           optimizer = optimizer, metrics = c("acc"))
  model
}


#' Extract hyperparameters from model
#'
#' @param model A keras model.
#' @returns List of hyperparameters.
#' @noRd
get_hyper_param <- function(model) {
  layers.lstm <- 0
  use.cudnn <- FALSE
  bidirectional <- FALSE
  use.codon.cnn <- FALSE
  learning_rate <- keras::k_eval(model$optimizer$lr)
  solver <- stringr::str_to_lower(model$optimizer$get_config()[["name"]])
  
  layerList <- keras::get_config(model)[["layers"]]
  for (i in seq_along(layerList)) {
    layer_class_name <- layerList[[i]]$class_name
    
    if (layer_class_name == "Conv1D") {
      use.codon.cnn <- TRUE
    }
    
    if (layer_class_name == "CuDNNLSTM") {
      layers.lstm <- layers.lstm + 1
      use.cudnn <- TRUE
      lstm_layer_size <- layerList[[i]]$config$units
      recurrent_dropout_lstm <- 0
      dropout_lstm <- 0
    }
    
    if (layer_class_name == "LSTM") {
      layers.lstm <- layers.lstm + 1
      lstm_layer_size <- layerList[[i]]$config$units
      recurrent_dropout_lstm <- layerList[[i]]$config$recurrent_dropout
      dropout_lstm <- layerList[[i]]$config$dropout
    }
    # TODO: wrong output since bidirectional is layer wrapper (?)
    if (layer_class_name == "Bidirectional") {
      bidirectional <- TRUE
      if (layerList[[i]]$config$layer$class_name == "LSTM") {
        use.cudnn <- FALSE
        layers.lstm <- layers.lstm + 1
        lstm_layer_size <- layerList[[i]]$config$layer$config$units
        recurrent_dropout_lstm <- layerList[[i]]$config$layer$config$recurrent_dropout
        dropout_lstm <- layerList[[i]]$config$layer$config$dropout
      } else {
        use.cudnn <- FALSE
        layers.lstm <- layers.lstm + 1
        lstm_layer_size <- layerList[[i]]$config$layer$config$units
      }
    }
    
    if (layer_class_name == "Dense") {
    }
    
    if (layer_class_name == "Activation") {
    }
  }
  
  list(dropout = dropout_lstm,
       recurrent_dropout = recurrent_dropout_lstm,
       lstm_layer_size =  lstm_layer_size,
       solver = solver,
       use.cudnn = use.cudnn,
       layers.lstm = layers.lstm,
       learning_rate = learning_rate,
       use.codon.cnn = use.codon.cnn,
       bidirectional = bidirectional
  )
}