# R/metrics.R

#' F1 metric
#' 
#' Compute F1 metric. If `loss` is `"categorical_crossentropy"`, `num_targets` must be 2. If
#' `loss` is `"binary_crossentropy"` and `num_targets` > 1, the `y_true` and `y_pred` matrices
#' are flattened to a single vector (rather than computing separate F1 scores for each class).
#'
#' @param num_targets Size of the model output layer.
#' @param loss Loss function of the model; must be `"binary_crossentropy"` or `"categorical_crossentropy"`.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' 
#' y_true <- c(1,0,0,1,1,0,1,0,0)  
#' y_pred <-  c(0.9,0.05,0.05,0.9,0.05,0.05,0.9,0.05,0.05) 
#' \donttest{
#' library(keras)
#' f1_metric <- f1_wrapper(3L, "binary_crossentropy")
#' f1_metric$update_state(y_true, y_pred)
#' f1_metric$result()  
#' 
#' 
#' # add metric to a model
#' 
#' num_targets <- 1
#' model <- create_model_lstm_cnn(maxlen = 20,
#'                                layer_lstm = 8,
#'                                bal_acc = FALSE,
#'                                last_layer_activation = "sigmoid",
#'                                loss_fn = "binary_crossentropy",
#'                                layer_dense = c(8, num_targets))
#' 
#' f1_metric <- f1_wrapper(num_targets, loss = model$loss)
#' model %>% keras::compile(loss = model$loss, 
#'                          optimizer = model$optimizer,
#'                          metrics = c(model$metrics, f1_metric))
#' }                    
#' @returns A keras metric.                          
#' @export
f1_wrapper <- function(num_targets = 2, loss = "binary_crossentropy") {
  
  stopifnot(loss %in% c("binary_crossentropy", "categorical_crossentropy"))
  
  if (loss == "binary_crossentropy" & num_targets > 1) {
    message("Will flatten y_true and y_pred matrices to a single vector for evaluation
            rather than computing separate F1 scores for each class and taking the mean.")
  }
  
  if (loss == "categorical_crossentropy" & num_targets != 2) {
    stop("Output size must be two, when loss is categorical_crossentropy")
  }
  
  f1_stateful <- reticulate::PyClass("f1",
                                     inherit = tensorflow::tf$keras$metrics$Metric,
                                     list(
                                       
                                       `__init__` = function(self, num_targets, loss) {
                                         super()$`__init__`(name = "f1")
                                         self$num_targets <- num_targets
                                         self$f1_score <- 0
                                         self$loss <- loss
                                          # track recall and precision; F1 is derived from the two
                                          self$rc <- tensorflow::tf$keras$metrics$Recall()
                                          self$pr <- tensorflow::tf$keras$metrics$Precision()
                                         NULL
                                       },
                                       
                                       update_state = function(self, y_true, y_pred, sample_weight = NULL) {
                                         if (self$loss == "binary_crossentropy") {
                                           self$rc$update_state(y_true, y_pred)
                                           self$pr$update_state(y_true, y_pred)
                                          } else {
                                            # for categorical_crossentropy with two classes, score the first column only
                                            self$rc$update_state(y_true[ , 1], y_pred[ , 1])
                                            self$pr$update_state(y_true[ , 1], y_pred[ , 1])
                                         }
                                         NULL
                                       },
                                       
                                       result = function(self) {
                                         self$f1_score <- self$compute_f1()
                                         return(self$f1_score)
                                       },
                                       
                                        compute_f1 = function(self) {
                                          # harmonic mean of precision and recall; small constant avoids division by zero
                                          f1 <- (2 * self$pr$result() * self$rc$result()) / (self$pr$result() + self$rc$result() + tensorflow::tf$constant(1e-15))
                                          return(f1)
                                        },
                                       
                                       reset_state = function(self) {
                                         self$rc$reset_state()
                                         self$pr$reset_state()
                                         #self$f1_score$assign(0)
                                         NULL
                                       }
                                       
                                     ))
  
  return(f1_stateful(num_targets = num_targets, loss = loss))
}
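
# A minimal sketch (assuming TensorFlow is available) of the
# categorical_crossentropy case, where only the first of the two output
# columns is scored:
if (FALSE) {
  y_true <- matrix(c(1, 0,
                     0, 1,
                     1, 0), ncol = 2, byrow = TRUE)
  y_pred <- matrix(c(0.9, 0.1,
                     0.2, 0.8,
                     0.6, 0.4), ncol = 2, byrow = TRUE)
  f1_metric <- f1_wrapper(num_targets = 2, loss = "categorical_crossentropy")
  f1_metric$update_state(y_true, y_pred)
  f1_metric$result()
}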


#' Balanced accuracy metric
#'
#' Compute balanced accuracy as an additional score. Useful for imbalanced data. Only implemented for
#' models with mutually exclusive targets.
#'
#' @param num_targets Number of targets.
#' @param cm_dir Directory where the confusion matrix used to compute balanced accuracy gets stored.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' 
#' y_true <- c(1,0,0,1,
#'             0,1,0,0,
#'             0,0,1,0) %>% matrix(ncol = 3)
#' y_pred <- c(0.9,0.1,0.2,0.1,
#'             0.05,0.7,0.2,0.0,
#'             0.05,0.2,0.6,0.9) %>% matrix(ncol = 3)
#' 
#' cm_dir <- tempfile() 
#' dir.create(cm_dir)
#' \donttest{
#' bal_acc_metric <- balanced_acc_wrapper(num_targets = 3L, cm_dir = cm_dir)
#' bal_acc_metric$update_state(y_true, y_pred)
#' bal_acc_metric$result()
#' as.array(bal_acc_metric$cm)
#' }
#' 
#' @returns A keras metric.                          
#' @export
balanced_acc_wrapper <- function(num_targets, cm_dir) {
  balanced_acc_stateful <- reticulate::PyClass("balanced_acc",
                                               inherit = tensorflow::tf$keras$metrics$Metric,
                                               list(
                                                 
                                                 `__init__` = function(self, num_targets, cm_dir) {
                                                   super()$`__init__`(name = "balanced_acc")
                                                   self$num_targets <- num_targets
                                                   self$cm_dir <- cm_dir
                                                   self$count <- 0
                                                   self$cm <- self$add_weight(name = "cm_matrix", shape = c(num_targets, num_targets), initializer="zeros")
                                                   NULL
                                                 },
                                                 
                                                 update_state = function(self, y_true, y_pred, sample_weight = NULL) {
                                                   self$cm$assign_add(self$compute_cm(y_true, y_pred))
                                                   NULL
                                                 },
                                                 
                                                 result = function(self) {
                                                   balanced_acc <- self$compute_balanced_acc()
                                                   #self$store_cm()
                                                   return(balanced_acc)
                                                 },
                                                 
                                                 compute_cm = function(self, y_true, y_pred) {
                                                   labels <- tensorflow::tf$math$argmax(y_true, axis = 1L)
                                                   predictions <- tensorflow::tf$math$argmax(y_pred, axis = 1L)
                                                   current_cm <- tensorflow::tf$math$confusion_matrix(
                                                     labels = labels, predictions = predictions,
                                                     dtype = "float32", num_classes = self$num_targets)
                                                   current_cm <- tensorflow::tf$transpose(current_cm)
                                                   return(current_cm)
                                                 },
                                                 
                                                  compute_balanced_acc = function(self) {
                                                    # per-class accuracy: diagonal entries divided by per-class counts
                                                    diag <- tensorflow::tf$linalg$diag_part(self$cm)
                                                    col_sums <- tensorflow::tf$math$reduce_sum(self$cm, axis = 0L)
                                                    average_per_class <- tensorflow::tf$math$divide(diag, col_sums)
                                                    # drop classes without observations (zero counts yield NaN)
                                                    nan_index <- tensorflow::tf$math$logical_not(tensorflow::tf$math$is_nan(average_per_class))
                                                    average_per_class <- tensorflow::tf$boolean_mask(average_per_class, nan_index)
                                                    acc_sum <- tensorflow::tf$math$reduce_sum(average_per_class)
                                                    balanced_acc <- tensorflow::tf$math$divide(acc_sum, tensorflow::tf$math$count_nonzero(col_sums, dtype = acc_sum$dtype))
                                                    return(balanced_acc)
                                                  },
                                                 
                                                 reset_state = function(self) {
                                                   self$store_cm()
                                                   self$count <- self$count + 1
                                                   self$cm$assign_sub(self$cm)
                                                   NULL
                                                 },
                                                 
                                                  store_cm = function(self) {
                                                    # reset_state is called alternately after training and validation;
                                                    # even counts store a training matrix, odd counts a validation matrix
                                                    if (self$count %% 2 != 0) {
                                                      file_name <- file.path(self$cm_dir, paste0("cm_val_", floor(self$count/2), ".rds"))
                                                    } else {
                                                      file_name <- file.path(self$cm_dir, paste0("cm_train_", floor(self$count/2), ".rds"))
                                                    }
                                                    saveRDS(keras::k_eval(self$cm), file_name)
                                                    NULL
                                                  }
                                                 
                                               ))
  return(balanced_acc_stateful(num_targets = num_targets, cm_dir = cm_dir))
}
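
# A minimal sketch of attaching the metric at compile time; `model` is a
# hypothetical classifier with three mutually exclusive targets. The metric
# writes its confusion matrix to `cm_dir` as an .rds file at every reset,
# alternating between training and validation files.
if (FALSE) {
  model %>% keras::compile(
    loss = "categorical_crossentropy",
    optimizer = "adam",
    metrics = balanced_acc_wrapper(num_targets = 3L, cm_dir = tempdir())
  )
}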


#' Mean AUC score
#'
#' Compute AUC score as an additional metric. If the model has several output neurons with binary crossentropy loss, the average score over all outputs is used.
#'
#' @param model_output_size Number of neurons in model output layer.
#' @param loss Loss function of the model the metric is applied to; must be `"binary_crossentropy"`
#' or `"categorical_crossentropy"`.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' 
#' y_true <- c(1,0,0,1,1,0,1,0,0) %>% matrix(ncol = 3)
#' y_pred <- c(0.9,0.05,0.05,0.9,0.05,0.05,0.9,0.05,0.05) %>% matrix(ncol = 3)
#' 
#' \donttest{
#' library(keras)
#' auc_metric <- auc_wrapper(3L, "binary_crossentropy")
#' 
#' auc_metric$update_state(y_true, y_pred)
#' auc_metric$result()  
#' 
#' # add metric to a model
#' num_targets <- 4
#' model <- create_model_lstm_cnn(maxlen = 20,
#'                                layer_lstm = 8,
#'                                bal_acc = FALSE,
#'                                last_layer_activation = "sigmoid",
#'                                loss_fn = "binary_crossentropy",
#'                                layer_dense = c(8, num_targets))
#' 
#' auc_metric <- auc_wrapper(num_targets, loss = model$loss)
#' model %>% keras::compile(loss = model$loss, 
#'                          optimizer = model$optimizer,
#'                          metrics = c(model$metrics, auc_metric))
#' }
#' @returns A keras metric.                          
#' @export
auc_wrapper <- function(model_output_size,
                        loss = "binary_crossentropy") {
  
  multi_label <- FALSE
  stopifnot(loss %in% c("binary_crossentropy", "categorical_crossentropy"))
  
  if (loss == "categorical_crossentropy" & model_output_size != 2) {
    stop("Output size must be two, when loss is categorical_crossentropy")
  }
  
  if (loss == "categorical_crossentropy") {
    label_weights <- c(1L, 0L)
  } else {
    label_weights <- NULL
  }
  
  if (loss == "binary_crossentropy" & model_output_size > 1) {
    multi_label <- TRUE
  }
  
  metric_name <- ifelse(loss == "binary_crossentropy" & model_output_size > 1,
                        "mean_AUC", "AUC")
  
  auc_metric <- tensorflow::tf$keras$metrics$AUC(label_weights = label_weights,
                                                 multi_label = multi_label)
  
  return(auc_metric)
}

#' Loss function for label noise
#' 
#' Implements approach from this [paper](https://arxiv.org/abs/1609.03683) and code from 
#' [here](https://github.com/giorgiop/loss-correction/blob/15a79de3c67c31907733392085c333547c2f2b16/loss.py#L16-L21). 
#' Can be used if labeled data contains noise, i.e. some observations are labeled incorrectly.
#'
#' @param noise_matrix Matrix of noise distribution.
#' @importFrom magrittr "%>%"
#' @examplesIf reticulate::py_module_available("tensorflow")
#' # If first label contains 5% wrong labels and second label no noise
#' noise_matrix <- matrix(c(0.95, 0.05, 0, 1), nrow = 2, byrow = TRUE)
#' noisy_loss <- noisy_loss_wrapper(noise_matrix)
#' 
#' @returns A function implementing noisy loss.                          
#' @export
noisy_loss_wrapper <- function(noise_matrix) {
  inverted_noise_matrix <- solve(noise_matrix)
  inverted_noise_matrix <- tensorflow::tf$cast(inverted_noise_matrix, dtype = "float32")
  noisy_loss <- function(y_true, y_pred) {
    y_pred <- y_pred / keras::k_sum(y_pred, axis = -1, keepdims = TRUE)
    y_pred <- keras::k_clip(y_pred, tensorflow::tf$keras$backend$epsilon(), 1.0 - tensorflow::tf$keras$backend$epsilon())
    loss <- -1 * keras::k_sum(keras::k_dot(y_true, inverted_noise_matrix) * keras::k_log(y_pred), axis=-1)
    return(loss)
  }
  noisy_loss
}
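
# A minimal sketch of evaluating the corrected loss on toy tensors and of
# passing it to keras::compile() as a custom loss (`model` is hypothetical):
if (FALSE) {
  noise_matrix <- matrix(c(0.95, 0.05, 0, 1), nrow = 2, byrow = TRUE)
  noisy_loss <- noisy_loss_wrapper(noise_matrix)
  y_true <- tensorflow::tf$constant(matrix(c(1, 0,
                                             0, 1), nrow = 2, byrow = TRUE), dtype = "float32")
  y_pred <- tensorflow::tf$constant(matrix(c(0.8, 0.2,
                                             0.3, 0.7), nrow = 2, byrow = TRUE), dtype = "float32")
  noisy_loss(y_true, y_pred)
  # model %>% keras::compile(loss = noisy_loss, optimizer = "adam")
}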

# Contrastive predictive coding (CPC) loss: predicts latent patches several
# steps ahead from a context representation and scores the predictions
# against the other patches in the batch via softmax cross entropy.
# Returns a stacked tensor containing the loss and the top-k accuracy.
cpcloss <- function(latents,
                    context,
                    target_dim = 64,
                    emb_scale = 0.1,
                    steps_to_ignore = 2,
                    steps_to_predict = 3,
                    steps_skip = 1,
                    batch_size = 32,
                    k = 5,
                    train_type = "cpc") {
  # define empty lists for metrics
  loss <- list()
  acc <- list()
  # create context tensor
  ctx <- context(latents)
  c_dim <- latents$shape[[2]]
  
  # loop for different distances of predicted patches
  for (i in seq(steps_to_ignore, (steps_to_predict - 1), steps_skip)) {
    # define patches to be deleted
    c_dim_i <- c_dim - i - 1
    if (train_type == "Self-GenomeNet") {
      steps_to_ignore <- 1
      steps_to_predict <- 2
      steps_skip <- 1
      target_dim <- ctx$shape[[3]]
      ctx_conv <- ctx %>% keras::layer_conv_1d(kernel_size = 1, filters = target_dim)
      logits <- tensorflow::tf$zeros(list(0L, as.integer(batch_size * 2)))
      for (j in seq_len(c_dim - (i + 1))) {
        basepos <- ctx_conv[, j, ] %>% keras::k_reshape(c(-1, target_dim))
        targetpos <- ctx[, (c_dim - j - i), ] %>% keras::k_reshape(c(-1, target_dim))
        logits_j <- tensorflow::tf$matmul(basepos, tensorflow::tf$transpose(targetpos))
        logits <- tensorflow::tf$concat(list(logits, logits_j), axis = 0L)
      }
      # labels pair each element with its counterpart in the other half of the doubled batch
      labels <- rep(c(seq(batch_size, (2 * batch_size - 1)),
                      seq(0, (batch_size - 1))), (c_dim - (i + 1))) %>% as.integer()
    } else {
      c_dim_i <- c_dim - i - 1
      # define total number of elements in context tensor
      total_elements <- batch_size * c_dim_i
      # add conv layer and reshape tensor for matrix multiplication
      targets <- latents %>%
        keras::layer_conv_1d(kernel_size = 1, filters = target_dim) %>%
        keras::k_reshape(c(-1, target_dim))
      # add conv layer and reshape for matrix multiplication
      preds_i <- ctx %>% keras::layer_conv_1d(kernel_size = 1, filters = target_dim)
      preds_i <- preds_i[, (1:(c_dim - i - 1)), ]
      preds_i <- keras::k_reshape(preds_i, c(-1, target_dim)) * emb_scale
      
      # define logits normally
      logits <- tensorflow::tf$matmul(preds_i, tensorflow::tf$transpose(targets))
      
      # get position of labels
      b <- floor(seq(0, total_elements - 1) / c_dim_i)
      col <- seq(0, total_elements - 1) %% c_dim_i
      
      # define labels
      labels <- b * c_dim + col + (i + 1)
      labels <- as.integer(labels)
      
    }
    # calculate loss and accuracy for each step
    loss[[length(loss) + 1]] <-
      tensorflow::tf$nn$sparse_softmax_cross_entropy_with_logits(labels, logits) %>%
      tensorflow::tf$stack(axis = 0) %>% tensorflow::tf$reduce_mean()
    acc[[length(acc) + 1]] <-
      tensorflow::tf$keras$metrics$sparse_top_k_categorical_accuracy(tensorflow::tf$cast(labels, dtype = "int64"), logits, as.integer(k)) %>%
      tensorflow::tf$stack(axis = 0) %>% tensorflow::tf$reduce_mean()
  }
  # convert to tensor for output
  loss <- loss %>% tensorflow::tf$stack(axis = 0) %>% tensorflow::tf$reduce_mean()
  acc <- acc %>% tensorflow::tf$stack(axis = 0) %>% tensorflow::tf$reduce_mean()
  return(tensorflow::tf$stack(list(loss, acc)))
}
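
# A minimal sketch (hypothetical shapes) of how cpcloss might be invoked
# during model construction: `latents` are patch encodings of shape
# (batch, patches, features) and `context` is an autoregressive network
# mapping them to one context vector per patch. The result stacks the CPC
# loss and the top-k prediction accuracy.
if (FALSE) {
  latents <- tensorflow::tf$random$normal(shape = c(32L, 8L, 64L))
  context <- function(x) keras::layer_gru(x, units = 64L, return_sequences = TRUE)
  out <- cpcloss(latents, context, batch_size = 32)
  loss <- out[1]
  acc <- out[2]
}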

#' Stochastic Gradient Descent with Warm Restarts
#' 
#' Compute the learning rate for a given epoch using stochastic gradient descent with warm restarts (SGDR). Implements the approach from this [paper](https://arxiv.org/abs/1608.03983).
#'
#' @param lrmin Lower limit of the learning rate range.
#' @param lrmax Upper limit of the learning rate range.
#' @param restart Number of epochs until a restart is conducted.
#' @param mult Factor by which the number of epochs until a restart is increased at every restart.
#' @param epoch Epoch for which the learning rate shall be calculated.
#' @examples 
#' sgdr(lrmin = 5e-10, lrmax = 5e-2, restart = 50,
#'      mult = 1, epoch = 5)
#' 
#' @returns A numeric value.
#' @export
sgdr <- function(lrmin = 5e-10,
                 lrmax = 5e-2,
                 restart = 50,
                 mult = 1,
                 epoch = NULL) {
  iter <- c()
  position <- c()
  i <- 0
  while (length(iter) < epoch) {
    iter <- c(iter, rep(i, restart * mult ^ i))
    position <- c(position, c(1:(restart * mult ^ i)))
    i <- i + 1
  }
  restart2 <- (restart * mult ^ iter[epoch])
  epoch <- position[epoch]
  return(lrmin + 1 / 2 * (lrmax - lrmin) * (1 + cos((epoch / restart2) * pi)))
}
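
# A quick sketch of the warm-restart schedule: the learning rate follows a
# cosine from lrmax down to lrmin and jumps back to lrmax at every restart.
if (FALSE) {
  lr <- sapply(1:150, function(e) sgdr(lrmin = 1e-6, lrmax = 0.1,
                                       restart = 50, mult = 1, epoch = e))
  plot(lr, type = "l", xlab = "epoch", ylab = "learning rate")
}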

#' Step Decay
#' 
#' Compute the learning rate for a given epoch using step decay.
#'
#' @param lrmax Upper limit of the learning rate range.
#' @param newstep Number of epochs after which the learning rate is reduced.
#' @param mult Factor by which the learning rate is multiplied at every new step.
#' @param epoch Epoch for which the learning rate shall be calculated.
#' @examples
#' stepdecay(lrmax = 0.005, newstep = 50,
#'           mult = 0.7, epoch = 3)
#' 
#' @returns A numeric value.
#' @export
stepdecay <- function(lrmax = 0.005,
                      newstep = 50,
                      mult = 0.7,
                      epoch = NULL) {
  return(lrmax * mult^(floor(epoch / newstep)))
}

#' Exponential Decay
#' 
#' Compute the learning rate for a given epoch using exponential decay.
#'
#' @param lrmax Upper limit of the learning rate range.
#' @param mult Decay rate; larger values make the learning rate drop faster.
#' @param epoch Epoch for which the learning rate shall be calculated.
#' @examples
#' exp_decay(lrmax = 0.005, mult = 0.1, epoch = 8) 
#' 
#' @returns A numeric value.
#' @export
exp_decay <- function(lrmax = 0.005,
                      mult = 0.1,
                      epoch = NULL) {
  return(lrmax * exp(-mult * epoch))
}
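
# A minimal sketch (assuming the keras package) of plugging one of the
# schedules above into training via a learning rate scheduler callback:
if (FALSE) {
  lr_callback <- keras::callback_learning_rate_scheduler(
    function(epoch, lr) exp_decay(lrmax = 0.005, mult = 0.1, epoch = epoch)
  )
  # model %>% keras::fit(x, y, epochs = 20, callbacks = list(lr_callback))
}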


# Euclidean distance between two batches of vectors; expects a list of two tensors.
euclidean_distance <- function(vects) {
  x <- vects[[1]]
  y <- vects[[2]]
  sum_square <- tensorflow::tf$math$reduce_sum(tensorflow::tf$math$square(x - y), axis=1L, keepdims=TRUE)
  return(tensorflow::tf$math$sqrt(tensorflow::tf$math$maximum(sum_square, tensorflow::tf$keras$backend$epsilon())))
}

# Cosine similarity between two batches of vectors; expects a list of two tensors.
cosine_similarity <- function(vects) {
  x <- vects[[1]]
  y <- vects[[2]]
  xy_dot <- tensorflow::tf$math$reduce_sum(x*y, axis=1L, keepdims=TRUE)
  x_norm <- tensorflow::tf$math$sqrt(tensorflow::tf$math$reduce_sum(tensorflow::tf$math$square(x), axis=1L, keepdims=TRUE))
  y_norm <- tensorflow::tf$math$sqrt(tensorflow::tf$math$reduce_sum(tensorflow::tf$math$square(y), axis=1L, keepdims=TRUE))
  return(xy_dot/(x_norm*y_norm))
}
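
# A quick sketch of the two pairwise similarity helpers on toy tensors;
# both expect a list of two equally shaped (batch, features) tensors and
# return one value per row:
if (FALSE) {
  x <- tensorflow::tf$constant(matrix(c(1, 0, 0, 1), nrow = 2, byrow = TRUE), dtype = "float32")
  y <- tensorflow::tf$constant(matrix(c(0, 1, 0, 1), nrow = 2, byrow = TRUE), dtype = "float32")
  euclidean_distance(list(x, y))  # shape (2, 1)
  cosine_similarity(list(x, y))   # shape (2, 1)
}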

#' Contrastive loss 
#' 
#' Contrastive loss as used [here](https://keras.io/examples/vision/siamese_contrastive/).
#'
#' @param margin Numeric; dissimilar pairs only contribute to the loss while their predicted distance is below `margin`.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' cl <- loss_cl(margin=1)
#' 
#' @returns A function implementing contrastive loss.
#' @export
loss_cl <- function(margin=1) {
  
  contrastive_loss <- function(y_true, y_pred) {
    
    square_pred <- tensorflow::tf$math$square(y_pred)
    margin_square <- tensorflow::tf$math$square(tensorflow::tf$math$maximum(margin - (y_pred), 0))
    l <- tensorflow::tf$math$reduce_mean(
      (1 - y_true) * square_pred + (y_true) * margin_square
    )
    return(l)
  }
  
  return(contrastive_loss)
  
}
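
# A minimal sketch: `y_pred` holds pairwise distances (e.g. from
# euclidean_distance above); with the formula used here, y_true = 0 marks
# similar pairs and y_true = 1 dissimilar pairs.
if (FALSE) {
  cl <- loss_cl(margin = 1)
  y_true <- tensorflow::tf$constant(c(0, 1, 0, 1), dtype = "float32")
  y_pred <- tensorflow::tf$constant(c(0.1, 0.9, 0.2, 0.8), dtype = "float32")
  cl(y_true, y_pred)
}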

#' Focal loss for two or more labels
#' 
#' Compute focal loss, a weighted cross-entropy variant that down-weights
#' well-classified examples via the focusing parameter `gamma`.
#'
#' @param y_true Vector or matrix of true (one-hot encoded) labels.
#' @param y_pred Vector or matrix of predicted probabilities.
#' @param gamma Focusing parameter.
#' @param alpha Vector of weighting factors.
#' @examplesIf reticulate::py_module_available("tensorflow")
#' y_true <- matrix(c(0, 1, 0, 0, 0, 1), nrow = 2, byrow = TRUE)
#' y_pred <- matrix(c(0.15, 0.8, 0.05,
#'                    0.08, 0.02, 0.9), nrow = 2, byrow = TRUE) 
#' fl <- focal_loss_multiclass(y_true, y_pred)
#' fl$numpy()
#' 
#' @returns A tensor containing the focal loss.
#' @export
focal_loss_multiclass <- function(y_true, y_pred, gamma = 2.5, alpha = c(1)) {
  y_pred <- keras::k_clip(y_pred, keras::k_epsilon(), 1.0 - keras::k_epsilon())
  cd_loss <- -y_true * keras::k_log(y_pred) # categorical cross entropy
  fl_loss <- alpha * keras::k_pow(1. - y_pred, gamma) * cd_loss
  return(keras::k_sum(fl_loss, axis = -1))
}
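
# A minimal sketch of wrapping the focal loss so its signature matches what
# keras::compile() expects from a custom loss (`model` is hypothetical):
if (FALSE) {
  focal_loss <- function(y_true, y_pred) {
    focal_loss_multiclass(y_true, y_pred, gamma = 2.5, alpha = c(1))
  }
  model %>% keras::compile(loss = focal_loss, optimizer = "adam")
}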