###############################################################################
# DeepLift
###############################################################################
#' @title Deep learning important features (DeepLift)
#'
#' @description
#' This is an implementation of the \emph{deep learning important features
#' (DeepLift)} algorithm introduced by Shrikumar et al. (2017). It is a local
#' method that interprets a single element \eqn{x} of the dataset with respect
#' to a reference value \eqn{x'} and returns the contribution of each input
#' feature to the difference between the output (\eqn{y=f(x)}) and the
#' reference output (\eqn{y'=f(x')}). The basic idea of this method is to
#' decompose the difference-from-reference prediction with respect to the
#' input features, i.e.,
#' \deqn{\Delta y = y - y' = \sum_i C(x_i).}
#' Compared to \emph{Layer-wise relevance propagation} (see [LRP]), the
#' DeepLift method is an exact decomposition and not an approximation, so we
#' get real contributions of the input features to the
#' difference-from-reference prediction. There are two ways to handle
#' activation functions: the *Rescale* rule (`'rescale'`) and
#' *RevealCancel* rule (`'reveal_cancel'`).
#'
#' The R6 class can also be initialized using the [`run_deeplift`] helper
#' function, so that no prior knowledge of R6 classes is required.
#'
#' @template param-converter
#' @template param-data
#' @template param-output_idx
#' @template param-output_label
#' @template param-channels_first
#' @template param-ignore_last_act
#' @template param-x_ref
#' @template param-dtype
#' @template param-verbose
#' @template field-x_ref
#' @template examples-DeepLift
#'
#' @references
#' A. Shrikumar et al. (2017) \emph{Learning important features through
#' propagating activation differences.} ICML 2017, p. 4844-4866
#'
#' @family methods
#' @export
DeepLift <- R6Class(
classname = "DeepLift",
inherit = InterpretingMethod,
public = list(
#' @field rule_name (`character(1)`)\cr
#' Name of the applied rule to calculate the contributions.
#' Either `'rescale'` or `'reveal_cancel'`.\cr
rule_name = NULL,
x_ref = NULL,
#' @description
#' Create a new instance of the `DeepLift` R6 class. When initialized,
#' the method *DeepLift* is applied to the given data and the results are stored in
#' the field `result`.
#'
#' @param rule_name (`character(1)`)\cr
#' Name of the applied rule to calculate the
#' contributions. Use either `'rescale'` or `'reveal_cancel'`. \cr
#' @param winner_takes_all (`logical(1)`)\cr
#' This logical argument is only relevant for MaxPooling
#' layers and is otherwise ignored. For this layer type, the positions of the
#' maximum values in the pooling kernel of the normal input
#' \eqn{x} and the reference input \eqn{x'} may not match, which violates the
#' summation-to-delta property. To overcome this problem, a variant is
#' implemented that treats a MaxPooling layer as an
#' AveragePooling layer in the backward pass only, leading to a uniform
#' distribution of the upper-layer contribution over the lower layer.\cr
initialize = function(converter, data,
channels_first = TRUE,
output_idx = NULL,
output_label = NULL,
ignore_last_act = TRUE,
rule_name = "rescale",
x_ref = NULL,
winner_takes_all = TRUE,
verbose = interactive(),
dtype = "float") {
super$initialize(converter, data, channels_first, output_idx, output_label,
ignore_last_act, winner_takes_all, verbose, dtype)
cli_check(checkChoice(rule_name, c("rescale", "reveal_cancel")),
"rule_name")
self$rule_name <- rule_name
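# If no reference value is given, a zero baseline is used by default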
if (is.null(x_ref)) {
x_ref <- lapply(lapply(self$data, dim),
function(x) array(0, dim = c(1, x[-1])))
}
self$x_ref <- private$test_data(x_ref, name = "x_ref")
# Check if x_ref is only a single instance
num_instances <- unlist(lapply(self$x_ref, function(x) dim(x)[1]))
if (any(num_instances != 1)) {
stopf("For the method {.code DeepLift}, you have to pass ",
"only a single instance for the argument {.arg x_ref}. ",
"You passed (for at least one input layer) '",
max(num_instances), "' data instances!")
}
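# Forward pass for the normal input x, storing the inputs,
# pre-activations and outputs of each layer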
self$converter$model$forward(self$data,
channels_first = self$channels_first,
save_input = TRUE,
save_preactivation = TRUE,
save_output = TRUE
)
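# Forward pass for the reference input x', storing the same
# intermediate values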
self$converter$model$update_ref(self$x_ref,
channels_first = self$channels_first,
save_input = TRUE,
save_preactivation = TRUE,
save_output = TRUE
)
self$result <- private$run("DeepLift")
}
),
private = list(
print_method_specific = function() {
i <- cli_ul()
cli_li(paste0("{.field rule_name}: '", self$rule_name, "'"))
cli_li(paste0("{.field winner_takes_all}: ", self$winner_takes_all))
all_zeros <- all(unlist(lapply(self$x_ref,
function(x) all(as_array(x) == 0))))
if (all_zeros) {
s <- "zeros"
} else {
values <- unlist(lapply(self$x_ref, as_array))
s <- paste0("mean: ", mean(values), " (q1: ", quantile(values, 0.25),
", q3: ", quantile(values, 0.75), ")")
}
cli_li(paste0("{.field x_ref}: ", s))
cli_end(id = i)
}
)
)
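###############################################################################
# Usage sketch (not run): a minimal, hypothetical example of applying the
# DeepLift class above to a small {torch} model. The `convert()` helper and
# `get_result()` accessor are assumed to be this package's converter
# constructor and result accessor; the model and data are made up for
# illustration.
#
# library(torch)
#
# # Small sequential model with five input features and two outputs
# model <- nn_sequential(
#   nn_linear(5, 8),
#   nn_relu(),
#   nn_linear(8, 2),
#   nn_softmax(dim = 2)
# )
#
# # Convert the model and generate some random input data
# converter <- convert(model, input_dim = c(5))
# data <- torch_randn(10, 5)
#
# # Apply DeepLift with the default zero baseline and the 'rescale' rule;
# # the helper run_deeplift(converter, data) is equivalent
# deeplift <- DeepLift$new(converter, data, rule_name = "rescale")
#
# # Feature-wise contributions; they sum to y - y' (summation-to-delta)
# result <- get_result(deeplift)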
###############################################################################
# DeepSHAP
###############################################################################
#' Deep Shapley additive explanations (DeepSHAP)
#'
#' @description
#' The *DeepSHAP* method extends the [`DeepLift`] technique by not only
#' considering a single reference value but by calculating the average
#' from several, ideally representative reference values at each layer. The
#' obtained feature-wise results are approximate Shapley values for the
#' chosen output, where the conditional expectation is computed using these
#' different reference values, i.e., the *DeepSHAP* method decomposes the
#' difference between the prediction and the mean prediction,
#' \eqn{f(x) - E[f(\tilde{x})]}, into feature-wise effects. The reference
#' values can be passed via the argument
#' `data_ref`.
#'
#' The R6 class can also be initialized using the [`run_deepshap`] helper
#' function, so that no prior knowledge of R6 classes is required.
#'
#' @template param-converter
#' @template param-data
#' @template param-output_idx
#' @template param-output_label
#' @template param-channels_first
#' @template param-ignore_last_act
#' @template param-dtype
#' @template param-verbose
#' @template examples-DeepSHAP
#'
#' @references
#' S. Lundberg & S. Lee (2017) \emph{A unified approach to interpreting model
#' predictions.} NIPS 2017, p. 4768–4777
#'
#' @family methods
#' @export
DeepSHAP <- R6Class(
classname = "DeepSHAP",
inherit = InterpretingMethod,
public = list(
#' @field rule_name (`character(1)`)\cr
#' Name of the applied rule to calculate the contributions.
#' Either `'rescale'` or `'reveal_cancel'`.\cr
#' @field data_ref (`list`)\cr
#' The passed reference dataset for estimating the conditional expectation
#' as a `list` of `torch_tensors` in the selected
#' data format (field `dtype`) matching the corresponding shapes of the
#' individual input layers. In addition, the channel axis is moved to the
#' second position (after the batch axis) because only the
#' format *channels first* is used internally.\cr
rule_name = NULL,
data_ref = NULL,
#' @description
#' Create a new instance of the `DeepSHAP` R6 class. When initialized,
#' the method *DeepSHAP* is applied to the given data and the results are
#' stored in the field `result`.
#'
#' @param rule_name (`character(1)`)\cr
#' Name of the applied rule to calculate the
#' contributions. Use either `'rescale'` or `'reveal_cancel'`. \cr
#' @param winner_takes_all (`logical(1)`)\cr
#' This logical argument is only relevant for MaxPooling
#' layers and is otherwise ignored. For this layer type, the positions of the
#' maximum values in the pooling kernel of the normal input
#' \eqn{x} and the reference input \eqn{x'} may not match, which violates the
#' summation-to-delta property. To overcome this problem, a variant is
#' implemented that treats a MaxPooling layer as an
#' AveragePooling layer in the backward pass only, leading to a uniform
#' distribution of the upper-layer contribution over the lower layer.\cr
#' @param data_ref ([`array`], [`data.frame`],
#' \code{\link[torch]{torch_tensor}} or `list`)\cr
#' The reference data used to estimate the conditional expectation.
#' These must have the same format as the input data of the model passed to
#' the converter object. This means either
#' \itemize{
#' \item an `array`, `data.frame`, `torch_tensor` or array-like format of
#' size *(batch_size, dim_in)* if, e.g., the model has only one input layer,
#' \item a `list` with the corresponding input data (in the format of the
#' point above) for each of the input layers, or
#' \item `NULL` (default) to use only a zero baseline for the estimation.\cr
#' }
#' @param limit_ref (`integer(1)`)\cr
#' This argument limits the number of instances taken from the reference
#' dataset `data_ref`, so that only `limit_ref` randomly selected instances,
#' rather than the entire dataset, are used to estimate the conditional
#' expectation. Too large a number can significantly increase the
#' computation time.\cr
initialize = function(converter, data,
channels_first = TRUE,
output_idx = NULL,
output_label = NULL,
ignore_last_act = TRUE,
rule_name = "rescale",
data_ref = NULL,
limit_ref = 100,
winner_takes_all = TRUE,
verbose = interactive(),
dtype = "float") {
super$initialize(converter, data, channels_first, output_idx, output_label,
ignore_last_act, winner_takes_all, verbose, dtype)
cli_check(checkChoice(rule_name, c("rescale", "reveal_cancel")), "rule_name")
self$rule_name <- rule_name
cli_check(checkInt(limit_ref), "limit_ref")
# For default values of data_ref, DeepLift with zero baseline is applied
if (is.null(data_ref)) {
data_ref <- lapply(lapply(self$data, dim),
function(x) array(0, dim = c(1, x[-1])))
}
self$data_ref <- private$test_data(data_ref, name = "data_ref")
# For computational reasons, the number of instances in the reference
# dataset 'data_ref' is limited by the value `limit_ref`
num_samples <- dim(self$data_ref[[1]])[1]
ids <- sample.int(num_samples, min(num_samples, limit_ref))
self$data_ref <- lapply(self$data_ref, function(x) x[ids, drop = FALSE])
# Repeat values, s.t. `data` and `data_ref` have the same number of
# instances
num_samples <- dim(self$data[[1]])[1]
num_samples_ref <- dim(self$data_ref[[1]])[1]
data <- lapply(self$data, torch_repeat_interleave,
repeats = as.integer(num_samples_ref),
dim = 1) # now of shape (batch_size * num_samples_ref, input_dim)
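# Tile the whole reference dataset once per explained instance so that
# every instance is paired with each reference sample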
repeat_input <- function(x) {
torch_cat(lapply(seq_len(num_samples), function(i) x))
}
data_ref <- lapply(self$data_ref, repeat_input) # now of shape (batch_size * num_samples_ref, input_dim)
# Forward pass for the repeated normal inputs
self$converter$model$forward(data,
channels_first = self$channels_first,
save_input = TRUE,
save_preactivation = TRUE,
save_output = TRUE
)
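# Forward pass for the repeated reference inputs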
self$converter$model$update_ref(data_ref,
channels_first = self$channels_first,
save_input = TRUE,
save_preactivation = TRUE,
save_output = TRUE
)
result <- private$run("DeepSHAP")
# For the DeepSHAP method, we only get the multiplier.
# Hence, we have to multiply this by the differences of inputs
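# Each block of 'num_samples_ref' consecutive rows belongs to one explained
# instance, so the results are chunked along the batch axis and averaged
# over the reference samples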
fun <- function(result, out_idx, in_idx, x, x_ref, n) {
res <- result[[out_idx]][[in_idx]]
if (!is.null(res)) {
res <- res * (x[[in_idx]] - x_ref[[in_idx]])$unsqueeze(-1)
res <- torch_stack(res$chunk(n), dim = 1)$mean(2)
}
res
}
result <- apply_results(result, fun, x = data, x_ref = data_ref,
n = num_samples)
self$result <- result
}
),
private = list(
print_method_specific = function() {
i <- cli_ul()
cli_li(paste0("{.field rule_name}: '", self$rule_name, "'"))
cli_li(paste0("{.field winner_takes_all}: ", self$winner_takes_all))
all_zeros <- all(unlist(lapply(self$data_ref,
function(x) all(as_array(x) == 0))))
if (all_zeros) {
s <- "zeros"
} else {
values <- unlist(lapply(self$data_ref, as_array))
s <- paste0("mean: ", mean(values), " (q1: ", quantile(values, 0.25),
", q3: ", quantile(values, 0.75), ")")
}
cli_li(paste0("{.field data_ref}: ", s))
cli_end(id = i)
}
)
)
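###############################################################################
# Usage sketch (not run): a minimal, hypothetical example of applying the
# DeepSHAP class above. It reuses the assumed `convert()` helper and
# `get_result()` accessor; model and data are made up for illustration.
#
# library(torch)
#
# model <- nn_sequential(nn_linear(5, 8), nn_relu(), nn_linear(8, 1))
# converter <- convert(model, input_dim = c(5))
#
# # Instances to be explained and a reference dataset for estimating the
# # conditional expectation E[f(x_ref)]
# data <- torch_randn(10, 5)
# data_ref <- torch_randn(100, 5)
#
# # Approximate Shapley values decomposing f(x) - E[f(x_ref)]; only 50
# # randomly selected reference instances are used (argument 'limit_ref');
# # the helper run_deepshap() is equivalent
# deepshap <- DeepSHAP$new(converter, data, data_ref = data_ref,
#                          limit_ref = 50)
# result <- get_result(deepshap)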