#' @title Individual Variable Effect
#'
#' @param x a model to be explained, or an explainer created with function \code{\link[DALEX]{explain}}.
#' @param data validation dataset. Used to determine univariate distributions, calculation of quantiles,
#' correlations and so on. It will be extracted from `x` if it’s an explainer.
#' @param predict_function predict function that operates on the model `x`. Since the model is a black box,
#' the `predict_function` is the only interface to access values from the model. It should be a function that
#' takes at least a model `x` and data and returns vector of predictions. If model response has more than
#' a single number (like multiclass models) then this function should return a marix/data.frame of the size
#' `m` x `d`, where `m` is the number of observations while `d` is the dimensionality of model response.
#' It will be extracted from `x` if it’s an explainer.
#' @param new_observation an observation/observations to be explained. Required for local/instance level
#' explainers. Columns in should correspond to columns in the data argument. Data set should not contain any additional columns.
#' @param ... other parameters.
#' @param label name of the model. By default it’s extracted from the class attribute of the model
#' @param method an estimation method of SHAP values. Currently the only availible is `KernelSHAP`.
#' @param nsamples number of samples or "auto". Note that number must be as integer. Use `as.integer()`.
#'
#'
#'
#' @return an object of class individual_variable_effect with shap values of each variable for each new observation.
#' Columns:
#' \itemize{
#' \item first d columns contains variable values.
#' \item _id_ - id of observation, number of row in `new_observation` data.
#' \item _ylevel_ - level of y
#' \item _yhat_ -predicted value for level of y
#' \item _yhat_mean_ - expected value of prediction, mean of all predictions
#' \item _vname_ - variable name
#' \item _attribution_ - attribution of variable
#' \item _sign_ a sign of attribution
#' \item _label_ a label
#' }
#'
#' In order to use shapper with other python virtual environment following R command are required to execute
#' reticulate::use_virtualenv("path_to_your_env")
#' or for conda
#' reticulate::use_conda("name_of_conda_env")
#' before attaching shapper.
#'
#' @importFrom reticulate r_to_py
#'
#' @examples
#' have_shap <- reticulate::py_module_available("shap")
#'
#' if(have_shap){
#' library("shapper")
#' library("DALEX")
#' library("randomForest")
#' Y_train <- HR$status
#' x_train <- HR[ , -6]
#' set.seed(123)
#' model_rf <- randomForest(x = x_train, y = Y_train, ntree= 50)
#' p_function <- function(model, data) predict(model, newdata = data, type = "prob")
#'
#' ive_rf <- individual_variable_effect(model_rf, data = x_train, predict_function = p_function,
#' new_observation = x_train[1:2,], nsamples = 50)
#' ive_rf
#' } else{
#' print('Python testing environment is required.')
#' }
#'
#'
#' @export
#' @aliases shap
#'
#' @rdname individual_variable_effect
individual_variable_effect <- function(x, ...) {
UseMethod("individual_variable_effect")
}
#' @export
#' @rdname individual_variable_effect
individual_variable_effect.explainer <- function(x,
new_observation,
method = "KernelSHAP",
nsamples = "auto",
...) {
# extracts model, data and predict function from the explainer
model <- x$model
data <- x$data
predict_function <- x$predict_function
label <- x$label
individual_variable_effect.default(
model,
data,
predict_function,
new_observation = new_observation,
label = label,
method = method,
nsamples = nsamples,
...
)
}
#' @importFrom utils tail
#' @export
#' @rdname individual_variable_effect
individual_variable_effect.default <-
function(x,
data,
predict_function = predict,
new_observation,
label = tail(class(x), 1),
method = "KernelSHAP",
nsamples = "auto",
...) {
# check if data correct
if(!all(colnames(data)==colnames(new_observation))){
stop("Columns in new obseravtion and data does not match")
}
# transform factors to numerics and keep factors' levels
data_classes <- sapply(data, class)
factors <- list()
data_numeric <- data
for (col in names(data_classes)) {
if (data_classes[col] == "factor") {
factors[[col]] <- levels(data[, col])
data_numeric[, col] <- as.numeric(data_numeric[, col]) - 1
}
}
# force nsamples to be an integer
if (is.numeric(nsamples))
nsamples <- as.integer(round(nsamples))
p_function <- function(new_data) {
new_data <- as.data.frame(new_data)
colnames(new_data) <- colnames(data)
for (col in names(factors)) {
new_data[, col] <- factor(new_data[, col],
levels = c(0:(length(factors[[col]]) - 1)),
labels = factors[[col]])
}
res <- as.data.frame(predict_function(x, new_data))
if (nrow(res) == 1) {
res[2, ] <- 0
res <- r_to_py(res)
res$drop(res$index[1], inplace = TRUE)
}
return(res)
}
explainer = shap_reference$KernelExplainer(p_function, data_numeric)
new_observation_releveled <- new_observation
new_observation_numeric <- new_observation
for (col in names(factors)) {
new_observation_releveled[, col] <-
factor(new_observation_releveled[, col], levels = factors[[col]])
new_observation_numeric[, col] <-
as.numeric(new_observation_releveled[, col]) - 1
}
shap_values = explainer$shap_values(new_observation_numeric, nsamples = nsamples)
expected_value = explainer$expected_value
predictions <- predict_function(x, new_observation_releveled)
variables <- colnames(data)
# create data to return
new_data <- new_observation
new_data$`_id_` <- c(1:nrow(new_data))
# add multiple predictions
new_data <-
new_data[rep(1:nrow(new_data), each = length(shap_values)),]
if (is.null(colnames(predictions))) {
new_data$`_ylevel_` <- ""
new_data <- unique(new_data)
} else {
new_data$`_ylevel_` <-
rep(colnames(predictions), times = nrow(new_observation))
}
new_data$`_yhat_` <- as.vector(t(predictions))
new_data$`_yhat_mean_` <-
rep(expected_value, times = nrow(new_observation))
# add multiple variables
new_data <- new_data[rep(1:nrow(new_data), each = ncol(data)),]
new_data$`_vname_` <- rep(variables, times = length(predictions))
attribution <- numeric()
for (i in 1:nrow(new_observation)) {
for (j in 1:length(shap_values)) {
shap_attributes <- shap_values[[j]]
if (is.matrix(shap_attributes)) {
attribution <- c(attribution, shap_attributes[i, ])
} else {
attribution <- c(attribution, shap_attributes[i])
}
}
}
new_data$`_attribution_` <- attribution
new_data$`_sign_` <- factor(sign(new_data$`_attribution_`))
new_data$`_sign_` <- ifelse(new_data$`_sign_` == 1, "+", "-")
new_data$`_label_` <- label
class(new_data) <- c("individual_variable_effect", "data.frame")
return(new_data)
}
#' @export
#' @rdname individual_variable_effect
shap <- individual_variable_effect
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.