Nothing
#' @title Individual Variable Effect
#'
#' @param x a model to be explained, or an explainer created with function \code{\link[DALEX]{explain}}.
#' @param data validation dataset. Used to determine univariate distributions, calculation of quantiles,
#' correlations and so on. It will be extracted from `x` if it’s an explainer.
#' @param predict_function predict function that operates on the model `x`. Since the model is a black box,
#' the `predict_function` is the only interface to access values from the model. It should be a function that
#' takes at least a model `x` and data and returns vector of predictions. If model response has more than
#' a single number (like multiclass models) then this function should return a marix/data.frame of the size
#' `m` x `d`, where `m` is the number of observations while `d` is the dimensionality of model response.
#' It will be extracted from `x` if it’s an explainer.
#' @param new_observation an observation/observations to be explained. Required for local/instance level
#' explainers. Columns in should correspond to columns in the data argument. Data set should not contain any additional columns.
#' @param ... other parameters.
#' @param label name of the model. By default it’s extracted from the class attribute of the model
#' @param method an estimation method of SHAP values. Currently the only availible is `KernelSHAP`.
#' @param nsamples number of samples or "auto". Note that number must be as integer. Use `as.integer()`.
#'
#'
#'
#' @return an object of class individual_variable_effect with shap values of each variable for each new observation.
#' Columns:
#' \itemize{
#' \item first d columns contains variable values.
#' \item _id_ - id of observation, number of row in `new_observation` data.
#' \item _ylevel_ - level of y
#' \item _yhat_ -predicted value for level of y
#' \item _yhat_mean_ - expected value of prediction, mean of all predictions
#' \item _vname_ - variable name
#' \item _attribution_ - attribution of variable
#' \item _sign_ a sign of attribution
#' \item _label_ a label
#' }
#'
#' In order to use shapper with other python virtual environment following R command are required to execute
#' reticulate::use_virtualenv("path_to_your_env")
#' or for conda
#' reticulate::use_conda("name_of_conda_env")
#' before attaching shapper.
#'
#' @importFrom reticulate r_to_py
#'
#' @examples
#' have_shap <- reticulate::py_module_available("shap")
#'
#' if(have_shap){
#' library("shapper")
#' library("DALEX")
#' library("randomForest")
#' Y_train <- HR$status
#' x_train <- HR[ , -6]
#' set.seed(123)
#' model_rf <- randomForest(x = x_train, y = Y_train, ntree= 50)
#' p_function <- function(model, data) predict(model, newdata = data, type = "prob")
#'
#' ive_rf <- individual_variable_effect(model_rf, data = x_train, predict_function = p_function,
#' new_observation = x_train[1:2,], nsamples = 50)
#' ive_rf
#' } else{
#' print('Python testing environment is required.')
#' }
#'
#'
#' @export
#' @aliases shap
#'
#' @rdname individual_variable_effect
individual_variable_effect <- function(x, ...) {
UseMethod("individual_variable_effect")
}
#' @export
#' @rdname individual_variable_effect
individual_variable_effect.explainer <- function(x,
new_observation,
method = "KernelSHAP",
nsamples = "auto",
...) {
# extracts model, data and predict function from the explainer
model <- x$model
data <- x$data
predict_function <- x$predict_function
label <- x$label
individual_variable_effect.default(
model,
data,
predict_function,
new_observation = new_observation,
label = label,
method = method,
nsamples = nsamples,
...
)
}
#' @importFrom utils tail
#' @export
#' @rdname individual_variable_effect
individual_variable_effect.default <-
function(x,
data,
predict_function = predict,
new_observation,
label = tail(class(x), 1),
method = "KernelSHAP",
nsamples = "auto",
...) {
# check if data correct
if(!all(colnames(data)==colnames(new_observation))){
stop("Columns in new obseravtion and data does not match")
}
# transform factors to numerics and keep factors' levels
data_classes <- sapply(data, class)
factors <- list()
data_numeric <- data
for (col in names(data_classes)) {
if (data_classes[col] == "factor") {
factors[[col]] <- levels(data[, col])
data_numeric[, col] <- as.numeric(data_numeric[, col]) - 1
}
}
# force nsamples to be an integer
if (is.numeric(nsamples))
nsamples <- as.integer(round(nsamples))
p_function <- function(new_data) {
new_data <- as.data.frame(new_data)
colnames(new_data) <- colnames(data)
for (col in names(factors)) {
new_data[, col] <- factor(new_data[, col],
levels = c(0:(length(factors[[col]]) - 1)),
labels = factors[[col]])
}
res <- as.data.frame(predict_function(x, new_data))
if (nrow(res) == 1) {
res[2, ] <- 0
res <- r_to_py(res)
res$drop(res$index[1], inplace = TRUE)
}
return(res)
}
explainer = shap_reference$KernelExplainer(p_function, data_numeric)
new_observation_releveled <- new_observation
new_observation_numeric <- new_observation
for (col in names(factors)) {
new_observation_releveled[, col] <-
factor(new_observation_releveled[, col], levels = factors[[col]])
new_observation_numeric[, col] <-
as.numeric(new_observation_releveled[, col]) - 1
}
shap_values = explainer$shap_values(new_observation_numeric, nsamples = nsamples)
expected_value = explainer$expected_value
predictions <- predict_function(x, new_observation_releveled)
variables <- colnames(data)
# create data to return
new_data <- new_observation
new_data$`_id_` <- c(1:nrow(new_data))
# add multiple predictions
new_data <-
new_data[rep(1:nrow(new_data), each = length(shap_values)),]
if (is.null(colnames(predictions))) {
new_data$`_ylevel_` <- ""
new_data <- unique(new_data)
} else {
new_data$`_ylevel_` <-
rep(colnames(predictions), times = nrow(new_observation))
}
new_data$`_yhat_` <- as.vector(t(predictions))
new_data$`_yhat_mean_` <-
rep(expected_value, times = nrow(new_observation))
# add multiple variables
new_data <- new_data[rep(1:nrow(new_data), each = ncol(data)),]
new_data$`_vname_` <- rep(variables, times = length(predictions))
attribution <- numeric()
for (i in 1:nrow(new_observation)) {
for (j in 1:length(shap_values)) {
shap_attributes <- shap_values[[j]]
if (is.matrix(shap_attributes)) {
attribution <- c(attribution, shap_attributes[i, ])
} else {
attribution <- c(attribution, shap_attributes[i])
}
}
}
new_data$`_attribution_` <- attribution
new_data$`_sign_` <- factor(sign(new_data$`_attribution_`))
new_data$`_sign_` <- ifelse(new_data$`_sign_` == 1, "+", "-")
new_data$`_label_` <- label
class(new_data) <- c("individual_variable_effect", "data.frame")
return(new_data)
}
#' @export
#' @rdname individual_variable_effect
shap <- individual_variable_effect
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.