```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  eval = FALSE
)
```

# R & Python Examples

## R

The `modelStudio()` function uses DALEX explainers created with `DALEX::explain()` or `DALEXtra::explain_*()`.

```r
# packages for the explainer objects
install.packages("DALEX")
install.packages("DALEXtra")
```
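
Whatever the framework, the workflow is the same: fit a model, wrap it in an explainer, and pass the explainer to `modelStudio()`. A minimal sketch of this pattern, using a plain `lm()` model instead of the frameworks covered below:

```r
# minimal sketch: fit a model, explain it, make a studio
library(DALEX)
library(modelStudio)

# fit any model - here a linear model on the apartments data
model <- lm(m2.price ~ ., data = DALEX::apartments)

# wrap the model in a DALEX explainer
explainer <- explain(model,
                     data = DALEX::apartments,
                     y = DALEX::apartments$m2.price,
                     label = "lm")

# compute the explanations and serve the dashboard
modelStudio(explainer)
```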

### mlr dashboard

In this example, we make a studio for the ranger model on the apartments data.

```r
# load packages and data
library(mlr)
library(DALEXtra)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
task <- makeRegrTask(id = "apartments", data = train, target = "m2.price")
learner <- makeLearner("regr.ranger", predict.type = "response")
model <- train(learner, task)

# create an explainer for the model
explainer <- explain_mlr(model,
                         data = test,
                         y = test$m2.price,
                         label = "mlr")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
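
`modelStudio()` also takes parameters that trade computation time for precision of the explanations. The parameter names below come from the package documentation; the values are only illustrative:

```r
# a sketch of commonly tuned parameters (illustrative values)
modelStudio(explainer, new_observation,
            N = 300,             # observations used to calculate profiles
            B = 5,               # permutation rounds for variable attributions
            facet_dim = c(2, 2)) # dimensions of the dashboard grid
```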

### mlr3 dashboard

In this example, we make a studio for the ranger model on the titanic data.

```r
# load packages and data
library(mlr3)
library(mlr3learners)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# mlr3 TaskClassif takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
task <- TaskClassif$new(id = "titanic", backend = train, target = "survived")
learner <- lrn("classif.ranger", predict_type = "prob")
learner$train(task)

# create an explainer for the model
explainer <- explain_mlr3(learner,
                          data = test,
                          y = test$survived,
                          label = "mlr3")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```

### xgboost dashboard

In this example, we make a studio for the xgboost model on the titanic data.

```r
# load packages and data
library(xgboost)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

train_matrix <- model.matrix(survived ~ . - 1, train)
test_matrix <- model.matrix(survived ~ . - 1, test)

# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(max_depth = 3, objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 500)

# create an explainer for the model
explainer <- explain(model,
                     data = test_matrix,
                     y = test$survived,
                     type = "classification",
                     label = "xgboost")

# pick observations
new_observation <- test_matrix[1:2, , drop = FALSE]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```

### caret dashboard

In this example, we make a studio for the gbm model on the titanic data.

```r
# load packages and data
library(caret)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# caret train takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
cv <- trainControl(method = "repeatedcv", number = 3, repeats = 3)
model <- train(survived ~ ., data = train, method = "gbm", trControl = cv, verbose = FALSE)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$survived,
                     label = "caret")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```

### h2o dashboard

In this example, we make a studio for the h2o.automl model on the titanic data. Computing explanations for an AutoML leader can be slow, so we lower the `B` parameter, which controls the number of permutation rounds used to calculate variable attributions.

```r
# load packages and data
library(h2o)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# init h2o
h2o.init()
h2o.no_progress()

# split the data
h2o_split <- h2o.splitFrame(as.h2o(data))
train <- h2o_split[[1]]
test <- as.data.frame(h2o_split[[2]])

# h2o automl takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
automl <- h2o.automl(y = "survived", training_frame = train, max_runtime_secs = 30)
model <- automl@leader

# create an explainer for the model
explainer <- explain_h2o(model,
                         data = test,
                         y = test$survived,
                         label = "h2o")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
# (lower B to reduce computation time)
modelStudio(explainer, new_observation,
            B = 5)

# shutdown h2o
h2o.shutdown(prompt = FALSE)
```

### parsnip dashboard

In this example, we make a studio for the ranger model on the apartments data.

```r
# load packages and data
library(parsnip)
library(DALEX)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
model <- rand_forest() %>%
         set_engine("ranger", importance = "impurity") %>%
         set_mode("regression") %>%
         fit(m2.price ~ ., data = train)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$m2.price,
                     label = "parsnip")

# make a studio for the model
modelStudio(explainer)
```

### tidymodels dashboard

In this example, we make a studio for the ranger model on the titanic data.

```r
# load packages and data
library(tidymodels)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# tidymodels fit takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
rec <- recipe(survived ~ ., data = train) %>%
       step_normalize(fare)

clf <- rand_forest(mtry = 2) %>%
       set_engine("ranger") %>%
       set_mode("classification")

wflow <- workflow() %>%
         add_recipe(rec) %>%
         add_model(clf)

model <- wflow %>% fit(data = train)

# create an explainer for the model
explainer <- explain_tidymodels(model,
                                data = test,
                                y = test$survived,
                                label = "tidymodels")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```

## Python

The `modelStudio()` function uses dalex explainers created with `dalex.Explainer()`.

```{bash, eval = FALSE, engine = "sh"}
# package for the Explainer object
pip install dalex -U
```

Use the `pickle` Python module and the `reticulate` R package to easily make a studio for a model.

```r
# package for pickle load
install.packages("reticulate")
```

### scikit-learn dashboard

In this example, we make a studio for the Pipeline SVR model on the fifa data.

First, use dalex in Python:

```{python, python.reticulate = FALSE, eval = FALSE}
# load packages and data
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

from numpy import log

data = dx.datasets.load_fifa()
X = data.drop(columns=['overall', 'potential', 'value_eur', 'wage_eur', 'nationality'], axis=1)
y = log(data.value_eur)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
model = Pipeline([('scale', StandardScaler()), ('svm', SVR())])
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')

# pack the explainer into a pickle file
explainer.dump(open('explainer_scikitlearn.pickle', 'wb'))
```

Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer, B = 5)
```

### lightgbm dashboard

In this example, we make a studio for the Pipeline LGBMClassifier model on the titanic data.

First, use dalex in Python:

```{python, python.reticulate = FALSE, eval = FALSE}
# load packages and data
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

from lightgbm import LGBMClassifier

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

classifier = LGBMClassifier(n_estimators=300)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='lightgbm')

# pack the explainer into a pickle file
explainer.dump(open('explainer_lightgbm.pickle', 'wb'))
```

Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_lightgbm.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```

### keras/tensorflow dashboard

In this example, we make a studio for the Pipeline KerasClassifier model on the titanic data.

First, use dalex in Python:

```{python, python.reticulate = FALSE, eval = FALSE}
# load packages and data
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.models import Sequential

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

def create_architecture():
    model = Sequential()
    # there are 17 inputs after the pipeline
    model.add(Dense(60, input_dim=17, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

classifier = KerasClassifier(build_fn=create_architecture, epochs=100, batch_size=32, verbose=False)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='keras')

# pack the explainer into a pickle file
explainer.dump(open('explainer_keras.pickle', 'wb'))
```

Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)

# define a placeholder create_architecture function before loading -
# unpickling the KerasClassifier requires this function to exist
py_run_string('
def create_architecture():
    return True
')

explainer <- py_load_object("explainer_keras.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```
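
In every case, `modelStudio()` returns an `r2d3` object, so the computed dashboard can be saved as a standalone HTML file and shared without R. A sketch, assuming the `r2d3` package is installed:

```r
# save the studio as a standalone HTML file
ms <- modelStudio(explainer)
r2d3::save_d3_html(ms, file = "modelstudio_dashboard.html")
```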
