```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  eval = FALSE
)
```
The `modelStudio()` function uses `DALEX` explainers created with `DALEX::explain()` or `DALEXtra::explain_*()`.

```r
# packages for the explainer objects
install.packages("DALEX")
install.packages("DALEXtra")
```
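For example, a basic workflow with a plain `stats::lm()` model might look like this (a minimal sketch; any model wrapped in a `DALEX` explainer is handled the same way):

```r
# a minimal sketch: wrap a simple linear model in a DALEX explainer
library(DALEX)
library(modelStudio)

# fit a model on the apartments data shipped with DALEX
model <- lm(m2.price ~ ., data = DALEX::apartments)

# create an explainer for the model
explainer <- explain(model,
                     data = DALEX::apartments,
                     y = DALEX::apartments$m2.price,
                     label = "lm")

# make a studio for the model
modelStudio(explainer)
```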
In this example, we make a studio for the `ranger` model on the `apartments` data.

```r
# load packages and data
library(mlr)
library(DALEXtra)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
task <- makeRegrTask(id = "apartments", data = train, target = "m2.price")

learner <- makeLearner("regr.ranger", predict.type = "response")

model <- train(learner, task)

# create an explainer for the model
explainer <- explain_mlr(model,
                         data = test,
                         y = test$m2.price,
                         label = "mlr")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `ranger` model on the `titanic` data.

```r
# load packages and data
library(mlr3)
library(mlr3learners)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# mlr3 TaskClassif takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
task <- TaskClassif$new(id = "titanic", backend = train, target = "survived")

learner <- lrn("classif.ranger", predict_type = "prob")

learner$train(task)

# create an explainer for the model
explainer <- explain_mlr3(learner,
                          data = test,
                          y = test$survived,
                          label = "mlr3")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `xgboost` model on the `titanic` data.

```r
# load packages and data
library(xgboost)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

train_matrix <- model.matrix(survived ~ . - 1, train)
test_matrix <- model.matrix(survived ~ . - 1, test)

# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(max_depth = 3, objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 500)

# create an explainer for the model
explainer <- explain(model,
                     data = test_matrix,
                     y = test$survived,
                     type = "classification",
                     label = "xgboost")

# pick observations
new_observation <- test_matrix[1:2, , drop = FALSE]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `gbm` model on the `titanic` data.

```r
# load packages and data
library(caret)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# caret train takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
cv <- trainControl(method = "repeatedcv", number = 3, repeats = 3)
model <- train(survived ~ ., data = train, method = "gbm", trControl = cv, verbose = FALSE)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$survived,
                     label = "caret")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `h2o.automl` model on the `titanic` data.

```r
# load packages and data
library(h2o)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# init h2o
h2o.init()
h2o.no_progress()

# split the data
h2o_split <- h2o.splitFrame(as.h2o(data))
train <- h2o_split[[1]]
test <- as.data.frame(h2o_split[[2]])

# h2o automl takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
automl <- h2o.automl(y = "survived", training_frame = train, max_runtime_secs = 30)
model <- automl@leader

# create an explainer for the model
explainer <- explain_h2o(model,
                         data = test,
                         y = test$survived,
                         label = "h2o")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation, B = 5)

# shutdown h2o
h2o.shutdown(prompt = FALSE)
```
In this example, we make a studio for the `ranger` model on the `apartments` data.

```r
# load packages and data
library(parsnip)
library(DALEX)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
model <- rand_forest() %>%
  set_engine("ranger", importance = "impurity") %>%
  set_mode("regression") %>%
  fit(m2.price ~ ., data = train)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$m2.price,
                     label = "parsnip")

# make a studio for the model
modelStudio(explainer)
```
In this example, we make a studio for the `ranger` model on the `titanic` data.

```r
# load packages and data
library(tidymodels)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# tidymodels fit takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
rec <- recipe(survived ~ ., data = train) %>%
  step_normalize(fare)

clf <- rand_forest(mtry = 2) %>%
  set_engine("ranger") %>%
  set_mode("classification")

wflow <- workflow() %>%
  add_recipe(rec) %>%
  add_model(clf)

model <- wflow %>% fit(data = train)

# create an explainer for the model
explainer <- explain_tidymodels(model,
                                data = test,
                                y = test$survived,
                                label = "tidymodels")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
The `modelStudio()` function uses `dalex` explainers created with `dalex.Explainer()`.
```{bash, eval=FALSE, engine="sh"}
pip install dalex -U
```
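If you prefer to manage Python packages from within R, `reticulate` can install `dalex` as well (a minimal sketch, not part of the original workflow; it assumes `reticulate` already points to a usable Python environment):

```r
# optional: install the dalex Python package from R via reticulate
library(reticulate)
py_install("dalex", pip = TRUE)
```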
Use the `pickle` Python module and the `reticulate` R package to easily make a studio for a model.

```r
# package for pickle load
install.packages("reticulate")
```
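The workflow is the same for every Python model shown below: dump the `dalex` explainer to a pickle file in Python, then load it in R and pass it to `modelStudio()`. A minimal sketch of the R side (the file name `explainer.pickle` is only a placeholder; the sections below use concrete names):

```r
# load an explainer dumped from Python (file name is a placeholder)
library(reticulate)
explainer <- py_load_object("explainer.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```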
In this example, we make a studio for the `Pipeline SVR` model on the `fifa` data.
First, use `dalex` in Python:
```{python, python.reticulate = FALSE, eval = FALSE}
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

from numpy import log

data = dx.datasets.load_fifa()
X = data.drop(columns=['overall', 'potential', 'value_eur', 'wage_eur', 'nationality'], axis=1)
y = log(data.value_eur)

X_train, X_test, y_train, y_test = train_test_split(X, y)

model = Pipeline([('scale', StandardScaler()), ('svm', SVR())])
model.fit(X_train, y_train)

explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')

explainer.dump(open('explainer_scikitlearn.pickle', 'wb'))
```
Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer, B = 5)
```
In this example, we make a studio for the `Pipeline LGBMClassifier` model on the `titanic` data.
First, use `dalex` in Python:
```{python, python.reticulate = FALSE, eval = FALSE}
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

from lightgbm import LGBMClassifier

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

X_train, X_test, y_train, y_test = train_test_split(X, y)

numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

classifier = LGBMClassifier(n_estimators=300)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

explainer = dx.Explainer(model, data=X_test, y=y_test, label='lightgbm')

explainer.dump(open('explainer_lightgbm.pickle', 'wb'))
```
Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_lightgbm.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```
In this example, we make a studio for the `Pipeline KerasClassifier` model on the `titanic` data.
First, use `dalex` in Python:
```{python, python.reticulate = FALSE, eval = FALSE}
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.models import Sequential

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

X_train, X_test, y_train, y_test = train_test_split(X, y)

numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

def create_architecture():
    model = Sequential()
    # there are 17 inputs after the pipeline
    model.add(Dense(60, input_dim=17, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

classifier = KerasClassifier(build_fn=create_architecture, epochs=100, batch_size=32, verbose=False)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

explainer = dx.Explainer(model, data=X_test, y=y_test, label='keras')

explainer.dump(open('explainer_keras.pickle', 'wb'))
```
Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)

#! add blank create_architecture function before load !
py_run_string('
def create_architecture():
    return True
')

explainer <- py_load_object("explainer_keras.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```