modelStudio - R & Python examples

```r
knitr::opts_chunk$set(
  collapse = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  eval = FALSE
)
```

R & Python Examples

R

The modelStudio() function uses DALEX explainers created with DALEX::explain() or DALEXtra::explain_*().

```r
# packages for the explainer objects
install.packages("DALEX")
install.packages("DALEXtra")
```

mlr dashboard

In this example, we make a studio for the ranger model on the apartments data.

```r
# load packages and data
library(mlr)
library(DALEXtra)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
task <- makeRegrTask(id = "apartments", data = train, target = "m2.price")
learner <- makeLearner("regr.ranger", predict.type = "response")
model <- train(learner, task)

# create an explainer for the model
explainer <- explain_mlr(model,
                         data = test,
                         y = test$m2.price,
                         label = "mlr")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
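```

modelStudio() returns an r2d3 object, so the dashboard can be assigned to a variable and saved as a standalone HTML file. A short sketch, assuming the r2d3 package (a modelStudio dependency) is available:

```r
# assign the studio to an object instead of printing it
ms <- modelStudio(explainer, new_observation)

# save the dashboard as a standalone HTML file
r2d3::save_d3_html(ms, file = "modelstudio.html")
```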

mlr3 dashboard

In this example, we make a studio for the ranger model on the titanic data.

```r
# load packages and data
library(mlr3)
library(mlr3learners)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# mlr3 TaskClassif takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
task <- TaskClassif$new(id = "titanic", backend = train, target = "survived")
learner <- lrn("classif.ranger", predict_type = "prob")
learner$train(task)

# create an explainer for the model
explainer <- explain_mlr3(learner,
                          data = test,
                          y = test$survived,
                          label = "mlr3")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
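```

The dashboard's appearance can also be customized through the options argument. A minimal sketch, assuming the ms_options() helper from modelStudio; the specific settings shown are illustrative only, see ?ms_options for the available options:

```r
# customize plot colors in the studio (example settings only)
modelStudio(explainer, new_observation,
            options = ms_options(line_color = "black",
                                 point_color = "black"))
```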

xgboost dashboard

In this example, we make a studio for the xgboost model on the titanic data.

```r
# load packages and data
library(xgboost)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# xgboost requires a numeric matrix, so one-hot encode the data
train_matrix <- model.matrix(survived ~ . - 1, train)
test_matrix <- model.matrix(survived ~ . - 1, test)

# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(max_depth = 3, objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 500)

# create an explainer for the model
explainer <- explain(model,
                     data = test_matrix,
                     y = test$survived,
                     type = "classification",
                     label = "xgboost")

# pick observations
new_observation <- test_matrix[1:2, , drop = FALSE]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```

caret dashboard

In this example, we make a studio for the gbm model on the titanic data.

```r
# load packages and data
library(caret)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# caret train takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
cv <- trainControl(method = "repeatedcv", number = 3, repeats = 3)
model <- train(survived ~ ., data = train, method = "gbm", trControl = cv, verbose = FALSE)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$survived,
                     label = "caret")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```

h2o dashboard

In this example, we make a studio for the h2o.automl model on the titanic data.

```r
# load packages and data
library(h2o)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# init h2o
h2o.init()
h2o.no_progress()

# split the data
h2o_split <- h2o.splitFrame(as.h2o(data))
train <- h2o_split[[1]]
test <- as.data.frame(h2o_split[[2]])

# h2o automl takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
automl <- h2o.automl(y = "survived", training_frame = train, max_runtime_secs = 30)
model <- automl@leader

# create an explainer for the model
explainer <- explain_h2o(model,
                         data = test,
                         y = test$survived,
                         label = "h2o")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation,
            B = 5)

# shutdown h2o
h2o.shutdown(prompt = FALSE)
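```

Explaining an AutoML leader can be slow, which is why B is lowered to 5 above. Computation time can be traded for explanation precision through the N and B arguments of modelStudio() (fewer sampled observations and fewer permutation rounds) and, on multi-core machines, the parallel argument; a sketch with illustrative values:

```r
# trade some precision of the explanations for computation speed
modelStudio(explainer, new_observation,
            N = 100, B = 5, parallel = TRUE)
```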

parsnip dashboard

In this example, we make a studio for the ranger model on the apartments data.

```r
# load packages and data
library(parsnip)
library(DALEX)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
model <- rand_forest() %>%
         set_engine("ranger", importance = "impurity") %>%
         set_mode("regression") %>%
         fit(m2.price ~ ., data = train)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$m2.price,
                     label = "parsnip")

# make a studio for the model
modelStudio(explainer)
```

tidymodels dashboard

In this example, we make a studio for the ranger model on the titanic data.

```r
# load packages and data
library(tidymodels)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# tidymodels fit takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
rec <- recipe(survived ~ ., data = train) %>%
       step_normalize(fare)

clf <- rand_forest(mtry = 2) %>%
       set_engine("ranger") %>%
       set_mode("classification")

wflow <- workflow() %>%
         add_recipe(rec) %>%
         add_model(clf)

model <- wflow %>% fit(data = train)

# create an explainer for the model
explainer <- explain_tidymodels(model,
                                data = test,
                                y = test$survived,
                                label = "tidymodels")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```

Python

The modelStudio() function uses dalex explainers created with dalex.Explainer().

```{bash, eval=FALSE, engine="sh"}
# package for the Explainer object
pip install dalex -U
```

Use the `pickle` Python module and the `reticulate` R package to easily make a studio for a Python model.

```r
# package for pickle load
install.packages("reticulate")
```

scikit-learn dashboard

In this example, we make a studio for the Pipeline SVR model on the fifa data.

First, use dalex in Python:

```{python, python.reticulate = FALSE, eval = FALSE}
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from numpy import log

data = dx.datasets.load_fifa()
X = data.drop(columns=['overall', 'potential', 'value_eur', 'wage_eur', 'nationality'], axis=1)
y = log(data.value_eur)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
model = Pipeline([('scale', StandardScaler()), ('svm', SVR())])
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')

# pack the explainer into a pickle file
explainer.dump(open('explainer_scikitlearn.pickle', 'wb'))
```

Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer, B = 5)
```

lightgbm dashboard

In this example, we make a studio for the Pipeline LGBMClassifier model on the titanic data.

First, use dalex in Python:

```{python, python.reticulate = FALSE, eval = FALSE}
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from lightgbm import LGBMClassifier

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

classifier = LGBMClassifier(n_estimators=300)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='lightgbm')

# pack the explainer into a pickle file
explainer.dump(open('explainer_lightgbm.pickle', 'wb'))
```

Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_lightgbm.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```

keras/tensorflow dashboard

In this example, we make a studio for the Pipeline KerasClassifier model on the titanic data.

First, use dalex in Python:

```{python, python.reticulate = FALSE, eval = FALSE}
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.models import Sequential

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

def create_architecture():
    model = Sequential()
    # there are 17 inputs after the pipeline
    model.add(Dense(60, input_dim=17, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

classifier = KerasClassifier(build_fn=create_architecture, epochs=100, batch_size=32, verbose=False)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='keras')

# pack the explainer into a pickle file
explainer.dump(open('explainer_keras.pickle', 'wb'))
```

Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)

# note: a placeholder create_architecture function must be defined before the
# load, because the pickled KerasClassifier references it by name
py_run_string('
def create_architecture():
    return True
')

explainer <- py_load_object("explainer_keras.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```
