```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  eval = FALSE
)
```
The `modelStudio()` function uses `DALEX` explainers created with `DALEX::explain()` or `DALEXtra::explain_*()`.

```r
# packages for the explainer objects
install.packages("DALEX")
install.packages("DALEXtra")
```
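For example, a basic workflow with a plain `stats::lm()` model might look like this (a minimal sketch; any model wrapped in a `DALEX` explainer is handled the same way):

```r
# a minimal sketch: wrap a simple linear model in a DALEX explainer
library(DALEX)
library(modelStudio)

# fit a model on the apartments data shipped with DALEX
model <- lm(m2.price ~ ., data = DALEX::apartments)

# create an explainer for the model
explainer <- explain(model,
                     data = DALEX::apartments,
                     y = DALEX::apartments$m2.price,
                     label = "lm")

# make a studio for the model
modelStudio(explainer)
```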
In this example, we make a studio for the `ranger` model on the `apartments` data.

```r
# load packages and data
library(mlr)
library(DALEXtra)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
task <- makeRegrTask(id = "apartments", data = train, target = "m2.price")

learner <- makeLearner("regr.ranger", predict.type = "response")

model <- train(learner, task)

# create an explainer for the model
explainer <- explain_mlr(model,
                         data = test,
                         y = test$m2.price,
                         label = "mlr")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `ranger` model on the `titanic` data.

```r
# load packages and data
library(mlr3)
library(mlr3learners)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# mlr3 TaskClassif takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
task <- TaskClassif$new(id = "titanic", backend = train, target = "survived")

learner <- lrn("classif.ranger", predict_type = "prob")

learner$train(task)

# create an explainer for the model
explainer <- explain_mlr3(learner,
                          data = test,
                          y = test$survived,
                          label = "mlr3")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `xgboost` model on the `titanic` data.

```r
# load packages and data
library(xgboost)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

train_matrix <- model.matrix(survived ~ . - 1, train)
test_matrix <- model.matrix(survived ~ . - 1, test)

# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(max_depth = 3, objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 500)

# create an explainer for the model
explainer <- explain(model,
                     data = test_matrix,
                     y = test$survived,
                     type = "classification",
                     label = "xgboost")

# pick observations
new_observation <- test_matrix[1:2, , drop = FALSE]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `gbm` model on the `titanic` data.

```r
# load packages and data
library(caret)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# caret train takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
cv <- trainControl(method = "repeatedcv", number = 3, repeats = 3)
model <- train(survived ~ ., data = train, method = "gbm", trControl = cv, verbose = FALSE)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$survived,
                     label = "caret")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
In this example, we make a studio for the `h2o.automl` model on the `titanic` data.

```r
# load packages and data
library(h2o)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# init h2o
h2o.init()
h2o.no_progress()

# split the data
h2o_split <- h2o.splitFrame(as.h2o(data))
train <- h2o_split[[1]]
test <- as.data.frame(h2o_split[[2]])

# h2o automl takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
automl <- h2o.automl(y = "survived", training_frame = train, max_runtime_secs = 30)
model <- automl@leader

# create an explainer for the model
explainer <- explain_h2o(model,
                         data = test,
                         y = test$survived,
                         label = "h2o")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation, B = 5)

# shutdown h2o
h2o.shutdown(prompt = FALSE)
```
In this example, we make a studio for the `ranger` model on the `apartments` data.

```r
# load packages and data
library(parsnip)
library(DALEX)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
model <- rand_forest() %>%
  set_engine("ranger", importance = "impurity") %>%
  set_mode("regression") %>%
  fit(m2.price ~ ., data = train)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$m2.price,
                     label = "parsnip")

# make a studio for the model
modelStudio(explainer)
```
In this example, we make a studio for the `ranger` model on the `titanic` data.

```r
# load packages and data
library(tidymodels)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# tidymodels fit takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
rec <- recipe(survived ~ ., data = train) %>%
  step_normalize(fare)

clf <- rand_forest(mtry = 2) %>%
  set_engine("ranger") %>%
  set_mode("classification")

wflow <- workflow() %>%
  add_recipe(rec) %>%
  add_model(clf)

model <- wflow %>% fit(data = train)

# create an explainer for the model
explainer <- explain_tidymodels(model,
                                data = test,
                                y = test$survived,
                                label = "tidymodels")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
```
The `modelStudio()` function uses `dalex` explainers created with `dalex.Explainer()`.
```{bash, eval=FALSE, engine="sh"}
pip install dalex -U
```
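If you prefer to manage Python packages from within R, `reticulate` can install `dalex` as well (a minimal sketch, not part of the original workflow; it assumes `reticulate` already points to a usable Python environment):

```r
# optional: install the dalex Python package from R via reticulate
library(reticulate)
py_install("dalex", pip = TRUE)
```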
Use the `pickle` Python module and the `reticulate` R package to easily make a studio for a model.

```r
# package for pickle load
install.packages("reticulate")
```
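The workflow is the same for every Python model shown below: dump the `dalex` explainer to a pickle file in Python, then load it in R and pass it to `modelStudio()`. A minimal sketch of the R side (the file name `explainer.pickle` is only a placeholder; the sections below use concrete names):

```r
# load an explainer dumped from Python (file name is a placeholder)
library(reticulate)
explainer <- py_load_object("explainer.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```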
In this example, we make a studio for the `Pipeline SVR` model on the `fifa` data.
First, use `dalex` in Python:
```{python, python.reticulate = FALSE, eval = FALSE}
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

from numpy import log

data = dx.datasets.load_fifa()
X = data.drop(columns=['overall', 'potential', 'value_eur', 'wage_eur', 'nationality'], axis=1)
y = log(data.value_eur)

X_train, X_test, y_train, y_test = train_test_split(X, y)

model = Pipeline([('scale', StandardScaler()), ('svm', SVR())])
model.fit(X_train, y_train)

explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')

explainer.dump(open('explainer_scikitlearn.pickle', 'wb'))
```
Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer, B = 5)
```
In this example, we make a studio for the `Pipeline LGBMClassifier` model on the `titanic` data.
First, use `dalex` in Python:
```{python, python.reticulate = FALSE, eval = FALSE}
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

from lightgbm import LGBMClassifier

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

X_train, X_test, y_train, y_test = train_test_split(X, y)

numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

classifier = LGBMClassifier(n_estimators=300)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

explainer = dx.Explainer(model, data=X_test, y=y_test, label='lightgbm')

explainer.dump(open('explainer_lightgbm.pickle', 'wb'))
```
Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_lightgbm.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```
In this example, we make a studio for the `Pipeline KerasClassifier` model on the `titanic` data.
First, use `dalex` in Python:
```{python, python.reticulate = FALSE, eval = FALSE}
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.models import Sequential

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

X_train, X_test, y_train, y_test = train_test_split(X, y)

numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

def create_architecture():
    model = Sequential()
    # there are 17 inputs after the pipeline
    model.add(Dense(60, input_dim=17, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

classifier = KerasClassifier(build_fn=create_architecture, epochs=100, batch_size=32, verbose=False)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

explainer = dx.Explainer(model, data=X_test, y=y_test, label='keras')

explainer.dump(open('explainer_keras.pickle', 'wb'))
```
Then, use `modelStudio` in R:

```r
# load the explainer from the pickle file
library(reticulate)

#! add blank create_architecture function before load !
py_run_string('
def create_architecture():
    return True
')

explainer <- py_load_object("explainer_keras.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
modelStudio(explainer)
```