# Global knitr chunk options applied to every code chunk of the vignette.
knitr::opts_chunk$set(
  # Printed output: prefix with "#>" and keep source and output blocks
  # separate (no collapsing).
  comment = "#>",
  collapse = FALSE,
  # Keep warnings and messages out of the rendered document.
  message = FALSE,
  warning = FALSE,
  # Figure layout: centered, 12 wide by 8 high.
  fig.width = 12,
  fig.height = 8,
  fig.align = "center"
)

Data

This vignette demonstrates the use of the triplot package on the FIFA 20 dataset. The dataset was downloaded from the Kaggle website, preprocessed, and made available in the DALEX package.

The dataset contains 37 features that describe each player's performance. We will explore models that predict a player's value in millions of euros.

library("DALEX")
library("triplot")

# Load the FIFA 20 data set.
data(fifa)

# Rescale the target so that it is expressed in millions of euro.
fifa$value_eur <- fifa$value_eur / 1e6

# Remove the columns that are not used by the models built below.
fifa[c("nationality", "overall", "potential", "wage_eur")] <- NULL

# Preview the first six columns.
head(fifa[, 1:6])

Build models for predicting players' value

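For further analysis, we prepare three models: a random forest with default parameters, a random forest with modified parameters, and a gradient boosting machine.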

library("ranger")
library("gbm")

# Seed the RNG before fitting: both ranger and gbm are stochastic, so without
# a fixed seed the fitted models (and every explanation derived from them)
# differ between runs of the vignette.
set.seed(123)

# Random forest with ranger's default hyperparameters.
fifa_rf <- ranger(value_eur ~ ., data = fifa)

# Random forest with manually chosen hyperparameters.
fifa_rf_mod <- ranger(value_eur ~ ., data = fifa,
                      mtry = 18, splitrule = "variance", min.node.size = 5)

# Gradient boosting machine: 250 trees, interaction depth 4, Gaussian loss.
fifa_gbm <- gbm(value_eur ~ ., data = fifa, n.trees = 250,
                interaction.depth = 4, distribution = "gaussian")

Build explainers

For each of these models, we build a DALEX explainer. Using explainers simplifies further analysis of the models.

# Predictor columns only. The target is dropped by name rather than by
# position, so the explainers stay correct even if the column order of `fifa`
# changes.
fifa_predictors <- fifa[, setdiff(names(fifa), "value_eur"), drop = FALSE]

# Explainer for the default random forest.
fifa_rf_exp <- DALEX::explain(fifa_rf,
                              data = fifa_predictors, # without target column
                              y = fifa$value_eur,
                              label = "RF",
                              verbose = FALSE)

# Explainer for the random forest with modified hyperparameters.
fifa_rf_m_exp <- DALEX::explain(fifa_rf_mod,
                                data = fifa_predictors,
                                y = fifa$value_eur,
                                label = "RF tuned",
                                verbose = FALSE)

# Explainer for the GBM. A custom predict function is supplied so that
# predictions are made with 250 trees, the same number used when fitting.
fifa_gbm_exp <- DALEX::explain(fifa_gbm,
                               data = fifa_predictors,
                               y = fifa$value_eur,
                               predict_function =
                                 function(m, x)
                                   predict(m, x, n.trees = 250),
                               label = "GBM",
                               verbose = FALSE)

Triplot for model

'Triplot' objects allow us to investigate the importance of each feature (plot on the left), the correlation between features (plot on the right), and the importance of hierarchically grouped aspects (plot in the middle).

# Model-level triplot for each explainer, all with the same settings
# (N = 5000, B = 1).
fifa_rf_tri <- model_triplot(
  fifa_rf_exp,
  N = 5000,
  B = 1
)
fifa_rf_m_tri <- model_triplot(
  fifa_rf_m_exp,
  N = 5000,
  B = 1
)
fifa_gbm_tri <- model_triplot(
  fifa_gbm_exp,
  N = 5000,
  B = 1
)

# Draw the three triplots one after another, each labelled with its model.
plot(fifa_rf_tri, margin_mid = 0, show_model_label = TRUE)

plot(fifa_rf_m_tri, margin_mid = 0, show_model_label = TRUE)

plot(fifa_gbm_tri, margin_mid = 0, show_model_label = TRUE)
# Named groups of features ("aspects"): each element maps an aspect name to
# the column names that belong to it. Most groups share a common column-name
# prefix, so their members are built from the prefix and the suffixes.
fifa_vg <- list(
  age = "age",
  body = c("height_cm", "weight_kg"),
  attacking = paste0(
    "attacking_",
    c("crossing", "finishing", "heading_accuracy", "short_passing", "volleys")
  ),
  skill = paste0(
    "skill_",
    c("dribbling", "curve", "fk_accuracy", "long_passing", "ball_control")
  ),
  movement = paste0(
    "movement_",
    c("acceleration", "sprint_speed", "agility", "reactions", "balance")
  ),
  power = paste0(
    "power_",
    c("shot_power", "jumping", "stamina", "strength", "long_shots")
  ),
  mentality = paste0(
    "mentality_",
    c("aggression", "interceptions", "positioning", "vision", "penalties",
      "composure")
  ),
  defending = paste0(
    "defending_",
    c("marking", "standing_tackle", "sliding_tackle")
  ),
  goalkeeping = paste0(
    "goalkeeping_",
    c("diving", "handling", "kicking", "positioning", "reflexes")
  )
)

Investigate models for chosen instances (players)

For the instance-level model analysis, we choose the top player, i.e. the one with the highest value.

# Pick the row of the player with the highest value: take the first index of
# the decreasing ordering and subset once.
top_player <- fifa[order(fifa$value_eur, decreasing = TRUE)[1], ]

print(top_player)

# Seed the RNG right before the aspect-importance computations.
set.seed(123)

# Aspect importance for the selected player, one result per model, all using
# the same variable groups.
aspects_top_player_rf <- predict_aspects(
  fifa_rf_exp,
  variable_groups = fifa_vg,
  new_observation = top_player
)

aspects_top_player_rf_m <- predict_aspects(
  fifa_rf_m_exp,
  variable_groups = fifa_vg,
  new_observation = top_player
)

aspects_top_player_gbm <- predict_aspects(
  fifa_gbm_exp,
  variable_groups = fifa_vg,
  new_observation = top_player
)

# Plot the three results together for comparison.
plot(
  aspects_top_player_rf,
  aspects_top_player_rf_m,
  aspects_top_player_gbm
)
# Instance-level triplot for the selected player, based on the "RF tuned"
# explainer.
fifa_rf_m_tri_player <- predict_triplot(
  fifa_rf_m_exp,
  top_player
)

plot(
  fifa_rf_m_tri_player,
  margin_mid = 0.2,
  show_model_label = TRUE
)

For comparison, in the next analysis we explain the prediction for one of the goalkeepers.

# Select the player by row name.
player_gk <- fifa["J. Oblak", ]

print(player_gk)

# Seed the RNG right before the aspect-importance computations.
set.seed(123)

# Aspect importance for the selected player, one result per model, all using
# the same variable groups.
aspects_gk_rf <- predict_aspects(
  fifa_rf_exp,
  variable_groups = fifa_vg,
  new_observation = player_gk
)

aspects_gk_rf_m <- predict_aspects(
  fifa_rf_m_exp,
  variable_groups = fifa_vg,
  new_observation = player_gk
)

aspects_gk_gbm <- predict_aspects(
  fifa_gbm_exp,
  variable_groups = fifa_vg,
  new_observation = player_gk
)

# Plot the three results together for comparison.
plot(
  aspects_gk_rf,
  aspects_gk_rf_m,
  aspects_gk_gbm
)

Session info

# Print the R version, platform and loaded package versions used to build
# this vignette.
sessionInfo()


ModelOriented/triplot documentation built on March 10, 2021, 6:26 p.m.