# Global knitr chunk options applied to every chunk of this vignette.
knitr::opts_chunk$set(
  # Text output: keep source and output in separate blocks, prefix output
  # lines with "#>", and hide warnings and messages.
  collapse = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  # Figures: centred, 12 x 8.
  fig.width = 12,
  fig.height = 8,
  fig.align = "center"
)
This vignette demonstrates the use of the triplot package on the FIFA 20 dataset. The dataset was downloaded from the Kaggle website, preprocessed, and made available in the DALEX package.
The dataset contains 37 features that describe each player's performance. We will explore models that predict a player's value in euros (in millions).
# Load the explanation packages and the `fifa` data set.
library("DALEX")
library("triplot")

data(fifa)

# Rescale the target from euro to millions of euro.
fifa$value_eur <- fifa$value_eur / 10^6

# Remove these four columns before modelling.
fifa[, c("nationality", "overall", "potential", "wage_eur")] <- NULL

# Preview the first six columns of the prepared data.
head(fifa[, 1:6])
For further analysis, we prepare three models:
library("ranger")
library("gbm")

# Fix the RNG seed so the fitted models (and every explanation derived from
# them) are reproducible between runs, matching the `set.seed(123)` calls
# used by the instance-level chunks of this vignette.
set.seed(123)

# Model 1: random forest with default ranger settings.
fifa_rf <- ranger(value_eur ~ ., data = fifa)

# Model 2: random forest with hand-picked hyperparameters.
fifa_rf_mod <- ranger(
  value_eur ~ .,
  data = fifa,
  mtry = 18,
  splitrule = "variance",
  min.node.size = 5
)

# Model 3: gradient boosting machine. The explainer's predict function must
# use the same `n.trees` value.
fifa_gbm <- gbm(
  value_eur ~ .,
  data = fifa,
  n.trees = 250,
  interaction.depth = 4,
  distribution = "gaussian"
)
For each of those models, we build a DALEX explainer. Using explainers simplifies further analysis of the models.
# Predictor columns only: the target is dropped by name rather than by
# position, so the explainers do not depend on the column order of `fifa`.
fifa_features <- fifa[, setdiff(names(fifa), "value_eur"), drop = FALSE]

# One DALEX explainer per model; all share the same data and target.
fifa_rf_exp <- DALEX::explain(
  fifa_rf,
  data = fifa_features,
  y = fifa$value_eur,
  label = "RF",
  verbose = FALSE
)

fifa_rf_m_exp <- DALEX::explain(
  fifa_rf_mod,
  data = fifa_features,
  y = fifa$value_eur,
  label = "RF tuned",
  verbose = FALSE
)

# The GBM explainer gets an explicit predict function so that `n.trees` is
# passed to predict() on every call.
fifa_gbm_exp <- DALEX::explain(
  fifa_gbm,
  data = fifa_features,
  y = fifa$value_eur,
  predict_function = function(m, x) predict(m, x, n.trees = 250),
  label = "GBM",
  verbose = FALSE
)
'Triplot' objects allow us to investigate the importance of each feature (plot on the left), the correlation between features (plot on the right), and how importance changes across hierarchically grouped aspects (plot in the middle).
# Seed the RNG so the triplots are reproducible between runs, matching the
# `set.seed(123)` calls used by the instance-level chunks of this vignette.
# NOTE(review): per the triplot documentation, B is the number of permutation
# rounds and N the number of sampled observations - confirm against the
# installed version.
set.seed(123)

# Build one model-level triplot per explainer.
fifa_rf_tri <- model_triplot(fifa_rf_exp, B = 1, N = 5000)
fifa_rf_m_tri <- model_triplot(fifa_rf_m_exp, B = 1, N = 5000)
fifa_gbm_tri <- model_triplot(fifa_gbm_exp, B = 1, N = 5000)

# Draw the three triplots with identical settings so they can be compared.
plot(fifa_rf_tri, show_model_label = TRUE, margin_mid = 0)
plot(fifa_rf_m_tri, show_model_label = TRUE, margin_mid = 0)
plot(fifa_gbm_tri, show_model_label = TRUE, margin_mid = 0)
# Manual grouping of the predictors into named aspects, passed as
# `variable_groups` to predict_aspects(). Each element maps an aspect name to
# the column names it covers.
fifa_vg <- list(
  age = "age",
  body = c("height_cm", "weight_kg"),
  attacking = c(
    "attacking_crossing",
    "attacking_finishing",
    "attacking_heading_accuracy",
    "attacking_short_passing",
    "attacking_volleys"
  ),
  skill = c(
    "skill_dribbling",
    "skill_curve",
    "skill_fk_accuracy",
    "skill_long_passing",
    "skill_ball_control"
  ),
  movement = c(
    "movement_acceleration",
    "movement_sprint_speed",
    "movement_agility",
    "movement_reactions",
    "movement_balance"
  ),
  power = c(
    "power_shot_power",
    "power_jumping",
    "power_stamina",
    "power_strength",
    "power_long_shots"
  ),
  mentality = c(
    "mentality_aggression",
    "mentality_interceptions",
    "mentality_positioning",
    "mentality_vision",
    "mentality_penalties",
    "mentality_composure"
  ),
  defending = c(
    "defending_marking",
    "defending_standing_tackle",
    "defending_sliding_tackle"
  ),
  goalkeeping = c(
    "goalkeeping_diving",
    "goalkeeping_handling",
    "goalkeeping_kicking",
    "goalkeeping_positioning",
    "goalkeeping_reflexes"
  )
)
For the instance-level model analysis, we choose the top player by value.
set.seed(123)

# Player with the highest value. which.max() returns the first maximum, which
# is the same row the original full sort selected, without sorting the table.
top_player <- fifa[which.max(fifa$value_eur), ]
print(top_player)

# Aspect importance for the same observation under each of the three models,
# using the manually defined variable groups.
aspects_top_player_rf <- predict_aspects(
  fifa_rf_exp,
  new_observation = top_player,
  variable_groups = fifa_vg
)
aspects_top_player_rf_m <- predict_aspects(
  fifa_rf_m_exp,
  new_observation = top_player,
  variable_groups = fifa_vg
)
aspects_top_player_gbm <- predict_aspects(
  fifa_gbm_exp,
  new_observation = top_player,
  variable_groups = fifa_vg
)

# Compare the three explanations in a single plot.
plot(aspects_top_player_rf, aspects_top_player_rf_m, aspects_top_player_gbm)
# Instance-level triplot for the top player under the tuned random forest.
fifa_rf_m_tri_player <- predict_triplot(fifa_rf_m_exp, top_player)

plot(fifa_rf_m_tri_player, show_model_label = TRUE, margin_mid = 0.2)
For comparison, in the next analysis we explain the prediction for one of the goalkeepers.
set.seed(123)

# Select the observation by row name.
player_gk <- fifa["J. Oblak", ]
print(player_gk)

# Aspect importance for this observation under each of the three models,
# using the manually defined variable groups.
aspects_gk_rf <- predict_aspects(
  fifa_rf_exp,
  new_observation = player_gk,
  variable_groups = fifa_vg
)
aspects_gk_rf_m <- predict_aspects(
  fifa_rf_m_exp,
  new_observation = player_gk,
  variable_groups = fifa_vg
)
aspects_gk_gbm <- predict_aspects(
  fifa_gbm_exp,
  new_observation = player_gk,
  variable_groups = fifa_vg
)
# Compare the three goalkeeper explanations in a single plot.
plot(aspects_gk_rf, aspects_gk_rf_m, aspects_gk_gbm)
# Record the R version and attached packages used to build this vignette.
utils::sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.