inst/doc/getting-started-with-cheem.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  echo       = TRUE,   # code
  include    = TRUE,   # plots
  results    = "show", # text: "hide", "show"
  ## stop("REPLACE ME"):
  eval       = FALSE,   # chunk code
  message    = FALSE,
  warning    = FALSE,
  error      = FALSE,
  collapse   = TRUE,
  comment    = "#>",
  fig.height = 4,
  fig.width  = 6,
  fig.align  = "center",
  cache      = FALSE
)

## ----limenonlinear, echo=FALSE, fig.cap="Illustration of non-linear classification boundary. The use of local explanations approximates the feature importance in the vicinity of one instance. This allow us to understand a change in which features would result in a red plus being classified as a blue circle. From _Ribiro, M. et. all. (2017). Why should I trust you?_"----
# knitr::include_graphics("../inst/shiny_apps/cheem/www/lime_nonlinear.png")

## -----------------------------------------------------------------------------
# ## Download if not installed
# if(!require(cheem))    install.packages("cheem", dependencies = TRUE)
# if(!require(treeshap)) install.packages("treeshap", dependencies = TRUE)
# if(!require(shapviz))  install.packages("shapviz", dependencies = TRUE)
# ## Load onto session
# library(cheem)
# library(xgboost)
# library(shapviz)
# 
# ## Setup
# X    <- amesHousing2018_NorthAmes[, 1:9]
# Y    <- amesHousing2018_NorthAmes$SalePrice
# clas <- amesHousing2018_NorthAmes$SubclassMS
# 
# ## Model and predict
# ames_train    <- data.matrix(X) %>% xgb.DMatrix(label = Y)
# ames_xgb_fit  <- xgboost(data = ames_train, max.depth = 3, nrounds = 25)
# ames_xgb_pred <- predict(ames_xgb_fit, newdata = ames_train)
# ames_xgb_pred %>% head()
# 
# ## SHAP values
# shp <- shapviz(ames_xgb_fit, X_pred = ames_train, X = X)
# ## Keep just the [n, p] local explanations
# ames_xgb_shap <- shp$S
# ames_xgb_shap %>% head()

## -----------------------------------------------------------------------------
# ## Preprocessing for cheem analysis
# ames_chm <- cheem_ls(X, Y,
#                      class      = clas,
#                      attr_df    = ames_xgb_shap,
#                      pred       = ames_xgb_pred,
#                      label      = "Ames, xgb, shap")
# names(ames_chm)

## ----out.width="100%"---------------------------------------------------------
# prim <- 1
# comp <- 17
# global_view(ames_chm, primary_obs = prim, comparison_obs = comp,
#             height_px = 240, width_px = 720,
#             as_ggplot = TRUE, color = "log_maha.data")

## ----out.width="100%", eval = FALSE-------------------------------------------
# ## Normalized attribution basis of the PI
# bas <- sug_basis(ames_xgb_shap, rownum = prim)
# ## Default feature to manipulate:
# #### the feature with largest separation between PI and CI attribution
# mv  <- sug_manip_var(
#   ames_xgb_shap, primary_obs = prim, comparison_obs = comp)
# ## Make the radial tour
# ggt <- radial_cheem_tour(
#   ames_chm, basis = bas, manip_var = mv,
#   primary_obs = prim, comparison_obs = comp, angle = .15)
# 
# ## Animate it
# animate_gganimate(ggt, fps = 6)
#   #height = 2, width = 4.5, units = "in", res = 150
# ## Or as a plotly html widget
# #animate_plotly(ggt, fps = 6)

## ----echo=FALSE, out.width="100%"---------------------------------------------
# ## To mitigate file size (CRAN note) and run time create a gif and include that instead of executing code to make inline.
# if(FALSE){
#   prim <- 1
#   comp <- 17
#   bas <- sug_basis(shap_df, rownum = prim)
#   mv  <- sug_manip_var(
#     shap_df, primary_obs = prim, comparison_obs = comp)
#   ggt <- radial_cheem_tour(
#     this_ls, basis = bas, manip_var = mv,
#     primary_obs = prim, comparison_obs = comp, angle = .15)
# 
#   #### .gif is about .2 Mb saved, while HTML widget was about 7 Mb.
#   anim <- animate_gganimate(
#     ggt, fps = 6, height = 2, width = 4.5, units = "in", res = 150)
#   gganimate::anim_save("tour_penguins.gif", animation = anim)#, path = "./vignettes")
#   beepr::beep()
# }
# #knitr::include_graphics("tour_penguins.gif")

## -----------------------------------------------------------------------------
# library(ggplot2)
# prim <- 1
# 
# ggplot(penguins_na.rm, aes(x = bill_length_mm,
#                            y = flipper_length_mm,
#                            colour = species,
#                            shape = species)) +
#   geom_point() +
#   ## Highlight PI, *
#   geom_point(data = penguins_na.rm[prim, ],
#              shape = 8, size = 5, alpha = 0.8) +
#   ## Theme, scaling, color, and labels
#   theme_bw() +
#   theme(aspect.ratio = 1) +
#   scale_color_brewer(palette = "Dark2") +
#   labs(y = "Flipper length [mm]", x = "Bill length [mm]",
#        color = "Observed species", shape = "Observed species")

## ----eval=FALSE, echo=TRUE----------------------------------------------------
# if(!require(shapviz)) install.packages("shapviz")
# if(!require(xgboost)) install.packages("xgboost")
# library(shapviz)
# library(xgboost)
# set.seed(3653)
# 
# ## Setup
# X    <- spinifex::penguins_na.rm[, 1:4]
# Y    <- spinifex::penguins_na.rm$species
# clas <- spinifex::penguins_na.rm$species
# 
# ## Model and predict
# peng_train    <- data.matrix(X) %>%
#   xgb.DMatrix(label = Y)
# peng_xgb_fit  <- xgboost(data = peng_train, max.depth = 3, nrounds = 25)
# peng_xgb_pred <- predict(peng_xgb_fit, newdata = peng_train)
# 
# ## SHAP
# peng_xgb_shap <- shapviz(peng_xgb_fit, X_pred = peng_train, X = X)
# ## Keep just the [n, p] local explanations
# peng_xgb_shap <- peng_xgb_shap$S

## ----eval=FALSE, echo=TRUE----------------------------------------------------
# if(!require(treeshap)) install.packages("treeshap")
# if(!require(randomForest)) install.packages("randomForest")
# library(treeshap)
# library(randomForest)
# 
# ## Setup
# X    <- spinifex::wine[, -1:2]
# Y    <- spinifex::wine$Alcohol
# clas <- spinifex::wine$Type
# 
# ## Fit randomForest::randomForest
# wine_rf_fit  <- randomForest::randomForest(
#   X, Y, ntree = 125,
#   mtry = ifelse(is_discrete(Y), sqrt(ncol(X)), ncol(X) / 3),
#   nodesize = max(ifelse(is_discrete(Y), 1, 5), nrow(X) / 500))
# wine_rf_pred <- predict(wine_rf_fit)
# 
# ## treeshap::treeshap()
# wine_rf_tshap <- wine_rf_fit %>%
#   treeshap::randomForest.unify(X) %>%
#   treeshap::treeshap(X, interactions = FALSE, verbose = FALSE)
# ## Keep just the [n, p] local explanations
# wine_rf_tshap <- wine_rf_tshap$shaps

## ----eval=FALSE, echo=TRUE----------------------------------------------------
# if(!require(DALEX)) install.packages("DALEX")
# library(DALEX)
# 
# ## Setup
# X    <- dragons[, c(1:4, 6)]
# Y    <- dragons$life_length
# clas <- dragons$colour
# 
# ## Model and predict
# drag_lm_fit  <- lm(data = data.frame(Y, X), Y ~ .)
# drag_lm_pred <- predict(drag_lm_fit)
# 
# ## SHAP via DALEX, versatile but slow
# drag_lm_exp <- explain(drag_lm_fit, data = X, y = Y,
#                        label = "Dragons, LM, SHAP")
# ## DALEX::predict_parts_shap is flexible, but slow and one row at a time
# drag_lm_shap <- matrix(NA, nrow(X), ncol(X))
# sapply(1:nrow(X), function(i){
#   pps <- predict_parts_shap(drag_lm_exp, new_observation = X[i, ])
#   ## Keep just the [n, p] local explanations
#   drag_lm_shap[i, ] <<- tapply(
#     pps$contribution, pps$variable, mean, na.rm = TRUE) %>% as.vector()
# })
# drag_lm_shap <- as.data.frame(drag_lm_shap)

Try the cheem package in your browser

Any scripts or data that you put into this service are public.

cheem documentation built on Aug. 27, 2025, 1:09 a.m.