# inst/doc/randomForestExplainer.R

## ----setup, include=FALSE-----------------------------------------------------
# Chunk-level defaults for the whole document: echo the code, silence
# warnings and messages, and use a 7 x 5 figure size.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.width = 7,
  fig.height = 5
)

## -----------------------------------------------------------------------------
library(randomForest)
# devtools::install_github("MI2DataLab/randomForestExplainer")
library(randomForestExplainer)

## -----------------------------------------------------------------------------
# Fetch the Boston data set from MASS without attaching that package, recode
# the `chas` column as logical, and show the resulting structure.
data(Boston, package = "MASS")
Boston[["chas"]] <- as.logical(Boston[["chas"]])
str(Boston)

## -----------------------------------------------------------------------------
# Seed the RNG before fitting, then model `medv` on every other column of
# Boston. localImp = TRUE is handed straight to randomForest().
set.seed(2017)
forest <- randomForest(
  medv ~ .,
  data = Boston,
  localImp = TRUE
)

## -----------------------------------------------------------------------------
# Top-level expression: auto-prints the fitted forest.
forest

## -----------------------------------------------------------------------------
# The two commented calls show how the cached file can be regenerated; the
# script itself only restores it from disk.
# min_depth_frame <- min_depth_distribution(forest)
# save(min_depth_frame, file = "min_depth_frame.rda")
load(file = "min_depth_frame.rda")
head(min_depth_frame, n = 10)

## -----------------------------------------------------------------------------
# Passing the forest gives the same result as below but takes longer:
# plot_min_depth_distribution(forest)
plot_min_depth_distribution(min_depth_frame)

## ----fig.height = 7-----------------------------------------------------------
# Same plot with mean_sample = "relevant_trees" and k = 15.
plot_min_depth_distribution(
  min_depth_frame,
  mean_sample = "relevant_trees",
  k = 15
)

## -----------------------------------------------------------------------------
# The two commented calls show how the cached file can be regenerated; the
# script itself only restores it from disk and prints it.
# importance_frame <- measure_importance(forest)
# save(importance_frame, file = "importance_frame.rda")
load(file = "importance_frame.rda")
importance_frame

## -----------------------------------------------------------------------------
# Passing the forest gives the same result as below but takes longer:
# plot_multi_way_importance(forest, size_measure = "no_of_nodes")
plot_multi_way_importance(
  importance_frame,
  size_measure = "no_of_nodes"
)

## -----------------------------------------------------------------------------
# Second multi-way plot with explicitly chosen x, y and size measures.
plot_multi_way_importance(
  importance_frame,
  x_measure = "mse_increase",
  y_measure = "node_purity_increase",
  size_measure = "p_value",
  no_of_labels = 5
)

## -----------------------------------------------------------------------------
# Passing the forest gives the same result as below but takes longer:
# plot_importance_ggpairs(forest)
plot_importance_ggpairs(importance_frame)

## -----------------------------------------------------------------------------
# Passing the forest gives the same result as below but takes longer:
# plot_importance_rankings(forest)
plot_importance_rankings(importance_frame)

## -----------------------------------------------------------------------------
# Passing the forest gives the same result as below but takes longer:
# (vars <- important_variables(
#   forest,
#   k = 5,
#   measures = c("mean_min_depth", "no_of_trees")
# ))
# The outer parentheses make the assignment print its value as well.
(vars <- important_variables(
  importance_frame,
  k = 5,
  measures = c("mean_min_depth", "no_of_trees")
))

## -----------------------------------------------------------------------------
# The two commented calls show how the cached file can be regenerated; the
# script restores it from disk and shows the first rows after sorting by
# `occurrences` in decreasing order.
# interactions_frame <- min_depth_interactions(forest, vars)
# save(interactions_frame, file = "interactions_frame.rda")
load(file = "interactions_frame.rda")
head(
  interactions_frame[
    order(interactions_frame$occurrences, decreasing = TRUE),
  ]
)

## -----------------------------------------------------------------------------
# plot_min_depth_interactions(forest) calculates the interactions_frame for
# default settings, so it may give different results than the call below
# depending on our settings, and it takes more time.
plot_min_depth_interactions(interactions_frame)

## -----------------------------------------------------------------------------
# Variant computed with "relevant_trees" for both mean samples. Loading it
# presumably replaces `interactions_frame`, since the commented save() stores
# an object of that name.
# interactions_frame <- min_depth_interactions(
#   forest, vars,
#   mean_sample = "relevant_trees",
#   uncond_mean_sample = "relevant_trees"
# )
# save(interactions_frame, file = "interactions_frame_relevant.rda")
load(file = "interactions_frame_relevant.rda")
plot_min_depth_interactions(interactions_frame)

## -----------------------------------------------------------------------------
# Presumably plots the forest's prediction across the `rm` and `lstat`
# columns of Boston; confirm against the package documentation.
plot_predict_interaction(forest, Boston, "rm", "lstat")

## ---- eval = FALSE------------------------------------------------------------
# This chunk is marked eval = FALSE, so the call below is left commented out
# and does not run as part of the script.
#  explain_forest(forest, interactions = TRUE, data = Boston)

# Try the randomForestExplainer package in your browser

# Any scripts or data that you put into this service are public.

# randomForestExplainer documentation built on July 12, 2020, 1:06 a.m.