xgboostScoreExplain: Explain an observation's score from an xgboost model and show...


Description

Given a trained xgboost model and a single observation (without the dependent variable), the function produces a summary of each tree, including information such as gain, the features on the path, and feature weights. In addition, the function outputs the aggregated individual feature importance, the calculated prediction for the observation, and the overall feature importance.

Usage

xgboostScoreExplain(model, modelType = c("multiClassification",
  "binaryClassification", "regression"), target = "target", dt.singleRow,
  nclasses = 3, shiny = F, sampleData = NULL, top_n_features = 10)

Arguments

model

A xgboost model

modelType

One of "multiClassification", "binaryClassification", or "regression"

target

The name of the target variable; default is "target"

dt.singleRow

A single-row observation, without the target variable

nclasses

Number of classes when explaining a multiClassification model; default is 3

shiny

TRUE/FALSE; whether to generate a shiny app as part of the returned object

sampleData

The sample data that was used to train the model

top_n_features

The number of top features to show; default is 10

Value

A list containing summaryTrees, featureWtsIndividual, pred, featureImportanceOverall and, when shiny = TRUE, a shiny app
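
A minimal sketch of inspecting the returned list (element names follow the Value above; the shiny element is assumed to be present only when shiny = TRUE):

res$pred                      # calculated prediction for the supplied row
res$summaryTrees              # per-tree summary: gain, features on the path, weights
res$featureWtsIndividual      # aggregated individual feature importance
res$featureImportanceOverall  # overall feature importance
res$shiny                     # shiny app (only when shiny = TRUE)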

Note

The function currently fully supports only the binaryClassification model type; the other two model types (regression and multiClassification) will be enabled in the future.

See Also

xgboostModel

Examples

require(xgboost)

# regression --------------
data(mtcars)

m.train <- data.matrix(mtcars[, -1])
dtrain <- xgb.DMatrix(data = m.train, label = mtcars$mpg, missing = NaN)

params <- list(objective = "reg:linear",
               booster = "gbtree",
               eval_metric = "rmse",
               eta                 = 0.02,
               max_depth           = 4,
               subsample           = 0.83,
               colsample_bytree    = 0.77
)

model <- xgb.train(params = params
                   , data = dtrain
                   , nrounds = 3
                   # , watchlist = watchlist
                   , maximize = F
                   , print.every.n = 1
                   # , early.stop.round = 1
)

dt.singleRow <- mtcars[1, ]
m.try <- data.matrix(dt.singleRow[, -1])
dtry <- xgb.DMatrix(data = m.try, label = dt.singleRow$mpg, missing = NaN)

res <- xgboostScoreExplain(model, modelType = "regression", target = "mpg", dt.singleRow = dt.singleRow)
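
# Sanity check (sketch): res$pred is assumed to hold the calculated prediction
# (see Value); it should approximately match xgboost's own prediction
predict(model, dtry)
res$pred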

# binaryClassification --------------
set.seed(42)
x <- iris
x$Species <- ifelse(x$Species == "versicolor", 1, 0)
dat <- splitDataToTrainTestDataFrame(x, .9, .25)
mod <- xgboostModel(dat, "Species", nrounds = 200, early.stop.round = 5, verbose = 1)

# single row of observation
xx <- x[1, -5]

# model
model <- mod$model

m.try <- data.matrix(xx)
dtry <- xgb.DMatrix(data = m.try, label = x[1,]$Species, missing = NaN)

# score explain
res <- xgboostScoreExplain(model = model, modelType = "binaryClassification", target = "Species", dt.singleRow = xx, shiny = T, sampleData = x)
res
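
# With shiny = T, the returned list is documented to include a shiny app;
# assuming it is a standard shiny app object, evaluating it in an
# interactive session launches the app:
# res$shiny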


# multiClassification -----------------------------------------------------
require(data.table)
require(xgboost)
require(caret)
data(iris)

setDT(iris)

iris[, target := ifelse(Species == "setosa", 0
                        , ifelse(Species == "versicolor", 1, 2))]
iris[, Species := NULL]

# add some noise
set.seed(1)
iris <- rbind(iris
              , data.table(Sepal.Length = rnorm(20, mean = 4, 1)
                           , Sepal.Width = rnorm(20, mean = 2, .5)
                           , Petal.Length = rnorm(20, mean = 2, .1)
                           , Petal.Width = rnorm(20, mean = 1, .01)
                           , target = sample(0:2, 20, replace = T)
              )
)

set.seed(1)
ind.train <- createDataPartition(iris$target, p = .8, list = F)
dt.train <- iris[ind.train]
dt.valid <- iris[!ind.train]

mx.train <- as.matrix(dt.train[, !c("target"), with = F])
mx.valid <- as.matrix(dt.valid[, !c("target"), with = F])

dmx.train <- xgb.DMatrix(data = mx.train, label = dt.train$target)
dmx.valid <- xgb.DMatrix(data = mx.valid, label = dt.valid$target)

watchlist <- list(valid = dmx.valid, train = dmx.train)
params <- list(objective = "multi:softmax"
               , booster = "gbtree"
               , num_class = 3
               , eta = 1)

set.seed(1)
mod <- xgb.train(params = params
                 , data = dmx.train
                 , watchlist = watchlist
                 , nrounds = 10
                 , verbose = 1
                 , print.every.n = 1
                 , early.stop.round = 3
                 , maximize = F)

dt.try <- dt.valid[, !c("target"), with = F][1]
dt.try

res <- xgboostScoreExplain(model = mod, modelType = "multiClassification", target = "target", dt.singleRow = dt.try, nclasses = 3, shiny = F, sampleData = iris)
res
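
# Cross-check (sketch): with objective = "multi:softmax", xgboost's predict()
# returns the predicted class directly, for comparison with the explanation
predict(mod, xgb.DMatrix(data = as.matrix(dt.try)))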
