# Load required packages
library(ggplot2)
library(grid)
library(pdp)

# Plotting helper used by every model section below.
#
# Computes four partial-dependence style summaries for a fitted classifier on
# the complete-case rows of `pima` and arranges them in a two-column layout:
#   1. PDP for glucose
#   2. PDP for glucose and age (computed with chull = TRUE)
#   3. ICE curves for glucose
#   4. centered ICE curves for glucose
#
# Arguments:
#   object  Fitted model passed to partial(). The expression supplied by the
#           caller is also deparsed and used as the figure title.
#   ...     Forwarded unchanged to each of the four partial() calls.
pima_test <- function(object, ...) {
  # Text of the caller's argument expression, shown above the panels
  model_label <- deparse(substitute(object))

  # Complete cases only; the response column is dropped from the data handed
  # to partial(), while the plots receive the complete cases with it included
  complete_rows <- na.omit(pima)
  predictors <- subset(complete_rows, select = -diabetes)

  pd_glucose <- partial(object, pred.var = "glucose", train = predictors, ...)
  pd_glucose_age <- partial(
    object,
    pred.var = c("glucose", "age"),
    chull = TRUE,
    train = predictors,
    ...
  )
  ice_glucose <- partial(
    object,
    pred.var = "glucose",
    ice = TRUE,
    train = predictors,
    ...
  )
  cice_glucose <- partial(
    object,
    pred.var = "glucose",
    ice = TRUE,
    center = TRUE,
    train = predictors,
    ...
  )

  panel_pdp <- autoplot(
    pd_glucose,
    rug = TRUE, train = complete_rows, main = "PDP for glucose"
  )
  panel_pdp2 <- autoplot(pd_glucose_age, main = "PDP for glucose and age")
  panel_ice <- autoplot(
    ice_glucose,
    rug = TRUE, train = complete_rows,
    main = "ICE curves for glucose", alpha = 0.1
  )
  panel_cice <- autoplot(
    cice_glucose,
    rug = TRUE, train = complete_rows,
    main = "c-ICE curves for glucose", alpha = 0.1
  )

  title_grob <- textGrob(model_label, gp = gpar(fontsize = 20, font = 3))

  grid.arrange(
    panel_pdp,
    panel_pdp2,
    panel_ice,
    panel_cice,
    ncol = 2,
    top = title_grob
  )
}

Discriminant analysis

Package: MASS

# Linear discriminant analysis; `.^2` requests main effects plus all two-way
# interactions
pima_lda <- MASS::lda(
  diabetes ~ .^2,
  data = na.omit(pima)
)
pima_test(object = pima_lda)
pima_test(object = pima_lda, prob = TRUE)

# Quadratic discriminant analysis on main effects only
pima_qda <- MASS::qda(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_qda)
pima_test(object = pima_qda, prob = TRUE)

Package: mda

# Flexible discriminant analysis using mda::mars (degree = 2) as the
# regression method
pima_fda <- mda::fda(
  diabetes ~ .,
  data = na.omit(pima),
  method = mda::mars,
  degree = 2
)
pima_test(object = pima_fda)
pima_test(object = pima_fda, prob = TRUE)

# Mixture discriminant analysis with default settings
pima_mda <- mda::mda(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_mda)
pima_test(object = pima_mda, prob = TRUE)

Decision trees

Package: rpart

# Single classification tree fitted on the complete cases, plotted once with
# partial()'s defaults and once with prob = TRUE
pima_rpart <- rpart::rpart(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_rpart)
pima_test(object = pima_rpart, prob = TRUE)

Package: C50

# C5.0 model with trials = 100; seeded so the fit is repeatable
set.seed(101)  # for reproducibility
pima_C5.0 <- C50::C5.0(
  diabetes ~ .,
  data = na.omit(pima),
  trials = 100
)
pima_test(object = pima_C5.0)
pima_test(object = pima_C5.0, prob = TRUE)

Package: party

# Conditional inference tree from the party package
pima_ctree <- party::ctree(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_ctree)
pima_test(object = pima_ctree, prob = TRUE)

Package: partykit

# Conditional inference tree from the partykit package
pima_ctree2 <- partykit::ctree(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_ctree2)
pima_test(object = pima_ctree2, prob = TRUE)

Bagging

Package: adabag

# Bagged trees via adabag; partial() is called with quantiles = TRUE for
# both plots
set.seed(101)  # for reproducibility
pima_bagging <- adabag::bagging(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_bagging, quantiles = TRUE)
pima_test(object = pima_bagging, quantiles = TRUE, prob = TRUE)

Package: ipred

# Bagged trees via ipred with nbagg = 500; partial() is called with
# quantiles = TRUE for both plots
set.seed(101)  # for reproducibility
pima_ipred <- ipred::bagging(
  diabetes ~ .,
  data = na.omit(pima),
  nbagg = 500
)
pima_test(object = pima_ipred, quantiles = TRUE)
pima_test(object = pima_ipred, quantiles = TRUE, prob = TRUE)

Random forests

Package: randomForest

# Random forest with the package defaults; seeded so the fit is repeatable
set.seed(101)  # for reproducibility
pima_rf <- randomForest::randomForest(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_rf)
pima_test(object = pima_rf, prob = TRUE)

Package: party

# Conditional random forest from the party package; partial() is called with
# quantiles = TRUE for both plots
set.seed(101)  # for reproducibility
pima_crf <- party::cforest(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_crf, quantiles = TRUE)
pima_test(object = pima_crf, quantiles = TRUE, prob = TRUE)

Package: partykit

# Conditional random forest from the partykit package; partial() is called
# with quantiles = TRUE for both plots
set.seed(101)  # for reproducibility
pima_crf2 <- partykit::cforest(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_crf2, quantiles = TRUE)
pima_test(object = pima_crf2, quantiles = TRUE, prob = TRUE)

Package: ranger

# ranger forest fitted with probability = TRUE
set.seed(101)  # for reproducibility
pima_ranger <- ranger::ranger(
  diabetes ~ .,
  data = na.omit(pima),
  probability = TRUE
)
pima_test(object = pima_ranger)
pima_test(object = pima_ranger, prob = TRUE)

Boosting

Package: adabag

# Boosted trees via adabag; partial() is called with quantiles = TRUE for
# both plots
set.seed(101)  # for reproducibility
pima_boosting <- adabag::boosting(
  diabetes ~ .,
  data = na.omit(pima)
)
pima_test(object = pima_boosting, quantiles = TRUE)
pima_test(object = pima_boosting, quantiles = TRUE, prob = TRUE)

Package: gbm

# Gradient boosted model with a Bernoulli loss. The response is recoded to
# 0/1 inside the formula, with "neg" mapped to 1.
set.seed(101)  # for reproducibility
pima_gbm <- gbm::gbm(
  ifelse(diabetes == "neg", 1, 0) ~ .,
  data = na.omit(pima),
  distribution = "bernoulli",
  n.trees = 5000, interaction.depth = 3, shrinkage = 0.001,
  # cv.folds = 5,
  verbose = FALSE
)

# Number of trees chosen by the out-of-bag estimate, without drawing the
# performance plot; passed on to partial() as n.trees
best.iter <- gbm::gbm.perf(
  pima_gbm,
  method = "OOB",
  plot.it = FALSE
)
pima_test(object = pima_gbm, n.trees = best.iter)
pima_test(object = pima_gbm, n.trees = best.iter, prob = TRUE)

Package: xgboost

# xgboost takes a numeric matrix of predictors and a 0/1 label rather than a
# formula. NOTE(review): unlike the other sections this fit uses `pima`
# without na.omit() -- presumably intentional; confirm.
set.seed(101)  # for reproducibility
pima_xgb <- xgboost::xgboost(
  data = data.matrix(subset(pima, select = -diabetes)),
  label = unclass(pima$diabetes) - 1,
  objective = "binary:logistic",
  nrounds = 100,
  max_depth = 3,
  eta = 0.1,
  gamma = 0,
  colsample_bytree = 0.8,
  min_child_weight = 1,
  subsample = 0.7,
  verbose = 0
)
pima_test(object = pima_xgb, which.class = 2)
pima_test(object = pima_xgb, prob = TRUE, which.class = 2)

Neural networks

Package: nnet

# Neural network fitted with size = 10 and decay = 0.1, up to 500
# iterations, with fitting trace output switched off
set.seed(101)  # for reproducibility
pima_nnet <- nnet::nnet(
  diabetes ~ .,
  data = na.omit(pima),
  size = 10,
  decay = 0.1,
  maxit = 500,
  trace = FALSE
)
pima_test(object = pima_nnet)
pima_test(object = pima_nnet, prob = TRUE)

Support vector machines

Package: e1071

# C-classification SVM from e1071, fitted with probability = TRUE
pima_svm <- e1071::svm(
  diabetes ~ .,
  data = na.omit(pima),
  type = "C-classification",
  probability = TRUE
)
pima_test(object = pima_svm)
pima_test(object = pima_svm, prob = TRUE)

Package: kernlab

# C-svc SVM from kernlab, fitted with prob.model = TRUE
pima_ksvm <- kernlab::ksvm(
  diabetes ~ .,
  data = na.omit(pima),
  type = "C-svc",
  prob.model = TRUE
)
pima_test(object = pima_ksvm)
pima_test(object = pima_ksvm, prob = TRUE)

Multinomial models

Package: nnet

# Multinomial log-linear model; `.^2` requests main effects plus all two-way
# interactions, and trace = FALSE silences the fitting log
set.seed(101)  # for reproducibility
pima_multinom <- nnet::multinom(
  diabetes ~ .^2,
  data = na.omit(pima),
  trace = FALSE
)
pima_test(object = pima_multinom)
pima_test(object = pima_multinom, prob = TRUE)


bgreenwell/pdp documentation built on June 2, 2022, 2:55 p.m.