Nothing
################################################################################
### R code from Applied Predictive Modeling (2013) by Kuhn and Johnson.
### Copyright 2013 Kuhn and Johnson
### Web Page: http://www.appliedpredictivemodeling.com
### Contact: Max Kuhn (mxkuhn@gmail.com)
###
### Chapter 7: Non-Linear Regression Models
###
### Required packages: AppliedPredictiveModeling, caret, doMC (optional), earth,
### kernlab, lattice, nnet
###
### Data used: The solubility from the AppliedPredictiveModeling package
###
### Notes:
### 1) This code is provided without warranty.
###
### 2) This code should help the user reproduce the results in the
### text. There will be differences between this code and what is is
### the computing section. For example, the computing sections show
### how the source functions work (e.g. randomForest() or plsr()),
### which were not directly used when creating the book. Also, there may be
### syntax differences that occur over time as packages evolve. These files
### will reflect those changes.
###
### 3) In some cases, the calculations in the book were run in
### parallel. The sub-processes may reset the random number seed.
### Your results may slightly vary.
###
################################################################################
################################################################################
### Load the data
library(AppliedPredictiveModeling)
data(solubility)
### Create a control funciton that will be used across models. We
### create the fold assignments explictily instead of relying on the
### random number seed being set to identical values.
library(caret)
set.seed(100)
indx <- createFolds(solTrainY, returnTrain = TRUE)
ctrl <- trainControl(method = "cv", index = indx)
################################################################################
### Section 7.1 Neural Networks
### Optional: parallel processing can be used via the 'do' packages,
### such as doMC, doMPI etc. We used doMC (not on Windows) to speed
### up the computations.
### WARNING: Be aware of how much memory is needed to parallel
### process. It can very quickly overwhelm the availible hardware. We
### estimate the memory usuage (VSIZE = total memory size) to be
### 2677M/core.
library(doMC)
registerDoMC(10)
library(caret)
nnetGrid <- expand.grid(decay = c(0, 0.01, .1),
size = c(1, 3, 5, 7, 9, 11, 13),
bag = FALSE)
set.seed(100)
nnetTune <- train(x = solTrainXtrans, y = solTrainY,
method = "avNNet",
tuneGrid = nnetGrid,
trControl = ctrl,
preProc = c("center", "scale"),
linout = TRUE,
trace = FALSE,
MaxNWts = 13 * (ncol(solTrainXtrans) + 1) + 13 + 1,
maxit = 1000,
allowParallel = FALSE)
nnetTune
plot(nnetTune)
testResults <- data.frame(obs = solTestY,
NNet = predict(nnetTune, solTestXtrans))
################################################################################
### Section 7.2 Multivariate Adaptive Regression Splines
set.seed(100)
marsTune <- train(x = solTrainXtrans, y = solTrainY,
method = "earth",
tuneGrid = expand.grid(degree = 1, nprune = 2:38),
trControl = ctrl)
marsTune
plot(marsTune)
testResults$MARS <- predict(marsTune, solTestXtrans)
marsImp <- varImp(marsTune, scale = FALSE)
plot(marsImp, top = 25)
################################################################################
### Section 7.3 Support Vector Machines
## In a recent update to caret, the method to estimate the
## sigma parameter was slightly changed. These results will
## slightly differ from the text for that reason.
set.seed(100)
svmRTune <- train(x = solTrainXtrans, y = solTrainY,
method = "svmRadial",
preProc = c("center", "scale"),
tuneLength = 14,
trControl = ctrl)
svmRTune
plot(svmRTune, scales = list(x = list(log = 2)))
svmGrid <- expand.grid(degree = 1:2,
scale = c(0.01, 0.005, 0.001),
C = 2^(-2:5))
set.seed(100)
svmPTune <- train(x = solTrainXtrans, y = solTrainY,
method = "svmPoly",
preProc = c("center", "scale"),
tuneGrid = svmGrid,
trControl = ctrl)
svmPTune
plot(svmPTune,
scales = list(x = list(log = 2),
between = list(x = .5, y = 1)))
testResults$SVMr <- predict(svmRTune, solTestXtrans)
testResults$SVMp <- predict(svmPTune, solTestXtrans)
################################################################################
### Section 7.4 K-Nearest Neighbors
### First we remove near-zero variance predictors
knnDescr <- solTrainXtrans[, -nearZeroVar(solTrainXtrans)]
set.seed(100)
knnTune <- train(x = knnDescr, y = solTrainY,
method = "knn",
preProc = c("center", "scale"),
tuneGrid = data.frame(k = 1:20),
trControl = ctrl)
knnTune
plot(knnTune)
testResults$Knn <- predict(svmRTune, solTestXtrans[, names(knnDescr)])
################################################################################
### Session Information
sessionInfo()
q("no")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.