# Load caret, which provides train(), trainControl() and the resampling
# helpers used throughout this script.
library("caret")

# Load the FuelEconomy data shipped with jrPred; the models below are fitted
# to the cars2010 data set.
data(FuelEconomy, package = "jrPred")

# Fix the random seed so that the resampling results are reproducible.
set.seed(27)
# Fit a linear regression model to the cars2010 data set with FE as the
# response, using EngDispl, NumCyl and NumGears as predictors. Load the data
# like so: data("FuelEconomy", package = "jrPred")
# Linear model for fuel economy (FE) on engine displacement, number of
# cylinders and number of gears, fitted through caret's train() interface.
mLM <- train(
  FE ~ EngDispl + NumCyl + NumGears,
  data = cars2010,
  method = "lm"
)
# Hint: the sqrt and resid functions may be useful.

# Residuals of the fitted model on the training data.
res <- resid(mLM)

# Root mean squared error on the training data; the outer parentheses make R
# print the value as well as assign it.
(trainRMSE <- sqrt(mean(res^2)))
# Set up the train control object: 10-fold cross-validation.
tcKFOLD <- trainControl(method = "cv", number = 10)

# Run the model, this time estimating performance with the resampling scheme
# defined above.
mLMKFOLD <- train(
  FE ~ EngDispl + NumCyl + NumGears,
  method = "lm",
  data = cars2010,
  trControl = tcKFOLD
)
# Print the training-set RMSE (trainRMSE is assigned earlier in this script)
# so it can be compared with the resampled performance shown next.
trainRMSE
# getTrainPerf is a caret helper; presumably it summarises the cross-validated
# performance of the fitted train object -- confirm against the caret docs.
getTrainPerf(mLMKFOLD)
# The object returned by train also contains timing information that can be
# accessed via the times component of the list. Which of the methods is
# fastest?
# Hint: the $ notation can be used to pick a single list component.
mLMKFOLD$times$everything
# A number of trainControl objects, one per fold count.
tc2 <- trainControl(method = "cv", number = 2)
tc5 <- trainControl(method = "cv", number = 5)
tc10 <- trainControl(method = "cv", number = 10)
tc15 <- trainControl(method = "cv", number = 15)
tc20 <- trainControl(method = "cv", number = 20)

# Train the same model using each resampling scheme. The order of the calls
# is kept fixed because resampling draws from the random number stream.
mLM2 <- train(
  FE ~ EngDispl + NumCyl + NumGears,
  method = "lm", data = cars2010, trControl = tc2
)
mLM5 <- train(
  FE ~ EngDispl + NumCyl + NumGears,
  method = "lm", data = cars2010, trControl = tc5
)
mLM10 <- train(
  FE ~ EngDispl + NumCyl + NumGears,
  method = "lm", data = cars2010, trControl = tc10
)
mLM15 <- train(
  FE ~ EngDispl + NumCyl + NumGears,
  method = "lm", data = cars2010, trControl = tc15
)
mLM20 <- train(
  FE ~ EngDispl + NumCyl + NumGears,
  method = "lm", data = cars2010, trControl = tc20
)

# Use a data frame to store all of the relevant information: the number of
# folds, the first element of each model's `everything` timing vector and the
# resampled RMSE estimate. The outer parentheses print the result.
fits <- list(mLM2, mLM5, mLM10, mLM15, mLM20)
(info <- data.frame(
  "Folds" = c(2, 5, 10, 15, 20),
  "Time" = vapply(fits, function(m) m$times$everything[[1]], numeric(1)),
  "Estimate" = vapply(fits, function(m) m$results$RMSE, numeric(1))
))
# As the number of folds increases, the model takes longer to fit. This is not
# an issue with such a small model, but it is something to consider for more
# complicated models.
# The error estimates go down as the number of folds increases. This is
# because, for each held-out fold, a greater proportion of the data is used in
# training, so we expect to get a better model.
# Shared resampling scheme for the models below: 10-fold cross-validation.
tc <- trainControl(method = "cv", number = 10)

# Linear regression on all predictors, centred and scaled before fitting.
mreg <- train(
  FE ~ .,
  data = cars2010, method = "lm",
  preProcess = c("center", "scale"), trControl = tc
)

# Ridge regression with the same predictors, pre-processing and resampling.
mridge <- train(
  FE ~ .,
  data = cars2010, method = "ridge",
  preProcess = c("center", "scale"), trControl = tc
)

# Coefficients of the two final models.
creg <- coefficients(mreg$finalModel)
cridge <- predict(
  mridge$finalModel,
  mode = "fraction", s = 1, type = "coefficients"
)

# The lm coefficients start with the intercept, whereas the elasticnet
# predict method returns one coefficient per predictor and no intercept.
# Drop the intercept before plotting so that both sets of points share the
# same predictor index, and use a common y-range so neither set is clipped.
plot(
  creg[-1],
  pch = 19,
  ylab = "Coefficient",
  ylim = range(creg[-1], cridge$coefficients, na.rm = TRUE)
)
points(cridge$coefficients, col = 2, pch = 19)

# Compare the resampled RMSE of the two models.
dotplot(
  resamples(list("reg" = mreg, "ridge" = mridge)),
  metric = "RMSE"
)
# Lasso regression on all predictors, centred and scaled, using the
# resampling scheme stored in tc.
mlasso <- train(
  FE ~ .,
  data = cars2010, method = "lasso",
  preProcess = c("center", "scale"), trControl = tc
)

# Elastic net with the same predictors, pre-processing and resampling.
menet <- train(
  FE ~ .,
  data = cars2010, method = "enet",
  preProcess = c("center", "scale"), trControl = tc
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.