Description Usage Arguments Value Examples
Performs cross validation with a given pipeline
JuiceBoxCV(X_train, Y_train, numFolds, numRepeats, parCV, numCores, seedNum,
verbose_p, fn, fn_params)
|
X_train |
Training Data (excludes the response/target we wish to predict) that will be fed into the pipeline function. |
Y_train |
Training Response/Target - The response/target that will be fed into the pipeline function. |
numFolds |
Integer indicating the number of folds to use in the cross validation procedure. |
numRepeats |
Integer indicating the number of times to repeat cross validation with numFolds. |
parCV |
Boolean indicating whether to parallelize the training procedure. |
numCores |
Integer indicating the number of cores to use. |
seedNum |
Integer indicating the seed number. Using the same seed will generate the same folds. |
verbose_p |
Boolean indicating if cross validation details should be printed out to the screen. |
fn |
The pipeline function. The pipeline function must take parameters training data, training response, validation data, validation response. See examples for details. |
fn_params |
Additional parameters to supply to the pipeline function. See examples for details. |
Average cross validation score across all the folds and repeats.
library(JuiceBox)
library(Metrics)
library(xgboost)
# Classification toy data: the first four iris columns are the features and
# the last column is recoded into a two-level factor ("Yes" for setosa,
# "No" for every other species).
irisAllMat <- iris
irisTrainMat <- irisAllMat[, seq_len(4)]
irisTrainResponse <- irisAllMat[[ncol(irisAllMat)]]
irisTrainResponse <- factor(
  ifelse(irisTrainResponse == "setosa", "Yes", "No")
)
# Regression toy data: the first mtcars column is the response and the
# remaining columns are the features.
mtcarsMat <- mtcars
mtcarsResponse <- mtcarsMat[[1]]
mtcarsMat <- mtcarsMat[, -1]
# Pipelines
# Regression pipeline: fits an xgboost model on the training data and scores
# it on the validation data.
#
# Args:
#   X_train, X_test: feature tables; each is coerced with as.matrix().
#   Y_train, Y_test: responses; each is coerced with as.numeric().
#   params: vector read positionally as c(max.depth, eta, nrounds).
#
# Returns:
#   The negated RMSE between the validation response and the predictions.
xgbPipeline_regression <- function(X_train, Y_train, X_test, Y_test, params) {
  # A numeric response needs no recoding; a "- 1" shift is only appropriate
  # when converting a two-level factor into 0/1 labels for classification.
  Y_train <- as.numeric(Y_train)
  xgbFit <- xgboost(
    data = as.matrix(X_train), label = Y_train, max.depth = params[1],
    eta = params[2], nrounds = params[3], objective = "reg:squarederror",
    verbose = 1
  )
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  actual <- as.numeric(Y_test)
  rmseValue <- rmse(actual, predictions)
  # Negated score; presumably the caller treats larger values as better.
  # TODO confirm against JuiceBoxCV.
  -rmseValue
}
# Binary classification pipeline: fits an xgboost model on the training data
# and scores it on the validation data.
#
# Args:
#   X_train, X_test: feature tables; each is coerced with as.matrix().
#   Y_train, Y_test: responses; as.numeric(...) - 1 is applied to both, which
#     turns a two-level factor (level codes 1 and 2) into 0/1 labels.
#   params: vector read positionally as c(max.depth, eta, nrounds).
#
# Returns:
#   The negated log loss of the predictions against the validation labels.
xgbPipeline_classification <- function(X_train, Y_train, X_test, Y_test,
                                       params) {
  Y_train <- as.numeric(Y_train) - 1
  # nrounds is spelled out in full rather than relying on partial matching.
  xgbFit <- xgboost(
    data = as.matrix(X_train), label = Y_train, max.depth = params[1],
    eta = params[2], nrounds = params[3], objective = "binary:logistic",
    verbose = 1
  )
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  actual <- as.numeric(Y_test) - 1
  logLossValue <- logLoss(actual, predictions)
  # Negated score; presumably the caller treats larger values as better.
  # TODO confirm against JuiceBoxCV.
  -logLossValue
}
# Regression pipeline that returns the raw validation predictions instead of
# a score.
#
# Args:
#   X_train, X_test: feature tables; each is coerced with as.matrix().
#   Y_train: training response, coerced with as.numeric().
#   Y_test: accepted for signature compatibility with the scoring pipelines;
#     not used here.
#   params: vector read positionally as c(max.depth, eta, nrounds).
#
# Returns:
#   The vector of predictions for X_test.
xgbPipeline_regression_extraction <- function(X_train, Y_train, X_test,
                                              Y_test, params) {
  # The response is used unshifted so that the returned predictions are on
  # the same scale as Y_train; a "- 1" shift is only appropriate when
  # converting a two-level factor into 0/1 labels.
  Y_train <- as.numeric(Y_train)
  xgbFit <- xgboost(
    data = as.matrix(X_train), label = Y_train, max.depth = params[1],
    eta = params[2], nrounds = params[3], objective = "reg:squarederror",
    verbose = 1
  )
  predict(xgbFit, newdata = as.matrix(X_test))
}
# Binary classification pipeline that returns the raw validation predictions
# instead of a score.
#
# Args:
#   X_train, X_test: feature tables; each is coerced with as.matrix().
#   Y_train: training response; as.numeric(...) - 1 turns a two-level factor
#     (level codes 1 and 2) into 0/1 labels.
#   Y_test: accepted for signature compatibility with the scoring pipelines;
#     not used here.
#   params: vector read positionally as c(max.depth, eta, nrounds).
#
# Returns:
#   The vector of predictions for X_test.
xgbPipeline_classification_extraction <- function(X_train, Y_train, X_test,
                                                  Y_test, params) {
  Y_train <- as.numeric(Y_train) - 1
  # nrounds is spelled out in full rather than relying on partial matching.
  xgbFit <- xgboost(
    data = as.matrix(X_train), label = Y_train, max.depth = params[1],
    eta = params[2], nrounds = params[3], objective = "binary:logistic",
    verbose = 1
  )
  predict(xgbFit, newdata = as.matrix(X_test))
}
# Testing JuiceBoxCV
# The pipelines read fn_params positionally as c(max.depth, eta, nrounds).
# verbose_p is documented as a Boolean, so TRUE is passed rather than 1.
# NOTE(review): eta = 2 lies outside the [0, 1] range xgboost documents for
# the learning rate -- confirm that this value is intended.
JuiceBoxCV(
  X_train = irisTrainMat, Y_train = irisTrainResponse, numFolds = 2,
  numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
  fn = xgbPipeline_classification, fn_params = c(3, 2, 5), verbose_p = TRUE
)
JuiceBoxCV(
  X_train = mtcarsMat, Y_train = mtcarsResponse, numFolds = 2,
  numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
  fn = xgbPipeline_regression, fn_params = c(3, 2, 10), verbose_p = TRUE
)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.