JuiceBoxCV: JuiceBoxCV
In sjoshistrats/JuiceBox: Caret Extensions

Description Usage Arguments Value Examples

View source: R/hello.R

Performs cross validation with a given pipeline

JuiceBoxCV(X_train, Y_train, numFolds, numRepeats, parCV, numCores, seedNum, verbose_p, fn, fn_params)

`X_train`	Training data (excluding the response/target we wish to predict) that will be fed into the pipeline function.
`Y_train`	Training Response/Target - The response/target that will be fed into the pipeline function.
`numFolds`	Integer indicating the number of folds to use in the cross validation procedure.
`numRepeats`	Integer indicating the number of times to repeat cross validation with numFolds.
`parCV`	Boolean indicating whether to parallelize the training procedure.
`numCores`	Integer indicating the number of cores to use.
`seedNum`	Integer indicating the seed number. Using the same seed will generate the same folds.
`verbose_p`	Boolean indicating whether cross validation details should be printed to the screen.
`fn`	The pipeline function. As in the examples, the pipeline function must take, in order, the training data, training response, validation data, and validation response, followed by the additional parameters supplied via `fn_params`. See examples for details.
`fn_params`	Additional parameters to supply to the pipeline function. See examples for details.

Average cross validation score across all the folds and repeats.

library(JuiceBox)
library(Metrics)
library(xgboost)

# Toy data set for classification: the four numeric iris measurements as
# predictors, and a two-level "Yes"/"No" factor marking the setosa rows.
irisAllMat <- iris
irisTrainMat <- irisAllMat[, seq_len(4)]
irisTrainResponse <- irisAllMat[[ncol(irisAllMat)]]
irisTrainResponse <- factor(c("No", "Yes")[(irisTrainResponse == "setosa") + 1L])

# Toy data set for regression: the first mtcars column is the response and
# every remaining column is a predictor.
mtcarsResponse <- mtcars[[1]]
mtcarsMat <- mtcars[, -1]

# Pipelines
# Regression pipeline: fits an xgboost model on the training fold and scores
# it on the validation fold.
#
# Args:
#   X_train: Training predictors; coerced with as.matrix().
#   Y_train: Training response; coerced with as.numeric().
#   X_test:  Validation predictors; coerced with as.matrix().
#   Y_test:  Validation response; coerced with as.numeric().
#   params:  Vector read positionally: params[1] = max.depth, params[2] = eta,
#            params[3] = number of boosting rounds.
#
# Returns:
#   The negated RMSE of the validation predictions (presumably negated so that
#   a larger score is better -- confirm against JuiceBoxCV).
xgbPipeline_regression <- function(X_train, Y_train, X_test, Y_test, params)
{
  # The response is used on its original scale. A "- 1" shift is only
  # meaningful when converting factor codes (1/2) into 0/1 class labels; for a
  # numeric regression target it merely moves the target by one unit.
  xgbFit <- xgboost(data = as.matrix(X_train), label = as.numeric(Y_train),
                    max.depth = params[1], eta = params[2],
                    nrounds = params[3],  # spelled out: no partial matching
                    # NOTE(review): newer xgboost releases deprecate
                    # "reg:linear" in favour of "reg:squarederror" -- confirm
                    # the installed version before switching.
                    objective = "reg:linear", verbose = 1)
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  actual <- as.numeric(Y_test)
  # Metrics::rmse() is declared as rmse(actual, predicted).
  rmseValue <- rmse(actual, predictions)
  return(-rmseValue)
}

# Binary classification pipeline: fits an xgboost model on the training fold
# and scores it on the validation fold.
#
# Args:
#   X_train: Training predictors; coerced with as.matrix().
#   Y_train: Training response. as.numeric(Y_train) - 1 must yield 0/1 labels,
#            e.g. a two-level factor such as c("No", "Yes").
#   X_test:  Validation predictors; coerced with as.matrix().
#   Y_test:  Validation response, encoded the same way as Y_train.
#   params:  Vector read positionally: params[1] = max.depth, params[2] = eta,
#            params[3] = number of boosting rounds.
#
# Returns:
#   The negated log loss of the validation predictions (presumably negated so
#   that a larger score is better -- confirm against JuiceBoxCV).
xgbPipeline_classification <- function(X_train, Y_train, X_test, Y_test, params)
{
  # Factor codes are 1-based; subtracting 1 gives the 0/1 labels that
  # "binary:logistic" is fed here.
  Y_train <- as.numeric(Y_train) - 1
  xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train,
                    max.depth = params[1], eta = params[2],
                    nrounds = params[3],  # spelled out: no partial matching
                    objective = "binary:logistic", verbose = 1)
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  actual <- as.numeric(Y_test) - 1
  logLossValue <- logLoss(actual, predictions)
  return(-logLossValue)
}

# Regression pipeline that returns the raw validation predictions instead of
# a score.
#
# Args:
#   X_train: Training predictors; coerced with as.matrix().
#   Y_train: Training response; coerced with as.numeric().
#   X_test:  Validation predictors; coerced with as.matrix().
#   Y_test:  Validation response. Not used by this pipeline; it is accepted
#            only so the signature matches the scoring pipelines.
#   params:  Vector read positionally: params[1] = max.depth, params[2] = eta,
#            params[3] = number of boosting rounds.
#
# Returns:
#   The vector of predictions for X_test, on the same scale as Y_train.
xgbPipeline_regression_extraction <- function(X_train, Y_train, X_test, Y_test, params)
{
  # Train on the unshifted response. Subtracting 1 (as is done when turning
  # factor codes into 0/1 labels) would make every returned prediction
  # systematically one unit too low.
  xgbFit <- xgboost(data = as.matrix(X_train), label = as.numeric(Y_train),
                    max.depth = params[1], eta = params[2],
                    nrounds = params[3],  # spelled out: no partial matching
                    # NOTE(review): newer xgboost releases deprecate
                    # "reg:linear" in favour of "reg:squarederror" -- confirm
                    # the installed version before switching.
                    objective = "reg:linear", verbose = 1)
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  return(predictions)
}

# Binary classification pipeline that returns the raw validation predictions
# instead of a score.
#
# Args:
#   X_train: Training predictors; coerced with as.matrix().
#   Y_train: Training response. as.numeric(Y_train) - 1 must yield 0/1 labels,
#            e.g. a two-level factor such as c("No", "Yes").
#   X_test:  Validation predictors; coerced with as.matrix().
#   Y_test:  Validation response. Not used by this pipeline; it is accepted
#            only so the signature matches the scoring pipelines.
#   params:  Vector read positionally: params[1] = max.depth, params[2] = eta,
#            params[3] = number of boosting rounds.
#
# Returns:
#   The vector returned by predict() for X_test.
xgbPipeline_classification_extraction <- function(X_train, Y_train, X_test, Y_test, params)
{
  # Factor codes are 1-based; subtracting 1 gives the 0/1 labels that
  # "binary:logistic" is fed here.
  Y_train <- as.numeric(Y_train) - 1
  xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train,
                    max.depth = params[1], eta = params[2],
                    nrounds = params[3],  # spelled out: no partial matching
                    objective = "binary:logistic", verbose = 1)
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  return(predictions)
}

# Testing JuiceBoxCV
# Each pipeline above reads its `params` argument positionally as
# c(max.depth, eta, nrounds); fn_params is presumably what JuiceBoxCV passes
# there -- confirm against the JuiceBoxCV source.
# NOTE(review): eta = 2 lies outside the [0, 1] range the xgboost
# documentation gives for eta -- confirm this is intended for the examples.
# NOTE(review): numCores is presumably ignored while parCV = FALSE -- confirm.

# Classification: 2-fold cross validation, repeated twice, on the iris
# "setosa vs. rest" task. verbose_p is documented as a Boolean, so a logical
# is passed rather than 1.
JuiceBoxCV(X_train = irisTrainMat, Y_train = irisTrainResponse, numFolds = 2,
           numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
           verbose_p = TRUE,
           fn = xgbPipeline_classification, fn_params = c(3, 2, 5))

# Regression: 2-fold cross validation, repeated twice, on mtcars with the
# first column as the response.
JuiceBoxCV(X_train = mtcarsMat, Y_train = mtcarsResponse, numFolds = 2,
           numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
           verbose_p = TRUE,
           fn = xgbPipeline_regression, fn_params = c(3, 2, 10))