JuiceBoxCV: JuiceBoxCV

Description Usage Arguments Value Examples

View source: R/hello.R

Description

Performs cross validation with a given pipeline

Usage

1
2
JuiceBoxCV(X_train, Y_train, numFolds, numRepeats, parCV, numCores, seedNum,
  verbose_p, fn, fn_params)

Arguments

X_train

Training Data (excludes the response/target we wish to predict) that will be fed into the pipeline function.

Y_train

Training Response/Target - The response/target that will be fed into the pipeline function.

numFolds

Integer indicating the number of folds to use in the cross validation procedure.

numRepeats

Integer indicating the number of times to repeat cross validation with numFolds.

parCV

Boolean indicating whether to parallelize the training procedure.

numCores

Integer indicating the number of cores to use.

seedNum

Integer indicating the seed number. Using the same seed will generate the same folds.

verbose_p

Boolean indicating if cross validation details should be printed out to the screen.

fn

The pipeline function. The pipeline function must take, in this order, the training data, training response, validation data, validation response, and a final argument that receives the values supplied in fn_params. See examples for details.

fn_params

Additional parameters to supply to the pipeline function. See examples for details.

Value

Average cross validation score across all the folds and repeats.

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
library(JuiceBox)
library(Metrics)
library(xgboost)

# Classification toy data: the four numeric iris measurements as predictors
# and a two-level factor ("Yes" for setosa, "No" otherwise) as the response.
irisAllMat <- iris
irisTrainMat <- irisAllMat[, 1:4]
isSetosa <- irisAllMat[[ncol(irisAllMat)]] == "setosa"
irisTrainResponse <- factor(ifelse(isSetosa, "Yes", "No"))

# Regression toy data: the first mtcars column is the response and the
# remaining columns are the predictors.
mtcarsResponse <- mtcars[[1]]
mtcarsMat <- mtcars[, -1]

# Pipelines
# Regression pipeline for JuiceBoxCV: fits an xgboost model on the training
# fold and scores it on the validation fold.
#
# Arguments:
#   X_train, X_test  Predictor tables; converted with as.matrix() for xgboost.
#   Y_train, Y_test  Responses; converted with as.numeric().
#   params           Vector read positionally: params[1] = max_depth,
#                    params[2] = eta, params[3] = nrounds.
#
# Returns the negated RMSE of the validation predictions, so that a larger
# value means a better fit.
xgbPipeline_regression <- function(X_train, Y_train, X_test, Y_test, params) {
  # The response is used as-is. The "- 1" offset that the classification
  # pipelines apply to turn factor codes into 0/1 labels has no meaning for a
  # numeric target and is not applied here.
  # NOTE(review): newer xgboost releases name this objective
  # "reg:squarederror" - confirm against the installed version.
  xgbFit <- xgboost(data = as.matrix(X_train), label = as.numeric(Y_train),
                    max_depth = params[1], eta = params[2],
                    nrounds = params[3], objective = "reg:linear",
                    verbose = 1)
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  -rmse(as.numeric(Y_test), predictions)
}

# Binary classification pipeline for JuiceBoxCV: fits an xgboost model on the
# training fold and scores it on the validation fold.
#
# Arguments:
#   X_train, X_test  Predictor tables; converted with as.matrix() for xgboost.
#   Y_train, Y_test  Responses. as.numeric() turns a factor into its integer
#                    level codes, and subtracting 1 makes a two-level factor
#                    into 0/1 labels (first level -> 0, second level -> 1).
#   params           Vector read positionally: params[1] = max_depth,
#                    params[2] = eta, params[3] = nrounds.
#
# Returns the negated log loss of the validation predictions, so that a
# larger value means a better fit.
xgbPipeline_classification <- function(X_train, Y_train, X_test, Y_test, params) {
  trainLabels <- as.numeric(Y_train) - 1
  xgbFit <- xgboost(data = as.matrix(X_train), label = trainLabels,
                    max_depth = params[1], eta = params[2],
                    nrounds = params[3], objective = "binary:logistic",
                    verbose = 1)
  predictions <- predict(xgbFit, newdata = as.matrix(X_test))
  # The validation response is encoded the same way as the training labels.
  actual <- as.numeric(Y_test) - 1
  -logLoss(actual, predictions)
}

# Regression pipeline that returns the raw validation-fold predictions instead
# of a score.
#
# Arguments:
#   X_train, X_test  Predictor tables; converted with as.matrix() for xgboost.
#   Y_train          Training response; converted with as.numeric().
#   Y_test           Not used; present so the signature matches the other
#                    pipelines.
#   params           Vector read positionally: params[1] = max_depth,
#                    params[2] = eta, params[3] = nrounds.
#
# Returns the vector of predictions for the rows of X_test, on the same scale
# as Y_train.
xgbPipeline_regression_extraction <- function(X_train, Y_train, X_test, Y_test, params) {
  # Train on the response unchanged. Subtracting 1 (the factor-to-0/1 encoding
  # used by the classification pipelines) would shift every returned
  # prediction down by one unit.
  # NOTE(review): newer xgboost releases name this objective
  # "reg:squarederror" - confirm against the installed version.
  xgbFit <- xgboost(data = as.matrix(X_train), label = as.numeric(Y_train),
                    max_depth = params[1], eta = params[2],
                    nrounds = params[3], objective = "reg:linear",
                    verbose = 1)
  predict(xgbFit, newdata = as.matrix(X_test))
}

# Binary classification pipeline that returns the raw validation-fold
# predictions instead of a score.
#
# Arguments:
#   X_train, X_test  Predictor tables; converted with as.matrix() for xgboost.
#   Y_train          Training response. as.numeric() turns a factor into its
#                    integer level codes, and subtracting 1 makes a two-level
#                    factor into 0/1 labels.
#   Y_test           Not used; present so the signature matches the other
#                    pipelines.
#   params           Vector read positionally: params[1] = max_depth,
#                    params[2] = eta, params[3] = nrounds.
#
# Returns the vector of model predictions for the rows of X_test.
xgbPipeline_classification_extraction <- function(X_train, Y_train, X_test, Y_test, params) {
  trainLabels <- as.numeric(Y_train) - 1
  xgbFit <- xgboost(data = as.matrix(X_train), label = trainLabels,
                    max_depth = params[1], eta = params[2],
                    nrounds = params[3], objective = "binary:logistic",
                    verbose = 1)
  predict(xgbFit, newdata = as.matrix(X_test))
}

# Testing JuiceBoxCV
# fn_params is handed to the pipeline as its `params` argument; the example
# pipelines read it positionally as c(max depth, eta, number of rounds).
# verbose_p is documented as a Boolean, so it is passed as TRUE rather than 1.
JuiceBoxCV(X_train = irisTrainMat, Y_train = irisTrainResponse, numFolds = 2,
           numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
           fn = xgbPipeline_classification, fn_params = c(3, 2, 5),
           verbose_p = TRUE)

JuiceBoxCV(X_train = mtcarsMat, Y_train = mtcarsResponse, numFolds = 2,
           numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
           fn = xgbPipeline_regression, fn_params = c(3, 2, 10),
           verbose_p = TRUE)

sjoshistrats/JuiceBox documentation built on May 30, 2019, 12:05 a.m.