#' JuiceBoxCV
#'
#' Performs cross validation with a given pipeline
#'
#' @import caret
#' @import doParallel
#' @import foreach
#' @import stats
#' @param X_train Training Data (excludes the response/target we wish to predict
#' ) that will be fed into the pipeline function.
#' @param Y_train Training Response/Target - The response/target that will be fed into the pipeline function.
#' @param numFolds Integer indicating the number of folds to use in the cross validation procedure.
#' @param numRepeats Integer indicating the number of times to repeat cross validation with numFolds.
#' @param parCV Boolean indicating whether to parallelize the training prodcedure.
#' @param numCores Integer indicating the number of cores to use.
#' @param seedNum Integer indicating the seed number. Using the same seed will generate the same folds.
#' @param verbose_p Boolean indicating if cross validation details should be printed out the screen.
#' @param fn The pipeline function. The pipeline function must take parameters training data, training response,
#' validation data, validation response. See examples for details.
#' @param fn_params Additional parameters to supply to the pipeline function. See examples for details.
#' @return Average cross validation score across all the folds and repeats.
#' @examples
#' library(JuiceBox)
#' library(Metrics)
#' library(xgboost)
#'
#' # Toy data set for classification
#' irisAllMat <- iris
#' irisTrainMat <- irisAllMat[,c(1:4)]
#' irisTrainResponse <- irisAllMat[,c(ncol(irisAllMat))]
#' irisTrainResponse <- factor(ifelse(irisTrainResponse == "setosa", "Yes", "No"))
#'
#' # Toy data set for regression
#' mtcarsMat <- mtcars
#' mtcarsResponse <-mtcarsMat[,1]
#' mtcarsMat <- mtcarsMat[,c(2:ncol(mtcarsMat))]
#'
#' # Pipelines
#' xgbPipeline_regression <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "reg:linear", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' actual <- as.numeric(Y_test) - 1
#' rmseValue <- rmse(predictions, actual)
#' return(-rmseValue)
#' }
#'
#' xgbPipeline_classification <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "binary:logistic", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' actual <- as.numeric(Y_test) - 1
#' logLossValue <- logLoss(actual, predictions)
#' return(-logLossValue)
#' }
#'
#' xgbPipeline_regression_extraction <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "reg:linear", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' return(predictions)
#' }
#'
#' xgbPipeline_classification_extraction <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "binary:logistic", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' return(predictions)
#' }
#'
#' # Testing JuiceBoxCV
#' JuiceBoxCV(X_train = irisTrainMat, Y_train = irisTrainResponse, numFolds = 2,
#' numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
#' fn = xgbPipeline_classification, fn_params = c(3, 2, 5), verbose_p = 1)
#'
#' JuiceBoxCV(X_train = mtcarsMat, Y_train = mtcarsResponse, numFolds = 2,
#' numRepeats = 2, parCV = FALSE, numCores = 8, seedNum = 101,
#' fn = xgbPipeline_regression, fn_params = c(3, 2, 10), verbose_p = 1)
#' @export
JuiceBoxCV <- function(X_train, Y_train, numFolds, numRepeats, parCV, numCores, seedNum, verbose_p, fn, fn_params)
{
set.seed(seedNum)
CVGrid <- expand.grid(c(1:numFolds), c(1:numRepeats))
folds_list = createMultiFolds(Y_train, k = numFolds, times = numRepeats)
if(parCV == TRUE)
{
registerDoParallel(cores=numCores)
allScores <- foreach(i=1:nrow(CVGrid)) %dopar%
{
if(verbose_p == TRUE)
{
print(paste0("Fold ", CVGrid[i,1], " ::: Repeat ", CVGrid[i,2], " ++"))
}
fn_args <- expand.grid(X_train = c())
score <- NULL
score <- fn(X_train[folds_list[[i]],],
Y_train[folds_list[[i]]],
X_train[-folds_list[[i]],],
Y_train[-folds_list[[i]]],
fn_params)
if(length(score) > 1)
{
stop("Error: User pipeline function appears to return multiple values, when only one was expected.")
} else if (is.null(score)) {
stop("Error: Score returned was NULL.")
}
if(verbose_p == TRUE)
{
print(paste0("Fold ", CVGrid[i,1], " ::: Repeat ", CVGrid[i,2], " -- Score = ", score))
}
score
}
avgScore <- sum(unlist(allScores))/nrow(CVGrid)
avgSd <- sd(unlist(allScores))
if(verbose_p == TRUE)
{
print(paste0("Score = ", avgScore, ", SD = ", avgSd))
}
return(avgScore)
} else {
allScores <- c()
for(i in 1:nrow(CVGrid))
{
if(verbose_p == TRUE)
{
print(paste0("Fold ", CVGrid[i,1], " ::: Repeat ", CVGrid[i,2], " ++"))
}
score <- fn(X_train[folds_list[[i]],],
Y_train[folds_list[[i]]],
X_train[-folds_list[[i]],],
Y_train[-folds_list[[i]]],
fn_params)
if(length(score) > 1)
{
stop("Error: User pipeline function appears to return multiple values, when only one was expected.")
} else if (is.null(score)) {
stop("Error: Score returned was NULL.")
}
if(verbose_p == TRUE)
{
print(paste0("Fold ", CVGrid[i,1], " ::: Repeat ", CVGrid[i,2], " -- Score = ", score))
}
allScores = c(allScores, score)
}
avgScore <- sum(unlist(allScores))/nrow(CVGrid)
avgSd <- sd(unlist(allScores))
if(verbose_p == TRUE)
{
print(paste0("Score = ", avgScore, ", SD = ", avgSd))
}
return(avgScore)
}
}
#' extractJuice
#'
#' Returns cross validation predictions from folds (i.e. if 2 folds, then aggregate the predictions on
#' fold 2 after training only on fold 1 & the predictions on fold 1 after training only on fold 2). This
#' is helps tremendously in ensembling schemes such as stacked generalization.
#'
#' @import caret
#' @import doParallel
#' @import foreach
#' @param X_train Training Data (excludes the response/target we wish to predict
#' ) that will be fed into the pipeline function.
#' @param Y_train Training Response/Target - The response/target that will be fed into the pipeline function.
#' @param numFolds Integer indicating the number of folds to use to extract predictions
#' @param parCV Boolean indicating whether to parallelize the extraction prodcedure.
#' @param numCores Integer indicating the number of cores to use when generating predictions.
#' @param seedNum Integer indicating the seed number. Using the same seed will generate the same folds.
#' @param verbose_p Boolean indicating if prediction extraction details should be printed out the screen.
#' @param fn The pipeline function. The pipeline function must take parameters training data, training response,
#' validation data, validation response. See examples for details.
#' @param fn_params Additional parameters to supply to the pipeline function. See examples for details.
#' @return CV Fold Predictions
#' @examples
#' library(JuiceBox)
#' library(Metrics)
#' library(xgboost)
#'
#' # Toy data set for classification
#' irisAllMat <- iris
#' irisTrainMat <- irisAllMat[,c(1:4)]
#' irisTrainResponse <- irisAllMat[,c(ncol(irisAllMat))]
#' irisTrainResponse <- factor(ifelse(irisTrainResponse == "setosa", "Yes", "No"))
#'
#' # Toy data set for regression
#' mtcarsMat <- mtcars
#' mtcarsResponse <-mtcarsMat[,1]
#' mtcarsMat <- mtcarsMat[,c(2:ncol(mtcarsMat))]
#'
#' # Pipelines
#' xgbPipeline_regression <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "reg:linear", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' actual <- as.numeric(Y_test) - 1
#' rmseValue <- rmse(predictions, actual)
#' return(-rmseValue)
#' }
#'
#' xgbPipeline_classification <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "binary:logistic", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' actual <- as.numeric(Y_test) - 1
#' logLossValue <- logLoss(actual, predictions)
#' return(-logLossValue)
#' }
#'
#' xgbPipeline_regression_extraction <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "reg:linear", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' return(predictions)
#' }
#'
#' xgbPipeline_classification_extraction <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "binary:logistic", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' return(predictions)
#' }
#'
#' # Testing extractJuice
#' extractJuice(X_train = mtcarsMat, Y_train = mtcarsResponse, numFolds = 2,
#' parCV = FALSE, numCores = 8, seedNum = 101, verbose_p = TRUE,
#' fn = xgbPipeline_regression_extraction, fn_params = c(3, 2, 10))
#'
#' extractJuice(X_train = irisTrainMat, Y_train = irisTrainResponse, numFolds = 2,
#' parCV = FALSE, numCores = 8, seedNum = 101, verbose_p = TRUE,
#' fn = xgbPipeline_classification_extraction, fn_params = c(3, 2, 10))
#' @export
extractJuice <- function(X_train, Y_train, numFolds, parCV, numCores, seedNum, verbose_p, fn, fn_params)
{
set.seed(seedNum)
CVGrid <- expand.grid(c(1:numFolds), c(1:1))
folds_list = createMultiFolds(Y_train, k = numFolds, times = 1)
origRowOrder <- c(1:nrow(X_train))
if(parCV == TRUE)
{
registerDoParallel(cores=numCores)
allPreds <- foreach(i=1:nrow(CVGrid)) %dopar%
{
if(verbose_p == TRUE)
{
print(paste0("Calculating predictions from fold ", CVGrid[i,1]))
}
preds <- fn(X_train[folds_list[[i]],],
Y_train[folds_list[[i]]],
X_train[-folds_list[[i]],],
Y_train[-folds_list[[i]]],
fn_params)
if(verbose_p == TRUE)
{
print(paste0("Finished getting predictions from fold ", CVGrid[i,1]))
}
preds
}
allPredIDs <- foreach(i=1:nrow(CVGrid)) %dopar%
{
predID <- origRowOrder[-folds_list[[i]]]
}
allPreds <- unlist(allPreds)
allPredIDs <- unlist(allPredIDs)
if(length(allPreds) != length(allPredIDs)) {
stop(paste0("Error: The number of predictions returned is inconsistent with number of samples in the provided training data", "\n",
"Number of predictions returned: ", length(allPreds), "\n",
"Number of predictions expected: ", length(allPredIDs)))
}
tempDf <- data.frame(allPredIDs, allPreds)
tempDf <- tempDf[order(allPredIDs),]
return(tempDf$allPreds)
} else {
allPreds <- c()
allPredIDs <- c()
for(i in 1:nrow(CVGrid))
{
if(verbose_p == TRUE)
{
print(paste0("Calculating predictions from fold ", CVGrid[i,1]))
}
preds <- fn(X_train[folds_list[[i]],],
Y_train[folds_list[[i]]],
X_train[-folds_list[[i]],],
Y_train[-folds_list[[i]]],
fn_params)
if(verbose_p == TRUE)
{
print(paste0("Finished getting predictions from fold ", CVGrid[i,1]))
}
allPreds = c(allPreds, preds)
}
for(i in 1:nrow(CVGrid))
{
predID <- origRowOrder[-folds_list[[i]]]
allPredIDs <- c(allPredIDs, predID)
}
allPreds <- unlist(allPreds)
allPredIDs <- unlist(allPredIDs)
if(length(allPreds) != length(allPredIDs)) {
stop(paste0("Error: The number of predictions returned is inconsistent with number of samples in the provided training data", "\n",
"Number of predictions returned: ", length(allPreds), "\n",
"Number of predictions expected: ", length(allPredIDs)))
}
tempDf <- data.frame(allPredIDs, allPreds)
tempDf <- tempDf[order(allPredIDs),]
return(tempDf$allPreds)
}
}
#' extractGenericJuice
#'
#' Returns cross validation data from folds (i.e. if 2 folds, then aggregate the data on
#' fold 2 after training only on fold 1 & the data on fold 1 after training only on fold 2). This
#' is helps in feature engineering.
#'
#' @import caret
#' @import doParallel
#' @import foreach
#' @param X_train Training Data (excludes the response/target we wish to predict
#' ) that will be fed into the pipeline function.
#' @param Y_train Training Response/Target - The response/target that will be fed into the pipeline function.
#' @param numFolds Integer indicating the number of folds to use to extract data
#' @param parCV Boolean indicating whether to parallelize the extraction prodcedure.
#' @param numCores Integer indicating the number of cores to use when generating predictions.
#' @param seedNum Integer indicating the seed number. Using the same seed will generate the same folds.
#' @param verbose_p Boolean indicating if prediction extraction details should be printed out the screen.
#' @param fn The pipeline function. The pipeline function must take parameters training data, training response,
#' validation data, validation response. See examples for details.
#' @param fn_params Additional parameters to supply to the pipeline function. See examples for details.
#' @return CV Fold Predictions
#' @export
extractGenericJuice <- function(X_train, Y_train, numFolds, parCV, numCores, seedNum, verbose_p, fn, fn_params)
{
set.seed(seedNum)
CVGrid <- expand.grid(c(1:numFolds), c(1:1))
folds_list = createMultiFolds(Y_train, k = numFolds, times = 1)
origRowOrder <- c(1:nrow(X_train))
if(parCV == TRUE)
{
registerDoParallel(cores=numCores)
all_cv_data <- foreach(i=1:nrow(CVGrid)) %dopar%
{
if(verbose_p == TRUE)
{
print(paste0("Calculating data from fold ", CVGrid[i,1]))
}
cv_data <- fn(X_train[folds_list[[i]],],
Y_train[folds_list[[i]]],
X_train[-folds_list[[i]],],
Y_train[-folds_list[[i]]],
fn_params)
if(verbose_p == TRUE)
{
print(paste0("Finished getting data from fold ", CVGrid[i,1]))
}
cv_data
}
allPredIDs <- foreach(i=1:nrow(CVGrid)) %dopar%
{
predID <- origRowOrder[-folds_list[[i]]]
}
all_cv_data <- do.call(rbind.data.frame, all_cv_data)
allPredIDs <- unlist(allPredIDs)
tempDf <- data.frame(cbind(allPredIDs, all_cv_data))
tempDf <- tempDf[order(allPredIDs),]
tempDf$allPredIDs <- NULL
return(tempDf)
} else {
allPreds <- c()
allPredIDs <- c()
all_cv_data <- foreach(i=1:nrow(CVGrid)) %do%
{
if(verbose_p == TRUE)
{
print(paste0("Calculating data from fold ", CVGrid[i,1]))
}
cv_data <- fn(X_train[folds_list[[i]],],
Y_train[folds_list[[i]]],
X_train[-folds_list[[i]],],
Y_train[-folds_list[[i]]],
fn_params)
if(verbose_p == TRUE)
{
print(paste0("Finished getting data from fold ", CVGrid[i,1]))
}
cv_data
}
allPredIDs <- foreach(i=1:nrow(CVGrid)) %dopar%
{
predID <- origRowOrder[-folds_list[[i]]]
}
all_cv_data <- do.call(rbind.data.frame, all_cv_data)
allPredIDs <- unlist(allPredIDs)
tempDf <- data.frame(cbind(allPredIDs, all_cv_data))
tempDf <- tempDf[order(allPredIDs),]
tempDf$allPredIDs <- NULL
return(tempDf)
}
}
#' JuiceBoxCV_GridSearch
#'
#' Use grid search in conjunction with cross validation to find optimal parameters for your pipline.
#' For each parameter, a cross validation procedure utilizing numFolds folds and numRepeats repetitions will
#' be executed. The parameter yielding largest CV score will be returned (i.e. maximization). In order to search for parameters that minimize the objective,
#' simply negate the value return in the pipeline function.
#'
#' @import caret
#' @import doParallel
#' @import foreach
#' @param X_train Training Data (excludes the response/target we wish to predict
#' ) that will be fed into the pipeline function.
#' @param Y_train Training Response/Target - The response/target that will be fed into the pipeline function.
#' @param numFolds Integer indicating the number of folds to use in the cross validation procedure.
#' @param numRepeats Integer indicating the number of times to repeat cross validation with numFolds.
#' @param goParallel String indicating how to parallelize the procedure.
#' If "parGS", then the parallelization will happen across on the grid search across the parameters. If "parCV", then the parallelization will
#' happen across the numFolds and numRepeats cross validation procedure.
#' @param numCores Integer indicating the number of cores to use.
#' @param seedNum Integer indicating the seed number. Using the same seed will generate the same folds.
#' @param verbose_p Boolean indicating if grid search details should be printed out the screen.
#' @param fitGrid Dataframe dictating the combinations to try. See examples for details.
#' @param fn The pipeline function. The pipeline function must take parameters training data, training response,
#' validation data, validation response. See examples for details.
#' @return Optimal parameters found from grid search
#' @examples
#' library(JuiceBox)
#' library(Metrics)
#' library(xgboost)
#'
#' # Toy data set for classification
#' irisAllMat <- iris
#' irisTrainMat <- irisAllMat[,c(1:4)]
#' irisTrainResponse <- irisAllMat[,c(ncol(irisAllMat))]
#' irisTrainResponse <- factor(ifelse(irisTrainResponse == "setosa", "Yes", "No"))
#'
#' # Toy data set for regression
#' mtcarsMat <- mtcars
#' mtcarsResponse <-mtcarsMat[,1]
#' mtcarsMat <- mtcarsMat[,c(2:ncol(mtcarsMat))]
#'
#' # Pipelines
#' xgbPipeline_regression <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "reg:linear", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' actual <- as.numeric(Y_test) - 1
#' rmseValue <- rmse(predictions, actual)
#' return(-rmseValue)
#' }
#'
#' xgbPipeline_classification <- function(X_train, Y_train, X_test, Y_test, params)
#' {
#' Y_train <- as.numeric(Y_train) - 1
#' xgbFit <- xgboost(data = as.matrix(X_train), label = Y_train, max.depth = params[1],
#' eta = params[2], nround = params[3], objective = "binary:logistic", verbose = 1)
#' predictions <- predict(xgbFit, newdata = as.matrix(X_test))
#' actual <- as.numeric(Y_test) - 1
#' logLossValue <- logLoss(actual, predictions)
#' return(-logLossValue)
#' }
#'
#' # Testing JuiceBoxCV_GridSearch
#' JuiceBoxCV_GridSearch(X_train = irisTrainMat, Y_train = irisTrainResponse, numFolds = 2,
#' numRepeats = 2, goParallel = "none", numCores = 8,
#' seedNum = 101, verbose_p = FALSE,
#' fitGrid = expand.grid(c(1:4), c(1:3), c(1:5)),
#' fn = xgbPipeline_classification)
#'
#' JuiceBoxCV_GridSearch(X_train = mtcarsMat, Y_train = mtcarsResponse, numFolds = 2,
#' numRepeats = 2, goParallel = "none", numCores = 8,
#' seedNum = 101, verbose_p = FALSE,
#' fitGrid = expand.grid(c(1:4), c(1:3), c(1:5)),
#' fn = xgbPipeline_regression)
#' @export
JuiceBoxCV_GridSearch <- function(X_train, Y_train, numFolds, numRepeats, goParallel, numCores, seedNum, verbose_p, fitGrid, fn)
{
bestparameter <- c()
if(goParallel == "parGS")
{
registerDoParallel(cores=numCores)
allScores <- foreach(i=1:nrow(fitGrid)) %dopar%
{
if(verbose_p == TRUE) {
print(paste0("Trying parameter = ", paste(fitGrid[i,], collapse = ", ")))
}
cv_score <- JuiceBoxCV(X_train = X_train, Y_train = Y_train, numFolds = numFolds,
numRepeats = numRepeats, parCV = FALSE, numCores = -1, seedNum = seedNum, verbose_p = verbose_p,
fn, as.numeric(fitGrid[i,]))
cv_score
}
allScores <- unlist(allScores)
if(verbose_p == TRUE) {
for(i in c(1:nrow(fitGrid)))
{
stringOfPrints <- c("For parameter w/ values ")
stringOfPrints <- paste0(stringOfPrints, paste(as.numeric(fitGrid[i,]), collapse = ", "))
stringOfPrints <- paste0(stringOfPrints, ", score = ")
stringOfPrints <- paste0(stringOfPrints, allScores[i])
print(stringOfPrints)
}
}
maxScore <- max(allScores)
maxScoreIndex <- which(allScores == maxScore)
# print(paste0("# Of parameter giving max score = ", length(maxScoreIndex)))
maxScoreIndex <- maxScoreIndex[1]
if(verbose_p == TRUE) {
stringOfPrintsMax <- c("Highest Score achieved w/ parameter values ")
stringOfPrintsMax <- paste0(stringOfPrintsMax, paste(as.numeric(fitGrid[maxScoreIndex,]), collapse = ", "))
stringOfPrintsMax <- paste0(stringOfPrintsMax, ", score = ")
stringOfPrintsMax <- paste0(stringOfPrintsMax, allScores[maxScoreIndex])
print(stringOfPrintsMax)
}
bestparameter <- fitGrid[maxScoreIndex,]
} else if (goParallel == "parCV")
{
allScores <- c()
for(i in 1:nrow(fitGrid))
{
if(verbose_p == TRUE) {
print(paste0("Trying parameter = ", paste(fitGrid[i,], collapse = ", ")))
}
cv_score <- JuiceBoxCV(X_train = X_train, Y_train = Y_train, numFolds = numFolds,
numRepeats = numRepeats, parCV = TRUE, numCores = numCores, seedNum = seedNum, verbose_p = verbose_p,
fn, as.numeric(fitGrid[i,]))
allScores = c(allScores, cv_score)
}
if(verbose_p == TRUE) {
for(i in c(1:nrow(fitGrid)))
{
stringOfPrints <- c("For parameter w/ values ")
stringOfPrints <- paste0(stringOfPrints, paste(fitGrid[i,], collapse = ", "))
stringOfPrints <- paste0(stringOfPrints, ", score = ")
stringOfPrints <- paste0(stringOfPrints, allScores[i])
print(stringOfPrints)
}
}
maxScore <- max(allScores)
maxScoreIndex <- which(allScores == maxScore)
# print(paste0("# Of parameter giving max score = ", length(maxScoreIndex)))
maxScoreIndex <- maxScoreIndex[1]
if(verbose_p == TRUE) {
stringOfPrintsMax <- c("Highest Score achieved w/ parameter values ")
stringOfPrintsMax <- paste0(stringOfPrintsMax, paste(as.numeric(fitGrid[maxScoreIndex,]), collapse = ", "))
stringOfPrintsMax <- paste0(stringOfPrintsMax, ", score = ")
stringOfPrintsMax <- paste0(stringOfPrintsMax, allScores[maxScoreIndex])
print(stringOfPrintsMax)
}
bestparameter <- fitGrid[maxScoreIndex,]
} else {
allScores <- c()
for(i in 1:nrow(fitGrid))
{
if(verbose_p == TRUE) {
print(paste0("Trying parameter = ", paste(fitGrid[i,], collapse = ", ")))
}
cv_score <- JuiceBoxCV(X_train = X_train, Y_train = Y_train, numFolds = numFolds,
numRepeats = numRepeats, parCV = FALSE, numCores = numCores, seedNum = seedNum, verbose_p = verbose_p,
fn, as.numeric(fitGrid[i,]))
allScores = c(allScores, cv_score)
}
if(verbose_p == TRUE) {
for(i in c(1:nrow(fitGrid)))
{
stringOfPrints <- c("For parameter w/ values ")
stringOfPrints <- paste0(stringOfPrints, paste(fitGrid[i,], collapse = ", "))
stringOfPrints <- paste0(stringOfPrints, ", score = ")
stringOfPrints <- paste0(stringOfPrints, allScores[i])
print(stringOfPrints)
}
}
maxScore <- max(allScores)
maxScoreIndex <- which(allScores == maxScore)
# print(paste0("# Of parameter giving max score = ", length(maxScoreIndex)))
maxScoreIndex <- maxScoreIndex[1]
if(verbose_p == TRUE) {
stringOfPrintsMax <- c("Highest Score achieved w/ parameter values ")
stringOfPrintsMax <- paste0(stringOfPrintsMax, paste(as.numeric(fitGrid[maxScoreIndex,]), collapse = ", "))
stringOfPrintsMax <- paste0(stringOfPrintsMax, ", score = ")
stringOfPrintsMax <- paste0(stringOfPrintsMax, allScores[maxScoreIndex])
print(stringOfPrintsMax)
}
bestparameter <- fitGrid[maxScoreIndex,]
}
return(bestparameter)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.