# title : tuneXGB
# author : jacob
tuneXGB <- function(data, y, params, k, max_model=NULL){
if(k<2) stop(">> k is very small \n")
data <- as.data.frame(data)
# shuffle params
if(ncol(params)==1){
params = params
cat(">> cartesian grid search, params not shuffled! \n")
Sys.sleep(3)
}else{
set.seed(1)
params = params[sample(nrow(params)),]
cat(">> random grid search, params shuffled! \n")
Sys.sleep(3)
}
# convert char to factor
if(sum(sapply(data, function(x) is.character(x)))>0){
char_idx <- which(sapply(data, function(x) is.character(x)))
data[char_idx] <- lapply(data[char_idx], as.factor)
}
data_y = data[,y]
data_x = data[,which(colnames(data)!=y)]
# need for xgboost
colnames(data_x) <- paste0(rep("X",ncol(data_x)),1:ncol(data_x))
sparse_matrix_train <- sparse.model.matrix(~.-1, data = data_x)
ddata <- xgb.DMatrix(data = sparse_matrix_train, label = data_y)
if(is.numeric(max_model)){
max_model <- max_model
}else{
max_model <- nrow(params)
}
if(nrow(params)<max_model) stop(">> max_model is lower than nrow(params) \n")
output <- list()
for(i in 1:max_model){
set.seed(1)
ml_xgb <- xgb.cv(
data = ddata,
eval_metric = "logloss",
maximize = FALSE,
objective = "binary:logistic",
nrounds = iterations,
early_stopping_rounds = early_stopping_rounds,
nthread = num_threads,
print_every_n = 10,
tree_method = "hist",
grow_policy = "lossguide",
nfold = k,
stratified = TRUE,
params = as.list(sapply(as.list(params),"[",i)),
prediction = TRUE # cvpredict
)
output$scores[i] <- min(ml_xgb$evaluation_log$test_logloss_mean)
output$models[i] <- list(ml_xgb)
}
output$scores <- data.frame(head(params, max_model), logloss=output$scores)
output$scores <- output$scores[order(output$scores$logloss, decreasing = F), ]
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.