#' parameter test
# @param savefolder
#' @param nthread number of CPU cores to use, NA for all
#' @param nrounds number of training rounds
#' @param defpar default parameter; defpar = NA: will take parameter from train.parameter file in data directory; defpar = 'default' => defpar = list(eta=0.3, gamma=0, max_depth=6, min_child_weight=1, subsample=1, colsample_bytree=1)
#' @param testpar list of parameter to test: data.frame(parameter name = c(start, stop, stepsize, adjust nrounds(T/F)), eta = c(0.1,0.5,0.1,T))
#' @param plot should results be plotted, default=TRUE
#' @param pdim dimension of plot, default=10
#' @param width proportion of width to height, default=1.5
#' @import data.table
#' @import ggplot2
#' @import xgboost
#' @example
#' parameter.test(nthread = 12, nrounds = 20, defpar = NA, testpar = data.frame(eta = c(0.1, 0.5, 0.1, T), max_depth = c(3, 10, 1, T)))
#' parameter.test(nthread = 12, nrounds = 20, defpar = 'defpar', testpar = data.frame(eta = c(0.1, 0.5, 0.1, T), max_depth = c(3, 10, 1, T)))
#' parameter.test(nthread = 12, nrounds = 20, defpar = list(...), testpar = data.frame(eta = c(0.1, 0.5, 0.1, T), max_depth = c(3, 10, 1, T)))
#' @export
#TODO: dir; defpar--train.parameter
parameter.test <- function(data = "./data",
output_dir = "./testpar",
nthread = 0,
nrounds,
defpar = NA,
testpar,
plot = T,
pdim = 10,
width = 1.5) {
if(defpar == "default"){
defpar = list(objective = "multi:softmax",
num_class = 2,
eta= 0.3,
gamma=0,
max_depth=6,
min_child_weight=1,
subsample=1,
colsample_bytree=1)
} else if (is.na(defpar)) {
defpar <- as.list(fread(paste(data, "train.parameter", sep = "/")))
}
train.matrix <- xgb.DMatrix(paste(data, "train.matrix.data", sep = "/"))
test.matrix <- xgb.DMatrix(paste(data, "test.matrix.data", sep = "/"))
watchlist <- c(train = train.matrix, test = test.matrix)
if(!dir.exists(output_dir)){
dir.create(output_dir, showWarnings = F)
}
defpar$num_class <- get.num.class(train.matrix, test.matrix)
#loop for every parameter
for(i in 1:(dim(testpar)[2])){
#load parameter name and values
pm <- colnames(testpar)[i]
p1 <- testpar[1,i] #start
p2 <- testpar[2,i] #stop
p3 <- testpar[3,i] #stepsize
p4 <- testpar[4,i] #nrounds adjustment
steps <- round((p2-p1)/p3)
#matrix for evaluation
parm <- matrix(nrow = steps+1, ncol = 3)
colnames(parm) <- c(pm, "nrounds", "accuracy")
#loop for parameter values
for(j in 0:steps){
#load change in default parameters
params <- defpar
params[pm] <- p1 + j*p3
#write parameter in eval.m.
parm[j+1,1] <- p1 + j*p3
#train model
if(p4 == 1){#adjust nrounds
nroundsave <- nrounds
nrounds <- round(nrounds*p2/as.numeric(parm[j+1,1]))
}
bst <- xgb.train(data = train.matrix,
watchlist = watchlist,
params = params,
nrounds = nrounds,
nthread = nthread)
#make prediction based on model
pred <- predict(bst, test_data)
#save accuracy; rewrites file every round. one file per parameter
parm[j+1,2] <- nrounds
nrounds <- nroundsave
parm[j+1,3] <- sum(pred == test_label)/(length(pred))
write.csv(parm, file = paste(dir, "/partest_", pm, sep = ""))
}
#make plot if plot TRUE
if(plot){
#make dataframe for ggplot
parm <- as.data.frame(parm)
#make plot
p <- ggplot(parm, aes(parm[,pm], y = value)) +
geom_point(aes(y = parm[,"accuracy"])) +
geom_line(aes(y = parm[,"accuracy"])) +
theme_bw(base_size = pdim*2) +
labs(title = paste("Accuracy in dependency of", pm),
x= pm,
y="accuracy") +
theme(plot.title = element_text(face = "bold.italic", hjust = 0.5),
axis.title.x = element_text(face = "bold"),
axis.text.x = element_text(),
axis.title.y = element_text(face = "bold"),
axis.text.y = element_text()
) +
scale_x_continuous(breaks=0:(steps)*p3)
if(p4 == 1) {#info for adjusted nrounds for eta
p <- p + labs(subtitle = "iterations = nrounds*(max_value/current_value)") +
theme(plot.subtitle = element_text(size = pdim))
}
#save plot
ggsave(filename = paste(output_dir, "/partest_", pm, ".png", sep = ""),
p, width = pdim*width, height = pdim)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.