#######################################
############## packages ###############
#######################################
# main packages
library("devtools")
library("mlr") # as of april 2018, you need to install the mxnet branch of mlr: devtools::install_github("mlr-org/mlr", ref = "mxnet")
library("mxnet")
# helper packages
library("ggplot2")
library("data.table")
library("dplyr")
library("gridExtra")
####################################
## define the problem to optimize ##
####################################
# read mini_mnist (1/10 of actual mnist for faster evaluation, evenly distributed classes)
train = fread("mnist/train.csv", header = TRUE)
test = fread("mnist/test.csv", header = TRUE)
# combine the two files, shuffle the rows and scale the pixel features to [0, 1]
mnist = as.data.frame(rbind(train, test))
mnist = mnist[sample(nrow(mnist)), ]
mnist[, 2:785] = lapply(mnist[, 2:785], function(x) x/255)
rm(train)
rm(test)
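# optional sanity check (a sketch): all pixel features should now lie in [0, 1]
stopifnot(all(vapply(mnist[, 2:785], function(x) all(x >= 0 & x <= 1), logical(1))))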
# generate train, validation and test splits
train.set = sample(nrow(mnist), size = (2/3)*nrow(mnist))
val.set = sample(setdiff(1:nrow(mnist), train.set), size = 0.5 * (nrow(mnist) - length(train.set)))
test.set = setdiff(1:nrow(mnist), c(train.set, val.set))
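# optional sanity check: the three index sets should partition all rows
stopifnot(length(intersect(train.set, val.set)) == 0,
          length(intersect(train.set, test.set)) == 0,
          length(intersect(val.set, test.set)) == 0,
          length(c(train.set, val.set, test.set)) == nrow(mnist))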
# mini-mnist has 10 classes
task = makeClassifTask(data = mnist, target = "label")
# define the problem
problem = list(data = task, train = train.set, val = val.set, test = test.set)
# each class has 600 samples
print(problem)
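# the class balance can be verified directly on the label column
table(mnist$label[train.set])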
#######################################
## define functions to use hyperband ##
#######################################
# config space: the hyperparameters hyperband will search over
configSpace = makeParamSet(
  makeDiscreteParam(id = "optimizer", values = c("sgd", "rmsprop", "adam", "adagrad")),
  makeNumericParam(id = "learning.rate", lower = 0.001, upper = 0.1),
  makeNumericParam(id = "wd", lower = 0, upper = 0.01),
  makeNumericParam(id = "dropout.input", lower = 0, upper = 0.6),
  makeNumericParam(id = "dropout.layer1", lower = 0, upper = 0.6),
  makeNumericParam(id = "dropout.layer2", lower = 0, upper = 0.6),
  makeNumericParam(id = "dropout.layer3", lower = 0, upper = 0.6),
  makeLogicalParam(id = "batch.normalization1"),
  makeLogicalParam(id = "batch.normalization2"),
  makeLogicalParam(id = "batch.normalization3"))
# sample fun: draw n.configs random configurations from the parameter set
# and drop unset (NA) values
sample.fun = function(par.set, n.configs, ...) {
  lapply(sampleValues(par = par.set, n = n.configs), function(x) x[!is.na(x)])
}
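# a quick look at what sample.fun returns, e.g. for two random configurations
# (sampleValues comes from ParamHelpers, which is loaded with mlr)
str(sample.fun(par.set = configSpace, n.configs = 2))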
# init fun: build a LeNet-style CNN learner from a sampled configuration
# and train it for r epochs
init.fun = function(r, config, problem) {
  lrn = makeLearner("classif.mxff",
    # LeNet architecture: http://deeplearning.net/tutorial/lenet.html
    layers = 3,
    conv.layer1 = TRUE, conv.layer2 = TRUE,
    conv.data.shape = c(28, 28),
    num.layer1 = 8, num.layer2 = 16, num.layer3 = 120,
    conv.kernel1 = c(3, 3), conv.stride1 = c(1, 1), pool.kernel1 = c(2, 2), pool.stride1 = c(2, 2),
    conv.kernel2 = c(3, 3), conv.stride2 = c(1, 1), pool.kernel2 = c(2, 2), pool.stride2 = c(2, 2),
    array.batch.size = 200,
    begin.round = 1, num.round = r,
    ctx = mx.gpu(),
    par.vals = config)
  mod = train(learner = lrn, task = problem$data, subset = problem$train)
  return(mod)
}
# train fun: continue training an existing model for `budget` more epochs,
# warm-starting from the stored weights
train.fun = function(mod, budget, problem) {
  lrn = makeLearner("classif.mxff", par.vals = mod$learner$par.vals)
  lrn = setHyperPars(lrn,
    symbol = mod$learner.model$symbol,
    arg.params = mod$learner.model$arg.params,
    aux.params = mod$learner.model$aux.params,
    begin.round = mod$learner$par.vals$begin.round + mod$learner$par.vals$num.round,
    num.round = budget,
    ctx = mx.gpu())
  mod = train(learner = lrn, task = problem$data, subset = problem$train)
  return(mod)
}
# performance fun: accuracy on the validation set
performance.fun = function(model, problem) {
  pred = predict(model, task = problem$data, subset = problem$val)
  performance(pred, measures = acc)
}
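# the three functions chain together like a single hyperband step; a minimal
# sketch, assuming the mxnet branch of mlr and a working GPU (hence commented out):
# cfg = sample.fun(par.set = configSpace, n.configs = 1)[[1]]
# mod = init.fun(r = 1, config = cfg, problem = problem)
# mod = train.fun(mod, budget = 2, problem = problem)
# performance.fun(mod, problem)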
#######################################
############# applications ############
#######################################
#### make neural net algorithm object ####
obj = algorithm$new(
  problem = problem,
  id = "cnn",
  configuration = sample.fun(par.set = configSpace, n.configs = 1)[[1]],
  initial.budget = 1,
  init.fun = init.fun,
  train.fun = train.fun,
  performance.fun = performance.fun)
# we can inspect the model of our algorithm object
obj$model
# the data matrix shows us the hyperparameters, the current budget and the performance
obj$algorithm.result$data.matrix
# if we are only interested in the performance, we can also call the getPerformance method
obj$getPerformance()
# we can continue training our object for four more iterations by calling
obj$continue(4)
# inspect how the data matrix has changed
obj$algorithm.result$data.matrix
# continue training for 15 more iterations (3 x 5) to obtain a total of 20 iterations
invisible(capture.output(replicate(3, obj$continue(5))))
# inspect the model again
obj$model
# inspect the data matrix again
obj$algorithm.result$data.matrix
# we can visualize the performance curve directly
obj$visPerformance()
###### make neural net bracket object #####
brack = bracket$new(
  problem = problem,
  max.perf = TRUE,
  max.resources = 81,
  prop.discard = 3,
  s = 4,
  B = (4 + 1)*81,
  id = "cnn",
  par.set = configSpace,
  sample.fun = sample.fun,
  init.fun = init.fun,
  train.fun = train.fun,
  performance.fun = performance.fun)
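# the budget arithmetic behind this bracket, as a sketch: with max.resources = 81
# and prop.discard = 3, the bracket s = 4 runs 81, 27, 9, 3, 1 configurations
# for 1, 3, 9, 27, 81 epochs respectively
data.frame(stage = 0:4,
           n.configs = 3^4 %/% 3^(0:4),
           budget = 81 %/% 3^(4 - 0:4))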
# the data matrix shows us the hyperparameters, the current budget and the performance
brack$bracket.storage$data.matrix
# run the bracket
brack$run()
# inspect the data matrix again
brack$bracket.storage$data.matrix
# visualize the bracket
brack$visPerformances()
# access the performance of the best model
brack$getPerformances()
########### call hyperband ################
hyperhyper = hyperband(
  problem = problem,
  max.resources = 81,
  prop.discard = 3,
  max.perf = TRUE,
  id = "cnn",
  par.set = configSpace,
  sample.fun = sample.fun,
  init.fun = init.fun,
  train.fun = train.fun,
  performance.fun = performance.fun)
# visualize the brackets
hyperVis(hyperhyper, perfLimits = c(0, 1))
# get the best performance across all brackets
max(unlist(lapply(hyperhyper, function(x) x$getPerformances())))
# get the best model from the best-performing bracket
best.mod = which.max(unlist(lapply(hyperhyper, function(x) x$getPerformances())))
hyperhyper[[best.mod]]$models[[1]]$model
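# the tuned hyperparameters of that model can be read from its mlr learner;
# $learner$par.vals is standard mlr (train.fun above uses the same access)
hyperhyper[[best.mod]]$models[[1]]$model$learner$par.vals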
# check the performance of the best bracket on the test set
performance(predict(object = hyperhyper[[best.mod]]$models[[1]]$model,
                    task = problem$data,
                    subset = problem$test),
            measures = acc)
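# a per-class view of the test errors (calculateConfusionMatrix is part of mlr);
# pred.test is just a name introduced here to store the prediction
pred.test = predict(object = hyperhyper[[best.mod]]$models[[1]]$model,
                    task = problem$data,
                    subset = problem$test)
calculateConfusionMatrix(pred.test)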