addExperiments: Add experiments to the registry.

View source: R/addExperiments.R

Description

Add experiments for running algorithms on problems to the registry, so they can be executed later.

Usage

addExperiments(reg, prob.designs, algo.designs, repls = 1L,
  skip.defined = FALSE)

Arguments

reg

[ExperimentRegistry]
Registry.

prob.designs

[character | Design | list of Design]
Either problem ids, a single problem design, or a list of problem designs; the latter two are created with makeDesign. If missing (the default), all problems are selected, without associating a design.

algo.designs

[character | Design | list of Design]
Either algorithm ids, a single algorithm design, or a list of algorithm designs; the latter two are created with makeDesign. If missing (the default), all algorithms are selected, without associating a design.

repls

[integer(1)]
Number of replications.
Default is 1.

skip.defined

[logical]
If TRUE, experiments that are already defined are skipped; otherwise an error is thrown.
Default is FALSE.
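
prob.designs and algo.designs accept ids, a single design, or a list of designs. A minimal sketch of these forms (assuming a registry reg that already contains a problem "p1" and an algorithm "a1"; the parameters n and k are hypothetical):

# By id; problems and algorithms are crossed without parameter designs:
addExperiments(reg, prob.designs = "p1", algo.designs = "a1")

# With parameter designs created by makeDesign:
pd = makeDesign("p1", exhaustive = list(n = c(100, 200)))
ad = makeDesign("a1", exhaustive = list(k = 1:3))
addExperiments(reg, prob.designs = pd, algo.designs = list(ad), repls = 2)

# Repeating a call errors on the already defined experiments,
# unless skip.defined = TRUE:
addExperiments(reg, prob.designs = pd, algo.designs = list(ad), repls = 2,
               skip.defined = TRUE)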

Value

Invisibly returns a vector of the ids of the added experiments.
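
Since the return value is invisible, assign it to work with the ids, e.g. (a sketch, assuming a populated registry reg):

ids = addExperiments(reg, repls = 2)
print(ids)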

See Also

Other add: addAlgorithm, addProblem

Examples

### EXAMPLE 1 ###
reg = makeExperimentRegistry(id = "example1", file.dir = tempfile())

# Define a problem:
# Subsampling from the iris dataset.
data(iris)
subsample = function(static, ratio) {
  n = nrow(static)
  train = sample(n, floor(n * ratio))
  test = setdiff(seq(n), train)
  list(test = test, train = train)
}
addProblem(reg, id = "iris", static = iris,
           dynamic = subsample, seed = 123)

# Define algorithm "tree":
# Decision tree on the iris dataset, modeling Species.
tree.wrapper = function(static, dynamic, ...) {
  library(rpart)
  mod = rpart(Species ~ ., data = static[dynamic$train, ], ...)
  pred = predict(mod, newdata = static[dynamic$test, ], type = "class")
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id = "tree", fun = tree.wrapper)

# Define algorithm "forest":
# Random forest on the iris dataset, modeling Species.
forest.wrapper = function(static, dynamic, ...) {
  library(randomForest)
  mod = randomForest(Species ~ ., data = static, subset = dynamic$train, ...)
  pred = predict(mod, newdata = static[dynamic$test, ])
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id = "forest", fun = forest.wrapper)

# Define problem parameters:
pars = list(ratio = c(0.67, 0.9))
iris.design = makeDesign("iris", exhaustive = pars)

# Define decision tree parameters:
pars = list(minsplit = c(10, 20), cp = c(0.01, 0.1))
tree.design = makeDesign("tree", exhaustive = pars)

# Define random forest parameters:
pars = list(ntree = c(100, 500))
forest.design = makeDesign("forest", exhaustive = pars)

# Add experiments to the registry:
# Use the previously defined experimental designs.
addExperiments(reg, prob.designs = iris.design,
               algo.designs = list(tree.design, forest.design),
               repls = 2) # Usually you would set repls to 100 or more.
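# This call defines 2 (ratio values) * (4 tree + 2 forest parameter
# settings) * 2 replications = 24 experiments.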

# Optional: Short summary over problems and algorithms.
summarizeExperiments(reg)

# Optional: Test one decision tree job and one expensive (ntree = 500)
# random forest job. Use findExperiments to get the right job ids.
do.tests = FALSE
if (do.tests) {
  id1 = findExperiments(reg, algo.pattern = "tree")[1]
  id2 = findExperiments(reg, algo.pattern = "forest",
                         algo.pars = (ntree == 500))[1]
  testJob(reg, id1)
  testJob(reg, id2)
}

# Submit the jobs to the batch system
submitJobs(reg)

# Calculate the misclassification rate for all finished jobs.
reduce = function(job, res) {
  n = sum(res)
  list(mcr = (n - sum(diag(res))) / n)
}
res = reduceResultsExperiments(reg, fun = reduce)
print(res)

# Aggregate results using 'ddply' from package 'plyr':
# Calculate the mean over all replications of identical experiments
# (same problem, same algorithm, and same parameters).
library(plyr)
vars = setdiff(names(res), c("repl", "mcr"))
aggr = ddply(res, vars, summarise, mean.mcr = mean(mcr))
print(aggr)

## Not run: 
### EXAMPLE 2 ###
# define two simple test functions
testfun1 = function(x) sum(x^2)
testfun2 = function(x) -exp(-sum(abs(x)))

# Define ExperimentRegistry:
reg = makeExperimentRegistry("example02", seed = 123, file.dir = tempfile())

# Add the testfunctions to the registry:
addProblem(reg, "testfun1", static = testfun1)
addProblem(reg, "testfun2", static = testfun2)

# Use SimulatedAnnealing on the test functions:
addAlgorithm(reg, "sann", fun = function(static, dynamic) {
  upp = rep(10, 2)
  low = -upp
  start = sample(c(-10, 10), 2)
  res = optim(start, fn = static, lower = low, upper = upp, method = "SANN")
  res = res[c("par", "value", "counts", "convergence")]
  res$start = start
  return(res)
})

# add experiments and submit
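# Without designs, all problems are crossed with all algorithms:
# 2 problems * 1 algorithm * 10 replications = 20 jobs.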
addExperiments(reg, repls = 10)
submitJobs(reg)

# Gather information from the experiments, in this case the function value
# and whether the algorithm converged:
reduceResultsExperiments(reg, fun = function(job, res) res[c("value", "convergence")])

## End(Not run)
