# ============================================================================ #
# #
# Benchmarking Compboost vs. Mboost #
# #
# ============================================================================ #
# Setup:
# ---------------------------------------------------
source("benchmark/runtime/defs.R")
source("benchmark/runtime/algorithms.R")
# Directory that holds the batchtools experiment registry:
bm.dir = "benchmark/runtime/benchmark_files"

# Create frame for Benchmark:
# ---------------------------------------------------

# A fresh registry is created when overwriting is requested or when no
# registry directory exists yet; otherwise the existing one is loaded.
if (my.setting$overwrite || (! dir.exists(bm.dir))) {

  # Remove a stale registry first. makeExperimentRegistry is then called
  # unconditionally, so 'regis' is always defined after this branch (the
  # previous version only deleted the directory when overwriting and never
  # created the new registry).
  if (dir.exists(bm.dir)) {
    unlink(bm.dir, recursive = TRUE)
  }

  regis = makeExperimentRegistry(
    file.dir = bm.dir,
    packages = my.setting$packages,
    seed     = round(1000 * pi)
  )
} else {
  # NOTE(review): recent batchtools versions load registries read-only unless
  # 'writeable = TRUE' is passed -- confirm that the addProblem/addExperiments
  # calls further down work on a reloaded registry.
  regis = loadRegistry(bm.dir)
}
# Pick the parallel backend by operating system: socket clusters on Windows,
# forked multicore workers on Linux. On any other system no branch matches
# and nothing is assigned.
switch(
  Sys.info()[["sysname"]],
  Windows = {
    regis$cluster.functions = makeClusterFunctionsSocket(ncpus = my.setting$cores)
  },
  Linux = {
    regis$cluster.functions = makeClusterFunctionsMulticore(ncpus = my.setting$cores)
  }
)
# Define data and algorithm for benchmark:
# ---------------------------------------------------

# Problem "my.data": simulates a regression data set.
#
# 'data' is the static part shared by all experiments (here only the number
# of additional noise columns), while 'n' (rows) and 'p' (features entering
# the response) are the dynamic arguments varied through addExperiments.
addProblem(
  reg  = regis,
  name = "my.data",
  data = list(noise = 0),
  fun  = function (data, job, n, p) {

    # Total number of simulated columns: p features plus noise columns.
    n.vars = p + data$noise

    # Beta(1, 8) distributed magnitudes with random sign, one per
    # unordered pair of columns:
    magnitudes = rbeta(n = (n.vars * (n.vars - 1)) / 2, shape1 = 1, shape2 = 8)
    signs      = sample(c(-1, 1), size = length(magnitudes), replace = TRUE)
    pair.corrs = signs * magnitudes

    # Symmetric matrix with unit diagonal: fill the upper triangle and
    # mirror it into the lower triangle.
    sigma = matrix(1, nrow = n.vars, ncol = n.vars)
    sigma[upper.tri(sigma)] = pair.corrs
    lower = lower.tri(sigma)
    sigma[lower] = t(sigma)[lower]

    # Draw the columns from a multivariate normal with that sigma:
    sim = as.data.frame(rmvnorm(n = n, sigma = sigma, method = "svd"))

    # Response: intercept plus the first p columns times uniform(-2, 2)
    # coefficients, with standard normal noise added by rnorm.
    betas  = runif(p + 1, min = -2, max = 2)
    design = as.matrix(cbind(1, sim[, 1:p]))
    sim$y  = rnorm(n = n, mean = design %*% betas)

    # Only the data is handed on; the true coefficients are not returned.
    list(data = sim)
  }
)
# Register the algorithms of the benchmark. Each function is expected to take
# the instance generated by the problem (see addProblem), the static data and
# the algorithm parameters set in addExperiments.
# Registration order: compboost, mboost.fast, mboost.
invisible(Map(
  function (algo.name, algo.fun) {
    addAlgorithm(reg = regis, name = algo.name, fun = algo.fun)
  },
  c("compboost", "mboost.fast", "mboost"),
  list(benchmarkCompboost, benchmarkMboostFast, benchmarkMboost)
))
# Increase number of iterations:
# ------------------------------

# Experiments on one fixed data dimension where the number of boosting
# iterations is varied. All three algorithms get the same parameter grid
# (every iteration count crossed with both learner types), so the grid is
# built once and replicated per algorithm.
addExperiments(
  reg = regis,

  # Fixed data dimension:
  prob.design = list(
    my.data = data.frame(n = 2000, p = 1000)
  ),

  # Identical parameter grid for each algorithm:
  algo.designs = setNames(
    rep(
      list(expand.grid(
        iters            = my.setting$bm.iters,
        learner          = c("spline", "linear"),
        stringsAsFactors = FALSE
      )),
      times = 3
    ),
    c("compboost", "mboost", "mboost.fast")
  ),

  # Number of replications:
  repls = my.setting$replications
)
# Increase dimension of data:
# ----------------------------------

# Experiments with a fixed number of iterations where the data size is
# varied: first every p in my.setting$p at n = 2000, then every n in
# my.setting$n at p = 1000. All three algorithms share the same design
# (1500 iterations for both learner types).
addExperiments(
  reg = regis,

  # Test different data sizes:
  prob.design = list(
    my.data = data.frame(
      n = c(rep(2000, times = length(my.setting$p)), my.setting$n),
      p = c(my.setting$p, rep(1000, times = length(my.setting$n)))
    )
  ),

  # Fix number of iterations, identical design for each algorithm:
  algo.designs = setNames(
    rep(
      list(data.frame(
        iters            = 1500,
        learner          = c("spline", "linear"),
        stringsAsFactors = FALSE
      )),
      times = 3
    ),
    c("compboost", "mboost", "mboost.fast")
  ),

  # Number of replications:
  repls = my.setting$replications
)
# submitJobs(findNotDone())
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.