# benchmark/runtime/execute_runtime_benchmark.R

# ============================================================================ #
#                                                                              #
#                   Benchmarking Compboost vs. Mboost                          #
#                                                                              #
# ============================================================================ #

# Setup:
# ---------------------------------------------------

# Load the two helper scripts in order. The rest of this script uses
# `my.setting` and the `benchmark*` functions, which are not defined in this
# file (presumably they are provided by these scripts).
invisible(lapply(
  c("benchmark/runtime/defs.R", "benchmark/runtime/algorithms.R"),
  source
))

# Directory that holds the experiment registry:
bm.dir = "benchmark/runtime/benchmark_files"

# Create frame for Benchmark:
# ---------------------------------------------------

# (Re-)create the registry when overwriting is requested or when no registry
# directory exists yet; otherwise reuse the registry stored in bm.dir.
if (my.setting$overwrite || (! dir.exists(bm.dir))) {
  # Remove a stale registry directory first. The registry itself has to be
  # created in both cases (fresh start and overwrite); otherwise `regis` would
  # be undefined for the remainder of the script.
  if (dir.exists(bm.dir)) {
    unlink(bm.dir, recursive = TRUE)
  }
  regis = makeExperimentRegistry(
    file.dir = bm.dir,
    packages = my.setting$packages,
    seed     = round(1000 * pi)
  )
} else {
  # NOTE(review): recent batchtools versions appear to load registries
  # read-only by default; the add* calls further down may then need
  # `writeable = TRUE` here -- confirm against the installed version.
  regis = loadRegistry(bm.dir)
}

# Pick the parallel backend by operating system. On any system other than
# Windows or Linux the registry's cluster functions are left untouched:
switch(
  Sys.info()[["sysname"]],
  Windows = {
    regis$cluster.functions = makeClusterFunctionsSocket(ncpus = my.setting$cores)
  },
  Linux = {
    regis$cluster.functions = makeClusterFunctionsMulticore(ncpus = my.setting$cores)
  }
)

# Define data and algorithm for benchmark:
# ---------------------------------------------------

# Function to simulate data:

# `data` is the static part, which remains the same, while the remaining
# arguments of `fun` are dynamic and can be changed in addExperiments
addProblem(reg = regis, name = "my.data", data = list(noise = 0), fun = function (data, job, n, p) {

  # Number of simulated columns: the p columns that enter the response plus
  # data$noise additional columns.
  n.cols = p + data$noise

  # One Beta(1, 8) distributed magnitude per pair of columns, each combined
  # with a randomly drawn sign:
  magnitudes = rbeta(n = (n.cols * (n.cols - 1)) / 2, shape1 = 1, shape2 = 8)
  signs = sample(c(-1, 1), size = length(magnitudes), replace = TRUE)

  # Symmetric matrix with unit diagonal; the upper triangle receives the signed
  # values and is then mirrored into the lower triangle:
  sigma = diag(n.cols)
  sigma[upper.tri(sigma)] = signs * magnitudes
  sigma[lower.tri(sigma)] = t(sigma)[lower.tri(sigma)]

  # Draw n rows from the multivariate normal distribution with matrix sigma:
  sim = as.data.frame(rmvnorm(n = n, sigma = sigma, method = "svd"))

  # Intercept plus p coefficients, uniformly drawn from [-2, 2]:
  betas = runif(p + 1, min = -2, max = 2)

  # The response is normally distributed around the linear predictor built
  # from a constant column and the first p simulated columns:
  design = as.matrix(cbind(1, sim[, 1:p]))
  sim$y = rnorm(n = n, mean = design %*% betas)

  # Only the simulated data is handed on; the coefficients are not returned.
  list(data = sim)
})


# Function including the algorithm, depending on the parameter for the
# benchmark:

# `instance` is the object generated by addProblem; `data` is again the same
# as in addProblem
# Register each benchmarked implementation under its algorithm name, in the
# order compboost, mboost.fast, mboost:
invisible(Map(
  function(algo.name, algo.fun) {
    addAlgorithm(reg = regis, name = algo.name, fun = algo.fun)
  },
  c("compboost", "mboost.fast", "mboost"),
  list(benchmarkCompboost, benchmarkMboostFast, benchmarkMboost)
))

# Increase number of iterations:
# ------------------------------
addExperiments(
  # Registry file:
  reg = regis,

  # Fixed data dimension. The argument name is spelled out in full instead of
  # relying on partial matching of `prob.design`:
  prob.designs = list(
    my.data = data.frame(n = 2000, p = 1000)
  ),

  # Test different parameters of the algorithm. Every algorithm gets the same
  # grid: all combinations of the configured iteration counts and the two
  # learner types. The result is a list named by algorithm.
  algo.designs = sapply(
    c("compboost", "mboost", "mboost.fast"),
    function(algo) {
      expand.grid(
        iters   = my.setting$bm.iters,
        learner = c("spline", "linear"),
        stringsAsFactors = FALSE
      )
    },
    simplify = FALSE, USE.NAMES = TRUE
  ),

  # Number of replications:
  repls = my.setting$replications
)

# Increase dimension of data:
# ----------------------------------
addExperiments(
  # Registry file:
  reg = regis,

  # Test different data sizes: first vary p with n fixed at 2000, then vary n
  # with p fixed at 1000. The argument name is spelled out in full instead of
  # relying on partial matching of `prob.design`:
  prob.designs = list(
    my.data = data.frame(
      n = c(rep(2000, length(my.setting$p)), my.setting$n),
      p = c(my.setting$p, rep(1000, length(my.setting$n)))
    )
  ),

  # Fix the number of iterations. Every algorithm gets the same two-row design
  # (one row per learner type). The result is a list named by algorithm.
  algo.designs = sapply(
    c("compboost", "mboost", "mboost.fast"),
    function(algo) {
      data.frame(
        iters   = 1500,
        learner = c("spline", "linear"),
        stringsAsFactors = FALSE
      )
    },
    simplify = FALSE, USE.NAMES = TRUE
  ),

  # Number of replications:
  repls = my.setting$replications
)

# submitJobs(findNotDone())
# schalkdaniel/compboost documentation built on April 15, 2023, 9:03 p.m.