#file = "C:/Users/M/Documents/r_seminar/2015-12-21_standard_BBOB_run/CMAES_output_1_1.txt"
#read file
#data = read.table(file, skip = 1, fill = TRUE)
#name columns
#when x1 and x2 are not given
#colnames only for the original BBOB output which we do not use anymore (we use our own framework)
#colnames(data) = c("FE", "fitness_minus_Fopt", "best_fitness_minus_Fopt",
# "measured_fitness", "best_measured_fitness")
#when x1 and x2 are given
#colnames(data) = c("FE", "fitness_minus_Fopt", "best_fitness_minus_Fopt",
# "measured_fitness", "best_measured_fitness", "x1", "x2")
#convert everything to integer
#values that are not integers are indicative of a new function run
#the data frame should be separated at these points
#if (!"BBmisc" %in% rownames(installed.packages())) install.packages("BBmisc")
#if (!"snow" %in% rownames(installed.packages())) install.packages("snow")
#if (!"parallel" %in% rownames(installed.packages())) install.packages("parallel")
#require(BBmisc)
#require(snow)
#require(parallel)
#' @title Interpretation of BBoB Data
#' @description
#' \code{readOutput} is used to read and interpret an output file generated by the benchmarking functions
#' \code{\link{bbob_custom}} or \code{\link{bbob_custom_parallel}}.
#' @details
#' There are several functions for reading and interpreting the output generated by \code{\link{bbob_custom}}
#' or \code{\link{bbob_custom_parallel}}. \code{readOutput} is used to read one single output file and returns a list of
#' results of that data (see value section). The user might execute something as follows:
#' \code{\link{readOutput}}(\code{output_function1}).
#' @param file
#' the exact path to the bbob output file that is to be read and interpreted.
#' @return \code{readOutput} returns an object of the class \code{single_bbob_result}. A list of those can be passed to
#' \code{\link{aggregateResults}}. Each \code{single_bbob_result} is a list that contains the following components:
#' \item{allBest}{best fitness value of each processed function instance.}
#' \item{avgBest}{average best fitness value of all processed function instances.}
#' \item{overallBest}{overall best fitness value of all processed function instances.}
#' \item{overallWorst}{overall worst fitness of all processed function instances.}
#' \item{sdBest}{standard deviation of all best fitness values (\code{sd(allBest)}).}
#' \item{allRuns}{number of iterations of each processed function instance that has been required for optimization.}
#' \item{longestRun}{overall highest required number of iterations of all function instances.}
#' \item{shortestRun}{overall lowest required number of iterations of all function instances.}
#' \item{avgRun}{average required iterations of all function instances.}
#' \item{sdRuns}{standard deviation of required iterations of all function instances (\code{sd(allRuns)}).}
#' \item{allRunsEval}{number of function evaluations of each processed function instance that has been required for optimization.}
#' \item{longestRunEval}{overall highest required number of function evaluations of all function instances.}
#' \item{shortestRunEval}{overall lowest required number of function evaluations of all function instances.}
#' \item{avgRunEval}{average required function evaluations of all function instances.}
#' \item{sdRunsEval}{standard deviation of required function evaluations of all function instances (\code{sd(allRunsEval)}).}
#' \item{allStagnations}{number of final iterations without improvement in the fitness value.}
#' \item{longestStagnation}{highest number of iterations without improvement in the fitness value of all function instances.}
#' \item{shortestStagnation}{lowest number of iterations without improvement in the fitness value of all function instances.}
#' \item{avgStagnation}{average number of final iterations without improvement in the fitness value.}
#' \item{sdStagnations}{standard deviation of the number of final iterations
#' without improvement in the fitness value of all function instances (\code{sd(allStagnations)}).}
#' \item{allConvergence}{a \code{\link{data.frame}} that stores, for each processed instance, the gap between the fitness value and the
#' global optimum at ticks of 100 function evaluations (first column: ticks).}
#' \item{avgConvergence}{the convergence ticks together with the mean of \code{allConvergence} over all processed instances
#' at each tick.}
#' \item{allRestarts}{number of restarts that occurred while optimizing one single function instance.}
#' \item{t_test_termination}{number of t-test terminations while using OCD as a stopping condition.}
#' \item{chi_test_termination}{number of chi-squared-test terminations while using OCD as a stopping condition.}
#' \item{functionID}{the identifier of the bbob function that has been optimized.}
#' \item{dimension}{the problem dimensions for the optimization.}
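#' @examples
#' \dontrun{
#' # Illustrative sketch only: the file name below is hypothetical and stands for an
#' # output file written by bbob_custom() or bbob_custom_parallel().
#' result = readOutput("CMAES_output_1_2.txt")
#' result$avgBest
#' result$allRuns
#' }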
#' @export
######################################################################
#for own output (CMAESr)
readOutput = function(file) {
data = read.table(file, skip = 1, fill = TRUE)
data = suppressWarnings(apply(data, 2, as.double))
#get number of restarts
#indicated by a -1 value in the first column
allRestarts = data[which(data[,1] == -1),2]
#remove restart rows
#check if no restarts were logged (backward-compatibility)
if (length(which(data[,1] == -1)) > 0) data = data[-which(data[,1] == -1),]
#get the type of test that caused termination of cmaes (only logged when OCD is used)
#marker values as read here: a -3 value in the first column is counted as a chi-squared-test
#termination, a -2 value as a t-test termination
chi_test_termination = data[which(data[,1] == -3),2]
t_test_termination = data[which(data[,1] == -2),2]
#remove the rows of the termination markers
if (length(which(data[,1] == -2)) > 0) data = data[-which(data[,1] == -2),]
if (length(which(data[,1] == -3)) > 0) data = data[-which(data[,1] == -3),]
#get separate runs
#get split points (NAs) and increment run counter accordingly
data = as.data.frame(cbind(data, run_id = integer(nrow(data))))
#faster version of run_ids
#get breaks (where one run stopped)
breaks = which(is.na(data[,1]))
#if there is only one instance (e.g. the case of random search)
if (length(breaks) < 2) {
data$run_id = 1
}
else {
#remove every second break (because there are two lines separating different runs, except for the last run)
breaks = breaks[-seq(from = 1, to = length(breaks)-2, by = 2)]
#add one break in front
breaks = c(0, breaks)
for (i in 1:(length(breaks)-1)) {
data$run_id[breaks[i]:breaks[i+1]] = i
}
}
allRunIDs = unique(data$run_id)
##################################
#old
#save all run ids for later use
#allRunIDs = 1
#inBreakPoint = FALSE
#for (i in 1:nrow(data)) {
# data$run_id[i] = run
# if (is.na(data[i,1]) && inBreakPoint == FALSE) {
# run = run + 1
# allRunIDs = c(allRunIDs, run)
# inBreakPoint = TRUE
# }
# if (!is.na(data[i,1]) && inBreakPoint == TRUE) inBreakPoint = FALSE
#}
#remove all run ids overlap (if there are 20 runs, the above code actually detects 21)
#allRunIDs = allRunIDs[1:(length(allRunIDs)-1)]
#remove NA rows
data = data[!is.na(data[,1]),]
#clean fitness values
#due to rounding errors in the cmaesr there might be fitness values below zero
#(i.e. smaller than the global optimum), which does not make sense
data[,3] = ifelse(data[,3] < 0, 0, data[,3])
#get data for fitness
#get overall best fitness
overallBest = min(data[,3])
#get average best
allBest = double()
for (i in allRunIDs) {
allBest = c(allBest, min(data[which(data$run_id %in% i), 3]))
}
avgBest = mean(allBest)
#get sd
sdBest = sd(allBest)
#get worst best fitness
overallWorst = max(allBest)
#get data for iterations
longestRun = max(data[,1])
allRuns = integer()
for (i in allRunIDs) {
allRuns = c(allRuns, max(data[which(data$run_id %in% i), 1]))
}
shortestRun = min(allRuns)
avgRun = mean(allRuns)
sdRuns = sd(allRuns)
#get data for function evaluations
longestRunEval = max(data[,2])
allRunsEval = double()
for (i in allRunIDs) {
allRunsEval = c(allRunsEval, max(data[which(data$run_id %in% i), 2]))
}
shortestRunEval = min(allRunsEval)
avgRunEval = mean(allRunsEval)
sdRunsEval = sd(allRunsEval)
#get number of final iterations without improvement
allStagnations = integer(0)
for (i in seq_along(allRunIDs)) {
allStagnations = c(allStagnations, sum(data[which(data$run_id == allRunIDs[i]), 3] == allBest[i]))
}
longestStagnation = max(allStagnations)
shortestStagnation = min(allStagnations)
avgStagnation = mean(allStagnations)
sdStagnations = sd(allStagnations)
#analyze convergence behavior
#get average convergence
#for this purpose pad all runs that are shorter than the longest run with their last best found value
#this is desired in order to average the convergence over all instances
#since only one result per iteration (not per function evaluation) is logged, the FE counts may differ
#between instances when different numbers of individuals are used
#therefore, the average convergence cannot simply be aggregated over the rows
#instead, store the convergence at ticks of size 100 FEs and find the logged value that is closest to each tick
allConvergenceTicks = seq(from = 1, to = max(data[,2]), by = 100)
allConvergence = NULL
for (i in allRunIDs) {
#get the data for the current run id
tempData = data[which(data$run_id == i),]
#get iteration that corresponds closest to the convergence ticks
iterations = findInterval(allConvergenceTicks, tempData[,2])
#findInterval returns 0 for the first element(s), replace by 1
iterations[which(iterations == 0)] = 1
allConvergence = as.data.frame(cbind(allConvergence, tempData[iterations,3]))
}
allConvergence = as.data.frame(cbind(allConvergenceTicks, allConvergence))
#if there is only one run, no need to average anything
if (ncol(allConvergence) > 2) {
avgConvergence = apply(allConvergence[,-1], 1, mean)
avgConvergence = cbind(allConvergenceTicks, avgConvergence)
}
else avgConvergence = allConvergence
#save information about the function and the dimension
#extract from file name
#this is somewhat dirty, but works if you do not rename files
filePart = substr(file, gregexpr("output", file)[[1]][length(gregexpr("output", file)[[1]])], nchar(file))
split = strsplit(filePart, "_")
functionID = as.numeric(split[[1]][2])
dimension = as.numeric(strsplit(split[[1]][3], "\\.")[[1]][1])
#format return value
result = list(allBest = allBest, avgBest = avgBest, overallBest = overallBest, overallWorst = overallWorst,
sdBest = sdBest,
allRuns = allRuns, longestRun = longestRun, shortestRun = shortestRun, avgRun = avgRun,
sdRuns = sdRuns, allRunsEval = allRunsEval, longestRunEval = longestRunEval,
shortestRunEval = shortestRunEval, avgRunEval = avgRunEval, sdRunsEval = sdRunsEval,
allStagnations = allStagnations, longestStagnation = longestStagnation,
shortestStagnation = shortestStagnation, avgStagnation = avgStagnation,
sdStagnations = sdStagnations, allConvergence = allConvergence, avgConvergence = avgConvergence,
allRestarts = allRestarts, t_test_termination = t_test_termination, chi_test_termination=chi_test_termination,
functionID = functionID, dimension = dimension)
class(result) = "single_bbob_result"
return(result)
}
#' @title Aggregation of Single BBoB Results
#' @description
#' \code{aggregateResults} is used to aggregate the results generated by \code{\link{readOutput}}.
#'
#' @details
#' The function \code{aggregateResults} takes a number of result objects, produced by applying
#' \code{readOutput} on single data files, and aggregates those results.
#' For example, to aggregate the results of two function optimization runs, the user first interprets each single output file by
#' calling \code{\link{readOutput}} on it (e.g. \code{result1 = }\code{\link{readOutput}}(\code{output_function1}) and
#' \code{result2 = }\code{\link{readOutput}}(\code{output_function2})) and then passes these results as a list to
#' \code{\link{aggregateResults}} (e.g. \code{\link{aggregateResults}}(\code{list(result1, result2)})).
#' @param allResults
#' a list of result objects of class \code{single_bbob_result} as produced by \code{\link{readOutput}}.
#' @return \code{aggregateResults} returns a list that contains the following components aggregated over all passed single bbob results
#' (see \code{\link{readOutput}} for information on the non-aggregated single bbob results):
#' \item{aggregatedAllBest}{vector of all best fitness values.}
#' \item{aggregatedAvgBest}{aggregated average best fitness values.}
#' \item{aggregatedOverallBest}{overall best fitness values.}
#' \item{aggregatedOverallWorst}{overall worst fitness values.}
#' \item{aggregatedSDBests}{standard deviations of all best fitness values.}
#' \item{aggregatedAllRuns}{vector of the number of iterations.}
#' \item{aggregatedLongestRun}{overall highest required number of iterations.}
#' \item{aggregatedShortestRun}{overall lowest required number of iterations.}
#' \item{aggregatedAvgRun}{average required iterations.}
#' \item{aggregatedSDRuns}{standard deviation of required iterations (\code{sd(aggregatedAllRuns)}).}
#' \item{aggregatedAllRunsEval}{vector of the number of function evaluations.}
#' \item{aggregatedLongestRunEval}{overall highest required number of function evaluations.}
#' \item{aggregatedShortestRunEval}{overall lowest required number of function evaluations.}
#' \item{aggregatedAvgRunEval}{average required function evaluations.}
#' \item{aggregatedSDRunsEval}{standard deviation of required function evaluations (\code{sd(aggregatedAllRunsEval)}).}
#' \item{aggregatedAllStagnation}{vector of the final iterations without improvement.}
#' \item{aggregatedLongestStagnation}{overall highest number of iterations without improvement.}
#' \item{aggregatedShortestStagnation}{overall lowest number of iterations without improvement.}
#' \item{aggregatedAvgStagnation}{average number of final iterations without improvement in the fitness value.}
#' \item{aggregatedSDStagnation}{standard deviation of the number of final iterations without improvement
#' (sd of \code{aggregatedAllStagnation}).}
#' \item{aggregatedAllConvergence}{a \code{\link{data.frame}} that stores the average convergence of every single bbob result.}
#' \item{aggregatedAvgConvergence}{the mean of the convergence of all single bbob results (\code{mean(aggregatedAllConvergence)}).}
#' \item{aggregatedAllRestarts}{vector of the number of restarts.}
#' \item{aggregated_t_test_termination}{vector of the number of t-test terminations while using OCD as a stopping condition.}
#' \item{aggregated_chi_test_termination}{vector of the number of chi-squared-test terminations while using OCD as a stopping condition.}
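#' @examples
#' \dontrun{
#' # Illustrative sketch only; the file names are hypothetical output files of bbob_custom().
#' result1 = readOutput("CMAES_output_1_2.txt")
#' result2 = readOutput("CMAES_output_2_2.txt")
#' aggregated = aggregateResults(list(result1, result2))
#' aggregated$aggregatedAvgBest
#' }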
#' @export
aggregateResults = function(allResults) {
#do some input checks
if (!is.list(allResults)) stop("input must be of type list")
for (i in 1:length(allResults)) {
if (!inherits(allResults[[i]], "single_bbob_result")) stop("all elements of result list must be of type single_bbob_result")
}
#aggregate results from input single_bbob_results
#aggregate best fitness values
aggregatedAllBest = numeric(0)
for (i in 1:length(allResults)) {
aggregatedAllBest = c(aggregatedAllBest, allResults[[i]]$allBest)
}
aggregatedAvgBest = mean(aggregatedAllBest)
aggregatedOverallBest = min(aggregatedAllBest)
aggregatedOverallWorst = max(aggregatedAllBest)
aggregatedSDBests = sd(aggregatedAllBest)
#aggregate runtimes
aggregatedAllRuns = integer(0)
for (i in 1:length(allResults)) {
aggregatedAllRuns = c(aggregatedAllRuns, allResults[[i]]$allRuns)
}
aggregatedLongestRun = max(aggregatedAllRuns)
aggregatedShortestRun = min(aggregatedAllRuns)
aggregatedAvgRun = mean(aggregatedAllRuns)
aggregatedSDRuns = sd(aggregatedAllRuns)
#aggregate runtimes by function evaluations
aggregatedAllRunsEval = integer(0)
for (i in 1:length(allResults)) {
aggregatedAllRunsEval = c(aggregatedAllRunsEval, allResults[[i]]$allRunsEval)
}
aggregatedLongestRunEval = max(aggregatedAllRunsEval)
aggregatedShortestRunEval = min(aggregatedAllRunsEval)
aggregatedAvgRunEval = mean(aggregatedAllRunsEval)
aggregatedSDRunsEval = sd(aggregatedAllRunsEval)
#aggregate stagnation
aggregatedAllStagnation = integer(0)
for (i in 1:length(allResults)) {
aggregatedAllStagnation = c(aggregatedAllStagnation, allResults[[i]]$allStagnations)
}
aggregatedLongestStagnation = max(aggregatedAllStagnation)
aggregatedShortestStagnation = min(aggregatedAllStagnation)
aggregatedAvgStagnation = mean(aggregatedAllStagnation)
aggregatedSDStagnation = sd(aggregatedAllStagnation)
#aggregate convergence
#follows the same logic as the single convergence aggregation
#except for that entries already correspond to ticks, so we just need to take the row
#corresponding to the current tick
allConvergenceTicks = seq(from = 1, to = aggregatedLongestRunEval, by = 100)
aggregatedAllConvergence = matrix(nrow = length(allConvergenceTicks), ncol = length(allResults))
for (i in 1:length(allResults)) {
currentConvergence = allResults[[i]]$avgConvergence
#find all ticks that are included in the current convergence, pad the rest
currentConvergenceTicks = c(currentConvergence[,1], rep(collapse(currentConvergence[nrow(currentConvergence),1]),
times = (length(allConvergenceTicks) - nrow(currentConvergence))))
#convert to indexes
currentIndexes = ceiling(as.numeric(currentConvergenceTicks)/100)
aggregatedAllConvergence[,i] = currentConvergence[currentIndexes,2]
}
aggregatedAvgConvergence = apply(aggregatedAllConvergence, 1, mean)
aggregatedAvgConvergence = cbind(allConvergenceTicks, aggregatedAvgConvergence)
aggregatedAllConvergence = cbind(allConvergenceTicks, aggregatedAllConvergence)
#aggregate restarts
aggregatedAllRestarts = integer(0)
for (i in 1:length(allResults)) {
aggregatedAllRestarts = c(aggregatedAllRestarts, allResults[[i]]$allRestarts)
}
#aggregate t_test_termination
aggregated_t_test_termination = integer(0)
for (i in 1:length(allResults)) {
aggregated_t_test_termination = c(aggregated_t_test_termination, allResults[[i]]$t_test_termination)
}
#aggregate chi_test_termination
aggregated_chi_test_termination = integer(0)
for (i in 1:length(allResults)) {
aggregated_chi_test_termination = c(aggregated_chi_test_termination, allResults[[i]]$chi_test_termination)
}
#format return value
result = list(aggregatedAllBest = aggregatedAllBest, aggregatedAvgBest = aggregatedAvgBest,
aggregatedOverallBest = aggregatedOverallBest, aggregatedOverallWorst = aggregatedOverallWorst,
aggregatedSDBests = aggregatedSDBests, aggregatedAllRuns = aggregatedAllRuns,
aggregatedLongestRun = aggregatedLongestRun, aggregatedShortestRun = aggregatedShortestRun,
aggregatedAvgRun = aggregatedAvgRun, aggregatedSDRuns = aggregatedSDRuns,
aggregatedAllRunsEval = aggregatedAllRunsEval, aggregatedLongestRunEval = aggregatedLongestRunEval,
aggregatedShortestRunEval = aggregatedShortestRunEval, aggregatedAvgRunEval = aggregatedAvgRunEval,
aggregatedSDRunsEval = aggregatedSDRunsEval, aggregatedAllStagnation = aggregatedAllStagnation,
aggregatedLongestStagnation = aggregatedLongestStagnation,
aggregatedShortestStagnation = aggregatedShortestStagnation,
aggregatedAvgStagnation = aggregatedAvgStagnation,
aggregatedSDStagnation = aggregatedSDStagnation,
aggregatedAllConvergence = aggregatedAllConvergence,
aggregatedAvgConvergence = aggregatedAvgConvergence,
aggregatedAllRestarts = aggregatedAllRestarts,
aggregated_t_test_termination = aggregated_t_test_termination,
aggregated_chi_test_termination = aggregated_chi_test_termination)
return(result)
}
#' @title Empirical Cumulative Distribution Function of BBoB data
#' @description
#' \code{extractECDFofFunctions} returns an empirical cumulative distribution function of the number of function evaluations
#' required to solve the benchmark functions within the desired fitness gap.
#' @details
#' \code{extractECDFofFunctions} can be used to observe which fraction of functions has been solved within the desired fitness gap
#' passed to the function. For example, a bbob experiment could terminate an optimization run after 100000 function evaluations.
#' If the specified fitness gap is not reached after this number of function evaluations, the corresponding function remains unsolved
#' with respect to that specific gap.
#' @param results
#' \code{results} must be a return object of \code{\link{aggregateResults}}, i.e. the aggregated results of several function optimizations.
#' @param fitnessGap
#' the target gap to the global optimum; a function instance counts as solved once its fitness gap falls below this value (default \code{1e-08}).
#' @return
#' \code{extractECDFofFunctions} returns an empirical cumulative distribution function based on bbob data which can be plotted directly.
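#' @examples
#' \dontrun{
#' # Illustrative sketch only; 'aggregated' is assumed to be a return object of aggregateResults().
#' ecdfData = extractECDFofFunctions(aggregated, fitnessGap = 1e-08)
#' # first column: function evaluations, second column: fraction of solved instances
#' plot(ecdfData, type = "s", xlab = "function evaluations", ylab = "fraction solved")
#' }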
#' @export
extractECDFofFunctions = function(results, fitnessGap = 1e-08) {
allConvergence = results$aggregatedAllConvergence[,-1]
thresholds = integer(0)
for (i in 1:ncol(allConvergence)) {
if (!length(which(allConvergence[,i]<fitnessGap)) == 0) {
thresholds = c(thresholds, ((min(which(allConvergence[,i]<fitnessGap))-1) * 100 + 1))
}
}
#sort thresholds ascending
thresholds = sort(thresholds)
#make % values for cumulative distribution function
breaks = seq(from = 1/ncol(allConvergence), to = 1, length.out = ncol(allConvergence))
#remove % values that are not reached (these functions did not reach the desired value)
breaks = breaks[1:length(thresholds)]
#add a point (all iterations,max(breaks)) with the maximum number of FEs in order to show the stagnation in the plot
#max 100001 are evaluated if there are 100000 FEs
breaks = c(breaks, max(breaks))
thresholds = c(thresholds, (nrow(allConvergence)-1) * 100 + 1)
#add (0,0) for better plots and return
return(rbind(c(0,0), cbind(thresholds, breaks)))
}
#' @name load_results
#' @aliases loadAllResults
#' @aliases loadAllResultsParallel
#' @title Load All Single BBoB Results From a Folder
#' @description
#' \code{loadAllResults} loads the bbob results of a number of single bbob output files generated by \code{\link{bbob_custom}} or
#' \code{\link{bbob_custom_parallel}}.
#' @details
#' \code{loadAllResults} detects and reads all output files to be interpreted,
#' i.e. \code{\link{readOutput}} is applied to every single file specified by \code{usedFunctions, usedDimensions, path, algorithmName}.
#' For parallelisation of the loading process, the user might invoke \code{loadAllResultsParallel} with the same parameter setup.
#' @param usedFunctions
#' the function identifiers for which bbob data exists and interpretation is desired.
#' @param usedDimensions
#' the problem dimensions that have been used for optimization.
#' @param path
#' the path of the folder containing the output data of a bbob experiment.
#' @param algorithmName
#' the identifier of the optimizer used for detecting single output files in the specified folder.
#' @return
#' \code{loadAllResults} returns an object \code{allResults}, which is a list of single bbob results as generated by \code{readOutput}
#' (see \code{\link{readOutput}} for details on the lists' elements).
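#' @examples
#' \dontrun{
#' # Illustrative sketch only: folder and algorithm name are hypothetical and must match
#' # the naming convention <algorithmName>_output_<functionID>_<dimension>.txt.
#' allResults = loadAllResults(usedFunctions = 1:24, usedDimensions = c(2, 5, 10, 20),
#'                             path = "CMAES_default_with_restart", algorithmName = "cmaes")
#' aggregated = aggregateResults(allResults)
#' }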
#' @export
#loads all results that correspond to the naming conventions used by bbob_custom
loadAllResults = function(usedFunctions, usedDimensions, path, algorithmName) {
allResults = NULL
pbar = makeProgressBar(min = 0, max = length(usedFunctions)*length(usedDimensions))
pbar$set(0)
for (i in 1:length(usedFunctions)) {
for (j in 1:length(usedDimensions)) {
file = paste(path, "/", algorithmName, "_output_", usedFunctions[i], "_", usedDimensions[j], ".txt", sep = "")
result = readOutput(file)
if (is.null(allResults)) allResults = list(result)
else allResults = c(allResults, list(result))
pbar$set((i-1)*length(usedDimensions)+j)
}
}
return(allResults)
}
#' @rdname load_results
#' @importFrom parallel detectCores
#' @importFrom snow makeCluster stopCluster clusterApply clusterExport
#' @export
#loads all results that correspond to the naming conventions used by bbob_custom (parallel version)
#gets more efficient the more different functions were used
loadAllResultsParallel = function(usedFunctions, usedDimensions, path, algorithmName) {
allResults = NULL
pbar = makeProgressBar(min = 0, max = length(usedDimensions))
pbar$set(0)
nCores = parallel::detectCores()
cluster = snow::makeCluster(nCores, type = "SOCK")
#export all environment functions
ex = Filter(function(x) is.function(get(x, .GlobalEnv)), ls(.GlobalEnv))
snow::clusterExport(cluster, ex)
for (i in 1:length(usedDimensions)) {
results = snow::clusterApply(cl = cluster, x = usedFunctions, function(x) readOutput(
paste(path, "/", algorithmName, "_output_", x, "_", usedDimensions[i], ".txt", sep = "")
))
if (is.null(allResults)) allResults = results
else allResults = c(allResults, results)
pbar$set(i)
}
snow::stopCluster(cluster)
#now order the results as they might be out of order due to the parallel jobs
sortedResults = NULL
for (i in usedFunctions) {
for (j in usedDimensions) {
for (k in 1:length(allResults)) {
if (allResults[[k]]$functionID == i && allResults[[k]]$dimension == j) {
if (is.null(sortedResults)) sortedResults = allResults[k]
else sortedResults = c(sortedResults, allResults[k])
}
}
}
}
return(sortedResults)
}
#' @name allresults_processing
#' @aliases getAggregatedConvergenceFunctions
#' @aliases getAvgBestPerFunctionAndDimension
#' @aliases getAvgBestPerFunction
#' @aliases getAvgBestPerDimension
#' @title Interpretation of Aggregated BBoB Results
#' @description
#' \code{allresults_processing} is a collection of functions for interpreting aggregated bbob results
#' (i.e. a return object of \code{\link{aggregateResults}}).
#' @details
#' The functions as defined above serve the following purposes:
#' \describe{
#' \item{\code{getAggregatedConvergenceFunctions}}{A function that averages the convergence
#' for each optimized function over all dimensions specified.}
#' \item{\code{getAvgBestPerFunctionAndDimension}}{A function that averages the best results
#' for each combination of function and dimension specified.}
#' \item{\code{getAvgBestPerFunction}}{A function that averages the best results
#' for each function over all dimensions specified.}
#' \item{\code{getAvgBestPerDimension}}{A function that averages the best results
#' for each dimension over all functions specified.}
#' }
#' @param results
#' \code{results} must be a return object of \code{\link{aggregateResults}}, i.e. the aggregated results of several function optimizations.
#' @param nFunctions
#' the number of functions for which data exists and for which results should be computed.
#' @param nDimensions
#' the number of dimensions for which data exists and results should be computed (nDimensions is the total number of logged dimensions,
#' i.e. \code{nDimensions} has to be a counting value, not the actual dimensionality)
#' @return The return objects are function specific and as follows:
#' \item{getAggregatedConvergenceFunctions}{A matrix of the fitness gaps for each function averaged over all dimensions.}
#' \item{getAvgBestPerFunctionAndDimension}{A vector that stores the average best value (mean over all instances)
#' for each function and each dimension of that function.}
#' \item{getAvgBestPerFunction}{A vector that stores the average best value (mean over all instances) for each function over
#' all dimensions (dimensions are not considered separately).}
#' \item{getAvgBestPerDimension}{A vector that stores the average best value
#' for each dimension over all functions (functions are not considered separately).}
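#' @examples
#' \dontrun{
#' # Illustrative sketch only; 'aggregated' is assumed to cover 24 functions logged in
#' # 4 dimensions each (i.e. nFunctions = 24, nDimensions = 4).
#' convPerFunction = getAggregatedConvergenceFunctions(aggregated, nFunctions = 24, nDimensions = 4)
#' bestPerFunction = getAvgBestPerFunction(aggregated, nFunctions = 24, nDimensions = 4)
#' bestPerDimension = getAvgBestPerDimension(aggregated, nFunctions = 24, nDimensions = 4)
#' }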
#' @export
#get convergence averaged per function (over all dimensions)
getAggregatedConvergenceFunctions = function(results, nFunctions, nDimensions) {
allConvergence = results$aggregatedAllConvergence
ticks = allConvergence[,1]
allConvergence = allConvergence[,-1]
aggregatedConvergenceFunctions = matrix(nrow = nrow(allConvergence), ncol = nFunctions, data = 0)
for (i in 1:nFunctions) {
aggregatedConvergenceFunctions[,i] = apply(allConvergence[,((i-1) * nDimensions + 1):(i * nDimensions)],
1, mean)
}
aggregatedConvergenceFunctions = cbind(ticks, aggregatedConvergenceFunctions)
return(aggregatedConvergenceFunctions)
}
#' @rdname allresults_processing
#' @export
#get best results averaged per function and dimension
getAvgBestPerFunctionAndDimension = function(results, nFunctions, nDimensions) {
avgBest = double(0)
nInstances = length(results$aggregatedAllBest)/nFunctions/nDimensions
for (i in 1:(nFunctions*nDimensions)) {
avgBest = c(avgBest, mean(results$aggregatedAllBest[((i-1)*nInstances+1):(i*nInstances)]))
}
return(avgBest)
}
#' @rdname allresults_processing
#' @export
#get best results averaged per function
getAvgBestPerFunction = function(results, nFunctions, nDimensions) {
avgBest = double(0)
nInstances = length(results$aggregatedAllBest)/nFunctions/nDimensions
for (i in 1:nFunctions) {
avgBest = c(avgBest, mean(results$aggregatedAllBest[((i-1)*nInstances*nDimensions+1):(i*nInstances*nDimensions)]))
}
return(avgBest)
}
#' @rdname allresults_processing
#' @export
#get best results averaged per dimension
getAvgBestPerDimension = function(results, nFunctions, nDimensions) {
avgBest = double(0)
nInstances = length(results$aggregatedAllBest)/nFunctions/nDimensions
for (i in 1:nDimensions) {
currentAvg = double(0)
for (j in 1:nFunctions) {
indexes = (((j-1)*nDimensions*nInstances+1+(i-1)*nInstances):((j-1)*nDimensions*nInstances+i*nInstances))
currentAvg = c(currentAvg, mean(results$aggregatedAllBest[indexes]))
}
currentAvg = mean(currentAvg)
avgBest = c(avgBest, currentAvg)
}
return(avgBest)
}
#' @title
#' Get the Number of Active Functions in an Iteration
#' @description
#' \code{getActiveFunctions} returns the number of functions per iteration that have not yet stopped.
#' Functions stop, e.g., when a certain solution quality is reached.
#' @param results
#' \code{results} must be a return object of \code{\link{aggregateResults}}, i.e. the aggregated results of several function optimizations.
#' @return
#' \code{getActiveFunctions} returns a vector that contains the number of functions (the number of instances of all functions)
#' that are not stopped (per iteration).
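#' @examples
#' \dontrun{
#' # Illustrative sketch only; 'aggregated' is assumed to be a return object of aggregateResults().
#' active = getActiveFunctions(aggregated)
#' # one entry per tick of 100 function evaluations
#' plot(active, type = "l", xlab = "tick (100 FEs)", ylab = "runs not yet stopped")
#' }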
#' @export
getActiveFunctions = function(results) {
notConverged = integer(0)
allRunsEval = results$aggregatedAllRunsEval
ticks = seq(from = 1, to = results$aggregatedLongestRunEval, by = 100)
for (i in ticks) {
currentlyNotConverged = 0
#track the runs that did not satisfy the if clause and remove them from the vector
#(because they will never satisfy it again)
removeVector = integer(0)
for (j in 1:length(allRunsEval)) {
if (allRunsEval[j] > i) currentlyNotConverged = currentlyNotConverged + 1
else removeVector = c(removeVector, j)
}
if (length(removeVector) > 0) allRunsEval = allRunsEval[-removeVector]
j = j - length(removeVector)
notConverged = c(notConverged, currentlyNotConverged)
}
return(notConverged)
}
#' @title Average Convergence per Function and/or per Dimension
#' @description
#' \code{averageConvergence} returns a matrix with the convergence values per function or per dimension or per a combination of both.
#' @param allConvergence
#' matrix of convergence values
#' @param nDimensions
#' the number of dimensions for which data exists and results should be computed (nDimensions is the total number of logged dimensions,
#' i.e. \code{nDimensions} has to be a counting value, not the actual dimensionality)
#' @param includedFunctions
#' functions for which the average convergence should be computed
#' @param includedDimensions
#' dimensions for which the average convergence should be computed
#' @return
#' \code{averageConvergence} returns the average of the convergence values for the functions and/or dimensions specified when calling the function.
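#' @examples
#' \dontrun{
#' # Illustrative sketch only; 'aggregated' is assumed to be a return object of aggregateResults()
#' # with 4 logged dimensions per function.
#' avgConv = averageConvergence(aggregated$aggregatedAllConvergence,
#'                              includedFunctions = 1:5, includedDimensions = 1:4,
#'                              nDimensions = 4)
#' }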
#average convergence per function or per dimension or per a combination of both
#nDimensions is the total number of logged dimensions, not only of the included ones
#included dimensions has to be a counting value, not the actual dimensionality
averageConvergence = function(allConvergence, includedFunctions, includedDimensions, nDimensions) {
avgConvergence = numeric(nrow(allConvergence))
tempConvergence = allConvergence[,-1]
for (i in includedFunctions) {
for (j in includedDimensions) {
avgConvergence = avgConvergence + tempConvergence[,i*nDimensions-nDimensions+j]
}
}
avgConvergence = avgConvergence / (length(includedFunctions)*length(includedDimensions))
avgConvergence = cbind(allConvergence[,1], avgConvergence)
return(avgConvergence)
}
#checks whether all required logs for the R file output_analysis.R exist
#' @export
checkLogCompleteness = function(usedFunctions = 1:24, usedDimensions = c(2, 5, 10, 20), nInstances = 15) {
checkSuccessful = TRUE
#get all directories in current working directory
allDirs = dir()[file.info(dir())$isdir]
allDirs = c(allDirs, paste("OCD_parametrization/", dir("./OCD_parametrization"), sep = ""))
requiredDirs = c("CMAES_default_with_restart", "CMAES_OCD_no_restarts", "CMAES_only_default", "GA_default",
"GA_OCD", "OCD_disp", "OCD_disp_fit", "OCD_evo", "OCD_evo_disp", "OCD_evo_disp_fit",
"OCD_evo_fit", "OCD_fit", "Random_Search_100000", "OCD_parametrization/OCD_RUN_0.01_10",
"OCD_parametrization/OCD_RUN_0.01_100", "OCD_parametrization/OCD_RUN_0.01_1000", "OCD_parametrization/OCD_RUN_0.001_10",
"OCD_parametrization/OCD_RUN_0.001_100", "OCD_parametrization/OCD_RUN_0.001_1000",
"OCD_parametrization/OCD_RUN_0.0001_10", "OCD_parametrization/OCD_RUN_0.0001_100",
"OCD_parametrization/OCD_RUN_0.0001_1000", "OCD_parametrization/OCD_RUN_0.00001_10",
"OCD_parametrization/OCD_RUN_0.00001_100","OCD_parametrization/OCD_RUN_0.00001_1000",
"CMAES_default_with_restart2",
"GA_default2", "OCD_evo_disp2", "GA_OCD2")
pbar = makeProgressBar(min = 1, max = length(requiredDirs))
for (i in 1:length(requiredDirs)) {
if (length(grep(requiredDirs[i], allDirs)) == 0) {
print(paste("Required directory", requiredDirs[i], "is missing."))
checkSuccessful = FALSE
}
}
#list all names of the algorithms, the dimensions and functions to check for names
algorithmNames = c("cmaes", "CMAES_OCD", "GA", "random search")
#match algorithm names to dirs
dirAlgorithmMatch = c(1, 2, 1, 3, 3, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 2, 3)
#check if all required .txt files exist
for (i in 1:length(requiredDirs)) {
pbar$set(i)
currentDir = requiredDirs[i]
currentFiles = dir(currentDir)
for (j in 1:length(usedFunctions)) {
for (k in 1:length(usedDimensions)) {
currentFile = paste(algorithmNames[dirAlgorithmMatch[i]], "_output_", usedFunctions[j], "_", usedDimensions[k], ".txt",
sep = "")
if (length(grep(currentFile, currentFiles, ignore.case = TRUE)) == 0) {
print(paste("Required file", currentFile, "in directory", currentDir, "is missing"))
checkSuccessful = FALSE
}
}
}
}
#separate test for the restart run test
if (length(grep("CMAES_restart_test", allDirs)) == 0) {
print("Required directory CMAES_restart_test is missing.")
checkSuccessful = FALSE
}
else {
currentFiles = dir("CMAES_restart_test")
for (i in 1:6) {
currentFile = paste("cmaes", i, "_output_12_20.txt", sep = "")
if (length(grep(currentFile, currentFiles)) == 0) {
print(paste("Required file", currentFile, "in directory CMAES_restart_test is missing."))
checkSuccessful = FALSE
}
}
}
if(checkSuccessful) print("Syntax check revealed no anomalies. Proceed to generate output")
return(checkSuccessful)
}