# sourceCpp('src/align.cc')
# sourceCpp('src/read.cc')
#' Reduce the size of the data set by evenly subsampling the records
#'
#' @param df The data to subsample
#' @param n The amount of samples
#' @return A smaller data.frame
limit.data <- function(df, n) {
N <- nrow(df)
if (N > n) {
idx <- unique(c(1, seq(1, N, length.out = n), N))
df[idx,]
} else
df
}
#' Scan *.info files for IOHProfiler or COCO
#'
#' @param folder The folder containing the .info or .json files
#' @return The paths to all found .info and .json-files
#' @export
#' @note This automatically filetrs our files of size 0
#' @examples
#' path <- system.file("extdata", "ONE_PLUS_LAMDA_EA", package="IOHanalyzer")
#' scan_index_file(path)
scan_index_file <- function(folder) {
folder <- trimws(folder)
files <-
list.files(
folder,
pattern = '.(info|json)$',
recursive = T,
full.names = T
)
files[file.size(files) > 0]
}
#' Read .info files and extract information
#'
#' @param fname The path to the .info file
#' @return The data contained in the .info file
#' @export
#' @examples
#' path <- system.file("extdata", "ONE_PLUS_LAMDA_EA", package="IOHanalyzer")
#' info <- read_index_file(file.path(path,"IOHprofiler_f1_i1.info"))
read_index_file <- function(fname) {
format <- tools::file_ext(fname)
if (format == 'json')
read_index_file__json(fname)
else {
tryCatch(
read_index_file__IOH(fname),
warning = function(e)
read_index_file__COCO(fname),
error = function(e)
read_index_file__COCO(fname),
finally = function(e)
stop(paste0('Error in reading .info files ', e))
)
}
}
#' Read IOHprofiler-based .json files and extract information
#'
#' @param fname The path to the json info-file
#' @return The data contained in the json info-file
#' @noRd
read_index_file__json <- function(fname) {
json_data <- fromJSON(file = fname)
base_dir <- dirname(fname)
exp_attrs <-
sapply(json_data$experiment_attributes, function(x) {
x
})
data <- list()
tryCatch({
fid <- json_data$function_id
fname <- json_data$function_name
suite <- json_data$suite
maximization <- json_data$maximization
algid <- json_data$algorithm$name
attributes <- json_data$attributes
version <- json_data$version
}, error = function(e) {
return(NULL)
})
data <- lapply(json_data$scenarios, function(scenario) {
run_attrs <- list()
for (run_attr in json_data$run_attributes) {
attr(run_attrs, run_attr) <-
sapply(scenario$runs, function(x)
x$run_attr)
}
datafile <- file.path(base_dir, scenario$path)
temp <- c(
list(
funcId = fid,
funcName = fname,
suite = suite,
maximization = maximization,
algId = algid,
DIM = scenario$dimension,
attributes = attributes,
version = version,
datafile = datafile,
instance = sapply(scenario$runs, function(x)
x$instance),
maxRT = sapply(scenario$runs, function(x)
x$evals),
finalFV = sapply(scenario$runs, function(x)
x$best$y),
final_pos = sapply(scenario$runs, function(x)
x$best$x)
),
run_attrs,
exp_attrs
)
})
data
}
#' Read IOHprofiler-based .info files and extract information
#'
#' @param fname The path to the .info file
#' @return The data contained in the .info file
#' @noRd
read_index_file__IOH <- function(fname) {
f <- file(fname, 'r')
path <- dirname(fname)
data <- list()
i <- 1
while (TRUE) {
# TODO: remove suppressWarnings later
lines <- suppressWarnings(readLines(f, n = 3))
if (length(lines) == 0)
break
# TODO: make this quote symbol ' or " as the configurable parameter
# TODO: Fix this
name_value <-
read.csv(text = lines[1],
header = F,
quote = c("\"", "'")) %>%
as.list %>%
unlist %>%
as.vector
header <- name_value %>%
trimws %>% {
regmatches(., regexpr("=", .), invert = T) # match the first appearance of '='
} %>%
unlist %>%
trimws %>%
matrix(nrow = 2) %>% {
ans <- as.list(.[2,])
names(ans) <- .[1,]
for (name in .[1,]) {
value <- ans[[name]]
ans[[name]] <- gsub("'", '', value)
if (name == 'maximization' || name == 'constrained')
value <- as.logical(value)
else
value <-
suppressWarnings(as.numeric(value)) # convert quoted numeric values to numeric
if (!is.na(value))
ans[[name]] <- value
}
ans
}
record <- trimws(strsplit(lines[3], ',')[[1]])
if (length(record) == 1) {
next
}
has_dynattr <- !is.null(header$dynamicAttribute)
# TODO: this must also be removed...
if (record[2] == "") {
warning(sprintf('File %s is incomplete!', fname))
finalFVs <- NULL
instances <- NULL
maxRTs <- NULL
} else {
res <- matrix(unlist(strsplit(record[-1], ':')), nrow = 2)
info <- matrix(unlist(strsplit(res[2,], '\\|')), nrow = 2)
#Check for incorrect usages of reset_problem and remove them
maxRTs <- as.numeric(info[1, ])
idx_correct <- which(maxRTs > 0)
if (has_dynattr) {
info_split <- strsplit(info[2, ], ';')
finalFVs <-
as.numeric(sapply(info_split, `[[`, 1))[idx_correct]
dynamic_attrs <- sapply(info_split, `[[`, 2)[idx_correct]
# dynamic_attrs <- dynamic_attrs[idx_correct
}
else {
finalFVs <- as.numeric(info[2, ])[idx_correct]
}
instances <- as.numeric(res[1, ])[idx_correct]
maxRTs <- maxRTs[idx_correct]
}
record[1] <- gsub("\\\\", "/", record[1])
datafile <- file.path(path, record[1])
attr_list = list(
comment = lines[2],
datafile = datafile,
instance = instances,
maxRT = maxRTs,
finalFV = finalFVs
)
if (has_dynattr) {
attr_list[[header$dynamicAttribute]] <- dynamic_attrs
}
# TODO: Make this code more readable
data[[i]] <- c(header,
attr_list)
i <- i + 1
}
close(f)
datafiles <- unlist(lapply(data, function(x)
x$datafile))
if (length(datafiles) > length(unique(datafiles)))
return(merge_indexinfo(data))
else
return(data)
}
#' Process IOHprofiler-based .info files if they contain multiple references
#' to a single data-file
#'
#' This is needed to assure that the meta-information is concatenated properly
#' and no datafile is processed more often than nessecary
#'
#' @param indexInfo The info-list to reduce
#' @return a reduced version of the provided indexInfo, preserving original order
#' @noRd
merge_indexinfo <- function(indexInfo) {
datafiles <- unlist(lapply(indexInfo, function(x)
x$datafile))
lapply(unique(datafiles), function(dfile) {
new_info <- list()
idxs <- datafiles == dfile
infos <- indexInfo[idxs]
nr_runs <- length(unlist(lapply(infos, function(x)
x$instance)))
for (a in attributes(infos[[1]])$names) {
temp <- unlist(lapply(infos, function(x)
x[[a]]))
if (length(temp) == nr_runs)
new_info[[a]] <- temp
else
new_info[[a]] <- unique(temp)
}
new_info
})
}
#' Read single-objective COCO-based .info files and extract information
#'
#' @param fname The path to the .info file
#' @return The data contained in the .info file
#' @noRd
read_index_file__COCO <- function(fname) {
f <- file(fname, 'r')
path <- dirname(fname)
data <- list()
i <- 1
while (TRUE) {
lines <-
suppressWarnings(readLines(f, n = 3)) # read header and comments
if (length(lines) < 3) {
break
}
comment <- lines[2]
name_value <-
as.vector(unlist(as.list(
read.csv(
text = lines[1],
header = F,
quote = "'"
)
)))
header <- trimws(name_value) %>% {
regmatches(., regexpr("=", .), invert = T) # match the first appearance of '='
} %>%
unlist %>%
trimws %>%
matrix(nrow = 2) %>% {
ans <- as.list(.[2,])
names(ans) <- .[1,]
for (name in .[1,]) {
value <- ans[[name]]
ans[[name]] <- gsub("'", '', value)
value <-
suppressWarnings(as.numeric(value)) # convert quoted numeric values to numeric
if (!is.na(value))
ans[[name]] <- value
}
ans
}
names(header) <- gsub('algorithm', 'algId', names(header))
record <- strsplit(lines[3], ',')[[1]] %>% trimws
if (length(record) < 2) {
warning(sprintf('File %s is incomplete!', fname))
res <- NULL
info <- NULL
} else {
res <- matrix(unlist(strsplit(record[-c(1)], ':')), nrow = 2)
info <-
matrix(as.numeric(unlist(strsplit(res[2,], '\\|'))), nrow = 2)
}
record[1] <- gsub("\\\\", "/", record[1])
if ('folder' %in% names(header))
datafile <- file.path(path, header$folder, record[1])
else
datafile <- file.path(path, record[1])
# TODO: check the name of the attributes and fix them!
data[[i]] <- c(
header,
list(
comment = comment,
datafile = datafile,
instance = as.numeric(res[1,]),
maxRT = info[1,],
finalFV = info[2,]
)
)
i <- i + 1
}
close(f)
data
}
#' Read bi-objective COCO-based .info files and extract information
#'
#' @param fname The path to the .info file
#' @return The data contained in the .info file
#' @noRd
read_index_file__BIOBJ_COCO <- function(fname) {
f <- file(fname, 'r')
path <- dirname(fname)
data <- list()
i <- 1
lines <-
suppressWarnings(readLines(f, n = 2)) # read header and comments
comment <- lines[2]
name_value <-
as.vector(unlist(as.list(
read.csv(
text = lines[1],
header = F,
quote = "'"
)
)))
header <- trimws(name_value) %>% {
regmatches(., regexpr("=", .), invert = T) # match the first appearance of '='
} %>%
unlist %>%
trimws %>%
matrix(nrow = 2) %>% {
ans <- as.list(.[2,])
names(ans) <- .[1,]
for (name in .[1,]) {
value <- ans[[name]]
ans[[name]] <- gsub("'", '', value)
value <-
suppressWarnings(as.numeric(value)) # convert quoted numeric values to numeric
if (!is.na(value))
ans[[name]] <- value
}
ans
}
names(header) <- gsub('algorithm', 'algId', names(header))
while (TRUE) {
# TODO: remove suppressWarnings later
lines <- suppressWarnings(readLines(f, n = 1))
if (length(lines) == 0)
break
record <- strsplit(lines[1], ',')[[1]] %>% trimws
# TODO: this must also be removed...
if (record[4] == "") {
warning(sprintf('File %s is incomplete!', fname))
res <- NULL
info <- NULL
} else {
res <- matrix(unlist(strsplit(record[-c(1, 2, 3)], ':')), nrow = 2)
info <-
matrix(as.numeric(unlist(strsplit(res[2,], '\\|'))), nrow = 2)
}
record[3] <- gsub("\\\\", "/", record[3])
if ('folder' %in% names(header))
datafile <- file.path(path, header$folder, record[3])
else
datafile <- file.path(path, record[3])
funcId <- trimws(strsplit(record[1], '=')[[1]][2])
funcId.int <- suppressWarnings(as.integer(funcId.int))
if (!any(is.na(funcId.int))) {
if (all((funcId.int >= 0L) & (funcId.int <= 1000000000L))) {
funcId <- funcId.int
}
}
DIM <- as.numeric(trimws(strsplit(record[2], '=')[[1]][2]))
# TODO: check the name of the attributes and fix them!
data[[i]] <- c(
header,
list(
comment = comment,
funcId = funcId,
DIM = DIM,
datafile = datafile,
instance = as.numeric(res[1,]),
maxRT = info[1,],
finalFV = info[2,]
)
)
i <- i + 1
}
close(f)
data
}
#' Check the format of data
#'
#' Throws a warning when multiple formats are found in the same folder.
#'
#' @param path The path to the folder to check
#' @return The format of the data in the given folder. Either 'COCO', 'IOHprofiler',
#' 'NEVERGRAD' or 'SOS'.
#' @export
#' @examples
#' path <- system.file("extdata", "ONE_PLUS_LAMDA_EA", package = "IOHanalyzer")
#' check_format(path)
check_format <- function(path) {
if (sub('[^\\.]*\\.', '', basename(path), perl = T) == "csv")
return(NEVERGRAD)
if (sub('[^\\.]*\\.', '', basename(path), perl = T) == "rds")
return("RDS")
index_files <- scan_index_file(path)
if (length(index_files) == 0)
return(SOS)
info <-
unlist(lapply(index_files, read_index_file), recursive = F)
if (all(unlist(lapply(info, function(x) {
return (ifelse(is.null(x$version), F, (
compareVersion(x$version, "0.3.3") >= 0
)))
})))) {
return(IOHprofiler)
}
datafile <- sapply(info, function(item)
item$datafile)
format <- lapply(datafile, function(file) {
tryCatch({
if (!file.exists(file)) {
cdatfile <- stri_replace(file, ".cdat", fixed = ".dat")
tdatfile <- stri_replace(file, ".tdat", fixed = ".dat")
if (file.exists(cdatfile))
file <- cdatfile
else
file <- tdatfile
}
first_line <-
scan(
file,
what = 'character',
sep = '\n',
n = 1,
quiet = T
)
}, error = function(e) {
stop(
"Error detecting data files specified in .info, please verify the
integrity of the provided files."
)
})
if (startsWith(first_line, '% function') ||
startsWith(first_line, '% f evaluations'))
COCO
else if (startsWith(first_line, '\"function')) {
n_col <- ncol(fread(
file,
header = FALSE,
sep = ' ',
colClasses = 'character',
fill = T,
nrows = 1
))
if (n_col == 2)
TWO_COL
else
IOHprofiler
}
else if (first_line == '%')
# Bi-objective COCO format...
BIBOJ_COCO
else {
stop(
"Error detecting file format of file ",
file,
"; Please verify
the integrity of this file."
)
}
}) %>%
unlist %>%
unique
csv_files <-
file.path(path, list.files(path, pattern = '.csv', recursive = T))
if (length(csv_files) > 0)
format <- c(format, NEVERGRAD)
txt_files <-
file.path(path, list.files(path, pattern = '.txt', recursive = T))
if (length(txt_files) > 0)
format <- c(format, SOS)
if (length(format) > 1) {
stop(
paste(
path,
'contains multiple data formats. This is not allowed for data processing.
Please check the returned dataframe for more information.'
)
)
} else
format
}
#' Read IOHProfiler *.dat files
#'
#' @param fname The path to the .dat file
#' @param subsampling Whether to subsample the data or not
#' @noRd
#' @return A list of data.frames
read_dat <- function(fname, subsampling = FALSE) {
# TODO: use the same data loading method as in read_dat__COCO
df <-
fread(
fname,
header = FALSE,
sep = ' ',
colClasses = 'character',
fill = T
)
colnames(df) <- as.character(df[1,])
idx <- which(!grepl('\\d+', df[[1]], perl = T))
# check for data consistence
header_len <- min(apply(df[idx,] != "", 1, sum))
idx <- c(idx, nrow(df) + 1)
df <- df[, 1:header_len]
# turn off the warnings of the data coersion below
options(warn = -1)
# TOOD: this opeartor is the bottelneck
df <- sapply(df, function(c) {
class(c) <- 'numeric'
c
})
options(warn = 0)
res <- lapply(seq(length(idx) - 1), function(i) {
i1 <- idx[i] + 1
i2 <- idx[i + 1] - 1
ans <- df[i1:i2,]
if (i1 == i2)
ans <- as.matrix(t(ans))
# TODO: determine the number of record in the 'efficient mode'
if (subsampling)
ans <- limit.data(ans, n = 500)
else
ans
})
res
}
# TODO: this method is deprecated. Remove it later
# TODO: maybe not subsampling for COCO data
#' read COCO '.dat'-like file
#'
#' @param fname The path to the .dat file
#' @param subsampling Whether to subsample the data or not
#' @noRd
#' @return A list of data.frames
read_dat__COCO_ <- function(fname, subsampling = FALSE) {
c_read_dat(path.expand(fname), 7, '%')
}
#' read COCO '.dat'-like file directly in R
#'
#' @param fname The path to the .dat file
#' @param subsampling Whether to subsample the data or not
#' @noRd
#' @return A list of data.frames
read_dat__COCO <- function(fname, subsampling = FALSE) {
select <- seq(5)
# read the file as a character vector (one string per row)
X <-
fread(fname,
header = FALSE,
sep = '\n',
colClasses = 'character')[[1]]
idx <- which(startsWith(X, '%'))
X <- gsub('\\s+|\\t', ' ', X, perl = T)
df <-
fread(
text = X[-idx],
header = F,
sep = ' ',
select = select,
fill = T
)
idx <- c((idx + 1) - seq_along(idx), nrow(df))
lapply(seq(length(idx) - 1),
function(i) {
i1 <- idx[i]
i2 <- idx[i + 1] - 1
as.matrix(df[i1:i2,])
})
}
read_dat__BIOBJ_COCO <- function(fname, subsampling = FALSE) {
if (endsWith(fname, '.dat'))
select <- seq(3)
else if (endsWith(fname, '.tdat'))
select <- seq(2)
# read the file as a character vector (one string per row)
X <-
fread(fname,
header = FALSE,
sep = '\n',
colClasses = 'character')[[1]]
idx <- which(startsWith(X, '%'))
X <- gsub('\\s+|\\t', ' ', X, perl = T)
df <-
fread(
text = X[-idx],
header = F,
sep = ' ',
select = select,
fill = T
)
idx <- which(startsWith(X, '% function'))
idx <- c((idx + 1) - seq_along(idx) * 4, nrow(df))
lapply(seq(length(idx) - 1), function(i) {
i1 <- idx[i]
i2 <- idx[i + 1] - 1
as.matrix(df[i1:i2,])
})
}
# global variables for the alignment functions
idxEvals <- 1
idxTarget <- 3
n_data_column <- 5
# TODO: add docs to the following three functions
check_contiguous <- function(data) {
sapply(data,
function(d) {
v <- d[, idxEvals]
N <- length(v)
v[1] == 1 && v[N] == N
}) %>%
all
}
align_contiguous <- function(data, idx, rownames) {
N <- length(data)
nrow <- length(rownames)
lapply(data,
function(d) {
v <- d[, idx]
r <- nrow - length(v)
if (r > 0) {
v <- c(v, rep(v[length(v)], r))
}
v
}) %>%
unlist %>%
matrix(nrow = nrow, ncol = N) %>%
set_rownames(rownames)
}
align_non_contiguous <- function(data, idx, rownames) {
N <- length(data)
nrow <- length(rownames)
lapply(data,
function(d) {
y <- d[, idxEvals]
y[is.na(y)] <-
Inf #Prevent problems with NA when no improvements are found in a run
c_impute(d[, idx], y, rownames)
}) %>%
unlist %>%
matrix(nrow = nrow, ncol = N) %>%
set_rownames(rownames)
}
#' Align data by runtimes
#' @param data The data to align
#' @param format Whether the data is form IOHprofiler or COCO
#' @param include_param Whether to include the recorded parameters in the alignment
#' @noRd
#' @return Data aligned by the running time
align_running_time <-
function(data,
format = IOHprofiler,
include_param = TRUE,
maximization = TRUE) {
if (format == IOHprofiler)
idxTarget <- 3
else if (format == COCO)
idxTarget <- 3
else if (format == BIBOJ_COCO) {
n_data_column <- 3
idxTarget <- 2
}
else if (format == TWO_COL) {
n_data_column <- 2
idxTarget <- 2
}
FV <-
sort(unique(unlist(lapply(data, function(x)
x[, idxTarget]))),
decreasing = !maximization)
n_column <- unique(sapply(data, ncol))
if (format == COCO) {
n_param <- 0
idxValue <- idxEvals
param_names <- NULL
}
else if (format == IOHprofiler) {
n_param <- n_column - n_data_column
if (include_param && n_param > 0) {
param_names <- colnames(data[[1]])[(n_data_column + 1):n_column]
idxValue <- c(idxEvals, (n_data_column + 1):n_column)
}
else {
param_names <- NULL
idxValue <- idxEvals
}
}
else {
param_names <- NULL
idxValue <- idxEvals
}
res <-
c_align_running_time(data, FV, idxValue - 1, maximization, idxTarget - 1)
names(res) <- c('RT', param_names)
res
}
#' Align data by function values
#' @param data The data to align
#' @param format Whether the data is form IOHprofiler or COCO.
#' @param include_param Whether to include the recorded parameters in the alignment
#' @noRd
#' @return Data aligned by the function value
align_function_value <-
function(data,
include_param = TRUE,
format = IOHprofiler) {
n_column <- unique(sapply(data, ncol))
stopifnot(length(n_column) == 1)
if (format == COCO) {
idxTarget <- 3
n_param <- 0
}
else if (format == IOHprofiler) {
idxTarget <- 3
n_param <- n_column - n_data_column
}
else if (format == BIBOJ_COCO) {
# bi-objective COCO format
idxTarget <- 2
n_data_column <- 2
n_param <-
0 # no parameter is allowed in this case
}
else if (format == TWO_COL) {
idxTarget <- 2
n_param <- 0
}
if (check_contiguous(data)) {
nrow <- sapply(data, nrow) %>% max
runtime <- seq(nrow)
align_func <- align_contiguous
} else {
runtime <-
sort(unique(unlist(lapply(data, function(x)
x[, idxEvals]))))
nrow <- length(runtime)
align_func <- align_non_contiguous
}
FV <- align_func(data, idxTarget, runtime)
include_param <- include_param && (n_param > 0)
if (include_param) {
param_names <- colnames(data[[1]])[(n_data_column + 1):n_column]
param <- list()
for (i in seq(n_param)) {
name <- param_names[i]
param[[name]] <- align_func(data, i + n_data_column, runtime)
}
}
if (include_param) {
c(list(FV = FV), param)
} else {
list(FV = FV)
}
}
#' Read Nevergrad data
#'
#' Read .csv files in nevergrad format and extract information as a DataSetList
#'
#' @param fname The path to the .csv file
#' @return The DataSetList extracted from the .csv file provided
#' @noRd
read_nevergrad <- function(path) {
dt <- fread(path)
if (!'name' %in% colnames(dt)) {
dt[, name := function_class]
}
triplets <- unique(dt[, .(optimizer_name, dimension, name)])
algIds <- unique(triplets$optimizer_name)
DIMs <- unique(triplets$dimension)
funcIds <- unique(triplets$name)
res <- list()
idx <- 1
for (i in seq(nrow(triplets))) {
algId <- triplets$optimizer_name[i]
DIM <- triplets$dimension[i]
funcId <- triplets$name[i]
rescale_name <- 'rescale'
if (!('rescale' %in% colnames(dt))) {
if ('transform' %in% colnames(dt))
colnames(dt)[colnames(dt) == "transform"] <- "rescale"
else{
dt$rescale <- NA
}
}
data <-
dt[optimizer_name == algId & dimension == DIM & name == funcId,
.(budget, loss, rescale)]
for (scaled in unique(data$rescale)) {
if (!is.na(scaled)) {
data_reduced <- data[rescale == scaled, .(budget, loss)]
}
else {
data_reduced <- data[is.na(rescale), .(budget, loss)]
}
if (!is.na(scaled) && scaled) {
funcId_name <- paste0(funcId, '_rescaled')
}
else {
funcId_name <- funcId
}
rows <- unique(data_reduced$budget) %>% sort
FV <- lapply(rows,
function(b) {
data_reduced[budget == b, loss]
}) %>%
do.call(rbind, .) %>%
set_rownames(rows)
RT <- list()
ds <- structure(
list(RT = RT, FV = FV),
class = c('DataSet', 'list'),
maxRT = max(rows),
finalFV = min(FV),
format = 'NEVERGRAD',
maximization = FALSE,
algId = algId,
funcId = funcId_name,
DIM = DIM
)
res[[idx]] <- ds
idx <- idx + 1
}
}
class(res) %<>% c('DataSetList')
attr(res, 'DIM') <- DIMs
attr(res, 'funcId') <- funcIds
attr(res, 'algId') <- algIds
attr(res, 'suite') <- 'NEVERGRAD'
attr(res, 'maximization') <- F
res
}
#' Read single DataSet of SOS-based data
#'
#' Read single .txt files in SOS format and extract information as a DataSet
#'
#' @param file The path to the .txt file
#' @return The DataSet extracted from the .txt file provided
#' @noRd
read_single_file_SOS <- function(file) {
V1 <- NULL #Local binding to remove CRAN warnings
algId <-
substr(basename(file),
1,
stringi::stri_locate_last(basename(file), fixed = 'D')[[1]] - 1)
dt <- fread(file, header = F)
header <-
scan(
file,
what = 'character',
sep = '\n',
n = 1,
quiet = T
)
splitted <-
header %>% trimws %>% strsplit("\\s+") %>% .[[1]] %>% .[2:length(.)]
info <- list(algId = algId)
for (i in seq_len(length(splitted) / 2)) {
temp <- splitted[[2 * i]]
name <- splitted[[2 * i - 1]]
if (name == 'function')
name <- 'funcId'
if (name == 'dim')
name <- 'DIM'
names(temp) <- name
info <- c(info, temp)
}
dim <- as.numeric(info$DIM)
#Hardcoded fix for SB-related data
if (is.null(dim) || length(dim) == 0) {
warning("Dimension not explicitly defined, setting as 30 by default")
dim <- 30
info$DIM <- dim
}
RT_raw <- dt[[colnames(dt)[[ncol(dt) - dim - 1]]]]
names(RT_raw) <- dt[[colnames(dt)[[ncol(dt) - dim - 2]]]]
RT <- as.matrix(RT_raw)
mode(RT) <- 'integer'
FV_raw <- dt[[colnames(dt)[[ncol(dt) - dim - 2]]]]
names(FV_raw) <- dt[[colnames(dt)[[ncol(dt) - dim - 1]]]]
FV <- as.matrix(FV_raw)
pos <- dt[, (ncol(dt) - dim + 1):ncol(dt)]
colnames(pos) <- as.character(seq_len(dim))
maxRT <- max(RT)
finalFV <- min(FV)
idxs_avail <- dt[['V1']]
idxs_replaced <- dt[['V6']]
idxs_final <- setdiff(idxs_avail, idxs_replaced)
idx_final_best <- idxs_final[[which.min(FV[idxs_final])]]
final_pos <- as.numeric(pos[idx_final_best,])
# if (sum(FV == finalFV) > 1) {
# #Reconstruct population to determine which best solution is final position
# ids_min <- dt[FV_raw == finalFV, V1]
# replaced_idxs <- dt[[colnames(dt)[[ncol(dt) - dim]]]]
# #If none, take the last one added
# pos_idx <- max(ids_min)
# for (i in ids_min) {
# if (all(replaced_idxs != i)) {
# #If multiple, take the first one added
# pos_idx <- i
# break
# }
# }
# final_pos <- as.numeric(pos[pos_idx, ])
# }
# else {
# final_pos <- as.numeric(pos[which.min(FV), ])
# }
PAR <- list(
# 'position' = list(pos),
'final_position' = list(final_pos),
'by_FV' = NULL,
'by_RT' = NULL
)
object <- list()
class(object) <- c('DataSet', class(object))
object$RT <- RT
object$FV <- FV
object$PAR <- PAR
attr(object, 'maxRT') <- maxRT
attr(object, 'finalFV') <- finalFV
attr(object, 'format') <- "SOS"
attr(object, 'maximization') <- F
attr(object, 'suite') <- "SOS"
for (i in seq_along(info)) {
attr(object, names(info)[[i]]) <- type.convert(info[[i]], as.is = T)
}
attr(object, 'ID') <- attr(object, 'algId')
object
}
#' Read DataSetList of SOS-based data
#'
#' Read directory containing .txt files in SOS format and extract information as a DataSetList
#'
#' @param dir The path to the directory file
#' @param corrections_file A file containing boundary-correction ratios for the files in `dir`
#' @return The DataSetList extracted from the directory provided
#' @noRd
read_datasetlist_SOS <- function(dir, corrections_files = NULL) {
V1 <- V3 <- V4 <- NULL #Local binding to remove CRAN warnings
res <- list()
dims <- list()
funcIds <- list()
algIds <- list()
suites <- list()
maximizations <- list()
idx <- 1
corrs <-
as.data.table(rbindlist(lapply(corrections_files, fread)))
for (f in list.files(dir,
recursive = T,
pattern = "*.txt",
full.names = T)) {
if (f %in% corrections_files)
next
ds <- read_single_file_SOS(f)
dims[[idx]] <- attr(ds, 'DIM')
funcIds[[idx]] <- attr(ds, 'funcId')
algIds[[idx]] <- attr(ds, 'algId')
suites[[idx]] <- attr(ds, 'suite')
maximizations[[idx]] <- attr(ds, 'maximization')
if (nrow(corrs) > 0) {
fn <- substr(basename(f), 1, nchar(basename(f)) - 4)
corr_opts <- corrs[V1 == fn,]
if (stri_detect_fixed(fn, "DE")) {
corr <-
corr_opts[V3 == attr(ds, 'F'),][V4 == attr(ds, 'CR'), 'V2'][['V2']]
}
else if (stri_detect_fixed(fn, "RIS")) {
corr <- corr_opts[['V2']]
}
else {
warning("Unknown algorithm, so skipping lookup of boundary corrections ratio")
corr <- NULL
}
if (length(corr) == 1)
ds$PAR$'corrections' <- corr[[1]]
else
warning(paste0("No boundary corrections ratio found for ", fn))
}
res[[idx]] <- ds
idx <- idx + 1
}
class(res) %<>% c('DataSetList')
attr(res, 'DIM') <- dims
attr(res, 'funcId') <- funcIds
attr(res, 'algId') <- algIds
attr(res, 'ID_attributes') <- c('algId')
suite <- unique(suites)
maximization <- unique(maximizations)
if (length(suite) != 1 || length(maximization) != 1) {
warning("Multipe different suites detected!")
}
attr(res, 'suite') <- suite
attr(res, 'maximization') <- maximization
res
clean_DataSetList(res)
}
#' Find corrections-files in SOS-based folder
#'
#' Read directory containing .txt files in SOS format and extract the corrections-files
#'
#' @param path The path to the directory file
#' @return The relative paths to the corection files
#' @noRd
locate_corrections_files <- function(path) {
files <-
list.files(path,
recursive = T,
pattern = "*.txt",
full.names = T)
files[stri_detect_fixed(files, 'corrections')]
}
#' Read DataSetList of OPTION-based data
#'
#' Processes the data.table object created from the OPTION response into a DataSetList object
#'
#' @param dt The data.table object created from the OPTION request
#' @param source The type of data which is loaded, currently either BBOB or Nevergrad
#' @param ... Additional parameters to add to each DataSet object (e.g. function suite of nevergrad data)
#'
#' @return The DataSetList extracted from the data.table provided
#' @noRd
convert_from_OPTION <- function(dt, source, ...) {
#Initialize variables used in data.table to avoid CRAN-check notes.
algorithm_name <- dimensionality <- benchmark_problem <- NULL
instance_id <- num_experiment_run <- num_function_run <- NULL
precision_value <- elapsed_budget <- rotated <- NULL
triplets <-
unique(dt[, .(algorithm_name, dimensionality, benchmark_problem)])
algIds <- list()
DIMs <- list()
funcIds <- list()
res <- list()
idx <- 1
for (i in seq(nrow(triplets))) {
algId <- triplets$algorithm_name[i]
DIM <- as.numeric(triplets$dimensionality[i])
funcId <- triplets$benchmark_problem[i]
if (source == "BBOB") {
data <-
dt[algorithm_name == algId &
dimensionality == DIM & benchmark_problem == funcId,
.(instance_id,
num_experiment_run,
num_function_run,
precision_value)]
funcId_no_f <- as.numeric(stri_sub(funcId, 2))
for (iid in unique(data$instance_id)) {
for (rep in unique(data$num_experiment_run)) {
data_reduced <- data[instance_id == iid & num_experiment_run == rep,
.(
num_function_run = as.numeric(num_function_run),
precision_value = as.numeric(precision_value)
)]
rows <- unique(data_reduced$num_function_run) %>% sort
FV <- lapply(rows,
function(b) {
data_reduced[num_function_run == b, precision_value]
}) %>%
do.call(rbind, .) %>%
set_rownames(rows)
data_twocol <-
as.matrix(data_reduced[order(num_function_run)])
RT <-
align_running_time(
list(data_twocol),
TWO_COL,
maximization = F,
include_param = F
)
ds <- structure(
list(RT = RT$RT, FV = FV),
class = c('DataSet', 'list'),
maxRT = max(rows),
finalFV = min(FV),
format = 'OPTION',
suite = COCO,
maximization = FALSE,
algId = algId,
instance = iid,
funcId = funcId_no_f,
DIM = DIM,
ID = algId
)
res[[idx]] <- ds
idx <- idx + 1
algIds <- c(algIds, algId)
funcIds <- c(funcIds, funcId_no_f)
DIMs <- c(DIMs, DIM)
}
}
}
else {
data <-
dt[algorithm_name == algId &
dimensionality == DIM & benchmark_problem == funcId,
.(elapsed_budget, precision_value, rotated, noise_level)]
for (rotation in unique(data$rotated)) {
for (noise_level in unique(data$noise_level)) {
data_reduced <-
data[rotated == rotation & noise_level == noise_level,
.(
num_function_run = as.numeric(elapsed_budget),
precision_value = as.numeric(precision_value)
)]
rows <- unique(data_reduced$num_function_run) %>% sort
FV <- lapply(rows,
function(b) {
data_reduced[num_function_run == b, precision_value]
}) %>%
do.call(rbind, .) %>%
set_rownames(rows)
data_twocol <-
as.matrix(data_reduced[order(num_function_run)])
RT <-
align_running_time(list(data_twocol), TWO_COL, maximization = F)
ds <- structure(
list(RT = RT$RT, FV = FV),
class = c('DataSet', 'list'),
maxRT = max(rows),
finalFV = min(FV),
format = 'OPTION',
suite = NEVERGRAD,
maximization = FALSE,
algId = algId,
funcId = as.character(funcId),
DIM = DIM,
ID = algId,
rotated = rotation,
noise_level = noise_level
)
res[[idx]] <- ds
idx <- idx + 1
algIds <- c(algIds, algId)
funcIds <- c(funcIds, as.character(funcId))
DIMs <- c(DIMs, DIM)
}
}
}
}
class(res) %<>% c('DataSetList')
attr(res, 'DIM') <- DIMs
attr(res, 'funcId') <- funcIds
attr(res, 'algId') <- algIds
#To be enabled when merged with version 1.6
# attr(res, 'ID') <- algIds
# attr(res, 'ID_attributes') <- c('algId')
attr(res, 'suite') <- source
attr(res, 'maximization') <- F
clean_DataSetList(res)
}
#' Read Nevergrad data
#'
#' Read .csv files in arbitrary format
#'
#' @param path The path to the .csv file
#' @param neval_name The name of the column to use for the evaluation count.
#' If NULL, will be assumed to be sequential
#' @param fval_name The name of the column to use for the function values
#' @param fname_name The name of the column to use for the function name
#' @param algname_name The name of the column to use for the algorithm name
#' @param dim_name The name of the column to use for the dimension
#' @param run_name The name of the column to use for the run number
#' @param maximization Boolean indicating whether the data is resulting from maximization or minimization
#' @param static_attrs Named list containing the static values for missing columns.
#' When a parameter is not present in the csv file, its name-parameter should
#' be set to NULL, and the static value should be added to this static_attrs list.
#'
#' @return The DataSetList extracted from the .csv file provided
#' @export
read_pure_csv <- function(path,
neval_name,
fval_name,
fname_name,
algname_name,
dim_name,
run_name,
maximization = F,
static_attrs = NULL) {
fname <-
algname <- neval <- fval <- NULL #Ugly fix for CRAN warnings
dt <- fread(path)
#If columns are not specified, check if they have static values or should be imputed
impute_evalnrs <- is.null(neval_name)
if (!fval_name %in% colnames(dt)) {
warning(
paste0(
"The function value column named ",
fval_name,
" does not exist
in the provided file!"
)
)
return(NULL)
}
colnames(dt)[colnames(dt) == fval_name] <- "fval"
if (!impute_evalnrs) {
colnames(dt)[colnames(dt) == neval_name] <- "neval"
}
if (is.null(run_name)) {
dt$run <- 1
} else {
colnames(dt)[colnames(dt) == run_name] <- "run"
}
if (is.null(fname_name)) {
dt$fname <- static_attrs$fname
} else {
colnames(dt)[colnames(dt) == fname_name] <- "fname"
}
if (is.null(algname_name)) {
dt$algname <- static_attrs$algname
} else {
colnames(dt)[colnames(dt) == algname_name] <- "algname"
}
if (is.null(dim_name)) {
dt$dim <- static_attrs$dim
} else {
colnames(dt)[colnames(dt) == dim_name] <- "dim"
}
# if (!all(c(neval_name, run_name) %in% colnames(dt))) {
# warning("One or more specified column names do not exist
# in the provided file!")
# return(NULL)
# }
#
# colnames(dt)[colnames(dt) == neval_name] <- "neval"
# colnames(dt)[colnames(dt) == fval_name] <- "fval"
# colnames(dt)[colnames(dt) == fname_name] <- "fname"
# colnames(dt)[colnames(dt) == algname_name] <- "algname"
# colnames(dt)[colnames(dt) == dim_name] <- "dim"
# colnames(dt)[colnames(dt) == run_name] <- "run"
triplets <- unique(dt[, .(fname, dim, algname)])
algIds <- unique(triplets$algname)
DIMs <- unique(triplets$dim)
funcIds <- unique(triplets$fname)
res <- list()
idx <- 1
for (i in seq(nrow(triplets))) {
algId <- triplets$algname[i]
DIM <- triplets$dim[i]
funcId <- triplets$fname[i]
# data <- dt[algname == algId & dim == DIM & fname == funcId,
# .(neval, fval, run)]
data <- dt[algname == algId & dim == DIM & fname == funcId, ]
if (impute_evalnrs) {
data$neval <- ave(data$fval, data$run, FUN = seq_along)
}
dt_for_allign <- dcast(data, neval ~ run, value.var = 'fval')
FV_mat <- as.matrix(dt_for_allign[, 2:ncol(dt_for_allign)])
runtimes <- dt_for_allign$neval
if (maximization) {
FV <-
do.call(cbind, lapply(seq(ncol(FV_mat)), function(x)
cummax(FV_mat[, x])))
FV <-
apply(FV, 2, function(x) {
x[is.na(x)] <- max(x, na.rm = T)
x
})
}
else {
FV <-
do.call(cbind, lapply(seq(ncol(FV_mat)), function(x)
cummin(FV_mat[, x])))
FV <-
apply(FV, 2, function(x) {
x[is.na(x)] <- min(x, na.rm = T)
x
})
}
rownames(FV) <- runtimes
FV_temp <- sort(unique(FV_mat), decreasing = !maximization)
index <- as.numeric(runtimes)
RT <-
c_align_running_time_matrix(FV_mat, FV_temp, as.numeric(index), maximization)
rownames(RT) <- FV_temp
RT[RT < 1] <-
NA #Avoids weird values from impossible imputes at the end
ds <- structure(
list(RT = RT, FV = FV),
class = c('DataSet', 'list'),
maxRT = max(runtimes),
finalFV = min(FV),
format = 'Custom',
maximization = maximization,
algId = algId,
funcId = funcId,
DIM = DIM,
ID = algId
)
res[[idx]] <- ds
idx <- idx + 1
}
class(res) %<>% c('DataSetList')
attr(res, 'DIM') <- DIMs
attr(res, 'funcId') <- funcIds
attr(res, 'algId') <- algIds
attr(res, 'suite') <- 'Custom'
attr(res, 'maximization') <- maximization
res
}
#' Read Nevergrad data
#'
#' Read .csv files in arbitrary format
#'
#' @param info A List containing all meta-data about the dataset to create
#' @param full_sampling Logical. Whether the raw (unaligned) FV matrix should be stored.
#' Currently only useful when a correlation plot between function values and parameters should be made
#'
#' @return The DataSetList extracted from the .csv file provided
#' @export
read_IOH_v1plus <- function(info, full_sampling = FALSE) {
df <-
fread(
info$datafile,
header = FALSE,
sep = ' ',
colClasses = 'character',
fill = T
)
colnames(df) <- as.character(df[1,])
idx <- which(!grepl('\\d+', df[[1]], perl = T))
# check for data consistence
header_len <- min(apply(df[idx,] != "", 1, sum))
idx <- c(idx, nrow(df) + 1)
df <- df[, 1:header_len]
# turn off the warnings of the data coersion below
options(warn = -1)
# TOOD: this opeartor is the bottelneck
df <- sapply(df, function(c) {
class(c) <- 'numeric'
c
})
options(warn = 0)
data <- rbindlist(lapply(seq(length(idx) - 1), function(i) {
i1 <- idx[i] + 1
i2 <- idx[i + 1] - 1
ans <- df[i1:i2,]
if (i1 == i2)
ans <- as.matrix(t(ans))
data.table(ans, runnr = i)
}))
runnr <- evaluations <- raw_y <- NULL #Ugly fix for CRAN warnings
#If columns are not specified, check if they have static values or should be imputed
impute_evalnrs <- !('evaluations' %in% info$attributes)
algId <- info$algId
DIM <- info$dim
funcId <-
attr(info,
getOption('IOHanalyzer.function_representation', 'funcId'))
if (impute_evalnrs) {
data$evaluations <- ave(data$raw_y, data$runnr, FUN = seq_along)
}
dt_for_allign <-
dcast(data, evaluations ~ runnr, value.var = 'raw_y')
FV_mat <- as.matrix(dt_for_allign[, 2:ncol(dt_for_allign)])
runtimes <- dt_for_allign$evaluations
if (info$maximization) {
FV_mat <-
apply(FV_mat, 2, function(x) {
x[is.na(x)] <- min(x, na.rm = T)
x
})
FV <-
do.call(cbind, lapply(seq(ncol(FV_mat)), function(x)
cummax(FV_mat[, x])))
# FV <- apply(FV, 2, function(x) {x[is.na(x)] <- max(x, na.rm = T); x})
}
else {
FV_mat <-
apply(FV_mat, 2, function(x) {
x[is.na(x)] <- max(x, na.rm = T)
x
})
FV <-
do.call(cbind, lapply(seq(ncol(FV_mat)), function(x)
cummin(FV_mat[, x])))
# FV <- apply(FV, 2, function(x) {x[is.na(x)] <- max(x, na.rm = T); x})
}
rownames(FV) <- runtimes
FV_temp <- unique(sort(FV_mat, decreasing = !info$maximization))
index <- as.numeric(runtimes)
RT <-
c_align_running_time_matrix(FV_mat, FV_temp, as.numeric(index), info$maximization)
rownames(RT) <- FV_temp
RT[RT < 1] <-
NA #Avoids weird values from impossible imputes at the end
paramnames <-
info$attributes[!info$attributes %in% c("evaluations", "raw_y")]
PAR <- list(
'by_RT' = lapply(paramnames, function(parname) {
dt_for_allign <-
dcast(data, evaluations ~ runnr, value.var = parname)
mat_temp <- as.matrix(dt_for_allign[, 2:ncol(dt_for_allign)])
rownames(mat_temp) <- runtimes
mat_temp
}),
'by_FV' = lapply(paramnames, function(parname) {
dt_for_allign <- dcast(data, raw_y ~ runnr, value.var = parname)
mat_temp <- as.matrix(dt_for_allign[, 2:ncol(dt_for_allign)])
rownames(mat_temp) <- dt_for_allign$raw_y
mat_temp
})
)
names(PAR$by_RT) <- paramnames
names(PAR$by_FV) <- paramnames
ds <- do.call(function(...)
structure(list(
RT = RT, FV = FV, PAR = PAR
), class = c('DataSet', 'list'), ...),
c(
info,
list(
maxRT = max(runtimes),
finalFV = min(FV),
format = IOHprofiler,
ID = info$algId
)
))
if (getOption('IOHanalyzer.function_representation', 'funcId') == 'funcName') {
attr(ds, 'funcId') <- attr(ds, 'funcName')
}
if (full_sampling || 'violation' %in% info$attributes) {
rownames(FV_mat) <- runtimes
ds$FV_raw_mat <- FV_mat
attr(ds, 'contains_full_FV') <- TRUE
}
if ('violation' %in% info$attributes) {
attr(ds, 'constrained') <- TRUE
}
return(ds)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.