#' @importFrom bit64 as.integer64
.int64.max.dbl.int <- as.integer64((2 ^ 53) - 1);
.int64.0 <- as.integer64(0L);
#' @title Load a Single Log File
#' @description Load a log file and return the results as a data frame. The data
#' frame will have (at most) three columns: \describe{\item{\code{t}}{contains
#' the time in milliseconds consumed since the start of the optimization
#' process and is monotonously increasing} \item{\code{fes}}{contains the
#' number of objective function evaluations since the start of the
#' optimization process and is strictly monotonously increasing}
#' \item{\code{f}}{contains the objective value and is strictly monotonously
#' decreasing, with the except of the last point which might have the same
#' objective value as the second-to-last point.}}
#'
#' The data frame will be enriched with four attributes:
#' \describe{\item{file}{the normalized absolute file path to the log file}
#' \item{algorithm}{the id of the algorithm used to generate the log file}
#' \item{instance}{the problem instance to which the algorithm was applied}
#' \item{seed}{the hexadecimal string representation of the random seed}}
#'
#' Time and FEs have a resolution of 1 and a maximum value of \code{2^53 - 1},
#' as this is the highest integer value that can represented precisely with a
#' double. If a higher value is encountered in any of the two columns,
#' \link[base]{stop} will be invoked, i.e., higher values are not permitted.
#' In order to test for this, we use the 64 bit integers from the \code{bit64}
#' package, since \code{R} does not support 64 bit integers natively.
#' @param file the log file to load
#' @param keep.columns the columns to keep, any vector containing elements
#' \code{"t"} (for time), \code{"f"} (for the objective value), and
#' \code{"fes"} (for the consumed FEs)
#' @param f.must.be.improving \code{true} if the logged objective values must be
#' strictly improving? This is the default way logs are generated by aitoa.
#' However, you can also create a log where every single sampled solution is
#' logged, so then you must set \code{f.must.be.improving=FALSE} to load the
#' data.
#' @param make.time.unique should we make the time indices unique (except maybe
#' for the first and last point)? This makes sense when we want to plot
#' diagrams over a time axis, as we then have removed redundant points right
#' away. If \code{make.time.unique==FALSE}, then there may be multiple
#' improvements at the same time index due to the resolution of the computer
#' clock (while each improvement will definitely have a unique FE).
#' @return a data frame with the columns \code{t} (time in ms), \code{fes}
#' (function evaluations), and \code{f} (objective value), all of which are
#' numeric or integer valued, with integer type being preferred if it can be
#' used without loss of precision
#' @seealso \link{aitoa.parse.file.name}
#' @importFrom bit64 as.integer64
#' @export aitoa.load.log.file
#' @include parse_file_name.R
#' @include utils.R
aitoa.load.log.file <- function(file,
keep.columns = c("fes", "t", "f"),
make.time.unique=FALSE,
f.must.be.improving=TRUE) {
old.options <- options(warn=2);
stopifnot(!is.null(file),
is.character(file),
length(file) == 1L,
!is.na(file),
!is.null(keep.columns),
is.character(keep.columns),
length(keep.columns) > 0L,
!any(is.na(keep.columns)),
!is.null(make.time.unique),
is.logical(make.time.unique),
length(make.time.unique) == 1L,
isTRUE(make.time.unique) || isFALSE(make.time.unique),
!is.null(f.must.be.improving),
is.logical(f.must.be.improving),
length(f.must.be.improving) == 1L,
isTRUE(f.must.be.improving) || isFALSE(f.must.be.improving));
keep.columns <- unique(keep.columns);
stopifnot(length(keep.columns) > 0L,
all(keep.columns %in% c("fes", "t", "f")));
file <- .file.exists(file);
# read file as text file, one line = one element
data <- readLines(con=file, warn=FALSE);
data <- force(data);
# detect consumed FEs
consumedFEs <- grep("# CONSUMED_FES:", data, fixed = TRUE);
consumedFEs <- force(consumedFEs);
stopifnot(length(consumedFEs) == 1L);
consumedFEs <- as.integer64(trimws(strsplit(data[consumedFEs[[1L]]],":", fixed=TRUE)[[1L]][2L]));
consumedFEs <- force(consumedFEs);
stopifnot(is.finite(consumedFEs),
consumedFEs <= .int64.max.dbl.int,
consumedFEs > .int64.0);
consumedFEs <- as.numeric(consumedFEs);
if(consumedFEs <= .Machine$integer.max) {
.fes <- as.integer;
consumedFEs <- as.integer(consumedFEs);
} else {
.fes <- as.numeric;
}
# detect consumed time
consumedTime <- grep("# CONSUMED_TIME:", data, fixed = TRUE);
consumedTime <- force(consumedTime);
stopifnot(length(consumedTime) == 1L);
consumedTime <- as.integer64(trimws(strsplit(data[consumedTime[[1L]]],":", fixed=TRUE)[[1L]][2L]));
consumedTime <- force(consumedTime);
stopifnot(is.finite(consumedTime),
consumedTime <= .int64.max.dbl.int,
consumedTime >= .int64.0);
consumedTime <- as.numeric(consumedTime);
if(consumedTime <= .Machine$integer.max) {
.time <- as.integer;
consumedTime <- as.integer(consumedTime);
} else {
.time <- as.numeric;
}
# extract the correct lines
start <- grep("# BEGIN_LOG", data, fixed=TRUE)[[1L]];
start <- force(start);
end <- grep("# END_OF_LOG", data, fixed=TRUE)[[1L]];
end <- force(end);
stopifnot(start > 0L,
end > (start + 2L),
is.finite(start),
is.finite(end));
# load data as CSV
data <- strsplit(data[(start+2L):(end-1L)], ";", fixed=TRUE);
data <- force(data);
f <- as.numeric(vapply(data, `[[`, "", 1L));
stopifnot(all(is.finite(f)));
f <- force(f);
fes <- .fes(vapply(data, `[[`, "", 2L));
fes <- force(fes);
stopifnot(all(is.finite(fes)),
all(fes > 0),
all(ceiling(fes) == floor(fes)),
all(fes <= consumedFEs));
t <- .time(vapply(data, `[[`, "", 3L));
t <- force(t);
stopifnot(all(is.finite(t)),
all(t >= 0),
all(ceiling(t) == floor(t)),
all(t <= consumedTime));
rm("data");
stopifnot(length(unique(fes)) == length(fes));
startPointAdded <- FALSE;
if(isTRUE(make.time.unique)) {
# take care of re-occuring time values
t.unique.indexes <- findInterval(unique(t), t);
if(t.unique.indexes[1L] != 1L) {
t.unique.indexes <- c(1L, t.unique.indexes);
startPointAdded <- TRUE;
}
t <- t[t.unique.indexes];
t <- force(t);
f <- f[t.unique.indexes];
f <- force(f);
fes <- fes[t.unique.indexes];
fes <- force(fes);
}
l <- length(t);
stopifnot(l >= 1L);
# add an end point if necessary
endPointAdded <- FALSE;
if(fes[l] < consumedFEs) {
stopifnot(consumedTime >= t[l]);
l <- l + 1L;
f[l] <- f[l - 1L];
fes[l] <- consumedFEs;
t[l] <- consumedTime;
endPointAdded <- TRUE;
} else {
stopifnot(t[l] <= consumedTime);
consumedTime <- t[l];
}
stopifnot(fes[l] == consumedFEs,
t[l] == consumedTime);
minLength <- 1L;
if(startPointAdded) { minLength <- minLength + 1L; }
if(endPointAdded) { minLength <- minLength + 1L; }
stopifnot(length(t) >= minLength,
length(t) == length(f),
length(f) == length(fes),
length(t) == l,
all(is.finite(t)),
all(is.finite(f)),
all(is.finite(fes)),
all(is.numeric(t)),
all(is.numeric(f)),
all(is.numeric(fes)),
all(fes > 0),
all(fes <= consumedFEs),
all(t >= 0),
all(t <= consumedTime));
# check how the objective values progress: always decreasing
if(l > 1L) {
before <- 1L:(l-1L);
after <- 2L:l;
stopifnot(all(fes[before] <= fes[after]),
all(t[before] <= t[after]));
if(f.must.be.improving) {
stopifnot(all(f[before] >= f[after]));
}
}
if(endPointAdded) {
if(l > 2L) {
stopifnot(fes[1L:(l-2L)] < fes[2L:(l-1L)]);
}
if(f.must.be.improving) {
stopifnot(f[l] == f[l-1L]);
}
}
if(startPointAdded) {
stopifnot(t[1L] == t[2L]);
if(f.must.be.improving) {
stopifnot(f[1L] >= f[2L]);
}
}
# OK, we should have valid data
t <- force(t);
fes <- force(fes);
f <- force(f);
# try to convert the data to the cheapest format
if(all((f >= (-.Machine$integer.max)) &
(f <= .Machine$integer.max))) {
.ft <- as.integer(f);
if(all(.ft == f)) {
f <- .ft;
}
}
# finished converting
data <- unique(data.frame(t=t, fes=fes, f=f)[keep.columns]);
rm("f");
rm("fes");
rm("t");
data <- force(data);
data <- do.call(force, list(x=data));
stopifnot(is.data.frame(data),
nrow(data) > 0L,
ncol(data) == length(keep.columns),
names(data) == keep.columns);
attr(data, "file") <- file;
info <- unname(unlist(aitoa.parse.file.name(file)));
stopifnot(length(info) == 3L,
is.character(info),
all(nchar(info) > 0L));
attr(data, "algorithm") <- info[[1L]];
attr(data, "instance") <- info[[2L]];
attr(data, "seed") <- info[[3L]];
options(old.options);
return(data);
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.