Nothing
#' @title Reduce results into a data.frame with all relevant information.
#'
#' @description
#' Generates a \code{data.frame} with one row per job id. The columns are: ids of problem and algorithm
#' (named \dQuote{prob} and \dQuote{algo}), one column per parameter of problem or algorithm (named by the parameter name),
#' the replication number (named \dQuote{repl}) and all columns defined in the function to collect the values.
#' Note that you cannot rely on the order of the columns.
#' If a parameter does not have a setting for a certain job / experiment it is set to \code{NA}.
#' Have a look at \code{\link{getResultVars}} if you want to use something like \code{\link[plyr]{ddply}} on the
#' results.
#'
#' The rows are ordered as \code{ids} and named with \code{ids}, so one can easily index them.
#'
#' @param reg [\code{\link{ExperimentRegistry}}]\cr
#' Registry.
#' @param ids [\code{integer}]\cr
#' Ids of selected experiments.
#' Default is all jobs for which results are available.
#' @param part [\code{character}]\cr
#' Only useful for multiple result files, then defines which result file part(s) should be loaded.
#' \code{NA} means all parts are loaded, which is the default.
#' @param fun [\code{function(job, res, ...)}]\cr
#' Function to collect values from \code{job} and result \code{res} object, the latter from stored result file.
#' Must return a named object which can be coerced to a \code{data.frame} (e.g. a \code{list}).
#' Default is a function that simply returns \code{res} which may or may not work, depending on the type
#' of \code{res}. We recommend to always return a named list.
#' @param ... [any]\cr
#' Additional arguments to \code{fun}.
#' @param strings.as.factors [\code{logical(1)}]\cr
#' Should all character columns in result be converted to factors?
#' Default is \code{FALSE}.
#' @param block.size [\code{integer(1)}]\cr
#' Results will be fetched in blocks of this size.
#' Default is max(100, 5 percent of ids).
#' @param impute.val [\code{named list}]\cr
#' If not missing, the value of \code{impute.val} is used as a replacement for the
#' return value of function \code{fun} on missing results. An empty list is allowed.
#' @param apply.on.missing [\code{logical(1)}]\cr
#' Apply the function on jobs with missing results? The argument \dQuote{res} will be \code{NULL}
#' and must be handled in the function.
#' This argument has no effect if \code{impute.val} is set.
#' Default is \code{FALSE}.
#' @template arg_progress_bar
#' @return [\code{data.frame}]. Aggregated results, containing problem and algorithm parameters and collected values.
#' @aliases ReducedResultsExperiments
#' @export
reduceResultsExperiments = function(reg, ids, part = NA_character_, fun, ...,
  strings.as.factors = FALSE, block.size, impute.val,
  apply.on.missing = FALSE, progressbar = TRUE) {

  checkExperimentRegistry(reg, strict = TRUE, writeable = FALSE)
  syncRegistry(reg)
  assertFlag(apply.on.missing)

  # Work out which ids to reduce and which of them findDone() reports.
  # Imputation is only considered when ids are passed explicitly.
  with.impute = FALSE
  if (missing(ids)) {
    finished = findDone(reg)
    ids = finished
  } else {
    ids = checkIds(reg, ids)
    finished = findDone(reg, ids)
    if (!missing(impute.val)) {
      with.impute = TRUE
      if (!(is.list(impute.val) && isProperlyNamed(impute.val)))
        stop("Argument 'impute.val' must be a properly named list")
    } else if (!apply.on.missing) {
      # neither imputing nor applying 'fun' on missing results:
      # every requested id must have a result
      unfinished = setdiff(ids, finished)
      if (length(unfinished) > 0L)
        stopf("No results available for jobs with ids: %s", collapse(unfinished))
    }
  }

  checkPart(reg, part)
  if (missing(fun)) {
    fun = function(job, res) res
  } else {
    assertFunction(fun, c("job", "res"))
  }
  assertFlag(strings.as.factors)
  block.size = if (missing(block.size)) {
    max(100L, as.integer(0.05 * length(ids)))
  } else {
    asCount(block.size)
  }
  assertFlag(progressbar)

  info("Reducing %i results...", length(ids))

  # Function applied to jobs without a result: either a constant function
  # returning impute.val (it never touches 'res') or 'fun' itself.
  if (with.impute) {
    impute = function(job, res, ...) impute.val
  } else {
    impute = fun
  }

  # Builds a single row: job meta data followed by the collected values.
  # getResult() is handed over as an argument expression, so the result
  # is only loaded if .fun actually evaluates 'res'.
  getRow = function(j, reg, part, .fun, missing.ok, ...) {
    meta = c(list(id = j$id, prob = j$prob.id), j$prob.pars,
      list(algo = j$algo.id), j$algo.pars, list(repl = j$repl))
    c(meta, .fun(j, getResult(reg, j$id, part, missing.ok), ...))
  }

  collected = data.table()
  id.blocks = chunk(ids, chunk.size = block.size, shuffle = FALSE)
  if (progressbar) {
    bar = makeProgressBar(max = length(id.blocks), label = "reduceResultsExperiments")
    bar$set()
  } else {
    bar = makeProgressBar(style = "off")
  }

  prob.par.names = character(0L)
  algo.par.names = character(0L)
  tryCatch({
    for (block.ids in id.blocks) {
      block.jobs = getJobs(reg, block.ids, check.ids = FALSE)

      # keep track of every parameter name seen so far
      new.prob = unlist(lapply(block.jobs, function(j) names(j$prob.pars)))
      new.algo = unlist(lapply(block.jobs, function(j) names(j$algo.pars)))
      prob.par.names = unique(c(prob.par.names, new.prob))
      algo.par.names = unique(c(algo.par.names, new.algo))

      # rows of jobs with results first, then rows of jobs without results;
      # this may deviate from the order of 'ids' and is repaired below
      has.result = block.ids %in% finished
      rows.done = lapply(block.jobs[has.result], getRow, reg = reg, part = part,
        .fun = fun, missing.ok = apply.on.missing, ...)
      rows.missing = lapply(block.jobs[!has.result], getRow, reg = reg, part = part,
        .fun = impute, missing.ok = apply.on.missing, ...)

      block.dt = rbindlist(c(rows.done, rows.missing), fill = TRUE)
      collected = rbind(collected, block.dt, fill = TRUE)
      bar$inc(1L)
    }
  }, error = bar$error)

  collected = convertDataFrameCols(setDF(collected), chars.as.factor = strings.as.factors)

  # Name the rows by job id so they can be indexed easily, then restore
  # the order of 'ids' as requested by the caller.
  if (nrow(collected) > 0L) {
    collected = setRowNames(collected, collected$id)
    collected = collected[as.character(ids), ]
  }

  collected = addClasses(collected, "ReducedResultsExperiments")
  attr(collected, "prob.pars.names") = prob.par.names
  attr(collected, "algo.pars.names") = algo.par.names
  collected
}
#' Get variable groups of reduced results.
#'
#' Useful helper for e.g. package plyr and such.
#'
#' @param data [\code{\link{ReducedResultsExperiments}}]\cr
#' Result data.frame from \code{\link{reduceResultsExperiments}}.
#' @param type [\code{character(1)}]\cr
#' Can be \dQuote{prob} (prob + pars), \dQuote{prob.pars} (only problem pars),
#' \dQuote{algo} (algo + pars), \dQuote{algo.pars} (only algo pars),
#' \dQuote{group} (prob + problem pars + algo + algo pars), \dQuote{result} (result column names).
#' Default is \dQuote{group}.
#' @return [\code{character}]. Names of columns.
#' @export
#' @examples
#' reg = makeExperimentRegistry("BatchExample", seed = 123, file.dir = tempfile())
#' addProblem(reg, "p1", static = 1)
#' addProblem(reg, "p2", static = 2)
#' addAlgorithm(reg, id = "a1",
#' fun = function(static, dynamic, alpha) c(y = static*alpha))
#' addAlgorithm(reg, id = "a2",
#' fun = function(static, dynamic, alpha, beta) c(y = static*alpha+beta))
#' ad1 = makeDesign("a1", exhaustive = list(alpha = 1:2))
#' ad2 = makeDesign("a2", exhaustive = list(alpha = 1:2, beta = 5:6))
#' addExperiments(reg, algo.designs = list(ad1, ad2), repls = 2)
#' submitJobs(reg)
#' data = reduceResultsExperiments(reg)
#' library(plyr)
#' ddply(data, getResultVars(data, "group"), summarise, mean_y = mean(y))
getResultVars = function(data, type = "group") {
  assertClass(data, "ReducedResultsExperiments")
  assertChoice(type, c("prob", "prob.pars", "algo", "algo.pars", "group", "result"))

  # parameter names are stored as attributes on the reduced result
  prob.cols = attr(data, "prob.pars.names")
  algo.cols = attr(data, "algo.pars.names")

  if (type == "prob")
    return(c("prob", prob.cols))
  if (type == "prob.pars")
    return(prob.cols)
  if (type == "algo")
    return(c("algo", algo.cols))
  if (type == "algo.pars")
    return(algo.cols)
  if (type == "group")
    return(c("prob", "algo", prob.cols, algo.cols))

  # type == "result": every column that is neither meta data nor a parameter
  meta.cols = c("id", "algo", "prob", "repl", prob.cols, algo.cols)
  setdiff(colnames(data), meta.cols)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.