#' @title Create \code{Autolearner} objects
#'
#' @description
#' \code{Autolearner} objects wrap mlr \code{Learner} objects as well as
#' preprocessing functions and provide additional meta information. This is used
#' to define the automlr searchspace.
#'
#' @param learner [\code{Learner}|list]\cr
#' An mlr \code{Learner} object for a \code{"learner"} stacktype, otherwise
#' an object returned by \code{\link{autoWrapper}}.\cr
#' If a \code{Learner} is required, it may also be the ID of the
#' \code{Learner} which postpones allocation of memory and loading of
#' packages and may therefore be preferred.
#' @param searchspace [list of \code{Searchparam}]\cr
#' List of \code{Searchparam}s reqpresenting the relevant parameter search
#' space.
#' @param stacktype [\code{character(1)}]\cr
#' Describing how this object can be connected with other learners. Must be
#' one of \code{"wrapper"} or \code{"learner"}.
#'
#' @export
autolearner = function(learner, searchspace = list(), stacktype = "learner") {
assertChoice(stacktype, c("learner", "wrapper"))
names = extractSubList(searchspace, "name")
if (any(duplicated(names))) {
stopf("Duplicated names %s for learner '%s'",
paste(unique(names[duplicated(names)]), collapse = ", "),
ifelse(is.character(learner), learner,
coalesce(learner$id, learner$name)))
}
if (stacktype != "learner") {
assertClass(learner, "AutoWrapper")
} else if (!is.character(learner)) {
assertClass(learner, "Learner")
}
makeS3Obj("Autolearner",
learner = learner,
searchspace = searchspace,
stacktype = stacktype)
}
#' @title Create a wrapper object for \code{\link{autolearner}}.
#'
#' @description
#' Build a wrapper object that can be plugged into \code{\link{autolearner}} to
#' give wrapper functions to \code{\link{buildLearners}}.
#'
#' @param name [\code{character(1)}]\cr
#' the name of the wrapper. Must not contain a \code{$} character.
#' @param cpo [\code{CPO}]\cr
#' The cpo that will be attached to the learner and construct another
#' \code{Learner}.
#' @param datatype [\code{character(1)}]\cr
#' The data this wrapper operates on.
#' @param convertfrom [\code{character(1)} | \code{NULL}]\cr
#' If this wrapper converts data from one type to another, \dQuote{datatype}
#' must be the target type, and \dQuote{convertfrom} must be the source type.
#' If the wrapper is an imputing wrapper, \dQuote{convertfrom} must be
#' \dQuote{missings}, and \dQuote{datatype} must be the type of columns that
#' have their missings imputed. A given wrapper may only impute missings of
#' one column type, even though it sees all columns.
#'
#' @export
autoWrapper = function(name, cpo, datatype, convertfrom = NULL) {
assertString(name)
assert(identical(grep("$", name, fixed = TRUE), integer(0)))
assertClass(cpo, "CPO")
cpoprops = getCPOProperties(cpo)
if (!is.null(convertfrom)) {
assertChoice(convertfrom, c("factors", "ordered", "numerics", "missings"))
assertChoice(convertfrom, cpoprops$properties.adding)
if (convertfrom != "missings") {
assert(datatype == cpoprops$properties.needed)
}
} else {
assertChoice(datatype, cpoprops$properties)
}
if (is.null(convertfrom) || convertfrom == "missings") {
assert(length(cpoprops$properties.needed) == 0 ||
identical(cpoprops$properties.needed, datatype))
}
assertChoice(datatype, c("factors", "ordered", "numerics"))
assert(!identical(convertfrom, datatype))
makeS3Obj("AutoWrapper",
name = name,
cpo = cpo,
is.imputer = identical(convertfrom, "missings"),
is.converter = !is.null(convertfrom) && convertfrom != "missings",
convertfrom = convertfrom,
datatype = datatype)
}
#' @export
print.Autolearner = function(x, ...) {
catf("<automlr %s '%s' [%s]>",
if (x$stacktype == "learner") "learner" else {
if (x$learner$is.imputer) {
sprintf("imputer (%s)", x$learner$datatype)
} else if (x$learner$is.converter) {
sprintf("converter (%s -> %s)", x$learner$convertfrom,
x$learner$datatype)
} else {
sprintf("preproc (%s)", x$learner$datatype)
}
},
ifelse(is.character(x$learner), x$learner,
coalesce(x$learner$id, x$learner$name)),
if (length(x$searchspace) == 0) "" else {
sprintf("\n %s\n", collapse(sapply(x$searchspace,
function(x) collapse(capture.output(print(x)), sep="")),
sep = ",\n "))
})
}
#' @export
print.AutoWrapper = function(x, ...) {
catf("AutoWrapper %s", x$name)
}
#' @title Define the searchspace parameter in a short form
#'
#' @description
#' This function is used to define the search space related to a given learner.
#' The priority here is that the function should be saving space: Much of the
#' information about a parameter is inferred from the learner itself; In
#' principle only the name, the type, and a range of a parameter are needed.
#'
#' However, to get notifications about changes in the mlr package, also all the
#' parameters that are not given should be referenced with an \code{sp()} of
#' type \code{"def"}; otherwise, a warning will be given upon instantiation of
#' the learner.
#'
#' @param name [\code{character(1)}]\cr
#' The name of the parameter which must match the id of the \code{Param} it
#' refers to. May be suffixed with \code{.AMLRFIX#}, where \code{#} is a
#' number, to expose one varible to the outside with different search space
#' depending on requirements.
#' @param type [\code{character(1)}]\cr
#' The type of the parameter. One of: \code{real} (numeric), \code{int}
#' (integer), \code{cat} (discrete), \code{bool} (logical), \code{fix} (fixed
#' value of whatever type), \code{def} (using default value / not setting the
#' value), \code{fixdef} (fixed value, but warn if this is not the default)
#' @param values [\code{numeric}|\code{character}|\code{list}]\cr
#' If \code{type} is \code{real} or \code{int}, this gives the lower and upper
#' bound. If \code{type} is \code{cat}, it is a character vector of possible
#' values. If \code{type} is \code{fix}, only one value (the one to be fixed)
#' must be given. If \code{type} is code{bool} or \code{def}, it must be
#' \code{NULL}.\cr
#' It is possible to give expression values instead of constant values,
#' usually using \code{quote}. These expressions can involve the special
#' values \code{n} (number of observations), \code{p} (number of features),
#' the automlr pseudoparameters described in the docs of
#' \code{\link{makeAMExoWrapper}}, and \code{PARAM.x} to refer to the value of
#' parameter \code{x}. If at least one expression is given, \code{values}
#' needs to be a list containing the expressions and singleton vectors of
#' the appropriate type.\cr
#' A special value for \dQuote{def} type parameters is \dQuote{##}, which
#' leads to using the current default of the function without specifying it.
#' @param trafo [\code{character}|\code{function}|\code{NULL}]\cr
#' May be \dQuote{exp} for exponential transformation, or \dQuote{invexp} for
#' exponential transformation of the difference of the value and its upper
#' bound. Transforms integer parameters in a smart way. Only applies for
#' \code{real} and \code{int} values. May also be an R function.
#' @param id [\code{character(1)}|\code{NULL}]\cr
#' May be given to identify parameters of different learners having similar
#' effects in similar learners.\cr
#' This currently has no effect beyond warnings if different parameters with
#' the same \code{id} have different value ranges.
#' @param special [\code{character(1)}|\code{NULL}]\cr
#' May be \code{NULL}, \code{"dummy"} or \code{"inject"}. Set this to
#' \code{"dummy"} if this is a dummy variable that will be hidden from the
#' learner itself but visible to the outside. Set this to \code{"inject"} to
#' create the parameter in the learners \code{ParamSet} if it does not exist.
#' @param req [\code{language}|\code{NULL}]\cr
#' A requirement for the variable to have effect. May reference other
#' parameters of the learner, as well as the automlr pseudoparameters
#' described in \code{\link{makeAMExoWrapper}}. Must not call any parameter
#' values as functions!
#' @param dim [\code{integer(1)}]\cr
#' The number of dimensions of this variable.
#' @param version [\code{character(1)}|\code{NULL}]\cr
#' Version of MLR to apply this parameter to. If not \code{NULL}, this must
#' be a \code{character} made up of a comparison operator (one of \dQuote{>},
#' \dQuote{<}, \dQuote{>=}, \dQuote{<=}, or \dQuote{==}) and an MLR version
#' number of the form \dQuote{MAJOR.MINOR}. If the mlr version that was found
#' does not satisfy the requirement, the parameter will be ignored.
#'
#' @export
sp = function(name, type = "real", values = NULL, trafo = NULL, id = NULL,
special = NULL, req = NULL, dim = 1, version = NULL) {
assertChoice(type, c("real", "int", "cat", "bool", "fix", "def", "fixdef"))
assertString(name)
assert(nchar(name) > 0)
numexp = 0
if (type %in% c("real", "int")) {
if (is.list(values)) {
# filter out expressions
expression.idx = vlapply(values, is.language)
nonexp.values = c(numeric(0),
unlist(values[!expression.idx], recursive = FALSE))
numexp = length(values) - length(nonexp.values)
assert(numexp == sum(expression.idx))
} else {
nonexp.values = values
values = as.list(values)
}
assertNumeric(nonexp.values, any.missing = FALSE, len = 2 - numexp)
if (numexp == 0) {
assert(values[[2]] >= values[[1]])
}
if (type == "int") {
assert(all(as.integer(nonexp.values) == nonexp.values))
values = lapply(values, function(x)
if (is.language(x)) x else as.integer(x))
}
} else if (type %in% c("fix", "def", "fixdef")) {
if (!is.null(values)) {
assertVector(values, strict = TRUE, len = 1)
}
} else if (type == "cat"){
assertVector(values, strict = TRUE, min.len = 1)
} else { # type == "bool"
assertNull(values)
}
if (!is.null(trafo)) {
if (identical(trafo, "exp") || identical(trafo, "invexp")) {
assert(type %in% c("real", "int"))
} else {
assertFunction(trafo, nargs = 1)
}
}
if (!is.null(id)) {
assertString(id)
assert(type %nin% c("fix", "def", "fixdef"))
assert(nchar(id) > 0)
}
if (!is.null(special)) {
assertChoice(special, c("dummy", "inject"))
}
if (type %in% c("fix", "def", "fixdef") && !is.null(req)) {
stop("Requirements not allowed when type is 'fix', 'def', or 'fixdef'.")
}
if (!is.null(req)) {
assert(checkClass(req, "call"), checkClass(req, "expression"))
}
if (identical(values, "##") && type != "def") {
stop("Only type 'def' parameters can have value '##'.")
}
assert(all(as.integer(dim) == dim))
dim = as.integer(dim)
assertInteger(dim, lower = 1, len = 1)
makeS3Obj("Searchparam",
name = name,
values = values,
type = type,
trafo = trafo,
numexp = numexp,
id = id,
special = special,
req = req,
dim = dim)
}
#' @export
print.Searchparam = function(x, ...) {
toString = function(val) {
if (is.character(val)) {
val
} else if (is.list(val)) {
sapply(val, function(x)
collapse(deparse(x, width.cutoff=500), sep = "\n"))
} else {
collapse(deparse(val, width.cutoff=500), sep = "\n")
}
}
catf("%s%s %s%s%s%s%s%s%s", x$type,
if (x$dim > 1) sprintf("^%s", x$dim) else "", x$name,
if (!is.null(x$trafo))
sprintf(" %s(", toString(x$trafo))
else if (x$type == "bool") "" else " ",
if (x$type %in% c("int", "real")) {
sprintf("[%s]", collapse(toString(x$values), sep = ".."))
} else if (x$type != "bool") {
sprintf("{%s}", collapse(toString(x$values)))
} else {
""
}, if (!is.null(x$trafo)) ")" else "",
if (!is.null(x$id)) sprintf(" {%s}", x$id) else "",
if (!is.null(x$special)) sprintf(" (%s)", x$special) else "",
ifelse(is.null(x$req), "", " *"))
}
# make a named list, more convenient to use
makeNamedAlList = function(...) {
l = list(...)
n = sapply(l, function(item) ifelse(is.character(item$learner),
item$learner, coalesce(item$learner$id, item$learner$name)))
names(l) = n
l
}
# This is an interface for makeXYZParam
# where X is the type (Numeric, Integer, ...)
# Y may be "Vector"
# Z may be "Learner"
# param: a "Searchparam" created with sp()
# learnerid: for debug messages
# makeLearnerParam
# forWrapper
createParameter = function(param, learnerid, makeLearnerParam = FALSE,
forWrapper = FALSE) {
# makeLearnerParam == TRUE means
# 1) create LearnerParam instead of Param
# 2) remove .AMLRFIX# suffix
# 3) no Trafo
if (param$type %in% c("int", "real")) {
pmin = param$values[[1]]
pmax = param$values[[2]]
}
expression.trafo = NULL
if (makeLearnerParam) {
param$name = removeAmlrfix(param$name)
if (!is.null(param$trafo)) {
if (param$type %in% c("int", "real") && !is.character(param$trafo)) {
# if the transformation is not our own trafo, then we can't say
# for sure what the real bounds are, or even what the param type is.
pmin = -Inf
pmax = Inf
param$type = "real"
}
param$trafo = NULL
}
} else if (param$numexp) {
assertChoice(param$type, c("int", "real"))
if (!is.null(param$trafo) && !identical(param$trafo, "exp")) {
stop("Expression bounds with trafo that is not 'exp' not yet supported.")
}
expression.trafo = createExpressionTrafo(pmin, pmax,
param$type == "int", identical(param$trafo, "exp"))
pmin = 0
pmax = 1
param$type = "real"
param$trafo = NULL
}
if (is.character(param$trafo)) {
assertChoice(param$trafo, c("exp", "invexp"))
aux = createTrafo(pmin, pmax,
invert = param$trafo == "invexp", is.int = param$type == "int")
ptrafo = aux$trafo
pmin = aux$newmin
pmax = aux$newmax
} else {
ptrafo = param$trafo # will either be a function or NULL
}
surrogatetype = param$type
if (param$type == "fix" && !forWrapper) {
# we should realistically only get here because we are creating the
# LearnerParam.
assert(makeLearnerParam)
# don't warn for wrappers, for them fixed injects are the norm
warningf(paste("Parameter '%s' for learner '%s' is marked",
"'inject' and has type 'fix'; This usually does not make",
"sense."),
param$name, learnerid)
if (!is.null(param$values)) {
if (is.numeric(param$values[1])) {
surrogatetype = "real"
pmin = param$values[1]
pmax = param$values[1]
} else if (is.logical(param$values[1])) {
surrogatetype = "bool"
}
}
}
if (param$dim > 1) {
if (makeLearnerParam) {
constructor = switch(surrogatetype,
real = makeNumericVectorLearnerParam,
int = makeIntegerVectorLearnerParam,
cat = makeDiscreteVectorLearnerParam,
bool = makeLogicalVectorLearnerParam,
fix = makeDiscreteVectorLearnerParam,
NULL)
} else {
constructor = switch(surrogatetype,
real = makeNumericVectorParam,
int = makeIntegerVectorParam,
cat = makeDiscreteVectorParam,
bool = makeLogicalVectorParam,
fix = makeDiscreteVectorParam,
NULL)
}
paramlist = list(id = param$name, len = param$dim, requires = param$req)
} else {
if (makeLearnerParam) {
constructor = switch(surrogatetype,
real = makeNumericLearnerParam,
int = makeIntegerLearnerParam,
cat = makeDiscreteLearnerParam,
bool = makeLogicalLearnerParam,
fix = makeDiscreteLearnerParam,
NULL)
} else {
constructor = switch(surrogatetype,
real = makeNumericParam,
int = makeIntegerParam,
cat = makeDiscreteParam,
bool = makeLogicalParam,
fix = makeDiscreteParam,
NULL)
}
paramlist = list(id = param$name, requires = param$req)
}
paramlist %c=% switch(surrogatetype,
int = list(lower = pmin, upper = pmax, trafo = ptrafo),
real = list(lower = pmin, upper = pmax, trafo = ptrafo),
cat = list(values = {
x = param$values
if (!test_named(x)) {
names(x) = param$values
}
x}),
bool = list(),
fix = list(values = {
x = param$values
if (!test_named(x)) {
names(x) = param$values
}
x}),
def = stopf(paste("Parameter '%s' for learner '%s' marked as",
"'inject' must not have type 'def'."),
param$name, learnerid),
stopf("Unknown type '%s'; parameter '%s', learner '%s'", param$type,
param$name, learnerid))
if (makeLearnerParam) {
paramlist$trafo = NULL
}
pobject = do.call(constructor, paramlist, quote = TRUE)
if (!is.null(pobject$trafo)) {
pobject$trafo = guardTrafoNa(pobject$trafo)
}
pobject$amlr.isDummy = identical(param$special, "dummy")
pobject$amlr.lrnid = learnerid
pobject$amlr.origValues = param$values
pobject$amlr.expressionTrafo = expression.trafo
pobject
}
guardTrafoNa = function(origTrafo) {
force(origTrafo)
function(x) if (length(x) == 1 && is.na(x)) x else origTrafo(x)
}
createTrafo = function(min, max, invert, is.int) {
if (is.int) {
if (invert) {
stop("invexp int not supported yet.")
}
assert(min >= 0)
addzero = if (min == 0) 0
if (min == 0) {
min = 1
}
ratio = sqrt((min + 1) / min)
sequence = unique(c(addzero,
round(min * ratio ^ (seq(from = 0,
to = floor(log(max / min, base = ratio))))),
max))
return(list(trafo = function(x) ifelse(is.na(x), NA, sequence[x]),
newmin = 1, newmax = length(sequence)))
} else {
if (invert) {
max = 1 - max
min = 1 - min
}
force(min)
force(max)
assert(min > 0)
assert(max > 0)
return(list(trafo = function(x) {
res = min * (max / min)^x
if (invert) 1 - res else res
}, newmin = 0, newmax = 1))
}
}
createExpressionTrafo = function(pmin, pmax, is.int, is.exp) {
force(pmin)
force(pmax)
force(is.int)
force(is.exp)
# env must contain:
# PARAM.x, p (number of features), n (number of rows)
function(x, env) {
if (is.language(pmin)) {
pmin = eval(pmin, envir = env, enclos = globalenv())
}
if (is.language(pmax)) {
pmax = eval(pmax, envir = env, enclos = globalenv())
}
if (!(pmax >= pmin)) {
return(pmin)
}
if (is.int) {
assertIntegerish(pmin, any.missing = FALSE, len = 1)
assertIntegerish(pmax, any.missing = FALSE, len = 1)
pmax = pmax + 1
if (is.exp) {
addzero = pmin == 0
if (addzero) {
pmin = 1
pmax %+=% 1
}
ratio = sqrt((pmin + 1) / pmin)
res = pmin * ratio ^ (x * log(pmax / pmin, base = ratio))
if (addzero) {
res %-=% 1
pmax %-=% 1
}
} else {
res = x * (pmax - pmin) + pmin
}
min(floor(res), pmax - 1)
} else {
trafofn = identity
if (is.exp) {
trafo = createTrafo(pmin, pmax, FALSE, FALSE)
trafofn = trafo$trafo
pmin = trafo$newmin
pmax = trafo$newmax
}
assertNumeric(pmin, any.missing = FALSE, len = 1)
assertNumeric(pmax, any.missing = FALSE, len = 1)
trafofn(x * (pmax - pmin) + pmin)
}
}
}
makeLearnerPars = function(learnerPars) {
for (p in getParamIds(learnerPars)) {
if (!is.null(learnerPars$pars[[p]]$trafo) &&
learnerPars$pars[[p]]$type %in%
c("numeric", "numericvector", "integer", "integervector")) {
# there is a trafo --> need to change limits
if (is.null(learnerPars$pars[[p]]$amlr.origValues)) {
learnerPars$pars[[p]]$lower = -Inf
learnerPars$pars[[p]]$upper = Inf
} else {
learnerPars$pars[[p]]$lower = learnerPars$pars[[p]]$amlr.origValues[[1]]
learnerPars$pars[[p]]$upper = learnerPars$pars[[p]]$amlr.origValues[[2]]
}
# convert type to "numeric(vector)", since after trafo we are not sure
# it is still an int
learnerPars$pars[[p]]$type = switch(learnerPars$pars[[p]]$type,
integer = "numeric",
integervector = "numericvector",
learnerPars$pars[[p]]$type)
}
learnerPars$pars[[p]]$trafo = NULL
# as the things stand now we don't wrap setHyperPars and therefore all
# hyperpars need to be given to the train function. If this ever changes
# (and we e.g. call predictLearner() instead of predict(), and we wrap
# setHyperPars() also) we need to copy the $when property of the
# corresponding LearnerParam inside the modelmultiplexer object.
learnerPars$pars[[p]]$when = "train"
learnerPars$pars[[p]] = addClasses(learnerPars$pars[[p]], "LearnerParam")
# satisfying a weird constraint of mlr:
req = learnerPars$pars[[p]]$requires
if (!is.null(req) && is.expression(req)) {
if (length(req) == 1) {
learnerPars$pars[[p]]$requires = req[[1]]
} else {
learnerPars$pars[[p]]$requires = substitute(eval(x), list(x = req))
}
}
}
learnerPars
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.