# Nothing
# a constructor for cpoRegrResiduals, wrapped using makeFauxCPOConstructor
# documentation below
# Build the target-operating CPO that replaces a regression target by the
# residuals of a model fitted with `learner`.
#
# learner [character(1) | Learner]  regression learner, validated by checkLearner()
# predict.se [logical(1)]           if TRUE the learner is set to predict type "se"
# crr.train.residuals [character(1)] one of "resample", "oob", "plain"
# crr.resampling [ResampleDesc | ResampleInstance] only validated / used for "resample"
#
# Returns whatever makeCPOExtendedTargetOp() returns for the assembled
# parameter set, default values and trafo / retrafo / invert definitions.
makeCPORegrResiduals = function(learner, predict.se = FALSE, crr.train.residuals = "plain", crr.resampling = cv5) {
  assertFlag(predict.se)
  assertChoice(crr.train.residuals, c("resample", "oob", "plain"))
  if (crr.train.residuals == "resample") {
    assert(checkClass(crr.resampling, "ResampleInstance"),
      checkClass(crr.resampling, "ResampleDesc"))
  }

  # the learner must be able to do whatever the chosen options require
  learner = checkLearner(learner, "regr", c(if (predict.se) "se", if (crr.train.residuals == "oob") "oobpreds"))
  learner = setPredictType(learner, if (predict.se) "se" else "response")

  forbidden.pars = c(reserved.params, "crr.train.residuals", "crr.resampling")

  # expose the learner's hyperparameters as hyperparameters of the CPO
  addnl.params = getParamSet(learner)
  addnl.params$pars = dropNamed(addnl.params$pars, forbidden.pars)  # prevent clashes
  addnl.params$pars = lapply(addnl.params$pars, function(x) {
    # this is necessary so the CPO does not complain about unset hyperparameters.
    # TODO there should be a better way.
    x$requires = FALSE
    x
  })
  addnl.params = c(pSSLrn(crr.train.residuals = "plain": discrete[list("resample", "oob", "plain")], crr.resampling: untyped),
    addnl.params)

  # default values: the CPO's own two parameters first, then the learner's current settings
  par.vals = getHyperPars(learner)
  par.vals = dropNamed(par.vals, forbidden.pars)
  par.vals = c(list(crr.train.residuals = crr.train.residuals, crr.resampling = crr.resampling), par.vals)

  # collect the test-set predictions of every resampling iteration
  # rr [ResampleResult]
  # what [character(1)] "response" or "se"
  # returns a numeric matrix with one row per task observation and one column
  # per resampling iteration; entries are NA where the observation was not in
  # that iteration's test set. Averaging is done by the caller.
  predMatFromRR = function(rr, what) {
    rows = rr$task.desc$size
    plist = getRRPredictionList(rr)
    # preallocate so the result is a matrix regardless of `rows` or fold count
    pmat = matrix(NA_real_, nrow = rows, ncol = length(plist$test),
      dimnames = list(NULL, names(plist$test)))
    for (fold in seq_along(plist$test)) {
      p = plist$test[[fold]]
      pmat[p$data$id, fold] = p$data[[what]]
    }
    pmat
  }

  # subtract the prediction from task to get to residuals
  # task [Task]
  # prediction.data [data.frame] the result of predict(...)$data
  taskSubtractPrediction = function(task, prediction.data) {
    tdata = getTaskData(task)
    tname = getTaskTargetNames(task)
    tdata[[tname]] %-=% prediction.data$response
    changeData(task, tdata)
  }

  control = NULL  # pacify static R code check
  data = NULL
  target = NULL
  predict.type = NULL

  makeCPOExtendedTargetOp("regr.residuals", addnl.params, par.vals,
    dataformat = "task",
    properties.data = intersect(cpo.dataproperties, getLearnerProperties(learner)),
    properties.target = "regr",
    predict.type.map = c(response = "response", se = if (predict.se) "se"),
    cpo.trafo = function(data, target, crr.train.residuals, crr.resampling, ...) {
      pars = list(...)  # avoid possible name clash through partial matching with par.vals parameter of setHyperPars
      # learner with the hyperparameters currently set on the CPO; used for
      # both the retrafo model and the resampled predictions
      configured.learner = setHyperPars(learner, par.vals = pars)
      control = train(configured.learner, data)
      # "plain" predictions; rows without oob / resample prediction keep these
      control.invert = dropNamed(predict(control, data)$data, c("id", "truth"))

      if (crr.train.residuals == "oob") {
        # checked again here because crr.train.residuals is a hyperparameter
        if ("oobpreds" %nin% getLearnerProperties(learner)) {
          stop("for crr.train.residuals == 'oob' the Learner needs property 'oobpreds'.")
        }
        if (predict.se) {
          # since 'se' models don't support oobpreds
          # TODO: can go away when mlr-org/mlr#2116 is fixed
          model = train(setHyperPars(setPredictType(learner, "response"), par.vals = pars), data)
        } else {
          model = control
        }
        newresponse = getOOBPreds(model, data)$data$response
        control.invert$response[!is.na(newresponse)] = newresponse[!is.na(newresponse)]
      } else if (crr.train.residuals == "resample") {
        assert(checkClass(crr.resampling, "ResampleInstance"),
          checkClass(crr.resampling, "ResampleDesc"))
        # only test-set predictions are wanted, whatever the user configured
        if (inherits(crr.resampling, "ResampleDesc")) {
          assertString(crr.resampling$predict)
          crr.resampling$predict = "test"
        } else {
          assertString(crr.resampling$desc$predict)
          crr.resampling$desc$predict = "test"
        }
        # resample with the same hyperparameters the model above was trained with
        rr = resample(configured.learner, data, crr.resampling, keep.pred = TRUE, show.info = FALSE)
        if (predict.se) {
          pmat = predMatFromRR(rr, "response")
          precmat = 1 / predMatFromRR(rr, "se")^2
          for (row in seq_along(control.invert$response)) {
            # precision-weighted mean over the folds that predicted this row
            wmean = stats::weighted.mean(pmat[row, , drop = TRUE], precmat[row, , drop = TRUE], na.rm = TRUE)
            if (is.na(wmean)) {
              # no usable resample prediction: keep the "plain" values
              next
            }
            control.invert$response[row] = wmean
            # se derived from the mean precision of the available folds
            control.invert$se[row] = 1 / sqrt(mean(precmat[row, , drop = TRUE], na.rm = TRUE))
          }
        } else {
          pmat = predMatFromRR(rr, "response")
          newresponse = apply(pmat, 1, mean, na.rm = TRUE)
          control.invert$response[!is.na(newresponse)] = newresponse[!is.na(newresponse)]
        }
      }
      taskSubtractPrediction(data, control.invert)
    },
    cpo.retrafo = {
      control.invert = dropNamed(predict(control, newdata = data)$data, c("id", "truth"))
      if (!is.null(target)) {
        taskSubtractPrediction(target, control.invert)
      }
    },
    cpo.invert = {
      inlen = if (predict.type == "se") nrow(target) else length(target)
      if (inlen != nrow(control.invert)) {
        stopf("cpoRegrResiduals prediction to be inverted has different length from original task used for retrafo.")
      }
      if (predict.type == "response") {
        target + control.invert$response
      } else {
        # add the responses; combine the two se values as sqrt(a^2 + b^2)
        cbind(target[, 1, drop = TRUE] + control.invert$response, sqrt(target[, 2, drop = TRUE]^2 + control.invert$se^2))
      }
    })
}
#' @title Train a Model on a Task and Return the Residual Task
#'
#' @template cpo_doc_intro
#'
#' @description
#' Given a regression learner, this \code{\link{CPO}} fits the learner to a
#' regression \code{\link[mlr]{Task}} and replaces the regression target with
#' the residuals of the model, i.e. the differences between the target values
#' and the model's predictions.
#'
#' For inversion, the predictions of the model for the prediction data are
#' added to the predictions to be inverted.
#'
#' If \code{predict.se} is \code{TRUE}, \code{predict.type == "se"} inversion can also
#' be performed. In that case, the \code{se} of the incoming prediction and the \code{se}
#' of the internal model are assumed to be independently distributed, and the resulting
#' \code{se} is the pythagorean sum of the \code{se}s.
#' @param learner [\code{character(1)} | \code{\link[mlr:makeLearner]{Learner}}]\cr
#'   A regression \code{\link[mlr:makeLearner]{Learner}}, or a \code{character(1)} identifying a
#'   \code{\link[mlr:makeLearner]{Learner}} to be constructed.
#' @param predict.se [\code{logical(1)}]\cr
#'   Whether to fit the model with \dQuote{se} predict type. This enables the resulting
#'   \code{\link{CPOInverter}} to be used for \code{predict.type == "se"} inversion.
#'   Default is \code{FALSE}.
#' @param crr.train.residuals [\code{character(1)}]\cr
#'   What residuals to use for training (i.e. initial transformation). One of \dQuote{resample}, \dQuote{oob},
#'   \dQuote{plain}. If \dQuote{resample} is given, the out-of-resampling-fold predictions are used when resampling
#'   according to the \code{crr.resampling} parameter. If \dQuote{oob} is used, the \code{\link[mlr:makeLearner]{Learner}} must
#'   have the \dQuote{oobpreds} property; the out-of-bag predictions are then used. If \code{crr.train.residuals} is
#'   \dQuote{plain}, the simple regression residuals are used. \dQuote{plain} may offer slightly worse performance
#'   than the alternatives, but few \code{mlr} \code{\link[mlr:makeLearner]{Learners}} support \dQuote{oobpreds}, and
#'   \dQuote{resample} can come at a considerable run time penalty. Default is \dQuote{plain}.
#' @param crr.resampling [\code{\link[mlr:makeResampleDesc]{ResampleDesc}} | \code{\link[mlr:makeResampleInstance]{ResampleInstance}}]\cr
#'   What resampling to use when \code{crr.train.residuals} is \dQuote{resample}; otherwise has no effect.
#'   The \code{$predict} slot of the resample description will be ignored and set to \code{test}.
#'   If a data point is predicted by multiple resampling folds, the average residual is used
#'   (weighted by the inverse squared \code{se} of each fold's prediction if \code{predict.se} is \code{TRUE}).
#'   If a data point is not predicted by any resampling fold, the \dQuote{plain} residual is used for that one.
#'   Default is \code{cv5}.
#' @section CPOTrained State:
#' The \code{CPORetrafo} state's \code{$control} slot is the \code{\link[mlr:makeWrappedModel]{WrappedModel}}
#' created when training the \code{learner} on the given data.
#'
#' The \code{CPOInverter} state's \code{$control} slot is a \code{data.frame} of the \dQuote{response} and
#' (if \code{predict.se} is \code{TRUE}) \dQuote{se} columns of the prediction done by the model on the data.
#'
#' @template cpo_doc_outro
#' @export
cpoRegrResiduals = makeFauxCPOConstructor(makeCPORegrResiduals, "regr.residuals", "target")  # nolint
# Register an instance built with the "regr.lm" learner, together with its
# descriptive strings, via registerCPO().
registerCPO(cpoRegrResiduals(learner = "regr.lm"), "target", "residual fitting", "Replace a regression target by regression residuals.")
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.