#' @title MboPlotFit
#'
#' @include MboPlotFit-helpers.R
#'
#' @import checkmate
#' @import mlrMBO
#' @import ParamHelpers
#' @import BBmisc
#' @import ggplot2
#'
#' @importFrom R6 R6Class
#'
#' @description
#' This class plots the quality of the single models fit in the mbo run.
#'
#' @export
MboPlotFit = R6Class(
"MboPlotFit",
inherit = MboPlot,
public = list(
#' @description
#' Creates a new instance of this [R6][R6::R6Class] class.
#'
#' @param opt_state ([OptState]).
initialize = function(opt_state) {
param_set = makeParamSet(makeIntegerParam("highlight_iter"), makeLogicalParam("predict_y_iter_surrogate"))
param_vals = list(highlight_iter = 1L, predict_y_iter_surrogate = FALSE) # default value, else set with function `set_param_vals()`
super$initialize(opt_state, param_set, param_vals)
},
#' @description
#' Plots the fit of the model using R-squared for each iteration of the mbo run.
#'
#' @param highlight_iter (\code{integer(1) | NULL})\cr
#' Specifies the iteration to be highlighted. The default \code{NULL} does not highlight any iteration.
#' @param predict_y_iter_surrogate (\code{logical(1) | FALSE})\cr
#' Specifies if y_hat is predicted with the surrogate from the chosen iteration. If FALSE y_hat is taken from the optimization
#' path, i.e. predicted based on surrogate of the respective iteration. If TRUE we use the surrogate of 'highlight_iter' iteration
#' to predict all points based on the search space x again.
#'
#' @return ([ggplot]).
plot = function(highlight_iter = self$param_vals$highlight_iter, predict_y_iter_surrogate = self$param_vals$predict_y_iter_surrogate) {
opt_path = self$opt_state$opt.path
control = self$opt_state$opt.problem$control
models = self$opt_state$opt.result$stored.models
models = if (inherits(models, "WrappedModel")) list(models) else models # save stored models from mbo-run as list
opt_path_df = as.data.frame(opt_path)
n_iters = opt_path_df[nrow(opt_path_df), "dob"]
names_x = names(opt_path$par.set$pars)
if (!is.null(highlight_iter)) {
assertMultiClass(highlight_iter, c("integer", "numeric"))
if (n_iters < highlight_iter) {
messagef("highlight_iter = %i > n_iters= %i: highlight_iter automatically set to n_iters",
highlight_iter, n_iters)
highlight_iter = n_iters
}
}
# generate opt_path for each iteration "i" with the seen points until "i", i.e. for plot R2
opt_path_iters = lapply(as.list(seq(1:n_iters)), function(row) opt_path_df[opt_path_df$dob != 0, ][1:row,])
# calculate R2
R2 = mapply(function(opt_path) {
RSQOverIterations(opt_path, names_x)
},opt_path = opt_path_iters)
df_r2 = data.frame(R2 = R2, iter = seq(1:n_iters))
# create data for plot y vs. yhat
opt_path_iter = opt_path_df[opt_path_df$dob != 0, ][1:highlight_iter,] # only df until iteration under inspection, i.e. 'highlight_iter'
model_iter = models[highlight_iter]
opt_path_x_iter = getOptPathX(opt_path, 1:highlight_iter)
infill.mean = makeMBOInfillCritMeanResponse()$fun
infill.std = makeMBOInfillCritStandardError()$fun
# if we want to predict all points based on the surrogate of the iteration under inspection, i.e. 'highlight_iter' we calculate mean and se prediction.
if (predict_y_iter_surrogate) {
y_hat = ifelse(control$minimize, 1, -1) * infill.mean(opt_path_x_iter, model_iter, control)
y_hat_se = abs(infill.std(opt_path_x_iter, model_iter, control))
} else {
# else we use the predicted mean and se from the respective surrogate of the iteration, i.e. from the opt_path.
y_hat = opt_path_df[opt_path_df$dob != 0, "mean"] [1:highlight_iter]
y_hat_se = opt_path_df[opt_path_df$dob != 0, "se"] [1:highlight_iter]
}
# calculate range considering absolute se and create df for plot.
y_min = y_hat - y_hat_se
y_max = y_hat + y_hat_se
y_eval = opt_path_iter$y
y_df = data.frame(y.hat = y_hat, y.eval = y_eval, y.absdiff = abs(y_hat - y_eval), iters = seq(1:highlight_iter),
y.min = y_min, y.max = y_max)
current_iter = y_df[nrow(y_df), ,drop = FALSE]
gg_r2 = ggplot(df_r2, aes(x = iter, y = R2))
gg_r2 = gg_r2 + geom_line(na.rm = TRUE)
gg_r2 = gg_r2 + geom_point(shape = 4, na.rm = TRUE)
if (!is.null(highlight_iter)) {
df_line = data.frame(x = c(highlight_iter, highlight_iter), y = c(0, Inf))
gg_r2 = gg_r2 + geom_line(data = df_line, mapping = aes(x = x, y = y),
col = "black", lty = "dashed")
}
gg_r2 = gg_r2 + xlab("Iteration")
gg_r2 = gg_r2 + ylab(bquote(R^2))
gg_r2 = gg_r2 + ggtitle(bquote("In-Sample"~R^2))
# plot y vs. yhat
gg_y = ggplot(y_df, aes(x = y.eval, y = y.hat, col = iters))
gg_y = gg_y + geom_pointrange(aes(ymin = y.min, ymax = y.max), data = y_df)
gg_y = gg_y + geom_point(data = current_iter, aes(x = y.eval, y = y.hat), col = "red", size = 3)
gg_y = gg_y + geom_abline(slope = 1)
gg_y = gg_y + ylab(expression(hat(y)))
gg_y = gg_y + xlab(expression(y))
gg_y = gg_y + labs(color = "Iteration")
gg_y = gg_y + ggtitle(paste("Predicted vs. True Target in Iteration", highlight_iter))
gg = ggarrange(gg_r2, gg_y, ncol = 2)
return(gg)
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.