#' @title Decompose the predicted value based on the given features
#'
#' @description
#' This function visualizes the contribution of each feature regarding the predicted value.
#' By default, multiple base learners defined on one feature are aggregated. If you
#' want to show the contribution of single base learner, then set `aggregate = FALSE`.
#'
#' @return `ggplot` object containing the graphic.
#' @param cboost ([Compboost])\cr
#' A trained [Compboost] object.
#' @param newdata (`data.frame()`)\cr
#' Data frame containing exactly one row holding the new observations.
#' @param aggregate (`logical(1L)`)\cr
#' Number of colored base learners added to the legend.
#' @param colbreaks (`numeric()`)\cr
#' Breaks to visualize/highlight different predicted values. Default creates different
#' colors for positive and negative score values. If set to `NULL` no coloring
#' is applied.
#' @param collabels (`character(length(colbreaks) - 1)`)\cr
#' Labels for the color breaks. If set to `NULL` intervals are used as labels.
#' @param nround (`integer(1L)`)\cr
#' Digit passed to [round] for labels (default is `nround = 2L`).
#' @param offset (`logical(1L)`)\cr
#' Flag to indicate whether the offset should be added to the figure or not.
#' @examples
#' dat = mtcars
#' fnum = c("cyl", "disp", "hp", "drat", "wt", "qsec")
#' fcat = c("vs", "am", "gear", "carb")
#' for (fn in fcat) dat[[fn]] = as.factor(dat[[fn]])
#'
#' cboost = Compboost$new(data = dat, target = "mpg",
#' loss = LossQuadratic$new())
#'
#' for (fn in fnum) cboost$addComponents(fn, df = 3)
#' for (fn in fcat) cboost$addBaselearner(fn, "ridge", BaselearnerCategoricalRidge)
#' cboost$train(500L)
#' cbreaks = c(-Inf, -0.1, 0.1, Inf)
#' clabs = c("bad", "middle", "good")
#' plotIndividualContribution(cboost, dat[10, ], colbreaks = cbreaks,
#' collabels = clabs)
#' plotIndividualContribution(cboost, dat[10, ], offset = FALSE,
#' colbreaks = cbreaks, collabels = clabs)
#' @export
plotIndividualContribution = function(cboost, newdata, aggregate = TRUE, colbreaks = c(-Inf, 0, Inf),
collabels = c("negative", "positive"), nround = 2L, offset = TRUE) {
if (! requireNamespace("ggplot2", quietly = TRUE)) stop("Please install ggplot2 to create plots.")
checkmate::assertClass(cboost, "Compboost")
checkmate::assertLogical(aggregate, len = 1L)
if (! is.null(colbreaks))
checkmate::assertNumeric(colbreaks, finite = FALSE, min.len = 2L)
if (! is.null(collabels)) {
if (is.null(colbreaks))
checkmate::assertCharacter(collabels)
else
checkmate::assertCharacter(collabels, len = length(colbreaks) - 1)
}
checkmate::assertIntegerish(nround, len = 1L)
checkmate::assertLogical(offset, len = 1L)
if (is.null(cboost$model))
stop("Model has not been trained!")
if (! cboost$model$isTrained())
stop("Model has not been trained!")
feats = vapply(cboost$model$getFactoryMap(), FUN.VALUE = character(1), FUN = function(blf) {
paste(unique(blf$getFeatureName()), collapse = "_")
})
blnames = names(feats)
if (offset)
df_bls = data.frame(bl = c(blnames, "offset"), feat = c(feats, "offset"))
else
df_bls = data.frame(bl = blnames, feat = feats)
nuisance = lapply(unique(cboost$bl_factory_list$getDataNames()), function(fn) {
checkmate::assertChoice(fn, choices = colnames(newdata))
})
checkmate::assertDataFrame(newdata, nrows = 1L)
ll_ds = cboost$prepareData(newdata)
ll_preds = c(cboost$model$predictIndividual(ll_ds), offset = cboost$model$getOffset())
df_preds = data.frame(bl = names(ll_preds), value = unname(unlist(ll_preds)))
df_plt = merge(df_bls, df_preds, by = "bl", all.x = TRUE)
df_plt$value[is.na(df_plt$value)] = 0
fval = paste0("(", vapply(X = newdata, FUN.VALUE = character(1L), FUN = function(x) {
if (is.numeric(x)) return(as.character(round(x, nround)))
return(as.character(x))
}), ")")
df_fval = do.call(rbind, lapply(cboost$baselearner_list, function(f) {
fn = f$factory$getFeatureName()
fl = vapply(fn, function(f) {
x = newdata[[f]]
if (is.numeric(x)) return(as.character(round(x, nround)))
return(as.character(x))
}, character(1))
out = data.frame(feat = paste(fn, collapse = "_"), label = paste(sprintf("%s (%s)", fn, fl), collapse = ", "))
return(out)
}))
rownames(df_fval) = NULL
if (offset) {
df_fval = rbind(df_fval, data.frame(feat = "offset", label = "offset"))
}
if (aggregate) {
df_plt = aggregate(x = df_plt$value, by = list(df_plt$feat), FUN = sum)
names(df_plt) = c("feat", "value")
df_plt = merge(df_plt, df_fval, by = "feat")
} else {
df_plt$feat = df_plt$bl
df_plt$label = df_plt$feat
}
df_plt = df_plt[order(df_plt$value, decreasing = TRUE), ]
df_plt$bl_num = rev(seq_len(nrow(df_plt)))
if (! is.null(colbreaks)) {
if (! is.null(collabels))
df_plt$cbreak = cut(df_plt$value, breaks = colbreaks, labels = collabels)
else
df_plt$cbreak = cut(df_plt$value, breaks = colbreaks)
}
pred = round(sum(df_plt$value), nround)
subtitle = paste0("Score: ", pred)
.data = ggplot2::.data
if (is.null(colbreaks[1]))
gg = ggplot2::ggplot(df_plt, ggplot2::aes(x = .data$value, y = .data$bl_num))
else
gg = ggplot2::ggplot(df_plt, ggplot2::aes(x = .data$value, y = .data$bl_num,
color = .data$cbreak, fill = .data$cbreak))
gg = gg +
ggplot2::geom_vline(xintercept = 0, color = "dark grey", alpha = 0.6) +
ggplot2::geom_segment(ggplot2::aes(xend = 0, yend = .data$bl_num)) +
ggplot2::geom_point() +
ggplot2::ylab("") +
ggplot2::xlab("Contribution to predicted value") +
ggplot2::labs(color = "", fill = "") +
ggplot2::scale_y_continuous(labels = df_plt$label, breaks = df_plt$bl_num) +
ggplot2::ggtitle("Prediction", subtitle)
return(gg)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.