# Extract the legend grob from a ggplot object (used to share one legend between plots):
# https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
library(ggplot2)
library(dplyr)
library(mlr3)

g_legend = function(a.gplot) {
  tmp = ggplot_gtable(ggplot_build(a.gplot))
  leg = which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend = tmp$grobs[[leg]]
  return(legend)
}
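# Usage sketch for g_legend(): combine two plots that share a single legend. This follows
# the wiki recipe linked above; note that with recent ggplot2 versions the guide grob
# names may differ, so treat this as an illustration rather than a robust utility.
library(gridExtra)

p1 = ggplot(mtcars, aes(x = mpg, y = wt, color = factor(cyl))) +
  geom_point() +
  theme(legend.position = "bottom")
p2 = ggplot(mtcars, aes(x = hp, y = wt, color = factor(cyl))) +
  geom_point() +
  theme(legend.position = "bottom")
shared_legend = g_legend(p1)
grid.arrange(
  arrangeGrob(p1 + theme(legend.position = "none"),
              p2 + theme(legend.position = "none"), nrow = 1),
  shared_legend, nrow = 2, heights = c(10, 1))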
plotIndividualContributionAC = function(cboost, newdata) {
  checkmate::assertR6(cboost, "Learner")
  checkmate::assertR6(cboost$model$univariate, "Compboost")
  if (! checkmate::testR6(cboost$model$interactions, "Compboost")) {
    warning("No interaction model detected, falling back to `plotIndividualContribution(cboost$model$univariate, newdata)`.")
    return(plotIndividualContribution(cboost$model$univariate, newdata, offset = FALSE))
  }
  checkmate::assertDataFrame(newdata, nrows = 1)

  # Per-base-learner predictions of the univariate and pairwise interaction stages:
  p_uni = cboost$model$univariate$predictIndividual(newdata)
  p_int = cboost$model$interactions$predictIndividual(newdata)
  p_int$offset = NULL

  # Contribution of the deep tree stage, predicted on a pseudo task with zero residuals:
  df_new = newdata
  df_new$residuals = 0
  tsk_new = TaskRegr$new(id = "residuals", backend = df_new, target = "residuals")
  p_trees = predict.residualBooster(cboost$model$deeper_interactions, tsk_new)

  p = c(p_uni, p_int, other = p_trees)
  df_plt = data.frame(blearner = names(p), value = unlist(p),
    stage = rep(c("Univariate", "Pairwise interactions", "Deep trees"),
      times = c(length(p_uni), length(p_int), 1)))
  df_plt$blearner = factor(df_plt$blearner, levels = rev(df_plt$blearner))
  df_plt$stage = factor(df_plt$stage, levels = c("Univariate", "Pairwise interactions", "Deep trees"))
  df_plt$bl_num = rev(seq_along(df_plt$blearner))

  # Map base learner names to feature labels. Note: `task` is not an argument and must
  # be available in the calling environment (it provides feature names and class labels).
  fnames = c("offset", "other", task$feature_names)
  fidx = t(do.call(rbind, lapply(fnames, function(feat) grepl(feat, df_plt$blearner))))
  flab = character(nrow(fidx))
  for (i in seq_len(nrow(fidx))) {
    feats = fnames[fidx[i, ]]
    if (length(feats) > 1) {
      # If several feature names match (e.g. one name is a substring of another),
      # keep only the names that are not contained in any other matching name:
      df_char = data.frame(f = feats, n = nchar(feats))
      df_char = df_char[order(df_char$n, decreasing = TRUE), ]
      df_char$count = sapply(df_char$f, function(fn) sum(grepl(fn, df_char$f)))
      feats = df_char$f[df_char$count == 1]
    }
    # Append the observed feature value to the label:
    feats = vapply(feats, function(f) {
      if (f %in% c("other", "offset")) return(f)
      fval = newdata[[f]]
      if (is.numeric(fval))
        return(paste0(f, " (", round(fval, 2), ")"))
      else
        return(paste0(f, " (", fval, ")"))
    }, character(1L))
    if (length(feats) == 2)
      flab[i] = paste0("Interaction: ", paste(feats, collapse = ", "))
    else
      flab[i] = feats
  }
  flab[flab == "other"] = "Deep trees"
  df_plt$feat = flab

  # Aggregate contributions per feature label:
  df_plt0 = df_plt %>%
    group_by(feat) %>%
    summarize(value = sum(value), stage = stage[1]) %>%
    ungroup() %>%
    arrange(desc(stage)) %>%
    mutate(bl_num = seq_along(value))

  # Derive score, probability, and predicted label (binary classification assumed):
  pred = sum(df_plt0$value)
  prob = 1 / (1 + exp(-pred))
  plabel = ifelse(prob > 0.5, task$positive, task$negative)
  title = "Prediction"
  subtitle = paste0("Score: ", round(pred, 2), " Probability: ", round(prob, 2),
    " Predicted label: ", plabel)

  ggplot(df_plt0, aes(x = value, y = bl_num, color = stage, fill = stage)) +
    geom_vline(xintercept = 0, color = "darkgrey", alpha = 0.6) +
    geom_segment(aes(xend = 0, yend = bl_num)) +
    geom_point() +
    ylab("") +
    xlab("Contribution to predicted value") +
    labs(color = "", fill = "") +
    ggsci::scale_fill_uchicago() +
    ggsci::scale_color_uchicago() +
    #scale_y_reverse() +
    scale_y_continuous(labels = df_plt0$feat, breaks = df_plt0$bl_num) +
    theme_minimal() +
    ggtitle(title, subtitle)
}
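# Usage sketch (not run): `cboost` is assumed to be a trained learner whose $model list
# holds the `univariate`, `interactions`, and `deeper_interactions` stages used above;
# `new_obs` is a hypothetical one-row data.frame and `task` must exist in the calling
# environment (see the comment inside the function).
if (FALSE) {
  plotIndividualContributionAC(cboost, newdata = new_obs)
}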
### Need to put this directly in compboost:
# Predict from an extracted compboost model: sum the per-feature partial effects
# (linear + non-linear part) and add the offset.
predict.compboostExtract = function(object, newdata) {
  feats = names(object)
  feats = feats[feats != "offset"]
  out = lapply(feats, function(ft) {
    if (! ft %in% names(newdata)) {
      warning("New data should contain all features used for building the model! Feature ", ft, " is missing.")
      return(NULL)
    } else {
      eff = object[[ft]]$predict(newdata[[ft]])
      pe = eff$linear + eff$nonlinear
      return(list(pe = pe, effects = eff, src = newdata[[ft]]))
    }
  })
  names(out) = feats
  pred = rowSums(do.call(cbind, lapply(out, function(x) x$pe))) + object$offset
  return(list(pred = pred, pe = out, offset = object$offset))
}
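# Usage sketch: predict.compboostExtract() expects a named list with one entry per
# feature (each exposing a $predict() method that returns linear/non-linear parts) plus
# an `offset` element. The mock object below only illustrates that structure; in
# practice it would come from an extraction step on a fitted compboost model.
mock_effect = list(predict = function(x) list(linear = 0.5 * x, nonlinear = sin(x)))
mock_extract = list(x1 = mock_effect, offset = -0.2)
predict.compboostExtract(mock_extract, data.frame(x1 = c(0, 1, 2)))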
plotCategorical = function(feats, pe, cnums = NULL) {
  plt_dat = do.call(rbind, lapply(feats, function(feat) {
    if (! any(grepl(feat, names(pe))))
      stop("Feature ", feat, " not in partial effects data.")
    fidx = grep(feat, names(pe))
    # If no category names are given, fall back to the row names of the estimates:
    cts = if (is.null(cnums)) rownames(pe[[fidx]]) else cnums
    data.frame(cat = cts, est = pe[[fidx]], feature = feat)
  }))
  if (is.null(cnums))
    cnums = unique(plt_dat$cat)
  plt_dat$cat = factor(plt_dat$cat, levels = cnums)
  ggplot(data = plt_dat, aes(x = cat, y = est)) +
    geom_boxplot() +
    ylab("Contribution to\nprediction") +
    xlab("") +
    labs(color = "") +
    theme(legend.position = "bottom") +
    facet_wrap(. ~ feature, ncol = 3, scales = "free")
}
plotNumeric = function(feats, pe) {
  plt_dat = do.call(rbind, lapply(feats, function(feat) {
    if (! feat %in% names(pe$pe))
      stop("Feature ", feat, " not in partial effects data.")
    p = pe$pe[[feat]]
    data.frame(x = p$src, pe = p$pe, linear = p$effects$linear,
      nonlinear = p$effects$nonlinear, feature = feat)
  }))
  plt_dat$feature = factor(plt_dat$feature, levels = feats)
  ggplot(data = plt_dat, aes(x = x)) +
    geom_line(aes(y = linear, color = "Linear part"), alpha = 0.6, linetype = "dashed") +
    geom_line(aes(y = nonlinear, color = "Non-linear part"), alpha = 0.6, linetype = "dashed") +
    geom_line(aes(y = pe, color = "Partial effect"), size = 1.3) +
    geom_rug() +
    ylab("Contribution to\nprediction") +
    xlab("") +
    labs(color = "") +
    theme(legend.position = "bottom") +
    facet_wrap(. ~ feature, ncol = 3, scales = "free")
}
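# Usage sketch: plot the decomposed partial effect of the mock extract defined above
# (`mock_extract` is the illustrative object from the previous example, not a fitted model).
pe_demo = predict.compboostExtract(mock_extract, data.frame(x1 = seq(0, 2, by = 0.05)))
plotNumeric("x1", pe_demo)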
predictTensor = function(dat, dato, coef, tnames, n_knots, degree) {
  xo1 = dato[[tnames[1]]]
  xo2 = dato[[tnames[2]]]
  x1 = dat[[tnames[1]]]
  x2 = dat[[tnames[2]]]
  knots1 = compboostSplines::createKnots(values = xo1, n_knots = n_knots, degree = degree)
  basis1 = compboostSplines::createSplineBasis(values = x1, degree = degree, knots = knots1)
  knots2 = compboostSplines::createKnots(values = xo2, n_knots = n_knots, degree = degree)
  basis2 = compboostSplines::createSplineBasis(values = x2, degree = degree, knots = knots2)
  tensor = compboostSplines::rowWiseTensor(basis1, basis2)
  return(tensor %*% coef)
}
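# Usage sketch for predictTensor(): evaluate a tensor-product smooth on new data. The
# coefficient vector is random here and only illustrates the expected dimension
# (ncol(basis1) * ncol(basis2)); in practice it comes from the fitted tensor base learner.
set.seed(314)
dat_train = data.frame(x1 = runif(100), x2 = runif(100))
dat_new = dat_train[1:10, ]  # stay inside the training range used for the knots
k1 = compboostSplines::createKnots(values = dat_train$x1, n_knots = 10, degree = 3)
b1 = compboostSplines::createSplineBasis(values = dat_new$x1, degree = 3, knots = k1)
k2 = compboostSplines::createKnots(values = dat_train$x2, n_knots = 10, degree = 3)
b2 = compboostSplines::createSplineBasis(values = dat_new$x2, degree = 3, knots = k2)
coef_demo = rnorm(ncol(b1) * ncol(b2))
predictTensor(dat_new, dat_train, coef_demo, tnames = c("x1", "x2"), n_knots = 10, degree = 3)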