mboost_tutorial.R
In mboost: Model-Based Boosting

### R code from vignette source 'mboost_tutorial.Rnw'

###################################################
### code chunk number 1: init
###################################################
## set up R session
options(prompt = "R> ", continue = "+  ", width = 80)
pd <- packageDescription("mboost")
if (any(is.na(pd))){
    install.packages("mboost", repos = "http://cran.at.r-project.org")
    pd <- packageDescription("mboost")
}
if (compareVersion(pd$Version, "2.1-0") < 0){ # must be mboost 2.1-X or newer
    warning("Current version of mboost is installed!")
    install.packages("mboost", repos = "http://cran.at.r-project.org")
}
options(digits = 3)
require("mboost")
set.seed(190781)
### make graphics directory if not existing
if (!file.exists("graphics"))
    dir.create("graphics")


###################################################
### code chunk number 2: setup
###################################################
library("mboost")                         ## load package
data("bodyfat", package = "TH.data")      ## load data


###################################################
### code chunk number 3: mod_garcia
###################################################
## Reproduce formula of Garcia et al., 2005
lm1 <- lm(DEXfat ~ hipcirc  + kneebreadth + anthro3a, data = bodyfat)
coef(lm1)


###################################################
### code chunk number 4: mod_garcia_boosted
###################################################
## Estimate same model by glmboost
glm1 <- glmboost(DEXfat ~ hipcirc  + kneebreadth + anthro3a, data = bodyfat)
coef(glm1, off2int=TRUE)    ## off2int adds the offset to the intercept


###################################################
### code chunk number 5: mod_glmboost
###################################################
glm2 <- glmboost(DEXfat ~ ., data = bodyfat)


###################################################
### code chunk number 6: mod_glmboost2
###################################################
preds <- names(bodyfat[, names(bodyfat) != "DEXfat"])        ## names of predictors
fm <- as.formula(paste("DEXfat ~", paste(preds, collapse = "+")))  ## build formula
fm


###################################################
### code chunk number 7: coef_glmboost
###################################################
coef(glm2,        ## usually the argument 'which' is used to specify single base-
     which = "")  ## learners via partial matching; With which = "" we select all.


###################################################
### code chunk number 8: plot_glmboost
###################################################
plot(glm2, off2int = TRUE)    ## default plot, offset added to intercept
## now change ylim to the range of the coefficients without intercept (zoom-in)
plot(glm2, ylim = range(coef(glm2, which = preds)))


###################################################
### code chunk number 9: coefsplot1
###################################################
## same as first plot from chunk "plot_glmboost" but with better margins
par(mar = c(4, 4, 0, 6) + 0.1)
plot(glm2, main="", off2int=TRUE)


###################################################
### code chunk number 10: coefsplot2
###################################################
## same as second plot from chunk "plot_glmboost" but with better margins
par(mar = c(4, 4, 0, 6) + 0.1)
plot(glm2, ylim = range(coef(glm2, which = preds)), main = "")


###################################################
### code chunk number 11: mod_gamboost
###################################################
## now an additive model with the same variables as lm1
gam1 <- gamboost(DEXfat ~ bbs(hipcirc) + bbs(kneebreadth) + bbs(anthro3a),
                 data = bodyfat)


###################################################
### code chunk number 12: plot_gamboost
###################################################
par(mfrow = c(1,3))    ## 3 plots in one device
plot(gam1)             ## get the partial effects


###################################################
### code chunk number 13: partialplots
###################################################
## same as plot from chunk "plot_gamboost" but with better margins
par(mfrow=c(1,3))
par(mar = c(4, 4, 0, 0) + 0.1)
plot(gam1)


###################################################
### code chunk number 14: mod_gamboost2
###################################################
## every predictor enters the model via a bbs() base-learner (i.e., as smooth effect)
gam2 <- gamboost(DEXfat ~ ., baselearner = "bbs", data = bodyfat,
                 control = boost_control(trace = TRUE))


###################################################
### code chunk number 15: mod_gamboost2_refit
###################################################
## refit model to supress trace in cvrisk
## this is only required for the output in the PDF
gam2 <- gamboost(DEXfat ~ ., baselearner = "bbs", data = bodyfat)


###################################################
### code chunk number 16: mod_gamboost2_ctd
###################################################
set.seed(123)          ## set seed to make results reproducible
cvm <- cvrisk(gam2)    ## default method is 25-fold bootstrap cross-validation
## if package 'multicore' is not available this will trigger a warning
cvm
plot(cvm)    ## get the paths


###################################################
### code chunk number 17: cvpaths
###################################################
## same as plot from chunk "mod_gamboost2_ctd"with better margins
par(mar = c(4, 4, 0, 0) + 0.1)
plot(cvm, main = "")


###################################################
### code chunk number 18: mstop
###################################################
mstop(cvm)    ## extract the optimal mstop


###################################################
### code chunk number 19: mstop_ctd
###################################################
gam2[ mstop(cvm) ]    ## set the model automatically to the optimal mstop


###################################################
### code chunk number 20: mstop_save
###################################################
m_cvm <- mstop(cvm)


###################################################
### code chunk number 21: mod_gamboost2_refit2
###################################################
## refit model to get trace again
## this is only required for the output in the PDF
gam2 <- gamboost(DEXfat ~ ., baselearner = "bbs", data = bodyfat,
                 control = boost_control(trace = TRUE))
gam2[ mstop(cvm) ]


###################################################
### code chunk number 22: coef_gamboost_save
###################################################
n_coef <- length(coef(gam2))
coef_string <-
    paste(c("zero", "one", "two", "three", "four",
            "five", "six", "seven", "eight", "nine")[n_coef + 1],
          if (n_coef == 1) "predictor" else "predictors")


###################################################
### code chunk number 23: coef_gamboost
###################################################
names(coef(gam2)) ## displays the selected base-learners at iteration "mstop(cvm)"
## To see that nothing got lost we now increase mstop to 1000:
gam2[1000, return = FALSE]  # return = FALSE just supresses "print(gam2)"


###################################################
### code chunk number 24: coef_gamboost2
###################################################
names(coef(gam2))  ## displays the selected base-learners, now at iteration 1000


###################################################
### code chunk number 25: quantreg_gamboost
###################################################
## Same model as glm1 but now with QuantReg() family
glm3 <- glmboost(DEXfat ~ hipcirc + kneebreadth + anthro3a, data = bodyfat,
                 family = QuantReg(tau = 0.5), control = boost_control(mstop = 500))
coef(glm3, off2int = TRUE)


###################################################
### code chunk number 26: loss
###################################################
loss =  function(y, f) tau * (y - f) * ((y - f) >= 0) +
                       (tau - 1) * (y - f) * ((y - f) < 0)


###################################################
### code chunk number 27: ngradient
###################################################
ngradient = function(y, f, w = NULL)  tau * ((y - f) >= 0) +
                    (tau- 1) * ((y - f) < 0)


###################################################
### code chunk number 28: OurQR
###################################################
OurQuantReg <- function(tau = 0.5){    ## function to include dependency on tau
  Family(                              ## applying the Family function
         loss = function(y, f)                               ## loss as above
                    tau       * (y - f) * ((y - f) >= 0) +
                    (tau - 1) * (y - f) * ((y - f) < 0) ,
         ngradient = function(y, f, w = NULL)                ## ngradient as above
                    tau * ((y - f) >= 0) + (tau - 1) * ((y - f) < 0),
         offset = function(y, w = NULL)                      ## median as offset
                    quantile(y, p = 0.5),
         name = "Our new family for quantile regression" )}
OurQuantReg()


###################################################
### code chunk number 29: ourQR_gamboost
###################################################
## Same model as glm3 but now with our new family
glm3b <- glmboost(DEXfat ~ hipcirc + kneebreadth + anthro3a, data = bodyfat,
                  family = OurQuantReg(tau = 0.5),
                  control = boost_control(mstop = 500))
identical(coef(glm3b), coef(glm3))


###################################################
### code chunk number 30: QR_hipcirc
###################################################
glm4a <- glmboost(DEXfat ~ hipcirc, family = OurQuantReg(tau = 0.05), data = bodyfat,
                  control = boost_control(mstop = 2000))
glm4b <- glmboost(DEXfat ~ hipcirc, family = OurQuantReg(tau = 0.5),  data = bodyfat,
                  control = boost_control(mstop = 2000))
glm4c <- glmboost(DEXfat ~ hipcirc, family = OurQuantReg(tau = 0.95), data = bodyfat,
                  control = boost_control(mstop = 2000))


###################################################
### code chunk number 31: plot_QR_hipcirc
###################################################
ord <- order(bodyfat$hipcirc)    ## order the data to avoid problems when plotting
plot(bodyfat$hipcirc[ord], bodyfat$DEXfat[ord])                   ## observed data
lines(bodyfat$hipcirc[ord], fitted(glm4a)[ord], lty = 2, lwd = 2) ## 0.05 quantile
lines(bodyfat$hipcirc[ord], fitted(glm4b)[ord], lty = 1, lwd = 2) ## median
lines(bodyfat$hipcirc[ord], fitted(glm4c)[ord], lty = 2, lwd = 2) ## 0.95 quantile


###################################################
### code chunk number 32: quantregbodyfat
###################################################
## same plot but with better margings and parameters
par(mar = c(4, 4, 0, 0) + 0.1)
plot(bodyfat$hipcirc[ord], bodyfat$DEXfat[ord])
lines(bodyfat$hipcirc[ord], fitted(glm4a)[ord], lty=2,lwd=2)
lines(bodyfat$hipcirc[ord], fitted(glm4b)[ord], lty=1, lwd=2)
lines(bodyfat$hipcirc[ord], fitted(glm4c)[ord], lty=2, lwd=2)


###################################################
### code chunk number 33: init_figs
###################################################
################################################################################
## the following chunks produce the graphics that are NOT part of the bodyfat ##
## example                                                                    ##
################################################################################


###################################################
### code chunk number 34: init
###################################################
red <- rgb(103,0,31, max = 255) ## define red color


###################################################
### code chunk number 35: center_false
###################################################
## load library mboost
library("mboost")
library("RColorBrewer")
## define colors
cols <- paste(brewer.pal(11,"RdBu")[c(10,2)], "E6", sep="")

## create graphics to show importance of centering
set.seed(1907)
x <- rnorm(50, mean = 5)
y <- rnorm(50, mean = x, sd = 0.3)
## subtract offset as usual in boosting
y <- y - mean(y)

par(ps = 8, cex=1, cex.lab=1, mar=c(3.1,3,0.5,0.1), mgp=c(2,1,0))
xrange <- range(0,x)
plot(y ~ x, xlim = xrange, cex = 1,
     xlab = "x", ylab = "negative gradient in step m = 1",
     pch = 20,
     col = rgb(0.5, 0.5, 0.5, alpha = 0.8))
abline(h = 0, col = "gray", lwd = 0.5)
abline(v = 0, col = "gray", lwd = 0.5)
abline(lm(y ~ x -1), col = cols[1], lwd = 1.5)
points(0, 0, col = cols[2], lwd = 1.5)
points(mean(x), mean(y), col = cols[2], pch = 3, lwd = 1.5)
legend(0.1, 2.35,  legend = c("origin", "center of data", "base-learner"),
       pch = c(21, 3, -1), lty = c(-1, -1, 1),
       col = c(cols[2], cols[2], cols[1]), lwd = 1.5, bg = "white", bty = "n")


###################################################
### code chunk number 36: center_true
###################################################
## create graphics to show importance of centering
set.seed(1907)
x <- rnorm(50, mean = 5)
y <- rnorm(50, mean = x, sd = 0.3)
## subtract offset as usual in boosting
y <- y - mean(y)

## figure with centering of x
mx <- mean(x)
xrange <- range(0,x)
x <- x - mean(x)

par(ps = 8, cex=1, cex.lab=1, mar=c(3.1,3,0.5,0.1), mgp=c(2,1,0))
plot(y ~ x, xlim = xrange - mx, cex = 1,
     xlab = "x (centered)", ylab = "negative gradient in step m = 1",
     pch = 20,
     col = rgb(0.5, 0.5, 0.5, alpha = 0.8))
abline(h = 0, col = "gray", lwd = 0.5)
abline(v = 0, col = "gray", lwd = 0.5)
abline(lm(y ~ x -1), col = cols[1], lwd = 1.5)
points(0, 0, col = cols[2], lwd = 1.5)
points(mean(x), mean(y), col = cols[2], pch = 3, lwd = 1.5)


###################################################
### code chunk number 37: bolsx1
###################################################
### Simulate some data
set.seed(1907)
n <- 100
x1 <- rnorm(n)
x2 <- as.factor(sample(0:3, n, replace = TRUE))
y <- 0.5 * x1 +  rnorm(n)
mod <- gamboost(y ~ bols(x1), control = boost_control(mstop = 25))
par(mar = c(4, 4, 0, 0) + 0.1)
plot(sort(x1), (0.5 * x1)[order(x1)], type = "l", lwd = 2,
     xlab = expression(x[1]), ylab = expression(f(x[1])))
lines(sort(x1), fitted(mod, which = 1)[order(x1)], col = red,
      lwd = 2, type = "b", pch = 20)
legend("topleft", c("true effect", "model"),
       lty = c(1, 1), pch = c(-1, 20), merge = TRUE,
       lwd = 2,
       col = c("black", red), bty = "n")


###################################################
### code chunk number 38: bolsx2
###################################################
beta <- c(0, -1, 0.5, 3)
y <- drop(model.matrix(~ x2) %*% beta + rnorm(n, sd = 0.3))
mod <- gamboost(y ~ bols(x2), control = boost_control(mstop = 50))
par(mar = c(4, 4, 0, 0) + 0.1)
betaPred <- coef(mod)[[1]]
betaPred[1] <- betaPred[1] + attr(coef(mod), "offset")
betaTrue <- c(0, -1, 0.5, 3)
plot(1:4, betaPred, type = "n", xaxt = "n",
     xlab = expression(x[2]), ylab = expression(f(x[2])),
     xlim = c(0.5, 4.5), ylim = c(-1.5, 3.5))
axis(1, at = 1:4, labels = expression(x[2]^(1), x[2]^(2), x[2]^(3), x[2]^(4)))
for (i in 1:4)
    lines(i + c(-0.38, 0, 0.38), rep(betaPred[i], each = 3),
          lwd = 2, col = red, type = "b", pch = 20)
for (i in 1:4)
    lines(i + c(-0.4, 0.4), rep(betaTrue[i], each = 2),
          lwd = 2, col = "black")
legend("topleft", c("true effect", "model"),
       pch = c(-1,20), lty = c(1, 1), lwd = 2,
       col = c("black", red), bty = "n")


###################################################
### code chunk number 39: bbsx1
###################################################
set.seed(1907)
n <- 100
x1 <- rnorm(n)
x2 <- rnorm(n) + 0.25 * x1
x3 <- as.factor(sample(0:1, n, replace = TRUE))
x4 <- gl(4, 25)
y <- 3 * sin(x1) + x2^2 + rnorm(n)

## specify knot locations for bbs(x2, ...)
knots.x2 <- quantile(x2, c(0.25, 0.5, 0.75))

## fit the model
mod <- gamboost(y ~ bbs(x1, knots = 20, df = 4) +
                bbs(x2, knots = knots.x2, df = 5) +
                bols(x3) + bols(x4))

## plot partial effects
par(mar = c(4, 4, 0, 0) + 0.1)
plot(sort(x1), (3 * sin(x1))[order(x1)], type = "l", lwd = 2,
     xlab = expression(x[1]), ylab =  expression(f[1](x[1])))
lines(sort(x1), fitted(mod, which = 1)[order(x1)], col = red, lwd = 2,
      type = "b", pch = 20)
legend("topleft", c("true effect", "model"),
       lty = c(1, 1), pch = c(-1,20), merge = TRUE, lwd = 2,
       col = c("black", red), bty = "n")


###################################################
### code chunk number 40: bbsx2
###################################################
par(mar = c(4, 4, 0, 0) + 0.1)
plot(sort(x2), (x2^2)[order(x2)], type = "l", lwd = 2,
     xlab = expression(x[2]), ylab =  expression(f[2](x[2])))
## offset needed to conserve correct level (otherwise center both effects around
## zero to make them comparable)
lines(sort(x2), fitted(mod, which = 2)[order(x2)] + mod$offset, col = red, lwd = 2,
      type = "b", pch = 20)


###################################################
### code chunk number 41: cyclic1
###################################################
### example for cyclic spline
set.seed(1907)
true_f <- function(x)
    cos(x) + 0.25 * sin(4 * x)
x <- runif(150, 0, 2 * pi)
y <- rnorm(150, mean = true_f(x), sd=0.1)
newX <- seq(0, 2*pi, length = 200)
mod <- gamboost(y ~ bbs(x, knots = 20, degree = 4))
mod[3000]
mod_cyclic <- gamboost(y ~ bbs(x, cyclic=TRUE, knots = 20,
                               degree = 4, boundary.knots=c(0, 2*pi)))
mod_cyclic[3000]

par(mar = c(4, 4, 0, 0) + 0.1)
plot(x,y, ylab = "f(x)",
     pch = 20, xlim = c(0, 7),
     col = rgb(0.5, 0.5, 0.5, alpha = 0.8))
lines(newX, predict(mod, data.frame(x = newX)),
      col = "black", lwd = 2)
lines(newX + 2 * pi, predict(mod, data.frame(x = newX)),
      col = "black", lwd = 2)
legend("bottomleft", c("cyclic = FALSE"),
       lty = c(1), col = c("black"), lwd = 2, bty = "n")


###################################################
### code chunk number 42: cyclic2
###################################################
par(mar = c(4, 4, 0, 0) + 0.1)
plot(x, y, ylab = "f(x)",
     pch = 20, xlim = c(0, 7),
     col = rgb(0.5, 0.5, 0.5, alpha = 0.8))
lines(newX, predict(mod_cyclic, data.frame(x = newX)),
      col = red, lwd = 2)
lines(newX + 2 * pi, predict(mod_cyclic, data.frame(x = newX)),
      col = red, lwd = 2)
abline(v = 2 * pi, col = "gray", lty = "dashed", lwd = 2)
legend("bottomleft", c("cyclic = TRUE"),
       lty = c(1), col = c(red), lwd = 2, bty = "n")


###################################################
### code chunk number 43: bspatial1
###################################################
set.seed(1907)
x1 <- runif(250,-pi,pi)
x2 <- runif(250,-pi,pi)
y <- sin(x1) * sin(x2) + rnorm(250, sd = 0.4)
modspa <- gamboost(y ~ bspatial(x1, x2, knots = list(x1 = 12, x2 = 12)))
# possible to specify different knot mesh for x1 and x2:
# modspa <- gamboost(y ~ bspatial(x1, x2, knots = list(x1 = 12, x2 = 20)))

library(lattice)
library(RColorBrewer)
## set up colorscheme
nCols <- 50
cols <- colorRampPalette(rev(brewer.pal(11, "RdBu")), space = "Lab",
                         interpolate = "spline")(nCols)
## make new data on a grid:
nd <- expand.grid(x1 = seq(-pi, pi, length = 100),
                  x2 = seq(-pi, pi, length = 100))
preds <- predict(modspa, newdata = nd)
breaks <- seq(min(preds), max(preds), length = nCols + 1)
print(levelplot(preds ~ nd$x1 + nd$x2,
          xlab = expression(x[1]),
          ylab = expression(x[2]),
          at = breaks,
          col.regions = cols, cex = 0.7,
          colorkey = list(space = "left", at = breaks, width = 1)))


###################################################
### code chunk number 44: bspatial2
###################################################
x1 <- x2 <- seq(-pi, pi, length = 50)
nd <- expand.grid(x1 = x1,
                  x2 = x2)
preds <- predict(modspa, newdata = nd)
z <- matrix(preds, ncol = length(x1))
nrz <- nrow(z)
ncz <- ncol(z)
jet.colors <- colorRampPalette(paste(brewer.pal(11,"RdBu")[c(10,2)],
                                     "E6", sep=""))
nbcol <- 10
color <- jet.colors(nbcol)
zfacet <- z[-1, -1] + z[-1, -ncz] + z[-nrz, -1] + z[-nrz, -ncz]
facetcol <- cut(zfacet, nbcol)
par(mar=c(1, 0.1, 0.1, 0.1), mgp = c(1.8,1,0))
persp(x1, x2, z,
      theta = 45, phi = 30, expand = 0.5,
      col = color[facetcol], ticktype = "detailed",
      nticks = 3,
      xlab = "x1", ylab = "x2")


###################################################
### code chunk number 45: families
###################################################
pdf("./graphics/fig-family.pdf", width = 5, height = 4)
par(mar = c(4, 4, 0, 0) + 0.1)
## Gaussian
x <- seq(-2, 2, length = 200)
plot(x,x^2, type = "l", ylab = expression(rho), xlab = expression(y-f))

## Laplace
x <- seq(-2, 2, length = 200)
lossL1 <- function(x, param) abs(x)
mp <- 0.5
dat <- data.frame(x = rep(x, length(mp)),
                  y = as.numeric(sapply(mp, function(z) lossL1(x, param = z))),
                  param = rep(mp, each = length(x)))
dat$tau <- factor(dat$param)
plot(x, dat$y, type = "l", ylab = expression(rho), xlab = expression(y-f))

## Huber
x <- seq(-2, 2, length = 200)
lossH <- function(x, param) ifelse(abs(x) <= param, (1/2) * x^2, param * (abs(x) - param / 2))
mp <- c(0.2, 0.5, 1, 2, 10)
dat <- data.frame(x = x, sapply(mp, function(z) lossH(x, param = z)))
plot(x, dat$X1, type = "l", ylim = c(0,2), ylab = expression(rho),
     xlab = expression(y-f))
legend(0, 2, xjust = 0.5, legend = paste(mp), title = expression(delta),
       lty = 1, col = c(1,3,4,5,6), cex = 0.6, box.col = "gray")
for(i in 1:(length(mp)-1)){
    lines(x, dat[,i+2], col = i+2)
}

## Quantile
x <- seq(-2, 2, length = 200)
lossL1 <- function(x, param) ifelse(x >= 0, param  * x, (param - 1) * x)
mp <- c(5:9 / 10)
dat <- data.frame(x = x, sapply(mp, function(z) lossL1(x, param = z)))
plot(x, dat$X1, type = "l", ylab = expression(rho), ylim = c(0,2),
     xlab = expression(y-f))
legend(0,2, xjust = 0.5, legend = paste(mp), title = expression(tau),
       lty = 1, col = c(1,3,4,5,6), cex = 0.6, box.col = "gray")
for(i in 1:(length(mp)-1)){
    lines(x, dat[,i+2], col = i+2)
}

## Binary
x <- seq(-2, 2, length = 200)
dat <- data.frame(x = x, Binomial = log(1+exp(-2*x), base=2), AdaExp = exp(-x))
plot(x, dat$Binomial, type = "l", lty = 1, col = 1, xlab = expression(tilde(y) * f),
     ylab = expression(rho), ylim = c(0,7.5))
lines(x, dat$AdaExp, lty = 2, col = 2)
legend("topright", legend = c("Binomial","AdaExp"), xjust = 0.5, title = "loss",
       lty = 1:2, col = 1:2, cex = 0.8, bty = "n")
dev.off()