density-on-scalar_birth.R
In FDboost: Boosting Functional Regression Models

## ----plot-data, echo=TRUE, warning=FALSE, message=FALSE, fig.cap='Densities of births in Germany per year and sex in $B^2(\\delta)$. Years are coded by different colors and line types, see Figure \\ref{fig:legend}.', fig.height=3.5, fig.width=11, fig.pos='!h'----
# load FDboost package
library(FDboost)
# load birth_densities
data("birthDistribution", package = "FDboost")

# function to plot a matrix or vector containing functions in B^2(delta) or L^2_0(delta);
# Is used for densities, effects, predictions (also clr transformed)
plot_function <- function(plot_matrix, ...) {
  funplot(1:12, plot_matrix, xlab = "month", xaxp = c(1, 12, 11), pch = 20, ...)
  abline( h = 0, col = "grey", lwd = 0.5)
}

# function to create two plots (by sex) from a matrix containing densities or predictions
# (also clr transformed) for males in first half of rows and females in second half
plot_birth_densities <- function(birth_matrix, ylim = range(birth_matrix), ...) {
  par(mfrow = c(1, 2))
  for (k in 1:2) {
    n_obs <- nrow(birth_matrix) / 2
    obs <- 1:n_obs + (k - 1) * n_obs
    plot_function(birth_matrix[obs, ], main = c("Male", "Female")[k], 
                   ylim = ylim, col = rainbow(n_obs, start = 0.5, end = 1), 
                   lty = c(1, 2, 4, 5), ...)
  }
}

# Plot densities
plot_birth_densities(birthDistribution$birth_densities, ylab = "densities")

## ----legend, echo=TRUE, fig.cap='Coding of the years.', fig.height=1.2, fig.width=8, fig.pos='!h'----
# legend (also for later plots)
year_col <- rainbow(70, start = 0.5, end = 1)
year_lty <- c(1, 2, 4, 5)
par(mar = c(0, 0, 0, 0) + 0.1)
plot(NULL, xaxt = "n", yaxt = "n", bty = "n", ylab = "", xlab = "", xlim = 0:1, ylim = 0:1)
legend("top", xpd = TRUE, legend = 1950:2019, lty = year_lty, ncol = 10, bty = "n",
       text.col = year_col, col = year_col, cex = 0.7)

## ----clr-data, echo=TRUE, fig.cap='Clr transformed densities in $L^2_0(\\delta)$. Years are coded by different colors and line types, see Figure \\ref{fig:legend}.', fig.height=3.5, fig.width=11, fig.pos='!h'----
# The function clr() can be used to compute the clr-densities; Our reference measure delta
# corresponds to equal integration weights w = 1 for all density values
birth_densities_clr_test <- t(apply(birthDistribution$birth_densities, 1, clr, w = 1))
# Compare with clr-densities contained in data set
sum(birth_densities_clr_test != birthDistribution$birth_densities_clr)
# Plot clr-densities
plot_birth_densities(birthDistribution$birth_densities_clr, ylab = "clr-densities")

## ----fit-model, echo = TRUE, message=FALSE------------------------------------
model <- FDboost(birth_densities_clr ~ 1 + bolsc(sex, df = 1) + 
                   bbsc(year, df = 1, differences = 1),
                 # use bbsc() in timeformula to ensure integrate-to-zero constraint
                 timeformula = ~bbsc(month, df = 4, 
                                     # December is followed by January of subsequent year
                                     cyclic = TRUE, 
                                     # knots = {1, ..., 12} with additional boundary knot
                                     # 0 (coinciding with 12) due to cyclic = TRUE
                                     knots = 1:11, boundary.knots = c(0, 12), 
                                     # degree = 1 with these knots yields identity matrix 
                                     # as design matrix
                                     degree = 1),
                 data = birthDistribution, offset = 0, 
                 control = boost_control(mstop = 1000))

## ----get-mstop, echo = TRUE, message=FALSE------------------------------------
# set.seed(1708)
# folds <- applyFolds(model, folds = cv(rep(1, model$ydim[1]), type = "bootstrap", B = 10))
# ms <- mstop(folds) # = 999
ms <- 999 
model <- model[ms] 

## ----plot_effects_clr, echo = TRUE, fig.cap='Estimated effects in $L_0^2(\\delta)$. Years are coded by different colors and line types, see Figure \\ref{fig:legend}.', fig.height=3, fig.width=11, fig.pos='!h'----
# Plotting 'model' yields the clr-transformed effects
par(mfrow = c(1, 3))
plot(model, n1 = 12, n2 = 12)

## ----get_and_plot_effects, echo = TRUE, fig.cap='Estimated effects in $B^2(\\delta)$. Years are coded by different colors and line types, see Figure \\ref{fig:legend}.', fig.height=3, fig.width=11, fig.pos='!h'----
# Get estimated clr transformed effects; we use predict(), which returns a matrix of the
# same dimension as the response (140 x 12), i.e., we have to extract the respective rows;
# Alternatively, one could use coef(), but has to specify n1 = 12, n2 = 12 to get the den-
# sities at 1, ..., 12, which also only yields the year effect on a grid of 12 years

# all rows contain intercept
intercept_clr <- predict(model, which = 1)[1, ] 
# first 70 rows contain effect for sex = male, second 70 rows for sex = female
sex_clr <- predict(model, which = 2)[c(1, 71), ] 
# first 70 rows contain effect for years from 1950 to 2019, second 70 rows are repetition
year_clr <- predict(model, which = 3)[1:70, ]

sex_col <- c("blue", "red")
par(mfrow = c(1, 3), mar = c(5, 5, 4, 2) + 0.1)

# Retransform to Bayes Hilbert space using clr(..., inverse = TRUE); Our reference measure
# delta corresponds to equal integration weights w = 1 for all function values
intercept <- clr(intercept_clr, w = 1, inverse = TRUE)
sex <- t(apply(sex_clr, 1, clr, w = 1, inverse = TRUE))
year <- t(apply(year_clr, 1, clr, w = 1, inverse = TRUE))

# Plot retransformed effects
plot_function(intercept, main = "Intercept", ylab = expression(hat(beta)[0]), 
              id = rep(1, 12)) # id is passed to funplot since intercept is a vector
plot_function(sex, main = "Effect of sex", col = sex_col, 
              ylab = expression(hat(beta)["sex"]))
legend("topleft", legend = c("sex = male", "sex = female"), text.col = sex_col, bty = "n")
plot_function(year, main = "Effect of year", col = year_col, 
              ylab = expression(hat(g)("year")), lty = year_lty)

## ----get_and_plot_predictions, echo = TRUE, fig.cap='Predicted densities in $B^2(\\delta)$. Years are coded by different colors and line types, see Figure \\ref{fig:legend}.', fig.height=3.5, fig.width=11, fig.pos='!h'----
predictions_clr <- predict(model)
predictions <- t(apply(predictions_clr, 1, clr, inverse = TRUE))
plot_birth_densities(predictions, ylim = range(birthDistribution$birth_densities), 
                     ylab = "predictions")