factorize: Factorize tensor product model

factorizeR Documentation

Factorize tensor product model

Description

Factorize an FDboost tensorporduct model into the response and covariate parts for effect visualization.

Factorize an mfboost tensorporduct model into the response and covariate parts for effect visualization.

Usage

factorize(x, ...)

## S3 method for class 'mfboost'
factorize(
  x,
  blwise = TRUE,
  newdata = NULL,
  newformula = NULL,
  newobj.formula = NULL,
  ...
)

Arguments

x

a model object of class mfboost.

...

other arguments passed to methods (no arguments so far).

blwise

logical, should the factorization be carried out base-learner-wise (TRUE, default) or for the whole model simultaneously.

Value

a list of two mboost models of class mboost_fac containing basisfunctions for response and covariates, respectively, as base-learners.

a list of two mboost models of class mboost_fac containing basisfunctions for response and covariates, respectively, as base-learners.

Examples


# generate irregular toy data -------------------------------------------------------

n <- 100
m <- 40
# covariates
x <- seq(0,2,len = n)
# time & id
set.seed(90384)
t <- runif(n = n*m, -pi,pi)
id <- sample(1:n, size = n*m, replace = TRUE)

# generate components
fx <- ft <- list()
fx[[1]] <- exp(x)
d <- numeric(2)
d[1] <- sqrt(c(crossprod(fx[[1]])))
fx[[1]] <- fx[[1]] / d[1]
fx[[2]] <- -5*x^2
fx[[2]] <- fx[[2]] - fx[[1]] * c(crossprod(fx[[1]], fx[[2]])) # orthogonalize fx[[2]]
d[2] <- sqrt(c(crossprod(fx[[2]])))
fx[[2]] <- fx[[2]] / d[2]
ft[[1]] <- sin(t)
ft[[2]] <- cos(t)
ft[[1]] <- ft[[1]] / sqrt(sum(ft[[1]]^2))
ft[[2]] <- ft[[2]] / sqrt(sum(ft[[2]]^2))

mu1 <- d[1] * fx[[1]][id] * ft[[1]]
mu2 <- d[2] * fx[[2]][id] * ft[[2]]
# add linear covariate
ft[[3]] <- t^2 * sin(4*t) 
ft[[3]] <- ft[[3]] - ft[[1]] * c(crossprod(ft[[1]], ft[[3]]))
ft[[3]] <- ft[[3]] - ft[[2]] * c(crossprod(ft[[2]], ft[[3]]))
ft[[3]] <- ft[[3]] / sqrt(sum(ft[[3]]^2))
set.seed(9234)
fx[[3]] <- runif(0,3, n = length(x))
fx[[3]] <- fx[[3]] - fx[[1]] * c(crossprod(fx[[1]], fx[[3]]))
fx[[3]] <- fx[[3]] - fx[[2]] * c(crossprod(fx[[2]], fx[[3]]))
d[3] <- sqrt(sum(fx[[3]]^2))
fx[[3]] <- fx[[3]] / d[3]

mu3 <- d[3] * fx[[3]][id] * ft[[3]]

mu <- mu1 + mu2 + mu3
# add some noise
y <- mu + rnorm(length(mu), 0, .01)
# and noise covariate
z <- rnorm(n)

# fit FDboost model -------------------------------------------------------

dat <- list(y = y, x = x, t = t, x_lin = fx[[3]], id = id)
m <- FDboost(y ~ bbs(x, knots = 5, df = 2, differences = 0) + 
               # bbs(z, knots = 2, df = 2, differences = 0) + 
               bols(x_lin, intercept = FALSE, df = 2)
               , ~ bbs(t),
             id = ~ id, 
             offset = 0, #numInt = "Riemann", 
             control = boost_control(nu = 1), 
             data = dat)
MU <- split(mu, id)
PRED <- split(predict(m), id)
Ti <- split(t, id)
t0 <- seq(-pi, pi, length.out = 40)
MU <- do.call(cbind, Map(function(mu, t) approx(t, mu, t0)$y,
          MU, Ti)) 
PRED <- do.call(cbind, Map(function(mu, t) approx(t, mu, t0)$y,
            PRED, Ti))

opar <- par(mfrow = c(2,2))
image(t0, x, MU)
contour(t0, x, MU, add = TRUE)
image(t0, x, PRED)
contour(t0, x, PRED, add = TRUE)
persp(t0, x, MU, zlim = range(c(MU, PRED), na.rm = TRUE))
persp(t0, x, PRED, zlim = range(c(MU, PRED), na.rm = TRUE))
par(opar)

# factorize model ---------------------------------------------------------

# fac <- shapeboost:::factorize.FDboost(m)
fac <- manifoldboost:::factorize.FDboost(m)
# fac <- factorize(m)

vi <- as.data.frame(varimp(fac$cov))
lattice::barchart(variable ~ reduction, group = blearner, vi, stack = TRUE)

cbind(d^2, sort(vi$reduction, decreasing = TRUE)[1:3])


x_plot <- list(x, x, fx[[3]])

cols <- c("cornflowerblue", "darkseagreen", "darkred")
opar <- par(mfrow = c(3,2))
wch <- c(1,2,10)
for(w in 1:length(wch)) {
  manifoldboost:::plot.FDboost_fac(fac$resp, which = wch[w], col = "darkgrey", ask = FALSE,
       main = names(fac$resp$baselearner[wch[w]]))
  lines(sort(t), ft[[w]][order(t)]*max(d), col = cols[w], lty = 2)
  plot(fac$cov, which = wch[w], 
       main = names(fac$cov$baselearner[wch[w]]))
  points(x_plot[[w]], d[w] * fx[[w]] / max(d), col = cols[w], pch = 3)
}
par(opar)

# re-compose predictions
preds <- lapply(fac, predict)
predf <- rowSums(preds$resp * preds$cov[id, ])
PREDf <- split(predf, id)
PREDf <- do.call(cbind, Map(function(mu, t) approx(t, mu, t0)$y,
                            PREDf, Ti))
opar <- par(mfrow = c(1,2))
image(t0,x, PRED, main = "original prediction")
contour(t0,x, PRED, add = TRUE)
image(t0,x,PREDf, main = "recomposed")
contour(t0,x, PREDf, add = TRUE)
par(opar)

# check out other methods
set.seed(8399)
newdata_resp <- list(t = sort(runif(60, min(t), max(t))))
a <- predict(fac$resp, newdata = newdata_resp, which = 1:5)
plot(newdata_resp$t, a[, 1])
# coef method
cf <- coef(fac$resp, which = 1)


# check factorization on a new dataset ------------------------------------

t_grid <- seq(-pi,pi,len = 30)
x_grid <- seq(0,2,len = 30)
x_lin_grid <- seq(min(dat$x_lin), max(dat$x_lin), len = 30)

# use grid data for factorization
griddata <- expand.grid(
  # time 
  t = t_grid,
  # covariates
  x = x_grid,
  x_lin = 0
)

griddata_lin <- expand.grid(
  t = seq(-pi, pi, len = 30),
  x = 0,
  x_lin = x_lin_grid
)

griddata <- rbind(griddata, griddata_lin)

griddata$id <- as.numeric(factor(paste(griddata$x, griddata$x_lin, sep = ":")))

# fac2 <- shapeboost:::factorize.FDboost(m, newdata = griddata)
fac2 <- manifoldboost:::factorize.FDboost(m, newdata = griddata)

ratio <- -max(abs(predict(fac$resp, which = 1))) / max(abs(predict(fac2$resp, which = 1)))

opar <- par(mfrow = c(3,2))
wch <- c(1,2,10)
for(w in 1:length(wch)) {
  manifoldboost:::plot.FDboost_fac(fac$resp, which = wch[w], col = "darkgrey", ask = FALSE,
                                main = names(fac$resp$baselearner[wch[w]]))
  
  lines(sort(griddata$t), 
        ratio*predict(fac2$resp, which = wch[w])[order(griddata$t)], col = cols[w], lty = 2)
  plot(fac$cov, which = wch[w], 
       main = names(fac$cov$baselearner[wch[w]]))
  this_x <- fac2$cov$model.frame(which = wch[w])[[1]][[1]]
    lines(sort(this_x), 1/ratio*predict(fac2$cov, which = wch[w])[order(this_x)], col = cols[w], lty = 1)
}
par(opar)

# check predictions
p <- predict(fac2$resp, which = 1)

# generate regular toy data --------------------------------------------------

n <- 100
m <- 40
# covariates
x <- seq(0,2,len = n)
# time 
t <- seq(-pi,pi,len = m)
# generate components
fx <- ft <- list()
fx[[1]] <- exp(x)
d <- numeric(2)
d[1] <- sqrt(c(crossprod(fx[[1]])))
fx[[1]] <- fx[[1]] / d[1]
fx[[2]] <- -5*x^2
fx[[2]] <- fx[[2]] - fx[[1]] * c(crossprod(fx[[1]], fx[[2]])) # orthogonalize fx[[2]]
d[2] <- sqrt(c(crossprod(fx[[2]])))
fx[[2]] <- fx[[2]] / d[2]
ft[[1]] <- sin(t)
ft[[2]] <- cos(t)
ft[[1]] <- ft[[1]] / sqrt(sum(ft[[1]]^2))
ft[[2]] <- ft[[2]] / sqrt(sum(ft[[2]]^2))
mu1 <- d[1] * fx[[1]] %*% t(ft[[1]])
mu2 <- d[2] * fx[[2]] %*% t(ft[[2]])
# add linear covariate
ft[[3]] <- t^2 * sin(4*t) 
ft[[3]] <- ft[[3]] - ft[[1]] * c(crossprod(ft[[1]], ft[[3]]))
ft[[3]] <- ft[[3]] - ft[[2]] * c(crossprod(ft[[2]], ft[[3]]))
ft[[3]] <- ft[[3]] / sqrt(sum(ft[[3]]^2))
set.seed(9234)
fx[[3]] <- runif(0,3, n = length(x))
fx[[3]] <- fx[[3]] - fx[[1]] * c(crossprod(fx[[1]], fx[[3]]))
fx[[3]] <- fx[[3]] - fx[[2]] * c(crossprod(fx[[2]], fx[[3]]))
d[3] <- sqrt(sum(fx[[3]]^2))
fx[[3]] <- fx[[3]] / d[3]
mu3 <- d[3] * fx[[3]] %*% t(ft[[3]])

mu <- mu1 + mu2 + mu3
# add some noise
y <- mu + rnorm(length(mu), 0, .01)
# and noise covariate
z <- rnorm(n)

# fit FDboost model -------------------------------------------------------

dat <- list(y = y, x = x, t = t, x_lin = fx[[3]])
m <- FDboost(y ~ bbs(x, knots = 5, df = 2, differences = 0) + 
               # bbs(z, knots = 2, df = 2, differences = 0) + 
               bols(x_lin, intercept = FALSE, df = 2)
               , ~ bbs(t), offset = 0, 
             control = boost_control(nu = 1), 
             data = dat)

opar <- par(mfrow = c(1,2))
image(t, x, t(mu))
contour(t, x, t(mu), add = TRUE)
image(t, x, t(predict(m)))
contour(t, x, t(predict(m)), add = TRUE)
par(opar)

# factorize model ---------------------------------------------------------

# fac <- shapeboost:::factorize.FDboost(m)
fac <- manifoldboost:::factorize.FDboost(m)

vi <- as.data.frame(varimp(fac$cov))
lattice::barchart(variable ~ reduction, group = blearner, vi, stack = TRUE)

cbind(d^2, vi$reduction[c(1:2, 10)])


x_plot <- list(x, x, fx[[3]])

cols <- c("cornflowerblue", "darkseagreen", "darkred")
opar <- par(mfrow = c(3,2))
wch <- c(1,2,10)
for(w in 1:length(wch)) {
  manifoldboost:::plot.FDboost_fac(fac$resp, which = wch[w], col = "darkgrey", ask = FALSE,
       main = names(fac$resp$baselearner[wch[w]]))
  lines(t, ft[[w]]*max(d), col = cols[w], lty = 2)
  plot(fac$cov, which = wch[w], 
       main = names(fac$cov$baselearner[wch[w]]))
  points(x_plot[[w]], d[w] * fx[[w]] / max(d), col = cols[w], pch = 3)
}
par(opar)

# re-compose prediction
preds <- lapply(fac, predict)
PREDSf <- array(0, dim = c(nrow(preds$resp),nrow(preds$cov)))
for(i in 1:ncol(preds$resp))
  PREDSf <- PREDSf + preds$resp[,i] %*% t(preds$cov[,i])

opar <- par(mfrow = c(1,2))
image(t,x, t(predict(m)), main = "original prediction")
contour(t,x, t(predict(m)), add = TRUE)
image(t,x,PREDSf, main = "recomposed")
contour(t,x, PREDSf, add = TRUE)
par(opar)
# => matches

# check out other methods
set.seed(8399)
newdata_resp <- list(t = sort(runif(60, min(t), max(t))))
a <- predict(fac$resp, newdata = newdata_resp, which = 1:5)
plot(newdata_resp$t, a[, 1])
# coef method
cf <- coef(fac$resp, which = 1)

# modeling the FORM/SIZE-AND-SHAPE of IRREGULAR curves -------------------

# load irregular cell data
data("cells", package = "manifoldboost")

# subsample (one for each covariate combination)
cellsub <- as.data.frame(cells[-which(names(cells)=="response")])
cellsub$myd <- factor(with(cellsub, 
                           paste0("a=", a, " r=", r, " b=", b, " m=", m)))
subids <- match(unique(cellsub$myd), cellsub$myd)
cellsub <- as.list(cellsub[subids, ])
cellsub$response <- cells$response[as.numeric(cells$response$id) %in% subids, ]

# fit model
cell_model <- mfboost(
  formula = response ~ bbsc(a, df = 3, knots = 5) + 
    bbsc(r, df = 3, knots = 5) + 
    bbsc(b, df = 3, knots = 5) + 
    bbsc(m, df = 3, knots = 5),
  obj.formula = value^dim ~ 
    bbs(arg, df = 1, differences = 0, knots = 5, 
        boundary.knots = c(0,1), cyclic = TRUE) | id, 
  data = cellsub,
  family = PlanarSizeShapeL2(
    weight_fun = trapez_weights,
    arg_range = c(0,1)),
  control = boost_control(mstop = 300)
  )

# # cross-validation
# set.seed(9382)
# cell_cv <- cvrisk(cell_model,
#                           folds = cvLong(
#                             id = cell_model$id,
#                             weights = cell_model$`(weights)`,
#                             type = "kfold"),
#                           grid = 0:mstop(cell_model))
# cell_model[mstop(cell_cv)]

# plot first four predictions
par(mfrow = c(2,2), mar = rep(2, 4) )
plot(cell_model, ids = 1:4, t = "l", 
     main = cellsub$myd[1:4], 
     seg_par = list(lty = "dashed"))
legend(x = "bottomright", lty = c(1,1, 2),
       legend = c("intercept", "prediction", "point correspondence"), 
       col = c("grey", "black", "grey"))

# compare with data
plot(cell_model, ids = 1:4, t = "l", y0_ = cell_model$family@mf$y_[1:4], 
     main = cellsub$myd[1:4], 
     seg_par = list(lty = "dashed"))
legend(x = "bottomright", lty = c(1,1, 2),
       legend = c("observation", "prediction", "point correspondence"), 
       col = c("grey", "black", "grey"))

# predict dense cells on grids
cellgrid <- cellsub
cellgrid$response <- with(cellgrid$response, expand.grid(
  id = unique(id), 
  arg = seq(0,1, len = 100),
  dim = unique(dim),
  value = NA))
cellgrid$response$value <- predict(cell_model, 
                                   newdata = cellgrid, type = "response")


# factorize effects
cell_fac <- factorize(cell_model)

vimp <- varimp(cell_fac$cov)
plot(vimp, auto.key = FALSE)

# plot two most important effect directions
this <- cell_fac$cov$which(head(names(vimp)[order(vimp, decreasing = TRUE)], 2))
par(mfcol = c(2,2))
plot(cell_fac$resp, which = this, y0_par = list(type="l"))
plot(cell_fac$cov, which = this)



Almond-S/manifoldboost documentation built on June 23, 2022, 11:06 a.m.