# hytboostnow.R
# ::rtemis::
# 2018 E.D. Gennatas www.lambdamd.org
# 05-2020: switched to hytreenow
#' \pkg{rtemis} internal: Low-level function for boosting Hard Hybrid Trees (no weights)
#'
#' @inheritParams s_GLM
#' @param x Data frame: Input features
#' @param y Vector: Output
#' @param mod Algorithm to boost, for options, see [select_learn]
#' @param mod.params Named list of arguments for `mod`
#' @param learning.rate Float (0, 1] Learning rate for the additive steps
#' @param init Float: Initial value for prediction. Default = mean(y)
#' @param cxrcoef Logical: If TRUE, pass `cxr = TRUE, cxrcoef = TRUE` to [predict.hytreenow]
#' @param tolerance Float: If training error <= this value, training stops
#' @param tolerance.valid Float: If validation error <= this value, training stops
#' @param max.iter Integer: Maximum number of iterations (additive steps) to perform. Default = 10
#' @param trace Integer: If > 0, print diagnostic info to console
#' @param base.verbose Logical: `verbose` argument passed to learner
#' @param print.error.plot String or Integer: "final" plots a training and validation (if available)
#' error curve at the end of training. If integer, plot the training and validation error curves
#' every this many iterations during training
#' @param ... Additional parameters to be passed to learner
#'
#' @return `hytboostnow` object
#' @author E.D. Gennatas
#' @keywords internal
#' @noRd
hytboostnow <- function(x, y,
                        x.valid = NULL, y.valid = NULL,
                        resid = NULL,
                        boost.obj = NULL,
                        case.p = .9,
                        learning.rate = .1,
                        max.iter = 10,
                        # ++ hytreenow params ++
                        max.depth = 5,
                        alpha = 0,
                        lambda = 1,
                        lambda.seq = NULL,
                        minobsinnode = 2,
                        minobsinnode.lin = 10,
                        shrinkage = 1,
                        part.minsplit = 2,
                        part.xval = 0,
                        part.max.depth = 1,
                        part.cp = 0,
                        part.minbucket = 5,
                        lin.type = "glmnet",
                        cv.glmnet.nfolds = 5,
                        cv.glmnet.lambda = "lambda.min",
                        # -- earlystop --
                        earlystop.params = setup.earlystop(),
                        earlystop.using = "train",
                        init = mean(y),
                        cxrcoef = FALSE,
                        print.progress.every = 5,
                        print.error.plot = "final",
                        base.verbose = FALSE,
                        verbose = TRUE,
                        trace = 0,
                        prefix = NULL,
                        print.plot = TRUE,
                        plot.theme = "darkgrid",
                        plot.type = "l") {
  # Arguments ----
  if (!verbose) print.plot <- FALSE
  if (length(max.depth) > 1) stop("max.depth must be scalar integer")

  # hytreenow params ----
  # Bundle all base-learner settings into one list forwarded to hytreenow()
  mod.params <- list(
    max.depth = max.depth,
    shrinkage = shrinkage,
    minobsinnode.lin = minobsinnode.lin,
    # lincoef params --
    alpha = alpha,
    lambda = lambda,
    lambda.seq = lambda.seq,
    minobsinnode = minobsinnode,
    lin.type = lin.type,
    cv.glmnet.nfolds = cv.glmnet.nfolds,
    cv.glmnet.lambda = cv.glmnet.lambda,
    # rpart params --
    part.minsplit = part.minsplit,
    part.xval = part.xval,
    part.max.depth = part.max.depth,
    part.cp = part.cp,
    part.minbucket = part.minbucket
  )
  if (trace > 0) {
    parameterSummary(mod.params,
      init = init,
      max.iter = max.iter,
      learning.rate = learning.rate,
      title = "hytboostnow Parameters"
    )
  }
  if (trace > 0) msg2("Initial MSE =", mse(y, init))

  # Fix: define `fitted` up front so `last.fitted` in the output object is
  # NULL (not an undefined-variable error) if the while loop never runs,
  # e.g. when expanding an existing series by zero iterations
  fitted <- NULL

  if (is.null(boost.obj)) {
    # '- New series ----
    mods <- list()
    Fval <- penult.fitted <- init
    .learning.rate <- numeric()
    error <- vector("numeric")
    error[1] <- mse(y, Fval) # will be overwritten, needed for while statement
    if (!is.null(x.valid)) {
      error.valid <- vector("numeric")
      Fvalid <- init
    } else {
      error.valid <- predicted.valid <- Fvalid <- NULL
    }
    i <- 1
    if (verbose) msg2("[ Boosting Hybrid Tree... ]", sep = "")
  } else {
    # '- Expand series ----
    # Resume training from an existing hytboostnow object's state
    .learning.rate <- boost.obj$learning.rate
    mods <- boost.obj$mods
    Fval <- penult.fitted <- boost.obj$fitted
    error <- boost.obj$error
    if (!is.null(x.valid)) {
      error.valid <- boost.obj$error.valid
      Fvalid <- boost.obj$predicted.valid
    } else {
      error.valid <- predicted.valid <- Fvalid <- NULL
    }
    # max.iter here means "additional iterations"
    max.iter <- max.iter + length(mods)
    i <- length(mods) + 1
    if (trace > 0) msg2("i =", i)
    if (verbose) msg2("[ Expanding boosted Hybrid Tree... ]", sep = "")
  }
  if (is.null(resid)) resid <- y - Fval

  # Print error during training: build the iteration indices at which to print
  if (max.iter == 1 && is.null(boost.obj)) {
    print.progress.index <- FALSE
    print.error.plot <- "none"
  } else if (print.progress.every < max.iter) {
    print.progress.index <- seq(print.progress.every, max.iter, print.progress.every)
  } else {
    print.progress.index <- max.iter
  }
  # Print error plot: numeric print.error.plot means "plot every N iterations"
  if (max.iter > 1 && is.numeric(print.error.plot)) {
    if (print.error.plot < max.iter) {
      print.error.plot.index <- seq(print.error.plot, max.iter, print.error.plot)
    } else {
      print.error.plot.index <- max.iter
    }
    print.error.plot <- "iter"
  }

  # '- Iterate learner ----
  while (i <= max.iter) {
    .learning.rate[i] <- learning.rate
    if (trace > 0) msg2("learning.rate is", .learning.rate[i])
    if (trace > 0) msg2("i =", i)
    # Stochastic boosting: subsample a fraction case.p of cases per iteration
    if (case.p < 1) {
      n.cases <- NROW(x)
      index <- sample(n.cases, case.p * n.cases)
      x1 <- x[index, , drop = FALSE]
      resid1 <- resid[index]
    } else {
      x1 <- x
      resid1 <- resid
    }
    # Fit the next base learner on the current residuals
    mod.args <- c(
      list(
        x = x1, y = resid1,
        verbose = base.verbose
      ),
      mod.params
    )
    mods[[i]] <- do.call(hytreenow, args = mod.args)
    if (cxrcoef) {
      # Also store the case-by-rule matrix and coefficients on the base model
      if (trace > 0) msg2("Updating cxrcoef")
      fitted0 <- predict.hytreenow(mods[[i]], x, cxr = TRUE, cxrcoef = TRUE)
      fitted <- fitted0$yhat
      mods[[i]]$cxr <- fitted0$cxr
      mods[[i]]$cxrcoef <- fitted0$cxrcoef
    } else {
      fitted <- predict.hytreenow(mods[[i]], x)
    }
    names(mods)[i] <- paste0("hytree.", i)
    # Additive update of the ensemble's fitted values
    Fval <- Fval + .learning.rate[i] * fitted
    if (i == max.iter - 1) penult.fitted <- Fval
    resid <- y - Fval
    error[i] <- mse(y, Fval)
    if (!is.null(x.valid)) {
      predicted.valid <- predict.hytreenow(mods[[i]], x.valid)
      Fvalid <- Fvalid + .learning.rate[i] * predicted.valid
      error.valid[i] <- mse(y.valid, Fvalid)
      if (verbose && i %in% print.progress.index) {
        msg2("Iteration #", i, ": Training MSE = ", ddSci(error[i]),
          "; Validation MSE = ", ddSci(error.valid[i]),
          sep = ""
        )
      }
    } else {
      if (verbose && i %in% print.progress.index) {
        msg2("Iteration #", i, ": Training MSE = ", ddSci(error[i]), sep = "")
      }
    }
    if (print.error.plot == "iter" && i %in% print.error.plot.index) {
      if (is.null(x.valid)) {
        mplot3_xy(seq(error), error,
          type = plot.type,
          xlab = "Iteration", ylab = "MSE",
          x.axis.at = seq(error),
          main = paste0(prefix, "LIHAD Boosting"), zerolines = FALSE,
          theme = plot.theme
        )
      } else {
        mplot3_xy(seq(error), list(training = error, validation = error.valid),
          type = plot.type,
          xlab = "Iteration", ylab = "MSE", group.adj = .95,
          x.axis.at = seq(error),
          main = paste0(prefix, "LIHAD Boosting"), zerolines = FALSE,
          theme = plot.theme
        )
      }
    }
    # '- Early stopping ----
    if (!is.null(earlystop.params)) {
      if (earlystop.using == "valid" && !is.null(x.valid)) {
        es <- do.call(earlystop, c(list(x = error.valid), earlystop.params))
      } else {
        es <- do.call(earlystop, c(list(x = error), earlystop.params))
      }
      if (es) {
        # Fix: message must precede break; it was unreachable dead code before
        msg2("Breaking out of iteration", i)
        break
      }
    }
    i <- i + 1
  } # /Iterate learner
  if (trace > 0) msg2("Reached max iterations")

  if (print.error.plot == "final") {
    if (is.null(x.valid)) {
      mplot3_xy(seq(error), error,
        type = plot.type,
        xlab = "Iteration", ylab = "MSE",
        x.axis.at = seq(error),
        main = paste0(prefix, "LIHAD Boosting"), zerolines = FALSE,
        theme = plot.theme
      )
    } else {
      mplot3_xy(seq(error), list(training = error, validation = error.valid),
        type = plot.type,
        xlab = "Iteration", ylab = "MSE", group.adj = .95,
        x.axis.at = seq(error),
        main = paste0(prefix, "LIHAD Boosting"), zerolines = FALSE,
        theme = plot.theme
      )
    }
  }

  # Outro ----
  # '- boost object ----
  obj <- list(
    init = init,
    learning.rate = .learning.rate,
    penult.fitted = penult.fitted,
    fitted = Fval,
    last.fitted = fitted,
    predicted.valid = Fvalid,
    error = error,
    error.valid = error.valid,
    mod.params = mod.params,
    mods = mods
  )
  class(obj) <- c("hytboostnow", "list")
  obj
} # rtemis::hytboostnow
#' Print method for `hytboostnow` object
#'
#' Prints a one-line summary of the boosted series: number of iterations
#' (base models) and the first learning rate used.
#'
#' @param x `hytboostnow` object
#' @param ... Not used
#'
#' @return `x`, invisibly
#' @method print hytboostnow
#' @author E.D. Gennatas
#' @export
print.hytboostnow <- function(x, ...) {
  iterations <- length(x$mods)
  first.lr <- x$learning.rate[1]
  cat("\n A boosted Hybrid Tree with", iterations, "iterations\n")
  cat(" and a learning rate of", first.lr, "\n\n")
  invisible(x)
} # rtemis::print.hytboostnow
#' Predict method for `hytboostnow` object
#'
#' Returns the ensemble prediction: `init` plus the sum of each base model's
#' learning-rate-scaled prediction.
#'
#' @param object `hytboostnow` object
#' @param newdata Data frame or matrix of features. If NULL, the stored
#'   training fitted values are returned unchanged
#' @param n.feat Integer: Keep only the first so many columns of `newdata`.
#'   Note: only applied when `newdata` is not already a data frame
#' @param n.iter Integer: Use only the first so many base models. If NULL,
#'   all models are used
#' @param fixed.cxr List, one element per iteration, passed to
#'   [predict.hytreenow] -- presumably precomputed case-by-rule matrices;
#'   TODO confirm against predict.hytreenow
#' @param as.matrix Logical: If TRUE, return an `NROW(newdata) x n.iter`
#'   matrix whose column i holds the cumulative prediction after i iterations
#' @param n.cores Integer: Passed as `cl` to `pbapply::pbsapply` to
#'   parallelize over iterations
#' @param verbose Logical: Not used
#' @param ... Not used
#' @method predict hytboostnow
#' @author E.D. Gennatas
#' @export
predict.hytboostnow <- function(object,
                                newdata = NULL,
                                n.feat = NCOL(newdata),
                                n.iter = NULL,
                                fixed.cxr = NULL,
                                as.matrix = FALSE,
                                n.cores = 1,
                                verbose = FALSE, ...) {
  # No new data: return training fitted values as-is
  if (is.null(newdata)) {
    return(object$fitted)
  }
  if (!is.null(newdata)) {
    # Coerce matrix-like input to a data frame, preserving existing column
    # names or creating V1..Vn, then keep only the first n.feat columns
    if (!is.data.frame(newdata)) {
      .colnames <- if (!is.null(colnames(newdata))) colnames(newdata) else paste0("V", seq_len(NCOL(newdata)))
      newdata <- as.data.frame(newdata)
      colnames(newdata) <- .colnames
      newdata <- newdata[, seq(n.feat), drop = FALSE]
    }
  }
  # If n.iter is defined, only use the first so many trees, otherwise all
  if (is.null(n.iter)) n.iter <- length(object$mods)
  if (!as.matrix) {
    # Vector output: row-wise sum of init and every scaled base prediction
    predicted <- rowSums(cbind(
      rep(object$init, NROW(newdata)),
      pbapply::pbsapply(seq(n.iter), function(i) {
        predict.hytreenow(object$mods[[i]], newdata,
          fixed.cxr = fixed.cxr[[i]]
        ) * object$learning.rate[i]
      },
      cl = n.cores
      )
    ))
  } else {
    # Matrix output: first compute each iteration's scaled contribution...
    predicted.n <- pbapply::pbsapply(seq(n.iter), function(i) {
      predict.hytreenow(object$mods[[i]], newdata,
        fixed.cxr = fixed.cxr[[i]]
      ) * object$learning.rate[i]
    },
    cl = n.cores
    )
    # ...then accumulate across columns so column i is the prediction of the
    # first i iterations (column 1 includes init)
    predicted <- matrix(nrow = NROW(newdata), ncol = n.iter)
    predicted[, 1] <- object$init + predicted.n[, 1]
    for (i in seq(n.iter)[-1]) {
      predicted[, i] <- predicted[, i - 1] + predicted.n[, i]
    }
  }
  predicted
} # rtemis::predict.hytboostnow
#' Expand boosting series
#'
#' Add iterations to a [boost] object
#'
#' @inheritParams hytboostnow
#' @param object [boost] object to expand
#'
#' @keywords internal
#' @noRd
#' @author E.D. Gennatas
expand.hytboostnow <- function(object,
                               x, y = NULL,
                               x.valid = NULL, y.valid = NULL,
                               resid = NULL,
                               mod.params = NULL,
                               max.iter = 10,
                               learning.rate = NULL,
                               case.p = 1,
                               cxrcoef = FALSE,
                               prefix = NULL,
                               verbose = TRUE,
                               trace = 0,
                               print.error.plot = "final") {
  # Fall back to the object's stored settings when not supplied
  # NOTE(review): mod.params is resolved here but never forwarded --
  # hytboostnow() rebuilds mod.params from its own arguments; confirm intent
  if (is.null(mod.params)) {
    mod.params <- object$mod.params
  }
  # Default learning rate: the most recent one used by the series
  if (is.null(learning.rate)) {
    learning.rate <- rev(object$learning.rate)[1]
  }
  if (trace > 0) msg2("learning.rate =", learning.rate)
  # Delegate to hytboostnow in expand mode (boost.obj = object)
  hytboostnow(
    x = x, y = y,
    x.valid = x.valid, y.valid = y.valid,
    resid = resid,
    boost.obj = object,
    learning.rate = learning.rate,
    max.iter = max.iter,
    init = object$init,
    case.p = case.p,
    cxrcoef = cxrcoef,
    prefix = prefix,
    verbose = verbose,
    trace = trace,
    print.error.plot = print.error.plot
  )
} # rtemis::expand.hytboostnow
#' `as.hytboostnow`: Place a `hytreenow` model in `hytboostnow` structure
#'
#' Wraps a single `hytreenow` model as a one-iteration boosted series.
#'
#' @param object `hytreenow` object
#' @param x Data frame: Input features, used to compute fitted values
#' @param y Vector: Output, used to compute training error
#' @param x.valid,y.valid Not currently used
#' @param learning.rate Float: Learning rate for subsequent iterations
#' @param init.learning.rate Float: Learning rate applied to the wrapped
#'   model's fitted values when `apply.lr` is TRUE. Default = `learning.rate`
#' @param init Float: Initial value of the series. Default = 0
#' @param apply.lr Logical: If TRUE, scale fitted values by
#'   `init.learning.rate`
#'
#' @return `hytboostnow` object
#' @author E.D. Gennatas
#' @keywords internal
#' @noRd
as.hytboostnow <- function(object,
                           x, y,
                           x.valid, y.valid, # not currently used
                           learning.rate = .1,
                           init.learning.rate = learning.rate,
                           init = 0,
                           apply.lr = TRUE) {
  if (!inherits(object, "hytreenow")) {
    stop("Please provide hytreenow object")
  }
  mods <- list()
  mods[[1]] <- object
  # Single base model's fitted values, optionally scaled by the learning rate
  fitted <- predict.hytreenow(object, x)
  if (apply.lr) fitted <- fitted * init.learning.rate
  obj <- list(
    init = init,
    # scalar condition: if/else, not vectorized ifelse()
    learning.rate = if (apply.lr) init.learning.rate else 1,
    fitted = fitted,
    predicted.valid = NULL,
    error = mse(y, fitted),
    error.valid = NULL,
    mods = mods
  )
  class(obj) <- c("hytboostnow", "list")
  obj
} # rtemis::as.hytboostnow
#' Update [boost] object's fitted values
#'
#' Calculate new fitted values for a [boost].
#' Advanced use only: run after updating learning.rate
#'
#' All this will eventually be automated using an R6 object, maybe
#'
#' @method update hytboostnow
#' @param object `hytboostnow` object
#' @param x Data frame: Input features
#' @param y Vector: Output; used to recompute the final training error
#' @param trace Integer: If > 0, print old vs new MSE (requires `y`)
#' @param ... Not used
#'
#' @return Updated `hytboostnow` object
#' @author E.D. Gennatas
#' @keywords internal
#' @noRd
# TODO: save penultimate fitted, add last
update.hytboostnow <- function(object, x, y = NULL,
                               trace = 0, ...) {
  if (trace > 0) fitted.orig <- object$fitted
  # New fitted values: penultimate fitted + last model's scaled contribution
  last.mod <- object$mods[[length(object$mods)]]
  last.lr <- rev(object$learning.rate)[1]
  fitted <- object$penult.fitted + last.lr * predict.hytreenow(last.mod, x)
  # Fix: hytboostnow objects do not store y.train, so mse(object$y.train, .)
  # was always mse(NULL, .). Prefer the y argument; keep the old lookup as a
  # fallback for any object that does carry y.train
  y.ref <- if (!is.null(y)) y else object$y.train
  if (!is.null(y.ref)) {
    object$error[length(object$error)] <- mse(y.ref, fitted)
  }
  if (trace > 0 && !is.null(y)) {
    mse.orig <- mse(y, fitted.orig)
    new.mse <- mse(y, fitted)
    msg2("old mse = ", mse.orig, "; new mse = ", new.mse, sep = "")
  }
  object$fitted <- fitted
  if (trace > 0) msg2("Object updated")
  object
} # rtemis::update.hytboostnow
#' `as.hytboostnow2`: Place a `hytreenow` model in `hytboostnow` structure
#'
#' Wraps a single `hytreenow` model as a one-iteration boosted series,
#' recording both the initial and the subsequent learning rates.
#'
#' @param object `hytreenow` object
#' @param x Data frame: Input features, used to compute fitted values
#' @param y Vector: Output, used to compute training error
#' @param learning.rate Float: Learning rate for subsequent iterations
#' @param init.learning.rate Float: Learning rate applied to the wrapped
#'   model's fitted values when `apply.lr` is TRUE. Default = `learning.rate`
#' @param init Float: Initial value of the series. Default = 0
#' @param apply.lr Logical: If TRUE, scale fitted values by
#'   `init.learning.rate`
#'
#' @author E.D. Gennatas
#' @keywords internal
#' @noRd
as.hytboostnow2 <- function(object,
                            x, y,
                            learning.rate = .1,
                            init.learning.rate = learning.rate,
                            init = 0,
                            apply.lr = TRUE) {
  if (!inherits(object, "hytreenow")) {
    stop("Please provide hytreenow object")
  }
  # Base model's fitted values, optionally scaled by the initial learning rate
  base.fitted <- predict.hytreenow(object, x)
  if (apply.lr) base.fitted <- base.fitted * init.learning.rate
  structure(
    list(
      init = init,
      # store both the initial and the ongoing learning rate
      learning.rate = c(init.learning.rate, learning.rate),
      fitted = base.fitted,
      predicted.valid = NULL,
      error = mse(y, base.fitted),
      error.valid = NULL,
      mods = list(object)
    ),
    class = c("hytboostnow", "list")
  )
} # rtemis::as.hytboostnow2