########################
## Boosting Algorithm ##
########################
### generic method for likelihood-based boosting with component-wise P-splines
### for fitting Cox-type additive models (flexible Cox boosting)
cfboost <- function(x, ...)
    UseMethod("cfboost")
### formula interface
cfboost.formula <- function(formula, data = list(), weights = NULL,
                            na.action = na.omit, control = boost_control(), ...) {
    ## construct design matrix etc.
    object <- boost_dpp(formula, data, weights, na.action)
    object$input <- object$menv@get("input")
    object$data <- data
    object$formula <- formula
    if (!is.null(weights))
        object$oob$input <- object$oob$menv@get("input")
    RET <- cfboost_fit(object, control = control, data = data,
                       weights = weights, ...)
    RET$call <- match.call()
    return(RET)
}
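### Example usage (illustrative sketch, not run): the base-learner
### constructors bols() and bbs() and the data set `mydata` are assumed
### names for illustration; zero weights mark out-of-bag observations
### when risk = "oobag" is used.
# library(survival)
# ctrl <- boost_control(mstop = 100, risk = "oobag", trace = TRUE)
# w <- c(rep(1, 90), rep(0, 10))  # hold the last 10 observations out-of-bag
# mod <- cfboost(Surv(time, event) ~ bols(x1) + bbs(x2), data = mydata,
#                weights = w, control = ctrl)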
### fitting function
cfboost_fit <- function(object, control = boost_control(), data,
                        weights = NULL, ...) {
    ## data and base-learners
    x <- object$input
    class(x) <- "list"
    oob <- list(x = object$oob$input, y = object$oob$y)
    if (!is.null(oob$x)) class(oob$x) <- "list"
    y <- object$y
    if (!inherits(y, "Surv"))
        stop("response is not an object of class ", sQuote("Surv"))
    if (!control$savedata) { ## free memory
        rm("object")
    }
    ## hyper parameters
    mstop <- control$mstop
    risk <- control$risk
    nu <- control$nu
    trace <- control$trace
    tracestep <- options("width")$width / 2
    maxit <- control$maxit  # maximum iterations in optim (see PMLE())
    which.offset <- control$which.offset
    hardStop <- control$hardStop  # option: "continue boosting if minimum not reached" or "stop"
    ## check whether (enough) oob data is present if risk = "oobag"
    if (risk == "oobag") {
        checksum <- sum(weights == 0)
        if (checksum == 0)
            stop("All observations are used for estimation.\nSpecify some weights equal to zero for ", sQuote("risk = oobag"))
        if (checksum < length(weights) / 10)
            warning("Less than 1/10 of the data is used for the out-of-bag risk.\n", sQuote("object$risk"), " might be misleading.")
    }
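    ## Convention: observations with zero weight form the out-of-bag sample
    ## on which the predictive risk is evaluated; observations with positive
    ## weight enter the estimation sample.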
    ## the ensemble
    ens <- rep(NA, mstop)
    if (control$saveensss)
        ensss <- vector(mode = "list", length = mstop)
    else
        ensss <- NULL
    ## vector of empirical risks for all boosting iterations
    mrisk <- rep(NA_real_, mstop)
    maxll <- numeric(length(x))
    coefs <- logLH <- vector(mode = "list", length = length(x))
    fit <- fit_oob <- offset <- getoffset(y, which.offset)
    if (trace)
        cat("Offset: ", offset, "\n\n")
    mstart <- 1
    hSi <- 1  # number of iterations of the repeat loop
    df_est <- matrix(NA, nrow = mstop, ncol = length(x))  # matrix of estimated degrees of freedom
    ## compute df2lambda, which depends on the offset and on y
    if (trace)
        cat("Compute df2lambda .")
    for (i in 1:length(x)) {
        if (!is.null(attr(x[[i]], "df"))) {
            attr(x[[i]], "df2lambda")(y, offset)
            if (trace) cat(".")
        }
    }
    if (trace) cat("\n\n")
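    ## Note: for penalized base-learners (those carrying a "df" attribute),
    ## the prespecified degrees of freedom are translated into the
    ## corresponding smoothing parameter; since this mapping depends on the
    ## offset and on y, it is computed once here before boosting starts.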
    ##################################
    #### start boosting iteration ####
    ##################################
    repeat {
        for (m in mstart:mstop) {
            if (trace)
                cat("Step ", m, "; Progress .", sep = "")
            ## fit the penalized MLE component-wise
            for (i in 1:length(x)) {
                maxll[i] <- NA
                dummy_ens <- ens[1:m]  # the first m entries: m-1 selected base-learners ...
                dummy_ens[m] <- i      # ... plus the m-th, temporarily set to i
                ## try to compute the (component-wise) penalized MLE
                dummy <- try(PMLE(y, x, offset, fit, dummy_ens, nu, maxit))
                if (inherits(dummy, "try-error")) next
                coefs[[i]] <- dummy$par
                maxll[i] <- dummy$maxll
                logLH[[i]] <- dummy$logLH
                if (!is.null(dummy$df)) df_est[m, i] <- dummy$df
                if (trace) cat(".")
            }
            if (trace) cat("\n")
            if (all(is.na(maxll)))
                stop("could not fit base-learner in boosting iteration ", m)
            ## select base-learner
            xselect <- which.max(maxll)
            ## output for debugging
            if (trace)
                cat("\tSelected: ", xselect, " with log LH ", maxll[xselect], "\n", sep = "")
            ## update step
            fit <- fit + nu * x[[xselect]] %*% coefs[[xselect]]
            ## save the model, i.e., the selected coefficient and base-learner
            ens[m] <- xselect
            if (control$saveensss)
                ensss[[m]] <- coefs[[xselect]]
            ## save updated parameters in x[[xselect]]
            x[[xselect]] <- updatecoefs(x[[xselect]], coefs[[xselect]])
            if (risk == "inbag") {
                mrisk[m] <- -logLH[[xselect]](coefs[[xselect]] * nu)
                if (trace)
                    cat("\trisk (inbag) = ", mrisk[m], "\n")
            }
if (risk == "oobag"){
## make a call to PMLE function to built matrices and get the logLH function (without estimation of coefficients)
dummy <- PMLE(oob$y, oob$x, offset, fit_oob, ens[1:m], nu, maxit, estimation = FALSE)
mrisk[m] <- - dummy$logLH(coefs[[xselect]] * nu)
if (trace)
cat("\trisk (oobag) = ", mrisk[m], "\n")
## update step for oob data
oob$x[[xselect]] <- updatecoefs(oob$x[[xselect]], coefs[[xselect]])
fit_oob <- fit_oob + nu * oob$x[[xselect]] %*% coefs[[xselect]]
}
}
        ## stop if hard stopping was requested, if the risk is computed
        ## in-bag, if the minimal oobag risk lies clearly before mstop,
        ## or if mstop was already increased five times
        if (hardStop || risk != "oobag" ||
            which.min(mrisk) < (mstop - mstop * 0.2 / hSi) || hSi == 5)
            break
        ## otherwise the minimum lies at the (current) end of the boosting
        ## path: do not stop but proceed with boosting, i.e., increase mstop
        ## and enlarge the vectors and matrices that store the results
        warning("risk in test sample does not seem to be minimal yet. ",
                sQuote("mstop"), " increased.")
        hSi <- hSi + 1
        increase <- 100
        ens <- c(ens, rep(NA, increase))
        dummy <- vector("list", length = mstop + increase)
        dummy[1:mstop] <- ensss
        ensss <- dummy
        mrisk <- c(mrisk, rep(NA, increase))
        df_est <- rbind(df_est, matrix(NA, nrow = increase, ncol = ncol(df_est)))  # matrix of estimated degrees of freedom
        mstart <- mstop + 1
        mstop <- control$mstop <- mstop + increase
    }
    class(mrisk) <- risk
    RET <- list(ensemble = ens,       ### selected base-learners
                ensembless = ensss,   ### list of coefficients in each iteration
                fit = fit,            ### vector of fitted values
                offset = offset,      ### offset
                control = control,    ### control parameters
                response = y,         ### the response variable
                risk = mrisk,         ### the negative maximum log LH
                weights = weights,    ### weights used for fitting
                df = df_est,          ### estimated degrees of freedom for smooth base-learners
                coefs = lapply(x, getcoefs, nu = nu))  ### coefficients
    ### save the learning sample
    if (control$savedata) RET$data <- object
    ## prediction function: evaluates the linear predictor after mstop
    ## boosting iterations; it relies on the stored coefficient paths,
    ## i.e., it requires saveensss = TRUE in boost_control()
    mstop_final <- mstop  # final number of boosting iterations, used as default below
    RET$predict <- function(newdata = NULL, mstop = mstop_final, ...) {
        if (!is.null(newdata)) {
            if (is.null(colnames(newdata)))
                stop("missing column names for ", sQuote("newdata"))
            if (is.matrix(newdata)) newdata <- as.data.frame(newdata)
        }
        lp <- offset
        for (m in 1:mstop)
            lp <- lp + nu * predict(x[[ens[m]]], newdata = newdata,
                                    newcoefs = ensss[[m]])
        return(lp)
    }
    class(RET) <- "cfboost"
    return(RET)
}
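### Example usage (illustrative sketch, not run): `mod` denotes a fitted
### cfboost object and `newdat` a data set with matching column names;
### both names are assumed here for illustration only.
# lp50 <- mod$predict(newdata = newdat, mstop = 50)  # linear predictor after 50 steps
# lp   <- mod$predict()                              # uses all fitted iterations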