#' Cross Validate Penalized Elastic Net S-Estimator (PENSE)
#'
#' @param formula a model formula
#' @param data a training data set
#' @param cv.method preferably one of "boot632" (the default), "cv", or "repeatedcv".
#' @param nfolds the number of bootstrap or cross-validation folds to use. defaults to 5.
#' @param folds a vector of pre-set cross-validation or bootstrap folds from caret::createResample or
#' caret::createFolds.
#' @param nrep the number of repetitions for cv.method = "repeatedcv". defaults to 4.
#' @param tunlen the number of values for the unknown hyperparameter to test. defaults to 10.
#' @param crit the criterion by which to evaluate the model performance. must be one of "MAE" (the default)
#' or "MSE".
#' @param select the selection rule to use. Should be one of "best" or "oneSE" (the default).
#' @param max.c the largest value of the constant for calculating lambda. defaults to 8, but
#' may be adjusted. for example, if the error metric becomes constant after a certain
#' value of C, it may be advisable to lower max.c to a smaller value to obtain
#' a more fine-grained grid over the plausible values.
#' @param min.a the minimum value of the alpha parameter. defaults to 0.
#' @param max.a the maximum value of the alpha parameter. defaults to 1.
#' @return
#' a train object
#' @export
#'
cv_pense = function(formula, data, cv.method = "boot632", nfolds = 5, nrep = 4, folds = NULL, tunlen = 10, crit = "MAE", select = "oneSE", max.c = 8, min.a = 0, max.a = 1){
if (!is.null(folds)) {
nfolds = NULL
}
PENSE <- list(type = "Regression",
library = "pense",
loop = NULL)
PENSE$parameters <- data.frame(parameter = c( "alpha", "C", "base.lambda"),
class = rep("numeric", 3),
label = c("alpha", "C", "base.lambda"))
huber.scale = function(y){
MASS::hubers(y,
initmu =
hubers(y,
initmu = MASS::hubers(y)$mu,
s = sqrt(mean(c(sd(y)^2, mad(y)^2)))
)$mu)$s
}
lm.betas <- lmSolve(formula, data)
model.mat <- model.matrix(formula, data)
lm.pred <- as.vector(lm.betas) %*% t(model.mat)
lm.res <- as.vector(model.frame(formula, data)[,1]) - lm.pred
PENSE$noiseSD <- huber.scale(lm.res)
PENSE$max.c <- max.c
penseGrid <- function(x, y, max.c = PENSE$max.c, noise.sd = PENSE$noiseSD, len = NULL, search = "grid") {
D = nrow(x)
N = length(y)
lambda0 = noise.sd * sqrt(log(D) / N)
C <- seq(1, max.c, length.out = len)
alphas <- seq(min.a, max.a, len = 6)
## use grid search:
if(search == "grid"){
search = "grid"
} else {
search = "grid"
}
grid <- expand.grid(alpha = alphas, C = C)
grid$base.lambda <- rep(lambda0, nrow(grid))
out <- grid
return(out)
}
PENSE$grid <- penseGrid
penseFit <- function(x, y, param, ...) {
f = function(alpha){
sqrt((2 - alpha)^3 / (2 - (1 - alpha)))
}
suppressPackageStartupMessages(require(pense))
initopts = initest_options(keep_solutions = 4, psc_method = "exact", resid_keep_method='threshold', psc_keep = 0.90, resid_keep_thresh = 0.85)
penseopts = pense_options(delta = 0.30,
maxit = 2500,
mscale_maxit = 1000,
eps = 1e-04,
mscale_eps = 1e-05)
lambda = param$C * f(param$alpha) * param$base.lambda
alpha = param$alpha
pense::pense(
x = as.matrix(x),
y = as.vector(y),
lambda = lambda,
alpha = alpha,
ncores = 1,
init_options = initopts,
options = penseopts,
standardize = FALSE
)
}
PENSE$fit <- penseFit
PENSE$prob <- penseFit
pensePred <- function(modelFit, newdata, preProc = NULL, submodels = NULL){
pense:::predict.pense(modelFit, newdata, exact = TRUE)
}
PENSE$predict <- pensePred
postRobResamp = function(pred, obs) {
isNA <- is.na(pred)
pred <- pred[!isNA]
obs <- obs[!isNA]
if (!is.factor(obs) && is.numeric(obs)) {
if (length(obs) + length(pred) == 0) {
out <- rep(NA, 2)
}
else {
huber.mean <- function (y) {
init.robmu = MASS::hubers(y, k = 2.241403, initmu = median(y), s = sd(y))$mu
MASS::hubers(y, k = 1.281552, initmu = init.robmu)$mu
}
robmse <- huber.mean((pred - obs)^2)
robmae <- huber.mean(abs(pred - obs))
out <- c(robmse, robmae)
}
names(out) <- c("MSE", "MAE")
}
else {
if (length(obs) + length(pred) == 0) {
out <- rep(NA, 2)
}
else {
pred <- factor(pred, levels = levels(obs))
requireNamespaceQuietStop("e1071")
out <- unlist(e1071::classAgreement(table(obs, pred)))[c("diag",
"kappa")]
}
names(out) <- c("Accuracy", "Kappa")
}
if (any(is.nan(out)))
out[is.nan(out)] <- NA
out
}
robustSummary = function (data, lev = NULL, model = NULL)
{
if (is.character(data$obs))
data$obs <- factor(data$obs, levels = lev)
postRobResamp(data[, "pred"], data[, "obs"])
}
if (cv.method == "repeatedcv") {
fitControl <- trainControl(method = cv.method,
number = nfolds,
index = folds,
repeats = nrep,
allowParallel = TRUE,
savePredictions = "all",
selectionFunction = select,
summaryFunction = robustSummary,
search = "grid")
} else {
fitControl <- trainControl(method = cv.method,
number = nfolds,
index = folds,
allowParallel = TRUE,
savePredictions = "all",
selectionFunction = select,
summaryFunction = robustSummary,
search = "grid")
}
fitted.models <- train(formula, data,
method = PENSE,
metric = crit,
tuneLength = tunlen,
maximize = FALSE,
preProcess = c("center", "scale"),
trControl = fitControl)
f = function(alpha){
sqrt((2 - alpha)^3 / (2 - (1 - alpha)))
}
lambda <- fitted.models$results$C * f(fitted.models$results$alpha) * fitted.models$results$base.lambda
fitted.models$results <- cbind.data.frame(fitted.models$results[,1:3],
lambda = lambda,
fitted.models$results[,4:ncol(fitted.models$results)])
return(fitted.models)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.