Nothing
#' Compute (Adaptive) Elastic Net M-Estimates of Regression
#'
#' Compute elastic net M-estimates along a grid of penalization levels with optional
#' penalty loadings for adaptive elastic net.
#'
#' @param x `n` by `p` matrix of numeric predictors.
#' @param y vector of response values of length `n`.
#' For binary classification, `y` should be a factor with 2 levels.
#' @param alpha elastic net penalty mixing parameter with \eqn{0 \le \alpha \le 1}.
#' `alpha = 1` is the LASSO penalty, and `alpha = 0` the Ridge penalty.
#' @param scale fixed scale of the residuals.
#' @param nlambda number of penalization levels.
#' @param lambda_min_ratio Smallest value of the penalization level as a fraction of the
#' largest level (i.e., the smallest value for which all coefficients are zero).
#' The default depends on the sample size relative to the number of variables and `alpha`.
#' If more observations than variables are available, the default is `1e-3 * alpha`,
#' otherwise `1e-2 * alpha`.
#' @param penalty_loadings a vector of positive penalty loadings (a.k.a. weights)
#' for different penalization of each coefficient. Only allowed for `alpha` > 0.
#' @param standardize logical flag to standardize the `x` variables prior to fitting the
#' M-estimates. Coefficients are always returned on the original scale.
#' This can fail for variables with a large proportion of a single value
#' (e.g., zero-inflated data). In this case, either compute with
#' `standardize = FALSE` or standardize the data manually.
#' @param lambda optional user-supplied sequence of penalization levels.
#' If given and not `NULL`, `nlambda` and `lambda_min_ratio` are ignored.
#' @param starting_points a list of staring points, created by [starting_point()].
#' The starting points are shared among all penalization levels.
#' @param intercept include an intercept in the model.
#' @param add_zero_based also consider the 0-based regularization path in addition to the given
#' starting points.
#' @param cc cutoff constant for Tukey's bisquare \eqn{\rho} function.
#' @param eps numerical tolerance.
#' @param explore_solutions number of solutions to compute up to the desired precision `eps`.
#' @param explore_tol numerical tolerance for exploring possible solutions.
#' Should be (much) looser than `eps` to be useful.
#' @param max_solutions only retain up to `max_solutions` unique solutions per penalization level.
#' @param comparison_tol numeric tolerance to determine if two solutions are equal.
#' The comparison is first done on the absolute difference in the value of the objective
#' function at the solution.
#' If this is less than `comparison_tol`, two solutions are deemed equal if the
#' squared difference of the intercepts is less than `comparison_tol` and the squared
#' \eqn{L_2} norm of the difference vector is less than `comparison_tol`.
#' @param sparse use sparse coefficient vectors.
#' @param ncores number of CPU cores to use in parallel. By default, only one CPU core is used.
#' Not supported on all platforms, in which case a warning is given.
#' @param algorithm_opts options for the MM algorithm to compute estimates.
#' See [mm_algorithm_options()] for details.
#' @param mscale_bdp,mscale_opts options for the M-scale estimate used to standardize
#' the predictors (if `standardize = TRUE`).
#'
#' @return a list-like object with the following items
#' \describe{
#' \item{`alpha`}{the sequence of `alpha` parameters.}
#' \item{`lambda`}{a list of sequences of penalization levels, one per `alpha` parameter.}
#' \item{`scale`}{the used scale of the residuals.}
#' \item{`estimates`}{a list of estimates. Each estimate contains the following information:
#' \describe{
#' \item{`intercept`}{intercept estimate.}
#' \item{`beta`}{beta (slope) estimate.}
#' \item{`lambda`}{penalization level at which the estimate is computed.}
#' \item{`alpha`}{*alpha* hyper-parameter at which the estimate is computed.}
#' \item{`objf_value`}{value of the objective function at the solution.}
#' \item{`statuscode`}{if `> 0` the algorithm experienced issues when
#' computing the estimate.}
#' \item{`status`}{optional status message from the algorithm.}
#' }
#' }
#' \item{`call`}{the original call.}
#' }
#'
#' @family functions to compute robust estimates
#' @seealso [regmest_cv()] for selecting hyper-parameters via cross-validation.
#' @seealso [coef.pense_fit()] for extracting coefficient estimates.
#' @seealso [plot.pense_fit()] for plotting the regularization path.
#'
#' @export
regmest <- function(x, y, alpha, nlambda = 50, lambda, lambda_min_ratio, scale, starting_points,
penalty_loadings, intercept = TRUE, cc = 4.7,
eps = 1e-6, explore_solutions = 10, explore_tol = 0.1,
max_solutions = 10, comparison_tol = sqrt(eps), sparse = FALSE,
ncores = 1, standardize = TRUE,
algorithm_opts = mm_algorithm_options(), add_zero_based = TRUE,
mscale_bdp = 0.25, mscale_opts = mscale_algorithm_options()) {
call <- match.call(expand.dots = TRUE)
args <- as.list(call[-1L])
args$standardize <- isTRUE(standardize) # Ignore standardize = 'cv_only'!
args <- do.call(.regmest_args, args, envir = parent.frame())
compute_fit <- function (alpha, lambda) {
fit <- .regmest_internal(args$std_data$x, args$std_data$y,
alpha = alpha,
lambda = lambda,
scale = args$scale,
penalty_loadings = args$penalty_loadings,
mest_opts = args$mest_opts,
optional_args = args$optional_args)
# Retain only the best solution:
fit$estimates <- lapply(fit$estimates, function (ests) {
args$restore_coef_length(args$std_data$unstandardize_coefs(ests[[1L]]))
})
# Handle metrics
fit$estimates <- .metrics_attrib(fit$estimates, fit$metrics)
fit$lambda <- unlist(vapply(fit$estimates, FUN = `[[`, FUN.VALUE = numeric(1),
'lambda'), use.names = FALSE, recursive = FALSE)
fit$alpha <- alpha
fit
}
# Call internal function
fits <- mapply(
args$alpha, args$lambda,
SIMPLIFY = FALSE, USE.NAMES = FALSE,
FUN = compute_fit)
structure(list(
call = call,
scale = args$scale,
lambda = args$lambda,
estimates = unlist(lapply(fits, `[[`, 'estimates'), recursive = FALSE, use.names = FALSE),
alpha = vapply(fits, FUN.VALUE = numeric(1L), FUN = `[[`, 'alpha', USE.NAMES = FALSE)),
class = c('mest', 'pense_fit'))
}
#' Cross-validation for (Adaptive) Elastic Net M-Estimates
#'
#' Perform (repeated) K-fold cross-validation for [regmest()].
#'
#' @inheritParams regmest
#' @param standardize whether to standardize the `x` variables prior to fitting the PENSE estimates.
#' Can also be set to `"cv_only"`, in which case the input data is not standardized, but the
#' training data in the CV folds is scaled to match the scaling of the input data.
#' Coefficients are always returned on the original scale.
#' This can fail for variables with a large proportion of a single value
#' (e.g., zero-inflated data).
#' In this case, either compute with `standardize = FALSE` or standardize the data manually.
#' @template cv_params
#' @inheritDotParams regmest -standardize
#'
#' @seealso [regmest()] for computing regularized S-estimates without cross-validation.
#'
#' @return a list-like object as returned by [regmest()], plus the following components:
#' \describe{
#' \item{`cvres`}{data frame of average cross-validated performance.}
#' }
#'
#' @example examples/adapense_fit.R
#'
#' @family functions to compute robust estimates with CV
#' @seealso [coef.pense_cvfit()] for extracting coefficient estimates.
#' @seealso [plot.pense_cvfit()] for plotting the CV performance or the regularization path.
#' @export
#' @importFrom stats sd
#' @importFrom rlang abort
regmest_cv <- function(x, y, standardize = TRUE, lambda, cv_k, cv_repl = 1,
cv_metric = c('tau_size', 'mape', 'rmspe', 'auroc'),
fit_all = TRUE, cl = NULL, ...) {
call <- match.call(expand.dots = TRUE)
args <- do.call(.regmest_args, as.list(call[-1L]), envir = parent.frame())
fit_ses <- if (is.character(fit_all)) {
unique(vapply(fit_all, FUN = .parse_se_string, FUN.VALUE = numeric(1L), only_fact = TRUE,
USE.NAMES = FALSE))
} else if (isFALSE(fit_all)) {
.parse_se_string('min', only_fact = TRUE)
} else {
TRUE
}
if (length(fit_ses) < 1L) {
fit_ses <- TRUE
}
cv_k <- .as(cv_k[[1L]], 'integer')
cv_repl <- .as(cv_repl[[1L]], 'integer')
if (cv_k < 2L) {
abort("`cv_k` must be greater than 1.")
}
if (cv_repl < 1L) {
abort("`cv_repl` must be greater than 0.")
}
if (identical(cv_repl, 1L) && any(fit_ses > 0)) {
warn("To use `fit_all = \"se\"`, `cv_repl` must be 2 or greater.")
fit_ses <- 0
}
if (!is.null(cl)) {
if (args$mest_opts$num_threads > 1L) {
abort("`cl` can only be used if `ncores = 1`.")
}
if (!is(cl, 'cluster')) {
abort("`cl` must be a valid `parallel` cluster.")
}
}
cv_metric <- if (is.null(call$cv_metric) && args$binary_response) {
cv_measure_str <- 'auroc'
.cv_auroc
} else if (is.character(cv_metric)) {
cv_measure_str <- match.arg(cv_metric)
switch(cv_measure_str, mape = .cv_mape, rmspe = .cv_rmspe, tau_size = tau_size,
auroc = if (args$binary_response) {
.cv_auroc
} else {
abort("cv_metric=\"auroc\" is only valid for binary responses.")
})
} else {
cv_measure_str <- 'user_fun'
match.fun(cv_metric)
}
if (is.null(formals(cv_metric))) {
abort("Function `cv_metric` must accept at least 1 argument.")
}
# Get a common seed to be used for every alpha value
fit_seed <- sample.int(.Machine$integer.max, 1L)
cv_est_fun <- function (train_data, test_ind, handler_args) {
cv_fit <- .regmest_internal(
train_data$x, train_data$y,
alpha = handler_args$alpha,
lambda = handler_args$lambda,
scale = handler_args$args$scale,
penalty_loadings = handler_args$args$penalty_loadings,
mest_opts = handler_args$args$mest_opts,
optional_args = handler_args$args$optional_args)
# Return only best local optima
lapply(cv_fit$estimates, `[[`, 1L)
}
cv_est_dispatch <- function (alpha, lambda) {
set.seed(fit_seed)
cv_perf <- .run_replicated_cv(args$std_data,
cv_k = cv_k,
cv_repl = cv_repl,
metric = cv_metric,
cv_est_fun = cv_est_fun,
par_cluster = cl,
handler_args = list(args = args,
alpha = alpha,
lambda = lambda))
data.frame(lambda = lambda, alpha = alpha,
cvavg = rowMeans(cv_perf),
cvse = if (cv_repl > 1L) { apply(cv_perf, 1, sd) } else { 0 })
}
cv_curves <- mapply(
args$alpha, args$lambda,
SIMPLIFY = FALSE, USE.NAMES = FALSE,
FUN = cv_est_dispatch)
cv_curves <- do.call(rbind, cv_curves)
if (isTRUE(fit_ses)) {
fit_lambda <- args$lambda
} else {
fit_lambda <- lapply(args$alpha, function (alpha) {
rows <- which((cv_curves$alpha - alpha)^2 < .Machine$double.eps)
lambda_inds <- vapply(fit_ses, FUN.VALUE = numeric(1L),
FUN = function (se_fact) {
sel <- .cv_se_selection(cv_curves$cvavg[rows],
cv_curves$cvse[rows],
se_fact)
which(sel == 'se_fact')
})
unique(cv_curves$lambda[rows[lambda_inds]])
})
}
compute_global_fits <- function (alpha, lambda) {
fit <- .regmest_internal(args$std_data$x, args$std_data$y,
alpha = alpha,
lambda = lambda,
scale = args$scale,
penalty_loadings = args$penalty_loadings,
mest_opts = args$mest_opts,
optional_args = args$optional_args)
# Retain only the best solution:
fit$estimates <- lapply(fit$estimates, function (ests) {
args$restore_coef_length(args$std_data$unstandardize_coefs(ests[[1L]]))
})
# Handle metrics
fit$estimates <- .metrics_attrib(fit$estimates, fit$metrics)
fit$lambda <- unlist(vapply(fit$estimates, FUN.VALUE = numeric(1),
FUN = `[[`, 'lambda'),
use.names = FALSE, recursive = FALSE)
fit$alpha <- alpha
fit
}
fits <- mapply(
args$alpha, fit_lambda,
SIMPLIFY = FALSE, USE.NAMES = FALSE,
FUN = compute_global_fits)
structure(list(
call = call,
scale = args$scale,
cvres = cv_curves,
cv_measure = cv_measure_str,
lambda = args$lambda,
estimates = unlist(lapply(fits, `[[`, 'estimates'),
recursive = FALSE, use.names = FALSE),
alpha = vapply(fits, FUN.VALUE = numeric(1L),
FUN = `[[`, 'alpha',
USE.NAMES = FALSE)),
class = c('mest', 'pense_cvfit'))
}
#' @description `adamest_cv()` is a convenience wrapper to compute adaptive elastic-net M-estimates.
#'
#' @details
#' `adamest_cv()` is a convenience wrapper which performs 3 steps:
#'
#' 1. compute preliminary estimates via `regmest_cv(..., alpha = alpha_preliminary)`,
#' 2. computes the penalty loadings from the estimate `beta` with best prediction performance by
#' `adamest_loadings = 1 / abs(beta)^exponent`, and
#' 3. compute the adaptive PENSE estimates via
#' `regmest_cv(..., penalty_loadings = adamest_loadings)`.
#'
#' @param alpha_preliminary `alpha` parameter for the preliminary estimate.
#' @param exponent the exponent for computing the penalty loadings based on the
#' preliminary estimate.
#'
#' @return a list-like object as returned by [adamest_cv()] plus the following components:
#' \describe{
#' \item{`exponent`}{value of the exponent.}
#' \item{`preliminary`}{CV results for the preliminary estimate.}
#' \item{`penalty_loadings`}{penalty loadings used for the adaptive
#' elastic net M-estimate.}
#' }
#'
#' @rdname regmest_cv
#' @family functions to compute robust estimates with CV
#' @export
#' @importFrom stats coef
#' @importFrom rlang abort
adamest_cv <- function (x, y, alpha, alpha_preliminary = 0, exponent = 1, ...) {
call <- match.call(expand.dots = TRUE)
if (!is.null(call$penalty_loadings)) {
abort(paste("Argument `penalty_loadings` not valid for `adamest_cv`",
"Penalty loadings are determined internally."))
}
exponent <- .as(exponent[[1L]], 'numeric')
# Compute preliminary estimate
prelim_call <- call
prelim_call[[1L]] <- quote(pense::regmest_cv)
prelim_call$alpha <- .as(alpha_preliminary[[1L]], 'numeric')
prelim_call$alpha_preliminary <- NULL
prelim_call$exponent <- NULL
prelim <- eval.parent(prelim_call)
prelim_coef <- coef(prelim, sparse = FALSE, concat = FALSE)
pen_loadings <- abs(prelim_coef$beta)^(-exponent)
adamstep <- regmest_cv(x, y, alpha = alpha,
penalty_loadings = pen_loadings, ...)
adamstep$call <- call
adamstep$exponent <- exponent
adamstep$preliminary <- prelim
adamstep$penalty_loadings <- pen_loadings
return(adamstep)
}
## Get the smallest lambda such that the PENSE estimate gives the empty model.
.regmest_max_lambda <- function (x, y, alpha, scale, mest_options,
penalty_loadings = NULL) {
optional_args <- list()
if (!is.null(penalty_loadings)) {
optional_args$pen_loadings <- penalty_loadings
}
.Call(C_mesten_max_lambda, x, y, scale, mest_options, optional_args) /
max(0.01, alpha)
}
## Generate a log-spaced grid of decreasing lambda values
.regmest_lambda_grid <- function (x, y, alpha, scale, nlambda,
lambda_min_ratio, mest_options,
penalty_loadings) {
alpha <- max(0.01, alpha)
x_dim <- dim(x)
if (is.null(lambda_min_ratio)) {
lambda_min_ratio <- alpha * if (x_dim[[1L]] > x_dim[[2L]]) { 1e-3 } else { 1e-2 }
}
max_lambda <- .regmest_max_lambda(x, y, alpha, scale, mest_options, penalty_loadings)
rev(exp(seq(log(lambda_min_ratio * max_lambda), log(max_lambda), length.out = nlambda)))
}
## Perform some final input adjustments and call the internal C++ code.
.regmest_internal <- function(x, y, alpha, scale, lambda, penalty_loadings = NULL, mest_opts,
optional_args) {
# Create penalties-list, without sorting the lambda sequence
penalties <- lapply(lambda, function (l) { list(lambda = l, alpha = alpha) })
if (!is.null(penalty_loadings)) {
optional_args$pen_loadings <- penalty_loadings
}
.Call(C_mesten_regression, x, y, scale, penalties, mest_opts, optional_args)
}
#' @importFrom rlang warn abort
#' @importFrom methods is
#' @importFrom Matrix sparseVector
#' @importFrom stats runif
.regmest_args <- function (x, y, alpha, nlambda = 50, lambda, lambda_min_ratio, scale,
starting_points, penalty_loadings, intercept = TRUE, cc = 4.7,
eps = 1e-6, explore_solutions = 10, explore_tol = 0.1,
max_solutions = 10, comparison_tol = sqrt(eps), sparse = FALSE,
ncores = 1, standardize = TRUE, algorithm_opts = mm_algorithm_options(),
add_zero_based = TRUE, mscale_bdp = 0.25,
mscale_opts = mscale_algorithm_options(), ...) {
args_call <- match.call(expand.dots = FALSE)
optional_args <- list()
## Process input arguments
response <- .validate_response(y)
y <- response$values
x_dim <- dim(x)
if (length(y) != x_dim[[1L]]) {
abort("Number of observations in `x` and `y` does not match.")
} else if (x_dim[[2L]] <= 1L) {
abort("`x` must be a matrix with at least 2 columns.")
}
alpha <- .as(alpha, 'numeric')
if (any(alpha < 0 | alpha > 1)) {
abort("`alpha` is outside 0 and 1.")
} else if (any(alpha < sqrt(.Machine$double.eps))) {
alpha[which(alpha < sqrt(.Machine$double.eps))] <- 0
if (any(alpha > 0)) {
abort("`alpha=0` cannot be mixed with other `alpha` values.")
}
}
scale <- .as(scale[[1L]], 'numeric')
if (!isTRUE(scale > .Machine$double.eps)) {
abort("`scale` must be positive.")
}
mscale_opts <- .full_mscale_algo_options(bdp = mscale_bdp, mscale_opts = mscale_opts)
mest_opts <- list(algo_opts = algorithm_opts,
cc = .as(cc[[1L]], 'numeric'),
strategy_0 = isTRUE(add_zero_based),
algorithm = .regmest_algorithm_id(algorithm_opts),
intercept = !isFALSE(intercept),
sparse = isTRUE(sparse),
eps = .as(eps[[1L]], 'numeric'),
max_it = mscale_opts$max_it, # max. iterations for finding the largest lambda
comparison_tol = .as(comparison_tol[[1L]], 'numeric'),
explore_tol = .as(explore_tol[[1L]], 'numeric'),
nr_tracks = .as(explore_solutions[[1L]], 'integer'),
max_optima = .as(max_solutions[[1L]], 'integer'),
num_threads = max(1L, .as(ncores[[1L]], 'integer')))
if (mest_opts$cc < .Machine$double.eps) {
abort("`cc` must be positive.")
}
if (mest_opts$explore_tol < mest_opts$eps) {
abort("`explore_tol` must not be less than `eps`")
}
if (mest_opts$comparison_tol < mest_opts$eps) {
abort("`comparison_tol` must not be less than `eps`")
}
# If using the MM algorithm, ensure that the EN options are set.
if (mest_opts$algorithm == 1L) {
mest_opts$algo_opts$en_options <- .select_en_algorithm(mest_opts$algo_opts$en_options,
alpha,
mest_opts$sparse, eps)
mest_opts$sparse <- mest_opts$algo_opts$en_options$sparse
}
# Set the number of cores for the ENPY options
if (mest_opts$num_threads > 1L && !isTRUE(.k_multithreading_support)) {
warn("Multithreading not supported. Using only 1 core.")
mest_opts$num_threads <- 1L
}
# Standardizing the data
standardize <- if (is.character(standardize)) {
if (pmatch(standardize[[1L]], 'cv_only', nomatch = 0L) == 1L) {
standardize <- 'cv_only'
} else {
abort("`standardize` must be either TRUE/FALSE or \"cv_only\".")
}
} else {
isTRUE(standardize)
}
# Check penalty loadings
if (!missing(penalty_loadings) && !is.null(penalty_loadings)) {
checked_pls <- .prepare_penalty_loadings(penalty_loadings, x, alpha, sparse = mest_opts$sparse)
penalty_loadings <- checked_pls$loadings
restore_coef_length <- checked_pls$restore_fun
x <- checked_pls$trimmed_x
} else {
restore_coef_length <- function (coef) coef
penalty_loadings <- NULL
}
if (ncol(x) == 0L) {
warn("All values in `penalty_loadings` are infinite. Only computing the intercept.")
mest_opts$intercept <- TRUE
std_data <- .standardize_data(matrix(runif(x_dim[[1L]]), ncol = 1L), y, intercept = TRUE,
sparse = mest_opts$sparse,
standardize = standardize, robust = TRUE,
mscale_opts = mscale_opts,
bdp = mscale_opts$delta, cc = mscale_opts$cc)
# Compute only the 0-based solution.
mest_opts$strategy_0 <- TRUE
lambda <- lapply(args$alpha, FUN = .regmest_lambda_grid,
x = std_data$x, y = std_data$y, scale = scale,
nlambda = 1, lambda_min_ratio = 1,
mest_options = mest_opts, penalty_loadings1 = NULL)
return(list(std_data = std_data,
alpha = args$alpha,
lambda = lambda,
scale = scale,
penalty_loadings = NULL,
mest_opts = mest_opts,
optional_args = optional_args,
restore_coef_length = restore_coef_length))
}
std_data <- .standardize_data(x, y, intercept = mest_opts$intercept,
standardize = standardize,
robust = TRUE,
sparse = mest_opts$sparse,
mscale_opts = mscale_opts,
bdp = mscale_opts$delta,
cc = mscale_opts$cc)
# Scale penalty loadings appropriately
penalty_loadings <- penalty_loadings / std_data$scale_x
if (length(penalty_loadings) == 0L) {
penalty_loadings <- NULL
}
# Determine lambda grid
lambda <- if (missing(lambda) || is.null(lambda)) {
if (missing(lambda_min_ratio)) {
lambda_min_ratio <- NULL
}
lapply(alpha, FUN = .regmest_lambda_grid,
x = std_data$x, y = std_data$y, scale = scale,
nlambda = nlambda,
lambda_min_ratio = lambda_min_ratio,
mest_options = mest_opts,
penalty_loadings = penalty_loadings)
} else if (!is.list(lambda)) {
rep.int(list(sort(.as(lambda, 'numeric'), decreasing = TRUE)), length(alpha))
} else if (identical(length(lambda), length(alpha))) {
lapply(lambda, function (l) {
if (any(l < .Machine$double.eps)) {
abort("All values in `lambda` must be positive.")
}
sort(.as(l, 'numeric'), decreasing = TRUE)
})
} else {
abort("`lambda` must either be a numeric vector or a list the same length as `alpha`.")
}
if (!missing(starting_points)) {
if (is(starting_points, 'pense_fit') || is(starting_points, 'pense_cvfit')) {
starting_points <- as_starting_point(starting_points)
} else if (is(starting_points, 'starting_point')) {
starting_points <- structure(list(starting_points), class = 'starting_points')
} else if (!is(starting_points, 'starting_points')) {
abort(paste("`starting_points` must be a list of starting points created by",
"`starting_point()`, `enpy_initial_estimates()`, or a combination thereof."))
}
# Ensure the `beta` coefficients in `other_starts` agree with the desired vector class
# (sparse vs. dense) and standardize them.
optional_args$shared_starts <- lapply(.sparsify_other_starts(starting_points, mest_opts$sparse),
std_data$standardize_coefs)
}
return(list(std_data = std_data,
binary_response = response$binary,
alpha = alpha,
scale = scale,
lambda = lambda,
penalty_loadings = penalty_loadings,
mest_opts = mest_opts,
optional_args = optional_args,
restore_coef_length = restore_coef_length))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.