#==================================================================================================#
# This is the master function for robust subset selection. It calls heuristics and mio.
#==================================================================================================#
#' @title Robust subset selection
#'
#' @author Ryan Thompson
#'
#' @description Fits a sequence of regression models using robust subset selection.
#'
#' @param x a predictor matrix
#' @param y a response vector
#' @param k the number of predictors over which to minimise the sum of squares; by default a
#' sequence from 0 to 20
#' @param h the number of observations over which to minimise the sum of squares; by default a
#' sequence from 75 to 100 percent of the sample size (in increments of 5 percent)
#' @param k.mio the subset of \code{k} for which the mixed-integer solver should be run
#' @param h.mio the subset of \code{h} for which the mixed-integer solver should be run
#' @param params a list of parameters (settings) to pass to the mixed-integer solver (Gurobi)
#' @param tau a number greater than or equal to 1 used to tighten the coefficient bounds in the
#' mixed-integer solver; smaller values give quicker run times but can exclude the optimal
#' solution; may be \code{Inf}
#' @param warm.start a logical indicating whether to warm start the mixed-integer solver using the
#' heuristic solutions
#' @param robust a logical indicating whether to standardise the data robustly; median/mad for
#' \code{TRUE} and mean/sd for \code{FALSE}
#' @param max.ns.iter the maximum number of neighbourhood search iterations allowed
#' @param max.gd.iter the maximum number of gradient descent iterations allowed per value of
#' \code{k} and \code{h}
#' @param eps a numerical tolerance parameter used to declare convergence
#'
#' @return An object of class \code{rss}; a list with the following components:
#' \item{beta}{an array of estimated regression coefficients; columns correspond to \code{k} and
#' matrices to \code{h}}
#' \item{weights}{an array of binary weights; weights equal to one correspond to good observations
#' selected for inclusion in the least squares fit; columns correspond to \code{k} and matrices to
#' \code{h}}
#' \item{objval}{a matrix with the objective function values; rows correspond to \code{k} and
#' columns to \code{h}}
#' \item{mipgap}{a matrix with the optimality gap values; rows correspond to \code{k} and columns
#' to \code{h}}
#' \item{k}{a vector containing the values of \code{k} used in the fit}
#' \item{h}{a vector containing the values of \code{h} used in the fit}
#'
#' @details The function first computes solutions over all combinations of \code{k} and \code{h}
#' using heuristics. The heuristics include projected block-coordinate gradient descent and
#' neighbourhood search (see \href{https://arxiv.org/abs/2005.08217}{arXiv}). The solutions produced
#' by the heuristics can be refined further using the mixed-integer solver. The tuning parameter
#' values that the solver operates on are specified via \code{k.mio} and \code{h.mio}, which must
#' be subsets of \code{k} and \code{h}. \cr
#'
#' By default, the mixed-integer optimisation problem is formulated with SOS constraints and
#' bound constraints. The bound constraints are estimated as \eqn{\tau\|\hat{\beta}\|_\infty}, where
#' \eqn{\hat{\beta}} is the output of the heuristics. For finite values of \code{tau}, the
#' mixed-integer solver automatically converts the SOS constraints to Big-M constraints, which are
#' typically faster to optimise.
#'
#' @references Thompson, R. (2022). 'Robust subset selection'. Computational Statistics and Data
#' Analysis 169, p. 107415.
#'
#' @example R/examples/example-rss.R
#'
#' @export
rss <- \(x, y, k = 0:min(nrow(x) - 1, ncol(x), 20), h = round(seq(0.75, 1, 0.05) * nrow(x)),
         k.mio = NULL, h.mio = NULL, params = list(TimeLimit = 60, OutputFlag = 0), tau = 1.5,
         warm.start = TRUE, robust = TRUE, max.ns.iter = 1e2, max.gd.iter = 1e5, eps = 1e-4) {
  # Check data is valid
  if (!is.matrix(x)) x <- as.matrix(x)
  attributes(x)$dimnames <- NULL
  if (!is.matrix(y)) y <- as.matrix(y)
  attributes(y)$dimnames <- NULL
  # Check arguments
  if (!is.null(k.mio)) if (!all(k.mio %in% k)) stop('k.mio not a subset of k')
  if (!is.null(h.mio)) if (!all(h.mio %in% h)) stop('h.mio not a subset of h')
  # Preliminaries
  n <- nrow(x)
  p <- ncol(x)
  nk <- length(k)
  nh <- length(h)
  nk.mio <- length(k.mio)
  nh.mio <- length(h.mio)
  # Centre y and centre/scale x; median/mad for robust and mean/sd for nonrobust
  x.o <- x
  y.o <- y
  if (robust) {
    x <- rob.scale(x, center = TRUE, scale = TRUE)
    if (any(is.na(x))) stop('At least one predictor has MAD = 0, causing robust standardisation ',
                            'to fail. Set robust = FALSE or remove the offending predictors.')
    y <- rob.scale(y, center = TRUE, scale = FALSE)
  } else {
    x <- scale2(x, center = TRUE, scale = TRUE)
    y <- scale2(y, center = TRUE, scale = FALSE)
  }
  # Run neighbourhood search
  fits <- ns(x, y, k, h, max.ns.iter, max.gd.iter, eps)
  # Run mio
  fits$mipgap <- array(dim = c(nk, nh))
  if (!is.null(k.mio) && !is.null(h.mio)) {
    for (j in 1:nh.mio) {
      for (i in 1:nk.mio) {
        if (k.mio[i] == 0) next
        k.ind <- which(k == k.mio[i])
        h.ind <- which(h == h.mio[j])
        fit <- mio(x, y, k.mio[i], h.mio[j], fits$beta[, k.ind, h.ind], fits$eta[, k.ind, h.ind],
                   tau, params, warm.start)
        fits$beta[, k.ind, h.ind] <- fit$beta
        fits$eta[, k.ind, h.ind] <- fit$eta
        fits$objval[k.ind, h.ind] <- fit$objval
        fits$mipgap[k.ind, h.ind] <- fit$mipgap
      }
    }
  }
  # Refit the models on the original (unscaled) data
  fits.final <- list(beta = array(dim = c(p + 1, nk, nh)), weights = array(dim = c(n, nk, nh)),
                     objval = array(dim = c(nk, nh)), mipgap = fits$mipgap, k = k, h = h)
  colnames(fits.final$objval) <- colnames(fits.final$mipgap) <-
    dimnames(fits.final$beta)[[3]] <- dimnames(fits.final$weights)[[3]] <- h
  rownames(fits.final$objval) <- rownames(fits.final$mipgap) <-
    dimnames(fits.final$beta)[[2]] <- dimnames(fits.final$weights)[[2]] <- k
  for (j in 1:nh) {
    for (i in 1:nk) {
      if (any(fits$beta[, i, j] != 0)) {
        beta <- fits$beta[, i, j]
        eta <- fits$eta[, i, j]
        beta[abs(beta) < 1e-5] <- 0
        eta[abs(eta) < 1e-5] <- 0
        id.beta <- 1:p %in% which(beta != 0)
        id.eta <- 1:n %in% which(eta != 0)
        beta <- c(0, beta)
        beta[c(TRUE, id.beta)] <- stats::lsfit(x.o[!id.eta, id.beta], y.o[!id.eta])$coef
      } else {
        eta <- H(y.o, n - h[j])
        id.eta <- 1:n %in% which(eta != 0)
        beta <- matrix(0, p + 1)
        beta[1] <- mean(y.o[!id.eta])
      }
      eta <- y.o - cbind(1, x.o) %*% beta
      eta[!id.eta] <- 0
      objval <- f(cbind(1, x.o), y.o, beta, eta)
      fits.final$beta[, i, j] <- beta
      fits.final$weights[, i, j] <- as.integer(eta == 0)
      fits.final$objval[i, j] <- objval
    }
  }
  # Return fit
  class(fits.final) <- 'rss'
  return(fits.final)
}
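#==================================================================================================#
# A minimal usage sketch for rss() on simulated data with a few contaminated responses. Guarded by
# if (FALSE) so it never runs on package load. The second call assumes the gurobi package and a
# Gurobi licence are available; all names and sizes here are illustrative only.
#==================================================================================================#
if (FALSE) {
  set.seed(1)
  n <- 100; p <- 10
  x <- matrix(rnorm(n * p), n, p)
  y <- x %*% c(rep(1, 5), rep(0, p - 5)) + rnorm(n)
  y[1:5] <- y[1:5] + 10 # Contaminate five responses
  fit <- rss(x, y, k = 0:5, h = round(c(0.75, 1) * n)) # Heuristics only
  fit.mio <- rss(x, y, k = 0:5, h = round(c(0.75, 1) * n), k.mio = 5, h.mio = 75) # Refine with mio
  coef(fit, k = 5, h = 75) # Coefficients for a single (k, h) pair
}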
#==================================================================================================#
# This function is responsible for mixed-integer optimisation. It contains the Gurobi specifications
# for solving the robust subset selection problem. The mio solver is capable of solving the problem
# to global optimality.
#==================================================================================================#
mio <- \(x, y, k, h, beta, eta, tau, params, warm.start) {
  # Preliminaries
  n <- nrow(x)
  p <- ncol(x)
  w <- cbind(x, diag(1, n)) # Augmented design matrix acting on [beta, eta]
  # Formulation 1 (low-dimensional) applies when p <= n and no observations are trimmed (h = n);
  # otherwise formulation 2 (high-dimensional) applies
  form <- ifelse(ncol(x) <= nrow(x) & h == nrow(x), 1, 2)
  # Set bounds for variables; tau = Inf with an all-zero heuristic solution yields NaN (Inf * 0),
  # in which case fall back to unbounded
  Mb <- tau * max(abs(beta))
  Mb <- ifelse(is.nan(Mb), Inf, Mb)
  Me <- tau * max(abs(eta))
  Me <- ifelse(is.nan(Me), Inf, Me)
  # Set the solver problem formulation
  model <- list()
  # Problem formulation 1 (low-dimensional)
  # -------------------------------------------------------------
  # Objective function: x ^ T Q x + c ^ T x + a
  # Problem variable: x = [beta, eta, s, z]
  # Constraints
  #   C1: sum(s) >= p - k <==> ||beta||_0 <= k
  #   C2: sum(z) >= h <==> ||eta||_0 <= n - h
  # NOTE: Gurobi will transform SOS constraints to Big-M
  # constraints when coef bounds are finite
  # -------------------------------------------------------------
  if (form == 1) {
    model$vtype <- c(rep('C', p + n), rep('B', p + n)) # Problem variable x
    model$Q <- 0.5 * rbind(
      cbind(t(w) %*% w, matrix(0, p + n, p + n)),
      matrix(0, p + n, p + n + p + n)
    ) # Matrix Q in objective function
    model$obj <- c(- t(w) %*% y, rep(0, p + n)) # Vector c in objective function
    model$objcon <- 0.5 * t(y) %*% y # Constant a in objective function
    model$A <- # LHS of constraints
      rbind(
        cbind(matrix(0, 1, p), matrix(0, 1, n), matrix(1, 1, p), matrix(0, 1, n)), # C1
        cbind(matrix(0, 1, p), matrix(0, 1, n), matrix(0, 1, p), matrix(1, 1, n))  # C2
      )
    model$sense <- c('>=', '>=') # Constraint types
    model$rhs <- c(p - k, h) # RHS of constraints
    model$lb <- c(rep(- Mb, p), rep(- Me, n), rep(0, p + n)) # Lower bound on variables
    model$ub <- c(rep(Mb, p), rep(Me, n), rep(1, p + n)) # Upper bound on variables
    model$sos <- vector('list', p + n)
    for (j in 1:(p + n)) { # SOS constraints
      model$sos[[j]]$type <- 1
      model$sos[[j]]$index <- c(j, p + n + j)
      model$sos[[j]]$weight <- c(1, 2)
    }
    if (warm.start) model$start <- c(beta, eta, beta == 0, eta == 0) # Warm start
  }
  # Problem formulation 2 (high-dimensional)
  # -------------------------------------------------------------
  # Objective function: x ^ T Q x + c ^ T x + a
  # Problem variable: x = [beta, eta, s, z, xi]
  # Constraints
  #   C1: sum(s) >= p - k <==> ||beta||_0 <= k
  #   C2: sum(z) >= h <==> ||eta||_0 <= n - h
  #   C3: w [beta, eta] - xi = 0 <==> w [beta, eta] = xi
  # NOTE: Gurobi will transform SOS constraints to Big-M
  # constraints when coef bounds are finite
  # -------------------------------------------------------------
  if (form == 2) {
    Mx <- tau * max(abs(w %*% c(beta, eta)))
    Mx <- ifelse(is.nan(Mx), Inf, Mx)
    model$vtype <- c(rep('C', p + n), rep('B', p + n), rep('C', n)) # Problem variable x
    model$Q <- 0.5 * rbind(
      matrix(0, p + n + p + n, p + n + p + n + n),
      cbind(matrix(0, n, p + n + p + n), diag(n))
    ) # Matrix Q in objective function
    model$obj <- c(- t(w) %*% y, rep(0, p + n + n)) # Vector c in objective function
    model$objcon <- 0.5 * t(y) %*% y # Constant a in objective function
    model$A <- # LHS of constraints
      rbind(
        cbind(matrix(0, 1, p), matrix(0, 1, n), matrix(1, 1, p), matrix(0, 1, n), matrix(0, 1, n)), # C1
        cbind(matrix(0, 1, p), matrix(0, 1, n), matrix(0, 1, p), matrix(1, 1, n), matrix(0, 1, n)), # C2
        cbind(w, matrix(0, n, p), matrix(0, n, n), - diag(n)) # C3
      )
    model$sense <- c('>=', '>=', rep('=', n)) # Constraint types
    model$rhs <- c(p - k, h, rep(0, n)) # RHS of constraints
    model$lb <- c(rep(- Mb, p), rep(- Me, n), rep(0, p + n), rep(- Mx, n)) # Lower bound on variables
    model$ub <- c(rep(Mb, p), rep(Me, n), rep(1, p + n), rep(Mx, n)) # Upper bound on variables
    model$sos <- vector('list', p + n)
    for (j in 1:(p + n)) { # SOS constraints
      model$sos[[j]]$type <- 1
      model$sos[[j]]$index <- c(j, p + n + j)
      model$sos[[j]]$weight <- c(1, 2)
    }
    if (warm.start) model$start <- c(beta, eta, beta == 0, eta == 0, w %*% c(beta, eta)) # Warm start
  }
  # Solve the model using Gurobi
  fit <- gurobi::gurobi(model, params)
  if (is.null(fit$x)) fit$x <- rep(0, p + n) # Fall back to the null solution if Gurobi fails
  fit$beta <- fit$x[1:p]
  fit$eta <- fit$x[(p + 1):(p + n)]
  return(fit)
}
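#==================================================================================================#
# A toy illustration (not part of the package) of the SOS-1 device used in mio() above: each
# continuous variable is paired with a binary indicator, so sum(s) >= p - k forces at least p - k
# coefficients to zero. Guarded by if (FALSE); assumes the gurobi package and a licence.
#==================================================================================================#
if (FALSE) {
  model <- list()
  model$modelsense <- 'max'
  model$obj <- c(1, 2, 0, 0) # Maximise b1 + 2 * b2; problem variable is [b1, b2, s1, s2]
  model$vtype <- c('C', 'C', 'B', 'B')
  model$lb <- rep(0, 4)
  model$ub <- rep(1, 4)
  model$A <- matrix(c(0, 0, 1, 1), 1) # sum(s) >= p - k with p = 2, k = 1
  model$sense <- '>='
  model$rhs <- 1
  model$sos <- list(list(type = 1, index = c(1, 3), weight = c(1, 2)), # b1 or s1 must be zero
                    list(type = 1, index = c(2, 4), weight = c(1, 2))) # b2 or s2 must be zero
  fit <- gurobi::gurobi(model, list(OutputFlag = 0))
  fit$x # Optimal solution keeps b2 = 1 and zeroes b1
}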
#==================================================================================================#
# Coefficient function
#==================================================================================================#
#' @title Coefficient function for rss object
#'
#' @author Ryan Thompson
#'
#' @description Extracts coefficients for a given parameter pair \code{(k,h)}.
#'
#' @param object an object of class \code{rss}
#' @param k the number of predictors indexing the desired fit
#' @param h the number of observations indexing the desired fit
#' @param ... any other arguments
#'
#' @return An array of coefficients.
#'
#' @method coef rss
#'
#' @export
#'
#' @importFrom stats "coef"
coef.rss <- \(object, k = NULL, h = NULL, ...) {
  if (!is.null(k)) index1 <- which(object$k == k) else index1 <- seq_along(object$k)
  if (!is.null(h)) index2 <- which(object$h == h) else index2 <- seq_along(object$h)
  object$beta[, index1, index2]
}
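# Usage sketch for coef.rss(), assuming `fit` is an object returned by rss() (illustrative only,
# guarded so it never runs on package load):
if (FALSE) {
  coef(fit, k = 5, h = 75) # Coefficient vector (intercept first) for a single (k, h) pair
  coef(fit, k = 5)         # Coefficient matrix for k = 5 across all values of h
}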
#==================================================================================================#
# Predict function
#==================================================================================================#
#' @title Predict function for rss object
#'
#' @author Ryan Thompson
#'
#' @description Generates predictions for new data using a given parameter pair \code{(k,h)}.
#'
#' @param object an object of class \code{rss}
#' @param x.new a matrix of new values for the predictors
#' @param k the number of predictors indexing the desired fit
#' @param h the number of observations indexing the desired fit
#' @param ... any other arguments
#'
#' @return An array of predictions.
#'
#' @method predict rss
#'
#' @export
#'
#' @importFrom stats "predict"
predict.rss <- \(object, x.new, k = NULL, h = NULL, ...) {
  x.new <- as.matrix(x.new)
  x.new <- cbind(1, x.new)
  beta <- coef.rss(object, k, h, ...)
  if (length(dim(beta)) < 3) x.new %*% beta else apply(beta, 2:3, \(beta) x.new %*% beta)
}
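# Usage sketch for predict.rss(), assuming `fit` comes from rss() and x.new has the same predictors
# as the original x (illustrative only):
if (FALSE) {
  predict(fit, x.new, k = 5, h = 75) # Predictions for a single (k, h) pair
  predict(fit, x.new)                # Array of predictions over all (k, h) pairs
}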
#==================================================================================================#
# Plot function
#==================================================================================================#
#' @title Plot function for rss object
#'
#' @author Ryan Thompson
#'
#' @description Plots the coefficient profiles from robust subset selection.
#'
#' @param x an object of class \code{rss}
#' @param ... any other arguments
#'
#' @return A plot of the coefficient profiles.
#'
#' @method plot rss
#'
#' @export
#'
#' @importFrom graphics "plot"
plot.rss <- \(x, ...) {
  beta <- x$beta
  beta <- beta[- 1, , , drop = FALSE] # Drop the intercept row
  beta[beta == 0] <- NA
  beta <- data.frame(beta = as.vector(beta),
                     predictor = as.factor(rep(seq_len(nrow(beta)), length(x$k) * length(x$h))),
                     k = as.factor(rep(rep(x$k, each = nrow(beta)), length(x$h))),
                     h = as.factor(rep(x$h, each = nrow(beta) * length(x$k))))
  beta <- stats::na.omit(beta)
  ggplot2::ggplot(beta, ggplot2::aes(.data$k, .data$beta, col = .data$predictor)) +
    ggplot2::geom_point() +
    ggplot2::facet_wrap(. ~ h, labeller = ggplot2::label_both)
}
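# Usage sketch for plot.rss(), assuming `fit` comes from rss() (illustrative only):
if (FALSE) {
  plot(fit) # Coefficient profiles across k, faceted by h
}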