R/sqrtEnet.R


#' Square-Root Elastic Net
#'
#' @description This fits an elastic net model to continuous responses, using the square-root method to select an
#' optimal penalty parameter, so no cross-validation is required. Data are automatically centered and unit scaled,
#' and coefficients are returned on the original scale of the inputs; hence, it is not necessary to center and/or
#' standardize the inputs beforehand. Note that the returned coefficients have the L2 penalty relaxed after fitting,
#' per Zou & Hastie (2005), rather than being the naive estimates returned by glmnet.
#'
#' @param formula a model formula
#' @param data a data frame
#' @param alpha a value between 0 and 1 for the mixing parameter. defaults to 0.5.
#' @param conf.level the confidence level to use in setting the penalty. the default is 0.95.
#'
#' @return a model fit
#' @export
#'
#' @references
#' Zou, H. & Hastie, T. (2005). Regularization and Variable Selection via the Elastic Net. Journal of the Royal Statistical Society, Series B, 67(2), 301-320. \cr
#' \cr
#' Belloni, A., Chernozhukov, V., & Wang, L. (2011). Square-root lasso: pivotal recovery of sparse signals via conic programming. Biometrika, 98(4), 791-806. \cr
#' \cr
#' van de Geer, S. (2016). The Square-Root Lasso. In: Estimation and Testing Under Sparsity. Lecture Notes in Mathematics, vol 2159. Springer, Cham. \cr
#' \cr
#' Raninen, E. & Ollila, E. (2017). Scaled and square-root elastic net. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), New Orleans, LA, 2017, pp. 4336-4340. doi:10.1109/ICASSP.2017.7952975 \cr
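#'
#' @examples
#' ## a minimal usage sketch (not run), assuming the cvreg package and its
#' ## dependencies are installed; mtcars is just a placeholder dataset
#' \dontrun{
#' fit <- sqrtEnet(mpg ~ ., data = mtcars, alpha = 0.5)
#' print(fit)
#' head(predict(fit, newdata = mtcars[, -1]))
#' }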
#'
sqrtEnet <- function(formula, data, alpha = 0.5, conf.level = 0.95) {

  this.call <- match.call()
  x <- model.matrix(formula, data)[, -1]; coefnames <- c("(Intercept)", colnames(x))
  y <- model.frame(formula, data)[, 1]
  yscale <- sd(y); xscale <- numeric(); ycenter <- mean(y)
  # record per-column scales: unit-scale continuous columns only; binary (dummy)
  # columns are left at scale 1
  for (i in 1:ncol(x)){
    if (length(unique(x[, i])) > 2){
      xscale[i] <- sd(x[, i])
    } else {
      xscale[i] <- 1
    }
  }

  data = Scale(data)
  x = model.matrix(formula, data)[,-1]
  y = model.frame(formula, data)[, 1]
  n <- nrow(x)
  p <- ncol(x)
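  # choose the pivotal penalty level L: for p > 1, iterate (with damping) on a
  # normal-quantile fixed point in the spirit of Belloni et al. (2011)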
  if (p == 1) {
    L = 0.5
  } else {
    L = 0.1
    Lold = 0
    while (abs(L - Lold) > 0.001) {
      k = (L^4 + 2 * L^2)
      Lold = L
      L = -qnorm(min(k/p, conf.level))
      L = (L + Lold)/2
    }
  }

  # combined penalty level: the pivotal square-root-lasso level for the L1 part and
  # a data-driven ridge level (scaled by 1/n) for the L2 part, mixed by alpha
  lambdal1 <- sqrt((2/n) * log(p)) * L
  lambdal2 <- cvreg:::.ridgeEstLambda(x, y, drop(pseudoinverse(crossprod(x)) %*% crossprod(x, y)))/n
  lambda <- (alpha * lambdal1) + ((1 - alpha) * lambdal2)
  l.min <- lambda/2
  l.max <- lambda * 1.25
  #l.max <- ifelse(alpha!=0, max(abs(t(y - mean(y)*(1-mean(y))) %*% x ) )/ (alpha*n), max(abs(t(y - mean(y)*(1-mean(y))) %*% x ) ) * n)
  out <- glmnet::glmnet(x, y, alpha = alpha, nlambda = 200, standardize = FALSE)
  coef_path <- rbind("(Intercept)" = out$a0, out$beta)
  lambda_path <- out$lambda
  resids <- y - glmnet::predict.glmnet(out, newx=x)
  full_MSE <- colMeans(resids^2)
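  # square-root selection rule: pick the path index whose glmnet penalty is closest
  # to lambda * sigma_hat, i.e. where full_MSE * lambda^2 / out$lambda^2 is nearest 1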
  index_sel <- which.min(abs(full_MSE*lambda^2 / out$lambda^2 - 1))
  fit = list("beta"=as.numeric(out$beta[, index_sel]),
             "a0"=out$a0[index_sel],
             "sigma"=sqrt(full_MSE[index_sel]),
             "lambda"=out$lambda[index_sel],
             "fit"=out)

  # relax the naive elastic net shrinkage by rescaling with (1 + lambda2), per Zou & Hastie (2005)
  coefs <- as.vector(zapsmall(round(c(fit$a0, fit$beta), 6), 5) * (1 + lambdal2))
  fitted <- as.vector(cbind(1,x)%*%coefs)
  residuals <- y - fitted
  xscale <- c(1, xscale)
  coefs <- sapply(1:length(coefs), function(i) coefs[i] * (yscale/xscale[i]))
  names(coefs) <- c("(Intercept)" , colnames(x))
  results <- list("coefficients"=coefs,
                  "fitted" = (fitted*yscale)+ycenter,
                  "residuals" = yscale*residuals,
                  "sigma"=sqrt(full_MSE[index_sel]) * yscale,
                  "alpha" = alpha,
                  "lambda" = lambda,
                  "coef_path" = coef_path,
                  "lambda_path" = lambda_path,
                  "call" = this.call)
  class(results) <- "sqrtenet"
  return(results)
}


#' Square-Root Penalized Elastic Net S-estimator
#'
#' @description This fits a penalized elastic net S-estimator, using the square-root
#' method to select an optimal penalty parameter, so no cross-validation is required.
#' Data are automatically unit scaled and centered using the Yohai-Zamar \strong{\eqn{\tau}}
#' location and scale estimates. Coefficients are returned on the original scale of the
#' inputs; hence, it is not necessary to center and/or standardize the inputs beforehand.
#'
#'
#' @param formula a model formula
#' @param data a data frame
#' @param alpha a value between 0 and 1 for the mixing parameter. defaults to 0.5.
#' @param delta the breakdown point for the robust estimator. the default is 0.293, a somewhat arbitrary choice (it is the breakdown point of the Theil-Sen estimator). the value must be greater than zero and at most 0.50.
#' @param conf.level the confidence level to use in setting the penalty. the default is 0.95.
#' @param alg the algorithm used to fit the elastic net: the augmented LARS ("lars", the default) or the Dual Augmented Lagrangian ("dal").
#'
#' @return a model fit
#' @export
#'
#' @references
#' Belloni, A., Chernozhukov, V., & Wang, L. (2011). Square-root lasso: pivotal recovery of sparse signals via conic programming. Biometrika, 98(4), 791-806. \cr
#' \cr
#' Freue, G.V.C., Kepplinger, D., Salibián-Barrera, M., & Smucler, E. (2019). Robust elastic net estimators for variable selection and identification of proteomic biomarkers. Annals of Applied Statistics, 13(4), 2065-2090. doi:10.1214/19-AOAS1269 \cr
#' \cr
#' van de Geer, S. (2016). The Square-Root Lasso. In: Estimation and Testing Under Sparsity. Lecture Notes in Mathematics, vol 2159. Springer, Cham. \cr
#' \cr
#' Raninen, E. & Ollila, E. (2017). Scaled and square-root elastic net. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), New Orleans, LA, 2017, pp. 4336-4340. doi:10.1109/ICASSP.2017.7952975 \cr
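#'
#' @examples
#' ## a minimal usage sketch (not run), assuming cvreg and the pense package are
#' ## installed; mtcars is just a placeholder dataset
#' \dontrun{
#' fit <- sqrtPENSE(mpg ~ ., data = mtcars, alpha = 0.5, delta = 0.293)
#' print(fit)
#' }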
#'
sqrtPENSE <- function(formula, data, alpha = 0.5, delta = 0.293, conf.level = 0.95, alg = c("lars", "dal")) {

  this.call <- match.call()
  alg <- match.arg(alg)

  if (alg=="dal"){
    enopts <- pense:::en_options_dal(maxit = 450,
                             eps = 1e-05,
                             eta_mult = 2.090764,
                             eta_start_numerator = 0.0125,
                             preconditioner = "approx")
  }
  else{
    enopts <- pense:::en_options_aug_lars(use_gram = "yes", eps = 1e-05)
  }


  yzscale <- function(x) unname(cvreg:::.scaleTauYZ(x))
  yzcentr <- function(x) unname(cvreg:::.scaleTauYZ(x, mu.too = TRUE)[1])
  x = model.matrix(formula, data)[,-1]; coefnames <- c("(Intercept)", colnames(x))
  y = model.frame(formula, data)[, 1]
  yscale <- yzscale(y); xscale <- numeric(); ycenter <- yzcentr(y)

  for (i in 1:ncol(x)){
    if (length(unique(x[,i])) > 2){
      xscale[i] <- yzscale(x[,i])
    } else{
      xscale[i] <- 1
    }
  }
  data = Scale2(data, yzcentr, yzscale)
  x = model.matrix(formula, data)[,-1]
  y = model.frame(formula, data)[, 1]
  n <- nrow(x); p <- ncol(x)

  if (p == 1) {
    L = 0.5
  } else {
    L = 0.1
    Lold = 0
    while (abs(L - Lold) > 0.001) {
      k = (L^4 + 2 * L^2)
      Lold = L
      L = -qnorm(min(k/p, conf.level))
      L = (L + Lold)/2
    }
  }
  initfit <- cvreg:::.initfit(x, y, alpha = 0, lambda = 0)$coef[-1]
  lambdal1 = sqrt((2/n)*log(p)) * L
  lambdal2 <- cvreg:::.ridgeEstLambda(x, y, initfit)/n
  lambda <- (alpha*lambdal1) + ((1-alpha)*lambdal2)
  l.min <- lambda/4
  l.max <- lambda*8
  #l.max <- ifelse(alpha!=0, max(abs(t(y - yzcentr(y)*(1-yzcentr(y))) %*% x ) )/ (alpha*n), max(abs(t(y - yzcentr(y)*(1-yzcentr(y))) %*% x ) ) * n)
  out <- pense:::pense(x, y, alpha = alpha, nlambda = 150, standardize = FALSE, cv_k = 1, options = pense:::pense_options(delta=delta), en_options = enopts)
  coef_path <- out$coefficients
  lambda_path <- out$lambda
  resids <- out$residuals
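  # robust analogue of the mean squared error: the tau-location of the squared
  # residuals at each point on the penalty path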
  full_MSE <- apply(resids^2, 2, yzcentr)
  index_sel <- which.min(abs(full_MSE*lambda^2 / out$lambda^2 - 1))
  fit = list("beta"=as.numeric(out$coefficients[-1, index_sel]),
             "a0"=out$coefficients[1 , index_sel],
             "sigma_hat"=sqrt(full_MSE[index_sel]),
             "fit"=out)

  coefs <- as.vector(zapsmall(round(c(fit$a0, fit$beta), 6), 5))
  fitted <- as.vector(cbind(1,x)%*%coefs)
  xscale <- c(1, xscale)
  coefs <- sapply(1:length(coefs), function(i) coefs[i] * (yscale/xscale[i]))
  names(coefs) <- c("(Intercept)" , colnames(x))
  results <- list("coefficients"=coefs,
                  "fitted" = (fitted*yscale)+ycenter,
                  "residuals" = yscale*out$residuals[,index_sel],
                  "sigma"=sqrt(full_MSE[index_sel]) * yscale,
                  "alpha" = alpha,
                  "lambda" = lambda,
                  "coef_path" = coef_path,
                  "lambda_path" = lambda_path,
                  "call" = this.call)
  class(results) <- "sqrtenet"
  return(results)
}

#' Square-Root Penalized Elastic Net MM-estimator
#'
#' @description This fits a penalized elastic net S-estimator with a subsequent
#' M-estimation step (making it an MM-estimator), using the square-root method to select
#' an optimal penalty parameter, so no cross-validation is required. Data are automatically
#' unit scaled and centered using the Yohai-Zamar \strong{\eqn{\tau}} location and scale estimates.
#' Coefficients are returned on the original scale of the inputs; hence, it is not necessary
#' to center and/or standardize the inputs beforehand.
#'
#' @param formula a model formula
#' @param data a data frame
#' @param alpha a value between 0 and 1 for the mixing parameter. defaults to 0.5.
#' @param delta the breakdown point for the robust estimator. the default is 0.293, a somewhat arbitrary choice (it is the breakdown point of the Theil-Sen estimator). the value must be greater than zero and at most 0.50.
#' @param conf.level the confidence level to use in setting the penalty. the default is 0.95.
#' @param alg the algorithm used to fit the elastic net: the augmented LARS ("lars", the default) or the Dual Augmented Lagrangian ("dal").
#'
#' @return a model fit
#' @export
#'
#' @references
#' Belloni, A., Chernozhukov, V., & Wang, L. (2011). Square-root lasso: pivotal recovery of sparse signals via conic programming. Biometrika, 98(4), 791-806. \cr
#' \cr
#' Freue, G.V.C., Kepplinger, D., Salibián-Barrera, M., & Smucler, E. (2019). Robust elastic net estimators for variable selection and identification of proteomic biomarkers. Annals of Applied Statistics, 13(4), 2065-2090. doi:10.1214/19-AOAS1269 \cr
#' \cr
#' van de Geer, S. (2016). The Square-Root Lasso. In: Estimation and Testing Under Sparsity. Lecture Notes in Mathematics, vol 2159. Springer, Cham. \cr
#' \cr
#' Raninen, E. & Ollila, E. (2017). Scaled and square-root elastic net. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), New Orleans, LA, 2017, pp. 4336-4340. doi:10.1109/ICASSP.2017.7952975 \cr
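#'
#' @examples
#' ## a minimal usage sketch (not run), assuming cvreg and the pense package are
#' ## installed; mtcars is just a placeholder dataset
#' \dontrun{
#' fit <- sqrtPENSEM(mpg ~ ., data = mtcars, alpha = 0.5, delta = 0.293)
#' print(fit)
#' }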
#'

sqrtPENSEM <- function(formula, data, alpha = 0.5, delta = 0.293, conf.level = 0.95, alg = c("lars", "dal")) {

  this.call <- match.call()
  alg <- match.arg(alg)

  if (alg=="dal"){
    enopts <- pense:::en_options_dal(maxit = 250,
                             eps = 1e-05,
                             eta_mult = 2.090764,
                             eta_start_numerator = 0.0125,
                             preconditioner = "approx")
  }
  else{
    enopts <- pense:::en_options_aug_lars(use_gram = "yes", eps = 1e-05)
  }

  yzscale <- function(x) unname(cvreg:::.scaleTauYZ(x))
  yzcentr <- function(x) unname(cvreg:::.scaleTauYZ(x, mu.too = TRUE)[1])
  x = model.matrix(formula, data)[,-1]; coefnames <- c("(Intercept)", colnames(x))
  y = model.frame(formula, data)[, 1]
  yscale <- yzscale(y); xscale <- numeric(); ycenter <- yzcentr(y)
  for (i in 1:ncol(x)){
    if (length(unique(x[,i])) > 2){
      xscale[i] <- yzscale(x[,i])
    } else{
      xscale[i] <- 1
    }
  }
  data <- Scale2(data, yzcentr, yzscale)
  x <- model.matrix(formula, data)[,-1]
  y <- model.frame(formula, data)[, 1]
  n <- nrow(x); p <- ncol(x)

  if (p == 1) {
    L = 0.5
  } else {
    L = 0.1
    Lold = 0
    while (abs(L - Lold) > 0.001) {
      k = (L^4 + 2 * L^2)
      Lold = L
      L = -qnorm(min(k/p, conf.level))
      L = (L + Lold)/2
    }
  }
  initfit <- cvreg:::.initfit(x, y, alpha = 0, lambda = 0)$coef[-1]
  lambdal1 = sqrt((2/n)*log(p)) * L
  lambdal2 <- cvreg:::.ridgeEstLambda(x, y, initfit)/n
  lambda <- (alpha*lambdal1) + ((1-alpha)*lambdal2)
  l.min <- lambda/4
  l.max <- lambda*8
  #l.max <- ifelse(alpha!=0, max(abs(t(y - yzcentr(y)*(1-yzcentr(y))) %*% x ) )/ (alpha*n), max(abs(t(y - yzcentr(y)*(1-yzcentr(y))) %*% x ) ) * n)
  out <- pense:::pense(x, y, alpha = alpha, nlambda = 125, standardize = FALSE, cv_k = 1, options = pense:::pense_options(delta=delta), en_options = enopts)
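  # stage 1: choose the S-stage penalty lambda_s by the robust square-root criterion
  # (tau-location of the squared residuals in place of the mean squared error)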
  resids <- out$residuals
  full_MSE <- apply(resids^2, 2, yzcentr)
  index_sel <- which.min(abs(full_MSE*lambda^2 / out$lambda^2 - 1))
  lambda_s <- out$lambda[index_sel]
  lambdal1 = sqrt((2/n)*log(p)) * L
  lambdal2 <- cvreg:::.ridgeEstLambda(x, y, initfit)/n
  lambda_m <- (alpha*lambdal1) + ((1-alpha)*lambdal2)
  l.min <- lambda_m/2
  l.max <- lambda_m*2
  #ypred <- drop(x%*%as.vector(out$coefficients[-1,index_sel]))
  #l.max <- ifelse(alpha!=0, max(abs(t(ypred - mean(ypred)*(1-mean(ypred))) %*% x ) )/ (alpha*n), max(abs(t(ypred - mean(ypred)*(1-mean(ypred))) %*% x ) ) * n)
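  # stage 2: refit with the M-step over a penalty grid anchored at the selected
  # lambda_s, then select lambda_m by the same square-root criterion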
  out <- pense:::pensem(x, y, alpha = alpha, lambda = seq(l.min, max(c(l.max,out$lambda)), len = 125), lambda_s = lambda_s, standardize = FALSE, cv_k = 1, options = pense:::pense_options(delta=delta), en_options = enopts)
  coef_path <- out$coefficients
  lambda_path <- out$lambda
  resids <- out$residuals
  full_MSE <- apply(resids^2, 2, yzcentr)
  index_sel <- which.min(abs(full_MSE*lambda_m^2 / out$lambda^2 - 1))
  lambda_m <- out$lambda[index_sel]
  fit = list("beta"=as.numeric(out$coefficients[-1, index_sel]),
             "a0"=out$coefficients[1 , index_sel],
             "sigma_hat"=sqrt(full_MSE[index_sel]),
             "fit"=out,
             "alpha" = alpha,
             "lambda_s" = lambda_s,
             "lambda_m" = lambda_m)

  coefs <- as.vector(zapsmall(round(c(fit$a0, fit$beta), 6), 5))
  fitted <- as.vector(cbind(1,x)%*%coefs)
  xscale <- c(1, xscale)
  coefs <- sapply(1:length(coefs), function(i) coefs[i] * (yscale/xscale[i]))
  names(coefs) <- c("(Intercept)" , colnames(x))
  results <- list("coefficients"=coefs,
                  "fitted" = (fitted*yscale)+ycenter,
                  "residuals" = yscale*out$residuals[,index_sel],
                  "sigma"=sqrt(full_MSE[index_sel]) * yscale,
                  "alpha" = alpha,
                  "lambda_s" = lambda_s,
                  "lambda_m" = lambda_m,
                  "coef_path" = coef_path,
                  "lambda_path" = lambda_path,
                  "call" = this.call)
  class(results) <- "sqrtenet"
  return(results)
}


#' print method for sqrtenet objects
#'
#' @param x the model fit
#' @param ... further arguments (ignored)
#' @method print sqrtenet
#' @keywords internal
#' @export
print.sqrtenet <- function(x, ...){
  darkpurple <- crayon::make_style(rgb(0.34, 0.16, 0.29))
  brightgreen <- crayon::make_style(rgb(0.536, 0.7378, 0))
  cat(darkpurple("Call: "))
  print(x$call)
  cat("Statistics for optimal model: \n\n")
  cat(crayon::cyan("Coefficients: \n\n"))
  print(round(x$coefficients, 3))
  cat("\n")
  cat(crayon::blue("Residual standard deviation: "), round(x$sigma, 3), "\n")
  # sqrtPENSEM fits carry lambda_s and lambda_m rather than a single lambda
  if (is.null(x$lambda)){
    cat(brightgreen("lambda_s: "), x$lambda_s, "\n")
    cat(brightgreen("lambda_m: "), x$lambda_m, "\n")
  } else {
    cat(brightgreen("lambda: "), x$lambda, "\n")
  }
  invisible(x)
}

#' predict method for sqrtenet class models
#'
#' @param object the model fit
#' @param newdata a data frame or matrix of predictors with the same columns as the model matrix (no intercept column). if NULL the fitted values are returned.
#' @param ... further arguments (ignored)
#' @method predict sqrtenet
#' @return a vector of predictions
#' @export
#'
predict.sqrtenet <- function(object, newdata = NULL, ...){

  if (is.null(newdata)){
    return(object$fitted)
  }
  # bind a column of ones for the intercept so the dimensions conform:
  # the coefficient vector has length p + 1 while newdata is n x p
  as.vector(cbind(1, as.matrix(newdata)) %*% object$coefficients)
}