# Nothing
# TODO: PR for SuperLearner
#' @title Elastic net regression, including lasso and ridge
#'
#' @description
#' Penalized regression using elastic net. Alpha = 0 corresponds to ridge
#' regression and alpha = 1 corresponds to Lasso.
#'
#' See \code{vignette("glmnet_beta", package = "glmnet")} for a nice tutorial on
#' glmnet.
#'
#' @param Y Outcome variable
#' @param X Covariate dataframe
#' @param newX Dataframe to predict the outcome
#' @param obsWeights Optional observation-level weights
#' @param id Optional id to group observations from the same unit (not used
#' currently).
#' @param family Error distribution given as a family object, e.g.
#' \code{gaussian()} for regression or \code{binomial()} for binary
#' classification; its \code{family} name is what gets passed to cv.glmnet.
#' Untested options: "multinomial" for multiple classification or "mgaussian"
#' for multiple response, "poisson" for non-negative outcome with proportional
#' mean and variance, "cox".
#' @param alpha Elastic net mixing parameter, range [0, 1]. 0 = ridge regression
#' and 1 = lasso.
#' @param nfolds Number of folds for internal cross-validation to optimize lambda.
#' @param nlambda Number of lambda values to check, recommended to be 100 or more.
#' @param loss Loss function, can be "deviance", "mse", or "mae". If family =
#' binomial can also be "auc" or "class" (misclassification error).
#' @param useMin If TRUE use lambda that minimizes risk, otherwise use 1
#' standard-error rule which chooses a higher penalty with performance within
#' one standard error of the minimum (see Breiman et al. 1984 on CART for
#' background).
#' @param parallel If TRUE use any parallel backend registered via the foreach
#' package.
#' @param ... Any additional arguments are passed through to cv.glmnet.
#'
#' @examples
#'
#' library(SuperLearner)
#'
#' # Load a test dataset.
#' data(PimaIndiansDiabetes2, package = "mlbench")
#' data = PimaIndiansDiabetes2
#'
#' # Omit observations with missing data.
#' data = na.omit(data)
#'
#' Y = as.numeric(data$diabetes == "pos")
#' X = subset(data, select = -diabetes)
#'
#' set.seed(1, "L'Ecuyer-CMRG")
#'
#' sl = SuperLearner(Y, X, family = binomial(),
#' SL.library = c("SL.mean", "SL.glm", "SL.glmnet"))
#' sl
#'
#' @references
#'
#' Friedman, J., Hastie, T., & Tibshirani, R. (2010). Regularization paths for
#' generalized linear models via coordinate descent. Journal of statistical
#' software, 33(1), 1.
#'
#' Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased estimation
#' for nonorthogonal problems. Technometrics, 12(1), 55-67.
#'
#' Tibshirani, R. (1996). Regression shrinkage and selection via the lasso.
#' Journal of the Royal Statistical Society. Series B (Methodological), 267-288.
#'
#' Zou, H., & Hastie, T. (2005). Regularization and variable selection via the
#' elastic net. Journal of the Royal Statistical Society: Series B (Statistical
#' Methodology), 67(2), 301-320.
#'
#' @seealso \code{\link{predict.SL.glmnet}} \code{\link[glmnet]{cv.glmnet}}
#' \code{\link[glmnet]{glmnet}}
#'
#' @importFrom stats model.matrix
#' @export
SL.glmnet2 <- function(Y, X, newX, family, obsWeights, id,
                       alpha = 1, nfolds = 10, nlambda = 100, useMin = TRUE,
                       loss = "deviance", parallel = FALSE,
                       ...) {
  # Fit an elastic net with lambda chosen by internal cross-validation
  # (glmnet::cv.glmnet) and predict on newX.
  #
  # Returns a list with:
  #   pred - predictions for newX on the response scale, at the selected lambda
  #   fit  - list(object = <cv.glmnet fit>, useMin = useMin), class "SL.glmnet"
  #
  # `id` is accepted for interface compatibility but is not used.

  #.SL.require('glmnet')

  # Accept a family generator (binomial), a family object (binomial()), or a
  # plain family name ("binomial"). cv.glmnet is always given the name.
  if (is.function(family)) {
    family <- family()
  }
  family_name <- if (is.character(family)) family else family$family

  # Unit weights when the caller supplies none; NROW() also covers a matrix Y.
  if (missing(obsWeights)) {
    obsWeights <- rep(1, NROW(Y))
  }

  # glmnet needs numeric matrices. Convert X and newX independently so that a
  # matrix / data frame mix does not leave one of them unconverted.
  # TODO: support sparse matrices.
  if (!is.matrix(X)) {
    X <- model.matrix(~ -1 + ., X)
  }
  if (!is.matrix(newX)) {
    newX <- model.matrix(~ -1 + ., newX)
  }

  # Use CV to find optimal lambda.
  fitCV <- glmnet::cv.glmnet(
    x = X, y = Y, weights = obsWeights,
    lambda = NULL,
    type.measure = loss,
    nfolds = nfolds,
    family = family_name,
    alpha = alpha,
    nlambda = nlambda,
    parallel = parallel,
    ...
  )

  # Predicting from the cv.glmnet object lets lambda be selected by name.
  # useMin is a scalar flag, so use if/else rather than vectorized ifelse().
  lambda_rule <- if (useMin) "lambda.min" else "lambda.1se"
  pred <- predict(fitCV, newx = newX, type = "response", s = lambda_rule)

  fit <- list(object = fitCV, useMin = useMin)
  class(fit) <- "SL.glmnet"

  list(pred = pred, fit = fit)
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.