#'---
#'title: "ensr vs glmnetUtils"
#'---
#'
#' # Introduction
#'
#' This script provides some comparisons between the ensr and
#' [glmnetUtils](https://cran.r-project.org/package=glmnetUtils)
#' packages. This script is referenced in a vignette but is not formally part of the
#' ensr package. If this script was to be part of the ensr package, then the
#' glmnetUtils package would need to be included in the DESCRIPTION file, likely
#' in the Suggests section. We do not want to include glmnetUtils in the
#' DESCRIPTION file and thus this example lives on the github page only.
#'
#+ lable = "needed_namespaces"
library(glmnetUtils)
library(ensr)
library(data.table)
library(magrittr)
library(ggplot2)
#'
#' For the examples we will use the tbi data from the ensr package.
data(tbi, package = "ensr")
set.seed(42)
#'
#' # Factors in the model specification?
#'
#' ensr requires users to specify a response and support matrix in the same way
#' that glmnet requires. glmnetUtils allows the user to use a formula to
#' specifiy a model. While the formula specificiation can be helpful it does
#' allow for a user to specify a model that likely should not be fitted via
#' elastic net. For example
# this takes awhile to evaluate
# factor_on_rhs <- cva.glmnet(injury1 ~ factor(age), data = tbi, family = "binomial")
#'
#' The `factor_on_rhs` object was fitted, but is not meaningful. By requiring
#' the end user to explicitly specify response and support matrices, glmnet and
#' ensr would force the end user to carfully consider fitting the above model.
#'
#' # Selecting $\lambda$ and $\alpha$ from a grid, or not?
#'
#' The search for $\alpha$ is different between ensr and glmnetUtils.
#' Effectively glmnetUtils::cva.glmnet is a `lapply`
#' wrapper about `cv.glmnet` of the form: `lapply(X = alpha, FUN =
#' glmnet::cv.glmnet)`. This is efficient, but it means that each $\alpha$
#' value has a unique set of possible $\lambda$ values with a probability zero
#' occurance of a common $\lambda$ value assessed with two or more $\alpha$
#' values. ensr builds a grid of $\lambda$ and $\alpha$ values such that each
#' $\alpha$ will be assessed with values of $\lambda$ common to other values of
#' $\alpha.$ ensr supports an estiamte of a ($\alpha$, $\labmda$, z) surface
#' whereas glmnetUtils results will require some imputation to do likewise.
#'
#' Example:
# ensr approach
ymat <- as.matrix(tbi[, injury1])
xmat <- as.matrix(tbi[, pcode1:ncode6])
foldid <- rep(seq(10), length.out = nrow(tbi))
ensr_object <- ensr(y = ymat, x = xmat, standardize = FALSE, family = "binomial", foldid = foldid)
# glmnetUtils object
glmnetUtils_object <-
cva.glmnet(injury1 ~ pcode1 + pcode2 + pcode3 + pcode4 + pcode5 + pcode6 + ncode1 + ncode2 + ncode3 + ncode4 + ncode5 + ncode6,
data = tbi,
standardize = FALSE,
family = "binomial",
alpha = seq(0, 1, length = 11),
foldid = foldid)
glmnetUtils_lambdas <- lapply(glmnetUtils_object$modlist, `[[`, "lambda")
glmnetUtils_cvm <- lapply(glmnetUtils_object$modlist, `[[`, "cvm")
glmnetUtils_alphas <- as.list(seq(0, 1, length = 11))
glmnetUtils_plot_data <-
Map(data.frame,
lambda = glmnetUtils_lambdas,
cvm = glmnetUtils_cvm,
alpha = glmnetUtils_alphas) %>%
do.call(rbind, .) %>%
as.data.table
glmnetUtils_plot_data[, z := (cvm - min(cvm)) / sd(cvm)]
ggplot(glmnetUtils_plot_data) +
aes(x = alpha, y = lambda, z = cvm) +
geom_point() +
# geom_contour() + ## Cannot be fitted ...
scale_y_log10()
#'
#' There is a red point on the graphs below showing the min cross validation
#' error.
#'
# ensr plot vs glmnetUtils
ensr_plot_data <- plot(ensr_object)$data
ensr_plot <-
ggplot(ensr_plot_data) +
theme_bw() +
aes(x = alpha, y = lambda, z = log10(z), color = log10(z)) +
geom_point() +
geom_point(data = ensr_plot_data[cvm == min(cvm)], size = 2, color = "red") +
ggtitle("glmnetUtils") +
ggplot2::scale_color_gradient2(limits = c(-5, log10(5))) +
geom_contour() +
scale_y_log10(limits = c(1e-5, 1e2))
glmnetUtils_plot <-
ggplot(glmnetUtils_plot_data) +
theme_bw() +
aes(x = alpha, y = lambda, z = z, color = log10(z)) +
geom_point() +
geom_point(data = glmnetUtils_plot_data[cvm == min(cvm)], size = 2, color = "red") +
ggtitle("glmnetUtils") +
ggplot2::scale_color_gradient2(limits = c(-5, log10(5))) +
scale_y_log10(limits = c(1e-5, 1e2))
grid.arrange(ensr_plot, glmnetUtils_plot, nrow = 1)
#'
#' Very similar results.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.