## (stray extraction artifact "Nothing" commented out -- not valid R code)
################################################################################
##### cv.glmnetr_yymmdd.R ######################################################
################################################################################
## do cross validation to choose tuning parameter, i.e. lambda and gamma #######
#' Get a cross validation informed relaxed lasso model fit. Available to the
#' user but intended to be called from nested.glmnetr().
#'
#' @description
#' Derive an elastic net (including a relaxed lasso) model and identify
#' hyperparameters, i.e. alpha, gamma and lambda, which give the best fit
#' based upon cross validation. It is analogous to (and uses) the cv.glmnet()
#' function of the 'glmnet' package, but also tunes on alpha.
#'
#' @param trainxs predictor matrix
#' @param trainy__ outcome vector
#' @param family model family, "cox", "binomial" or "gaussian" (default)
#' @param lambda the lambda vector. May be NULL.
#' @param gamma the gamma vector. Default is c(0,0.25,0.50,0.75,1).
#' @param alpha A vector of alpha values considered when tuning, for example
#' c(0,0.2,0.4,0.6,0.8,1). Default is c(1) to fit the
#' lasso model involving only the L1 penalty. c(0) could be used to fit
#' the ridge model involving only the L2 penalty.
#' @param folds_n number of folds for cross validation. Default and generally recommended is 10.
#' @param fine use a finer step in determining lambda. Of little value unless one
#' repeats the cross validation many times to more finely tune the hyperparameters.
#' See the 'glmnet' package documentation.
#' @param path The path option from cv.glmnet(). 0 for FALSE reducing
#' computation time when the numerics are stable, 1 to avoid cases where
#' the path = 0 option might get very slow. 0 by default.
#' @param foldid a vector of integers to associate each record to a fold. The integers should be between 1 and folds_n.
#' @param track indicate whether or not to update progress in the console. Default of
#' 0 suppresses these updates. The option of 1 provides these updates. In fitting
#' clinical data with non full rank design matrix we have found some R-packages to
#' take a very long time or seemingly be caught in infinite loops. Therefore we allow
#' the user to track the program progress and judge whether things are moving forward or
#' if the process should be stopped.
#' @param ... Additional arguments that can be passed to cv.glmnet()
#'
#' @details
#' This is the main program for model derivation. As currently implemented the
#' package requires the data to be input as vectors and matrices with no missing values
#' (NA). All data vectors and matrices must be numerical. For factors (categorical variables) one
#' should first construct corresponding numerical variables to represent the factor
#' levels. To take advantage of the lasso model, one can use one hot coding
#' assigning an indicator for each level of each categorical variable, or creating
#' as well other contrasts variables suggested by the subject matter.
#'
#' @return A cross validation informed relaxed lasso model fit.
#'
#' @seealso
#' \code{\link{summary.cv.glmnetr}} , \code{\link{predict.cv.glmnetr}} , \code{\link{nested.glmnetr}}
#'
#' @author Walter Kremers (kremers.walter@mayo.edu)
#'
#' @export
#'
#' @importFrom stats runif
#' @importFrom survival Surv
#' @importFrom glmnet cv.glmnet
#' @importFrom Matrix rankMatrix
#'
#' @examples
#' # set seed for random numbers, optionally, to get reproducible results
#' set.seed(82545037)
#' sim.data=glmnetr.simdata(nrows=200, ncols=100, beta=NULL)
#' xs=sim.data$xs
#' y_=sim.data$y_
#' event=sim.data$event
#' # for this example we use a small number for folds_n to shorten run time
#' cv.glmnetr.fit = nested.glmnetr(xs, y_=y_, family="gaussian", folds_n=4, resample=0)
#' plot(cv.glmnetr.fit)
#' plot(cv.glmnetr.fit, coefs=1)
#' summary(cv.glmnetr.fit)
#'
cv.glmnetr = function(trainxs, trainy__, family, alpha=1, gamma=c(0,0.25,0.5,0.75,1), lambda=NULL, foldid=NULL, folds_n=NULL, fine=0, path=0, track=0, ... ) {
  ## Derive cross-validation informed (relaxed) elastic net fits, tuning over
  ## alpha, gamma and lambda, and return the per-alpha fits together with the
  ## lasso, ridge and overall-best ("elastic") fits.
  lambda_null = 0                    ## 1 when the lambda path is derived here rather than user supplied
  cv_glmnet_fit_a0 = NULL            ## ridge (alpha=0) fit; stays NULL when lambda is user supplied
  if (is.null(lambda)) {
    lambda_null = 1
    ## derive a common lambda path from the alpha=1 (lasso) and alpha=0 (ridge) cv.glmnet paths
    temp_fit_1       = cv.glmnet( trainxs, trainy__, family=family, alpha=1, foldid=foldid, nfolds=folds_n , ... )
    cv_glmnet_fit_a0 = cv.glmnet( trainxs, trainy__, family=family, alpha=0, foldid=foldid, nfolds=folds_n , ... )
    if (fine == 0) {
      ## extend the lasso path only by those ridge lambdas lying outside its range
      lambda2 = sort( cv_glmnet_fit_a0$lambda[ ( (cv_glmnet_fit_a0$lambda < min(temp_fit_1$lambda)) |
                                                   (cv_glmnet_fit_a0$lambda > max(temp_fit_1$lambda)) ) ] )
      lambda = sort( unique( c(temp_fit_1$lambda, lambda2) ) , decreasing = TRUE)
    } else {
      ## finer grid: merge the full lasso and ridge lambda paths
      lambda = sort( unique( c(temp_fit_1$lambda, cv_glmnet_fit_a0$lambda) ) , decreasing = TRUE )
    }
  }
  ## one relaxed cv.glmnet fit per candidate alpha
  cv_glmnet_fit_ = list()
  lalpha = length(alpha)
  for (j_ in seq_len(lalpha)) {      ## seq_len() avoids the 1:0 pitfall of 1:lalpha for empty alpha
    alpha_ = alpha[j_]
    cv_glmnet_fit_[[j_]] = cv.glmnet( trainxs, trainy__, family=family, alpha=alpha_, lambda=lambda, gamma=gamma, relax=TRUE, foldid=foldid, nfolds=folds_n, path=path, ... )
    class(cv_glmnet_fit_[[j_]]) = "cv.glmnetr"
    cv_glmnet_fit_[[j_]]$alpha  = alpha_
    cv_glmnet_fit_[[j_]]$family = family
    ## un-relaxed minima taken directly from the cv.glmnet fit (index[1] = lambda.min position)
    cv_glmnet_fit_[[j_]]$nzero.min   = cv_glmnet_fit_[[j_]]$nzero[cv_glmnet_fit_[[j_]]$index[1]] ## CHECK ##
    cv_glmnet_fit_[[j_]]$measure.min = cv_glmnet_fit_[[j_]]$cvm[ cv_glmnet_fit_[[j_]]$index[1]] ## CHECK ##
    ## fully relaxed (gamma=0) minima, i.e. statlist[[1]] -- assumes gamma[1] == 0; TODO confirm
    index.min.g0 = which.min( cv_glmnet_fit_[[j_]]$relaxed$statlist[[ 1 ]]$cvm )
    cv_glmnet_fit_[[j_]]$relaxed$index.min.g0   = index.min.g0
    cv_glmnet_fit_[[j_]]$relaxed$nzero.min.g0   = cv_glmnet_fit_[[j_]]$nzero[ index.min.g0 ]
    cv_glmnet_fit_[[j_]]$relaxed$lambda.min.g0  = cv_glmnet_fit_[[j_]]$lambda[ index.min.g0 ]
    cv_glmnet_fit_[[j_]]$relaxed$measure.min.g0 = cv_glmnet_fit_[[j_]]$relaxed$statlist[[1]]$cvm[ index.min.g0 ]
    ## track which alpha gives the smallest CV measure over (gamma, lambda);
    ## indx row 1 holds the (lambda index, gamma index) at the relaxed CV minimum
    indx = cv_glmnet_fit_[[j_]]$relaxed$index
    cvm.min_ = cv_glmnet_fit_[[j_]]$relaxed$statlist[[ indx[1,2] ]]$cvm[ indx[1,1] ]
    if (j_ == 1) {
      a.index.min = 1
      cvm.min = cvm.min_
    } else if (cvm.min_ <= cvm.min ) { ## <= prefers the later alpha on exact ties, as in the original
      a.index.min = j_
      cvm.min = cvm.min_
    }
  }
  class(cv_glmnet_fit_) = "cv.glmnetr.list"
  cv_glmnet_fit_$family = family
  cv_glmnet_fit_$alpha  = alpha
  ## best fit over alpha ("elastic"), annotated with its (alpha, gamma, lambda) indices and values
  cv_elastic_fit = cv_glmnet_fit_[[a.index.min]]
  class(cv_elastic_fit) = "cv.glmnetr.el"
  indx = cv_elastic_fit$relaxed$index
  index.elastic = c( a.index.min , indx[1,2] , indx[1,1] )
  vals.elastic  = c(alpha[a.index.min], gamma[indx[1,2]], cv_elastic_fit$lambda[ indx[1,1] ], cvm.min , cv_elastic_fit$relaxed$nzero.min)
  names(index.elastic) = c("alpha", "gamma", "lambda")
  names(vals.elastic)  = c("alpha", "gamma", "lambda", "measure", "NZero")
  if (track >= 2) { print(vals.elastic) }
  cv_elastic_fit$index.elastic = index.elastic
  cv_elastic_fit$vals.elastic  = vals.elastic
  ## the last alpha is taken as the lasso fit -- assumes alpha increases and ends at 1; TODO confirm with callers
  cv_lasso_fit = cv_glmnet_fit_[[ lalpha ]]
  ## BUG FIX: previously cv_glmnet_fit_a0 was undefined (object-not-found error) whenever
  ## the caller supplied lambda; now the ridge fit is simply NULL in that case
  cv_ridge_fit = cv_glmnet_fit_a0
  if (!is.null(cv_ridge_fit)) { cv_ridge_fit$family = family }
  return( list( cv_glmnet_fit_ = cv_glmnet_fit_ , cv_lasso_fit = cv_lasso_fit ,
                cv_ridge_fit = cv_ridge_fit , cv_elastic_fit = cv_elastic_fit ,
                lambda=lambda, lambda_null=lambda_null ) )
}
## Any scripts or data that you put into this service are public.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.
## (the three lines above are web-page footer boilerplate, commented out so the file parses as R)