Nothing
#' Function to run on a dataset with not too much missing data to identify a set of reasonable lambda values
#'
#' Expects a singly imputed dataset and fits a logistic LASSO so the user
#' can pick a set that wil probably be interesting.
#'
#' @param ds dataset to investigate
#' @param out outcome vector
#' @param family see \code{\link{glmnet}}. Defaults to "binomial" (i.e. lasso penalized logistic regression).
#' @param showFirst show the top coefficients (first \code{showFirst} occurring)
#' @param showPlot if \code{TRUE} (the default), visually supports the decision
#' @param type.measure see \code{\link{cv.glmnet}}
#' @param repsNeededForFirstOccurrence How many times (i.e. for how many lambda values)
#' must a coefficient be consecutively nonzero before we count it as "occurring"
#' @param weights vector with weight to be assigned to each row of \code{ds}
#' @param \dots passed on to \code{\link{plotex}} (if relevant)
#' @param verbosity The higher this value, the more levels of progress and debug
#' information is displayed (note: in R for Windows, turn off buffered output)
#' @param minNumHigher How many lambdas higher than the optimum do you minimally want (if available)
#' @param minNumLower How many lambdas lower than the optimum do you minimally want (if available)
#' @param maxNumLower How many lambdas lower than the optimum do you maximally want
#' @param imputeDs2FitDsProperties see \code{\link{imputeDs2FitDs}} and \code{\link{EMLasso}}
#' @param standardize see \code{\link{glmnet}}. Defaults to FALSE.
#' @param nfolds see \code{\link{glmnet}}. Defaults to 10.
#' @return list of class "LambdaHelper":
#' \item{topres }{\code{data.frame} with \code{showFirst} rows, and columns:
#' \code{variable} (name), \code{lambda},\code{critl} (lower bound of criterion),
#' \code{crit} (estimate of criterion), \code{critu} (upper bound of criterion),
#' \code{critsd} (sd of criterion), \code{index} (at which lambda index does this variable first occur)}
#' \item{allLambda }{vector of lambda values}
#' \item{regionDfr }{\code{data.frame} w 3 rows 3 columns: \code{name} (values: "lower lambda", "optimum", and "higher lambda"), \code{idx} and \code{lambda}}
#' \item{regionOfInterestData }{see \code{\link{getMinMaxPosLikeGlmnet}}}
#' @note EMLasso is pretty heavy and has to be run per lambda. This functions helps
#' preselect some lambda values, and can typically avoid useless calculations for
#' non-interesting lambda values.
#' @author Nick Sabbe \email{nick.sabbe@@ugent.be}
#' @keywords glmnet lambda
#' @examples aDfr<-generateTypicalIndependentDfr(numCat=10, numCnt=10, numObs=100, catProbs=rep(1/3,3),
#' rcnt=typicalRandomNorm, doShuffle=TRUE, verbosity=1)
#'
#' outlins<- -mean(aDfr$cnt1)+aDfr$cnt1+2*(aDfr$cat1=="b")
#' outprobs<-expit(outlins)
#' y<-factor(sapply(outprobs, function(prob){sample(c("no", "yes"), 1, prob=c(1-prob,prob))}))
#'
#' rlh<-findReasonableLambdaHelper(aDfr, y, verbosity=10)
#' @export
findReasonableLambdaHelper<-function(ds, out, family="binomial", showFirst=20, showPlot=TRUE,
type.measure="auc", repsNeededForFirstOccurrence=3, weights=rep(1, nrow(ds)), ..., verbosity=0,
minNumHigher=20, minNumLower=20, maxNumLower=30,
imputeDs2FitDsProperties=normalImputationConversion(), standardize=FALSE, nfolds=10)
{
cv<-fit.glmnet(ds=ds, out=out, lambda=NULL, weights=weights, verbosity=verbosity-1,
standardize=standardize, type.measure=type.measure,
imputeDs2FitDsProperties=imputeDs2FitDsProperties, family=family, nfolds=nfolds)
fit<-cv$glmnet.fit
if(showPlot)
{
plotex(cv, xvar="lambda", numTicks=10, label=TRUE,
legendOf=showFirst,..., verbosity=verbosity-1)
}
#kolommen van beta zijn lambdas, van groot (s0) naar klein (s99)
firstAppearance<-firstRepeatedAppearance(cv, repsNeededForFirstOccurrence)
#firstAppearance now holds, for every variable that ever appears, the index
#within the lambdas where it first appears.
orderOfFirstAppearance<-order(firstAppearance)[1:showFirst]
whereAppearing<-firstAppearance[orderOfFirstAppearance]
forVars<-names(firstAppearance)[orderOfFirstAppearance]
regionOfInterestData<-getMinMaxPosLikeGlmnet(cv)
ll<-length(regionOfInterestData$lambda)
sgn<-sign(regionOfInterestData$lambda[1] - regionOfInterestData$lambda[ll])
#if sgn is positive (+1), lambda[1] is bigger than the last one, so ordered from big to small
if(abs(regionOfInterestData$pos.optimum - regionOfInterestData$pos.higherlambda) < minNumHigher)
{
regionOfInterestData$pos.higherlambda<-regionOfInterestData$pos.optimum-(sgn*minNumHigher)
if(regionOfInterestData$pos.higherlambda<1)
{
regionOfInterestData$pos.higherlambda<-1
}
if(regionOfInterestData$pos.higherlambda>ll)
{
regionOfInterestData$pos.higherlambda<-ll
}
}
actualNumHigher<-abs(regionOfInterestData$pos.higherlambda-regionOfInterestData$pos.optimum)
catwif(verbosity > 0, "actualNumHigher: ", actualNumHigher, ", as opposed to minNumHigher:", minNumHigher)
if(actualNumHigher < minNumHigher)
{
#add this to the 'lower' side:
minNumLower <- minNumLower + (minNumHigher - actualNumHigher)
maxNumLower <- maxNumLower + (minNumHigher - actualNumHigher)
}
diff1<-abs(regionOfInterestData$pos.lowerlambda1 - regionOfInterestData$pos.optimum)
diff2<-abs(regionOfInterestData$pos.lowerlambda2 - regionOfInterestData$pos.optimum)
if(diff1 < minNumLower) diff1<-minNumLower
if(diff2 < minNumLower) diff2<-minNumLower
if(diff1 > maxNumLower) diff1<-maxNumLower
if(diff2 > maxNumLower) diff2<-maxNumLower
diff<-min(diff1, diff2)
regionOfInterestData$pos.lowerlambda<-regionOfInterestData$pos.optimum + (sgn*diff)
if(regionOfInterestData$pos.lowerlambda<1)
{
regionOfInterestData$pos.lowerlambda<-1
}
if(regionOfInterestData$pos.lowerlambda>ll)
{
regionOfInterestData$pos.lowerlambda<-ll
}
poss<-c(regionOfInterestData$pos.lowerlambda, regionOfInterestData$pos.optimum, regionOfInterestData$pos.higherlambda)
tmp<-regionOfInterestData$lambda[poss]
regionDfr<-data.frame(name=c("lower lambda", "optimum", "higher lambda"), idx=poss, lambda=tmp, stringsAsFactors=FALSE)
if(verbosity > 0)
{
catw("Lambdas used:")
print(regionDfr)
}
retval<-list(topres=data.frame(variable=forVars, lambda=fit$lambda[whereAppearing],
critl=cv$cvlo[whereAppearing], crit=cv$cvm[whereAppearing],
critu=cv$cvup[whereAppearing], critsd=cv$cvsd[whereAppearing],
index=whereAppearing, stringsAsFactors=FALSE), allLambda=fit$lambda,
regionDfr=regionDfr, regionOfInterestData=regionOfInterestData)
class(retval)<-"LambdaHelper"
return(retval)
}
#' @rdname findReasonableLambdaHelper
#'
#' @usage \method{[}{LambdaHelper}(object, i, j, drop = TRUE)
#' @param object \code{LambdaHelper}
#' @param i row index
#' @param j column index. If this is missing, the \code{i}th lambda is returned
#' @param drop if \code{TRUE} the result is coerced to the simplest structure possible
#' @return depends on the parameters
#' @export
#' @aliases [.LambdaHelper
#' @method [ LambdaHelper
#' @examples data(emlcvfit, package="EMLasso")
#' rlh<-findReasonableLambdaHelper(aDfr, y, verbosity=10)
#' rlh[1]
#' rlh[1:5, NULL]
`[.LambdaHelper`<-function(object, i, j, drop = TRUE)
{
if(missing(j)) return(object$topres$lambda[i])
if(is.null(j) || is.na(j) || (j=="")) return(object$topres[i, seq(ncol(object$topres)), drop=drop])
else return(object$topres[i, j, drop=drop])
}
#' @rdname findReasonableLambdaHelper
#'
#' @param x object to find 'interesting' set of lambdas for
#' @return vector of lambda values, normally high to low
#' @keywords lambda
#' @export
getLambdas<-function(x,...) UseMethod("getLambdas")
#' @rdname findReasonableLambdaHelper
#'
#' @inheritParams getLambdas
#' @method getLambdas lambdaregion
#' @usage \method{getLambdas}{lambdaregion}(x, ...)
#' @export
#' @examples data(emlcvfit, package="EMLasso")
#' rlh<-findReasonableLambdaHelper(aDfr, y, verbosity=10)
#' getLambdas(rlh$regionOfInterestData)
getLambdas.lambdaregion<-function(x,...)
{
x$lambda[(x$pos.higherlambda):(x$pos.lowerlambda)]
}
#' @rdname findReasonableLambdaHelper
#'
#' @inheritParams getLambdas
#' @method getLambdas LambdaHelper
#' @usage \method{getLambdas}{LambdaHelper}(x, ...)
#' @export
#' @examples data(emlcvfit, package="EMLasso")
#' rlh<-findReasonableLambdaHelper(aDfr, y, verbosity=10)
#' getLambdas(rlh)
getLambdas.LambdaHelper<-function(x,...)
{
getLambdas(x$regionOfInterestData, ...)
}
#' @rdname findReasonableLambdaHelper
#'
#' @inheritParams getLambdas
#' @param s integer index in the set of lambdas or an explicit value of lambda.
#' For cv.glmnet can also be "lambda.min" or "lambda.1se"
#' @method getLambdas glmnet
#' @usage \method{getLambdas}{glmnet}(x, s=1, ...)
#' @export
#' @examples x=matrix(rnorm(100*20),100,20)
#' g2=sample(1:2,100,replace=TRUE)
#' fit2=glmnet(x,g2,family="binomial")
#' getLambdas(fit2)
#' @rdname findReasonableLambdaHelper
#'
#' @inheritParams getLambdas
#' @method getLambdas cv.glmnet
#' @usage \method{getLambdas}{cv.glmnet}(x, s=c("lambda.1se","lambda.min"), ...)
#' @export
#' @examples x=matrix(rnorm(100*20),100,20)
#' g2=sample(1:2,100,replace=TRUE)
#' cv.fit2=cv.glmnet(x,g2,family="binomial")
#' getLambdas(cv.fit2)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.