R/constrained_lasso.R

#' Constrained lasso fitting.
#'
#' Run the constrained lasso procedure.
#'
#' @param x The matrix of features.
#' @param y The response value.  In the real-valued repsonse case, y should be centered.
#' @param family The model family. Default is "gaussian" for standard lasso.
#'   "binomial" is available for lasso logistic regression.
#' @param alpha The elastic net parameter.  Default is 1 (the standard lasso).
#' @param nlambda Numer of lambda values to fit
#' @param lambda.min.ratio Smallest value for lambda as a fraction of lambda.max
#' @param lambda Optional grid of lambda values
#' @param ... Additional arguments to internally pass to the "glmnet" function.
#'
#' @return beta The resulting coefficient vectors in matrix form.
#' @example
#' n <- 100
#' p <- 30
#' x <- log(1 + abs(matrix(rnorm(n*p), nrow = n, ncol = p)))
#' y <- x[, 1] - x[, 2] + .1 * rnorm(n)
#' y <- y - mean(y)
#' fit1 <- glmnet.constr(x, y, family = "gaussian")
glmnet.constr=function(x,y,family = c("gaussian", "binomial"),alpha=1,nlambda=100,lambda.min.ratio=.01,lambda=NULL,...){
# glmnet with constraint sum beta=0
# Gaussian case: make sure y is centered.
# Binomial case: does not include an intercept

  family = match.arg(family)
  p=ncol(x)
  n=length(y)
  R=c(rep(1,p),rep(-1,p))

  if(family=="gaussian"){
    xx=rbind(cbind(x,-x),R)
    yy=c(y,0)
     w=c(rep(1,n),100*n)
    w=w/sum(w)
    if(!is.null(lambda)) {nlambda=NULL}
    ggg=glmnet::glmnet(xx,yy,standardize=F,intercept=F,family=family,weights=w,lower.limits=rep(0,ncol(xx)+1),upper.limits=rep(Inf,ncol(xx)+1),alpha=alpha,nlambda=nlambda,lambda=lambda,lambda.min.ratio=lambda.min.ratio)
  }

  if(family=="binomial"){
    #here we add two fake obs, one at y=0, one at y=1
    xx=rbind(cbind(x,-x),R,R)
    yy=c(y,0,1)
     w=c(rep(1,n),n*100,n*100)
    w=w/sum(w)
    # I may want to include an intercept here, but having trouble getting sum b==0
    ggg=glmnet::glmnet(xx,yy,standardize=F,intercept=F,family=family,weights=w,lower.limits=rep(0,ncol(xx)+1),upper.limits=rep(Inf,ncol(xx)+1),alpha=alpha,nlambda=nlambda,lambda.min.ratio=lambda.min.ratio,...)
  }

  b=ggg$beta[1:p,]-ggg$beta[-(1:p),]
  return(list(beta=b,a0=ggg$a0,percvar=1-ggg$dev.ratio,lambda=ggg$lambda,glmnet.obj=ggg,family=family))

}


#' CV for constrained lasso.
#'
#' Cross-validation to choose lambda in the constrained lasso procedure
#'
#' @param fit The result of a "glmnet.contsr" function call.
#' @param x The matrix of features.
#' @param y The response value.  In the real-valued repsonse case, y should be centered.
#' @param nfolds The number of CV folds. Default is 10.
#' @param foldid Fold id for each observation.  By default these are uniformly random.
#' @param alpha Elastic net parameter.  Default is 1 which gives the lasso.
#'
#' @return lambda The gride of lambda values
#' @return cvm The CV estimate of prediction error for each lambda.
#' @return yhat.preval The predicted values generated by each fold.
cv.glmnet.constr=function(fit,x,y,nfolds=10,foldid=NULL,keep=F,alpha=1){
  # "fit" is output from glmnet.constr

  p=ncol(x)
  n=length(y)
  family=fit$family
  yhat=matrix(NA,n,length(fit$lambda))
  if(is.null(foldid))  foldid = sample(rep(seq(nfolds), length = n))
  nfolds=length(table(foldid))

  for(ii in 1:nfolds){
    oo=foldid==ii
    ggg= glmnet.constr(x[!oo,],y[!oo],family=family,alpha=alpha)
    b=matrix(as.numeric(coef(ggg$glmnet.obj,s=fit$lambda)[-1,]),ncol=length(fit$lambda))
    a0=ggg$a0
    bb=b[(1:p),]-b[-(1:p),]
    yhat[oo,]=a0+x[oo,]%*%bb
  }

  if(family=="gaussian") errfun=function(yhat,y){
         mean( (y-yhat)^2)
  }

  if(family=="binomial") {errfun=function(yhat,y){
    eps=1e-4
    pr=1/(1+exp(-yhat))
    o=( y==1 & (pr>1-eps) ) | (y==0 & (pr<eps))
    val=y*log(pr)+(1-y)*log(1-pr)

    -2*mean(val[!o])
  }}

  ym=matrix(y,nrow=n,ncol=ncol(yhat))
  cvm=apply(yhat,2,errfun,y)
  yhat.preval=NULL
  if(keep & family=="gaussian") yhat.preval=yhat
  if(keep & family=="binomial") yhat.preval=1/(1+exp(-yhat))

  best <- which.min(cvm)
  beta_best <- fit$beta[, best]

  return(list(lambda=fit$lambda,cvm=cvm,yhat.preval=yhat.preval,beta = beta_best))
}

predict.glmnet.constr=function(fit,x){
  yhat=matrix(fit$a0,nrow=nrow(x),ncol=ncol(fit$b))+x%*%fit$b

  if(fit$family=="binomial") {
      yhat=1/(1+exp(-yhat))
  }

  return(yhat)
}
stephenbates19/logratiolasso documentation built on May 18, 2019, 4:52 p.m.