R/lassogrp.R

Defines functions PoissReg LinReg LogReg lassoModel lassoControl lambdamax.default lambdamax.formula lambdamax varBlockStand get.blocks lassogrpModelmatrix print.lassofit extract.lassogrp plot.lassogrp fitted.lassogrp lassoGrpFit lasso.formula lasso.default lasso

Documented in extract.lassogrp fitted.lassogrp get.blocks lambdamax lambdamax.default lambdamax.formula lasso lassoControl lasso.default lasso.formula lassoGrpFit lassogrpModelmatrix lassoModel LinReg LogReg plot.lassogrp PoissReg print.lassofit varBlockStand

lasso <- function(x,...)  UseMethod("lasso")

lasso.default <-
  function(x, y, index, subset, weights = rep(1, length(y)), model='gaussian',
           lambda=NULL, steps = 21,
           adaptive = FALSE, cv.function = cv.lasso, cv=NULL,
           adaptcoef = NULL, adaptlambda = NULL, penscale = sqrt,
           center=NA, standardize = TRUE,
           save.x = TRUE, control = lassoControl(), ...)
{
  ## ----------------------------------------------------------------------
  ## Author: Werner Stahel, Date: 29 Nov 2007, 08:22
  fit <- lassoGrpFit(x, y, index, subset=subset, weights=weights, model=model,
                     lambda=lambda, steps=steps,
                     penscale=penscale,
                     center=center, standardize = standardize,
                     save.x = if(adaptive) TRUE else save.x,
                     control = control, ...)
  fit$innercall <- fit$call
  fit$call <- match.call() ## Overwrite lassoGrpFit

  if (adaptive) {
    if (control@trace > 0)
      cat('\n*** calling lasso.lassogrp to adapt to results of first call\n\n')

    fit <- lasso.lassogrp(fit, lambda=lambda, steps=steps,
                          cv=cv, cv.function = cv.function,
                          adaptcoef = NULL, adaptlambda = NULL,
                          save.x = FALSE, control = control, ...)
    ## bookkeeping
    if (save.x) fit$x <- x
  }
  fit
}
## ==================================================================
lasso.formula <-
  function(formula, data, subset, weights, na.action, offset, nonpen = ~ 1,
           model='gaussian', lambda=NULL, steps = 21, adaptive = FALSE,
           cv.function = cv.lasso, penscale = sqrt,
           cv=NULL, adaptcoef = NULL, adaptlambda = NULL,
           contrasts = NULL, save.x = TRUE,
           center=NA, standardize = TRUE,
           control = lassoControl(), ...)
{
  ## ----------------------------------------------------------------------
  ## Author: Werner Stahel, Date: 29 Nov 2007, 08:22
  ## same as lassogrp.formula
  m <- match.call(expand.dots = FALSE)
  ## Remove not-needed stuff to create the model-frame
  m$nonpen <- m$model <- m$lambda <- m$steps <- m$adaptive <- m$cv.function <-
    m$coef.init <- m$penscale <- m$cv <- m$adaptcoef <- m$adaptlambda <-
      m$contrasts <- m$save.x <-
        m$center <- m$standardize <- m$control <- m$... <- NULL

  ## Workaround subtle bug (which was *not* in grplasso; see
  ## "offset(" in ../tests/test_grplasso.R :
  if( ("offset" %in% all.names(formula)) &&
     !("offset" %in% all.vars (formula)))
    stop("Cannot (yet) use  offset(.) in lasso() formula;\n",
         " use 'offset' argument instead")
  
  environment(nonpen) <- environment(formula)
  
  l <- lassogrpModelmatrix(m, formula, nonpen, data, weights, subset, na.action,
                     contrasts, env = parent.frame())
##-   if(is.null(coef.init))  coef.init <- rep(0, ncol(l$x))

  ## 'subset': has been done in lassogrpModelmatrix, do not use it again below:
  fit <- if(is.null(adaptcoef))
    lasso.default(x = l$x, y = l$y, index = l$index,
                       weights = l$w,
                       model = model, lambda = lambda,
                       adaptive = adaptive,
                       cv.function = cv.function,
                       offset = l$off,
                       penscale=penscale,
                       center=center, standardize=standardize,
                       save.x = save.x, control = control, ...)
  else {
    if (is.list(adaptcoef)&&"coefficients"%in%names(adaptcoef))
      adaptcoef <- adaptcoef$coefficients
    l$model <- model
    lasso.lassogrp(l, lambda=lambda, steps=steps,
                   cv = cv, cv.function = cv.function,
                   adaptcoef = adaptcoef, adaptlambda = NULL,
                   penscale = penscale, ## weights = NULL,
                   center = center, standardize = standardize,
                   save.x = save.x, control = control, ...)
  }
  fit$terms <- l$Terms
  ## !!! Terms and x do not match index and coefs if adaptive==T
  fit$contrasts <- attr(l$x, "contrasts")
  fit$xlevels <- .getXlevels(l$Terms, l$mf)
  fit$na.action <- attr(l$mf, "na.action")
  fit$formula <- formula
  fit$innercall <- fit$call
  fit$call <- match.call() ## Overwrite lassoGrpFit
  fit$data <- data
  structure(fit, class = "lassogrp")
}

## ==================================================================
lasso.lassogrp <- #f
  function(x, lambda=NULL, steps=21, cv = NULL, cv.function = cv.lasso,
           adaptcoef = NULL, adaptlambda = NULL, penscale = sqrt,
           weights = NULL, center = NA, ## standardize = TRUE,
           save.x = TRUE, control = lassoControl(), ...)
{ ## adaptive lasso
  if (length(x$x)==0 || length(x$y)==0)
      stop("must have an 'x' and 'y' component")
  lx <- x$x
  ly <- x$y
  lindex <- x$index
  lcall <- match.call(expand.dots=TRUE)
## coefficients to adapt to
  lcf <- adaptcoef
  if (length(lcf)==0) {
    if(length(adaptlambda) > 1)
      stop("length(adaptlambda) is larger than one")
    if (length(adaptlambda) == 0) { ## use CV
      lcv <- if (is.null(cv)) cv.function(x, se=FALSE) else cv
      adaptlambda <- x$lambda[which.min(lcv$mse)]
    }
    else if (adaptlambda < 0) {
      adaptlambda <- x$lambda[-adaptlambda]
    }
    lil <- which(abs(adaptlambda-x$lambda) <= 0.01*x$lambda)
    if (length(lil)==0)
      stop('lambda not suitable. not yet programmed') # call lassogrp
    lcf <- x$coef[,lil[1]]
  } else # length(lcf) >= 1 :
    if (length(lcf)!=ncol(lx)) stop('wrong number of coefficients in adaptcoef')
  if (is.matrix(lcf)) lcf <- structure(c(lcf), names=dimnames(lcf)[[1]])
  if (is.null(names(lcf))) {
    if (length(lcf)>1)
      stop('coefficients must have names')
    else { ## index of coef
      lcf <- x$coefficients[,lcf]
      if (is.null(lcf)) stop("'adaptcoef' not suitable")
      names(lcf) <- dimnames(x$coefficients)[[1]]
    }
  }
  ## drop variables with coef==0
  ltdrop <- aggregate(lcf==0, list(lindex), all)
  ltdr <- ltdrop[ltdrop[,2],1]
  lv <- which(!lindex%in%ltdr)
  ltrm <- x$lasso.terms
  if (length(ltdr)) {
    ltrm[ltdr+1] <- 0
    lindex <- structure(lindex[lv], term.labels=attr(lindex, 'term.labels'))
    # do not drop term.labels!
  }
  if (all(lindex<=0))
    stop('no nonzero coefficients available to adapt to')
  lxx <- lx <- lx[,lv,drop=FALSE]
  lsc <- lcf <- lcf[lv]
  ## same scale within groups
  lgrp <- table(lindex)>1
  if (any(lgrp)) {
    lgrp <- as.numeric(names(lgrp)[lgrp])
    for (li in lgrp) {
      lig <- which(lindex==li)
      lsc[lig] <- sqrt(sum(lcf[lig]^2))
    }
  }
  lsc <- lsc[lindex>0]
  lj <- match(names(lsc),dimnames(lx)[[2]])
  if (any(is.na(lj)))
    stop(paste("coefficients", names(lsc)[is.na(lj)]," not in model matrix"))
## --- change scales 
  lx[,lj] <- sweep(lx[,lj,drop=FALSE],2,lsc,"*")
##-   attr(lindex, 'grpnames') <-
##-     grpnames  ## contains names even for eliminated groups
  if (is.null(weights)) weights <- x$weights
  fit <- lassoGrpFit(lx,ly, index=lindex, weights=weights,
                     model=x$model, lambda=lambda, steps=steps,
                     penscale=penscale, standardize=FALSE,
                     save.x = save.x, control = control, ...)
## adjust scales
  fit$coefficients[lj,] <- sweep(fit$coefficients[lj, ,drop=FALSE],
                                 1, lsc, "*")
  if (save.x) fit$x <- lxx
  fit$adaptcoef <- lcf

## terms
  lt <- fit$lasso.terms[!is.na(fit$lasso.terms)]
  ltrm[names(lt)] <- lt
  fit$lasso.terms <- ltrm
  if(!is.null(x$formula)) { ## i.e. *NOT* normally (called from lasso.default()):
    ## formula (without dropped terms)
    ltr <- setdiff(names(ltrm)[(!is.na(ltrm))&ltrm!=0],"(Intercept)")
    fit$formula <- update(x$formula, paste('~', paste(ltr, collapse="+")))
  }
  fit$call <- lcall
##  fit
  structure(fit, class = "lassogrp")
} ## end lasso.lassogrp
## ============================================================================
lassoGrpFit <-
  function(x, y, index, subset, weights = rep(1, length(y)), model='gaussian',
           offset = rep(0, length(y)), lambda = NULL, steps = 21,
           coef.init = rep(0, ncol(x)), penscale = sqrt,
           center = NA, standardize = TRUE,
           save.x = NULL, control = lassoControl(), ...)
{
  ## Purpose: Function to fit a solution (path) of a group lasso problem
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## x: design matrix (including intercept), already rescaled and
  ##    possibly blockwise orthonormalized.
  ## y: response vector
  ## index: vector which defines the grouping of the variables. Components
  ##        sharing the same number build a group. Non-penalized
  ##        coefficients are marked with "NA".
  ## subset:  either logical or index vector that selects rows.
  ##          needed at this low level mainly for cross validation
  ## weights: vector of observation weights.
  ## offset: vector of offset values; needs to have the same length as the
  ##         response vector.
  ## lambda: vector of penalty parameters. Optimization starts with the
  ##         first component. See details below.
  ## coef.init: initial vector of parameter estimates corresponding to the
  ##            first component in the vector "lambda".
  ## penscale: rescaling function to adjust the value of the penalty
  ##           parameter to the degrees of freedom of the parameter group.
  ##           See the reference below.
  ## model: an object of class "lassoModel" implementing the negative
  ##        log-likelihood, gradient, hessian etc. See the documentation
  ##        of "lassoModel" for more details.
  ## control: options for the fitting algorithm, see "lassoControl".
  ## ...: additional arguments to be passed to the functions defined
  ##      in "model".
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date: 30 Aug 2005, 09:02

  ## Do some argument checking first

  if (is.character(model)) {
    lmn <- c(gaussian='LinReg', binomial='LogReg', poisson='PoissReg')[model]
    if (is.na(lmn)) {
      warning('unsuitable argument "model"')
      lmn <- 'LinReg'
    }
    model <- get(lmn)(...)
  } else if(length(list(...)))## catch typos, etc (!!)
      warning("arguments in ", deparse(list(...)), " are disregarded")

  ## Check the design matrix
  if(!is.matrix(x)) stop("x has to be a matrix")
  p <- ncol(x)
  if(any(is.na(x))) stop("Missing values in x not allowed!")
  ## Check the response
  if(!is.numeric(y)) stop("y has to be of type 'numeric'")
  nobs <- length(y)
  if(nrow(x) != nobs) stop("x and y have not correct dimensions")
  if(!model@check(y)) stop("y has wrong format")

  ## Check the other arguments
  if(length(weights)==0) weights <- rep(1,nobs) else
  if(length(weights)!=nobs)
    stop("length(weights) not equal length(y)")
  if(any(weights < 0))
    stop("negative weights not allowed")

  if(length(offset) != nobs)
    stop("length(offset) not equal length(y)")

  if(missing(coef.init) || is.null(coef.init))  coef.init <- rep(0, p)
  else if(length(coef.init) != p)
    stop("length(coef.init) not equal ncol(x)")

  if(!is.numeric(index))
    stop("argument  'index'  has to be of type 'numeric'!")
  if(length(index) != p)
    stop("length(index) not equal ncol(x)!")
  ## !!! WSt: the following statement might cause a bug
  if(any(iina <- is.na(index))) ## recode 'NA' to '0' , for back compatibility,
    ## as in grplasso,  NA's (only!) where used to indicate "non.pen."
    index[iina] <- 0L
  if(all(in0 <- index <= 0))
    stop("None of the predictors are penalized.")
  if(!is.function(penscale))
    stop("'penscale' is not a function")
  check <- validObject(control) ## will stop the program if error occurs

  ## subset
  if(!missing(subset)) {
    if(is.logical(subset)) {
      if(length(subset)!=nobs)
        stop("length of logical vector  subset  not equal length(y)")
    } else {
      if (!(all(range(c( subset,1,nobs))==c(1,nobs)) ||
            all(range(c(-subset,1,nobs))==c(1,nobs))))
        stop("argument  'subset'  not suitable")
    }
    x <- x[subset, ,drop=FALSE]
    nobs <- length(y <- y[subset])
    weights <- weights[subset]
    offset <- offset[subset]
  }

  ## lambda
  if (length(lambda)) lambda <- lambda[!is.na(lambda)]
  if (length(lambda)==0) {
    lambdamax <-
      lambdamax(x, y, index, weights=weights, offset = offset,
                coef.init = coef.init, penscale = penscale,
                model = model, standardize = standardize, ...)
    if (length(steps)==1)
      steps <- if(steps<0.5)
        seq(1,0,-max(steps,0.001)) else seq(1,0,length=max(3,steps))^2 # !!!
    lambda <- lambdamax*steps
  }
  lambda <- unique(lambda)
  if(any(diff(lambda)>0)) {
    warning("lambda values will be sorted in decreasing order")
    lambda <- sort(lambda, decreasing=TRUE)
  }

  ## Extract the control information
  update.hess  <- control@update.hess
  update.every <- control@update.every
  inner.loops  <- control@inner.loops
  line.search  <- control@line.search
  max.iter     <- control@max.iter
  lower        <- control@lower
  upper        <- control@upper
  if (is.null(save.x)) save.x <- control@save.x
  tol          <- control@tol
  trace        <- control@trace
  beta         <- control@beta
  sigma        <- control@sigma

  nrlambda <- length(lambda)
  if(nrlambda > 1 && update.hess == "always"){
    warning("More than one lambda value and update.hess = \"always\". You may want to use update.hess = \"lambda\"")
  }

  ## For the linear model, the Hessian is constant and has hence to be
  ## computed only *once*

  if(model@name == "Linear Regression Model"){
    if(update.hess != "lambda"){
      update.hess <- "lambda"
      if(trace >= 1)
        cat("Setting update.hess = 'lambda'\n")
    }
    if(update.every <= nrlambda){
      update.every <- nrlambda + 1
      if(trace >= 1)
        cat("Setting update.every = length(lambda) + 1\n")
    }
  }

  ## keep original x if wanted
  x.old <- if(save.x) x else NULL

  ## Which are the non-penalized parameters?
  any.notpen    <- any(in0, na.rm=TRUE)
  inotpen.which <- which(in0)
  nrnotpen      <- length(inotpen.which)
  interc.which  <-
    if(all(x[,1] == 1)) 1L ## = 99% of cases
    else which(apply(x==1, 2, all))
  has.interc <- length(interc.which) > 0
  notpenintonly <- nrnotpen==1 && has.interc
  if (is.na(center) || is.null(center)) center <- has.interc
  if(center) {
    if (!notpenintonly)
      warning('penalization not adjusted to non-penalized carriers')
    if (!has.interc)
      message("centering without intercept .. ")
  } else { ## not center
    if(notpenintonly)
      warning('Are you sure you want uncentered carriers with model with intercept?')
  }
  ## Index vector of the penalized parameter groups
  if(any.notpen) {
    ipen <- index[-inotpen.which]
    ipen.which <- split((1:p)[-inotpen.which], ipen)
  } else {
    if(has.interc)
      warning("All groups are penalized, including the intercept.")
    ipen <- index
    ipen.which <- split((1:p), ipen)
  }

  nrpen      <- length(ipen.which)
  dict.pen   <- sort(unique(ipen))

  ## Table of degrees of freedom
  ipen.tab   <- table(ipen)[as.character(dict.pen)]

  ## Center
  if (center) {
    if(has.interc) {
      ctr <- colMeans(x. <- x[,-interc.which, drop = FALSE])
      x[,-interc.which] <- sweep(x., 2, ctr)
    } else {
      x[] <- sweep(x, 2, colMeans(x))
    }
  }
  ## Standardize the design matrix -> blockwise orthonormalization
  if(standardize) {
    ##warning("...Using standardized design matrix.\n")
    stand        <- varBlockStand(x, ipen.which, inotpen.which)
    x            <- stand$x
    scale.pen    <- stand$scale.pen
    scale.notpen <- stand$scale.notpen
  }
  ## From now on x is the *normalized* design matrix!

  ## Extract the columns into lists, works faster for large matrices
  if(any.notpen){
    x.notpen <- list(); length(x.notpen) <- nrnotpen
    for(i in seq_along(inotpen.which))
      x.notpen[[i]] <- x[,inotpen.which[[i]], drop = FALSE]
  }

  x.pen <- list(); length(x.pen) <- length(nrpen)
  for(i in seq_along(ipen.which))
    x.pen[[i]] <- x[,ipen.which[[i]], drop = FALSE]

  ## Extract the needed functions
  check     <- validObject(model)
  invlink   <- model@invlink
  nloglik   <- model@nloglik
  ngradient <- model@ngradient
  nhessian  <- model@nhessian

  coef      <- coef.init
  coef.pen  <- coef.init
  if(any.notpen)
    coef.pen  <- coef[-inotpen.which]

##-   lambdanm <- format(lambda)
##-   if (any(duplicated(lambdanm))) lambdanm <- as.character(lambda)
  lambdanm <- paste('l',1:nrlambda,sep='.')
  names(lambda) <- lambdanm

  norms.pen    <- c(sqrt(rowsum(coef.pen^2, group = ipen)))

  norms.pen.m  <- matrix(0, nrow = nrpen, ncol = nrlambda,
                         dimnames = list(NULL, lambdanm))
  norms.npen.m <- matrix(0, nrow = nrnotpen, ncol = nrlambda,
                         dimnames = list(NULL, lambdanm))
  nloglik.v <- fn.val.v <- numeric(nrlambda)
  coef.m    <- grad.m   <-
               matrix(0, nrow = p, ncol = nrlambda,
                       dimnames = list(colnames(x), lambdanm))
  fitted    <- linear.predictors <-
               matrix(0, nrow = nobs, ncol = nrlambda,
                      dimnames = list(rownames(x), lambdanm))

  converged <- rep(TRUE, nrlambda)

  ## *Initial* vector of linear predictors (eta) and transformed to the
  ## scale of the response (mu)
  eta <- offset + c(x %*% coef)
  mu <- invlink(eta)

  ## Create vectors for the Hessian approximations
  if(any.notpen)
    nH.notpen <- numeric(nrnotpen)
  nH.pen <- numeric(nrpen)

  for(pos in 1:nrlambda){
    l <- lambda[pos]

    if(trace >= 2)
      cat("\nLambda:", l, "\n")

    ## Initial (or updated) Hessian Matrix of the *negative* log-likelihood
    ## function (uses parameter estimates based on the last penalty parameter
    ## value)

    if(update.hess == "lambda" && (pos %% update.every == 0 || pos == 1)) {
      ## Non-penalized groups
      if(any.notpen){
        for(j in 1:nrnotpen){ ## changed
          Xj <- x.notpen[[j]]
          nH.notpen[j] <- min(max(nhessian(Xj, mu, weights, ...), lower), upper)
        }
      }
      ## Penalized groups
      for(j in 1:nrpen){
        ind <- ipen.which[[j]]
        Xj  <- x.pen[[j]]
        diagH <- numeric(length(ind))
        for(i in seq_along(ind)){
          diagH[i] <- nhessian(Xj[, i, drop = FALSE], mu, weights, ...)
        }
        nH.pen[j] <- min(max(diagH, lower), upper)
      }
    }

    ## Start the optimization process
    fn.val <- nloglik(y, eta, weights, ...) +
      l * sum(penscale(ipen.tab) * norms.pen)

    ## These are needed to get into the while loop the first time
    do.all <- FALSE
    d.fn   <- d.par <- 1

    counter    <- 1 ## Count the sub-loops
    iter.count <- 0 ## Count the loops through *all* groups

    ## Stop the following while loop if the convergence criterion is fulfilled
    ## but only if we have gone through all the coordinates

    ##while(d.fn > tol | d.par > sqrt(tol) | !do.all){
    while(d.fn > tol || d.par > sqrt(tol) || !do.all){
      ## Escape loop if maximal iteration reached
      if(iter.count >= max.iter){
        converged[pos] <- FALSE
        warning("Maximal number of iterations reached for lambda[", pos, "]")
        break
      }

      ## Save the parameter vector and the function value of the previous step
      fn.val.old <- fn.val
      coef.old   <- coef

      ## Check whether we have some useful information from the previous step

      ## Go through all groups if counter == 0 or if we have exceeded the
      ## number of inner loops (inner.loops)
      if(counter == 0 || counter > inner.loops){
        do.all <- TRUE
        guessed.active <- 1:nrpen
        counter <- 1
        if(trace >= 2)
          cat("...Running through all groups\n")
      }else{## Go through the groups which were identified at the previous step
        guessed.active <- which(norms.pen != 0)
        if(length(guessed.active) == 0){
          guessed.active <- 1:nrpen
          do.all <- TRUE
          if(trace >= 2)
            cat("...Running through all groups\n")
        }else{
          do.all <- FALSE
          if(counter == 1 && trace >= 2)
            cat("...Starting inner loop\n")
          counter <- counter + 1
        }
      }
      if(do.all)
        iter.count <- iter.count + 1

      ## These are used for the line search, start at initial value 1
      ## They are currently here for security reasons
      start.notpen <- rep(1, nrnotpen)
      start.pen    <- rep(1, nrpen)

      if(any.notpen){
        ## Optimize the *non-penalized* parameters
        for(j in 1:nrnotpen){
          ind <- inotpen.which[j]
          Xj  <- x.notpen[[j]]

          ## Gradient of the negative log-likelihood function
          ngrad <- c(ngradient(Xj, y, mu, weights, ...))

          ## Update the Hessian if necessary
          if(update.hess == "always"){
            nH <- min(max(nhessian(Xj, mu, weights, ...), lower), upper)
          }else{
            nH <- nH.notpen[j]
          }

          ## Calculate the search direction
          d <- -(1 / nH) * ngrad
          ## Set to 0 if the value is very small compared to the current
          ## coefficient estimate

          d <- zapsmall(c(coef[ind], d))[2]

          ## If d != 0, we have to do a line search
          if(d != 0){
            scale <- min(start.notpen[j] / beta, 1) ##1
            coef.test      <- coef
            coef.test[ind] <- coef[ind] + scale * d

            Xjd <- Xj * d
            eta.test     <- eta + Xjd * scale

            if(line.search){
              qh    <- sum(ngrad * d)

              fn.val0      <- nloglik(y, eta, weights, ...)
              fn.val.test  <- nloglik(y, eta.test, weights, ...)

              qh <- zapsmall(c(qh, fn.val0))[1]

              ## Armijo line search. Stop if scale gets too small (10^-30).
              while(fn.val.test - fn.val0 > sigma * scale * qh & scale > 10^-30){
                ##cat("Doing line search (nonpen)\n")
                scale          <- scale * beta
                coef.test[ind] <- coef[ind] + scale * d
                eta.test       <- eta + Xjd * scale
                fn.val.test    <- nloglik(y, eta.test, weights, ...)
              }
            } ## end if(line.search)
            if(scale <= 10^-30){ ## Do nothing in that case
              ## coef.test <- coef
              ## eta.test  <- eta
              ## mu        <- mu
              start.notpen[j] <- 1
            }else{ ## Update the information
              coef <- coef.test
              eta  <- eta.test
              mu   <- invlink(eta)
              start.notpen[j] <- scale
            }

            ## Save the scaling factor for the next iteration (in order that
            ## we only have to do very few line searches)
            ## start.notpen[j] <- scale

            ## Update the remaining information
            ## coef <- coef.test
            ## eta  <- eta.test
            ## mu   <- invlink(eta)
          } ## end if(abs(d) > sqrt(.Machine$double.eps))
        } ## end for(j in 1:nrnotpen)
      } ## if(any.notpen)

      ## Optimize the *penalized* parameter groups
      for(j in guessed.active){
        ind  <- ipen.which[[j]]
        npar <- ipen.tab[j]

        coef.ind       <- coef[ind]
        cross.coef.ind <- crossprod(coef.ind)

        ## Design matrix of the current group
        Xj <- x.pen[[j]]

        ## Negative gradient of the current group
        ngrad <- c(ngradient(Xj, y, mu, weights, ...))

        ## Update the Hessian if necessary
        nH <-
          if(update.hess == "always") {
            dH <- numeric(length(ind))
            for(i in seq_along(ind)){ ## for loop seems to be faster than sapply
              dH[i] <- nhessian(Xj[,i,drop = FALSE], mu, weights, ...)
            }
            min(max(dH, lower), upper)
          }
          else nH.pen[j]

        cond       <- -ngrad + nH * coef.ind
        cond.norm2 <- c(crossprod(cond))

        ## Check the condition whether the minimum is at the non-differentiable
        ## position (-coef.ind) via the condition on the subgradient.

### __ FIXME  use 'border' instead of  'l * penscale(npar)'  below -__________

        border <- penscale(npar) * l
        if(cond.norm2 > border^2){
          d <- (1 / nH) *
            (-ngrad - l * penscale(npar) * (cond / sqrt(cond.norm2)))
          ##d <- zapsmall(c(coef.ind, d))[-(1:npar)]
        }else{
          d <- -coef.ind
        }

        ## If !all(d == 0), we have to do a line search
        if(!all(d == 0)){
          scale <- min(start.pen[j] / beta, 1)

          coef.test      <- coef
          coef.test[ind] <- coef.ind + scale * d
          Xjd            <- c(Xj %*% d)
          eta.test       <- eta + Xjd * scale

          if(line.search){
            qh <- sum(ngrad * d) +
              l * penscale(npar) * sqrt(crossprod(coef.ind + d)) -
                l * penscale(npar)* sqrt(cross.coef.ind)

            fn.val.test    <- nloglik(y, eta.test, weights, ...)
            fn.val0        <- nloglik(y, eta, weights, ...)

            left <- fn.val.test - fn.val0 +
              l  * penscale(npar) * sqrt(crossprod(coef.test[ind])) -
                l  * penscale(npar) * sqrt(cross.coef.ind)
            right <- sigma * scale * qh
            while(left > right & scale > 10^-30){
              ##cat("Doing line search (pen)\n")
              scale          <- scale * beta
              coef.test[ind] <- coef.ind + scale * d
              eta.test       <- eta + Xjd * scale
              fn.val.test    <- nloglik(y, eta.test, weights, ...)

              left <- fn.val.test - fn.val0 +
                l  * penscale(npar) * sqrt(crossprod(coef.test[ind])) -
                  l  * penscale(npar) * sqrt(cross.coef.ind)
              right <- sigma * scale * qh
            } ## end while(left > right & qh != 0)
          } ## end if(line.search)
          ## If we escaped the while loop because 'scale' is too small
          ## (= we add nothing), we just stay at the current solution to
          ## prevent tiny values
          if(scale <= 10^-30){ ## Do *nothing* in that case
            ##coef.test <- coef
            ##eta.test  <- eta
            ##mu        <- mu
            start.pen[j] <- 1
          }else{
            coef <- coef.test
            eta  <- eta.test
            mu   <- invlink(eta)
            start.pen[j] <- scale
          }
        } ## end if(!all(d == 0))
        norms.pen[j] <- sqrt(crossprod(coef[ind]))
      } ## end for(j in guessed.active)

      fn.val <- nloglik(y, eta, weights, ...) +
        l * sum(penscale(ipen.tab) * norms.pen)

      ## Relative difference with respect to parameter vector
      d.par <- sqrt(crossprod(coef - coef.old)) / (1 + sqrt(crossprod(coef)))

      ## Relative difference with respect to function value (penalized
      ## likelihood)
      d.fn <- (fn.val.old - fn.val) / (1 + abs(fn.val))

      ## Print out improvement if desired (trace >= 2)
      if(trace >= 2){
        cat("d.fn:", d.fn, " d.par:", d.par,
            " nr.var:", sum(coef != 0), "\n")
      }

      ## If we are working on a sub-set of predictors and have converged
      ## we stop the optimization and will do a loop through all
      ## predictors in the next run. Therefore we set counter = 0.

      ##if(d.fn <= tol && d.par <= sqrt(tol)){
      if(d.fn <= tol && d.par <= sqrt(tol)){
        counter <- 0 ## will force a run through all groups
        if(trace >= 2 && !do.all)
          cat("...Subproblem (active set) solved\n")
      }
    } ## end of while(d.fn > tol | d.par > sqrt(tol) | !do.all)

    if(trace == 1)
      cat("Lambda:", l, " nr.var:", sum(coef != 0), "\n")

    coef.m[,pos]            <- coef
    fn.val.v[pos]           <- fn.val
    norms.pen.m[,pos]       <- norms.pen
    nloglik.v[pos]          <- nloglik(y, eta, weights, ...)
    grad.m[,pos]            <- ngradient(x, y, mu, weights, ...)
    linear.predictors[,pos] <- eta
    fitted[,pos]            <- invlink(eta)
  } ## end for(pos in 1:nrlambda)
## ---
  nterms <- max(abs(index))
  terml <- c('(Intercept)', attr(index, 'term.labels'))
  nterms <- max(abs(index)+1,length(terml))
  if (length(terml)<nterms)
    terml <- c('(Intercept)', paste('G',1:(nterms-1),sep='.') )
  dimnames(norms.pen.m)[[1]] <- terml[dict.pen+1]
  termt <- rep(NA,nterms)
  termt[dict.pen+1] <- 1
  termt[unique(-index[inotpen.which])+1] <- -1
  names(termt) <- terml
  ## Transform the coefficients back to the original scale if the design
  ## matrix was standardized
  if(standardize) {
    if(any.notpen)
      coef.m[inotpen.which,] <- coef.m[inotpen.which, ,drop=FALSE] / scale.notpen
    ## For df > 1 we have to use a matrix inversion to go back to the
    ## original scale
    for(j in seq_along(ipen.which)){
      ind <- ipen.which[[j]]
      coef.m[ind,] <- solve(scale.pen[[j]], coef.m[ind,,drop = FALSE])
    }
  }
  ## correct intercept for centering
  if(center && has.interc)
    coef.m[interc.which,] <-
      coef.m[interc.which,] - ctr %*% coef.m[-interc.which, ,drop=FALSE]

  structure(list(coefficients = coef.m,
                 norms.pen    = norms.pen.m,
                 nloglik      = nloglik.v,
                 fn.val       = fn.val.v,
                 fitted       = fitted,
                 linear.predictors = linear.predictors,
                 call         = match.call(),
                 x            = x.old, ## use untransformed values
		 y	      = y,
                 index        = index,
                 lasso.terms  = termt,
                 weights      = weights,
                 model        = model,
                 offset       = offset,
                 lambda       = lambda,
                 penscale     = penscale,
                 center=center, standardize=standardize,
                 ngradient    = grad.m,
                 converged    = converged,
                 control      = control),
            class = "lassogrp")
} ## end  lassoGrpFit

## ==================================================================

print.lassogrp <- #f
  function(x, coefficients=TRUE, doc = options("doc")[[1]], ...)
{
  ## Purpose: Print an object of class "lassogrp"
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## x:      Object of class "lassogrp"
  ## ----------------------------------------------------------------------
  ## Author: Werner Stahel
  if (length(doc)==0) doc <- 0
  if (doc >= 1) {
    if (length(tit(x))) cat("\nlasso: ",tit(x),"\n")
    if (doc >= 2 && length(descr(x))) cat(paste(descr(x),"\n"),"\n")
  }

  cat("Call:\n  ", deparse(x$call), sep = "")
  cat("\n* Number of observations:", length(x$y))
  cat("\n\n* Penalty parameter lambda:  ")
  lambda <- x$lambda
  if (length(lambda)>5)
    cat(length(lambda),' values between ',
        format(min(lambda)), ' and ',
        format(max(lambda)), '\n')  else{
          cat("\n")
          print(lambda)
        }
  if (!is.null(x$adaptcoef)) {
    cat("* Adaptation coefficients: \n")
    print(x$adaptcoef) ## cat('  ',format(x$adaptcoef), '\n')
  }
  cat("\n* Predictors: groups     :",
      length(unique(na.omit(x$index))), "\n")
  llt <- x$lasso.terms
  lltn <- names(llt)
  cat("* Penalized:\n")
  print(lltn[!is.na(llt) & llt>0], quote=FALSE)
  cat("  Not penalized:\n")
  print(lltn[!is.na(llt) & llt<0], quote=FALSE)
  if (any(ldr <- !is.na(llt) & llt==0)) {
    cat("  Not included:\n")
    print(lltn[ldr], quote=FALSE)
  }
  if (coefficients) {
    p <- ncol(lcf <- cbind(x$coefficients))
    lsel <- p > 5
    ljlam <- if (lsel) round(seq(1,p,length=5)) else seq_len(p) # even for p==0
    cat("\n* Coefficients", if(lsel) "for selected lambdas",
        "(*N* = not penalized) :\n")
    lind <- x$index
    lord <- c(which(lind== 0),
              which(lind < 0)[order(lind[lind<0])],
              which(lind > 0)[order(lind[lind>0])])
    lcf <- lcf[lord, ljlam, drop=FALSE]
    lind <- lind[lord]
    dimnames(lcf)[[1]] <-
      ##- paste(c('not pen',lnm)[lind+1], dimnames(lcf)[[1]], sep=": ")
      paste(ifelse(lind <= 0, '*N* ', '    '), dimnames(lcf)[[1]], sep=" ")
    print(rbind(lambda=x$lambda[ljlam], lcf))
  }
  invisible(x)
}

## ==================================================================
predict.lassogrp <- #f
  function(object, newdata = NULL, type = c("link", "response"),
           na.action = na.pass, ...)
{
  ## Purpose: Obtains predictions from a "lassogrp" object.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## object:  a "lassogrp" object
  ## newdata: data.frame or design matrix of observations at which
  ##          predictions are to be made.
  ## type: the type of prediction. type = "link" is on the
  ##       scale of linear predictors, whereas type = "response" is on
  ##       the scale of the response variable, i.e. type = "response"
  ##       applies the inverse link function on the linear predictors.
  ## ...:  other options to be passed to the predict function.
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date:  7 Apr 2006, 09:04

  type <- match.arg(type)

  na.act <- object$na.action

  ## If no new data is available, use the information in the fit object
  if(missing(newdata) || is.null(newdata)) {
    pred <- switch(type,
                   link = object$linear.predictors,
                   response = fitted(object))
    if(!is.null(na.act))
      pred <- napredict(na.act, pred)
  } else {
    tt <- object$terms
    if (is.null(tt)) tt <- object$formula
    if (!is.null(tt)) { ## if we have a terms object in the fit
      newdata <- as.data.frame(newdata)
      Terms <- delete.response(tt)
      m <- model.frame(Terms, newdata, na.action = na.action,
                       xlev = object$xlevels)
      offset <- attr(tt, "offset")

      if(!is.null((cl <- attr(Terms, "dataClasses"))))
        .checkMFClasses(cl, m)
      x <- model.matrix(Terms, m, contrasts = object$contrasts)
      if (ncol(x)!=nrow(coef(object))) {
        warning('ncol(x)!=nrow(coef(object)): check!')
        x <- x[,row.names(coef(object))]
      }
      pred <- x %*% coef(object)
      if(!is.null(offset)){
        offset <- eval(attr(tt, "variables")[[offset]], newdata)
        pred <- pred + offset
      }
    } else { ## if the object comes from lassoGrpFit
      ## Hmm, this does happen (FIXME?)
      x <- as.matrix(newdata)
      pred <- x %*% coef(object)
      if(any(object$offset != 0))
        warning("Possible offset not considered!")
    }

    pred <- switch(type,
                   link = pred,
                   response = object$model@invlink(pred)
                   ##apply(pred, 2, object$model@invlink))
                   )
    if(!is.null(na.action))
      pred <- napredict(na.action, pred)
  }
  if(dim(pred)[2] == 1)
    pred <- structure(c(pred), names=row.names(pred))

  attr(pred, "lambda") <- object$lambda
  pred
}

## ==================================================================
fitted.lassogrp <- function(object, ...)
    structure(object$fitted, lambda = object$lambda)

## ==================================================================
## MM (FIXME): This should be compatible  *or*  in a smart way extend / complement the
##     -----   extract.lassogrp() below
"[.lassogrp" <- #f
  function(x, i) {

  ## First get dimensions of the original object x
  nrlambda <- length(x$lambda)
  if(missing(i)) i <- seq_len(nrlambda) # select all

  ## Error checking
  ## ...

  ## Subset a copy of the object:
  fit.red <- x

  ## (NB: should be allowed to select all or none)
  fit.red$call$lambda  <- fit.red$lambda <- x$lambda[i]
  fit.red$nloglik      <- x$nloglik[i]
  fit.red$fn.val       <- x$fn.val [i]
  fit.red$coefficients <- coef(x)    [ , i, drop = FALSE]
  fit.red$ngradient    <- x$ngradient[ , i, drop = FALSE]
  fit.red$fitted       <- fitted(x)  [ , i, drop = FALSE]
  fit.red$linear.predictors <- x$linear.predictors[
                                       , i, drop = FALSE]
  fit.red$norms.pen    <- x$norms.pen[ , i, drop = FALSE]
  fit.red
}

## =========================================================
plot.lassogrp <-
  function(x, type = c("norms","coefficients","criteria"),
           cv = NULL, se = TRUE,
           col = NULL, lty=NULL, lwd = 1.5, mar = NULL,
           axes = TRUE, frame.plot = axes,
           xlim = NULL, ylim = NULL,
           legend = TRUE, main = NULL, xlab = "Lambda", ylab = NULL, ...)
{
  ## Purpose: Plots the solution path of a "lassogrp" object. The x-axis
  ##          is the penalty parameter lambda, the y-axis can be
  ##          coefficients or the l2-norm of the coefficient groups.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## x:    a lassogrp object
  ## type: what should be on the y-axis? Coefficients (dummy
  ##       variables)?
  ## col:  a vector indicating the color of the different solution
  ##       paths. The length should equal the number of coefficients.
  ## ...:  other parameters to be passed to the plotting functions.
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date:  7 Apr 2006 / Werner Stahel Aug 2009

  lambda <- x$lambda
  if(length(lambda) <= 1)
    stop("Plot function needs at least two lambdas")
  rtLambda <- sqrt(lambda)

  type <- match.arg(type)

  if(is.null(xlim))
    xlim <- rev(range(rtLambda))
  else
    stopifnot(length(xlim) == 2, diff(xlim) > 0)
  ind <- unique(x$index)

  nr.npen <- sum(x$index<=0)
  dict.pen <- na.omit(ind)
  dict.pen.ord <- nr.npen + 1 : length(dict.pen)
## ----------------------------
  if (type == "norms" || type == "coefficients") {
    if (type == "norms") {
      yy <- x$norms.pen
    ## colors and ltypes
      col <- if(is.null(col))  1:nrow(yy)  else rep(col,length=nrow(yy))
      lty <- if(is.null(lty))  1:nrow(yy)  else rep(lty,length=nrow(yy))
      if (is.null(main)) main <- "Paths of norms of penalized coefficients"
    } else { # if(type == "coefficients")
      if (is.null(main)) main <- "Coefficient paths"
      col <- if(is.null(col))  1:length(ind)  else rep(col,length=length(ind))
      lty <- if(is.null(lty))  1:length(ind)  else rep(lty,length=length(ind))
      ## possibly too complicated
      index.ord <- numeric()
      index.ord[x$index<=0] <- 1:nr.npen

      dict.pen.sort <- sort(dict.pen)
      for(j in seq_along(dict.pen))
        index.ord[x$index == dict.pen.sort[j]] <- dict.pen.ord[j]

      col <- col[index.ord]
      lty <- lty[index.ord]
      yy <- coef(x)
    }
    if (is.null(ylab)) ylab <- type
    if (is.null(ylim)) ylim <- range(yy[,lambda>0])
    matplot(rtLambda, t(yy), type = "l", axes = FALSE,
            xlab = xlab, ylab = ylab, col = col, lty=lty,
            main = main, xlim = xlim, ylim = ylim, lwd=lwd, ...)
    if (axes[length(axes)]) axis(4)
#    axis(4, mgp=c(3,2,0), at = yy[, ncol(yy)], labels = rownames(yy))
    if (legend) legend(x="topleft", legend=rownames(yy), col=col, lty=lty)
## ----------------------------
  } else if(type=='criteria') {

    lmain <- if (is.null(main)) "log-likelihood and penalty" else main
    col <- if(is.null(col))  c(1,2,2,3)  else rep(col,length=4)
    lty <- if(is.null(lty))  c(1,5,6,2)  else rep(lty,length=4)
    mar <- if(is.null(mar))  c(4,4,4,4)  else rep(mar,length=4)
    l1 <- colSums(x$norms.pen)
    yy <- cbind(x$nloglik/length(x$y))
    lylab <- if(is.null(ylab)) "-loglik/n" else ylab
    if (is.null(cv)) cv <- x$cv
    if (!is.null(cv)) {
      if (is.logical(cv)&&cv) cv <- cv.lasso(x)
      if (!is.list(cv)) {
        warning('argument  "cv"  not suitable')
      } else {
        yy <- cbind(yy, cv$mse)
        if (se) yy <- cbind(yy, cv$mse+outer(cv$mse.se,c(-1,1)))
        if (is.null(ylab)) lylab <- paste(lylab, " || MSE (cv)")
        if (is.null(main)) lmain <- "log-likelihood, MSE (cv), and penalty"
      }
    }
    matplot(rtLambda, yy, type = "l", axes=FALSE,
            xlab = xlab, ylab = lylab,
            col = col[c(1,2,3,3)], lty=lty[c(1,2,3,3)],
            main = lmain, xlim = xlim, mar=mar, lwd=lwd, ...)
    par(usr=c(par('usr')[1:2], 0,1.05*max(l1)))
    lines(rtLambda, l1, col=col[4], lty=lty[4], lwd=lwd)
    if (axes[length(axes)]) {
      axis(4, col=col[4])
      mtext("l1 norm",4,1.8)
    }
  } else warning(':plot.lassogrp: invalid argument "type". No plot')
  if (frame.plot)
    box()
  if (axes) {
    axis(2)
    ## pretty lambda values for SQRT(lambda) scale :
    xlb <- pretty(lambda)
    xlb <- xlb[xlb != 0 & min(lambda) <= xlb & xlb <= max(lambda)]
    xlb <- c(pretty(lambda[lambda < min(xlb)]), xlb)
    ## add the small ones typically missing
    axis(1, at=sqrt(xlb), labels=format(xlb))
    axis(3, at=rtLambda, labels=FALSE, tcl=0.5, mgp=c(1,0.5,0), xpd=TRUE)
    ilam <- c(1, length(lambda))
    if(ilam[2] > 8) # thin them
      ilam <- c(seq(1,length(lambda)-3,by=5),length(lambda))
    la <- lambda[ilam]
    axis(3, at=sqrt(la), labels=names(la), tcl=-0.3, mgp=c(1, .5, 0), xpd=TRUE)
  }
  stamp(sure=FALSE)
  invisible(yy)
}

## =========================================================
extract.lassogrp <-
  function(object, i=NULL, lambda=NULL, data=NULL, fitfun='lm', ...)
{
  ## Purpose:
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## does not make sense yet for more than one i
  ## ----------------------------------------------------------------------
  ## Author: Werner Stahel, Date: 21 Aug 2009, 08:55
  lform <- object$formula ## formula(object)
  lcall <- object$call
  if (is.null(lform)) lform <- lcall$x
  if (is.null(lform))
    stop('extract.lassogrp needs an object with formula')
  ## which model(s)?
  if (is.null(i)) {
      if (is.null(lambda))
          stop("Must specify either 'i' or 'lambda' argument for extract.lassogrp()")
    if (length(lambda) !=1 || lambda < 0 || lambda > max(object$lambda))
      stop('!extract.lassogrp! argument "lambda" not suitable')
    i <- which.min(abs(sqrt(lambda)-sqrt(object$lambda)))
  }
  ni <- length(i)
  if (ni!=1) {
    warning(':extract.lassogrp: not programmed for extracting more than 1 model')
    i <- i[1]
  }
  ## model
  lpen <- object$norms.pen[,i]
  ltrms <- names(lpen[lpen>0])
  lform <- update(eval(lform),
                  if (is.null(lcall$nonpen)) ~1 else lcall$nonpen )
  lform <- update(lform, paste('~.+',paste(ltrms, collapse=' + ')) )
  lmod <- object$model
  if (!is.null(lmod)) {
    if (is.character(lmod))
      lmod <- pmatch(lmod,c('gaussian','binomial','poisson'))
    else
      lmod <- pmatch(substring(lmod@name,1,3),c('Linear','Logistic','Poisson'))
  }
  lmeth <- c("lm","binomial","glm")[lmod]
##-   ## data
  ldata <- data
  if (is.null(ldata)) ldata <- object$data
  if (is.null(ldata)) ldata <- eval(object$call$data)
  if (is.null(ldata)) ldata <- eval(eval(object$call$x)$call$data)
  if (is.null(ldata)) stop("no 'data' found")
  ##  environment(lform) <- ldata
  ## 
  if (!is.null(data)) lcall$data <- substitute(data)
  if (is.null(lcall$data))
    stop ("!extract.lassogrp! I do not find the data. Specify the 'data' argument")
  if (is.null(lcall$na.action)) lcall$na.action <- as.name('nainf.exclude')
  fcall <- if (fitfun=='regr')
    call(fitfun, formula=lform, data=lcall$data, method = lmeth,
               family=c('gaussian','binomial','poisson')[lmod],
               subset=lcall$subset, weights=lcall$weights,
               na.action=lcall$na.action) # , '...'=...
  else if (fitfun=='lm')
    call(fitfun, formula=lform, data=lcall$data,
               subset=lcall$subset, weights=lcall$weights)
             #  ,na.action=lcall$na.action) # , '...'=...
  else {
    call(fitfun, formula=lform, data=lcall$data,
               family=c('gaussian','binomial','poisson')[lmod],
               subset=lcall$subset, weights=lcall$weights,
               na.action=lcall$na.action) # , '...'=...
  }
  lreg <- eval(fcall, parent.frame())
  result <- NULL
  for (li in seq_along(i)) { ## for loop not in use!
    lr <- lreg
    lk <- i[li]
    ##  lcoef <- lr$coefficients <- object$coefficients[,lk]
    lr$coefficients <- object$coefficients[names(lreg$coefficients),lk]
    lr$fitted.values <- object$fitted[,lk]
    lrsd <- lr$residuals <- object$y - lr$fitted.values
    ## lr$df.residual <- lreg$df.residual - sum(lcoef==0) ## leave df.residuels!
    ## only good for raw residuals
    lcl <- lreg$call
    lcl[1] <- paste('lasso',fitfun,sep='.')
    attr(lcl,'comment') <- 'call not R usable'
    lr$call <- lcl
    lrss <- sum(lrsd^2, na.rm=TRUE)
    lr$sigma <- sqrt(lrss/lr$df.residual)
    lr$r.squared <- 1-lrss/sum(object$y^2,na.rm=TRUE)
    lr$stres <- lr$testcoef <- lr$adj.r.squared <- lr$fstatistic <-
      lr$covariance <- lr$correlation <- NULL
    lr$fitfun <- fitfun
    lr$faceff <- termeffects(lr) ##  factoreffects(lr) 
    lr$fit.unpen <- lreg
    result <- c(result,lr)
  }
  result <- if (length(result)==1) result[[1]] else result
  result$data <- ldata
  class(result) <- c('lassofit', class(lreg))
  result
}
## ===================================================================
print.lassofit <-
  function(x, # dummycoef = NULL, #digits = max(3, options("digits")[[1]] - 3),
    residuals=FALSE, doc = options("doc")[[1]], ...)
{
  ## Author: Werner Stahel, Date: 24 Aug 2009, 12:27
  if(length(doc)==0) doc <- 0
  if(doc >= 1) {
    if (length(tit(x))) cat("\n",tit(x),"\n")
    if (doc >= 2 && length(descr(x))) cat(paste(descr(x),"\n"),"\n")
  }
##-   if (length(dummycoef)==0)
##-     dummycoef <- c(options("show.dummy.coef")[[1]],TRUE)[1]
  cat("\nCall:\n")
  cat(paste(deparse(x$call), sep = "\n", collapse = "\n"),"\n", sep = "")
  cat("Fitting function ",x$fitfun,"\n\n")
  cf <- coef(x)
  print(list('nonzero coefficients'=cf[cf!=0],
             'zero coefficents'=names(cf[cf==0])))
  cat("Root Mean Square of Error: ", format(x$sigma),'\n')
  if (residuals) {
    cat("\nResiduals:\n")
    print(summary(x$residuals))
  }
  invisible(x)
}

## Needed, e.g., for  update(., )  to work:
formula.lassogrp <- ## identical to stats:::formula.glm
function (x, ...) 
{
    form <- x$formula
    if (!is.null(form)) {
        form <- formula(x$terms)
        environment(form) <- environment(x$formula)
        form
    }
    else formula(x$terms)
}
## ===================================================================
cv.lasso <-
  function (object, blocks = 10,
            trace=FALSE, control=lassoControl(trace=0), env=globalenv(),
            plot.it = NULL, se = TRUE, ...)
  ## modified cv.lars, WSt
  ## allows for blocks to be specified rather than random
{
  x <- object$x
  y <- object$y
  n <- length(y)
  blocksinmodel <- FALSE
  ## argument blocks -> generate blocks
  if (is.character(blocks)) {
    bl <- get.blocks (object, blocks, env=env)
    blocklist <- bl$blocklist
    mf <- bl$model.frame
    blocksinmodel <- blocks %in% all.vars(as.formula(object$call$formula))
  } else if (length(blocks)==1) {
    stopifnot(blocks >= 1)
    blocklist <- split(sample(1:n), rep(1:blocks, length = n))
  } else if (length(blocks)!=n) {
    stop('!cv.lasso! argument "blocks" not suitable')
  } else {
    blocklist <- split(sample(1:n), blocks)
  }
  ## fetch x
  lj <- match(names(object$index), colnames(x))
  if (any(is.na(lj)))
    stop (if(length(x)==0) "!cv.lasso! no x component" else
          "!cv.lasso! length(index) not equal ncol(x).")
  lx <- x[,lj]
  ## crossvalidate
  K <- length(blocklist)
  blockmse <- matrix(NA_real_, K, length(object$lambda))
  fitted   <- matrix(NA_real_, n, length(object$lambda))
  for (i in seq(K)) {
    omit <- blocklist[[i]]
    fit <-
      lassoGrpFit(lx, y, object$index, subset=-omit, weights=object$weights,
                  model=object$model, offset=object$offset, lambda=object$lambda,
                  penscale=object$penscale, center=object$center,
                  standardize=object$standardize,
                  save.x=FALSE, control=control, ...)
##-     fit <- update(object, subset=-omit, model=FALSE,
##-                   lambda=object$lambda, adaptive=FALSE, cv.function=NULL,
##-                   save.x=FALSE, weights=object$eights, offset=object$offset,
##-                   control=control)
    if (blocksinmodel)  {
      xnew <- mf[omit, , drop = FALSE]
      xnew[,blocks] <- factor(mf[-omit,blocks][1])
    } else {
      xnew <- lx[omit, , drop = FALSE]
      fit$terms <- NULL
    }
    pred <- rbind(predict(fit, xnew))
       ## out of sample predictions for all lambda values
    blockmse[i,] <-
      if (blocksinmodel) {
        pred <- sweep(pred, 2, colMeans(pred) - mean(y[omit]))
        apply(y[omit] - pred, 2, var)
      }
      else
        colMeans((y[omit] - pred)^2)
    fitted[omit,] <- pred
    if (trace)  cat("\n CV Fold", i, "\n\n")
  }
  ## summarize
  mse <- colMeans(blockmse)
  mse.se <- sqrt(apply(blockmse, 2, var)/K)
  result <- list(mse = mse, mse.se = mse.se, mse.blocks = blockmse,
                 fitted = fitted, lambda = object$lambda,
                 blocksinmodel = blocksinmodel)
  ## plot
  if (is.null(plot.it)) plot.it <- identical(parent.frame(), globalenv())
  if (plot.it)   plot.lassogrp(object, type='crit', cv = result, se = se)
  ##
  structure(result, class="lassoCV")
}
## ===================================================================
lassogrpModelmatrix <-
  function(m, formula, nonpen = ~1, data, weights, subset, na.action,
           contrasts, env)
{
  ## Purpose:  Generates the model matrix and adds information about
  ## variables to be penalized or non-penalized by the L1 term in the
  ## lasso fitting.
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date: 30 Jun 2006: create.design() in helpers.R

  if(!inherits(formula, "formula") || length(formula) != 3)
    stop("Argument 'formula' is of wrong type or length")
  any.nonpen <- !is.null(nonpen)
  ## Case where some variables won't be penalized -> merge formulas,
  ## also check the environments (is this the correct way ???)

  m$formula <- ## <-> lasso.formula() has first arg 'formula' !
    if(any.nonpen) {
      if(!inherits(nonpen, "formula"))
        stop("Argument 'nonpen' must be a formula")

      is.gEnv <- function(e) identical(e, .GlobalEnv)

      ## Paste the two formulas together ## !!! changed
      nonp <- ~ . + dummy
      nonp[[2]][[3]] <- nonpen[[length(nonpen)]]
      fterms <- terms(formula, data=eval(m$data))
      f <- update(fterms, nonp)
      ## Get the environment of the formulas
      env.formula <- environment(formula)
      env.nonpen <- environment(nonpen)

      ## If env. of 'formula' is not global, check if env. of 'nonpen' differs.
      ## If yes give warning and use env. of 'formula'
      if(!is.gEnv(env.formula)){
        ## environment(f) <- env.formula
        if(!is.gEnv(env.nonpen) && !identical(env.formula, env.nonpen))
          warning("'formula' and 'nonpen' have different environments. I use environment of 'formula'")

        environment(f) <- environment(formula)
      } else if (!is.gEnv(env.nonpen)){ ## if env. of 'nonpen' is not global
        ## (but env. of 'formula' is), use env. of 'nonpen'
        environment(f) <- env.nonpen
      }
      f
    }
    else formula

  m$drop.unused.levels <- TRUE

  ## Create model-frame
  m[[1]] <- as.name("model.frame")

  mf <- eval(m, env)

  ## Create design matrix, na.action handles the missing values, therefore
  ## weights and offset may be of shorter length (mf does this for us)
  Terms <- attr(mf, "terms") ##terms(m$formula, data = data)
  x <- model.matrix(Terms, data = mf, contrasts = contrasts)
  y <- model.response(mf)
  w <- model.weights(mf)
  off <- model.offset(mf)

  if (!is.null(w) && any(w < 0))
    stop("Negative weights not allowed")

  if(!is.numeric(off))
    off <- rep(0, length(y))
  if(!length(w))
    w <- rep(1, length(y))

  ## Handle the non-penalized coefficients
  if(any.nonpen){
    tmp <- terms(nonpen, data = data)
    co <- contrasts[attr(tmp, "term.labels")]
    used.co <- if(length(co)) co[!unlist(lapply(co, is.null))] # else NULL
    ## also uses the response...to be changed
    x.nonpen <- model.matrix(tmp, data = mf, contrasts = used.co)
    matches <- match(colnames(x.nonpen), colnames(x))
  }
  index <- attr(x, "assign")
  names(index) <- dimnames(x)[[2]]
  if(any.nonpen)
    index[matches] <- - index[matches] # was 'NA'
  attr(index,'term.labels') <- attr(Terms,'term.labels')
  list(x = x,
       y = y,
       w = w,
       off = off,
       mf = mf,
       index = index,
       Terms = Terms)
}

## ===================================================================
get.blocks <- function(object, blocks, env=globalenv())
{
  ## Author: Werner Stahel
  m <- object$call
  if (is.null(m$formula))
    stop('!get.blocks! call of "object" does not contain a formula')
  m$formula <- update(as.formula(m$formula), paste('~.+',blocks))
  m$nonpen <- m$lambda <- m$model <- m$adaptive <- m$cv.function <-
    m$coef.init <- m$penscale <- m$standardize <- m$contrasts <- m$control <-
    m$trace <- m$... <- NULL
  m[[1]] <- as.name("model.frame")
  mf <- eval(m, env)
  if (nrow(mf)!=nrow(object$x))
    stop('!get.blocks! blocks and data incompatible')
##-   Terms <- attr(mf, "terms") ##terms(m$formula, data = data)
##-   mm <- model.matrix(Terms, data = mf, contrasts = contrasts)
##-   mmb <- model.matrix(as.formula(paste('~',blocks)),
##-                       data = mf, contrasts = contrasts)
  list(blocklist = split(1:nrow(mf), factor(mf[,blocks])), model.frame = mf)
}

## ==================================================================
varBlockStand <- function(x, ipen.which, inotpen.which)
{
  ## Purpose:   standardize matrix, respecting blocks of variables
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date:  4 Aug 2006, 08:50

  n <- nrow(x)
  x.ort <- x
  scale.pen <- vector(mode='list', length = length(ipen.which))
  if(length(inotpen.which) > 0){
    x. <- x[,inotpen.which, drop=FALSE]
    scale.notpen <- sqrt(colMeans(x.^2))
    x.ort[,inotpen.which] <- scale(x., FALSE, scale.notpen)
  } else scale.notpen <- NULL
  cnms <- colnames(x)
  if(is.null(cnms)) cnms <- as.character(seq_len(ncol(x)))
  rt.n <- sqrt(n)
  for(j in seq_along(ipen.which)){
    p. <- length(ind <- ipen.which[[j]])
    decomp <- qr(x[,ind])
    if(decomp$rank < min(n,p.)) ## block does not have full rank
        stop("Block belonging to columns ",
             paste(cnms[ind], collapse = ", "),
             sprintf(" has qr()-rank = %d  <  min(n=%d, p'=%d)",
                     decomp$rank, n, p.))
    scale.pen[[j]] <- qr.R(decomp) * 1 / rt.n
    x.ort[,ind] <- qr.Q(decomp) * rt.n
  }
  list(x = x.ort, scale.pen = scale.pen, scale.notpen = scale.notpen)
}

## ==================================================================
lambdamax <- function(x, ...)
  UseMethod("lambdamax")

lambdamax.formula <-
  function(formula, nonpen  = ~ 1, data, weights, subset, na.action, offset,
           coef.init, penscale = sqrt, model = LogReg(),
           center = NA, standardize = TRUE, contrasts = NULL, nlminb.opt = list(), ...)
{
  ## Purpose: Function to find the maximal value of the penalty parameter
  ##          lambda
  ## ----------------------------------------------------------------------
  ##
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date: 20 Apr 2006, 11:24

  call <- match.call()
  m <- match.call(expand.dots = FALSE)

  ## Remove not-needed stuff to create the model-frame
  m$nonpen <- m$coef.init <- m$penscale <- m$model <-
    m$center <- m$standardize <- m$contrasts <- m$nlminb.opt <- m$... <- NULL

  l <- lassogrpModelmatrix(m, formula, nonpen, data, weights, subset, na.action,
                     contrasts, parent.frame())

  if(missing(coef.init))
    coef.init <- rep(0, ncol(l$x))

  lambdamax.default(l$x, y = l$y, index = l$index, weights = l$w,
                    offset = l$off, coef.init = coef.init,
                    penscale = penscale, model = model,
                    center = center, standardize = standardize,
                    nlminb.opt = nlminb.opt, ...)
}

## ==================================================================
lambdamax.default <-
  function(x, y, index, weights = NULL, offset = rep(0, length(y)),
           coef.init = rep(0, ncol(x)), penscale = sqrt, model = LogReg(),
           center = NA, standardize = TRUE, nlminb.opt = list(), ...)
{
  ## Purpose: Function to find the maximal value of the penalty parameter
  ##          lambda
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## X: design matrix (including intercept), already rescaled and
  ##    possibly blockwise orthonormalized.
  ## y: response vector
  ## index: vector which defines the grouping of the variables. Components
  ##        sharing the same number build a group. Non-penalized
  ##        coefficients are marked with "NA".
  ## weights: vector of observation weights.
  ## offset: vector of offset values.
  ## coef.init: initial parameter vector. Penalized groups are discarded.
  ## penscale: rescaling function to adjust the value of the penalty
  ##           parameter to the degrees of freedom of the parameter group.
  ## model: an object of class "lassoModel" implementing
  ##        the negative log-likelihood, gradient, hessian etc. See
  ##        "lassoModel" for more details.
  ## nlminb.opt: arguments to be supplied to "nlminb".
  ## ... : additional arguments to be passed to the functions defined in
  ##       model.
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date: 20 Apr 2006, 11:24

  if(any(iina <- is.na(index))) ## recode 'NA' to '0' , for back compatibility,
    ## as in grplasso,  NA's (only!) where used to indicate "non.pen."
    index[iina] <- 0L
  any.notpen <- any(inp <- index <= 0)
  coef.npen <- coef.init[inp] ## unpenalized parameters
  inotpen.which <- which(inp)

  ## Index vector of the penalized parameter groups
  ipen <- index[!inp]

  ## Table of degrees of freedom
  dict.pen <- sort(unique(ipen))
  ipen.tab <- table(ipen)[as.character(dict.pen)]

  ## Indices of parameter groups
  ipen.which <- split((1:ncol(x))[index>0], ipen)

  ## The 'center' part was missing in earlier versions of pkg 'lassogrp'
  ## This is "cut & paste" from lassoGrpFit() above
  interc.which  <-
    if(all(x[,1] == 1)) 1L ## = 99% of cases
    else which(apply(x==1, 2, all))
  n.int <- length(interc.which)
  if(n.int > 1)
    stop("Multiple intercepts (columns of 1) in 'x'")
  has.interc <- n.int == 1
  if (is.na(center) || is.null(center)) center <- has.interc
  else if(center && !has.interc) {
    message("Couldn't find intercept column. Setting center = FALSE.")
    center <- FALSE
  }
  if(center) { ## also  has.interc (!)
    ctr <- colMeans(x. <- x[,-interc.which, drop = FALSE])
    x[,-interc.which] <- sweep(x., 2, ctr)
  }

  if(standardize) {
    stand <- varBlockStand(x, ipen.which, inotpen.which)
    x     <- stand$x
  }

  x.npen <- x[,inotpen.which, drop = FALSE]
  if (length(weights)==0) weights <- rep(1, length(y))

  nlogLikFUN <- function(par)
    model@nloglik(y, offset + x.npen %*% par, weights, ...)

  mu0 <-
    if(any.notpen){
      par0 <- do.call(nlminb, c(list(start = coef.npen,
                                     objective = nlogLikFUN), nlminb.opt))$par
      model@invlink(offset + x.npen %*% par0)
    }
    else model@invlink(offset)

  ngrad0 <- model@ngradient(x, y, mu0, weights, ...)[index>0]

  ##gradnorms <- numeric(length(dict.pen))

  gradnorms <- c(sqrt(rowsum(ngrad0^2, group = ipen))) / penscale(ipen.tab)

  ##for(j in seq_along(dict.pen)){
  ##  gradnorms[j] <- sqrt(crossprod(ngrad0[which(index == dict.pen[j])])) /
  ##    penscale(sum(index == dict.pen[j], na.rm = TRUE))
  ##}
  max(gradnorms)
}
## ==================================================================
## ==================================================================
## control
setClass("lassoControl",
         representation = representation(
           save.x       = "logical",
           update.hess  = "character",
           update.every = "numeric",
           inner.loops  = "numeric",
           line.search  = "logical",
           max.iter     = "numeric",
           tol          = "numeric",
           lower        = "numeric",
           upper        = "numeric",
           beta         = "numeric",
           sigma        = "numeric",
           trace        = "numeric"),

         prototype = list(
           save.x       = FALSE,
           update.hess  = "lambda",
           update.every = 3,
           inner.loops  = 10,
           line.search  = TRUE,
           max.iter     = 500,
           tol          = 5 * 10^-8,
           lower        = 10^-2,
           upper        = 10^9,
           beta         = 0.5,
           sigma        = 0.1,
           trace        = 0),

         validity = function(object){
           if(object@update.every != as.integer(object@update.every) || object@update.every <= 0)
             return("update.every has to be a natural number")

           if(object@inner.loops != as.integer(object@inner.loops) || object@inner.loops < 0)
             return("inner.loops has to be a natural number or 0")

           if(object@max.iter != as.integer(object@max.iter) || object@max.iter <= 0)
             return("inner.loops has to be a natural number or greater than 0")

           if(object@beta <= 0 || object@beta >= 1)
             return("beta has to be in (0, 1)")

           if(object@sigma <= 0 || object@sigma >= 1)
             return("sigma has to be in (0, 1)")

           if(object@tol <= 0)
             return("tol has to be positive")

           if(object@lower > object@upper)
             return("lower <= upper has to hold")

           return(TRUE)
         }
)

## ==================================================================
lassoControl <- function(save.x = FALSE,
                         update.hess = c("lambda", "always"),
                         update.every = 3, inner.loops = 10,
                         line.search = TRUE, max.iter = 500,
                         tol = 5 * 10^-8, lower = 10^-2, upper = Inf,
                         beta = 0.5, sigma = 0.1, trace = 0){

  ## Purpose: Options for the Group Lasso Algorithm
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## save.x: a logical indicating whether the design matrix should be saved.
  ## update.hess: should the hessian be updated in each
  ##              iteration ("always")? update.hess = "lambda" will update
  ##              the Hessian once for each component of the penalty
  ##              parameter "lambda" based on the parameter estimates
  ##              corresponding to the previous value of the penalty
  ##              parameter.
  ## inner.loops: how many loops should be done (at maximum) when solving
  ##              only the active set (without considering the remaining
  ##              predictors)
  ## tol: convergence tolerance; the smaller the more precise, see
  ##      details below.

  ## lower: lower bound for the diagonal approximation of the
  ##        corresponding block submatrix of the Hessian of the negative
  ##        log-likelihood function.
  ## upper: upper bound for the diagonal approximation of the
  ##        corresponding block submatrix of the Hessian of the negative
  ##        log-likelihood function.
  ## beta: scaling factor beta < 1 of the Armijo line search.
  ## sigma: 0 < \sigma < 1 used in the Armijo line search.
  ## trace: integer. "0" omits any output,
  ##        "1" prints the current lambda value,
  ##        "2" prints the improvement in the objective function after each
  ##        sweep through all the parameter groups and additional
  ##        information.
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date:  1 Jun 2006, 10:02

  new("lassoControl",
             save.x       = save.x,
             update.hess  = match.arg(update.hess),
             update.every = update.every,
             inner.loops  = inner.loops,
             line.search  = line.search,
             max.iter     = max.iter,
             tol          = tol,
             lower        = lower,
             upper        = upper,
             beta         = beta,
             sigma        = sigma,
             trace        = trace)
}
## ===========================================================================
setClass("lassoModel", representation = representation(
    invlink          = "function",
    link             = "function",
    nloglik          = "function",
    ngradient        = "function",
    nhessian         = "function",
    check            = "function",
    name             = "character",
    comment          = "character"
))

setMethod("show", "lassoModel", function(object) {
    cat("Model:", object@name, "\n")
    cat("Comment:", object@comment, "\n\n")
})

lassoModel <- function(invlink, link, nloglik, ngradient, nhessian,
                       check, name = "user-specified",
                       comment = "user-specified"){
  ## Purpose: Generates models to be used for the Group Lasso algorithm.
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## invlink: a function with arguments "eta" implementing the inverse link
  ##          function.
  ## link:    a function with arguments "mu" implementing the link
  ##          function.
  ## nloglik: a function with arguments "y", "mu" and "weights"
  ##          implementing the negative log-likelihood function.
  ## ngradient: a function with arguments "x", "y", "mu" and "weights"
  ##            implementing the negative gradient of the log-likelihood
  ##            function.
  ## nhessian: a function with arguments "x", "mu" and "weights"
  ##           implementing the negative} hessian of the log-likelihood
  ##           function.
  ## name: a character name
  ## comment: a character comment
  ## ----------------------------------------------------------------------
  ## Author: Lukas Meier, Date:  1 Jun 2006, 10:12

    new("lassoModel",
               invlink   = invlink,
               link      = link,
               nloglik   = nloglik,
               ngradient = ngradient,
               nhessian  = nhessian,
               check     = check,
               name      = name,
               comment   = comment)
}
## ===========================================================================
## family.R
## ===========================================================================
## Logistic Regression
LogReg <- function(){
  lassoModel(invlink   = function(eta) 1 / (1 + exp(-eta)),
             link      = function(mu) log(mu / (1 - mu)),
             nloglik   = function(y, eta, weights, ...)
               -sum(weights * (y * eta - log(1 + exp(eta)))),
             ngradient = function(x, y, mu, weights, ...)
               -crossprod(x, weights * (y - mu)),
             nhessian  = function(x, mu, weights, ...)
               crossprod(x, weights * mu * (1 - mu) * x),
             check     = function(y) all(y %in% c(0, 1)),
             name      = "Logistic Regression Model",
             comment   = "Binary response y has to be encoded as 0 and 1")
}
## ===========================================================================
## Linear Regression
LinReg <- function(){
  lassoModel(invlink  = function(eta) eta,
             link  = function(mu) mu,
             nloglik   = function(y, eta, weights, ...)
               sum(weights * (y - eta)^2),
             ngradient = function(x, y, mu, weights, ...)
               -2 * crossprod(x, weights * (y - mu)),
             nhessian  = function(x, mu, weights, ...)
               2 * crossprod(x, weights * x),
             check     = function(y) TRUE,
             name      = "Linear Regression Model",
             comment   = "Use update.hess=\"lambda\" in lassoControl because the Hessian is constant")
}
## ===========================================================================
## Poisson Regression
PoissReg <- function(){
  lassoModel(invlink    = function(eta) exp(eta),
             link       = function(mu) log(mu),
             nloglik    = function(y, eta, weights, ...)
               sum(weights * (exp(eta) - y * eta)),
             ngradient  = function(x, y, mu, weights, ...)
               -crossprod(x, weights * (y - mu)),
             nhessian   = function(x, mu, weights, ...)
               crossprod(x, weights * mu * x),
             check      = function(y) all(y >= 0) & all(y == ceiling(y)),
             name       = "Poisson Regression Model",
             comment    = "")
}

Try the lassogrp package in your browser

Any scripts or data that you put into this service are public.

lassogrp documentation built on July 25, 2021, 3 a.m.