R/relaxnet.R
In relaxnet: Relaxation of glmnet models (as in relaxed lasso, Meinshausen 2007)

################################## relaxnet.R ##################################

## look at the columns of beta mat and return the set of column
## indices defining new sets of selected variables
## return 0 if no columns have any nonzero coefs

## this assumes that any all-zero columns come first

.check.beta <- function(beta, lambda, max.vars, lambda.min) {

  ## beta needs to be some kind of matrix
  ## lambda values are decreasing along columns of beta

  ncol.beta <- ncol(beta)

  current.col <- 1L

  current.zeros <- beta[, current.col] == 0

  ## if all coeffs are zero, keep going
  
  while(all(current.zeros) && current.col < ncol.beta) {

    current.col <- current.col + 1L

    current.zeros <- beta[, current.col] == 0

  }

  col.indices <- vector("integer", length = 0)

  while(current.col < ncol.beta) {

    current.col <- current.col + 1L

    next.zeros <- beta[, current.col] == 0

    if(!all(next.zeros == current.zeros)) {
      
      col.indices <- c(col.indices, current.col - 1L)
      current.zeros[] <- next.zeros
    }
  }

  ## add last column
  
  col.indices <- c(col.indices, current.col)

  ## remove columns according to max.vars and lambda.min
  
  num.vars <- apply(beta[, col.indices, drop = FALSE], 2,
                    function(beta.col) { sum(beta.col != 0) })

  lambda.sub <- lambda[col.indices]

  ## need to return num.vars vec to check later which columns are all zero

  index.to.use <- num.vars <= max.vars & lambda.sub >= lambda.min
  
  return(list(col.indices = col.indices[index.to.use],
              num.vars = num.vars[index.to.use]))
}


## version 1.9-5 of glmnet does not allow a single column x, this function
## provides a workaround. This is a temporary solution which creates a dummy
## glmnet object which contains only a single solution, either the least squares
## solution (for type = gaussian) or the logistic regression solution
## for type = "binomial"

## x should be a single column matrix, y should be a vector

.single.column <- function(x, y, family) {

  glm.data <- data.frame(y = y, x = x[, 1])

  glm.formula <- y ~ x

  glm.result <- glm(glm.formula, family, glm.data)

  obj <- list(a0 = coef(glm.result)[1],
              beta = matrix(coef(glm.result)[2], 1, 1,
                            dimnames = list(colnames(x), "s0")),
              dim = c(1, 1),
              lambda = 0,
              call = match.call(),
              nobs = nrow(x))
  
  class(obj) <- c("glmnet", ifelse(family == "gaussian", "elnet", "lognet"),
                  "single.column.model")

  return(obj)

}
  
## run glmnet with relaxation loop and return combined results

relaxnet <- function(x, y, family = c("gaussian", "binomial"),
                     nlambda = 100,
                     alpha = 1,
                     relax = TRUE,
                     relax.nlambda = 100,
                     relax.max.vars = min(nrow(x), ncol(x)) * 0.8,
                     ## do this for now, but make note that this could be set
                     ## to a number > nrow(x) when ncol(x) is greater
                     ## and alpha < 1
                     lambda = NULL,
                     relax.lambda.index = NULL,
                     relax.lambda.list = NULL,
                     ...) {

  start.time <- Sys.time()

  family = match.arg(family)

  ## check all other args except relax.lambda.index, relax.max.vars

  ## make sure that x is a matrix and has unique colnames

  if(!(inherits(x, "matrix") || inherits(x,"sparseMatrix")))
    stop('x must be a "matrix" or "sparseMatrix"')

  if(ncol(x) < 2) stop("x must have at least 2 columns")
  
  x.colnames <- colnames(x)

  if(is.null(x.colnames) || length(unique(x.colnames)) != length(x.colnames))
    stop("x must have unique colnames")

  if(relax && ncol(x) == 1) {

    warning("x has only one column, setting relax to FALSE")
    relax <- FALSE
  }

  if(relax) {

    if(!is.null(relax.lambda.index)) {

      if(is.null(lambda))
        stop("if you are specifying a relax.lambda.index",
             "you must also specify a lambda")

      ## check relax.lambda.index

      if(!(all(relax.lambda.index %% 1 == 0)
           && all(relax.lambda.index > 0)
           && all(relax.lambda.index <= length(lambda))))
        stop("relax.lambda.index must be a vector of integers",
             "which subsets the lambda vector")

    } else {

      ## check value of relax.max.vars

      if(length(relax.max.vars) != 1
         || relax.max.vars < 1)
        stop("relax.max.vars should be a single value",
             "between 1 and ncol(x) - 1")

      if(relax.max.vars > (ncol(x) - 1) ) {

        warning("relax.max.vars is too high, resetting to ncol(x) - 1")
        relax.max.vars <- ncol(x) - 1
      }
    }
  }

  main.time1 <- Sys.time()
  
  main.glmnet.fit <- glmnet(x, y, family,
                            nlambda = nlambda,
                            lambda = lambda,
                            alpha = alpha,
                            ...)

  main.time2 <- Sys.time()

  if(!relax) {

    end.time <- Sys.time()
    
    result <- list(call = match.call(),
                   main.glmnet.fit = main.glmnet.fit,
                   relax = relax,
                   relax.glmnet.fits = NA,
                   relax.num.vars = NA,
                   relax.lambda.index = NA,
                   total.time = as.double(difftime(end.time,
                                                   start.time,
                                                   units = "secs")),
                   main.fit.time = as.double(difftime(main.time2,
                                                      main.time1,
                                                      units = "secs")),
                   relax.fit.times = NA)

    class(result) <- "relaxnet"

    return(result)
  }
  
  ##############################################################################

  ## Logical indicating whether we have a list of lambda values
  ## to be used for all the relaxed models
  
  lam.list <- FALSE
  
  if(!is.null(relax.lambda.index)) {

    ## get num vars for each of the columns of beta specified by the index

    num.vars <- apply(main.glmnet.fit$beta[, relax.lambda.index, drop = FALSE],
                      2, function(beta.col) { sum(beta.col != 0) })

    ## check the relax.lambda.list arg here

    if(!is.null(relax.lambda.list)) lam.list <- TRUE

  } else { ## find out which lambda values should be used

    lambda <- main.glmnet.fit$lambda
    lambda.max <- max(lambda)

    ## did the user enter values for relax.max.vars and/or relax.multiplier

    ## left over from removed feature
    
    lambda.min <- -Inf

    check.beta.result <- .check.beta(main.glmnet.fit$beta, lambda,
                                     relax.max.vars, lambda.min)
    relax.lambda.index <- check.beta.result$col.indices
    num.vars <- check.beta.result$num.vars
    rm(check.beta.result)
  }

  num.relaxed.models <- length(relax.lambda.index)

  relax.glmnet.fits <- vector("list", length = num.relaxed.models)

  relax.fit.times <- vector("double", length = num.relaxed.models)

  for(i in 1:num.relaxed.models) {

    time1 <- Sys.time()

    if(num.vars[i] == 0) {

      relax.glmnet.fits[[i]] <- main.glmnet.fit$a0[1]
      ## this assumes that the first lambda value for the main fit
      ## is lambda max (i.e. such that all coeffs are zero)

      class(relax.glmnet.fits[[i]]) <- "relaxnet.intercept.only"
      
      time2 <- Sys.time()

      relax.fit.times[i] <- as.double(difftime(time2, time1, units = "secs"))

      next
    }

    lam.index <- relax.lambda.index[i]

    var.index <- which(main.glmnet.fit$beta[, lam.index] != 0)

    if(lam.list) {
      relax.lambda <- relax.lambda.list[[i]]
    } else {
      relax.lambda <- NULL
    }


    ## need a special case for single column x
    
    if(length(var.index) > 1) {
      
      relax.glmnet.fits[[i]] <-
        glmnet(x[, var.index, drop = FALSE], y, family,
               nlambda = relax.nlambda,
               alpha = alpha,
               lambda = relax.lambda,
               ...)
    } else {

      relax.glmnet.fits[[i]] <- .single.column(x[, var.index, drop = FALSE],
                                               y, family)
    }
    
    time2 <- Sys.time()

    relax.fit.times[i] <- as.double(difftime(time2, time1, units = "secs"))

  }

  ## remove any relaxed models for which lambda doesn't go below
  ## the original value determining the subset on which the model
  ## is based

  keep <- rep(TRUE, length = num.relaxed.models)

  lambda.gap <- 10e-10 ## necessary since the lambda values are sometimes
  ## perturbed slightly, probably somewhere in the fortran code

  ## don't do this if relax.lambda.list was specified

  if(!lam.list) {

    for(i in 1:num.relaxed.models) {

      relax.lambda <- relax.glmnet.fits[[i]]$lambda

      relax.lambda.start.val <- lambda[relax.lambda.index[i]]

      if(all(relax.lambda > (relax.lambda.start.val - lambda.gap)))
        keep[i] <- FALSE
    }

    ## subset all the relax components

    relax.glmnet.fits <- relax.glmnet.fits[keep]
    num.vars <- num.vars[keep]
    relax.lambda.index <- relax.lambda.index[keep]
  }

  end.time <- Sys.time()

  result <- list(call = match.call(),
                 main.glmnet.fit = main.glmnet.fit,
                 relax = relax,
                 relax.glmnet.fits = relax.glmnet.fits,
                 relax.num.vars = num.vars,
                 relax.lambda.index = relax.lambda.index,
                 total.time = as.double(difftime(end.time,
                                                 start.time,
                                                 units = "secs")),
                 main.fit.time = as.double(difftime(main.time2,
                                                    main.time1,
                                                   units = "secs")),
                 relax.keep = keep,
                 relax.fit.times = relax.fit.times)

  class(result) <- "relaxnet"

  return(result)
}

Any scripts or data that you put into this service are public.

relaxnet documentation built on May 2, 2019, 12:39 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

relaxnet
Relaxation of glmnet models (as in relaxed lasso, Meinshausen 2007)

R/relaxnet.R
In relaxnet: Relaxation of glmnet models (as in relaxed lasso, Meinshausen 2007)

Try the relaxnet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

relaxnet Relaxation of glmnet models (as in relaxed lasso, Meinshausen 2007)

R/relaxnet.R In relaxnet: Relaxation of glmnet models (as in relaxed lasso, Meinshausen 2007)

Try the relaxnet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

relaxnet
Relaxation of glmnet models (as in relaxed lasso, Meinshausen 2007)

R/relaxnet.R
In relaxnet: Relaxation of glmnet models (as in relaxed lasso, Meinshausen 2007)