R/rlasso.R
In hdm: High-Dimensional Metrics

globalVariables(c("post", "intercept", "penalty", "control", "error", "n", "select.Z" , "select.X", "aes", "element_blank", "scale_x_discrete", "model.part", "all.categories", "X"))

#' rlasso: Function for Lasso estimation under homoscedastic and heteroscedastic non-Gaussian
#' disturbances
#'
#' The function estimates the coefficients of a Lasso regression with
#' data-driven penalty under homoscedasticity and heteroscedasticity with non-Gaussian noise and X-dependent or X-independent design. The
#' method of the data-driven penalty can be chosen. The object which is
#' returned is of the S3 class \code{rlasso}.
#'
#' The function estimates the coefficients of a Lasso regression with
#' data-driven penalty under homoscedasticity / heteroscedasticity and non-Gaussian noise. The options \code{homoscedastic} is a logical with \code{FALSE} by default.
#' Moreover, for the calculation of the penalty parameter it can be chosen, if the penalization parameter depends on the  design matrix (\code{X.dependent.lambda=TRUE}) or \code{independent} (default, \code{X.dependent.lambda=FALSE}).
#' The default value of the constant \code{c} is \code{1.1} in the post-Lasso case and \code{0.5} in the Lasso case. 
#'  A \emph{special} option is to set \code{homoscedastic} to \code{none} and to supply a values \code{lambda.start}. Then this value is used as penalty parameter with independent design and heteroscedastic errors to weight the regressors.
#' For details of the
#' implementation of the Algorithm for estimation of the data-driven penalty,
#' in particular the regressor-independent loadings, we refer to Appendix A in
#' Belloni et al. (2012). When the option "none" is chosen for \code{homoscedastic} (together with
#' \code{lambda.start}), lambda is set to \code{lambda.start} and the
#' regressor-independent loadings und heteroscedasticity are used. The options "X-dependent" and
#' "X-independent" under homoscedasticity are described in Belloni et al. (2013). 
# \code{lambda.start} can be component-specific. When used with one of the
# other option, the values are used as starting values.
#'
#' The option \code{post=TRUE} conducts post-lasso estimation, i.e. a refit of
#' the model with the selected variables.
#'
#' @aliases rlasso
#' @param post logical. If \code{TRUE}, post-Lasso estimation is conducted.
#' @param intercept logical. If \code{TRUE}, intercept is included which is not
#' penalized.
#' @param model logical. If \code{TRUE} (default), model matrix is returned.
#' @param penalty list with options for the calculation of the penalty. 
#' \itemize{
#' \item{\code{c} and \code{gamma}}{ constants for the penalty with default \code{c=1.1} and \code{gamma=0.1}}
#' \item{\code{homoscedastic}}{ logical, if homoscedastic errors are considered (default \code{FALSE}). Option \code{none} is described below.}
#' \item{\code{X.dependent.lambda}}{ logical,  \code{TRUE}, if the penalization parameter depends on the the design of the matrix \code{x}. \code{FALSE}, if independent of the design matrix  (default).}
#' \item{\code{numSim}}{ number of simulations for the dependent methods, default=5000}
#' \item{\code{lambda.start}}{ initial penalization value, compulsory for method "none"}
#' }
#' @param control list with control values.
#' \code{numIter} number of iterations for the algorithm for
#' the estimation of the variance and data-driven penalty, ie. loadings,
#' \code{tol} tolerance for improvement of the estimated variances.
#'\code{threshold} is applied to the final estimated lasso
#' coefficients. Absolute values below the threshold are set to zero.
#' @param ... further arguments (only for consistent defintion of methods)
#' @return \code{rlasso} returns an object of class \code{rlasso}. An object of
#' class "rlasso" is a list containing at least the following components:
#' \item{coefficients}{parameter estimates}
#' \item{beta}{parameter estimates (named vector of coefficients without intercept)}
#' \item{intercept}{value of the intercept}
#' \item{index}{index of selected
#' variables (logical vector)} \item{lambda}{data-driven penalty term for each
#' variable, product of lambda0 (the penalization parameter) and the loadings}
#' \item{lambda0}{penalty term} \item{loadings}{loading for each regressor}
#' \item{residuals}{residuals, response minus fitted values} \item{sigma}{root of the variance of
#' the residuals} \item{iter}{number of iterations} \item{call}{function call}
#' \item{options}{options}
#' \item{model}{model matrix (if \code{model = TRUE} in function call)}
#' @references A. Belloni, D. Chen, V. Chernozhukov and C. Hansen (2012).
#' Sparse models and methods for optimal instruments with an application to
#' eminent domain. \emph{Econometrica} 80 (6), 2369-2429.
#' @references A. Belloni, V. Chernozhukov and C. Hansen (2013). Inference for
#' high-dimensional sparse econometric models. In Advances in Economics and
#' Econometrics: 10th World Congress, Vol. 3: Econometrics, Cambirdge
#' University Press: Cambridge, 245-295.
#' @examples 
#' set.seed(1)
#' n = 100 #sample size
#' p = 100 # number of variables
#' s = 3 # nubmer of variables with non-zero coefficients
#' X = Xnames = matrix(rnorm(n*p), ncol=p)
#' colnames(Xnames) <- paste("V", 1:p, sep="")
#' beta = c(rep(5,s), rep(0,p-s))
#' Y = X%*%beta + rnorm(n)
#' reg.lasso <- rlasso(Y~Xnames)
#' Xnew = matrix(rnorm(n*p), ncol=p)  # new X
#' colnames(Xnew) <- paste("V", 1:p, sep="")
#' Ynew =  Xnew%*%beta + rnorm(n)  #new Y
#' yhat = predict(reg.lasso, newdata = Xnew)
#' @export
#' @rdname rlasso
rlasso <- function(x, ...) {
  UseMethod("rlasso") # definition generic function
  }
#' @param formula an object of class "formula" (or one that can be coerced to
#' that class): a symbolic description of the model to be fitted in the form
#' \code{y~x}
#' @param data an optional data frame, list or environment (or object coercible
#' by as.data.frame to a data frame) containing the variables in the model. If
#' not found in data, the variables are taken from environment(formula),
#' typically the environment from which \code{rlasso} is called.
#' @rdname rlasso
#' @export
rlasso.formula <- function(formula, data = NULL, post = TRUE, intercept = TRUE, model = TRUE, 
                           penalty = list(homoscedastic = FALSE, X.dependent.lambda = FALSE, lambda.start = NULL, c = 1.1, gamma = .1/log(n)),
                          control = list(numIter = 15, tol = 10^-5, threshold = NULL), ...) {
  cl <- match.call()
  #if (missing(data))  data <- environment(formula)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- quote(stats::model.frame)
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  attr(mt, "intercept") <- 1
  y <- model.response(mf, "numeric")
  n <- length(y)
  x <- model.matrix(mt, mf)[,-1, drop=FALSE]
  if (missing(data)) {
    if (is.call(formula[[3]])) { 
    #colnames(x) <- sub(format(formula[[3]]), "", colnames(x))
    colnames(x) <- sub(re.escape(format(formula[[3]])), "", colnames(x))
    } else {
      colnames(x) <- sub(re.escape(formula[[3]]), "", colnames(x))  
    }
  }
  est <- rlasso(x, y, post = post, intercept = intercept, penalty=penalty, model=model, 
               control = control)
  est$call <- cl
  return(est)
}

#' @rdname rlasso
#' @export
rlasso.character <- function(x, data = NULL, post = TRUE, intercept = TRUE, model = TRUE, 
                             penalty = list(homoscedastic = FALSE, X.dependent.lambda = FALSE, lambda.start = NULL, c = 1.1, gamma = .1/log(n)),
                             control = list(numIter = 15, tol = 10^-5, threshold = NULL), ...) {
  formula <- as.formula(x)
  if (missing(penalty))
    rlasso.formula(formula, data = data, post = post, intercept = intercept, model = model, 
                   control = control, ...)
  else
    rlasso.formula(x, x, data = NULL, post = TRUE, intercept = TRUE, model = TRUE, 
                   penalty = penalty,
                   control = list(numIter = 15, tol = 10^-5, threshold = NULL), ...)
}
# rlasso.character <- function(x, data = NULL, post = TRUE, intercept = TRUE, model = TRUE, 
#                            penalty = list(homoscedastic = FALSE, X.dependent.lambda = FALSE, lambda.start = NULL, c = 1.1, gamma = .1/log(n)),
#                            control = list(numIter = 15, tol = 10^-5, threshold = NULL), ...) {
#   formula <- as.formula(x)
#   res <- rlasso.formula(formula, data = data, post = post, intercept = intercept, model = model, 
#                         penalty = penalty, control = control, ...)
# }


#' @rdname rlasso
#' @export
#' @param y dependent variable (vector, matrix or object can be coerced to matrix)
#' @param x regressors (vector, matrix or object can be coerced to matrix)
rlasso.default <- function(x, y, post = TRUE, intercept = TRUE, model = TRUE, 
                           penalty = list(homoscedastic = FALSE, X.dependent.lambda = FALSE, lambda.start = NULL, c = 1.1, gamma = .1/log(n)),
                           control = list(numIter = 15, tol = 10^-5, threshold = NULL),...) {
  x <- as.matrix(x)
  y <- as.matrix(y)
  
  n <- dim(x)[1]
  p <- dim(x)[2]
  
  if (is.null(colnames(x)))
    colnames(x) <- paste("V", 1:p, sep = "")
  ind.names <- 1:p
  # set options to default values if missing
  if (!exists("homoscedastic", where = penalty))  penalty$homoscedastic = "FALSE"
  if (!exists("X.dependent.lambda", where = penalty))  penalty$X.dependent.lambda = "FALSE"
  if (!exists("gamma", where = penalty))  penalty$gamma = 0.1/log(n)
  
  if (penalty$homoscedastic=="none" & !exists("lambda.start", where=penalty)) stop("lambda.start must be provided!")
  # checking input numIter, tol
  if (!exists("numIter", where = control)) {
    control$numIter = 15
  }
  
  if (!exists("tol", where = control)) {
    control$tol = 10^-5
  }
  
  #if (post==FALSE & (!exists("c", where = penalty) | is.na(match("penalty", names(as.list(match.call)))))) {
  if (post==FALSE & (!exists("c", where = penalty))) {  
    penalty$c = 0.5
  }
  
  default_pen <-  list(homoscedastic = FALSE, X.dependent.lambda = FALSE, lambda.start = NULL, c = 1.1, gamma = .1/log(n))
  if (post==FALSE &  isTRUE(all.equal(penalty, default_pen))) {  
    penalty$c = 0.5
  }
  
  # Intercept handling and scaling
  if (intercept) {
    meanx <- colMeans(x)
    x <- scale(x, meanx, FALSE)
    mu <- mean(y)
    y <- y - mu
  } else {
    meanx <- rep(0, p)
    mu <- 0
  }
  
  normx <- sqrt(apply(x, 2, var))
  Psi <- apply(x, 2, function(x) mean(x^2)) 
  ind <- rep(FALSE, p) #
  
  # variables with low variation are taken out, because normalization is not reliable
  # eps <- 10^-9  # precision for scaling
  #ind <- which(normx < eps)
  #if (length(ind) != 0) {
  #  x <- x[, -ind]
  #  normx <- normx[-ind]
  #  ind.names <- ind.names[-ind]
  #  p <- dim(x)[2]
  #  if (!is.null(penalty$lambda.start)) {
  #    penalty$lambda.start <- penalty$lambda.start[-ind]
  #  }
  #}
  
  #
  
  XX <- crossprod(x)
  Xy <- crossprod(x, y)
  
  startingval <- init_values(x,y)$residuals
  pen <- lambdaCalculation(penalty = penalty, y = startingval, x = x)
  lambda <- pen$lambda
  Ups0 <- Ups1 <- pen$Ups0
  lambda0 <- pen$lambda0
  
  mm <- 1
  s0 <- sqrt(var(y))
  
  while (mm <= control$numIter) {
    # calculation parameters
    #coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy)$coefficients
    #xn <- t(t(x)/as.vector(Ups1))
    if (mm==1 && post) {
      coefTemp <- LassoShooting.fit(x, y, lambda/2, XX = XX, Xy = Xy)$coefficients
      #lasso.reg <- glmnet::glmnet(xn, y, family = c("gaussian"), alpha = 1,
      #                            lambda = lambda0/(2*n)/2, standardize = FALSE, intercept = FALSE)
      #lasso.reg <- glmnet::glmnet(x, y, family = c("gaussian"), alpha = 1,
      #                           lambda = lambda0/(2*n)/2, standardize = FALSE, intercept = FALSE,  penalty.factor = Ups1)
    } else {
      coefTemp <- LassoShooting.fit(x, y, lambda, XX = XX, Xy = Xy)$coefficients
      #lasso.reg <- glmnet::glmnet(xn, y, family = c("gaussian"), alpha = 1,
      #                           lambda = lambda0/(2*n), standardize = FALSE, intercept = FALSE)
      #lasso.reg <- glmnet::glmnet(x, y, family = c("gaussian"), alpha = 1,
      #                           lambda = lambda0/(2*n), standardize = FALSE, intercept = FALSE,  penalty.factor = Ups1)
    }
    #coefTemp <- as.vector(lasso.reg$beta)
    #names(coefTemp) <- colnames(x)
    coefTemp[is.na(coefTemp)] <- 0
    ind1 <- (abs(coefTemp) > 0)
    x1 <- as.matrix(x[, ind1, drop = FALSE])
    if (dim(x1)[2] == 0) {
      if (intercept) {
        intercept.value <- mean(y + mu)
        coef <- rep(0,p+1)
        names(coef) <- c("intercept", colnames(x)) #c("intercept", names(coefTemp))
      } else {
        intercept.value <- mean(y)
        coef <- rep(0,p)
        names(coef) <- colnames(x) #names(coefTemp)
      }
      est <- list(coefficients = coef, beta=rep(0,p), intercept=intercept.value, index = rep(FALSE, p),
                  lambda = lambda, lambda0 = lambda0, loadings = Ups0, residuals = y -
                    mean(y), sigma = var(y), iter = mm, call = match.call(),
                  options = list(post = post, intercept = intercept, ind.scale=ind, 
                                 control = control, mu = mu, meanx = meanx))
      if (model) {
        est$model <- x
      } else {
        est$model <- NULL
      }
      est$tss <- est$rss <- sum((y - mean(y))^2)
      est$dev <- y - mean(y)
      class(est) <- "rlasso"
      return(est)
    }
    
    # refinement variance estimation
    if (post) {
      reg <- lm(y ~ -1 + x1)
      coefT <- coef(reg)
      coefT[is.na(coefT)] <- 0
      e1 <- y - x1 %*% coefT
      coefTemp[ind1] <- coefT
    }
    if (!post) {
      e1 <- y - x1 %*% coefTemp[ind1]
    }
    s1 <- sqrt(var(e1))
    
    # homoscedatic and X-independent
    if (penalty$homoscedastic == TRUE && penalty$X.dependent.lambda == FALSE) {
      Ups1 <- c(s1)*Psi
      #lambda <- rep(pen$lambda0 * s1, p)
      lambda <- pen$lambda0*Ups1
    }
    # homoscedatic and X-dependent
    if (penalty$homoscedastic == TRUE && penalty$X.dependent.lambda == TRUE) {
      Ups1 <- c(s1)*Psi
      #lambda <- rep(pen$lambda0 * s1, p)
      lambda <- pen$lambda0 * Ups1
    }
    # heteroscedastic and X-independent
    if (penalty$homoscedastic == FALSE && penalty$X.dependent.lambda == FALSE) {
      Ups1 <- 1/sqrt(n) * sqrt(t(t(e1^2) %*% (x^2)))
      lambda <- pen$lambda0 * Ups1
    }
    
    # heteroscedastic and X-dependent
    if (penalty$homoscedastic == FALSE && penalty$X.dependent.lambda == TRUE) {
      lc <- lambdaCalculation(penalty, y=e1, x=x)
      Ups1 <- lc$Ups0
      lambda <- lc$lambda
    }
    
    
    
    # none
    if (penalty$homoscedastic == "none") {
      if (is.null(penalty$lambda.start)) stop("Argument lambda.start required!")
      Ups1 <- 1/sqrt(n) * sqrt(t(t(e1^2) %*% (x^2)))
      lambda <- pen$lambda0 * Ups1
    }
    
    mm <- mm + 1
    if (abs(s0 - s1) < control$tol) {
      break
    }
    s0 <- s1
  }
  
  if (dim(x1)[2] == 0) {
    coefTemp = NULL
    ind1 <- rep(0, p)
  }
  coefTemp <- as.vector(coefTemp)
  coefTemp[abs(coefTemp) < control$threshold] <- 0
  ind1 <- as.vector(ind1)
  coefTemp <- as.vector(as.vector(coefTemp))
  names(coefTemp) <- names(ind1) <- colnames(x)
  if (intercept) {
    if (is.null(mu)) mu <-0
    if (is.null(meanx))  meanx <-  rep(0, length(coefTemp))  #<- 0
    if (sum(ind)==0) {
      intercept.value <- mu - sum(meanx*coefTemp)
    } else {
      intercept.value <- mu - sum(meanx*coefTemp) #sum(meanx[-ind]*coefTemp)
    }
  } else {
    intercept.value <- NA
  }
  
  #if (intercept) {
  #  e1 <- y - x1 %*% coefTemp[ind1] - intercept.value 
  #} else {
  #  e1 <- y - x1 %*% coefTemp[ind1]
  #}
  if (intercept) {
    beta <- c(intercept.value, coefTemp)
    names(beta)[1] <- "(Intercept)"
  } else {
    beta <- coefTemp
  }
  
  s1 <- sqrt(var(e1))
  est <- list(coefficients = beta, beta=coefTemp, intercept=intercept.value, index = ind1, lambda = lambda,
              lambda0 = lambda0, loadings = Ups1, residuals = as.vector(e1), sigma = s1,
              iter = mm, call = match.call(), options = list(post = post, intercept = intercept,
                                                             control = control, penalty = penalty, ind.scale=ind,
                                                             mu = mu, meanx = meanx), model=model)
  if (model) {
    x <- scale(x, -meanx, FALSE)
    est$model <- x
  } else {
    est$model <- NULL
  }
  est$tss <- sum((y - mean(y))^2)
  est$rss <- sum(est$residuals^2)
  est$dev <- y - mean(y)
  class(est) <- "rlasso"
  return(est)
}


################ function lambdaCalculation

#' Function for Calculation of the penalty parameter
#'
#' This function implements different methods for calculation of the penalization parameter \eqn{\lambda}. Further details can be found under \link{rlasso}.
#'
#' @param penalty list with options for the calculation of the penalty. 
#' \itemize{
#' \item{\code{c} and \code{gamma}}{ constants for the penalty with default \code{c=1.1} and \code{gamma=0.1}}
#' \item{\code{homoscedastic}}{ logical, if homoscedastic errors are considered (default \code{FALSE}). Option \code{none} is described below.}
#' \item{\code{X.dependent.lambda}}{ if \code{independent} or \code{dependent} design matrix \code{X} is assumed for calculation of the parameter \eqn{\lambda}}
#' \item{\code{numSim}}{ number of simulations for the X-dependent methods}
#' \item{\code{lambda.start}}{ initial penalization value, compulsory for method "none"}
#' }
#' @param x matrix of regressor variables
#' @param y residual which is used for calculation of the variance or the data-dependent loadings
#' @return The functions returns a list with the penalty \code{lambda} which is the product of \code{lambda0} and \code{Ups0}. \code{Ups0}
#' denotes either the variance (\code{independent} case) or the data-dependent loadings for the regressors. \code{method} gives the selected method for the calculation.
#' @export


lambdaCalculation <- function(penalty = list(homoscedastic = FALSE, X.dependent.lambda = FALSE, lambda.start = NULL, c = 1.1, gamma = 0.1),
                              y = NULL, x = NULL) {
  checkmate::checkChoice(penalty$X.dependent.lambda, c(TRUE, FALSE, NULL))
  checkmate::checkChoice(penalty$homoscedastic, c(TRUE, FALSE, "none"))
  if (!exists("homoscedastic", where = penalty))  penalty$homoscedastic = "FALSE"
  if (!exists("X.dependent.lambda", where = penalty))  penalty$X.dependent.lambda = "FALSE"
  if (!exists("c", where = penalty) & penalty$homoscedastic!="none") {
    penalty$c = 1.1
  }
  if (!exists("gamma", where = penalty) & penalty$homoscedastic!="none") {
    penalty$gamma = 0.1
  }


  # homoscedastic and X-independent
  if (penalty$homoscedastic==TRUE && penalty$X.dependent.lambda == FALSE) {
    p <- dim(x)[2]
    n <- dim(x)[1]
    lambda0 <- 2 * penalty$c * sqrt(n) * qnorm(1 - penalty$gamma/(2 *
                                                                    p))
    Ups0 <- sqrt(var(y))
    lambda <- rep(lambda0 * Ups0, p)
  }

  # homoscedastic and X-dependent
  if (penalty$homoscedastic==TRUE && penalty$X.dependent.lambda == TRUE) {
    if (!exists("numSim", where = penalty)) {
      penalty$numSim = 5000
    }
    p <- dim(x)[2]
    n <- dim(x)[1]
    R <- penalty$numSim
    sim <- vector("numeric", length = R)
    # for (l in 1:R) {
    #   g <- matrix(rep(rnorm(n), each = p), ncol = p, byrow = TRUE)
    #   #sim[l] <- n * max(2 * colMeans(x * g))
    #   psi <- apply(x, 2, function(x) mean(x^2))
    #   sim[l] <- n * max(2 * abs(colMeans(t(t(x)/sqrt(psi)) * g)))
    # }
    
    psi <- apply(x, 2, function(x) mean(x^2))
    tXtpsi <- t(t(x)/sqrt(psi))
    
      for (l in 1:R) {
        g <- matrix(rep(rnorm(n), each = p), ncol = p, byrow = TRUE)
        sim[l] <- n * max(2 * abs(colMeans(tXtpsi * g)))
      }
   
    lambda0 <- penalty$c * quantile(sim, probs = 1 - penalty$gamma)
    Ups0 <- sqrt(var(y))
    lambda <- rep(lambda0 * Ups0, p)
  }

  # heteroscedastic and X-independent (was "standard")
  if (penalty$homoscedastic==FALSE && penalty$X.dependent.lambda == FALSE) {
    p <- dim(x)[2]
    n <- dim(x)[1]
    #lambda0 <- 2*penalty$c*sqrt(n)*sqrt(2*log(2*p*log(n)/penalty$gamma))
    lambda0 <- 2 * penalty$c * sqrt(n) * qnorm(1 - penalty$gamma/(2 *
                                                                    p * 1))  # 1=num endogenous variables
    Ups0 <- 1/sqrt(n) * sqrt(t(t(y^2) %*% (x^2)))
    lambda <- lambda0 * Ups0
  }
  
  # heteroscedastic and X-dependent
  if (penalty$homoscedastic==FALSE && penalty$X.dependent.lambda == TRUE) {
    if (!exists("numSim", where = penalty)) {
      penalty$numSim = 5000
    }
    p <- dim(x)[2]
    n <- dim(x)[1]
    R <- penalty$numSim
    sim <- vector("numeric", length = R)
    #lasso.x.y <- rlasso(y ~ x)
    #eh <- lasso.x.y$residuals
    eh <- y
    ehat <- matrix(rep(eh, each = p), ncol = p, byrow = TRUE) # might be improved by initial estimator or passed through
    # for (l in 1:R) {
    #   g <- matrix(rep(rnorm(n), each = p), ncol = p, byrow = TRUE)
    #   #sim[l] <- n * max(2 * colMeans(x * ehat* g))
    #   xehat <- x*ehat
    #   psi <- apply(xehat, 2, function(x) mean(x^2))
    #   sim[l] <- n * max(2 * abs(colMeans(t(t(xehat)/sqrt(psi)) * g)))
    # }
    xehat <- x*ehat
    psi <- apply(xehat, 2, function(x) mean(x^2))
    tXehattpsi <- t(t(xehat)/sqrt(psi))
    
      for (l in 1:R) {
        g <- matrix(rep(rnorm(n), each = p), ncol = p, byrow = TRUE)
        #sim[l] <- n * max(2 * colMeans(x * ehat* g))
        sim[l] <- n * max(2 * abs(colMeans(tXehattpsi * g)))
      }
    
    
    lambda0 <- penalty$c * quantile(sim, probs = 1 - penalty$gamma)
    Ups0 <- 1/sqrt(n) * sqrt(t(t(y^2) %*% (x^2)))
    lambda <- lambda0 * Ups0
  }
  
  
  if (!is.null(penalty$lambda.start)) {
    p <- dim(x)[2]
    if (length(penalty$lambda.start) == 1) {
      lambda.start <- rep(penalty$lambda.start, p)
    }
    lambda <- as.matrix(penalty$lambda.start)
  }

  if (penalty$homoscedastic == "none") {
    if (is.null(penalty$lambda.start) | !exists("lambda.start", where = penalty))
      stop("For method \"none\" lambda.start must be provided")
    n <- dim(x)[1]
    lambda0 <- penalty$lambda.start
    Ups0 <- 1/sqrt(n) * sqrt(t(t(y^2) %*% (x^2)))
    lambda <- lambda0 * Ups0
  }

  return(list(lambda0 = lambda0, lambda = lambda, Ups0 = Ups0, method = penalty))
}





################# Methods for Lasso

#' Methods for S3 object \code{rlasso}
#'
#' Objects of class \code{rlasso} are constructed by \code{rlasso}.
#' \code{print.rlasso} prints and displays some information about fitted \code{rlasso} objects.
#' \code{summary.rlasso} summarizes information of a fitted \code{rlasso} object.
#' \code{predict.rlasso} predicts values based on a \code{rlasso} object.
#' \code{model.matrix.rlasso} constructs the model matrix of a \code{rlasso} object.
#'
#' @param object an object of class \code{rlasso}
#' @param x an object of class \code{rlasso}
#' @param all logical, indicates if coefficients of all variables (TRUE) should be displayed or only the non-zero ones (FALSE)
#' @param digits significant digits in printout
#' @param newdata new data set for prediction. An optional data frame in which to look for variables with which to predict. If omitted, the fitted values are returned.
#' @param ... arguments passed to the print function and other methods
#' @rdname methods.rlasso
#' @aliases methods.rlasso print.rlasso predict.rlasso model.matrix.rlasso
#' @export

print.rlasso <- function(x, all=TRUE ,digits = max(3L, getOption("digits") - 3L), ...) {
  cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
  if (length(coef(x))) {
    if (all) {
      cat("Coefficients:\n")
      print.default(format(coef(x), digits = digits), print.gap = 2L,
                    quote = FALSE)
    } else {
      if (x$options$intercept) {
      print.default(format(coef(x)[c(TRUE,x$index)], digits = digits), print.gap = 2L,
                    quote = FALSE)
      } else {
        print.default(format(x$beta[x$index], digits = digits), print.gap = 2L,
                      quote = FALSE)
      }
    }
  }
  else cat("No coefficients\n")
  cat("\n")
  invisible(x)
}

#' @rdname methods.rlasso
#' @export

summary.rlasso <- function(object, all=TRUE, digits = max(3L, getOption("digits") - 3L), ...) {
  cat("\nCall:\n", paste(deparse(object$call), sep = "\n", collapse = "\n"), "\n", sep = "")
  cat("\nPost-Lasso Estimation: ",  paste(deparse(object$options$post), sep = "\n", collapse = "\n"), "\n", sep = " ")
  coefs <- object$coefficients
  p <- length(object$beta)
  num.selected <- sum(abs(object$beta)>0)
  n <- length(object$residuals)
  cat("\nTotal number of variables:", p)
  cat("\nNumber of selected variables:", num.selected, "\n", sep=" ")
  resid <- object$residuals
  cat("\nResiduals: \n")
  nam <- c("Min", "1Q", "Median", "3Q", "Max")
  rq <- structure(apply(t(resid), 1L, quantile), dimnames = list(nam, dimnames(resid)[[2L]]))
  print(drop(t(rq)), digits = digits)
  cat("\n")
  if (all) {
    coefm <- matrix(NA, length(coefs), 1)
    coefm[,1] <- coefs
    colnames(coefm) <- "Estimate"
    rownames(coefm) <- names(coefs)
    printCoefmat(coefm, digits = digits, na.print = "NA")
  } else {
    coefs <- coefs[abs(coefs)>0]
    coefm <- matrix(NA, length(coefs), 1)
    coefm[,1] <- coefs
    colnames(coefm) <- "Estimate"
    rownames(coefm) <- names(coefs)
    printCoefmat(coefm, digits = digits, na.print = "NA")
  }
  cat("\nResidual standard error:", format(signif(object$sigma, digits)))
  cat("\n")
  
  if (object$options$intercept) {
    df.int <- 1
  } else {
    df.int <- 0
  }
  
  object$r.squared <- 1 - object$rss/object$tss
  object$adj.r.squared <- 1 - (1-object$r.squared)*((n-df.int)/(n-num.selected-df.int))
  cat("Multiple R-squared: ", formatC(object$r.squared, digits = digits))
  cat("\nAdjusted R-squared: ", formatC(object$adj.r.squared, digits = digits))
  
  if (!is.null(object$model)) {
    object$supscore <- sqrt(n)*max(abs(colMeans(object$model*as.vector(object$dev))))
    R <- 500
    stat <- vector("numeric", length=R)
    for (i in 1:R) {
      g <- rnorm(n)
      dev.g <- as.vector(g*object$dev)
      mat <- object$model*dev.g
      stat[i] <- sqrt(n)*max(abs(colMeans(mat)))
    }
    #quantstat <- quantile(stat, probs=c(1-alpha))
    object$pvalue <- sum(stat>object$supscore)/R
    cat("\nJoint significance test:\n", "the sup score statistic for joint significance test is", formatC(object$supscore, digits = digits), "with a p-value of", 
        formatC(object$pvalue, digits = digits))
  }
  cat("\n")
  invisible(object)
}

#' @rdname methods.rlasso
#' @export

# model.matrix.rlasso <- function(object, ...) {
#   if (is.call(object$call[[2]])) {
#     if(is.null(object$call$data)){
#       X <- model.frame(object$call[[2]])
#       mt <- attr(X, "terms")
#       attr(mt, "intercept") <- 0
#       mm <- model.matrix(mt, X)
#     } else {
#       dataev <- eval(object$call$data)
#       mm <- as.matrix(dataev[,names(object$beta)])
#     }
#   } else {
#     mm <- eval(object$call[[2]])
#   }
#   return(mm)
# }
model.matrix.rlasso <- function (object, ...) 
{
  if(is.null(object$model)){
    if (!is.null(object$call$x)){
      mm <- as.matrix(eval(object$call$x))
    } else {
      if(!is.null(object$call$data)){
        mm <- model.matrix(eval(object$call$formula), data = eval(object$call$data))[, -1, drop = FALSE]
      } else {
        mm <- model.matrix(eval(object$call$formula))[, -1, drop = FALSE]
      }
    }
  } else {
    mm <- object$model
  }
  return(mm)
}



#' @rdname methods.rlasso
#' @export

# predict.rlasso <- function (object, newdata = NULL, ...){
#   if (missing(newdata) || is.null(newdata)) {
#     if (is.matrix(model.matrix(object))) {
#       X <- model.matrix(object)
#     }
#     if (class(model.matrix(object))=="formula") {
#       form <- eval(model.matrix(object))
#       X <- model.matrix(form)[,-1, drop=FALSE]
#     }
#     if(sum(object$options$ind.scale)!=0) {
#       X <- X[,-object$options$ind.scale]
#     }
#   }
#   else {
#     varcoef <- names(object$beta)
#     if(all(is.element(varcoef,colnames(newdata)))){
#       X <- as.matrix(newdata[,varcoef])
#     } else {
#       if (is.matrix(newdata)) {
#         X <- as.matrix(newdata)
#       } else {
#         #X <- as.matrix(newdata)
#         formula <- eval(object$call[[2]])
#         X <- model.matrix(formula, data=newdata)[,-1, drop=FALSE]
#       }
#       if(sum(object$options$ind.scale)!=0) {
#         X <- X[,-object$options$ind.scale]
#       }
#       stopifnot(ncol(X)==length(object$beta))
#     }
#   }
#   n <- dim(X)[1] #length(object$residuals)
#   beta <- object$beta
#   
#   if (object$options[["intercept"]]) {
#     yhat <- X %*% beta + object$intercept
#     if (dim(X)[2]==0) yhat <- rep(object$intercept, n)
#   }
#   if (!object$options[["intercept"]]) {
#     yhat <- X %*% beta
#     if (dim(X)[2]==0) yhat <- rep(0, n)
#   }
#   return(yhat)
# }

# predict.rlasso2 <- function (object, newdata = NULL, ...) 
# {
#   if (missing(newdata) || is.null(newdata)) {
#     X <- model.matrix(object)
#     if (sum(object$options$ind.scale) != 0) {
#       X <- X[, -object$options$ind.scale]
#     }
#   } else {
#     varcoef <- names(object$beta)
#     if (all(is.element(varcoef, colnames(newdata)))){
#       X <- as.matrix(newdata[, varcoef])
#     } else {
#       if(!is.null(object$call$x)){
#         stop("newdata does not contain the correct variables")
#       } else {
#         mod.frame <- as.data.frame(cbind(rep(1, nrow(newdata)), newdata))
#         colnames(mod.frame)[1] <- as.character(eval(object$call$formula)[[2]])
#         X <- try(model.matrix(eval(object$call$formula), data = mod.frame)[, -1, drop = FALSE])
#         if(inherits(X, "try-error")){
#           stop("newdata does not contain the variables specified in formula")
#         }
#       }
#     } 
#     if (sum(object$options$ind.scale) != 0) {
#       X <- X[, -object$options$ind.scale]
#     }
#   }
#   n <- dim(X)[1]
#   beta <- object$beta
#   if (object$options[["intercept"]]) {
#     yhat <- X %*% beta + object$intercept
#     if (dim(X)[2] == 0) 
#       yhat <- rep(object$intercept, n)
#   }
#   if (!object$options[["intercept"]]) {
#     yhat <- X %*% beta
#     if (dim(X)[2] == 0) 
#       yhat <- rep(0, n)
#   }
#   return(yhat)
# }


predict.rlasso <- function (object, newdata = NULL, ...) 
{
  mf <- match.call(expand.dots = TRUE)
  m <- match("newx", names(mf), 0L)
  if (m!=0L) stop("Please use argument \"newdata\" instead of \"newx\" to provide data for prediction.")
  k <- length(object$beta)
  if (missing(newdata) || is.null(newdata)) {
    X <- model.matrix(object)
    if (sum(object$options$ind.scale) != 0) {
      X <- X[, -object$options$ind.scale]
    }
  } else {
    if (is.null(colnames(newdata))) {
      X <- as.matrix(newdata)
      if (dim(X)[2] != k) {
        stop("No variable names provided in newdata and number of parameters does not fit!")
      } else {
        #message("No variable names provided in newdata. Prediction relies on right ordering of the variables.")
      }
    } else {
      varcoef <- names(object$beta)
      if (all(is.element(varcoef, colnames(newdata)))){
        X <- as.matrix(newdata[, varcoef])
      } else {
        mod.frame <- as.data.frame(cbind(rep(1, nrow(newdata)), newdata))
        colnames(mod.frame)[1] <- as.character(eval(object$call$formula)[[2]])
        X <- try(model.matrix(eval(object$call$formula), data = mod.frame)[, -1, drop = FALSE])
        if(inherits(X, "try-error")){
          stop("newdata does not contain the variables specified in formula")
        } 
      }
    }
  }
    if (sum(object$options$ind.scale) != 0) {
      X <- X[, -object$options$ind.scale]
    }
  
  n <- dim(X)[1]
  beta <- object$beta
  if (object$options[["intercept"]]) {
    yhat <- X %*% beta + object$intercept
    if (dim(X)[2] == 0) 
      yhat <- rep(object$intercept, n)
  }
  if (!object$options[["intercept"]]) {
    yhat <- X %*% beta
    if (dim(X)[2] == 0) 
      yhat <- rep(0, n)
  }
  return(yhat)
}