R/penreg.R


#' penreg: penalized regression models
#'
#' This defines the generic method for an assortment of penalized regression models.
#'
#' @title penreg: cross-validated regression models
#' @param x argument
#' @param ... other arguments
#' @examples
#' ## penreg is a generic; see the individual estimators (e.g. blasso_map, robpr) for usage
#' @rdname penreg
#' @export penreg
penreg <- function(x, ...){
  UseMethod("penreg")
}



#' Bayesian Adaptive T-Shrinkage Regression
#'
#' This implements the block-updating expectation maximization algorithm presented in Mutshinda & Sillanpää (2012)
#' for a regression model with marginal Student t priors on the coefficients, along with coefficient-specific
#' shrinkage. The model takes two hyperparameters: nu, the degrees of freedom of the priors, and eta, the squared
#' inverse scale of the priors. When nu is 1 the marginal priors are Cauchy densities; as nu tends to infinity the
#' priors approach Gaussian densities and the estimator approaches ridge regression. Larger values of eta impose
#' greater shrinkage, since eta controls the precision of the priors.
#'
#' @param formula model formula
#' @param data a data frame
#' @param prior an optional numeric vector c(nu, eta) fixing the hyperparameters. if NULL (the default), both are
#' estimated by maximizing the log posterior.
#'
#' @return a penreg object
#' @export
#'
#' @references Mutshinda, C. M., & Sillanpää, M. J. (2012). Swift block-updating EM and pseudo-EM procedures for Bayesian shrinkage analysis of quantitative trait loci. Theoretical and Applied Genetics, 125(7), 1575–1587. doi:10.1007/s00122-012-1936-1
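#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data); wrapped in
#' ## \dontrun{} because it relies on the internal routine cvreg:::bats_em.
#' \dontrun{
#' fit <- bats_map(mpg ~ wt + hp + disp, data = mtcars)
#' coef(fit)
#' }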

bats_map = function(formula, data, prior = NULL){

  this.call <- match.call()
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))
  yscale <- sd(y); xscale <- numeric(); ycenter <- mean(y)

  for (i in 1:ncol(X)){
    if (length(unique(X[,i])) > 2){xscale[i] <- sd(X[,i])} else{xscale[i] <- 1}
  }

  data <- Scale(data)
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))

  log_posterior <- function(par){
    nu <- par[1]; eta <- par[2]
    out <- cvreg:::bats_em(X,y,nu,eta)
    sigma <- out$sigma
    coef <- as.vector(out$coefficients)
    log_invsig <- dgamma(1/(sigma^2), 0.0001, 0.0001, log = T)
    log_beta <- sum(dst(coef, nu = nu, mu = 0, scale = eta, log = T))
    log_lik <- sum(dnorm(y,as.vector(X%*%coef),sigma,log=T))
    lp <- log_lik + log_beta + log_invsig
    -1*lp
  }
  if (is.null(prior)){
    find_penalty <- Rsolnp::solnp(c(1,1),log_posterior,LB=c(1,0),UB=c(100,100),control=list(trace=F,rho=1.5))
    nu <- find_penalty$pars[1]
    eta <- find_penalty$pars[2]
    out <- cvreg:::bats_em(X,y,nu,eta)
    logPost <- -1 * log_posterior(c(nu,eta)) # negate back: log_posterior returns the negative log posterior
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma <- out$sigma
  }
  else{
    if (length(prior) < 2){
      stop("Please provide both a value for 'nu' and 'eta'")
    }
    nu <- prior[1]; eta <- prior[2]
    out <- cvreg:::bats_em(X,y,nu,eta)
    logPost <- -1 * log_posterior(c(nu,eta)) # negate back: log_posterior returns the negative log posterior
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma <- out$sigma
  }
  xscale <- c(yscale, xscale)
  coef <- coef * (yscale/xscale) # elementwise rescale to the original units; preserves coefficient names
  sigma <- sigma * yscale
  fitted <- (fitted*yscale)+ycenter
  residuals <- residuals*yscale
  out <- list(coefficients=coef,sigma=sigma,nu=nu,eta=eta,fitted=fitted,residuals=residuals,logPost=logPost,logLik=logLik,formula=formula,call=this.call)
  return(structure(out, class = "penreg", is.list = FALSE, penalty = "BATS"))
}




#' Bayesian LASSO maximum a posteriori estimator
#'
#' @param formula model formula
#' @param data a data frame
#' @param penalty a single penalty (lambda) value, or NULL (the default) to estimate the penalty by
#' maximizing the log posterior.
#'
#' @return a penreg object
#' @export
#'
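#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data). Leaving
#' ## 'penalty' NULL lets Rsolnp::solnp pick it by maximizing the log posterior.
#' \dontrun{
#' fit <- blasso_map(mpg ~ wt + hp, data = mtcars)
#' fit$lambda
#' }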
blasso_map = function(formula, data, penalty = NULL){
  this.call <- match.call()
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))
  yscale <- sd(y); xscale <- numeric(); ycenter <- mean(y)

  for (i in 1:ncol(X)){
    if (length(unique(X[,i])) > 2){xscale[i] <- sd(X[,i])} else{xscale[i] <- 1}
  }

  data <- Scale(data)
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))

  if (is.null(penalty)){
    log_posterior <- function(penalty){
      -1 * cvreg:::blasso_em(X,y,penalty)$logPost
    }
    find_penalty <- Rsolnp::solnp(1,log_posterior,LB=0,UB=1e4,control=list(trace=F,rho=1.5))
    penalty <- find_penalty$pars
    penalty.hessian <- find_penalty$hessian
    out <- cvreg:::blasso_em(X,y,penalty)
    logPost <- out$logPost
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma <- out$sigma
    lambda <- out$lambda
    vcov<-out$vcov
  }
  else{
    if (length(penalty) > 1){
      penalty <- penalty[1]
      warning("Argument 'penalty' was provided with more than 1 value. Only the first element will be used.")
    }
    out <- cvreg:::blasso_em(X,y,penalty)
    logPost <- out$logPost
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma <- out$sigma
    lambda <- out$lambda
    vcov<-out$vcov
  }
  xscale <- c(yscale, xscale)
  coef <- coef * (yscale/xscale) # elementwise rescale to the original units; preserves coefficient names
  sigma <- sigma * yscale
  fitted <- (fitted*yscale)+ycenter
  residuals <- residuals*yscale
  out <- list(coefficients=coef,sigma=sigma,penalty=penalty,lambda=lambda,fitted=fitted,residuals=residuals,logPost=logPost,logLik=logLik,vcov=vcov,formula=formula,call=this.call)
  return(structure(out, class = "penreg", is.list = FALSE, penalty = "BLASSO"))

}


#' Bayesian Generalized Double Pareto LASSO maximum a posteriori estimator
#'
#' @param formula model formula
#' @param data a data frame
#' @param prior an optional numeric vector c(alpha, zeta) fixing the hyperparameters. if NULL (the default),
#' both are estimated by maximizing the log posterior.
#' @return a penreg object
#' @export
#' @references Armagan, A., Dunson, D. B., & Lee, J. (2013). Generalized Double Pareto Shrinkage. Statistica Sinica, 23(1), 119–143.
#'
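#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data). With
#' ## 'prior' NULL both hyperparameters are estimated; they can also be fixed.
#' \dontrun{
#' fit <- gdp_map(mpg ~ wt + hp, data = mtcars)
#' fit <- gdp_map(mpg ~ wt + hp, data = mtcars, prior = c(3, 1))
#' }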
gdp_map = function(formula, data, prior = NULL){

  this.call <- match.call()
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))
  yscale <- sd(y); xscale <- numeric(); ycenter <- mean(y)

  for (i in 1:ncol(X)){
    if (length(unique(X[,i])) > 2){xscale[i] <- sd(X[,i])} else{xscale[i] <- 1}
  }

  data <- Scale(data)
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))

  log_posterior <- function(par){
    alpha <- par[1]; zeta <- par[2]
    # negate because Rsolnp::solnp minimizes its objective
    -1 * cvreg:::gdp_em(X,y,alpha,zeta)$logPost
  }
  if (is.null(prior)){
    find_penalty <- Rsolnp::solnp(c(3,1),log_posterior,LB=c(0.5,0.5),UB=c(4,4),control=list(trace=F,rho=1.75))
    alpha <- find_penalty$pars[1]
    zeta <- find_penalty$pars[2]
    out <- cvreg:::gdp_em(X,y,alpha,zeta)
    logPost <- out$logPost
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma <- out$sigma
  }
  else{
    if (length(prior) < 2){
      stop("Please provide both a value for 'alpha' and 'zeta'")
    }
    alpha <- prior[1]; zeta <- prior[2]
    out <- cvreg:::gdp_em(X,y,alpha,zeta)
    logPost <- out$logPost
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma <- out$sigma
  }
  xscale <- c(yscale, xscale)
  coef <- coef * (yscale/xscale) # elementwise rescale to the original units; preserves coefficient names
  sigma <- sigma * yscale
  fitted <- (fitted*yscale)+ycenter
  residuals <- residuals*yscale

  out <- list(coefficients=coef,sigma=sigma,alpha=alpha,zeta=zeta,fitted=fitted,residuals=residuals,logPost=logPost,logLik=logLik,formula=formula,call=this.call)
  return(structure(out, class = "penreg", is.list = FALSE, penalty = "GDP"))
}




#' Bayesian Bridge Regression maximum a posteriori estimator
#'
#' @param formula model formula
#' @param data a data frame
#' @param kappa the norm you wish to use. the minimum allowed is 0.250. the default is 0.50.
#' @param prior the shape and rate parameters, c(shape, rate), for the gamma prior on nu. if NULL both are estimated.
#' @return a penreg object
#' @export
#'
#' @references
#'
#' Mallick, H. & Yi, N. (2018) Bayesian Bridge Regression. Journal of Applied Statistics, 45(6), 988-1008, doi: 10.1080/02664763.2017.1324565 \cr \cr
#' Polson, N.G., Scott, J.G., & Windle, J. (2014) The Bayesian Bridge. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 76(4), 713-33. doi: 10.1111/rssb.12042 \cr \cr
#'
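#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data). kappa
#' ## sets the bridge norm; values below 1 give sparser, more LASSO-like fits.
#' \dontrun{
#' fit <- bridge_map(mpg ~ wt + hp, data = mtcars, kappa = 0.75)
#' print(fit)
#' }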
bridge_map <- function(formula, data, kappa = 0.50, prior = NULL){

  this.call <- match.call()
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))
  yscale <- sd(y); xscale <- numeric(); ycenter <- mean(y)

  for (i in 1:ncol(X)){
    if (length(unique(X[,i])) > 2){xscale[i] <- sd(X[,i])} else{xscale[i] <- 1}
  }

  data <- Scale(data)
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))

  log_posterior <- function(par){
    shape  <- par[1]; rate <- par[2]
    # negate because Rsolnp::solnp minimizes its objective
    -1 * cvreg:::bridge_em(X,y,kappa,shape,rate)$logPost
  }
  if (is.null(prior)){
    find_penalty <- Rsolnp::solnp(c(2,2),log_posterior,LB=c(0.5,0.5),UB=c(8,8),control=list(trace=F,rho=1.5))
    shape <- find_penalty$pars[1]
    rate <- find_penalty$pars[2]
    out <- cvreg:::bridge_em(X,y,kappa,shape,rate)
    logPost <- out$logPost
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma<-out$sigma;nu<-out$nu;lambda<-as.vector(out$lambda)
  }
  else{
    if (length(prior) < 2){
      stop("Please provide both a value for 'shape' and 'rate'")
    }
    shape <- prior[1]; rate <- prior[2]
    out <- cvreg:::bridge_em(X,y,kappa,shape,rate)
    logPost <- out$logPost
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = mean(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma<-out$sigma;nu<-out$nu;lambda<-as.vector(out$lambda)
  }
  # rescale back to the original units, mirroring the other MAP estimators above
  xscale <- c(yscale, xscale)
  coef <- coef * (yscale/xscale)
  sigma <- sigma * yscale
  fitted <- (fitted*yscale)+ycenter
  residuals <- residuals*yscale
  out <- list(coefficients=coef,sigma=sigma,shape=shape,rate=rate,fitted=fitted,residuals=residuals,logPost=logPost,logLik=logLik,formula=formula,call=this.call)
  return(structure(out, class = "penreg", is.list = FALSE, penalty = "Bridge"))

}


#' Robust Bridge Regression estimator
#'
#' @description This adapts the expectation maximization algorithm from \code{\link{bridge_map}}
#' to the case of robust estimation: the data are centered and scaled with a robust tau-scale
#' estimate rather than the mean and standard deviation, and the coefficients are initialized
#' from a robust ridge fit.
#'
#' @param formula model formula
#' @param data a data frame
#' @param kappa the norm you wish to use. the minimum allowed is 0.250. the default is 0.50.
#' @param shape the shape parameter for the gamma prior on nu (optional).
#' @param rate the rate parameter for the gamma prior on nu (optional).
#
#' @return a penreg object
#' @export
#'
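#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data). shape and
#' ## rate may be left NULL to be estimated, or fixed as below.
#' \dontrun{
#' fit <- robBridge(mpg ~ wt + hp, data = mtcars, kappa = 0.5, shape = 2, rate = 2)
#' }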
robBridge <- function(formula, data, kappa = 0.50, shape = NULL, rate = NULL){
  this.call <- match.call()
  prior <- c(shape, rate)
  # require both hyperparameters; otherwise estimate them below
  if (is.null(shape) || is.null(rate)) prior <- NULL
  yzscale <- function(x) unname(cvreg:::.scaleTauYZ(x))
  yzcentr <- function(x) unname(cvreg:::.scaleTauYZ(x, mu.too = TRUE)[1])
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))
  yscale <- yzscale(y); xscale <- numeric(); ycenter <- yzcentr(y)

  for (i in 1:ncol(X)){
    if (length(unique(X[,i])) > 2){
      xscale[i] <- yzscale(X[,i]);
    } else{
      xscale[i] <- 1
    }
  }

  data <- Scale2(data, yzcentr, yzscale)
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))

  initbeta <- 0.50*ridge.rob(formula, data)$coefficients[-1]
  make.loss <- function(initbeta){
    initbeta <- as.matrix(initbeta)
    function(par){
      shape  <- par[1]; rate <- par[2]
      -1 * cvreg:::robridge_em(X,y,kappa,shape,rate,initbeta)$logPost
    }
  }
  loss <- make.loss(initbeta)
  if (is.null(prior)){
    find_penalty <- Rsolnp::solnp(c(2,2),loss,LB=c(0.5,0.5),UB=c(6,6),control=list(trace=F,rho=1.5))
    shape <- find_penalty$pars[1]
    rate <- find_penalty$pars[2]
    out <- cvreg:::robridge_em(X,y,kappa,shape,rate,as.matrix(initbeta))
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = yzcentr(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma<-out$sigma;nu<-out$nu;lambda<-as.vector(out$lambda)
  }
  else{
    out <- cvreg:::robridge_em(X,y,kappa,shape,rate,as.matrix(initbeta))
    logPost <- out$logPost
    coef <- as.vector(out$coefficients)
    names(coef) <- colnames(X)
    fitted <- as.vector(X%*%coef)
    logLik <- sum(dnorm(y, fitted, out$sigma, log = TRUE))
    intercept = yzcentr(y - fitted)
    coef <- c("(Intercept)" = intercept, coef)
    fitted <- as.vector(cbind(1,X)%*%coef)
    residuals <- y - fitted
    sigma<-out$sigma;nu<-out$nu;lambda<-as.vector(out$lambda)
  }
  xscale <- c(1, xscale)
  coef <- coef * (yscale/xscale) # elementwise rescale to the original units; preserves coefficient names
  sigma <- sigma * yscale
  fitted <- (fitted*yscale)+ycenter
  residuals <- residuals*yscale
  out <- list(coefficients=coef,sigma=sigma,shape=shape,rate=rate,fitted=fitted,residuals=residuals,logLik=logLik,formula=formula,call=this.call)
  return(structure(out, class = "penreg", is.list = FALSE, penalty = "robBridge"))
}



#' Robust Penalized Regression Estimator
#'
#' This set of penalized regression estimators offers the LASSO, ridge, MCP (minimax concave penalty), and SCAD
#' (smoothly clipped absolute deviation) methods of regularization for robust Huber regression. These estimators
#' are inspired by a series of papers by Wang et al. (2018), Pan et al. (2019), and Sun et al. (2019). The R package
#' ILAMM has another implementation of these estimators. The optimal tuning parameter is selected based on a robust
#' final prediction error (RFPE) metric.
#'
#' @param formula model formula
#' @param data a data frame
#' @param lambda a lambda value. if left as NULL lambda will be estimated via optimization.
#' @param k tuning constant for Huber's psi function. defaults to 1.345 which gives 95 pct efficiency when errors are normally distributed.
#' @param gamma the tuning parameter for nonconvex penalties. If left as NULL, 3 is used for MCP and 3.7 is used for SCAD. Not applicable to LASSO.
#' @param penalty one of "LASSO", "MCP", "SCAD", or "RIDGE"
#'
#' @return a penreg object
#' @export
#'
#' @references
#' Pan, X.O., Sun, Q., & Zhou, W. (2019). Nonconvex Regularized Robust Regression with Oracle Properties in Polynomial Time. \cr
#' Sun, Q., Zhou, W-X. and Fan, J. (2019). Adaptive Huber regression. J. Amer. Stat. Assoc. 0 1-12. \cr \cr
#' Wang, L., Zheng, C., Zhou, W. and Zhou, W-X. (2018). A new principle for tuning-free Huber regression. Preprint. \cr \cr
#'
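#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data). Leaving
#' ## lambda NULL triggers selection by the robust final prediction error.
#' \dontrun{
#' fit <- robpr(mpg ~ wt + hp + disp, data = mtcars, penalty = "MCP")
#' print(fit)
#' }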
robpr <- function(formula, data, lambda = NULL, k = 1.345, gamma = NULL, penalty = c("LASSO", "MCP", "SCAD", "RIDGE")){

  this.call <- match.call()
  penalty <- match.arg(penalty)
  if (is.null(gamma)){
    gamma <- switch(penalty, "MCP" = 3, "SCAD" = 3.7, "LASSO" = 1, "RIDGE" = 1)
  }
  yzscale <- function(x) cvreg:::.scaleTauYZ(x)
  yzcentr <- function(x) cvreg:::.scaleTauYZ(x, mu.too = TRUE)[1]
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]};
  names <- c("(Intercept)" , colnames(X))
  y <- model.response(model.frame(formula, data))
  yscale <- yzscale(y); xscale <- numeric(); ycenter <- yzcentr(y)

  for (i in 1:ncol(X)){
    if(length(unique(X[,i]))>2){xscale[i]<-yzscale(X[,i])}else{xscale[i]<-1}
  }

  data <- Scale2(data, yzcentr, yzscale)
  X <- model.matrix(formula, data); if (all(X[,1]==1)){X<-X[,-1]}
  y <- model.response(model.frame(formula, data))
  n = nrow(X); p = ncol(X)

  if (is.null(lambda)){
    max.lambda <- function(y, x){
        n = length(y)
        max(abs(y %*% x)) / n
    }
    lossfun <- function(lambda, X, y, gamma, penalty, k) {
        if (penalty == "RIDGE"){
          out = cvreg:::ridgeHuber(X, y, lambda, k = k)
          beta <- round(as.vector(out$coefficients), 8)
        }
        else{
          out = cvreg:::HuberPenalizedReg(X, y, lambda, gamma, penalty, k = k)
          beta <- zapsmall(as.vector(out$coefficients), 6)
        }
        # trimmed sum of absolute residuals: keep the smallest floor(n * pnorm(k)) of them
        res <- y - as.vector(X%*%beta)
        absres <- sort(abs(res))
        q <- pnorm(k); h <- floor(nrow(X)*q)
        val <- sum(absres[1:h])
        return(val)
      }
      out <- Rsolnp::solnp(sqrt((2*log(p))/n),lossfun,LB=1e-4,UB=max.lambda(y,X),X=X,y=y,gamma=gamma,penalty=penalty,k=k,control=list(trace=0,rho=1.5,delta=1e-5,tol=1e-6))
      converged <- out$convergence
      lambda <- out$pars
      if (penalty == "RIDGE"){
       out <- cvreg:::ridgeHuber(X, y, lambda, k = k)
       betas <- round(as.vector(out$coefficients), 8)
      }
      else{
        out <- cvreg:::HuberPenalizedReg(X, y, lambda, gamma, penalty, k = k)
        betas <- zapsmall(as.vector(out$coefficients), 6)
      }

  }
  else{
    if (penalty == "RIDGE"){
      out <- cvreg:::ridgeHuber(X, y, lambda, k = k)
      betas <- round(as.vector(out$coefficients), 8)
    }
    else{
      out <- cvreg:::HuberPenalizedReg(X, y, lambda, gamma, penalty, k = k)
      betas <- zapsmall(as.vector(out$coefficients), 6)
    }
  }

  res <- y - as.vector(X%*%betas)
  betas <- c(yzcentr(res), betas)
  fitted <- as.vector(cbind(1,X)%*%betas)
  residuals <- y - as.vector(cbind(1,X)%*%betas)
  sigma <- yzscale(residuals)
  xscale <- c(1, xscale)
  betas <- sapply(1:length(betas), function(i) betas[i] * (yscale/xscale[i]))
  sigma <- sigma * yscale
  fitted <- (fitted*yscale)+ycenter
  residuals <- residuals*yscale
  names(betas) <- names
  structure(list(coefficients = betas, sigma = sigma, fitted = fitted, lambda = lambda, formula = formula, call = this.call), is.list = FALSE, class = "penreg", penalty = penalty)
}

#' Standardized Group LASSO
#'
#' @description This utilizes a group lasso penalty which operates on orthonormalized projections of the covariates.
#' The advantage of this is that by treating the terms for each variable as a group of coefficients, entire
#' variable groups can be dropped from the model at once more efficiently.
#'
#' As in the elastic net, the L1 shrinkage penalty is \eqn{\lambda_1 = \alpha * \lambda}, and the L2 shrinkage penalty is
#' \eqn{\lambda_2 = (1-\alpha) * \lambda}. This results in smoothing of the covariates within each group. This differs from
#' the sparse group LASSO which imposes within-group L1 penalties instead.
#'
#' @param X the covariates. must be in the same order as the group labels. it is recommended to sort the variables by group.
#' @param y a continuous outcome
#' @param idx the group label assignments. must be in the same order as the covariates.
#' @param alpha the penalty mixing parameter, which can take values of \eqn{0 \le \alpha \le 1}. defaults to 0.75.
#' @param lambda the shrinkage parameter or a sequence of shrinkage parameters. if left as NULL, a sequence will be
#' generated with the length given by nlambda.
#' @param nlambda the number of lambda values to try. defaults to 100.
#' @param maxit maximum number of iterations
#' @param min.lam.frac the rate of the smallest lambda to the largest lambda. defaults to 0.05.
#' @param wch.pen which variable groups to penalize. defaults to a vector of 1s with one entry per group. provide
#' entries of 0 for the group(s) you wish to leave unpenalized.
#' @param opt.crit the criterion to minimize for finding the optimal lambda when a sequence is used. must be one of
#' "fpe" (final prediction error; the default), "aic" (corrected Akaike Information Criterion), or "bic" (Bayesian Information Criterion).
#'
#' @return a penreg object
#' @export
#'
#' @examples
#' sglasso(Alz$x,Alz$ab_42,Alz$idx,nlambda = 30, min.lam.frac = 0.01, alpha = 0.65)
#'
#' @references
#' Simon, N., & Tibshirani, R. (2012). Standardization and the Group Lasso Penalty. Statistica Sinica, 22(3), 983-1001. Retrieved March 17, 2020 doi: 10.5705/ss.2011.075
#'
sglasso <- function(X, y, idx, alpha = 0.75, lambda = NULL, nlambda = 100, maxit = 1000, min.lam.frac = 0.05, wch.pen = rep(1, length(unique(idx))), opt.crit = c("fpe", "aic", "bic")){

  this.call <- match.call()
  opt.crit <- match.arg(opt.crit)
  thresh <- 1e-6
  index <- idx
  lambdaSeq <-function(y, X, index, alpha, nlambda = 20, min.lam = 0.05, wch.pen = rep(1,length(unique(index)))){
    groups <- unique(index)
    fitNorm <- rep(0,length(groups))
    normy <- sqrt(sum(y^2))
    for(i in 1:length(groups)){
      if(wch.pen[i] != 0){
        newInd <- which(index == groups[i])
        if(length(newInd) >= nrow(X)){
          fitNorm[i] <- normy
        }
        else{
          fitNorm[i] <- sqrt(sum((X[,newInd]%*%pseudoinverse(t(X[,newInd])%*%X[,newInd])%*%t(X[,newInd]) %*% y)^2)/length(newInd))
        }
      }
    }
    lam.max <- max(fitNorm) * alpha
    lam.min <- min.lam * lam.max
    lam.list <- nlambda:1
    scale <- (log(lam.max)-log(lam.min))/(nlambda - 1)
    shift <- log(lam.min) - scale
    lam.list <- exp(lam.list * scale + shift)
    return(lam.list)
  }


  fittingFunction <- function(y, X, index, lambda, thresh, maxit, Us, Vs, Ds, alpha, oldFit, resid, active.set, wch.pen){

    ngroups <- length(unique(index))
    groupLen <- rep(0,ngroups)
    for(i in 1:ngroups){
      groupLen[i] <- length(which(index == unique(index)[i]))
    }

    # ridge-shrunken squared singular values per group, plus per-group effective degrees of freedom
    newD <- vector("list", ngroups)
    dfs <- rep(0,ngroups)

    for(j in 1:ngroups){
      newD[[j]] <- Ds[[j]]^2/(Ds[[j]]^2 + (1-alpha)*lambda*wch.pen[j])
      dfs[j] <- sum(newD[[j]])
    }

    diff <- 1
    iter <- 1
    iConverged <- 1
    Groups <- 1:ngroups
    change <- 1
    qqq <- 0

    while(change == 1){
      change <- 0;iConverged <- 1;diff <- 1
      while(diff > thresh && iter < maxit){
        iter <- iter + 1;diff <- 0
        if(iConverged == 1){Groups <- 1:ngroups}else if(iConverged == 0){Groups <- which(active.set == 1)}
        L2penalty <- function(U,D,y){ U %*% (D * crossprod(U, y)) }
        for(i in Groups){
          resid <- resid + oldFit[,i]
          proj <- L2penalty(Us[[i]], newD[[i]], resid)
          normProj <- sqrt(sum(proj^2))
          penalty <- max(c((1 - sqrt(dfs[i])*lambda*alpha*wch.pen[i] / normProj), 0))
          newFit <- penalty * proj
          resid <- resid - newFit
          diff <- diff + sum(abs(oldFit[,i] - newFit))

          if(max(abs(oldFit[,i])) == 0 && max(abs(newFit)) > 0){active.set[i] <- 1;change <- 1}
          oldFit[,i] <- newFit
        }
        iConverged <- 0
        diff <- diff / nrow(X)
      }
    }
    Betas <- list()
    for(i in 1:ngroups){
      fit.ind <- which(abs(Ds[[i]]) > 1e-4)
      beta <- Vs[[i]][,fit.ind] %*% ((1/Ds[[i]][fit.ind]) * crossprod(Us[[i]], oldFit[,i]))
      Betas[[i]] <- zapsmall(beta, 5)
    }
    Betas <- unlist(Betas)
    return(list(Betas = Betas, oldFit = oldFit, resid = resid, active.set = active.set))
  }

  if(is.null(lambda)){lambda <- lambdaSeq(y, X, index, alpha, nlambda, min.lam.frac, wch.pen)}
  ngroups <- length(unique(index))
  groupLen <- rep(0,ngroups)
  for(i in 1:ngroups){groupLen[i] <- length(which(index == unique(index)[i]))}
  Us <- list();Vs <- list();Ds <- list()
  for(i in 1:ngroups){
    groupiMat <- X[,which(index == unique(index)[i])];svdDecomp <- wsvd(groupiMat);Us[[i]] <- svdDecomp$u;Vs[[i]] <- svdDecomp$v;Ds[[i]] <- svdDecomp$d
  }

  oldFit <- matrix(0, ncol = length(index), nrow = length(y));resid <- y;active.set = rep(0, ngroups)
  Betas <- matrix(0, ncol = length(lambda), nrow = ncol(X))
  for(i in 1:length(lambda)){
    fit <- fittingFunction(y, X, index, lambda[i], thresh = thresh, maxit = maxit, Us = Us, Vs = Vs, Ds = Ds, alpha = alpha, oldFit = oldFit, resid = resid, active.set = active.set, wch.pen = wch.pen)
    resid <- fit$resid;oldFit <- fit$oldFit;active.set <- fit$active.set;Betas[,i] <- fit$Betas
  }

  if (length(lambda) > 1){ord <- order(lambda);lambda <- lambda[ord];Betas <- Betas[,ord]}
  yhat <- apply(Betas, 2, function(b)  as.vector(tcrossprod(b, X)))
  sigma <- sapply(1:ncol(yhat), function(j) sd(y - yhat[,j]))
  y.mu <- mean(y)
  intercept <- apply(yhat, 2, function(yHat)  y.mu - mean(yHat))
  yhat <- sapply(1:ncol(yhat), function(j) yhat[,j] + intercept[j])
  Betas <- rbind(intercept, Betas)
  rownames(Betas) <- c("(Intercept)" , colnames(X))
  fpe.fun <- function(sse, k, n) {if (k >= n) {Inf}else{sse * ((k + n + 1) / (n - k - 1))}}
  # note: dnorm expects a standard deviation, hence the sqrt of the mean squared error
  bic.fun <- function(yhat, y, k, n) {(-2 * sum(dnorm(y, yhat, sqrt(mean((y-yhat)^2)), log = T))) + (k * log(n) * n)/(n - k - 1)}
  aic.fun <- function(yhat, y, k, n) {(-2 * sum(dnorm(y, yhat, sqrt(mean((y-yhat)^2)), log = T))) + (2 * k) + (((2 * k) * (k + 1))/(n - k - 1))}
  sserrs <- sapply(1:ncol(yhat), function(j) sum((y - yhat[,j])^2))
  fpe <- sapply(1:length(sserrs), function(j) fpe.fun(sserrs[j], sum(Betas[,j]!=0), length(y)))
  bic <- sapply(1:length(sserrs), function(j) bic.fun(yhat[,j], y, sum(Betas[,j]!=0), length(y)))
  aic <- sapply(1:length(sserrs), function(j) aic.fun(yhat[,j], y, sum(Betas[,j]!=0), length(y)))
  included <- sapply(1:ncol(Betas), function(j) sapply(unique(index), function(x) as.numeric(sum(abs(as.vector(Betas[which(index == x), j]))) != 0)))
  included <- rbind(rep(TRUE, ncol(included)), included)
  if (length(lambda) > 1){
    if (opt.crit == "bic"){
      lambda.opt <- lambda[which.min(bic)]
      wch.min <- which.min(bic)
    }
    else if (opt.crit == "aic"){
      lambda.opt <- lambda[which.min(aic)]
      wch.min <- which.min(aic)
    }
    else{
      lambda.opt <- lambda[which.min(fpe)]
      wch.min <- which.min(fpe)
    }
  }
  else{
    lambda.opt <- lambda
  }
  if (length(lambda) == 1)
    return(structure(list(coefficients = Betas, lambda = lambda, included = included, fitted = yhat, sigma = sigma, aic = aic, bic = bic, fpe = fpe, formula = formula, call = this.call, lambda.opt = lambda.opt), class = "penreg", penalty = "sglasso", is.list = FALSE))
  else
    return(structure(list(coefficients = Betas, lambda = lambda, included = included, fitted = yhat, sigma = sigma, aic = aic, bic = bic, fpe = fpe, lambda.opt = lambda.opt, formula = formula, call = this.call), class = "penreg", which.opt = wch.min, criterion = opt.crit, penalty = "sglasso", is.list = TRUE))
}



#' Generalized Elastic Net/Bridge Penalty for GLMs
#'
#' The elastic net penalty is defined by a linear combination of lasso (L1) and ridge (L2) penalties.
#' The generalized elastic net penalty replaces the L2 penalty with the bridge penalty (Lp), giving a total
#' penalty of \eqn{\lambda [\alpha ||\beta||_1 + (1-\alpha) ||\beta||_\kappa^\kappa]}.
#'
#' @param formula a model formula.
#' @param data a data frame
#' @param family the glm family. defaults to gaussian().
#' @param lambda the penalty
#' @param alpha the mixing parameter for the lasso and bridge penalties. defaults
#' to 0.5. 0 gives full weight to the bridge penalty, while 1 gives full weight to
#' the lasso penalty.
#' @param kappa the Lp norm of the bridge penalty. defaults to 1.4.
#' @param weights an optional vector of weights to be used in the fitting process.
#' @param start starting values for the coefficients.
#' @param etastart starting values for the linear predictor.
#' @param mustart starting values for the fitted values.
#' @param offset this can be used to specify an a priori known component to be included
#' in the linear predictor during fitting.
#' @param standardize whether the regressors should be standardized (this is recommended)
#' or not. defaults to TRUE.
#'
#' @return a penreg object
#' @export
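#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data). Any of
#' ## lambda, alpha, and kappa left NULL are tuned by minimizing the model AIC.
#' \dontrun{
#' fit <- genet(mpg ~ wt + hp, data = mtcars, family = gaussian(), alpha = 0.5, kappa = 1.4)
#' }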
#'
genet <- function(formula, data, family = gaussian(), lambda = NULL, alpha = 0.5, kappa = 1.4, weights = NULL, start = NULL, etastart = NULL, mustart = NULL, offset = rep (0, nobs), standardize = TRUE){

  mf <- model.frame(formula, data)
  y <- model.response(mf)
  terms <- terms(mf)
  if (family$family == "binomial"){
    y <- as.numeric(as.factor(y))-1
  }
  x <- model.matrix(formula, data)
  if (all(x[,1]==1)) {x<-x[,-1]}
  if (is.null(weights)) weights <- rep(1, nrow(x))

  if (is.null(lambda) && !is.null(alpha) && !is.null(kappa)){
    find_lambda <- function(lambda, x, y, family, alpha, kappa, weights, start, etastart, mustart, offset, standardize){
      .genetfit(x, y, family, lambda, alpha, kappa, weights, start, etastart, mustart, offset, control=.gnetControl(), intercept = TRUE, standardize)$aic
    }
    solnp_lambda <- Rsolnp::solnp(3,find_lambda,LB=1e-3,UB=1e25,kappa=kappa,alpha=alpha,x=x, y=y, family=family, weights=weights, start=start, etastart=etastart, mustart= mustart, offset=offset, standardize=standardize,control=list(trace=F,rho=1.5))
    lambda <- solnp_lambda$pars
  }
  else if (is.null(lambda) && is.null(alpha) && !is.null(kappa)){
    find_pars <- function(pars, x, y, family, kappa, weights, start, etastart, mustart, offset, standardize){
      lambda<-pars[1]; alpha<-pars[2]
      .genetfit(x, y, family, lambda, alpha, kappa, weights, start, etastart, mustart, offset, control=.gnetControl(), intercept = TRUE, standardize)$aic
    }
    solnp_pars <- Rsolnp::solnp(c(3,0.5), find_pars,LB=c(1e-3,0),UB=c(1e25,1),kappa=kappa,x=x, y=y, family=family, weights=weights, start=start, etastart=etastart, mustart= mustart, offset=offset, standardize=standardize,control=list(trace=F,rho=2.5))
    lambda <- solnp_pars$pars[1]
    alpha <- solnp_pars$pars[2]
  }
  else if (is.null(lambda) && is.null(alpha) && is.null(kappa)){
    find_pars <- function(pars, x, y, family, weights, start, etastart, mustart, offset, standardize){
      lambda <- pars[1]; alpha <- pars[2]; kappa <- pars[3]
      .genetfit(x, y, family, lambda, alpha, kappa, weights, start, etastart, mustart, offset, control=.gnetControl(), intercept = TRUE, standardize)$aic
    }
    solnp_pars <- Rsolnp::solnp(c(3,0.5,1.6),find_pars, LB=c(1e-3,0,0.5), UB=c(1e25,1,4), x=x, y=y, family=family, weights=weights, start=start, etastart=etastart, mustart= mustart, offset=offset, standardize=standardize,control=list(trace=F,rho=3.25))
    lambda <- solnp_pars$pars[1]
    alpha <- solnp_pars$pars[2]
    kappa <- solnp_pars$pars[3]
  }
  else if (is.null(lambda) && !is.null(alpha) && is.null(kappa)){
    find_pars <- function(pars, alpha, x, y, family, weights, start, etastart, mustart, offset, standardize){
      lambda<-pars[1]; kappa<-pars[2]
      .genetfit(x, y, family, lambda, alpha, kappa, weights, start, etastart, mustart, offset, control=.gnetControl(), intercept = TRUE, standardize)$aic
    }
    solnp_pars <- Rsolnp::solnp(c(2, 1.5),find_pars,LB=c(1e-3,0.5),UB=c(1e25,4),alpha=alpha,x=x, y=y, family=family, weights=weights, start=start, etastart=etastart, mustart= mustart, offset=offset, standardize=standardize, control=list(trace=F,rho=2.25))
    lambda <- solnp_pars$pars[1]
    kappa <- solnp_pars$pars[2]
  }
  else if (!is.null(lambda) && any(c(is.null(alpha), is.null(kappa)))){
    stop("alpha and kappa cannot be null if lambda is specified")
  }
  else if (!is.null(lambda) && !is.null(alpha) && !is.null(kappa)){
    out <- .genetfit(x, y, family, lambda, alpha, kappa, weights, start, etastart, mustart, offset, control=.gnetControl(), intercept = TRUE, standardize)
    out$lambda <- lambda; out$alpha <- alpha; out$kappa <- kappa; out$mf <- mf; out$terms <- terms
    return(out)
  }
  out <- .genetfit(x, y, family, lambda, alpha, kappa, weights, start, etastart, mustart, offset, control=.gnetControl(), intercept = TRUE, standardize)
  out$lambda <- lambda; out$alpha <- alpha; out$kappa <- kappa; out$mf <- mf; out$terms <- terms
  return(out)
}


.genetfit <- function(x, y, family = gaussian(), lambda = 0.001, alpha = 0.5, kappa = 1.4, weights, start = NULL, etastart = NULL, mustart = NULL, offset = rep (0, nobs), control = .gnetControl (), intercept = TRUE, standardize = TRUE, ...){

  call <- match.call ()
  method <- ".genetiteration"
  varnames <- colnames(x)
  .genetpenaltyfun <- function (lambda = NULL, alpha = 0.5, kappa = 1.4){

    lambda1 <- lambda

    if (alpha < 0 || alpha > 1)
      stop ("'alpha' must be between 0 and 1\n")

    if (kappa <= 0)
      stop ("'kappa' must be greater than zero\n")

    getpenmat <- function (beta = NULL, c1 = .gnetControl()$c1, ...){
      if (is.null (beta))
        stop ("'beta' must be the current coefficient vector \n")

      if (c1 < 0)
        stop ("'c1' must be non-negative \n")

      penmat <- lambda1 * diag (((1 - alpha) * kappa * abs (beta)^(kappa - 1) + alpha) / (sqrt (beta^2 + c1)), length (beta))
      penmat
    }

    structure (list(penalty = "genet", lambda = lambda, getpenmat = getpenmat), class = "penalty")
  }

  penalty <- .genetpenaltyfun(lambda = lambda, alpha = alpha, kappa = kappa)

  if (is.character (family))
    family <- get (family, mode = "function", envir = parent.frame ())

  if (is.function (family))
    family <- family ()

  if (is.null (family$family)) {
    print (family)
    stop ("'family' not recognized")
  }

  if (is.null (method))
    stop ("method not specified")

  if (intercept & (var (x[,1]) > control$var.eps))
    x <- cbind (1, x)

  x <- as.matrix (x)
  xnames <- dimnames (x)[[2L]]

  ynames <- if (is.matrix (y))
    rownames(y)
  else
    names(y)

  nobs <- nrow (x)
  nvars <- ncol (x)
  ones <- rep (1, nobs)
  mean.x <- drop (ones %*% x) / nobs
  if (intercept)
    mean.x[1] <- 0

  x.std <- scale (x, mean.x, FALSE)   # centers the regressor matrix

  norm.x <- if (standardize){
    norm.x <- sqrt (drop (ones %*% (x.std^2)))   # computes the euclidean norm of the regressors
    nosignal <- apply (x, 2, var) < control$var.eps
    if (any (nosignal))    # modify norm.x for variables with too small a variance (e.g. the intercept)
      norm.x[nosignal] <- 1

    norm.x
  }
  else
    rep (1, nvars)

  x.std <- scale (x.std, FALSE, norm.x)  # standardizes the centered regressor matrix
  fit <- do.call(method, list (x = x.std, y = y, family = family, penalty = penalty, intercept = intercept, control = control, ...))
  coef <- fit$coefficients
  if (intercept){
    coef[1] <- coef[1] - sum (mean.x[-1] * coef[-1] / norm.x[-1])
    coef[-1] <- coef[-1] / norm.x[-1]
  }
  else{
    coef <- coef / norm.x
  }
  eta <- drop (x %*% coef)
  mu <- family$linkinv (eta)
  mu.eta.val <- family$mu.eta (eta)
  wt <- sqrt ((weights * mu.eta.val^2) / family$variance (mu))
  dev <- sum (family$dev.resids (y, mu, weights))
  wtdmu <- sum (weights * y) / sum (weights)
  nulldev <- sum (family$dev.resids (y, wtdmu, weights))
  n.ok <- nobs - sum (weights == 0)
  nulldf <- n.ok - as.integer (intercept)
  residuals <- (y - mu) / mu.eta.val

  xnames <- colnames (x)
  ynames <- names (y)
  names (residuals) <- names (mu) <- names (eta) <- names (weights) <- names (wt) <- ynames
  names (coef) <- xnames

  Amat <- fit$Amat
  Amat <- t (norm.x * t (norm.x * Amat))   # must be (quadratically) transformed in order to cope with the transformed parameter space

  if (is.null (fit$tr.H))
    stop ("quadpen.fit: Element 'tr.H' has not been returned from 'method'")

  model.aic <- dev + 2 * fit$tr.H
  model.bic <- dev + log (nobs) * fit$tr.H
  resdf <- n.ok - fit$tr.H

  dispersion <- ifelse (!((family$family == "binomial") | (family$family == "poisson")), sum ((y - mu)^2 / family$variance (mu)) / (nobs - fit$tr.H), 1)
  names(coef) <- c("(Intercept)", varnames)
  fit <- list (coefficients = coef, residuals = residuals, sigma = sd(residuals), fitted.values = mu, family = family, penalty = penalty,
               linear.predictors = eta, deviance = dev, aic = model.aic, bic = model.bic, null.deviance = nulldev, n.iter = fit$stop.at,
               best.iter = fit$m.stop, weights = wt, prior.weights = weights, df.null = nulldf, df.residual = resdf, converged = fit$converged, mean.x = mean.x,
               norm.x = norm.x, Amat = Amat, method = method, rank = fit$tr.H, x = x, y = y, fit.obj = fit, call = call, dispersion = dispersion)

  class (fit) <- c ("penreg")
  attr(fit, "penalty") <- "genet"
  fit
}

.genetiteration <- function (x, y, family = NULL, penalty = NULL, intercept = TRUE, weights = rep (1, nobs), control = .gnetControl (), initial.beta, mustart, eta.new, kappa1 = 1, ...) {
  kappa <- kappa1

  if (is.null (family))
    stop (".genetiteration: family not specified")

  if (is.null (penalty))
    stop (".genetiteration: penalty not specified")

  if (!is.null (dim (y)))
    stop (".genetiteration: y must be a vector")

  x <- as.matrix (x)
  converged <- FALSE
  n.iter <- control$max.steps
  eps <- control$conv.eps
  c1 <- control$c1


  stop.at <- n.iter
  p <- ncol (x)
  nobs <- nrow (x)
  converged <- FALSE
  beta.mat <- matrix (0, nrow = n.iter, ncol = p)    # to store the coefficient updates

  if (missing (initial.beta))
    initial.beta <- rep (0.01, p)
  else
    eta.new <- drop (x %*% initial.beta)

  if (missing (mustart))
  {
    etastart <- drop (x %*% initial.beta)
    eval (family$initialize)
  }

  if (missing (eta.new))
    eta.new <- family$linkfun (mustart)    # predictor

  for (i in 1 : n.iter){
    beta.mat[i,] <- initial.beta
    mu.new <- family$linkinv(eta.new)      # fitted values
    d.new <- family$mu.eta(eta.new)        # derivative of response function
    v.new <- family$variance(mu.new)       # variance function of the response
    weights <- d.new / sqrt(v.new)  # decomposed elements (^0.5) of weight matrix W, see GLM notation
    x.star <- weights * x
    y.tilde.star <- weights * (eta.new  + (y - mu.new) / d.new)
    A.lambda <- get.Amat(initial.beta = initial.beta, penalty = penalty, intercept = intercept, c1 = c1, x = x, ...)
    p.imat.new <- crossprod (x.star) + A.lambda       # penalized information matrix
    inv.pimat.new <- pseudoinverse(p.imat.new)        # pseudoinverse for information matrix
    beta.new <- kappa * drop (inv.pimat.new %*% crossprod(x.star, y.tilde.star)) + (1 - kappa) * beta.mat[i,]  # computes the next (damped) iterate of the beta vector

    # check for convergence
    if ((sum (abs (beta.new - initial.beta)) / sum (abs (initial.beta)) <= eps)){
      converged <- TRUE
      stop.at <- i
      if (i < n.iter)
        break
    }
    else{
      initial.beta <- beta.new    # update beta vector
      eta.new <- drop (x %*% beta.new)
    }
  }

  Hatmat <- x.star %*% tcrossprod(inv.pimat.new,x.star)
  tr.H <- sum(diag(Hatmat))
  dev.m <- sum(family$dev.resids (y, mu.new, weights))
  aic.vec <- dev.m + 2 * tr.H
  bic.vec <- dev.m + log (nobs) * tr.H
  if (!converged & (stop.at == n.iter))
    cat (penalty$penalty, ": convergence failed! (lambda = ", penalty$lambda, ")\n")
  fit <- list (coefficients = beta.new, beta.mat = beta.mat[1 : stop.at,], tr.H = tr.H, fitted.values = mu.new, family = family, Amat = A.lambda, converged = converged, stop.at = stop.at, m.stop = stop.at, linear.predictors = eta.new, weights = weights^2, p.imat = p.imat.new, inv.pimat = inv.pimat.new, x.star = x.star, v.new = v.new)
}

get.Amat <- function (initial.beta = NULL, penalty = NULL, intercept = TRUE, c1 = .gnetControl()$c1, x = NULL, ...){
  env <- NULL

  if (intercept)
    x <- x[,-1]

  if (is.null (initial.beta))
    stop ("get.Amat: 'initial.beta' is missing.")

  if (is.null (penalty))
    stop ("get.Amat: 'penalty' is missing.")

  nreg <- length (initial.beta) - as.integer (intercept)

  coefm0 <- if (intercept)
    initial.beta[-1]
  else
    initial.beta


  #### Computation of A.lambda:

  if (is.null (penalty$getpenmat))
  {
    a.coefs <- if (is.null (penalty$a.coefs))
      diag (nreg)    # just built for the p regressors! (Intercept not accounted for!!!)
    else
      penalty$a.coefs (coefm0, env = env, x = x, ...)

    A.lambda <- matrix (0, nrow = nreg, ncol = nreg)
    xim0 <- drop (t (a.coefs) %*% coefm0)
    A.lambda2 <- drop (penalty$first.deriv (coefm0, env = env, x = x, ...) * as.integer (xim0 != 0) / sqrt (c1 + xim0^2))

    Jseq <- 1 : ncol (a.coefs)
    l1 <- sapply (Jseq, function (Jseq) {which (a.coefs[,Jseq] != 0)})   # extracts the positions of elements != 0 in the columns of 'a.coefs'
    just.one <- which (sapply (Jseq, function (Jseq) {length (l1[[Jseq]]) == 1}) == TRUE)   # extracts the column indices of 'a.coefs' with just one element != 0
    less.sparse <- setdiff (Jseq, just.one)   # extracts the column indices of 'a.coefs' with more than one element != 0

    if (length (just.one) > 0)  # <FIXME: Does not work if there are more than 'p' columns of 'a.coefs' with just one element != 0!>
    {
      jo2 <- rep (0, nreg)
      jo1 <- A.lambda2[just.one]    ### extracts the elements corresponding with just.one
      sort1 <- sapply (just.one, function (just.one) {l1[[just.one]]})  ### determines the ordering
      jo2[sort1] <- jo1
      A.lambda <- A.lambda + diag (jo2)
    }

    for (i in less.sparse)
    {
      ci <- l1[[i]]
      a.ci <- a.coefs[ci, i]
      A.lambda[ci,ci] <- A.lambda[ci,ci] + A.lambda2[i] * outer (a.ci, a.ci)
    }
  }
  else
    A.lambda <- penalty$getpenmat (beta = coefm0, env = env, x = x, ...)      # if the 'x' argument is not needed (e.g. for penalreg penalty) then it should be accounted for automatically... (hopefully ;-)

  if (intercept)
  {
    A.lambda <- cbind (0, A.lambda)
    A.lambda <- rbind (0, A.lambda)
  }

  A.lambda
}

.gnetControl <- function (x = NULL, var.eps = .Machine$double.eps, max.steps = 5000, conv.eps = 1e-03, conv.stop = TRUE, c1 = 1e-08, digits = 5, ...){

  if (!is.numeric (var.eps) || var.eps < 0)
    stop ("value of var.eps must be >= 0")

  max.steps <- as.integer (max.steps)
  digits <- as.integer (digits)

  if (max.steps < 0)
    stop ("max.steps must be positive integer")

  if (!is.numeric (conv.eps) || conv.eps <= 0)
    stop ("value of conv.eps must be > 0")

  if (!is.logical (conv.stop))
    stop ("conv.stop must be 'TRUE' or 'FALSE'")

  if (!is.numeric (c1) || c1 < 0)
    stop ("value of 'c1' must be >= 0")

  if (!is.numeric (digits) || digits < 0)
    stop ("value of 'digits' must be >= 0")

  list (var.eps = var.eps, max.steps = max.steps, conv.eps = conv.eps, conv.stop = conv.stop, digits = digits, c1 = c1)
}





#' predict method for penreg class models
#'
#' @param fit the model fit.
#' @param newdata a data frame. if NULL the fitted values are returned.
#' @param type whether the linear predictor ("link", the default) or the mean function ("response") should be returned.
#' @param which if the model was evaluated over a grid, which column of coefficients to return.
#' @method predict penreg
#' @return a matrix or vector
#' @export
#'
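#' @examples
#' ## A minimal hypothetical sketch (assumes the built-in mtcars data); robpr
#' ## stores the formula that predict() needs to build the newdata model matrix.
#' \dontrun{
#' fit <- robpr(mpg ~ wt + hp, data = mtcars, penalty = "LASSO")
#' predict(fit, newdata = mtcars, type = "response")
#' }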
predict.penreg <- function(fit, newdata = NULL, type = c("link", "response"), which = NULL){

  type <- match.arg(type)

  if (!is.null(fit$family)){
    family <- fit$family
  }
  else if (is.null(fit$family)){
    family <- gaussian()
  }

  if (is.null(newdata)){
    if (is.null(attr(fit, "is.list")) || isFALSE(attr(fit, "is.list"))){
      mu <- fit$fitted
      if (type=="response"){return(mu)}else if(type=="link"){return(family$linkfun(mu))}
    }
    else{
      if (!is.null(which)){
        mu <- fit$fitted[, which]
        if (type=="response"){return(mu)}else if(type=="link"){return(family$linkfun(mu))}
      }
      else {
        mu <- fit$fitted
        if (type=="response"){return(mu)}else if(type=="link"){return(apply(mu,2,family$linkfun))}
      }
    }
  } else {
    newdata = model.matrix(fit$formula, newdata)

    if (is.null(attr(fit, "is.list")) || isFALSE(attr(fit, "is.list"))){
      eta <- as.vector(as.matrix(newdata) %*% fit$coefficients)
      if (type=="link"){return(eta)}else if(type=="response"){family$linkinv(eta)}
    }
    else if (attr(fit, "is.list")){
      if (!is.null(which)){
        eta <- as.vector(as.matrix(newdata) %*% as.matrix(fit$coefficients[, which]))
        if (type=="link"){return(eta)}else if(type=="response"){family$linkinv(eta)}
      } else {
        eta <- apply(fit$coefficients, 2, function(b) as.vector(as.matrix(newdata) %*% b))
        if (type=="link"){return(eta)}else if(type=="response"){apply(eta,2,function(e)family$linkinv(e))}
      }
    }
  }
}

#' print method for penreg objects
#'
#' @param fit the model fit
#' @method print penreg
#' @keywords internal
#' @export
print.penreg <- function(fit){

  darkpurple <- crayon::make_style(rgb(0.34, 0.16, 0.29))
  brightgreen <- crayon::make_style(rgb(0.536, 0.7378, 0))
  cat(darkpurple("Call: "))
  print(fit$call)
  if (attr(fit, "penalty") == "BATS"){
      cat("\n\n")
      cat(crayon::green(paste0("Optimal Parameters : "), "nu=", round(fit$nu, 3), "eta=", round(fit$eta, 3), "\n\n"))
    }
    else if (attr(fit, "penalty") == "GDP"){
      cat("\n\n")
      cat(crayon::green(paste0("Optimal Parameters : "), "alpha=", round(fit$alpha,3), "zeta=", round(fit$zeta, 3), "\n\n"))
    }
    else if (attr(fit, "penalty") == "genet"){
      cat("\n\n")
      cat(crayon::green(paste0("Optimal Parameters : "), "alpha=", round(fit$alpha, 3), "kappa=", round(fit$kappa, 3), "lambda=", round(fit$lambda, 3), "\n\n"))
    }
    else if (attr(fit, "penalty") == "Bridge" || attr(fit, "penalty") == "robBridge"){
      cat("\n\n")
      cat(crayon::green(paste0("Optimal Parameters : "), "shape=", round(fit$shape,3), "rate=", round(fit$rate, 3), "\n\n"))
    }
    else{
      cat("\n\n")
      if (!is.null(fit$lambda.opt)){
        cat(crayon::green(paste0("Optimal lambda : "), fit$lambda.opt, "\n\n"))
      } else {
        cat(crayon::green(paste0("Optimal lambda : "), fit$lambda, "\n\n"))
      }
    }
    cat("Statistics for optimal model : \n \n")
    cat(crayon::cyan("Coefficients: \n\n"))

    if (is.matrix(fit$coefficients)){
      # when a lambda grid was used, show the coefficients and sigma of the optimal model
      wch <- attr(fit, "which.opt"); if (is.null(wch)) wch <- 1
      coefs <- fit$coefficients[, wch]
      sigma <- fit$sigma[wch]
    }
    else {coefs <- fit$coefficients; sigma <- fit$sigma}
    print(round(coefs, 3))
    cat("\n")
    cat(crayon::blue("Residual standard deviation: "), round(sigma, 3), "\n")
  if (!is.null(fit$logLik)){
      cat(crayon::red("log-likelihood :"), round(fit$logLik, 3), "\n")
      if (!is.null(fit$logPost)) cat(crayon::magenta("log-posterior :"), round(fit$logPost, 3), "\n")
    }
  if (attr(fit, "penalty") == "sglasso"){
    if (length(fit$lambda) > 1){
      which.incl <- fit$included[,attr(fit, "which.opt")] == 1
    }
    else{
      which.incl <- fit$included[,1] == 1
    }
    included <- rownames(fit$coefficients)[which.incl]
    bluecyan <- crayon::make_style(rgb(0, 0.4665, 0.5335))
    cat(bluecyan("Selected Variables:"), included, "\n")
  }
  cat(brightgreen("Penalty: "), attr(fit, "penalty"), "\n")
}