#' Fit the sparse group-subgroup lasso (SGSL)
#'
#' @param x p by N matrix of predictors (N: sample size, p: number of predictors)
#' @param y 1 by N matrix of the response variable
#' @param type One of "lasso" (the standard lasso), "group" (the group lasso), "ggroup" (the group lasso applied at the group and then the subgroup level), "ggroupind" (group and subgroup selection followed by the lasso on individual features), "sgsl" (the sparse group-subgroup lasso) or "groupsgl" (the sparse group lasso at the subgroup level).
#' @param index.subgroup matrix of subgroup indices: each row corresponds to a group and each entry gives the subgroup of the corresponding predictor (NA if the predictor does not belong to that group). See the example below.
#' @param tau multiplier used to build the multiplicative grid of penalty parameters lambda, starting from the maximal lambda value
#' @param delta Along the lasso solution path, the best descriptive model is the one which minimizes the loss function: (residual sum of squares)/(estimate of the model error variance) - (sample size) + delta*(number of predictors in the selected model). If delta = 2, this loss function is Mallows' Cp.
#' @param delta.group delta applied to C_p criterion for group lasso
#' @param delta.subgroup delta applied to C_p criterion for group lasso among subgroups
#' @param delta.ind delta applied to C_p criterion for lasso with individual features
#' @param alpha1 weight on the group-level penalty term (used when type = "sgsl")
#' @param alpha2 weight on the subgroup-level penalty term (used when type = "sgsl")
#' @param alpha3 weight on the individual-feature penalty term (used when type = "sgsl")
#' @param cv.criterion logical. If TRUE and type = "sgsl", the alpha weights are chosen by cross-validation.
#' @param nlam number of lambda values along the regularization path (used when type = "sgsl")
#' @param standardize logical. If TRUE, the data are standardized before fitting.
#'
#' @return A data frame of indicators for the selected predictors: 1 if a predictor is selected and 0 otherwise. For type = "sgsl", a list containing these indicators (interest) and the alpha weights used (alpha.out).
#' @export
#'
#' @examples
#' set.seed(1)
#' x <- matrix(rnorm(360), nrow=12)
#' y <- 0.5*x[1,] + 0.5*x[2,] + 1.0*x[4,] + matrix(rnorm(30), nrow=1)
#' index.subgroup <- matrix(NA,nrow=3,ncol=12)
#' index.subgroup[1,1:2]=1; index.subgroup[1,3:4]=2
#' index.subgroup[2,5:6]=3; index.subgroup[2,7:8]=4
#' index.subgroup[3,9:10]=5; index.subgroup[3,11:12]=6
#' out_lasso <- sgsl(x,y,type="lasso",index.subgroup = index.subgroup)
#' out_group <- sgsl(x,y,type="group",index.subgroup = index.subgroup,tau=0.94)
#' out_ggroup <- sgsl(x,y,type="ggroup",index.subgroup = index.subgroup,tau=0.94)
#' out_ggroupind <- sgsl(x,y,type="ggroupind",index.subgroup = index.subgroup,tau=0.94)
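#' ## Sketch of the full sparse group-subgroup fit; type = "sgsl" relies on the
#' ## package's compiled C routine, so it is shown but not run here:
#' \dontrun{
#' out_sgsl <- sgsl(x, y, type = "sgsl", index.subgroup = index.subgroup, tau = 0.94,
#'                  alpha1 = 0.05, alpha2 = 0.2, alpha3 = 0.1)
#' }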
#'
sgsl <- function(x,y,type=c("lasso", "group", "ggroup", "ggroupind", "sgsl", "groupsgl")[1],
index.subgroup,
tau=0.94,
alpha1=0.05,
alpha2=0.2,
alpha3= 0.1,
cv.criterion=FALSE,
nlam=100,
delta=2,delta.group=2,delta.subgroup=2,delta.ind=2,
standardize=TRUE){
tools <- form.tools(index.subgroup)
index <- tools$index
p.group <- tools$p.group
p.subgroup <- tools$p.subgroup
x <- as.data.frame(x)
y <- as.data.frame(y)
## allocate names to X
if(is.null(colnames(x))){
colnames(x) <- paste0("X_",1:ncol(x))
}
if(is.null(colnames(y))){
colnames(y) <- paste0("y",1:ncol(y))
}
## Methods ##
if (type == "lasso"){
## Apply Lasso ##
out <- lasso.computations(XX=x,response=y,delta=delta,standardize=standardize)
} else if (type == "group"){
## Apply pure group Lasso ##
out <- pure.grplasso.computations(XX=x,response=y,
index=index,p.group=p.group,tau=tau,
delta.group=delta.group,
standardize=standardize)
} else if (type == "ggroup"){
## Apply the group lasso among subgroups ##
out <- group.group.lasso.computations(XX=x,response=y,index,index.subgroup,p.group,tau=tau,
delta.group=delta.group,
delta.subgroup=delta.subgroup,
standardize=standardize)
} else if (type == "ggroupind"){
## Apply the lasso with individual features ##
out <- group.group.indlasso.lasso.computations(XX=x,response=y,index,index.subgroup,p.group,tau=tau,
delta.group=delta.group,delta.subgroup=delta.subgroup,delta.ind=delta.ind,
standardize=standardize)
} else if (type == "sgsl"){
out <- sparse.group.subgroup.computations(XX=x,response=y,group.index=index,subgroup.index=index.subgroup,
tau=tau,alpha1,alpha2,alpha3,nlam,
lambdas=NULL,lambda.accuracy=1e-4,
delta.group=delta.group,
cv.criterion=cv.criterion,
nfold=10,alphas.cv.range=seq(0.1,0.95,by=0.05))
} else {
print("type should be either lasso, group, ggroup, ggroupind, sgsl or groupsgl")
}
return(out)
}
## function to form p.group,p.subgroup, and index variables
## p.group : number of covariates in each group
## p.subgroup : vector giving the number of covariates in each subgroup
## index : group assignments
form.tools <- function(index.subgroup){
p.group <- apply(index.subgroup,1,number.non.na)
p.subgroup <- as.numeric(table(index.subgroup))
index <- rep(1:length(p.group),p.group)
list(p.group=p.group,p.subgroup=p.subgroup,index=index)
}
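## Worked illustration (assumes the 3-group / 6-subgroup index.subgroup from the roxygen
## example above):
##   tools <- form.tools(index.subgroup)
##   tools$p.group     # c(4, 4, 4)                   : covariates per group
##   tools$p.subgroup  # c(2, 2, 2, 2, 2, 2)          : covariates per subgroup
##   tools$index       # c(1,1,1,1, 2,2,2,2, 3,3,3,3) : group assignment per covariate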
###################################
## Functions for organizing data ##
###################################
## function to count number of non-NA elements in a vector
number.non.na <- function(x){
return(sum(!is.na(x)))
}
####################
## LASSO Approach ##
####################
lasso.computations <- function(XX,response,delta,standardize){
interest <- matrix(0,nrow=ncol(XX),ncol=ncol(response))
interest <- as.data.frame(interest)
rownames(interest) <- colnames(XX)
colnames(interest) <- colnames(response)
data.yy <- response
for(i in 1:ncol(interest)){
##print(i)
lasso.out <- lasso(yy=data.yy[,i],XX,delta=delta,standardize=standardize)
interest[,i] <- lasso.out
}
return(interest)
}
## function to standardize a vector x
make.std <- function(x){
N <- length(x)
( x-mean(x) ) / ( sd(as.vector(x)) * sqrt( N / (N-1) ) )
}
## function to center a vector x
make.center <- function(x){
return(x-mean(x))
}
#' @import lars
lasso <- function(yy,XX,delta=2,standardize=TRUE){
#####################
## Set up the data ##
#####################
y <- yy
X <- XX
N <- length(y)
if(standardize==TRUE){
y1 <- make.std(y)
## Standardized design matrix X
X1 <- apply(X,2,make.std)
} else {
y1 <- y
X1 <- X
}
## adjust use.Gram
if(ncol(X1)>500){
use.Gram <- FALSE
} else {
use.Gram <- TRUE
}
## Run Lasso
Lasso.out <- lars::lars(X1, y1, type = c("lasso"),
trace = FALSE, normalize = FALSE, intercept = FALSE,
use.Gram=use.Gram)
## use Cp-like criterion to find best descriptive model
p = dim(X1)[2]
s = length(Lasso.out$df)
p.pos = NULL
RSS = NULL
for (i in 1:s){
RSS[i] = sum((y1-predict(Lasso.out, X1, s=i, type = c("fit"))$fit)**2)
p.pre = predict(Lasso.out, X1, s=i, type = c("coefficients"))$coefficients
p.pos = c(p.pos,length(p.pre[abs(p.pre)>1e-10]))
}
# Estimated MSE
MSE <- sd(as.vector(y1)) * sqrt( N / (N-1) )
MSE <- MSE^2
p.min = which.min(RSS/MSE+delta*p.pos)
## final best descriptive model
predict.out <- predict(Lasso.out, X1, s=p.min, type = c("coefficients"))
ind <- which(abs(predict.out$coefficients)>1e-10)
sig.variables <- rep(0,ncol(XX))
sig.variables[ind] <- 1
return(sig.variables)
}
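## Note: the selection rule above minimizes RSS/MSE + delta * (number of nonzero
## coefficients); the constant "- N" term quoted in the roxygen documentation does not
## affect the minimizer and is therefore dropped.
## Usage sketch (assumes x and y from the roxygen example, with predictors in the rows
## of x, and that lars is installed; kept as a comment so it is not run when sourced):
##   lasso(as.numeric(y), t(as.matrix(x)), delta = 2)   # 0/1 indicator per predictor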
#########################
## GROUP LASSO Approach #
#########################
#' @import grplasso
group.lasso <- function(yy,XX,index,tau,
delta=2,standardize=TRUE){
y <- yy
X <- XX
N <- length(y)
if(standardize==TRUE){
y1 <- make.std(y)
X1 <- apply(X,2,make.std)
} else {
y1 <- y
X1 <- X
}
## Use a multiplicative grid for penalty parameter lambda, starting at maximal lambda value
lambda <- grplasso::lambdamax(X1,y=y1, index=index,penscale=sqrt,model=LinReg(),
center=FALSE,
standardize=standardize) * c(tau^(0:100),0)
## Run Group Lasso
Lasso.out <- grplasso::grplasso(X1, y = y1, index = index, lambda = lambda,
model = LinReg(),
penscale = sqrt,
control = grpl.control(update.hess = "lambda",
trace = 0),center=FALSE,
standardize=standardize)
## use Cp-like criterion to find best descriptive model
p = dim(X1)[2]
s = length(Lasso.out$lambda)
p.pos = NULL
RSS = NULL
for (i in 1:s){
RSS[i] = sum((y1-Lasso.out$fitted[,i])**2)
p.pre = Lasso.out$coefficients[,i]
p.pos = c(p.pos,length(p.pre[abs(p.pre)>1e-10]))
}
## Estimated MSE
MSE <- sd(as.vector(y1)) * sqrt( N / (N-1) )
MSE <- MSE^2
p.min = which.min(RSS/MSE+delta*p.pos)
## final best descriptive model
predict.out <- Lasso.out$coefficients[,p.min]
ind <- which(abs(predict.out)>1e-10)
sig.variables <- rep(0,ncol(XX))
sig.variables[ind] <- 1
return(sig.variables)
}
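## Usage sketch (assumes x, y, index.subgroup from the roxygen example and that grplasso
## is installed; the 12 predictors fall into 3 groups of 4):
##   index <- form.tools(index.subgroup)$index                       # c(1,1,1,1, 2,2,2,2, 3,3,3,3)
##   group.lasso(as.numeric(y), t(as.matrix(x)), index, tau = 0.94)  # 0/1 vector of length 12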
##############################
## PURE GROUP LASSO Approach #
##############################
pure.grplasso.computations <- function(XX,response,index,p.group,tau,
delta.group=2,
standardize=TRUE){
interest <- matrix(0,nrow=ncol(XX),ncol=ncol(response))
interest <- as.data.frame(interest)
rownames(interest) <- colnames(XX)
colnames(interest) <- colnames(response)
data.yy <- response
for(i in 1:ncol(interest)){
##print(i)
lasso.out <- group.lasso(data.yy[,i],XX,index,tau,
delta=delta.group,standardize=standardize)
interest[,i] <- lasso.out
}
return(interest)
}
############################################
## PURE GROUP LASSO & GROUP LASSO Approach #
############################################
## delta.group : delta applied to C_p criterion for group lasso
## delta.subgroup : delta applied to C_p criterion for group lasso among subgroups
group.group.lasso <- function(yy,XX,index,index.subgroup,p.group,tau,
delta.group=2,delta.subgroup=2,
standardize=TRUE){
## group lasso to groups
group.lasso.out <- group.lasso(yy,XX,index,tau,
delta=delta.group,standardize=standardize)
sig.variables <- group.lasso.out
main.ind <- which(sig.variables==1)
## setting up to apply group Lasso to subgroups
tmp.subgroup <- as.vector(t(index.subgroup))
tmp.subgroup <- tmp.subgroup[!is.na(tmp.subgroup)]
new.index.subgroup <- tmp.subgroup[main.ind]
if(sum(main.ind)!=0){
X.tmp <- XX[,main.ind]
subgroup.lasso.out <- group.lasso(yy,X.tmp,new.index.subgroup,tau,
delta=delta.subgroup,standardize=standardize)
sig.variables[main.ind] <- subgroup.lasso.out
}
return(sig.variables)
}
group.group.lasso.computations <- function(XX,response,index,index.subgroup,p.group,tau,
delta.group=2,
delta.subgroup=2,standardize=TRUE){
interest <- matrix(0,nrow=ncol(XX),ncol=ncol(response))
interest <- as.data.frame(interest)
rownames(interest) <- colnames(XX)
colnames(interest) <- colnames(response)
data.yy <- response
for(i in 1:ncol(interest)){
##print(i)
lasso.out <- group.group.lasso(data.yy[,i],XX,
index,index.subgroup,p.group,tau,
delta.group,delta.subgroup,
standardize=standardize)
interest[,i] <- lasso.out
}
return(interest)
}
######################################################
## PURE GROUP LASSO, GROUP LASSO, and LASSO Approach #
######################################################
## delta.group : delta applied to C_p criterion for group lasso
## delta.subgroup : delta applied to C_p criterion for group lasso among subgroups
## delta.ind : delta applied to C_p criterion for lasso with individual features
group.group.indlasso.lasso <- function(yy,XX,index,index.subgroup,p.group,tau,
delta.group=2,delta.subgroup=2,delta.ind=2,
standardize=TRUE){
## group lasso to groups
group.lasso.out <- group.lasso(yy,XX,index,tau,
delta=delta.group,standardize=standardize)
sig.variables <- group.lasso.out
main.ind <- which(sig.variables==1)
################################################
## setting up to apply group Lasso to subgroups #
#################################################
tmp.subgroup <- as.vector(t(index.subgroup))
tmp.subgroup <- tmp.subgroup[!is.na(tmp.subgroup)]
new.index.subgroup <- tmp.subgroup[main.ind]
if(sum(main.ind)!=0){
X.tmp <- XX[,main.ind]
subgroup.lasso.out <- group.lasso(yy,X.tmp,new.index.subgroup,tau,
delta=delta.subgroup,standardize=standardize)
sig.variables[main.ind] <- subgroup.lasso.out
}
######################################################
## setting up to apply lasso among variables selected #
#######################################################
new.main.ind <- which(sig.variables==1)
if(sum(new.main.ind)!=0){
X.tmp <- XX[,new.main.ind]
lasso.out <- lasso(yy,X.tmp,
delta=delta.ind,standardize=standardize)
sig.variables[new.main.ind] <- lasso.out
}
return(sig.variables)
}
group.group.indlasso.lasso.computations <- function(XX,response,index,index.subgroup,p.group,tau,
delta.group=2,delta.subgroup=2,delta.ind=2,
standardize=TRUE){
interest <- matrix(0,nrow=ncol(XX),ncol=ncol(response))
interest <- as.data.frame(interest)
rownames(interest) <- colnames(XX)
colnames(interest) <- colnames(response)
data.yy <- response
for(i in 1:ncol(interest)){
##print(i)
lasso.out <- group.group.indlasso.lasso(data.yy[,i],XX,index,index.subgroup,p.group,tau,
delta.group,delta.subgroup,delta.ind,
standardize=standardize)
interest[,i] <- lasso.out
}
return(interest)
}
################################
## SPARSE GROUP SUBGROUP LASSO #
################################
sparse.group.subgroup <- function(yy,XX,group.index,subgroup.index,tau,alpha1=0.45,alpha2=0.45,
alpha3=1-alpha1-alpha2,
nlam=100,lambdas=NULL,lambda.accuracy=1e-4,
delta=2,standardize=TRUE){
y <- yy
X <- XX
N <- length(y)
if(standardize==TRUE){
y1 <- make.std(y)
## Standardized design matrix X
X1 <- apply(X,2,make.std)
} else {
y1 <- y
X1 <- X
}
## Put data in a list
data.list <- list(x=X1,y=y1)
## Run sparse group subgroup lasso
Lasso.out <- subgroup.SGL(data.list, group.index=group.index,
subgroup.index=subgroup.index,
type = "linear",nlam=nlam,standardize=FALSE,
alpha1=alpha1,alpha2=alpha2,alpha3=alpha3,
lambdas=lambdas,
lambda.accuracy=lambda.accuracy)
if(ncol(X1)==1){
Lasso.out$beta <- t(as.matrix(Lasso.out$beta))
}
## Samuel's corrections 11/9/2011
## use Cp-like criterion to find best descriptive model
p = dim(X1)[2]
s = length(Lasso.out$lambdas)
p.pos = NULL
RSS = NULL
for (i in 1:s){
RSS[i] = sum((y1-predictSGL(Lasso.out, X1, lam=i))**2)
p.pre = Lasso.out$beta[,i]
p.pos = c(p.pos,length(p.pre[abs(p.pre)>1e-10]))
}
## Estimated MSE
MSE <- sd(as.vector(y1)) * sqrt( N / (N-1) )
MSE <- MSE^2
p.min = which.min(RSS/MSE+delta*p.pos)
##print("subgroup SGL")
##print(Lasso.out$lambdas[p.min])
## final best descriptive model
predict.out <- Lasso.out$beta[,p.min]
ind <- which(abs(predict.out)>1e-10)
sig.variables <- rep(0,ncol(XX))
sig.variables[ind] <- 1
return(list(sig.variables=sig.variables,predict.out=predict.out))
}
## cross-validation code to get one set of alphas for sparse.group.subgroup
cv.sparse.group.subgroup.alphas <- function(yy,XX,group.index,subgroup.index,tau,alphas=NULL,
nfold=10,ratio=1,
nlam=100,lambdas=NULL,lambda.accuracy=1e-4,
delta=2,standardize=TRUE){
## Matrix to store results
residmat <- matrix(NA, nrow(alphas), nfold)
## do transpose to get correct dimension
y <- yy
X <- XX
## Randomly partition the data
all.folds <- cv.folds(length(y),nfold)
for(a in 1:nrow(alphas)){
for(j in seq(nfold)){
## data to omit
omit <- all.folds[[j]]
yy.fold <- t(y[-omit])
XX.fold <- t(X[-omit,,drop=FALSE])
beta.omit <- sparse.group.subgroup(yy.fold,XX.fold,group.index,subgroup.index,tau,
alpha1=as.numeric(alphas[a,1]),
alpha2=as.numeric(alphas[a,2]),
alpha3=as.numeric(alphas[a,3]),
nlam=nlam,lambdas=lambdas,lambda.accuracy=lambda.accuracy,
delta=delta,standardize=standardize)
## Find final fit with data omitted
fit <- X[omit,,drop=FALSE] %*% beta.omit$predict.out
## Store residual
residmat[a,j] <- apply((y[omit]-fit)^2,2,sum)
}
}
cv <- apply(residmat,1,mean)
## Check which alpha's lead to min(cv)
alpha.ind <- which(cv==min(cv,na.rm=TRUE)) ## Ignore NA values
alpha.opt <- alphas[alpha.ind,]
## Is alpha.opt unique? If not, take average.
if(length(alpha.ind)>1){
alpha.opt <- apply(alpha.opt,2,mean)
}
return(alpha.opt)
}
make.alphas <- function(alphas.cv=NULL,ratio=1){
## Index of the fixed alpha (supplied as a single value)
fixed <- which(sapply(alphas.cv,length)==1)
## Index of the alpha tied to the varying alpha through ratio (supplied as NULL)
other <- which(sapply(alphas.cv,is.null))
## Index of varying alpha
vary <- setdiff(1:length(alphas.cv),c(fixed,other))
alphas <- matrix(0,nrow=length(alphas.cv[[vary]]),ncol=3)
colnames(alphas) <- c("alpha1","alpha2","alpha3")
alphas[,vary] <- alphas.cv[[vary]]
alphas[,other] <- ratio * alphas.cv[[vary]]
alphas[,fixed] <- alphas.cv[[fixed]]
## Only keep rows whose weights sum to 1
alphas <- good.alphas(alphas)
return(alphas)
}
## function to keep those alphas whose weights sum to 1
good.alphas <- function(alphas){
## Only keep rows whose weights sum to 1 (use a small tolerance to avoid floating-point misses)
ind <- which(abs(apply(alphas,1,sum) - 1) < 1e-8)
alphas <- alphas[ind,]
return(alphas)
}
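## Sketch: good.alphas() filters a candidate grid down to convex combinations, e.g.
##   grid <- expand.grid(alpha1 = c(0.1, 0.2), alpha2 = c(0.2, 0.3), alpha3 = c(0.5, 0.7))
##   good.alphas(grid)   # keeps only the rows (0.2, 0.3, 0.5) and (0.1, 0.2, 0.7)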
cv.sparse.group.subgroup <- function(yy,XX,group.index,subgroup.index,tau,
alphas.cv.range=c(seq(0.01,0.1,by=0.01),seq(0.15,0.95,by=0.05)),
nfold=10,nlam=100,lambdas=NULL,lambda.accuracy=1e-4,
delta=2,standardize=TRUE,sam.implement=FALSE){
if(sam.implement==FALSE){
alphas.cv <- list(alpha1.cv = alphas.cv.range, alpha2.cv = alphas.cv.range, alpha3.cv = alphas.cv.range)
alphas <- do.call(expand.grid,alphas.cv)
alphas <- good.alphas(alphas)
cv.out <- cv.sparse.group.subgroup.alphas(yy,XX,group.index,subgroup.index,
tau,alphas=alphas,
nfold=nfold,
nlam=nlam,lambdas=lambdas,lambda.accuracy=lambda.accuracy,
standardize=standardize)
alpha.opt <- as.numeric(cv.out)
} else {
###########################################################
## First application: alpha3=1/3, alpha1=ratio * alpha2, ratio=1 ##
###########################################################
alphas.cv <- list(alpha1.cv = NULL, alpha2.cv = alphas.cv.range, alpha3.cv = 1/3)
ratio <- 1
alphas <- make.alphas(alphas.cv,ratio)
cv.out <- cv.sparse.group.subgroup.alphas(yy,XX,group.index,subgroup.index,
tau,alphas=alphas,
nfold=nfold,ratio=ratio,
nlam=nlam,lambdas=lambdas,lambda.accuracy=lambda.accuracy,
standardize=standardize)
alpha.opt <- as.numeric(cv.out)
#############################################################################################
## Second application: alpha2= alphas[2], alpha1=ratio * alpha3, ratio=alphas[1]/alphas[3] ##
#############################################################################################
alphas.cv <- list(alpha1.cv = NULL, alpha2.cv = alpha.opt[2], alpha3.cv = alphas.cv.range)
ratio <- alpha.opt[1] / alpha.opt[3]
alphas <- make.alphas(alphas.cv,ratio)
cv.out <- cv.sparse.group.subgroup.alphas(yy,XX,group.index,subgroup.index,
tau,alphas=alphas,
nfold=nfold,ratio=ratio,
nlam=nlam,lambdas=lambdas,lambda.accuracy=lambda.accuracy,
delta=delta,standardize=standardize)
alpha.opt <- as.numeric(cv.out)
############################################################################################
## Third application: alpha1= alphas[1], alpha2=ratio * alpha3, ratio=alphas[2]/alphas[3] ##
############################################################################################
alphas.cv <- list(alpha1.cv = alpha.opt[1], alpha2.cv = NULL, alpha3.cv = alphas.cv.range)
ratio <- alpha.opt[2] / alpha.opt[3]
alphas <- make.alphas(alphas.cv,ratio)
cv.out <- cv.sparse.group.subgroup.alphas(yy,XX,group.index,subgroup.index,
tau,alphas=alphas,
nfold=nfold,ratio=ratio,
nlam=nlam,lambdas=lambdas,lambda.accuracy=lambda.accuracy,
delta=delta,standardize=standardize)
alpha.opt <- as.numeric(cv.out)
}
############################################################
## Re-run sparse group-subgroup with optimal alpha values ##
############################################################
Lasso.out <- sparse.group.subgroup(yy,XX,group.index,subgroup.index,tau,
alpha1=as.numeric(alpha.opt[1]),
alpha2=as.numeric(alpha.opt[2]),
alpha3=as.numeric(alpha.opt[3]),
nlam=nlam,lambdas=lambdas,lambda.accuracy=lambda.accuracy,
delta=delta,standardize=standardize)
list(sig.variables = Lasso.out$sig.variables, alphas=alpha.opt)
}
sparse.group.subgroup.computations <- function(XX,response,group.index,subgroup.index,tau,alpha1,alpha2,
alpha3,nlam,lambdas,
lambda.accuracy,
delta.group=2,format.data=TRUE,
cv.criterion=FALSE,nfold=10,alphas.cv.range=seq(0.1,0.95,by=0.05)){
interest <- matrix(0,nrow=ncol(XX),ncol=ncol(response))
interest <- as.data.frame(interest)
rownames(interest) <- colnames(XX)
colnames(interest) <- colnames(response)
data.yy <- response
## Store alpha values
alpha.out <- matrix(0,nrow=3,ncol=ncol(response))
for(i in 1:ncol(interest)){
##print(i)
if(cv.criterion==TRUE){
lasso.out <- cv.sparse.group.subgroup(data.yy[,i],XX,group.index,subgroup.index,tau,
alphas.cv.range=alphas.cv.range,
nfold=nfold,nlam=nlam,lambdas=lambdas,
lambda.accuracy=lambda.accuracy,
delta=delta.group)
alpha.out[,i] <- as.numeric(lasso.out$alphas)
} else {
lasso.out <- sparse.group.subgroup(data.yy[,i],XX,group.index,subgroup.index,
tau,alpha1,alpha2,alpha3,nlam=nlam,lambdas=lambdas,
lambda.accuracy=lambda.accuracy,
delta=delta.group)
}
interest[,i] <- lasso.out$sig.variables
}
return(list(interest=interest,alpha.out=alpha.out))
}
#######################################################
# R function to implement Sparse Group-Subgroup Lasso #
#######################################################
#############
# Functions #
#############
################
# subgroup.SGL #
################
# data : list that contains design matrix (x) and response vector (y)
# group.index: vector indicating which covariates belong to which group
# subgroup.index: matrix indicating which covariates belong to which subgroup within a group,
# Each row of subgroup.index corresponds to a group, and each entry indicates the subgroup to which that covariate belongs;
# an entry of NA means that the covariate has no subgroup association. We ASSUME row 1 of subgroup.index corresponds to elements
# in group 1, row 2 to elements in group 2, etc.
# type : linear regression model
# maxit: maximum number of iterations in algorithm
# thresh: threshold to determine if beta converges
# min.frac: used in determining appropriate lambda values; this is the minimum value of penalty parameter (lambda_max) as a fraction of maximum value
# nlam : number of lambda values used
# gamma : fitting parameter used in backtracking (inner loop of subgroup)
# standardize: indicator to standardize covariates
# verbose : indicator to display results as algorithm progresses
# step : fitting parameter used for initial backtracking step size
# reset : fitting parameter used in nesterov momentum; it is the number of iterations before momentum is reset
# alpha1 : alpha_1 value
# alpha2 : alpha_2 value
# lambdas : lambda values used; if NULL, program selects best set of lambda values
# lambda.accuracy : accuracy of lambda.max calculation
subgroup.SGL <- function(data, group.index, subgroup.index,
type = "linear", maxit = 1000,
thresh = 0.001, min.frac = 0.1, nlam = 20,
gamma = 0.8, standardize = TRUE, verbose = FALSE, step = 1, reset = 10,
alpha1 = 0.45, alpha2=0.45, alpha3=1-alpha1-alpha2, lambdas = NULL, lambda.accuracy=1e-4){
X.transform <- NULL
if(standardize == TRUE){
X <- data$x
means <- apply(X,2,mean)
X <- t(t(X) - means)
col.norms <- apply(X,2,function(x)(sqrt(sum(x^2))))  # column Euclidean norms (not variances)
X <- t(t(X) / col.norms)
data$x <- X
X.transform <- list(X.scale = col.norms, X.means = means)
}
if(type == "linear"){
if(standardize == TRUE){
intercept <- mean(data$y)
data$y <- data$y - intercept
}
Sol <- oneDimNew(data, group.index,subgroup.index, thresh, nlam = nlam, lambdas = lambdas,
inner.iter = maxit, outer.iter = maxit, outer.thresh = thresh,
gamma = gamma, step = step, reset = reset, alpha1 = alpha1, alpha2=alpha2, alpha3=alpha3,
min.frac = min.frac, verbose = verbose, lambda.accuracy = lambda.accuracy)
if(standardize == TRUE){
Sol <- list(beta = Sol$beta, lambdas = Sol$lambdas, type = type, intercept = intercept, X.transform = X.transform)
}
if(standardize == FALSE){
Sol <- list(beta = Sol$beta, lambdas = Sol$lambdas, type = type, X.transform = X.transform)
}
}
class(Sol) = "subgroup.SGL"
return(Sol)
}
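## Usage sketch for the internal solver (assumes the package's compiled "CWrapper" routine
## is loaded and reuses the objects from the roxygen example above):
##   dat <- list(x = t(as.matrix(x)), y = as.numeric(y))   # n x p design and response vector
##   fit <- subgroup.SGL(dat, group.index = form.tools(index.subgroup)$index,
##                       subgroup.index = index.subgroup, nlam = 20)
##   dim(fit$beta)   # p coefficients by nlam lambda values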
##############
# oneDimNew #
##############
# data : list that contains design matrix (x) and response vector (y)
# group.index: vector indicating which covariates belong to which group
# subgroup.index: matrix indicating which covariates belong to which subgroup within a group,
# Each row of subgroup.index corresponds to a group, and each entry indicates the subgroup to which that covariate belongs;
# an entry of NA means that the covariate has no subgroup association.
# thresh: threshold to determine if beta converges
# nlam : number of lambda values used
# lambdas : lambda values used; if NULL, program selects best set of lambda values
# beta.naught : initial beta values, these must be zero!!!
# inner.iter : maximum number of iterations for inner loop (set to maxit)
# outer.iter : maximum number of iterations for outer loop (set to maxit)
# outer.thresh : threshold to determine convergence in iterations for outer loop
# gamma : fitting parameter used in backtracking (inner loop of subgroup)
# step : fitting parameter used for initial backtracking step size
# reset : fitting parameter used in nesterov momentum; it is the number of iterations before momentum is reset
# alpha1 : alpha_1 value
# alpha2 : alpha_2 value
# min.frac: used in determining appropriate lambda values; this is the minimum value of penalty parameter (lambda_max) as a fraction of maximum value
# verbose : indicator to display results as algorithm progresses
oneDimNew <-
function(data, group.index, subgroup.index, thresh = 0.0001, nlam = 20, lambdas = NULL,
beta.naught = rep(0,ncol(data$x)), inner.iter = 100, outer.iter = 100, outer.thresh = 0.0001,
gamma = 0.8, step = 1, reset = 10, alpha1 = 0.45, alpha2=0.45, alpha3=1-alpha1-alpha2, min.frac = 0.05, verbose = FALSE,
lambda.accuracy=1e-4){
if(is.null(lambdas)){
lambdas <- betterPathCalcNew(data = data, group.index = group.index, subgroup.index = subgroup.index, alpha1 = alpha1,
alpha2 = alpha2, alpha3=alpha3, min.frac = min.frac, nlam = nlam,
type = "linear", lambda.accuracy = lambda.accuracy)
}
X <- data$x
y <- data$y
n <- nrow(X)
p <- ncol(X)
## Setting up group lasso stuff ##
ord.group <- order(group.index)
group.index <- group.index[ord.group]
groups <- unique(group.index)
num.groups <- length(groups)
range.group.ind <- rep(0,(num.groups+1))
for(i in 1:num.groups){
range.group.ind[i] <- min(which(group.index == groups[i])) - 1
}
range.group.ind[num.groups+1] <- ncol(X)
group.length <- diff(range.group.ind)
## Setting up sub-group lasso stuff ##
ord.subgroup <- NULL
new.subgroup.index <- NULL
subgroups <- NULL
num.subgroups <- NULL
range.subgroup.ind <- NULL
subgroup.length <- NULL
tmp2 <- 0
for(k in 1:nrow(subgroup.index)){
## Order subgroup indices
ord.subgroup.tmp <- order(subgroup.index[k,],na.last=NA)
ord.subgroup <- c(ord.subgroup,ord.subgroup.tmp)
## Re-order subgroup index
subgroup.index.tmp <- subgroup.index[k,ord.subgroup.tmp]
new.subgroup.index <- c(new.subgroup.index,subgroup.index.tmp)
## Determine unique subgroups in each group
subgroups.tmp <- unique(subgroup.index.tmp)
subgroups <- c(subgroups,subgroups.tmp)
## Number of subgroups in each group
num.subgroups.tmp <- length(unique(subgroup.index.tmp))
num.subgroups <- c(num.subgroups,num.subgroups.tmp)
## Range.subgroup.ind
range.subgroup.ind.tmp <- NULL
## To get correct range.subgroup.ind.tmp
for(i in 1:num.subgroups.tmp){
tmp <- min(which(subgroup.index.tmp==subgroups.tmp[i]))-1 + tmp2
range.subgroup.ind.tmp <- c(range.subgroup.ind.tmp,tmp)
}
range.subgroup.ind.tmp <- c(range.subgroup.ind.tmp,length(subgroup.index.tmp)+tmp2)
range.subgroup.ind <- c(range.subgroup.ind,range.subgroup.ind.tmp[-length(range.subgroup.ind.tmp)])
## length of subgroup
subgroup.length <- c(subgroup.length, diff(range.subgroup.ind.tmp))
## update tmp2
tmp2 <- length(subgroup.index.tmp)+ tmp2
}
range.subgroup.ind <- c(range.subgroup.ind,ncol(X))
## Coming up with other C++ info ##
X <- as.matrix(X[,ord.subgroup])
unOrd <- match(1:length(ord.subgroup),ord.subgroup)
## DONE SETTING UP C STUFF ##
#alpha <- sqrt(2*log(p))/(1+sqrt(2*log(num.groups)/min(group.length)) + sqrt(2*log(p)))
nlam = length(lambdas)
beta.old <- rep(0,ncol(X))
beta.is.zero.group <- rep(1,num.groups)
beta.is.zero.subgroup <- rep(1,sum(num.subgroups))
beta <- array(0, c(ncol(X),nlam))
eta <- rep(0,n)
for(k in 1:nlam){
beta.is.zero.group <- rep(1, num.groups)
beta.is.zero.subgroup <- rep(1,sum(num.subgroups))
beta.old <- rep(0, ncol(X))
eta <- rep(0,n)
junk <- .C("CWrapper", X = as.double(as.vector(X)), y = as.double(y),
indexGroup = as.integer(group.index),indexSubGroup = as.integer(new.subgroup.index),
nrow = as.integer(nrow(X)), ncol = as.integer(ncol(X)),
numGroup = as.integer(num.groups),numSubGroup = as.integer(num.subgroups), totalSubGroup = as.integer(sum(num.subgroups)),
rangeGroupInd = as.integer(range.group.ind), rangeSubGroupInd = as.integer(range.subgroup.ind),
groupLen = as.integer(group.length), subGroupLen = as.integer(subgroup.length),
lambda1 = as.double(lambdas[k]*alpha1), lambda2 = as.double(lambdas[k]*alpha2), lambda3=as.double(lambdas[k]*alpha3),
beta = as.double(beta.old), innerIter = as.integer(inner.iter), outerIter = as.integer(outer.iter),
thresh = as.double(thresh), outerThresh = as.double(outer.thresh), eta = as.double(eta),
gamma = as.double(gamma),
betaIsZeroGroup = as.integer(beta.is.zero.group), betaIsZeroSubGroup = as.integer(beta.is.zero.subgroup),
step = as.double(step), reset = as.integer(reset))
beta.new <- junk$beta
beta[,k] <- beta.new
beta.is.zero.group <- junk$betaIsZeroGroup
beta.is.zero.subgroup <- junk$betaIsZeroSubGroup
eta <- junk$eta
beta.old <- beta.new
if(verbose == TRUE){
write(paste("***Lambda", k, "***"),"")
}
}
return(list(beta = beta[unOrd,], lambdas = lambdas))
}
#######################################
# Function to find good lambda values #
#######################################
betterPathCalcNew <- function(data, group.index, subgroup.index, alpha1 = 0.45, alpha2=0.45, alpha3=1-alpha1-alpha2,
min.frac = 0.05, nlam = 20, type = "linear",lambda.accuracy=1e-4){
n <- nrow(data$x)
if(type == "linear"){
X <- data$x
resp <- data$y
n <- nrow(X)
p <- ncol(X)
}
## function which computes x_+
threshold.function <- function(x){
if(x<0){
return(0)
} else {
return(x)
}
}
## function which computes euclidean norm
euclidean.norm <- function(x){
out <- sqrt(sum(x^2))
return(out)
}
## function which computes the condition under which each group has its coefficients equal to zero.
main.condition <- function(lambda){
group.condition.LHS <- rep(NA,nrow(subgroup.index))
group.condition.RHS <- group.condition.LHS
for(i in 1:nrow(subgroup.index)){
group.condition.RHS[i] <- sum(!is.na(subgroup.index[i,]))
subgroups <- unique(subgroup.index[i,])
subgroups <- subgroups[!is.na(subgroups)]
subgroup.condition <- rep(NA,length(subgroups))
for(m in 1:length(subgroups)){
ind <- subgroups[m]
X.fit <- data.frame(X[,which(subgroup.index[i,]==ind)])
subgroup.length <- ncol(X.fit)
res <- t(X.fit) %*% resp/n # divide by n for consistency with the C++ routine (this scaling could be revisited)
soft.threshold.vector <- apply(abs(res)-alpha3*lambda,1,threshold.function)
soft.threshold.norm <- euclidean.norm(soft.threshold.vector)
subgroup.condition[m] <- threshold.function(soft.threshold.norm - alpha2 * lambda * sqrt(subgroup.length))
}
group.condition.LHS[i] <- euclidean.norm(subgroup.condition)
}
out <- ( group.condition.LHS <= alpha1 * lambda * sqrt(group.condition.RHS) )
return(out)
}
## get initial range of lambda values by gradually increasing the interval
lambda.range <- c(0,2^0) # initial range of lambda values
change.lambda.range <- TRUE # should we adjust lambda range?
j <- 0
while(change.lambda.range){
j <- j+1
lambda <- lambda.range[2]
condition.check <- main.condition(lambda)
if(sum(condition.check)!=length(condition.check)){
# All groups have NOT satisfied the condition, so we update lambda.range
lambda.range[1] <- lambda
lambda.range[2] <- 2^j
} else {
# All groups HAVE satisfied the condition, so we are DONE.
change.lambda.range <- FALSE
}
}
## improve accuracy of lambda.range
num.bisect <- 0
while(abs(diff(lambda.range))>lambda.accuracy){
num.bisect <- num.bisect + 1
lambda <- sum(lambda.range)/2
condition.check <- main.condition(lambda)
if(sum(condition.check)!=length(condition.check)){
# All groups have NOT satisfied the condition, so we update lambda.range
lambda.range[1] <- lambda
} else {
lambda.range[2] <- lambda
}
}
max.lam <- max(lambda.range)
min.lam <- min.frac*max.lam
lambdas <- exp(seq(log(max.lam),log(min.lam), (log(min.lam) - log(max.lam))/(nlam-1)))
return(lambdas)
}
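## The returned grid is decreasing and log-spaced: nlam values from max.lam down to
## min.frac * max.lam, e.g. for max.lam = 1, min.frac = 0.1, nlam = 20 this equals
## exp(seq(log(1), log(0.1), length.out = 20)).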