Nothing
#' calculate the CV optimal beta and estimated support set
#' @description
#' cv_all calculate the CV optimal beta and estimated
#' support set in the problem 1/n |y - X beta|^2 + 1/nu |D beta - gamma|^2 + lambda |gamma|_1.
#'
#' @param X the design matrix
#' @param y the response vector
#' @param D the linear transform
#' @param option options for screening
#'
#' @return beta_hat: CV optimal beta
#' @return stat_cv: various intermedia statistics, including the estimated support sets
#' @export
#'
#' @importFrom glmnet glmnet
cv_screen <- function(X, y, D, option){
# cv_screen calculate the CV optimal beta
# support set in the problem
# 1/n |y - X beta|^2 + 1/nu |D beta - gamma|^2 + lambda |gamma|_1.
#
# input arguments
# X : the design matrix
# y : the response vector
# D : the linear transform
#
# output arguments
# beta_hat: CV optimal beta
# stat_cv: various intermedia statistics, including the estimated support sets
k_fold = 10
n = nrow(X)
# appoint the set of \nu
if (!is.null(option$nu_cv)){
nu_s = option$nu_cv
}else{
nu_s = 10.^seq(2, 0, by = -0.4)
}
# appoint a set of lambda
if (!is.null(option$lambda_cv)){
lambda_s = option$lambda_cv
}else{
lambda_s = 10.^seq(0, -8, by = -0.4)
}
nlambda = length(lambda_s)
# screen for a estimated support set of beta
lambda_vec = 10.^seq(0, -6, by = -0.1)
cvfit = glmnet::cv.glmnet(X, y, lambda = lambda_vec)
index = which(cvfit$lambda == cvfit$lambda.1se)[1]
beta_hat = cvfit$glmnet.fit$beta[, index]
stat_cv = list()
stat_cv$beta_supp = which(abs(beta_hat) >= 10^(-2))
X = X[, stat_cv$beta_supp]
D = D[, stat_cv$beta_supp]
m = nrow(D)
p = ncol(D)
test_size = floor(n / k_fold)
# randomly split data
set.seed(1)
rand_rank = sample(n)
# create matrix to store result for split lasso test loss
loss_sl = array(0, dim = c(k_fold, length(nu_s), nlambda))
for (k in 1:k_fold){
# generate test set
test_index = test_size * (k-1) + (1: test_size)
test = rand_rank[test_index]
# training set
X_train = X[!(1:n %in% test), ]
y_train = y[!(1:n %in% test)]
# test set
X_test = X[test, ]
y_test = y[test]
# normalization
n_train = nrow(X_train)
n_test = nrow(X_test)
X_train = X_train - matrix(rep(colMeans(X_train), n_train), nrow = n_train, ncol = p, byrow = TRUE)
y_train = y_train - mean(y_train)
X_test = X_test - matrix(rep(colMeans(X_test), n_test), nrow = n_test, ncol = p, byrow = TRUE)
y_test = y_test - mean(y_test)
# test loss for split lasso
for (i in 1:length(nu_s)){
nu = nu_s[i]
# generate split LASSO design matrix
A_beta <- rbind(X_train/sqrt(n_train),D/sqrt(nu))
A_gamma <- rbind(matrix(0,n_train,m),-diag(m)/sqrt(nu))
tilde_y <- rbind(matrix(y_train, ncol = 1)/sqrt(n_train),matrix(0,m,1))
# set penalty
penalty <- matrix(1,m+p,1)
for (tmp in 1: p) {
penalty[tmp, 1] = 0
}
# lasso path settings for glmnet
fit = glmnet(cbind(A_beta,A_gamma) , tilde_y, lambda = lambda_s, standardize = FALSE, intercept = FALSE, penalty.factor = penalty)
coefs <- fit$beta
for (j in 1:length(lambda_s)){
coef = coefs[ ,j]
nonzeros = sum(coef != 0)
beta = coef[1:p]
# calculate loss
y_sl = as.matrix(X_test) %*% as.matrix(beta)
loss_sl[k, i, j] = norm(y_sl - y_test, type = "F")^2/test_size
if (nonzeros > option$n_2){
loss_sl[k, i, j] = Inf
}
}
}
}
mean_loss_sl = colMeans(loss_sl)
sd_loss_sl = apply(loss_sl, c(2, 3), stats::sd)
# find minimal
min_ind = which(mean_loss_sl <= min(mean_loss_sl + sd_loss_sl, na.rm = TRUE), arr.ind = TRUE)
ind_r = nrow(min_ind)
nu_number = min_ind[ind_r, 1]
lambda_number = min_ind[ind_r, 2]
nu_sl = nu_s[nu_number]
lambda_sl = lambda_s[lambda_number]
stat_cv$nu = nu_sl
stat_cv$lambda = lambda_sl
# calculate beta
A_beta <- rbind(X/sqrt(n),D/sqrt(nu_sl))
A_gamma <- rbind(matrix(0,n,m),-diag(m)/sqrt(nu_sl))
tilde_y <- rbind(matrix(y, ncol = 1)/sqrt(n),matrix(0,m,1))
fit = glmnet(cbind(A_beta,A_gamma) , tilde_y, lambda = lambda_sl, standardize = FALSE, intercept = FALSE, penalty.factor = penalty)
coef <- fit$beta
beta_hat = coef[1:p]
gamma = coef[(p+1):(p+m)]
stat_cv$gamma_supp = which(abs(gamma) >= 10^(-2))
if (length(stat_cv$beta_supp) + length(stat_cv$gamma_supp) > option$n_2){
gamma_sort = sort(abs(gamma), decreasing = TRUE)
threshold = gamma_sort[option$n_2 - length(stat_cv$beta_supp) + 1]
stat_cv$gamma_supp = which(abs(gamma) > threshold)
}
return(list(beta_hat = beta_hat, stat_cv = stat_cv))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.