#' Robust subgroup analysis and variable selection simultaneously
#'
#' This function utilize the cpp solver when carring out robust subgroup
#' analysis and variable selection simultaneously. And it is the core solver
#' for \code{\link{RSAVS_Path}}.
#'
#' @param y_vec numerical vector of response. n = length(y_vec) is the number of observations.
#' @param x_mat numerical matrix of covariates. Each row for one observation and
#' \code{p = ncol(x_mat)} is the number of covariates.
#' @param l_type character string, type of loss function.
#' \itemize{
#' \item "L1": l-1 loss(absolute value loss)
#' \item "L2": l-2 loss(squared error loss)
#' \item "Huber": Huber loss. Its parameter is given in l_param.
#' }
#' Default value is "L1".
#' @param l_param numeric vector containing necessary parameters of the corresponding loss function.
#' The default value is \code{NULL}.
#' @param p1_type,p2_type a character indicating the penalty types for subgroup identification and variable selection.
#' \itemize{
#' \item "S": SCAD
#' \item "M": MCP
#' \item "L": Lasso
#' }
#' Default values for both parameters are "S".
#' @param p1_param,p2_param numerical vectors providing necessary parameters for the corresponding penalties.
#' \itemize{
#' \item For Lasso, lam = p_param[1]
#' \item For SCAD and MCP, lam = p_param[1], gamma = p_param[2]
#' }
#' Default values for both parameters are \code{c(2, 3.7)}.
#' Note: This function searches the whole lam1_vec * lam2_vec grid for the best solution.
#' Hence the \code{lambda}s provided in these parameters serve only as placeholder
#' and will be ignored and overwritten in the actual computation.
#' @param lam1_vec,lam2_vec numerical vectors of customized lambda vectors.
#' For \code{lam1_vec}, it's preferred to be in the order from small to big.
#' @param min_lam_ratio the ratio between the minimal and maximal lambda, equals to (minimal lambda) / (maximal lambda).
#' The default value is 0.03.
#' @param lam1_len,lam2_len integers, lengths of the auto-generated lambda vectors.
#' @param initial_values list of vector, providing initial values for the algorithm.
#' @param additional a list providing additional variables needed during the algorithm.
#' @param phi numerical variable. A parameter needed for mBIC.
#' @param const_r123 a length-3 numerical vector, providing the scalars needed in the
#' augmented lagrangian part of the ADMM algorithm
#' @param const_abc a length-3 numeric vector, providing the scalars to adjust weight
#' of regression function, penalty for subgroup identification and penalty for
#' variable selection in the overall objective function. Defaults to \code{c(1, 1, 1)}.
#' @param tol numerical, convergence tolerance for the algorithm.
#' @param cd_tol numerical, convergence tolerance for the coordinate descent part
#' when updating \code{mu} and \code{beta}.
#' @param max_iter integer, max number of iteration during the algorithm.
#' @param cd_max_iter integer, max number of iteration during the coordinate descent
#' update of \code{mu} and \code{beta}. If set to 0, will use analytical solution(
#' instead of coordinate descent algorithm) to update \code{mu} and \code{beta}.
#' @param subgroup_benchmark bool. Whether this call should be taken as a benchmark of subgroup identification.
#' If \code{TRUE}, then the penalty for variable selection will be surpressed to a minimal value.
#' @param update_mu list of parameters for updating \code{mu_vec} in the algorithm into meaningful subgroup structure.
#' Defaults to \code{NULL}, which means there is no update performed. The update of \code{mu_vec} is carried out through
#' \code{RSAVS_Determine_Mu} and the necessary parameters in \code{update_mu} are:
#' \itemize{
#' \item \code{UseS}: a bool variable, whether the \code{s_vec} should be used to provide subgroup structure information.
#' \item \code{klim}: a length-3 integer vector, given the range of number of cluster for considering.
#' \item \code{usepam}: a bool variable, whether to use \code{pam} for clustering.
#' \item \code{round_digits}: non-negative integer digits, indicating the rounding digits when merging \code{mu_vec}
#' }
#' Please refer to \code{RSAVS_Determine_Mu} to find out more details about how the algorithm works
#' @param loss_track boolen, whether to track the value of objective function(loss value) during each iteration.
#' @param diff_update boolen, whether to update the difference between each iteration. If set to \code{FALSE},
#' the algorithm will still stop when it reaches \code{max_iter}.
#' @param omp_zsw a length-three integer vector, defaults to \code{c(1, 4, 1)}. It represents how many
#' parallel threads to be used during the update of \code{z}, \code{s} and \code{w} respectively.
#' @param eigen_pnum integer number, representing the number of Eigen threads for matrix computation,
#' defaults to 4.
#' @param s_v2 boolen, whether to use the updated and faster version during the computation of
#' \code{s} and \code{q2}
#' @seealso \code{\link{RSAVS_Path}}, \code{\link{RSAVS_Path_PureR}}, \code{\link{RSAVS_LargeN}}
RSAVS_Solver <- function(y_vec, x_mat, l_type = "L1", l_param = NULL,
p1_type = "S", p1_param = c(2, 3.7), p2_type = "S", p2_param = c(2, 3.7),
const_r123, const_abc = rep(1, 3),
initial_values, additional, tol = 0.001, max_iter = 10, cd_max_iter = 1, cd_tol = 0.001,
phi = 1.0, subgroup_benchmark = FALSE, update_mu = NULL,
loss_track = FALSE, diff_update = TRUE,
omp_zsw = c(1, 4, 1), eigen_pnum = 1, s_v2 = TRUE){
# Core solver for small n situation
# check the dataset
y_vec <- matrix(y_vec, ncol = 1) # make sure y_vec is column vector
n <- length(y_vec)
x_mat <- matrix(x_mat, nrow = n) # make sure x_mat is a matrix, even if it has only one column
p <- ncol(x_mat)
# Under construction, no variable checking is implemented!
r1 <- const_r123[1]
r2 <- const_r123[2]
r3 <- const_r123[3]
const_a <- const_abc[1]
const_b <- const_abc[2]
const_c <- const_abc[3]
# prepare update functions for z, s and w
if(l_type == "L1"){
UpdateZ <- RSAVS_UpdateZ_L1
}else{
if(l_type == "L2"){
UpdateZ <- RSAVS_UpdateZ_L2
}else{
if(l_type == "Huber"){
UpdateZ <- RSAVS_UpdateZ_Huber
}else{
stop("Unsupported type of loss function!")
}
}
}
if(p1_param[1] == 0){
# No penalty for mu part
UpdateS <- RSAVS_UpdateSW_Identity
initial_values$q2_init <- rep(0, n * (n - 1) / 2) # fix q2 at 0
# r2 <- 0 # fix r2 to 0
}else{
if(p1_type == "L"){
UpdateS <- RSAVS_UpdateSW_Lasso
}else{
if(p1_type == "S"){
UpdateS <- RSAVS_UpdateSW_SCAD
}else{
if(p1_type == "M"){
UpdateS <- RSAVS_UpdateSW_MCP
}else{
stop("Unsupported type of penalty for mu!")
}
}
}
}
if(p2_param[1] == 0){
# No penalty for beta part
UpdateW <- RSAVS_UpdateSW_Identity
initial_values$q3_init <- rep(0, p) # fix q3 at 0
# r3 <- 0 # fix r3 to 0
}else{
if(p2_type == "L"){
UpdateW <- RSAVS_UpdateSW_Lasso
}else{
if(p2_type == "S"){
UpdateW <- RSAVS_UpdateSW_SCAD
}else{
if(p2_type == "M"){
UpdateW <- RSAVS_UpdateSW_MCP
}else{
stop("Unsupported type of penalty for beta!")
}
}
}
}
#------ main algorithm ------
res <- RSAVS_Solver_Cpp(y_vec, x_mat, n, p, l_type, l_param,
p1_type, p1_param, p2_type, p2_param,
const_r123, const_abc, tol, max_iter, cd_tol, cd_max_iter,
initial_values, additional, phi, loss_track, diff_update,
omp_zsw, eigen_pnum, s_v2)
# update the resulting mu vector into meaningfull subgroup results
if(is.null(update_mu)){
res$mu_updated_vec <- res$mu_vec
}else{
d_mat <- RSAVS_Generate_D_Matrix(n)
useS <- update_mu$useS
if(is.null(update_mu$klim)){
klim <- c(2, 7, 4)
}else{
klim <- update_mu$klim
}
if(is.null(update_mu$usepam)){
usepam <- length(res$mu_vec < 2000)
}else{
usepam <- update_mu$usepam
}
round_digits <- update_mu$round_digits
if(useS){
group_res <- RSAVS_S_to_Groups(res$s_vec, n)
mu_updated_vec <- RSAVS_Determine_Mu(res$mu_vec, group_res, klim = klim, usepam = usepam)
}else{
mu_updated_vec <- RSAVS_Determine_Mu(res$mu_vec, klim = klim, usepam = usepam, round_digits = round_digits)
}
res$mu_updated_vec <- mu_updated_vec
res$s_vec <- SparseM::as.matrix(d_mat %*% mu_updated_vec)
}
return(res)
}
#' Robust subgroup analysis and variable selection simultaneously
#'
#' This function carries out robust subgroup analysis and variable selection
#' simultaneously. It utilizes the cpp core solver and supports different
#' types of loss functions and penalties.
#'
#' @param y_vec numerical vector of response. n = length(y_vec) is the number of observations.
#' @param x_mat numerical matrix of covariates. Each row for one observation and
#' \code{p = ncol(x_mat)} is the number of covariates.
#' @param l_type character string, type of loss function.
#' \itemize{
#' \item "L1": l-1 loss(absolute value loss)
#' \item "L2": l-2 loss(squared error loss)
#' \item "Huber": Huber loss. Its parameter is given in l_param.
#' }
#' Default value is "L1".
#' @param l_param numeric vector containing necessary parameters of the corresponding loss function.
#' The default value is \code{NULL}.
#' @param p1_type,p2_type a character indicating the penalty types for subgroup identification and variable selection.
#' \itemize{
#' \item "S": SCAD
#' \item "M": MCP
#' \item "L": Lasso
#' }
#' Default values for both parameters are "S".
#' @param p1_param,p2_param numerical vectors providing necessary parameters for the corresponding penalties.
#' \itemize{
#' \item For Lasso, lam = p_param[1]
#' \item For SCAD and MCP, lam = p_param[1], gamma = p_param[2]
#' }
#' Default values for both parameters are \code{c(2, 3.7)}.
#' Note: This function searches the whole lam1_vec * lam2_vec grid for the best solution.
#' Hence the \code{lambda}s provided in these parameters serve only as placeholder
#' and will be ignored and overwritten in the actual computation.
#' @param lam1_vec,lam2_vec numerical vectors of customized lambda vectors.
#' For \code{lam1_vec}, it's preferred to be in the order from small to big.
#' @param lam1_sort,lam2_sort boolen, whether to force sorting the provided \code{lam1_vec} and \code{lam2_vec}.
#' By default, \code{lam1_vec} will sort in increasing order while \code{lam2_vec} in decreasing order.
#' @param min_lam1_ratio,min_lam2_ratio the ratio between the minimal and maximal
#' lambda, equals to (minimal lambda) / (maximal lambda). The default value is 0.03.
#' @param lam1_max_ncvguard a safe guard constant for \code{lam1_max} when the penalty is nonconvex such as SCAD and MCP.
#' @param lam1_len,lam2_len integers, lengths of the auto-generated lambda vectors.
#' @param initial_values list of vector, providing initial values for the algorithm.
#' @param phi numerical variable. A parameter needed for mBIC.
#' @param const_r123 a length-3 numerical vector, providing the scalars needed in the
#' augmented lagrangian part of the ADMM algorithm
#' @param const_abc a length-3 numeric vector, providing the scalars to adjust weight
#' of regression function, penalty for subgroup identification and penalty for
#' variable selection in the overall objective function. Defaults to \code{c(1, 1, 1)}.
#' @param tol numerical, convergence tolerance for the algorithm.
#' @param cd_tol numerical, convergence tolerance for the coordinate descent part
#' when updating \code{mu} and \code{beta}.
#' @param max_iter integer, max number of iteration during the algorithm.
#' @param cd_max_iter integer, max number of iteration during the coordinate descent
#' update of \code{mu} and \code{beta}. If set to 0, will use analytical solution(
#' instead of coordinate descent algorithm) to update \code{mu} and \code{beta}.
#' @param subgroup_benchmark bool. Whether this call should be taken as a benchmark of subgroup identification.
#' If \code{TRUE}, then the penalty for variable selection will be surpressed to a minimal value.
#' @param update_mu list of parameters for updating \code{mu_vec} in the algorithm into meaningful subgroup structure.
#' Defaults to \code{NULL}, which means there is no update performed. The update of \code{mu_vec} is carried out through
#' \code{RSAVS_Determine_Mu} and the necessary parameters in \code{update_mu} are:
#' \itemize{
#' \item \code{UseS}: a bool variable, whether the \code{s_vec} should be used to provide subgroup structure information.
#' \item \code{klim}: a length-3 integer vector, given the range of number of cluster for considering.
#' \item \code{usepam}: a bool variable, whether to use \code{pam} for clustering.
#' \item \code{round_digits}: non-negative integer digits, indicating the rounding digits when merging \code{mu_vec}
#' }
#' Please refer to \code{RSAVS_Determine_Mu} to find out more details about how the algorithm works
#' @param loss_track boolen, whether to track the value of objective function(loss value) during each iteration.
#' @param diff_update boolen, whether to update the difference between each iteration. If set to \code{FALSE},
#' the algorithm will still stop when it reaches \code{max_iter}.
#' @param omp_zsw a length-three integer vector, defaults to \code{c(1, 4, 1)}. It represents how many
#' parallel threads to be used during the update of \code{z}, \code{s} and \code{w} respectively.
#' @param eigen_pnum integer number, representing the number of Eigen threads for matrix computation,
#' defaults to 4.
#' @param s_v2 boolen, whether to use the updated and faster version during the computation of
#' \code{s} and \code{q2}
#' @param dry_run boolen, whether this is a so-called 'dry-run'. A dry-run will not carry out the real
#' core solver, but only prepares and return the necessary initial values, additional values and
#' solution plane for the core solver.
#' @seealso \code{\link{RSAVS_Solver}}, \code{\link{RSAVS_Path_PureR}}, \code{\link{RSAVS_LargeN}}
#' @examples
#' # a toy example
#' # first we generate data
#' n <- 200 # number of observations
#' q <- 5 # number of active covariates
#' p <- 50 # number of total covariates
#' k <- 2 # number of subgroups
#'
#' # k subgroup effect, centered at 0
#' group_center <- seq(from = 0, to = 2 * (k - 1), by = 2) - (k - 1)
#' # covariate effect vector
#' beta_true <- c(rep(1, q), rep(0, p - q))
#' # subgroup effect vector
#' alpha_true <- sample(group_center, size = n, replace = TRUE)
#' x_mat <- matrix(rnorm(n * p), nrow = n, ncol = p) # covariate matrix
#' err_vec <- rnorm(n, sd = 0.1) # error term
#' y_vec <- alpha_true + x_mat %*% beta_true + err_vec # response vector
#'
#' # a simple analysis using default loss and penalties
#' res <- RSAVS_Path(y_vec = y_vec, x_mat = x_mat,
#' lam1_len = 50, lam2_len = 40,
#' phi = 5)
#' @export
RSAVS_Path <- function(y_vec, x_mat, l_type = "L1", l_param = NULL,
p1_type = "S", p1_param = c(2, 3.7), p2_type = "S", p2_param = c(2, 3.7),
lam1_vec = NULL, lam2_vec = NULL, lam1_sort = TRUE, lam2_sort = TRUE,
min_lam1_ratio = 0.03, min_lam2_ratio = 0.03, lam1_max_ncvguard = 100,
lam1_len = 50, lam2_len = 40,
const_r123 = NULL, const_abc = rep(1, 3),
initial_values = NULL, phi = 1.0, tol = 0.001, max_iter = 10,
cd_max_iter = 1, cd_tol = 0.001,
subgroup_benchmark = FALSE, update_mu = NULL, loss_track = FALSE, diff_update = TRUE,
omp_zsw = c(1, 4, 1), eigen_pnum = 4, s_v2 = TRUE, dry_run = FALSE){
### preparation ###
# preparation for x and y #
y_vec <- matrix(y_vec, ncol = 1) # make sure y_vec is column vector
n <- length(y_vec)
x_mat <- matrix(x_mat, nrow = n) # make sure x_mat is a matrix, even if it has only one column
p <- ncol(x_mat)
# preparation for loss function #
if(l_type == "L2"){
# l_type = "2"
l_param = 0
} else{
if(l_type == "L1"){
# l_type = "1"
l_param = 0
} else {
if(l_type == "Huber"){
# l_type = "H"
huber_c = l_param[1]
}else{
stop("Error of Loss type!")
}
}
}
# preparation for penalties #
if(p1_type == "L"){
p1_lam = p1_param[1]
} else{
if((p1_type == "S") || (p1_type == "M")){
p1_lam = p1_param[1]
p1_gamma = p1_param[2]
} else {
stop("Error of Penalty 1 type!")
}
}
if(p2_type == "L"){
p2_lam = p2_param[1]
} else {
if((p2_type == "S") || (p2_type == "M")){
p2_lam = p2_param[1]
p2_gamma = p2_param[2]
} else {
stop("Error of Penalty 2 type!")
}
}
# preparation for r1, r2 and r3
if(is.null(const_r123)){
message("`const_r123` is missing. Use default settings!")
r1 <- 2
if(p1_type == "L"){
r2 <- 2
}
if(p1_type == "S"){
r2 <- max(1.3 * 1 / (p1_gamma - 1), 1 / (p1_gamma - 1) + 1, 2)
}
if(p1_type == "M"){
r2 <- max(1.3 * 1 / p1_gamma, 1 / p1_gamma + 1, 2)
}
if(p2_type == "L"){
r3 <- 2
}
if(p2_type == "S"){
r3 <- max(1.3 * 1 / (p2_gamma - 1), 1 / (p2_gamma - 1) + 1, 2)
}
if(p2_type == "M"){
r3 <- max(1.3 * 1 / p2_gamma, 1 / p2_gamma + 1, 2)
}
const_r123 <- c(r1, r2, r3)
}else{
const_r123 <- abs(const_r123)
r1 <- const_r123[1]
r2 <- const_r123[2]
r3 <- const_r123[3]
# check for r2
r2min <- 2
if(p1_type == "S"){
r2min <- max(1.3 * 1 / (p1_param[2] - 1), 1 / (p1_param[2] - 1) + 1, 2)
}
if(p1_type == "M"){
r2min <- max(1.3 * 1 / p1_param[2], 1 / p1_param[2] + 1, 2)
}
if(r2 < r2min){
message("r2 = ", r2, "< r2min = ", r2min, ". Modify value of r2!")
r2 <- r2min
const_r123[2] <- r2
}
# check for r3
r3min <- 2
if(p2_type == "S"){
r3min <- max(1.3 * 1 / (p2_param[2] - 1), 1 / (p2_param[2] - 1) + 1, 2)
}
if(p2_type == "M"){
r3min <- max(1.3 * 1 / p2_param[2], 1 / p2_param[2] + 1, 2)
}
if(r3 < r3min){
message("r3 = ", r3, "< r3min = ", r3min, ". Modify value of r3!")
r3 <- r3min
const_r123[3] <- r3
}
}
# check const_abc
const_abc <- abs(const_abc)
# preparation for lam_vec
if(is.null(lam1_vec)){
# newer version
message("lam1_vec is missing, use default values...")
lam1_max <- RSAVS_Get_Lam_Max(y_vec = y_vec, l_type = l_type, l_param = l_param,
lam_ind = 1,
const_abc = const_abc, eps = 10^(-6))
lam1_min <- lam1_max * min_lam1_ratio
if(p1_type != "L"){
lam1_max <- lam1_max * lam1_max_ncvguard # safe guard for non-convex penalties
}
# lam1_vec <- exp(seq(from = log(lam1_max), to = log(lam1_min), length.out = lam1_len))
lam1_vec <- exp(seq(from = log(lam1_min), to = log(lam1_max), length.out = lam1_len))
}else{
# lam1_vec <- sort(abs(lam1_vec), decreasing = T)
# lam1_length = length(lam1_vec)
lam1_len <- length(lam1_vec)
if(lam1_sort){
lam1_vec <- sort(abs(lam1_vec), decreasing = FALSE)
}else{
lam1_vec <- abs(lam1_vec)
}
}
if(is.null(lam2_vec)){
# newer version
message("lam2_vec is missing, use default values...")
lam2_max <- RSAVS_Get_Lam_Max(y_vec = y_vec, x_mat = x_mat,
l_type = l_type, l_param = l_param,
lam_ind = 2,
const_abc = const_abc, eps = 10^(-6))
lam2_min <- lam2_max * min_lam2_ratio
lam2_vec <- exp(seq(from = log(lam2_max), to = log(lam2_min), length.out = lam2_len))
}else{
lam2_len <- length(lam2_vec)
if(lam2_sort){
lam2_vec <- sort(abs(lam2_vec), decreasing = TRUE)
}else{
lam2_vec <- abs(lam2_vec)
}
}
if(subgroup_benchmark){
# supress lambda for variable selection if this is subgroup benchmark
lam2_vec <- lam2_vec * 0.05
}
# --- prepare initial values for the algorithm ---
if(is.null(initial_values)){
message("`initial_values` is missing, use default settings!")
# if(l_type == "L1"){
# mu0 <- median(y_vec)
# }else{
# if(l_type == "L2"){
# mu0 <- mean(y_vec)
# }else{
# if(l_type == "Huber"){
# tmp <- MASS::rlm(y_vec ~ 1, k = l_param[1])
# mu0 <- tmp$coefficients[1]
# }
# }
# }
mu_init <- as.vector(y_vec) # lam1 start from small values, hence we use y_vec as initial values for mu_vec
d_mat <- RSAVS_Generate_D_Matrix(n)
initial_values <- list(beta_init = rep(0, p),
mu_init = mu_init,
z_init = y_vec - mu_init,
# s_init = rep(0, n * (n - 1) / 2),
s_init = as.vector(d_mat %*% mu_init),
w_init = rep(0, p),
q1_init = rep(0, n),
q2_init = rep(0, n * (n - 1) / 2),
q3_init = rep(0, p))
}
# --- prepare other values ---
message("prepare intermediate variables needed by the algorithm")
# intermediate variables needed for the algorithm
message("generate pairwise difference matrix")
d_mat <- RSAVS_Generate_D_Matrix(n) # pairwise difference matrix
message("compute `beta_lhs`")
beta_lhs <- NA # left part for updating beta
if(n >= p){
beta_lhs <- solve(r1 * t(x_mat) %*% x_mat + r3 * diag(nrow = p))
}else{
beta_lhs <- 1.0 / r3 * (diag(nrow = p) - r1 * t(x_mat) %*% solve(r1 * x_mat %*% t(x_mat) + r3 * diag(nrow = n)) %*% x_mat)
}
# left part for updating mu
message("compute `mu_lhs`")
mu_lhs <- solve(SparseM::as.matrix(r1 * diag(nrow = n) + r2 * SparseM::t(d_mat) %*% d_mat))
mu_beta_lhs <- matrix(0, nrow = 1, ncol = 1) # left part for updating beta and mu together
if(cd_max_iter == 0){
message("compute `mu_beta_lhs'")
mu_beta_lhs <- matrix(0, nrow = n + p, ncol = n + p)
mu_beta_lhs[1 : n, 1 : n] <- SparseM::as.matrix(r1 * diag(nrow = n) + r2 * SparseM::t(d_mat) %*% d_mat)
mu_beta_lhs[n + (1 : p), 1 : n] <- r1 * t(x_mat)
mu_beta_lhs[1 : n, n + (1 : p)] <- r1 * x_mat
mu_beta_lhs[n + (1 : p), n + (1 : p)] <- r1 * t(x_mat) %*% x_mat + r3 * diag(nrow = p)
mu_beta_lhs <- solve(mu_beta_lhs)
}
additional <- list(d_mat = d_mat,
beta_lhs = beta_lhs,
mu_lhs = mu_lhs,
mu_beta_lhs = mu_beta_lhs)
message("additional variables prepared!")
if(dry_run){
res <- list(
lam1_vec = lam1_vec,
lam2_vec = lam2_vec,
const_r123 = const_r123,
const_abc = const_abc,
additional = additional
)
return(res)
}
# --- variables storing the results ---
beta_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = p)
mu_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = n)
mu_updated_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = n)
z_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = n)
s_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = n * (n - 1) / 2)
w_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = p)
q1_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = n)
q2_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = n * (n - 1) / 2)
q3_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = p)
step_vec <- rep(1, lam1_len * lam2_len)
diff_vec <- rep(tol + 1, lam1_len * lam2_len)
loss_mat <- matrix(0, nrow = lam1_len * lam2_len, ncol = max_iter + 1)
# run algorithm over the path
pb <- progressr::progressor(steps = lam1_len * lam2_len)
for(i in 1 : lam1_len){
for(j in 1 : lam2_len){
# index for current solution and initial values
ind <- (j - 1) * lam1_len + i
# grid points for lam1 and lam2 are designed in a matrix form:
#
# +--- lam_2 ---+
# || --> |
# || --> |
# |+----------> |
# lam_1 |
# | |
# | |
# | |
# +-------------+
#
# But the variables are stored in a col-major vector form
# with `ind <- (j - 1) * lam1_len + i` for coord `(i, j)`
# if(j == 1){
# ind_old <- i - 1
# }else{
# ind_old <- (j - 2) * lam1_len + i
# }
# prepare lambda's
p1_param[1] <- lam1_vec[i]
p2_param[1] <- lam2_vec[j]
# carry out the algorithm
res <- RSAVS_Solver(y_vec, x_mat, l_type = l_type, l_param = l_param,
p1_type = p1_type, p1_param = p1_param, p2_type = p2_type, p2_param = p2_param,
const_r123 = const_r123, const_abc = const_abc,
initial_values = initial_values, additional = additional,
tol = tol, max_iter = max_iter, cd_max_iter = cd_max_iter, cd_tol = cd_tol,
phi = phi, subgroup_benchmark = subgroup_benchmark, update_mu = update_mu,
loss_track = loss_track, diff_update = diff_update,
omp_zsw = omp_zsw, eigen_pnum = eigen_pnum, s_v2 = s_v2)
# store the results
# NOTE: we should store the result before prepare initial values for next iteration.
# Otherwise it causes the initial values to be all 0 when lam2_len = 1.
beta_mat[ind, ] <- res$beta_vec
mu_mat[ind, ] <- res$mu_vec
mu_updated_mat[ind, ] <- res$mu_updated_vec
z_mat[ind, ] <- res$z_vec
s_mat[ind, ] <- res$s_vec
w_mat[ind, ] <- res$w_vec
q1_mat[ind, ] <- res$q1_vec
q2_mat[ind, ] <- res$q2_vec
q3_mat[ind, ] <- res$q3_vec
step_vec[ind] <- res$current_step
diff_vec[ind] <- res$diff
loss_mat[ind, ] <- res$loss_vec
# update inital values for next run
if(j == lam2_len){
ind_old <- i
initial_values <- list(beta_init = beta_mat[ind_old, ],
mu_init = mu_updated_mat[ind_old, ],
z_init = z_mat[ind_old, ],
s_init = s_mat[ind_old, ],
w_init = w_mat[ind_old, ],
q1_init = q1_mat[ind_old, ],
q2_init = q2_mat[ind_old, ],
q3_init = q3_mat[ind_old, ])
}else{
initial_values <- list(beta_init = res$beta_vec,
mu_init = res$mu_updated_vec,
z_init = res$z_vec,
s_init = res$s_vec,
w_init = res$w_vec,
q1_init = res$q1_vec,
q2_init = res$q2_vec,
q3_init = res$q3_vec)
}
pb(message = paste("lam1: ", i, "/", lam1_len, ", lam2: ", j, "/", lam2_len, sep = ""))
}
}
# --- Use mBIC to find the best solution in this solution plane ---
res <- list(beta_mat = beta_mat,
mu_mat = mu_mat,
mu_updated_mat = mu_updated_mat,
z_mat = z_mat,
s_mat = s_mat,
w_mat = w_mat,
q1_mat = q1_mat,
q2_mat = q2_mat,
q3_mat = q3_mat,
step_vec = step_vec,
diff_vec = diff_vec,
lam1_vec = lam1_vec,
lam2_vec = lam2_vec,
loss_mat = loss_mat,
const_r123 = const_r123,
const_abc = const_abc
)
return(res)
}
RSAVS_Simple_Path <- function(y_vec, x_mat, l_type = "L1", l_param = NULL,
p1_type = "S", p1_param = c(2, 3.7), p2_type = "S", p2_param = c(2, 3.7),
lam1_vec = NULL, lam2_vec = NULL,
min_lam1_ratio = 0.03, min_lam2_ratio = 0.03, lam1_max_ncvguard = 100,
lam1_len = 100, lam2_len = 80,
const_r123_s1, const_r123_s2,
const_abc_s1 = rep(1, 3), const_abc_s2 = rep(1, 3),
bic_const_a_s1 = 1, bic_const_a_s2 = 1,
bic_double_loglik_s1 = TRUE, bic_double_loglik_s2 = FALSE,
initial_values_s1, initial_values_s2,
phi_s1 = 5.0, phi_s2 = 5.0,
tol_s1 = 0.001, tol_s2 = 0.001,
max_iter_s1 = 100, max_iter_s2 = 100,
cd_max_iter_s1 = 1, cd_max_iter_s2 = 1,
cd_tol_s1 = 0.001, cd_tol_s2 = 0.001,
subgroup_benchmark_s1 = FALSE, subgroup_benchmark_s2 = TRUE,
update_mu_s11 = NULL, update_mu_s12 = list(useS = FALSE, round_digits = NULL),
update_mu_s21 = NULL, update_mu_s22 = list(useS = FALSE, round_digits = NULL),
complex_upper_bound = 15,
loss_track_s1 = FALSE, loss_track_s2 = FALSE,
diff_update_s1 = TRUE, diff_update_s2 = TRUE,
omp_zsw = c(1, 4, 1), eigen_pnum = 4, s_v2 = TRUE){
message("------ Analysis stage1: search over lam2_vec ------")
message("--- a dry run to prepare some variables ---")
dry_run_s1 <- RSAVS_Path(y_vec = y_vec, x_mat = x_mat, l_type = l_type, l_param = l_param,
p1_type = p1_type, p1_param = p1_param, p2_type = p2_type, p2_param = p2_param,
lam1_vec = lam1_vec, lam2_vec = lam2_vec, lam1_sort = FALSE, lam2_sort = FALSE,
min_lam1_ratio = min_lam1_ratio, min_lam2_ratio = min_lam2_ratio,
lam1_max_ncvguard = lam1_max_ncvguard, subgroup_benchmark = FALSE,
lam1_len = lam1_len, lam2_len = lam2_len,
const_r123 = const_r123_s1, const_abc = const_abc_s1,
initial_values = initial_values_s1,
cd_max_iter = cd_max_iter_s1,
omp_zsw = omp_zsw, eigen_pnum = eigen_pnum, s_v2 = s_v2,
dry_run = TRUE
)
n <- length(y_vec)
p <- ncol(x_mat)
if(is.null(initial_values_s1)){
mu0 <- RSAVS_Get_Mu0(y_vec, l_type, l_param)
initial_values_s1 <- list(beta_init = rep(0, p),
mu_init = rep(mu0, n * (n - 1) / 2),
z_init = as.vector(y_vec - mu0),
s_init = rep(0, n * (n - 1) / 2),
w_init = rep(0, p),
q1_init = rep(0, n),
q2_init = rep(0, n * (n - 1) / 2),
q3_init = rep(0, p))
}
lam1_chosen <- max(dry_run_s1$lam1_vec)
lam2_vec_s1 <- dry_run_s1$lam2_vec
message("--- real analysis ---")
res_s1 <- RSAVS_Path(y_vec = y_vec, x_mat = x_mat, l_type = l_type, l_param = l_param,
p1_type = p1_type, p1_param = p1_param, p2_type = p2_type, p2_param = p2_param,
lam1_vec = lam1_chosen, lam2_vec = lam2_vec_s1, lam2_sort = FALSE,
# min_lam2_ratio = min_lam2_ratio, lam2_len = lam2_len,
subgroup_benchmark = subgroup_benchmark_s1,
const_r123 = const_r123_s1, const_abc = const_abc_s1,
initial_values = initial_values_s1, phi = phi_s1,
tol = tol_s1, max_iter = max_iter_s1,
cd_tol = cd_tol_s1, cd_max_iter = cd_max_iter_s1,
update_mu = update_mu_s11,
loss_track = loss_track_s1, diff_update = diff_update_s1,
omp_zsw = omp_zsw, eigen_pnum = eigen_pnum, s_v2 = s_v2, dry_run = FALSE)
message("------ mBIC check on results over lam2_vec ------")
lam2_len <- length(res_s1$lam2_vec)
mu_further_improve_mat <- matrix(0, nrow = 1 * lam2_len, ncol = n)
beta_further_improve_mat <- matrix(0, nrow = 1 * lam2_len, ncol = p)
bic_vec_s1 <- rep(0, 1 * lam2_len)
group_num_vec_s1 <- rep(1, 1 * lam2_len)
active_num_vec_s1 <- rep(0, 1 * lam2_len)
max_bic <- Inf
message("--- Compute mBIC ---")
bic_res <- future.apply::future_lapply(1 : (1 * lam2_len),
FUN = RSAVS_Compute_BIC_V2,
rsavs_res = list(w_mat = res_s1$w_mat,
mu_updated_mat = res_s1$mu_updated_mat),
y_vec = y_vec, x_mat = x_mat,
l_type = "L1", l_param = NULL,
phi = phi_s1, const_a = bic_const_a_s1,
update_mu = update_mu_s12,
double_log_lik = bic_double_loglik_s1,
from_rsi = FALSE)
gc()
message("--- check mBIC results ---")
for(j in 1 : (1 * lam2_len)){
# print(j)
tmp <- bic_res[[j]]
mu_new <- as.vector(tmp$mu_vec)
beta_new <- tmp$beta_vec
mu_further_improve_mat[j, ] <- mu_new
beta_further_improve_mat[j, ] <- beta_new
bic_vec_s1[j] <- tmp$bic_info["bic"]
# update bic according to complexsity upper bound
current_group_num <- length(unique(mu_new))
current_active_num <- sum(beta_new != 0)
group_num_vec_s1[j] <- current_group_num
active_num_vec_s1[j] <- current_active_num
if((current_active_num + current_group_num) >= complex_upper_bound){
bic_vec_s1[j] <- max_bic
}
# update bic according to active number
if(current_active_num == 0){
bic_vec_s1[j] <- max_bic
}
}
# save the summarized information with the further improved estimation
best_id_s1 <- which.min(bic_vec_s1)
mu_s1 <- mu_further_improve_mat[best_id_s1, ]
beta_s1 <- beta_further_improve_mat[best_id_s1, ]
active_idx <- which(beta_s1 != 0)
if(length(active_idx) == 0){
active_idx <- sample(1 : p, size = 1)
}
if(length(active_idx) > (n / 2)){
active_idx <- active_idx[1 : (n / 2)]
}
active_chosen_num <- length(active_idx)
lam2_chosen <- res_s1$lam2_vec[best_id_s1]
message("------ Analysis stage2: search over lam1_vec ------")
message("--- a dry run to prepare some variables ---")
dry_run_s2 <- RSAVS_Path(y_vec = y_vec, x_mat = x_mat[, active_idx, drop = FALSE],
l_type = l_type, l_param = l_param,
p1_type = p1_type, p1_param = p1_param,
p2_type = p2_type, p2_param = p2_param,
lam1_vec = lam1_vec, lam2_vec = lam2_vec, lam1_sort = FALSE, lam2_sort = FALSE,
min_lam1_ratio = min_lam1_ratio, min_lam2_ratio = min_lam2_ratio,
lam1_max_ncvguard = lam1_max_ncvguard, subgroup_benchmark = FALSE,
lam1_len = lam1_len, lam2_len = lam2_len,
const_r123 = const_r123_s2, const_abc = const_abc_s2,
initial_values = initial_values_s2,
cd_max_iter = cd_max_iter_s2,
omp_zsw = omp_zsw, eigen_pnum = eigen_pnum, s_v2 = s_v2,
dry_run = TRUE
)
if(is.null(initial_values_s2)){
mu0 <- RSAVS_Get_Mu0(y_vec, l_type, l_param)
initial_values_s2 <- list(beta_init = rep(0, active_chosen_num),
mu_init = rep(mu0, n * (n - 1) / 2),
z_init = as.vector(y_vec - mu0),
s_init = rep(0, n * (n - 1) / 2),
w_init = rep(0, active_chosen_num),
q1_init = rep(0, n),
q2_init = rep(0, n * (n - 1) / 2),
q3_init = rep(0, active_chosen_num))
}
lam1_vec <- sort(dry_run_s2$lam1_vec, decreasing = TRUE)
message("--- real analysis ---")
res_s2 <- RSAVS_Path(y_vec = y_vec, x_mat = x_mat[, active_idx, drop = FALSE],
l_type = l_type, l_param = l_param,
p1_type = p1_type, p1_param = p1_param,
p2_type = p2_type, p2_param = p2_param,
lam1_vec = lam1_vec, lam2_vec = lam2_chosen, lam1_sort = FALSE,
# min_lam2_ratio = min_lam2_ratio, lam2_len = lam2_len,
subgroup_benchmark = subgroup_benchmark_s2,
const_r123 = const_r123_s2, const_abc = const_abc_s2,
initial_values = initial_values_s2, phi = phi_s2,
tol = tol_s2, max_iter = max_iter_s2,
cd_tol = cd_tol_s2, cd_max_iter = cd_max_iter_s2,
update_mu = update_mu_s21,
loss_track = loss_track_s2, diff_update = diff_update_s2,
omp_zsw = omp_zsw, eigen_pnum = eigen_pnum, s_v2 = s_v2, dry_run = FALSE)
message("------ mBIC check on results over lam1_vec ------")
lam1_len <- length(lam1_vec)
mu_further_improve_mat <- matrix(0, nrow = lam1_len * 1, ncol = n)
beta_further_improve_mat <- matrix(0, nrow = lam1_len * 1, ncol = active_chosen_num)
bic_vec_s2 <- rep(0, lam1_len * 1)
group_num_vec_s2 <- rep(1, lam1_len * 1)
active_num_vec_s2 <- rep(0, lam1_len * 1)
max_bic <- Inf
message("--- compute mBIC ---")
bic_res <- future.apply::future_lapply(1 : (lam1_len * 1),
FUN = RSAVS_Compute_BIC_V2,
rsavs_res = list(w_mat = res_s2$w_mat,
mu_updated_mat = res_s2$mu_updated_mat),
y_vec = y_vec, x_mat = x_mat[, active_idx, drop = FALSE],
l_type = l_type, l_param = l_param,
phi = phi_s2, const_a = bic_const_a_s2,
update_mu = update_mu_s22,
double_log_lik = bic_double_loglik_s2,
from_rsi = FALSE)
gc()
message("--- check mBIC results ---")
for(j in 1 : (lam1_len * 1)){
# print(j)
tmp <- bic_res[[j]]
mu_new <- as.vector(tmp$mu_vec)
beta_new <- tmp$beta_vec
mu_further_improve_mat[j, ] <- mu_new
beta_further_improve_mat[j, ] <- beta_new
bic_vec_s2[j] <- tmp$bic_info["bic"]
# update bic according to complexsity upper bound
current_group_num <- length(unique(mu_new))
current_active_num <- sum(beta_new != 0)
group_num_vec_s2[j] <- current_group_num
active_num_vec_s2[j] <- current_active_num
if((current_active_num + current_group_num) >= complex_upper_bound){
bic_vec_s2[j] <- max_bic
}
# update bic according to active number
if(current_active_num == 0){
bic_vec_s2[j] <- max_bic
}
}
# save the summarized information with the further improved estimation
best_id_s2 <- which.min(bic_vec_s2)
mu_s2 <- mu_further_improve_mat[best_id_s2, ]
beta_s2 <- beta_further_improve_mat[best_id_s2, ]
lam1_chosen <- res_s2$lam1_vec[best_id_s2]
# combine the final result
mu_best <- mu_s2
beta_best <- rep(0, p)
beta_best[active_idx] <- beta_s2
res_full <- list(res_s1 = res_s1,
res_s2 = res_s2,
best_id_s1 = best_id_s1,
best_id_s2 = best_id_s2,
bic_vec_s1 = bic_vec_s1,
bic_vec_s2 = bic_vec_s2,
group_num_vec_s1 = group_num_vec_s1,
group_num_vec_s2 = group_num_vec_s2,
active_num_vec_s1 = active_num_vec_s1,
active_num_vec_s2 = active_num_vec_s2,
mu_best = mu_best,
beta_best = beta_best
)
return(res_full)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.