Nothing
# =====================================================================
# stepOptim.R - Two-Stage Pre-Optimization for missoNet
# =====================================================================
# Purpose: Efficient hyperparameter selection using a two-stage search
# Stage 1: Fix lambda.theta, search over lambda.beta
# Stage 2: Fix lambda.beta (from Stage 1), search over lambda.theta
# =====================================================================
stepOptim <- function(X, Y, init.obj, GoF, cv, eta, eps,
lamB.vec, lamTh.vec, n.lamB, n.lamTh,
lamB.min.ratio, lamTh.min.ratio,
lamB.scale.factor = NULL, lamTh.scale.factor = NULL,
Beta.maxit, Beta.thr, Theta.maxit, Theta.thr,
verbose) {
# -------------------- Setup --------------------
n <- nrow(X); p <- ncol(X); q <- ncol(Y)
X <- robust_scale(X, init.obj$mx, init.obj$sdx)
Y <- robust_scale(Y, init.obj$my, init.obj$sdy)
Z <- Y; Z[is.na(Z)] <- 0
# -------------------- Observation Probability Matrices --------------------
# rho.mat.1: pxq matrix for X'Y scaling
# rho.mat.2: qxq matrix for Y'Y scaling
obs_prob <- init.obj$obs_prob
rho.mat.1 <- matrix(obs_prob, nrow = p, ncol = q, byrow = TRUE)
rho.mat.2 <- outer(obs_prob, obs_prob, `*`); diag(rho.mat.2) <- obs_prob
# -------------------- Precompute Sufficient Statistics --------------------
# These are the unbiased moment estimators accounting for missing data
xtx <- crossprod(X)
xtx <- make_positive_definite(xtx)
til.xty <- crossprod(X, Z) / rho.mat.1 # Unbiased X'Y
til.ytx <- t(til.xty)
til.yty <- crossprod(Z) / rho.mat.2 # Unbiased Y'Y
til.yty <- make_positive_definite(til.yty)
# -------------------- Lambda Grid Generation --------------------
if (is.null(lamB.vec) || is.null(lamTh.vec)) {
lam.obj <- InitLambda(
lamB = lamB.vec, lamTh = lamTh.vec, init.obj = init.obj,
n = n, p = p, q = q,
n.lamB = n.lamB, n.lamTh = n.lamTh,
lamB.min.ratio = lamB.min.ratio,
lamTh.min.ratio = lamTh.min.ratio,
lamB.scale.factor = NULL,
lamTh.scale.factor = NULL
)
lamB.vec <- sort(unique(lam.obj$lamB.vec), decreasing = TRUE)
lamTh.vec <- sort(unique(lam.obj$lamTh.vec), decreasing = TRUE)
} else {
lamB.vec <- sort(unique(lamB.vec), decreasing = TRUE)
lamTh.vec <- sort(unique(lamTh.vec), decreasing = TRUE)
}
# -------------------- Adaptive Tolerance Settings --------------------
# For pre-optimization, we can use looser tolerances for speed
# These will be tightened in the final model fitting
n_eff <- init.obj$n_eff %||% (n * (1 - mean(init.obj$rho.vec))) # Effective sample size
high_dim <- (n_eff < p) || (n_eff < q) # Check if high-dimensional
if (high_dim) {
# High-dimensional: prioritize speed with looser tolerances
beta_tol_fast <- min(0.01, Beta.thr * 10)
theta_tol_fast <- min(0.01, Theta.thr * 10)
beta_maxit_fast <- min(Beta.maxit, 500)
theta_maxit_fast <- min(Theta.maxit, 500)
} else {
# Low-dimensional: can afford tighter tolerances
beta_tol_fast <- min(0.001, Beta.thr * 5)
theta_tol_fast <- min(0.001, Theta.thr * 5)
beta_maxit_fast <- min(Beta.maxit, 1000)
theta_maxit_fast <- min(Theta.maxit, 1000)
}
# =====================================================================
# Stage 1: Search over lambda.beta (with fixed lambda.theta)
# =====================================================================
GoF.vec <- numeric(length(lamB.vec))
temp.list <- lapply(seq_along(lamB.vec), function(x) matrix(0, p, q))
if (cv) {
mse.vec <- numeric(length(lamB.vec))
}
# Initialize Theta with a conservative choice (largest lambda.theta)
residual.cov <- til.yty / (n - 1) # Initial residual covariance (Beta = 0)
lamTh.mat <- lamTh.vec[1] * init.obj$lamTh.pf
lamTh.mat[lamTh.mat == 0] <- 1e-12
diag(lamTh.mat) <- 1e-12 # Don't penalize diagonal
Theta.out <- tryCatch({
run_glasso(S = residual.cov,
rho = lamTh.mat,
thr = theta_tol_fast,
maxIt = theta_maxit_fast)
}, error = function(e) {
if (verbose > 0) cat(" Glasso failed in warm start, using diagonal approximation\n")
list(wi = diag(pmin(pmax(1/diag(residual.cov), eps), 1/eps)))
})
Theta <- make_symmetric(Theta.out$wi)
if (verbose > 0) {
cat("\n - Stage: 1/2 (Searching lambda.beta)\n")
pb <- txtProgressBar(min = 0, max = length(lamB.vec), style = 3, width = 50, char = "=")
}
# Initialize Beta
Beta <- matrix(0, p, q)
for (i in seq_along(lamB.vec)) {
# Update Beta with current lambda.beta
lamB.mat <- lamB.vec[i] * init.obj$lamB.pf
lamB.mat[lamB.mat == 0] <- 1e-12
Beta <- updateBeta(
Theta = Theta,
B0 = Beta, # Warm start
n = n,
xtx = xtx,
xty = til.xty,
lamB = lamB.mat,
eta = eta,
tolin = beta_tol_fast,
maxitrin = beta_maxit_fast
)$Bhat
# Update residual covariance
E <- Y - X %*% Beta
residual.cov <- getResCov(E, n, rho.mat.2)
# Update Theta
Theta.out <- tryCatch({
run_glasso(S = residual.cov,
rho = lamTh.mat,
thr = theta_tol_fast,
maxIt = theta_maxit_fast)
}, error = function(e) {
list(wi = Theta) # Keep previous Theta
})
Theta <- make_symmetric(Theta.out$wi)
# Store evaluation metrics
if (cv) {
mse.vec[i] <- getEvalErr(
yty = til.yty,
ytx = til.ytx,
xty = til.xty,
xtx = xtx,
Beta = Beta,
n = n
)
}
GoF.vec[i] <- GoF_func(
GoF = GoF,
gamma = 0.5,
n = n,
p = p,
q = q,
Theta = Theta,
S = residual.cov,
Beta = Beta
)
temp.list[[i]] <- Beta
if (verbose > 0) {
setTxtProgressBar(pb, i)
}
}
if (verbose > 0) {
close(pb)
}
# Select optimal lambda.beta
if (cv) {
# Use weighted average of MSE and GoF rankings
# rank_mse <- rank(mse.vec, ties.method = "average")
# rank_gof <- rank(GoF.vec, ties.method = "average")
# combined_score <- 0.5 * rank_mse / length(mse.vec) + 0.5 * rank_gof / length(GoF.vec)
# lamB.id <- which.min(combined_score)
lamB.id <- ceiling((which.min(mse.vec) + which.min(GoF.vec)) / 2)
} else {
lamB.id <- which.min(GoF.vec)
}
lamB.min <- lamB.vec[lamB.id]
lamB.mat <- lamB.min * init.obj$lamB.pf
lamB.mat[lamB.mat == 0] <- 1e-12
Beta <- temp.list[[lamB.id]]
# =====================================================================
# Stage 2: Search over lambda.theta (with fixed lambda.beta)
# =====================================================================
GoF.vec <- numeric(length(lamTh.vec))
if (cv) {
mse.vec <- numeric(length(lamTh.vec))
}
# Compute residual covariance with optimal Beta
E <- Y - X %*% Beta
residual.cov <- getResCov(E, n, rho.mat.2)
if (verbose > 0) {
cat(" - Stage: 2/2 (Searching lambda.theta)\n")
pb <- txtProgressBar(min = 0, max = length(lamTh.vec), style = 3, width = 50, char = "=")
}
# Re-initialize Theta for Stage 2
# Use the first (largest) lambda.theta as starting point
lamTh.mat <- lamTh.vec[1] * init.obj$lamTh.pf
lamTh.mat[lamTh.mat == 0] <- 1e-12
Theta.out <- tryCatch({
run_glasso(S = residual.cov,
rho = lamTh.mat,
thr = theta_tol_fast,
maxIt = theta_maxit_fast)
}, error = function(e) {
if (verbose > 0) cat(" Glasso failed in warm start, using diagonal approximation\n")
list(wi = diag(pmin(pmax(1/diag(residual.cov), eps), 1/eps)))
})
Theta <- make_symmetric(Theta.out$wi)
for (i in seq_along(lamTh.vec)) {
# Update Theta with current lambda.theta
lamTh.mat <- lamTh.vec[i] * init.obj$lamTh.pf
lamTh.mat[lamTh.mat == 0] <- 1e-12
Theta.out <- tryCatch({
run_glasso(S = residual.cov,
rho = lamTh.mat,
thr = theta_tol_fast,
maxIt = theta_maxit_fast)
}, error = function(e) {
list(wi = Theta) # Keep previous Theta
})
Theta <- make_symmetric(Theta.out$wi)
# Update Beta with fixed lambda.beta
Beta <- updateBeta(
Theta = Theta,
B0 = Beta, # Warm start
n = n,
xtx = xtx,
xty = til.xty,
lamB = lamB.mat,
eta = eta,
tolin = beta_tol_fast,
maxitrin = beta_maxit_fast
)$Bhat
# Update residual covariance
E <- Y - X %*% Beta
residual.cov <- getResCov(E, n, rho.mat.2)
# Store evaluation metrics
if (cv) {
mse.vec[i] <- getEvalErr(
yty = til.yty,
ytx = til.ytx,
xty = til.xty,
xtx = xtx,
Beta = Beta,
n = n
)
}
GoF.vec[i] <- GoF_func(
GoF = GoF,
gamma = 0.5,
n = n,
p = p,
q = q,
Theta = Theta,
S = residual.cov,
Beta = Beta
)
if (verbose > 0) {
setTxtProgressBar(pb, i)
}
}
if (verbose > 0) {
close(pb); cat("\n")
}
# Select optimal lambda.theta
if (cv) {
# Use weighted average favoring GoF for Theta selection
# rank_mse <- rank(mse.vec, ties.method = "average")
# rank_gof <- rank(GoF.vec, ties.method = "average")
# combined_score <- 0.3 * rank_mse / length(mse.vec) + 0.7 * rank_gof / length(GoF.vec)
# lamTh.id <- which.min(combined_score)
lamTh.id <- ceiling((which.min(mse.vec) + which.min(GoF.vec)) / 2)
} else {
lamTh.id <- which.min(GoF.vec)
}
lamTh.min <- lamTh.vec[lamTh.id]
# -------------------- Return Results --------------------
return(list(
lamB.unique = lamB.vec,
lamB.min = lamB.min,
lamB.id = lamB.id,
lamTh.unique = lamTh.vec,
lamTh.min = lamTh.min,
lamTh.id = lamTh.id
))
}
# GoF_func <- function(GoF, gamma, n, p, q, Theta, S, Beta) {
# # Compute goodness-of-fit criterion
# if (GoF == "eBIC") {
# # Extended BIC
# loglik <- -n/2 * (q * log(2 * pi) + determinant(solve(Theta), logarithm = TRUE)$modulus +
# sum(diag(S %*% Theta)))
# df_beta <- sum(Beta != 0)
# df_theta <- sum(Theta[upper.tri(Theta)] != 0)
# df <- df_beta + df_theta
#
# # eBIC with gamma parameter for high-dimensional adjustment
# ebic <- -2 * loglik + log(n) * df + 4 * gamma * df * log(max(p, q))
# return(ebic)
# } else if (GoF == "AIC") {
# # Standard AIC
# loglik <- -n/2 * (q * log(2 * pi) + determinant(solve(Theta), logarithm = TRUE)$modulus +
# sum(diag(S %*% Theta)))
# df <- sum(Beta != 0) + sum(Theta[upper.tri(Theta)] != 0)
# aic <- -2 * loglik + 2 * df
# return(aic)
# } else {
# stop("GoF must be either 'eBIC' or 'AIC'")
# }
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.