#' @title
#' Compute information crieteria for the \code{FuncompCGL} model.
#'
#'
#' @description
#' Tune the grid values of the penalty parameter code{lam} and the degrees of freedom of
#' the basis function \code{k} in the \code{FuncompCGL} model by GIC, BIC, or AIC. This
#' function calculates the GIC, BIC, or AIC curve and returns the optimal values of
#' \code{lam} and \code{k}.
#'
#' @usage
#' GIC.FuncompCGL(y, X, Zc = NULL, lam = NULL, nlam = 100, k = 4:10, ref = NULL,
#' intercept = TRUE, W = rep(1,times = p - length(ref)),
#' type = c("GIC", "BIC", "AIC"),
#' mu_ratio = 1.01, outer_maxiter = 1e+6, ...)
#'
#' @param k an integer vector specifying the degrees of freedom of the basis function.
#'
#' @param type a character string specifying which crieterion to use. The choices include
#' \code{"GIC"} (default), \code{"BIC"}, and \code{"AIC"}.
#'
#' @param outer_maxiter maximum number of loops allowed for the augmented Lanrange method.
#'
#' @param \dots other arguments that could be passed to FuncompCL.
#'
#'
#' @inheritParams FuncompCGL
#'
#' @details
#' The \code{FuncompCGL} model estimation is conducted through minimizing the
#' linearly constrained group lasso criterion
#' \deqn{
#' \frac{1}{2n}\|y - 1_n\beta_0 - Z_c\beta_c - Z\beta\|_2^2 + \lambda \sum_{j=1}^{p} \|\beta_j\|_2,
#' s.t. \sum_{j=1}^{p} \beta_j = 0_k.}
#'
#' The tuning parameters can be selected by the generalized information crieterion (GIC),
#' \deqn{
#' GIC(\lambda,k) = \log{(\hat{\sigma}^2(\lambda,k))} +
#' (s(\lambda, k) - 1)k \log{(max(p*k+p_c+1, n))} \log{(\log{n})}/n
#' ,}
#' where
#' \eqn{\hat{\sigma}^2(\lambda,k) = \|y - 1_n\hat{\beta_0}(\lambda, k) -
#' Z_c\hat{\beta_c}(\lambda, k) - Z\hat{\beta}(\lambda, k) \|_{2}^{2}/n} with \eqn{\hat{\beta_0}(\lambda, k)},
#' \eqn{\hat{\beta_c}(\lambda, k)} and \eqn{\hat{\beta}(\lambda, k)} being the regularized estimators
#' of the regression coefficients, and \eqn{s(\lambda, k)} is the number of nonzero coefficient groups in
#' \eqn{\hat{\beta}(\lambda, k)}.
#'
#' @references
#' Sun, Z., Xu, W., Cong, X., Li G. and Chen K. (2020) \emph{Log-contrast regression with
#' functional compositional predictors: linking preterm infant's gut microbiome trajectories
#' to neurobehavioral outcome}, \href{https://arxiv.org/abs/1808.02403}{https://arxiv.org/abs/1808.02403}
#' \emph{Annals of Applied Statistics}.
#'
#' Fan, Y., and Tang, C. Y. (2013) \emph{Tuning parameter selection in high
#' dimensional penalized likelihood},
#' \href{https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/rssb.12001}{https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/rssb.12001}
#' \emph{Journal of the Royal Statistical Society. Series B} \strong{75} 531-552.
#'
#' @return An object of S3 class \code{"GIC.FuncompCGL"} is returned, which is
#' a list containing:
#' \item{FuncompCGL.fit}{a list of length \code{length(k)},
#' with fitted \code{\link{FuncompCGL}}
#' objects of different degrees
#' of freedom of the basis function.}
#'
#' \item{lam}{the sequence of the penalty parameter \code{lam}.}
#'
#' \item{GIC}{a \code{k} by \code{length(lam)} matirx of GIC values.}
#'
#' \item{lam.min}{the optimal values of the degrees of freedom \code{k}
#' and the penalty parameter \code{lam}.}
#'
#' \item{MSE}{a \code{k} by \code{length(lam)} matirx of mean squared errors.}
#'
#' @seealso
#' \code{\link{FuncompCGL}} and \code{\link{cv.FuncompCGL}},
#' and \code{\link[=predict.GIC.FuncompCGL]{predict}},
#' \code{\link[=coef.GIC.FuncompCGL]{coef}} and
#' \code{\link[=plot.GIC.FuncompCGL]{plot}} methods for \code{"GIC.FuncompCGL"}
#' object.
#'
#' @examples
#' \donttest{
#' df_beta = 5
#' p = 30
#' beta_C_true = matrix(0, nrow = p, ncol = df_beta)
#' beta_C_true[1, ] <- c(-0.5, -0.5, -0.5 , -1, -1)
#' beta_C_true[2, ] <- c(0.8, 0.8, 0.7, 0.6, 0.6)
#' beta_C_true[3, ] <- c(-0.8, -0.8 , 0.4 , 1 , 1)
#' beta_C_true[4, ] <- c(0.5, 0.5, -0.6 ,-0.6, -0.6)
#' n_train = 50
#' n_test = 30
#' k_list <- c(4,5)
#' Data <- Fcomp_Model(n = n_train, p = p, m = 0, intercept = TRUE,
#' SNR = 4, sigma = 3, rho_X = 0.2, rho_T = 0.5,
#' df_beta = df_beta, n_T = 20, obs_spar = 1, theta.add = FALSE,
#' beta_C = as.vector(t(beta_C_true)))
#' arg_list <- as.list(Data$call)[-1]
#' arg_list$n <- n_test
#' Test <- do.call(Fcomp_Model, arg_list)
#'
#' ## GIC_cgl: Constrained group lasso
#' GIC_cgl <- GIC.FuncompCGL(y = Data$data$y, X = Data$data$Comp,
#' Zc = Data$data$Zc, intercept = Data$data$intercept,
#' k = k_list)
#' coef(GIC_cgl)
#' plot(GIC_cgl)
#' y_hat <- predict(GIC_cgl, Znew = Test$data$Comp, Zcnew = Test$data$Zc)
#' plot(Test$data$y, y_hat, xlab = "Observed response", ylab = "Predicted response")
#'
#' ## GIC_naive: ignoring the zero-sum constraints
#' ## set mu_raio = 0 to identifying without linear constraints,
#' ## no outer_loop for Lagrange augmented multiplier
#' GIC_naive <- GIC.FuncompCGL(y = Data$data$y, X = Data$data$Comp,
#' Zc = Data$data$Zc, intercept = Data$data$intercept,
#' k = k_list, mu_ratio = 0)
#' coef(GIC_naive)
#' plot(GIC_naive)
#' y_hat <- predict(GIC_naive, Znew = Test$data$Comp, Zcnew = Test$data$Zc)
#' plot(Test$data$y, y_hat, xlab = "Observed response", ylab = "Predicted response")
#'
#' ## GIC_base: random select a component as reference
#' ## mu_ratio is set to 0 automatically once ref is set to a integer
#' ref <- sample(1:p, 1)
#' GIC_base <- GIC.FuncompCGL(y = Data$data$y, X = Data$data$Comp,
#' Zc = Data$data$Zc, intercept = Data$data$intercept,
#' k = k_list, ref = ref)
#' coef(GIC_base)
#' plot(GIC_base)
#' y_hat <- predict(GIC_base, Znew = Test$data$Comp, Zcnew = Test$data$Zc)
#' plot(Test$data$y, y_hat, xlab = "Observed response", ylab = "Predicted response")
#' }
#'
#' @export
GIC.FuncompCGL <- function(y, X, Zc = NULL, lam = NULL, nlam = 100, k = 4:10, ref = NULL,
intercept = TRUE, W = rep(1,times = p - length(ref)),
type = c("GIC", "BIC", "AIC"),
mu_ratio = 1.01, outer_maxiter = 1e+6, ...) {
y <- drop(y)
n <- length(y)
type <- match.arg(type)
object <- as.list(seq(length(k)))
names(object) <- k
if(!is.null(lam) || length(k) == 1) {
## Case I
if(dim(X)[1] == n ) p = ncol(X) / k else p <- ncol(X) - 2
for(i in 1:length(k)){
object[[i]] <- FuncompCGL(y = y, X = X, Zc = Zc, lam = lam, nlam = nlam, ref = ref,
k = k[i], outer_maxiter = outer_maxiter, W = W,
intercept = intercept, mu_ratio = mu_ratio, ...)
}
} else {
## Case II
## find commom lambda first
p <- ncol(X) - 2
this.call <- as.list(seq(length(k)))
for(i in 1:length(k)){
## Caculate integral Z and W matrix (if W is a functoin)
object[[i]] <- FuncompCGL(y = y, X = X, Zc = Zc, nlam = 1, ref = ref,
k = k[i], outer_maxiter = 0, W = W,
intercept = intercept, mu_ratio = mu_ratio, ...)
this.call[[i]] <- object[[i]]$call
}
sseq <- object[[1]]$sseq
lam0 <- max(sapply(object, "[[", "lam")) # Shared lam0 for different k
## Solution path for each df k
for(i in 1:length(k)) {
object[[i]] <- FuncompCGL(y = y, X = object[[i]]$Z, Zc = Zc,
k = k[i], W = object[[i]]$W, ref = ref,
lam = lam0, nlam = nlam,
outer_maxiter = outer_maxiter,
intercept = intercept, mu_ratio = mu_ratio, ...)
object[[i]]$call <- this.call[[i]]
object[[i]]$sseq <- sseq
}
}
# lam_start = max of all lam_start
# lam_end might diff beacuse of stopping criterion
GIC.nlam <- sapply(object, function(x) length(drop(x$lam)))
GIC.nlam_id <- which.min(GIC.nlam)
GIC.lam <- object[[GIC.nlam_id]]$lam
GIC.nlam <- GIC.nlam[GIC.nlam_id]
### >>> get GIC curves <<<
GIC_curve <- matrix(NA, nrow = length(k), ncol = GIC.nlam)
MSE <- GIC_curve
pc = ifelse(is.null(Zc), 0, ncol(Zc)) + as.integer(intercept)
for(i in seq(length(k))) {
df = k[i]
scaler = switch(type,
"GIC" = log(max(p*df+ pc, n)) * log(log(n)) / n,
"BIC" = log(n) / n,
"AIC" = 2 / n
)
predmat <- predict.linear(object = object[[i]], newX = cbind(object[[i]]$Z, Zc))
MSE[i, ] <- apply(predmat[, 1:GIC.nlam] - y, 2, function(x) mean(x^2))
## L: maximuize value of the likelihood fucntion for the estiamted model
## Gaussian model: -2*log(L) = n * log(MSE)
GIC_curve[i, ] <- log(MSE[i, ])
N_zero <- object[[i]]$df[1:GIC.nlam]
if(mu_ratio != 0) {
## CGL or Baseline model, both with linear constraints
GIC_curve[i, ] = GIC_curve[i, ] + (ifelse(N_zero == 0, 0, N_zero - 1) * df + pc) * scaler
} else {
## naive model without linear constraints
GIC_curve[i, ] = GIC_curve[i, ] + (N_zero * df + pc) * scaler
}
}
rownames(GIC_curve) = paste(type, k, sep = "_")
## select the optimal grid valud of lambda and k which achieves minimum value of GIC curve
lam.min <- ggetmin(lam=GIC.lam, cvm = GIC_curve, k_list = k)$lam.min
result <- list(FuncompCGL.fit = object,
lam = GIC.lam,
GIC = GIC_curve,
lam.min = lam.min,
MSE = MSE)
class(result) <- "GIC.FuncompCGL"
return(result)
}
#' @title
#' Compute information crieteria for the \code{compCL} model.
#'
#' @description
#' Tune the penalty parameter code{lam} in the \code{compCGL} model by GIC, BIC, or AIC. This
#' function calculates the GIC, BIC, or AIC curve and returns the optimal value of
#' \code{lam}.
#'
#' @inheritParams compCL
#'
#' @param \dots other arguments that can be passed to compCL.
#'
#' @return an object of S3 class \code{GIC.compCL} is returned, which is a list:
#' \item{compCL.fit}{a fitted \code{\link{compCL}} object.}
#' \item{lam}{the sequence of \code{lam}.}
#' \item{GIC}{a vector of GIC value(s).}
#' \item{lam.min}{the \code{lam} value that minimizes \code{GIC}(\eqn{\lambda}).}
#'
#' @details
#' The model estimation is conducted through minimizing the following criterion:
#' \deqn{\frac{1}{2n}\|y-Z\beta\|_2^2 + \lambda\|\beta\|_1, s.t. \sum_{j=1}^{p} \beta_j = 0.}
#' The GIC is defined as:
#' \deqn{GIC(\lambda) = \log{\hat{\sigma}^2(\lambda)} +
#' (s(\lambda) -1) \log{(max(p, n))} * \log{(\log{n})} / n,}
#' where \eqn{\hat{\sigma}^2(\lambda) = \|y - Z\hat{\beta}(\lambda)\|_{2}^{2}/n},
#' \eqn{\hat{\beta}(\lambda)} is the regularized estimator,
#' and \eqn{s(\lambda)} is the number of nonzero coefficients in \eqn{\hat{\beta}(\lambda)}.
#' Because of the zero-sum constraint, the effective number of free parameters is
#' \eqn{s(\lambda) - 1} for \eqn{s(\lambda) \ge 2}.
#' The optimal \eqn{\lambda} is selected by minimizing \code{GIC}(\eqn{\lambda}).
#'
#'
#' @examples
#' p = 30
#' n = 50
#' beta = c(1, -0.8, 0.6, 0, 0, -1.5, -0.5, 1.2)
#' beta = c(beta, rep(0, times = p - length(beta)))
#' Comp_data = comp_Model(n = n, p = p, beta = beta, intercept = FALSE)
#' GICm1 <- GIC.compCL(y = Comp_data$y, Z = Comp_data$X.comp,
#' Zc = Comp_data$Zc, intercept = Comp_data$intercept)
#' coef(GICm1)
#' plot(GICm1)
#' test_data = comp_Model(n = 100, p = p, beta = Comp_data$beta, intercept = FALSE)
#' y_hat = predict(GICm1, Znew = test_data$X.comp, Zcnew = test_data$Zc)
#' plot(test_data$y, y_hat, xlab = "Observed value", ylab = "Predicted value")
#' abline(a = 0, b = 1, col = "red")
#'
#'
#' @references
#' Lin, W., Shi, P., Peng, R. and Li, H. (2014) \emph{Variable selection in
#' regression with compositional covariates},
#' \href{https://academic.oup.com/biomet/article/101/4/785/1775476}{https://academic.oup.com/biomet/article/101/4/785/1775476}.
#' \emph{Biometrika} \strong{101} 785-979
#'
#' Fan, Y., and Tang, C. Y. (2013) \emph{Tuning parameter selection in high
#' dimensional penalized likelihood},
#' \href{https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/rssb.12001}{https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/rssb.12001}
#' \emph{Journal of the Royal Statistical Society. Series B} \strong{75} 531-552
#'
#' @seealso
#' \code{\link{compCL}} and \code{\link{cv.compCL}},
#' and \code{\link[=coef.GIC.compCL]{coef}}, \code{\link[=predict.GIC.compCL]{predict}} and
#' \code{\link[=plot.GIC.compCL]{plot}} methods for \code{"GIC.compCL"} object.
#'
#' @export
## TODO: add GIC and AIC feature
GIC.compCL <- function(y, Z, Zc = NULL, intercept = FALSE,
lam = NULL, ...) {
this.call <- match.call()
y <- drop(y)
n <- length(y)
p <- ncol(Z)
pc <- ifelse(is.null(Zc), 0, dim(Zc)[2])
pc <- pc + as.integer(intercept)
compCL.object <- compCL(y = y, Z = Z, Zc = Zc, intercept = intercept, lam = lam, ...)
lam <- compCL.object$lam
## >>> MSE <<<
# predmat <- predict(compCL.object, newx = Z, newzc = Zc)
newx <- cbind(compCL.object$Z_log, Zc)
predmat <- predict.linear(compCL.object, newx, s = NULL)
cvraw <- (y - predmat)^2
MSE <- colSums(cvraw) / n
## <<< MSE >>>
## >>> GIC curve <<<
scaler <- log(log(n)) * log(max(p + pc, n)) / n
S <- apply(abs(compCL.object$beta[1:p, ]) > 0, 2, sum) # support
GIC <- log(MSE) + scaler * (ifelse(S>=2, S-1, 0) + pc)
# digits = 5
# GIC <- round(GIC, digits = digits)
## <<< GIC curve >>>
# >>> lambda selection <<<
# GIC.min <- min(GIC[drop(compCL.object$df) > 0])
GIC.min <- min(GIC)
idmin <- GIC <= GIC.min
# idmin[drop(compCL.object$df) < 2 ] <- FALSE
lam.min <- max(lam[idmin])
# <<< lambda selection >>>
result <- list(compCL.fit = compCL.object,
lam = lam,
GIC = GIC,
lam.min = lam.min)
class(result) <- "GIC.compCL"
result$call <- this.call
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.