Nothing
# A function to tune the optimal penalty threshold, and perform sparse
# regression.
# Author: Zhang
## ============================================================##
##============================================================##
## Tuning function
##============================================================##
#' @title Choose the tuning parameter value in sparse regression
#'
#' @description Finds the tuning parameter value that yields the smallest BIC.
#'
#' @param formula an object of class \code{formula} (or one that can be coerced to that class): a symbolic description of the model to be fitted. The response has to be on the left hand side of ~.
#' @param data an optional data frame, list or environment (or object coercible by \code{as.data.frame} to a data frame) containing the variables in the model. If not found in \code{data} when using function \code{MGLMtune}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{MGLMtune} is called.
#' @param dist a description of the distribution to fit. See \code{\link{dist}} for the details.
#' @param penalty penalty type for the regularization term. Can be chosen from \code{"sweep"}, \code{"group"}, or \code{"nuclear"}. See \link{MGLMsparsereg} for the description of each penalty type.
#' @param lambdas an optional vector of the penalty values to tune. If missing, the vector of penalty values will be set inside the function. \code{ngridpt} must be provided if \code{lambdas} is missing.
#' @param ngridpt an optional numeric variable specifying the number of grid points to tune. If \code{lambdas} is given, \code{ngridpt} will be ignored. Otherwise, the maximum \eqn{\lambda} is determined from the data. The smallest \eqn{\lambda}is set to \eqn{1/n}, where \eqn{n} is the sample size.
#' @param warm.start an optional logical variable to specify whether to give warm start at each tuning grid point. If \code{warm.start=TRUE}, the fitted sparse regression coefficients will be used as the initial value when fitting the sparseregression with the next tuning grid.
#' @param keep.path an optional logical variable controling whether to output the whole solution path. The default is \code{keep.path=FALSE}. If \code{keep.path=TRUE}, the sparse regression result at each grid point will be kept, and saved in the output object \code{select.list}.
#' @param display an optional logical variable to specify whether to show each tuning step.
#' @param weight an optional vector of weights assigned to each row of the data. Should be \code{NULL} or a numeric vector. Could be a variable from the \code{data}, or a variable from \code{environment(formula)} with the length equal to the number of rows of the data. If \code{weight=NULL}, equal weights of ones will be assigned.
#' @param init an optional matrix of initial value of the parameter estimates. Should have the compatible dimension with the data. See \code{\link{dist}} for details of dimensions in each distribution.
#' @param penidx a logical vector indicating the variables to be penalized. The default value is \code{rep(TRUE, p)}, which means all predictors are subject to regularization. If \code{X} contains intercept, use \code{penidx=c(FALSE,rep(TRUE,p-1))}.
#' @param maxiters an optional numeric controlling the maximum number of iterations. The default value is \code{maxiters=150}.
#' @param ridgedelta an optional numeric controlling the behavior of the Nesterov's accelerated proximal gradient method. The default value is \eqn{\frac{1}{pd}}{1/(pd)}.
#' @param epsilon an optional numeric controlling the stopping criterion. The algorithm terminates when the relative change in the objective values of two successive iterates is less then \code{epsilon}. The default value is \code{epsilon=1e-5}.
#' @param regBeta an optional logical variable used when running negative multinomial regression (\code{dist="NegMN"}). \code{regBeta} controls whether to run regression on the over-dispersion parameter. The default is \code{regBeta=FALSE}.
#' @param overdisp an optional numerical variable used only when fitting sparse negative multinomial model and \code{regBeta=FALSE}. \code{overdisp} gives the over-dispersion value for all the observations. The default value is estimated using negative-multinomial regression. When \code{dist="MN", "DM", "GDM"} or \code{regBeta=TRUE}, the value of \code{overdisp} is ignored.
#'
#' @return \itemize{
#' \item{\code{select}}{ the final sparse regression result, using the optimal tuning parameter.}
#' \item{\code{path}}{ a data frame with degrees of freedom and BICs at each lambda.}
#' }
#'
#' @author Yiwen Zhang and Hua Zhou
#'
#' @seealso \code{\link{MGLMsparsereg}}
#'
#' @examples
#' set.seed(118)
#' n <- 50
#' p <- 10
#' d <- 5
#' m <- rbinom(n, 100, 0.8)
#' X <- matrix(rnorm(n * p), n, p)
#' alpha <- matrix(0, p, d)
#' alpha[c(1, 3, 5), ] <- 1
#' Alpha <- exp(X %*% alpha)
#' Y <- rdirmn(size=m, alpha=Alpha)
#' sweep <- MGLMtune(Y ~ 0 + X, dist="DM", penalty="sweep", ngridpt=10)
#' show(sweep)
#'
#'
#' @export
MGLMtune <- function(formula, data, dist, penalty, lambdas, ngridpt, warm.start = TRUE,
keep.path = FALSE, display = FALSE, init, weight, penidx, ridgedelta, maxiters = 150,
epsilon = 1e-05, regBeta = FALSE, overdisp) {
call <- match.call()
if (missing(data))
data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
mf <- mf[c(1L, match(c("formula", "data", "weight"), names(mf), 0L))]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame(n = 1))
mt <- attr(mf, "terms")
Y <- model.response(mf, "any")
X <- model.matrix(mt, mf, contrasts)
ow <- getOption("warn")
if (!missing(weight))
weight <- weight[rowSums(Y) != 0]
X <- as.matrix(X[rowSums(Y) != 0, ])
Y <- as.matrix(Y[rowSums(Y) != 0, colSums(Y) != 0])
d <- ncol(Y)
m <- rowSums(Y)
p <- ncol(X)
N <- nrow(Y)
if (missing(weight))
weight <- rep(1, N)
## ----------------------------------------##
## Check distribution and d
## ----------------------------------------##
if (dist == "GDM" && d == 2)
stop("When d=2, GDM model is equivilant to DM model, please use dist='DM'.")
## ----------------------------------------##
## Pennalty type
## ----------------------------------------##
if (penalty == "group")
penalty <- "group_row"
if (!penalty %in% c("sweep", "group_row", "nuclear"))
stop("Penalty type can only be sweep, group, or nuclear.")
## ----------------------------------------##
## regularization set
## ----------------------------------------##
if (missing(penidx))
penidx <- rep(TRUE, p)
## ----------------------------------------##
## Starting point
## ----------------------------------------##
if (missing(init)) {
if (dist == "MN")
init <- matrix(0, p, (d - 1)) else if (dist == "DM")
init <- matrix(0, p, d) else if (dist == "GDM")
init <- matrix(0, p, 2 * (d - 1)) else if (dist == "NegMN") {
if (regBeta)
init <- matrix(0, p, (d + 1)) else init <- matrix(0, p, d)
}
}
if (dist == "NegMN" && regBeta == FALSE && missing(overdisp)) {
options(warn = -1)
est <- eval(call("DMD.NegMN.Alpha.reg", Y = Y, init = init, X = X, weight = weight,
epsilon = epsilon, maxiters = maxiters, parallel = FALSE, cores = 1,
cl = NULL, sys = NULL, display = FALSE))
overdisp <- est$coefficients$phi
options(warn = ow)
} else {
overdisp <- NULL
}
## ----------------------------------------##
## Ridgedelta
## ----------------------------------------##
if (missing(ridgedelta))
ridgedelta <- 1/(p * d)
## ----------------------------------------##
## Pennalty values
## ----------------------------------------##
fit.max <- NULL
if (missing(lambdas)) {
if (missing(ngridpt))
ngridpt <- 10
## ----------------------------------------##
## Find maximum lambda
## ----------------------------------------##
fit.max <- eval(call("MGLMsparsereg.fit", Y = Y, X = X, dist = dist, lambda = Inf,
penalty = penalty, weight = weight, penidx = penidx, init = init, ridgedelta = ridgedelta,
maxiters = maxiters, epsilon = epsilon, regBeta = regBeta, overdisp = overdisp))
maxlambda <- fit.max@maxlambda
# lambdas <- exp(seq(from=log(maxlambda/N), to=log(maxlambda), length.out=15))
# ----------------------------------------## Find minimum lambda
# if(penalty=='group_row'){ for(j in 1:15){ if(j==1) B0 <- fit.max$coefficients
# else B0 <- B_hat temp <- eval(call('MGLMsparsereg.fit', Y=Y, X=X, dist=dist,
# lambda=lambdas[j], penalty=penalty, weight=weight, penidx=penidx, init=init,
# ridgedelta=ridgedelta, maxiters=maxiters, epsilon=epsilon, regBeta=regBeta,
# overdisp=overdisp)) B_hat <- temp$coefficients nz <- sum(rowSums(B_hat^2)>0)
# if(nz==p){ next }else{ if(j>1) minlambda <- lambdas[j-1] else minlambda <-
# lambdas[1]/10 break } } }else{
minlambda <- maxlambda/N
# }
lambdas <- exp(seq(from = log(maxlambda), to = log(minlambda), length.out = ngridpt))
} else {
ngridpt <- length(lambdas)
}
BICs <- rep(NA, ngridpt)
AICs <- rep(NA, ngridpt)
logL <- rep(NA, ngridpt)
Dof <- rep(NA, ngridpt)
select.list <- list()
for (j in 1:ngridpt) {
if (j == 1 & !is.null(fit.max)) {
temp <- fit.max
select.list[[j]] <- temp
B_hat <- temp@coefficients
BICs[j] <- temp@BIC
AICs[j] <- temp@AIC
logL[j] <- temp@logL
Dof[j] <- temp@Dof
if (display)
print(paste(j, " lamda=", sprintf("%.2f", lambdas[j]), " BIC=",
sprintf("%.2f", BICs[j]), " AIC=", sprintf("%.2f", AICs[j]), " logL=",
sprintf("%.2f", logL[j]), " Dof=", sprintf("%.2f", Dof[j]), sep = ""))
next
}
if (warm.start) {
if (j == 1)
B0 <- init else B0 <- B_hat
} else {
B0 <- init
}
temp <- eval(call("MGLMsparsereg.fit", Y = Y, X = X, dist = dist, lambda = lambdas[j],
penalty = penalty, weight = weight, penidx = penidx, init = B0, ridgedelta = ridgedelta,
maxiters = maxiters, epsilon = epsilon, regBeta = regBeta, overdisp = overdisp))
select.list[[j]] <- temp
B_hat <- temp@coefficients
BICs[j] <- temp@BIC
AICs[j] <- temp@AIC
logL[j] <- temp@logL
Dof[j] <- temp@Dof
if (display)
print(paste(j, " lamda=", sprintf("%.2f", lambdas[j]), " BIC=", sprintf("%.2f",
BICs[j]), " AIC=", sprintf("%.2f", AICs[j]), " logL=", sprintf("%.2f",
logL[j]), " Dof=", sprintf("%.2f", Dof[j]), sep = ""))
}
chosen.lambda <- lambdas[which.min(BICs)]
select <- select.list[[which.min(BICs)]]
select@call <- match.call()
select@data <- list(Y = Y, X = X)
select@distribution <- ifelse(dist == "MN", "Multinomial", ifelse(dist == "DM",
"Dirichlet Multinomial", ifelse(dist == "GDM", "Generalized Dirichlet Multinomial",
"Negative Multinomial")))
select@penalty <- ifelse(penalty == "group_row", "group", ifelse(penalty == "nuclear",
"nuclear", "sweep"))
select@lambda <- chosen.lambda
outDf <- data.frame(Dof = Dof, Lambda = lambdas, BIC = BICs, AIC = AICs, logL = logL)
## ----------------------------------------##
## Set class
## ----------------------------------------##
if (keep.path) new("MGLMtune", call = select@call, select = select, path = outDf, select.list = select.list)
else new("MGLMtune", call = select@call, select = select, path = outDf, select.list = list())
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.