# R/RIDGEsigma.R In ADMMsigma: Penalized Precision Matrix Estimation via ADMM

## Matt Galloway

#' @title Ridge penalized precision matrix estimation
#'
#' @description Ridge penalized matrix estimation via closed-form solution. If one is only interested in the ridge penalty, this function will be faster and provide a more precise estimate than using \code{ADMMsigma}. \cr
#' Consider the case where
#' \eqn{X_{1}, ..., X_{n}} are iid \eqn{N_{p}(\mu, \Sigma)}
#' and we are tasked with estimating the precision matrix,
#' denoted \eqn{\Omega \equiv \Sigma^{-1}}. This function solves the
#' following optimization problem:
#' \describe{
#' \item{Objective:}{
#' \eqn{\hat{\Omega}_{\lambda} = \arg\min_{\Omega \in S_{+}^{p}}
#' \left\{ Tr\left(S\Omega\right) - \log \det\left(\Omega \right) +
#' \frac{\lambda}{2}\left\| \Omega \right\|_{F}^{2} \right\}}}
#' }
#' where \eqn{\lambda > 0} and \eqn{\left\|\cdot \right\|_{F}^{2}} is the Frobenius
#' norm.
#'
#' @param X option to provide a nxp data matrix. Each row corresponds to a single observation and each column contains n observations of a single feature/variable.
#' @param S option to provide a pxp sample covariance matrix (denominator n). If argument is \code{NULL} and \code{X} is provided instead then \code{S} will be computed automatically.
#' @param lam positive tuning parameters for ridge penalty. If a vector of parameters is provided, they should be in increasing order. Defaults to grid of values \code{10^seq(-2, 2, 0.1)}.
#' @param path option to return the regularization path. This option should be used with extreme care if the dimension is large. If set to TRUE, cores will be set to 1 and errors and optimal tuning parameters will based on the full sample. Defaults to FALSE.
#' @param K specify the number of folds for cross validation.
#' @param cores option to run CV in parallel. Defaults to \code{cores = 1}.
#' @param trace option to display progress of CV. Choose one of \code{progress} to print a progress bar, \code{print} to print completed tuning parameters, or \code{none}.

#'
#' @return returns class object \code{RIDGEsigma} which includes:
#' \item{Lambda}{optimal tuning parameter.}
#' \item{Lambdas}{grid of lambda values for CV.}
#' \item{Omega}{estimated penalized precision matrix.}
#' \item{Sigma}{estimated covariance matrix from the penalized precision matrix (inverse of Omega).}
#' \item{Path}{array containing the solution path. Solutions are ordered dense to sparse.}
#' \item{Gradient}{gradient of optimization function (penalized gaussian likelihood).}
#' \item{MIN.error}{minimum average cross validation error (cv.crit) for optimal parameters.}
#' \item{AVG.error}{average cross validation error (cv.crit) across all folds.}
#' \item{CV.error}{cross validation errors (cv.crit).}
#'
#' @references
#' \itemize{
#' \item Rothman, Adam. 2017. 'STAT 8931 notes on an algorithm to compute the Lasso-penalized Gaussian likelihood precision matrix estimator.'
#' }
#'
#' @author Matt Galloway \email{[email protected]@umn.edu}
#'
#' @seealso \code{\link{plot.RIDGE}}, \code{\link{ADMMsigma}}
#'
#' @export
#'
#' @examples
#' # generate data from a sparse matrix
#' # first compute covariance matrix
#' S = matrix(0.7, nrow = 5, ncol = 5)
#' for (i in 1:5){
#'  for (j in 1:5){
#'    S[i, j] = S[i, j]^abs(i - j)
#'  }
#'  }
#'
#' # generate 100 x 5 matrix with rows drawn from iid N_p(0, S)
#' set.seed(123)
#' Z = matrix(rnorm(100*5), nrow = 100, ncol = 5)
#' out = eigen(S, symmetric = TRUE)
#' S.sqrt = out$vectors %*% diag(out$values^0.5)
#' S.sqrt = S.sqrt %*% t(out$vectors) #' X = Z %*% S.sqrt #' #' # ridge penalty no ADMM #' RIDGEsigma(X, lam = 10^seq(-5, 5, 0.5)) # we define the ADMM covariance estimation function RIDGEsigma = function(X = NULL, S = NULL, lam = 10^seq(-2, 2, 0.1), path = FALSE, K = 5, cores = 1, trace = c("none", "progress", "print")) { # checks if (is.null(X) && is.null(S)) { stop("Must provide entry for X or S!") } if (!all(lam > 0)) { stop("lam must be positive!") } if (all(c(K, cores)%%1 != 0)) { stop("Entry must be an integer!") } if (cores < 1) { stop("Number of cores must be positive!") } if (cores > 1 && path) { cat("Parallelization not possible when producing solution path. Setting cores = 1...\n\n") cores = 1 } if (path) { K = 1 } # match values call = match.call() trace = match.arg(trace) MIN.error = AVG.error = CV.error = NULL Lambdas = lam = sort(lam) n = ifelse(is.null(X), nrow(S), nrow(X)) # compute sample covariance matrix, if necessary if (is.null(S)) { S = (nrow(X) - 1)/nrow(X) * cov(X) } # perform cross validation, if necessary if ((length(lam) > 1) & (!is.null(X) || path)) { # run CV in parallel? if (cores > 1) { # execute ParallelCV RIDGE = CVP_RIDGE(X = X, lam = lam, K = K, cores = cores, trace = trace) MIN.error = RIDGE$min.error
AVG.error = RIDGE$avg.error CV.error = RIDGE$cv.error
lam = RIDGE$lam } else { # execute CV_RIDGEsigma if (is.null(X)) { X = matrix(0) } RIDGE = CV_RIDGEc(X = X, S = S, lam = lam, path = path, K = K, trace = trace) MIN.error = RIDGE$min.error
AVG.error = RIDGE$avg.error CV.error = RIDGE$cv.error
lam = RIDGE$lam Path = RIDGE$path

}

# print warning if lam on boundary
if ((lam == Lambdas[1]) || (lam == Lambdas[length(Lambdas)]) &&
(length(Lambdas) != 1)) {
cat("\nOptimal tuning parameter on boundary...!")
}

# compute final estimate at best tuning parameters
Omega = RIDGEc(S = S, lam = lam)

} else {

# execute RIDGEsigmac
if (length(lam) > 1) {
stop("Must specify X, set path = TRUE, or provide single value for lam.")
}
Omega = RIDGEc(S = S, lam = lam)

}

# compute gradient
grad = S - qr.solve(Omega) + lam * Omega

# compute penalized loglik
loglik = (-n/2) * (sum(Omega * S) - determinant(Omega, logarithm = TRUE)$modulus[1] + lam * sum(Omega^2)) # return values tuning = matrix(c(log10(lam), lam), ncol = 2) colnames(tuning) = c("log10(lam)", "lam") if (!path) { Path = NULL } returns = list(Call = call, Lambda = tuning, Lambdas = Lambdas, Omega = Omega, Sigma = qr.solve(Omega), Path = Path, Gradient = grad, Loglik = loglik, MIN.error = MIN.error, AVG.error = AVG.error, CV.error = CV.error) class(returns) = "RIDGE" return(returns) } ##----------------------------------------------------------------------------------- #' @title Print RIDGE object #' @description Prints RIDGE object and suppresses output if needed. #' @param x class object RIDGE. #' @param ... additional arguments. #' @keywords internal #' @export print.RIDGE = function(x, ...) { # print call cat("\nCall: ", paste(deparse(x$Call), sep = "\n", collapse = "\n"),
"\n", sep = "")

# print optimal tuning parameter
cat("\nTuning parameter:\n")
print.default(round(x$Lambda, 3), print.gap = 2L, quote = FALSE) # print loglik cat("\nLog-likelihood: ", paste(round(x$Loglik, 5), sep = "\n",
collapse = "\n"), "\n", sep = "")

# print Omega if dim <= 10
if (nrow(x$Omega) <= 10) { cat("\nOmega:\n") print.default(round(x$Omega, 5))
} else {
cat("\n(...output suppressed due to large dimension!)\n")
}

}

##-----------------------------------------------------------------------------------

#' @title Plot RIDGE object
#' @description Produces a heat plot for the cross validation errors, if available.
#' @param x class object RIDGE
#' @param type produce either 'heatmap' or 'line' graph
#' @param footnote option to print footnote of optimal values. Defaults to TRUE.
#' @param ... additional arguments.
#' @export
#' @examples
#' # generate data from a sparse matrix
#' # first compute covariance matrix
#' S = matrix(0.7, nrow = 5, ncol = 5)
#' for (i in 1:5){
#'  for (j in 1:5){
#'    S[i, j] = S[i, j]^abs(i - j)
#'  }
#'  }
#'
#' # generate 100 x 5 matrix with rows drawn from iid N_p(0, S)
#' set.seed(123)
#' Z = matrix(rnorm(100*5), nrow = 100, ncol = 5)
#' out = eigen(S, symmetric = TRUE)
#' S.sqrt = out$vectors %*% diag(out$values^0.5)
#' S.sqrt = S.sqrt %*% t(out$vectors) #' X = Z %*% S.sqrt #' #' # produce CV heat map for RIDGEsigma #' plot(RIDGEsigma(X, lam = 10^seq(-5, 5, 0.5))) #' #' # produce line graph for RIDGEsigma #' plot(RIDGEsigma(X), type = 'line') plot.RIDGE = function(x, type = c("heatmap", "line"), footnote = TRUE, ...) { # check type = match.arg(type) Means = NULL if (is.null(x$CV.error)) {
stop("No cross validation errors to plot!")
}

if (type == "line") {

# gather values to plot
cv = cbind(expand.grid(lam = x$Lambdas, alpha = 0), Errors = as.data.frame.table(x$CV.error)$Freq) # produce line graph graph = ggplot(summarise(group_by(cv, lam), Means = mean(Errors)), aes(log10(lam), Means)) + geom_jitter(width = 0.2, color = "navy blue") + theme_minimal() + geom_line(color = "red") + labs(title = "Cross-Validation Errors", y = "Error") } else { # augment values for heat map (helps visually) lam = x$Lambdas
cv = expand.grid(lam = lam, alpha = 0)
Errors = 1/(c(x$AVG.error) + abs(min(x$AVG.error)) + 1)
cv = cbind(cv, Errors)

# design color palette
bluetowhite <- c("#000E29", "white")

# produce ggplot heat map
graph = ggplot(cv, aes(alpha, log10(lam))) + geom_raster(aes(fill = Errors)) +
scale_fill_gradientn(colours = colorRampPalette(bluetowhite)(2),
guide = "none") + theme_minimal() + labs(title = "Heatmap of Cross-Validation Errors") +
theme(axis.title.x = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank())

}

if (footnote) {

# produce with footnote
graph + labs(caption = paste("**Optimal: log10(lam) = ",
x\$Lambda[1], sep = ""))

} else {

# produce without footnote
graph

}
}


## Try the ADMMsigma package in your browser

Any scripts or data that you put into this service are public.

ADMMsigma documentation built on May 2, 2019, 6:23 a.m.