### TODO
### return :: penalties (l1, tv), sparsity, n_edges for each model varying tv
### path (list of adjacency matrices), beta (list of betas, l1 fixed)
### input :: symmetrization rule for path
### make maxit visible
#' Inference of Multiscale Gaussian Graphical Model.
#'
#' Clusters variables using an L2 fusion penalty and simultaneously estimates a
#' Gaussian graphical model structure with an additional L1 sparsity penalty.
#'
#' Estimates a Gaussian graphical model structure while hierarchically grouping
#' variables by optimizing a pseudo-likelihood function combining Lasso and
#' fused-group Lasso penalties. The problem is solved via the *COntinuation
#' with NEsterov smoothing in a Shrinkage-Thresholding Algorithm* (Hadj-Selem et
#' al. 2018). Increasing the fusion penalty \eqn{\lambda_2} in a multiplicative
#' fashion uncovers a seemingly hierarchical clustering structure. For
#' \eqn{\lambda_2 = 0}, the approach is theoretically equivalent to
#' Meinshausen and Bühlmann (2006) *neighborhood selection*, since it reduces
#' to optimizing a *pseudo-likelihood* function with an \eqn{\ell_1} penalty
#' (Rocha et al., 2008). The algorithm stops when all the variables have merged
#' into one cluster. The criterion used to merge clusters is the
#' \eqn{\ell_2}-norm distance between regression vectors.
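#'
#' As an illustration of the multiplicative update of \eqn{\lambda_2} (a
#' sketch using the defaults `lambda2_start = 1e-4` and
#' `lambda2_factor = 1.5`; the first level is always fitted with
#' \eqn{\lambda_2 = 0}):
#' ```
#' lambda2_path <- c(0, 1e-4 * 1.5^(0:8)) # first ten values of lambda2
#' ```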
#'
#' At each iteration of the algorithm, the following function is optimized:
#' \deqn{\frac{1}{2} \sum_{i=1}^p || X^i - X^{\setminus i} \beta^i ||_2^2 +
#' \lambda_1 \sum_{i=1}^p || \beta^i ||_1 + \lambda_2 \sum_{i < j} || \beta^i -
#' \tau_{ij}(\beta^j) ||_2,}
#'
#' where \eqn{\beta^i} is the vector of coefficients obtained by regressing
#' \eqn{X^i} on the remaining variables and \eqn{\tau_{ij}} is a permutation
#' exchanging \eqn{\beta_j^i} with \eqn{\beta_i^j}.
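#'
#' A minimal sketch of how \eqn{\tau_{ij}} acts on a coefficient vector
#' (`tau_swap` is a hypothetical illustration, not part of the package API):
#' ```
#' tau_swap <- function(beta_j, i, j) {
#'   # exchange entries i and j of beta^j so that it can be compared
#'   # entry-wise with beta^i
#'   tmp <- beta_j[i]
#'   beta_j[i] <- beta_j[j]
#'   beta_j[j] <- tmp
#'   beta_j
#' }
#' ```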
#'
#' @param x Numeric matrix (\eqn{n \times p}). Multivariate normal sample with
#' \eqn{n} independent observations.
#' @param lambda1 Positive numeric scalar. Lasso penalty.
#' @param fuse_thresh Positive numeric scalar. Threshold for cluster fusion.
#' @param maxit Integer scalar. Maximum number of iterations.
#' @param distance Character. Distance between regression vectors with
#' permutation on symmetric coefficients.
#' @param lambda2_start Numeric scalar. Starting value for fused-group Lasso
#' penalty (clustering penalty).
#' @param lambda2_factor Numeric scalar. Multiplicative step used to update the
#' fused-group Lasso penalty.
#' @param precision Positive numeric scalar. Tolerance for the stopping
#' criterion (duality gap).
#' @param weights_ Matrix of weights.
#' @param type If "initial", use the classical version of **MGLasso**
#' without weights.
#' @param compact Logical scalar. If TRUE, results are saved only when the
#' current clustering differs from the previous one.
#' @param verbose Logical scalar. Print trace. Default value is FALSE.
#'
#' @return A list-like object of class `mglasso` is returned.
#' \item{out}{List of lists. Each element of the list corresponds to a
#' clustering level. An element named `levelk` contains the regression
#' matrix `beta` and the cluster assignment vector `clusters` for a
#' clustering in `k` clusters. When `compact = TRUE`, `out` has as many
#' elements as the number of unique partitions. When `compact = FALSE`, it
#' has as many elements as the number of values taken by `lambda2`.}
#' \item{l1}{the sparsity penalty `lambda1` used in the problem solving.}
#'
#' @export
#'
#' @seealso [conesta()] for the problem solver,
#' [plot_mglasso()] for plotting the output of `mglasso`.
#'
#' @examples
#' \dontrun{
#' reticulate::use_condaenv("rmglasso", required = TRUE)
#' n <- 50
#' K <- 3
#' p <- 9
#' rho <- 0.85
#' blocs <- list()
#' for (j in 1:K) {
#'   bloc <- matrix(rho, nrow = p/K, ncol = p/K)
#'   for (i in 1:(p/K)) { bloc[i, i] <- 1 }
#'   blocs[[j]] <- bloc
#' }
#'
#' mat.covariance <- Matrix::bdiag(blocs)
#' mat.covariance
#'
#' set.seed(11)
#' X <- mvtnorm::rmvnorm(n, mean = rep(0,p), sigma = as.matrix(mat.covariance))
#' X <- scale(X)
#'
#' res <- mglasso(X, 0.1, lambda2_start = 0.1)
#' res$out[[1]]$clusters
#' res$out[[1]]$beta
#' }
mglasso <- function(x, lambda1 = 0, fuse_thresh = 1e-3, maxit = NULL,
distance = c("euclidean", "relative"), lambda2_start = 1e-4, lambda2_factor = 1.5,
precision = 1e-2, weights_ = NULL, type = c("initial"), compact = TRUE,
verbose = FALSE) {
## Check input
stopifnot(lambda1 >= 0, fuse_thresh >= 0, lambda2_start >= 0, lambda2_factor >= 0)
distance <- match.arg(distance)
type <- match.arg(type)
p <- ncol(x)
x <- scale(x)
clusters <- 1:p
t <- 1 # index for the out list.
iter <- 0
out <- list()
clusters_prev <- NULL
## Loop until all the variables merged
while (length(unique(clusters)) > 1 && (is.null(maxit) || iter < maxit)) { # honor maxit if supplied
clusters <- 1:p
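## First pass: lambda2 = 0, i.e. pure ell_1 neighborhood selection;
## lambda2 is then set to lambda2_start and grows multiplicatively.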
if (iter == 0) {
beta_old <- beta_to_vector(beta_ols(x)) ## init OLS
lambda2 <- 0
}
if (iter == 1) {
lambda2 <- lambda2_start
}
beta <- conesta(x, lambda1, lambda2, beta_old, prec_ = precision,
type_ = type, W_ = weights_)
if(verbose) cat("lambda1 = ", lambda1, " ")
beta_old <- beta_to_vector(beta) ## For warm start
diffs <- dist_beta(beta, distance = distance) ## Update distance matrix
pairs_to_merge <- which(diffs <= fuse_thresh, arr.ind = TRUE) ## Clustering starts
if (nrow(pairs_to_merge) != 0) {
clusters <- merge_clusters(pairs_to_merge, clusters) # merge clusters
} ## Clustering ends here
cost_ <- cost(beta, x)
if(verbose) cat("nclusters =", length(unique(clusters)), "lambda2", lambda2,
"cost =", cost_, "\n")
if (compact) {
if (!identical(clusters, clusters_prev)) {
out[[t]] <- list(beta = beta, clusters = clusters)
names(out)[[t]] <- paste0("level", length(unique(clusters)))
clusters_prev <- clusters
t <- t + 1
}
} else {
out[[t]] <- list(beta = beta, clusters = clusters)
names(out)[[t]] <- paste0("level", length(unique(clusters)))
t <- t + 1
}
lambda2 <- lambda2 * lambda2_factor
iter <- iter + 1
}
result <- list(out = out, lambda1 = lambda1)
if(verbose) cat("niter == ", iter)
class(result) <- "mglasso"
return(result)
}
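
## Sketch (not part of the package API): one possible symmetrization rule for
## turning a regression matrix `beta` into a 0/1 adjacency matrix, as hinted
## at in the TODOs above. The "and" rule keeps an edge when both beta[i, j]
## and beta[j, i] are non-zero; the "or" rule keeps it when either is.
beta_to_adjacency <- function(beta, rule = c("and", "or"), eps = 0) {
  rule <- match.arg(rule)
  nz <- abs(beta) > eps # non-zero pattern of the coefficients
  adj <- if (rule == "and") nz & t(nz) else nz | t(nz)
  diag(adj) <- FALSE # no self-loops
  adj * 1L # logical -> 0/1 matrix
}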