# # a toy example - have to fix this
# N <- 1000 # sample size
# sigma1 <- matrix(c(1, .5, 0, 0,
# .5, 1, .2, 0,
# 0, .2, 1, 0,
# 0, 0, 0, 1), ncol = 4)
#
# sigma2 <- matrix(c(1, .5, .4, .4,
# .5, 1, .2, 0,
# .4, .2, 1, 0,
# .4, 0, 0, 1), ncol = 4)
#
# sigma3 <- matrix(c(1, .5, 0, 0,
# .5, 1, .2, .1,
# 0, .2, 1, 0,
# 0, 0, .1, 1), ncol = 4)
#
# dat <- list()
# dat[[1]] <- MASS::mvrnorm(n = N, mu = rep(0, ncol(sigma1)), Sigma = solve(sigma1))
# dat[[2]] <- MASS::mvrnorm(n = N, mu = rep(0, ncol(sigma2)), Sigma = solve(sigma2))
# dat[[3]] <- MASS::mvrnorm(n = N, mu = rep(0, ncol(sigma2)), Sigma = solve(sigma3))
# n <- rep(N, length(dat))
# S <- lapply(dat, function(x) corpcor::cor2pcor(cor(x)))
# dat <- data.frame(rbind(dat[[1]], dat[[2]], dat[[3]]))
# dat$splt <- c(rep(1, N), rep(2, N), rep(3, N))
#
# jgl <- JGL_AIC_sequentialsearch(dat = dat, splt = "splt", ncores = 3, aicfun = AIC_jgl2)
# parallelization does not seem doing good here, perhaps the example is too simple!
# system.time(
# JGL_AIC_sequentialsearch(dat = dat, splt = "splt", ncores = 7)
# )
#
# system.time(
# JGL_AIC_sequentialsearch(dat = dat, splt = "splt", ncores = 1)
# )
# performs a sequential search of the values of l1 and l2. First it finds the best fitting value of l1 according to the selected aicfun (AIC/BIC)
# then it finds the best value of l2 given the l1 selected at the first stage.
# Optionally, it can use the values of l1 and l2 as the input for a 2-dimensional optimization, to search if the aicfun can be further improved.
JGL_AIC_sequentialsearch <- function(dat, splt, return.whole.theta = TRUE, l1min = 0, l1max = 1, l2min = 0, l2max = 1, ncand = 20, criterion = c("ebic","aic"), gamma = 0.5, globalopt = TRUE, optmethod = "CG", ncores = 1, ...)
# dat = a dataframe
# splt = the column of the dataframe dat that defines multiple classes
# return.whole.theta = see JGL
# l1min, l1max, l2min, l2max = minimum/maximum values for lambdas
# ncand = number of candidate lambdas initially considered
# aicfun = AIC / BIC / others that take as input (1) the output of JGL, (2) n = vector of sample sizes, (3) S = list of covariance matrices and give in output a criterion to minimize
# globalopt = if TRUE, perform a further step that tries a 2-dimensional optimization of both l1 and l2 using optim optimizer
# optmethod = parameter method to be passed to optim
# ... = Other parameters to be passed to JGL (may not work well for parallelized code!!)
{
criterion = match.arg(criterion)
aicfun = switch(criterion, aic = AIC_jgl, ebic = BIC_jgl)
# standardize data data within classes
sp <- split(dat[, !names(dat) == splt], dat[, splt])
sp_sc <- lapply(sp, scale)
dat <- lapply(sp_sc, data.frame)
S <- lapply(dat, cov)
n <- sapply(dat, nrow)
##################
# Select lambda1 #
##################
# Define a range of candidate values for lambda 1. Select, as the largest lambda1 (l1max)
# the one that would make all edges = 0 in at least one of the networks if the glasso
# penalty was applied alone (i.e., with lambda2 = 0).
# code for l1max is adapted from package qgraph, which in turn took it from package huge
maxl1 <- function(S) max(max(S - diag(nrow(S))), -min(S - diag(nrow(S))))
l1_theormax <- sapply(S, maxl1)
l1max <- min(l1_theormax, l1max)
if(l1max < l1min) stop("the specified minimum value for l1 is less than the maximun useful value for l1")
# define an equally spaced sequence of ncand candidates
l1cand <- seq(from = l1min, to = l1max, length.out = ncand)
# select the value of l1 with minimal AIC among the candidates, considering a minimal value of l2
fun <- function(lambda1, ...)
{
jgl <- JGL(Y = dat, lambda1 = lambda1, lambda2 = l2min, return.whole.theta = TRUE, ...)
aicfun(jgl = jgl, n = n, S = S, gamma = gamma)
}
if(ncores > 1)
{
cl <- makeCluster(ncores)
clusterExport(cl, list("fun", "dat", "n", "S", "JGL", "l1cand", "l2min", "aicfun", "gamma"), envir = environment())
aicsl1 <- parSapply(cl = cl, l1cand, fun)
stopCluster(cl)
} else {
aicsl1 <- sapply(l1cand, fun)
}
l1cand <- c(l1min, l1cand, l1max)
l1min <- l1cand[which.min(aicsl1)]
l1max <- l1cand[which.min(aicsl1) + 2]
# see if AIC can be further improved in the neighborhood of the chosen value
opt <- optimize(f = fun, interval = c(l1min, l1max))
lambda1 <- opt$minimum
##################
# Select lambda2 #
##################
# find the lowest value of lambda2 that, given l1, that makes all the elements of the precision matix
# equal among different classes (max difference allowed = tol), using the stats::optimize function
fun <- function(lambda2, tol = 1e-5, ...)
{
maxdiff <- c()
jgl <- JGL(Y = dat, lambda1 = lambda1, lambda2 = lambda2, return.whole.theta = TRUE, ...)
thetas <- jgl$theta
for(i in 2:length(thetas))
{
for(j in 1:(i-1))
{
maxdiff <- c(maxdiff, max(abs(thetas[[i]] - thetas[[j]])))
}
}
maxdiff <- max(maxdiff)
ifelse(maxdiff > tol, l2max+1, lambda2)
}
opt <- optimize(f = fun, interval = c(0, l2max))
l2_theormax <- opt$minimum
l2max <- min(c(l2max, l2_theormax))
# define a sequence of ncand candidate values for l2, equally spaced between l2min and l2max
l2cand <- seq(from = l2min, to = l2max, length.out = ncand)
# select the value of l2 with minimal AIC among the candidates
fun <- function(lambda2, ...)
{
jgl <- JGL(Y = dat, lambda1 = lambda1, lambda2 = lambda2, return.whole.theta = TRUE, ...)
aicfun(jgl = jgl, n = n, S = S, gamma = gamma)
}
if(ncores > 1)
{
cl <- makeCluster(ncores)
clusterExport(cl, list("fun", "dat", "lambda1", "n", "S", "JGL", "l2cand", "aicfun", "gamma"), envir = environment())
aicsl2 <- parSapply(cl = cl, l2cand, fun)
stopCluster(cl)
} else {
aicsl2 <- sapply(l2cand, fun)
}
# find the lambda 2 that locally minimizes AIC around the choosen value
l2cand <- c(l2min, l2cand, l2max)
l2min <- l2cand[which.min(aicsl2)]
l2max <- l2cand[which.min(aicsl2) + 2]
# see if AIC can be further improved in the neighborhood of the chosen value
opt <- optimize(f = fun, interval = c(l2min, l2max))
lambda2 <- opt$minimum
if(globalopt)
{
startl1 <- lambda1
startl2 <- lambda2
averylargenumber <- 1e10
fun <- function(lambda, ...)
{
if(any(lambda < 0)) return(averylargenumber)
jgl <- JGL(Y = dat, lambda1 = lambda[1], lambda2 = lambda[2], return.whole.theta = TRUE, ...)
aicfun(jgl = jgl, n = n, S = S, gamma = gamma)
}
opt <- optim(par = c(startl1, startl2), fun, method = "BFGS")
lambda1 <- opt$par[1]
lambda2 <- opt$par[2]
}
# compute the networks using the parameters lambda 1 and lambda2 choosen according to AIC
jgl <- JGL(Y = dat, lambda1 = lambda1, lambda2 = lambda2, return.whole.theta = return.whole.theta, ...)
names(jgl$theta) <- names(dat)
aic <- aicfun(jgl, n, S, gamma = gamma)
# return
list("jgl" = jgl,
"lambda1" = lambda1,
"lambda2" = lambda2,
"l1theormax" = l1_theormax,
"l2theormax" = l2_theormax,
"aic" = aic)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.