# R/EstimateGroupNetwork.R

EstimateGroupNetwork <- function(X,
                                 inputType = c("dataframe", "list.of.dataframes", "list.of.covariance.matrices"), n, covfun = covNoBessel, groupID, labels,
                                 method = c("InformationCriterion", "crossvalidation"), strategy = c("sequential", "simultaneous"),
                                 nlambda1 = 100, lambda1.min.ratio = .01, logseql1 = TRUE,
                                 nlambda2 = 100, lambda2.min.ratio = .01, logseql2 = TRUE,
                                 k = 10, seed,
                                 criterion = c("ebic", "bic", "aic"), count.unique = FALSE, gamma = .5, dec = 5, optimize = TRUE, optmethod = "CG",
                                 penalty = c("fused", "group"), weights = c("equal", "sample.size"), penalize.diagonal = FALSE, maxiter = 500, rho = 1, truncate = 1e-5,
                                 ncores = 1, simplifyOutput = TRUE)

{
  # Check input data #
  ####################
  # This section of the code checks the input data and converts the different supported input types into
  # a list of covariance matrices S and a vector of sample sizes n
  criterion <- match.arg(criterion)
  penalty <- match.arg(penalty)
  method <- match.arg(method)
  strategy <- match.arg(strategy)
  l2max <- 1

  if(!is.list(X)) stop("argument 'X' must be a dataframe, a list of dataframes, or a list of variance-covariance matrices")

  # Identify the type of input
  if(missing(inputType))
  {
    if(is.data.frame(X))
    {
      inputType <- "dataframe"
    } else if (is.list(X)) {
      dm <- sapply(X, dim)
      if(all(dm[1,] == dm[2,]))
      {
        inputType <- "list.of.covariance.matrices"
      } else if(all(sapply(X, is.data.frame))) {
        inputType <- "list.of.dataframes"
      } else {
        stop("Cannot determine the type of input X. Please specify 'X' and 'inputType' consistently. see  ?EstimateGroupNetwork")
      }
    }
  } else {
    inputType <- match.arg(inputType)
  }

  if(inputType == "list.of.covariance.matrices" & method == "crossvalidation")
  {
    stop ("To use crossvalidation, the original dataset is necessary. If you only have covariance matrices,
          select another 'method' for determining tuning parameters")
  }
  # Convert any kind of input to a list of covariance matrices S
  switch(inputType,
         "dataframe" = {
           if(missing(groupID)) stop("if 'X' is a single dataframe, the 'groupID' factor must be specified. See ?EstimateGroupNetwork")
           if(is.character(groupID))
           {
             if(!groupID %in% names(X))
             {
               stop("'groupID' must be a string corresponding to a column name in 'X' or a number corresponding to a column in 'X'. See ?EstimateGroupNetwork")
             } else {
               grp <- as.factor(X[[groupID]])
               X <- split(X[, !names(X) %in% groupID], grp)
               groups <- levels(grp)
             }
           } else if(is.numeric(groupID)) {
             if(ncol(X) < groupID)
             {
               stop("'groupID' must be a string corresponding to a column name in 'X' or a number corresponding to a column in 'X'. See ?EstimateGroupNetwork")
             } else {
               grp <- as.factor(X[[groupID]])
               X <- split(X[, -groupID], grp)
               groups <- levels(grp)
             }
           } else {
             stop("'groupID' must be a string corresponding to a column name in 'X' or a number corresponding to a column in 'X'. See ?EstimateGroupNetwork")
           }
           if(!missing(n)) warning("argument 'n' is ignored if the input type is a dataframe. The sample sizes are extracted from the input data.")

           S <- lapply(X, covfun)
           S <- lapply(S, cov2cor)
           n <- sapply(X, nrow)
         },

         "list.of.dataframes" = {
           if(!missing(n)) warning("argument 'n' is ignored if the input type is a list of dataframes. The sample sizes are extracted from the input data.")
           n <- sapply(X, nrow)
           ncl <- sapply(X, ncol)
           allequal <- length(unique(ncl)) == 1
           if(!allequal) stop("The input data are a list of dataframes with a different number of variables per group. Please check the input and try again. See ?EstimateGroupNetwork")

           S <- lapply(X, covfun)
           S <- lapply(S, cov2cor)
           groups <- names(X)
         },

         "list.of.covariance.matrices" = {
           if(missing(n)) stop("if 'X' encodes a covariance/correlation matrix, sample sizes 'n' must be specified. See ?EstimateGroupNetwork")
           ncl <- sapply(X, ncol)
           allequal <- length(unique(ncl)) == 1
           if(!allequal) stop("The input data are a list of covariance matrices with a different number of variables per group. Please check the input and try again. See ?EstimateGroupNetwork")
           S <- X
           S <- lapply(S, cov2cor)
           groups <- names(X)
         })

  # number of variables
  p <- unique(sapply(S, ncol))
  if(length(p) > 1) stop("all datasets must include the same number of variables")
  K <- length(S) # number of groups
  # set weights
  wei <- weights <- match.arg(weights)
  switch(weights,
         equal = {weights = rep(1,K)},
         sample.size = {weights = n/sum(n)})
  # set argument list for myJGL
  tol <- 10^(-dec)
  myJGLarglist <- list("penalty" = penalty,
                      "weights" = weights,
                      "penalize.diagonal" = penalize.diagonal,
                      "maxiter" = maxiter,
                      "tol" = tol,
                      "rho" = rho,
                      "truncate" = truncate)

  # assign names to variables if not specified
  if(missing(labels))
  {
    if(!is.null(colnames(S[[1]])))
    {
      labels <- colnames(S[[1]])
    } else
    {
      labels <- paste0("V", seq(p))
      for(i in seq(length(S))) colnames(S[[i]]) <- rownames(S[[i]]) <- labels
    }
  } else if(length(labels) != p)
  {
    warning("The length of argument 'labels' must match that of the data in 'X'. Argument 'labels' was ignored")
    labels <- paste0("V", seq(p))
    for(i in seq(length(S))) colnames(S[[i]]) <- rownames(S[[i]]) <- labels
  }

  #####################################################
  # Perform a search of the values of lambda 1 and 2  #
  #####################################################

  ## method 1: k-fold crossvalidation
  #####################################
  if(method == "crossvalidation")
  {
    # define folds for k-fold cross validation
    if(!missing(seed)) set.seed(seed)
    all.folds <- lapply(n, function(x) split(sample(seq(x)), rep(seq(k), length = x)))

    if(strategy == "simultaneous")
    {
      # Candidate l1 and l2 values
      l1cand <- l1sequence(S, nlambda1, lambda1.min.ratio, logseql1)
      l2cand <- l2sequence(S, n, l1cand, nlambda2, lambda2.min.ratio, logseql2, l2max, ncores, myJGLarglist)

      # put all candidate lambda1 and lambda2 pairs in a matrix
      lambdas <- matrix(ncol = 2, nrow = nlambda1 * nlambda2)
      lambdas[,1] <- sort(rep(l1cand, nlambda2))
      lambdas[,2] <- l2cand

      # use function crossval to compute a matrix with the average negative predictive log likelihood function (cost function) in each split
      anpllmat <- sapply(seq(k), function(i) crossval(i, X = X, all.folds = all.folds, K = K, ncores = ncores, lambdas = lambdas, covfun = covfun,
                                                      weights = weights, penalty = penalty, penalize.diagonal = penalize.diagonal, maxiter = maxiter, tol = tol, rho = rho, truncate = truncate))
      # sum the cost across folds (equivalent to averaging for the purpose of locating the minimum)
      anpllvec <- apply(anpllmat, 1, sum)

      # the pair of lambdas values with the minimum cost function in the validation set
      la <- lambdas[which.min(anpllvec), ]
      lambda1 <- unlist(la[1])
      lambda2 <- unlist(la[2])

    } else if(strategy == "sequential")
    {
      ###################
      # select l1 first #
      ###################
      # Candidate l1 values
      l1cand <- l1sequence(S, nlambda1, lambda1.min.ratio, logseql1)
      # Select a very low value of l2 as a candidate
      l2cand <- l2sequence(S, n, l1cand[1], nlambda2, lambda2.min.ratio, logseql2, l2max, ncores, myJGLarglist)
      l2cand <- l2cand[1,]

      # put all candidate lambda1 and lambda2 pairs in a matrix
      lambdas <- matrix(ncol = 2, nrow = nlambda1)
      lambdas[,1] <- l1cand
      lambdas[,2] <- l2cand


      # use function crossval to compute a matrix with the average negative predictive log likelihood function (cost function) in each split
      anpllmat <- sapply(seq(k), function(i) crossval(i, X = X, all.folds = all.folds, K = K, ncores = ncores, lambdas = lambdas, covfun = covfun,
                                                      weights = weights, penalty = penalty, penalize.diagonal = penalize.diagonal, maxiter = maxiter, tol = tol, rho = rho, truncate = truncate))
      # sum the cost across folds (equivalent to averaging for the purpose of locating the minimum)
      anpllvec <- apply(anpllmat, 1, sum)

      # select the lambda1 associated with the minimum cost for that minimum value of l2
      lambda1 <- lambdas[which.min(anpllvec), 1]

      ##################
      # select l2 then #
      ##################
      l2cand <- l2sequence(S, n, lambda1, nlambda2, lambda2.min.ratio, logseql2, l2max, ncores, myJGLarglist)

      # put all candidate lambda1 and lambda2 pairs in a matrix
      lambdas <- matrix(ncol = 2, nrow = nlambda2)
      lambdas[,1] <- lambda1
      lambdas[,2] <- l2cand

      # use function crossval to compute a matrix with the average negative predictive log likelihood function (cost function) in each split
      anpllmat <- sapply(seq(k), function(i) crossval(i, X = X, all.folds = all.folds, K = K, ncores = ncores, lambdas = lambdas, covfun = covfun,
                                                      weights = weights, penalty = penalty, penalize.diagonal = penalize.diagonal, maxiter = maxiter, tol = tol, rho = rho, truncate = truncate))
      # sum the cost across folds (equivalent to averaging for the purpose of locating the minimum)
      anpllvec <- apply(anpllmat, 1, sum)

      # select the lambda2 associated with the minimum cost for the chosen value of l1
      lambda2 <- lambdas[which.min(anpllvec), 2]
    }
  }

  ## method 2: use an information criterion
  ##########################################
  if(method == "InformationCriterion")
  {
    if(strategy == "sequential")
    {
      ###################
      # select l1 first #
      ###################
      # Candidate l1 values
      l1cand <- l1sequence(S, nlambda1, lambda1.min.ratio, logseql1)
      # Select a very low value of l2 as a candidate
      l2cand <- l2sequence(S, n, l1cand[1], nlambda2, lambda2.min.ratio, logseql2, l2max, ncores, myJGLarglist)
      l2cand <- l2cand[1,]

      # select the value of l1 with the minimal value of the information criterion among the candidates, given a minimal value of l2
      fun1 <- function(lambda1, lambda2, S, n, myJGLarglist, criterion, count.unique, gamma, dec)
      {
        theta <- myJGL(S = S, n = n, lambda1 = lambda1, lambda2 = lambda2,
                       penalty = myJGLarglist$penalty, weights = myJGLarglist$weights,
                       penalize.diagonal = myJGLarglist$penalize.diagonal,
                       maxiter = myJGLarglist$maxiter, tol = myJGLarglist$tol, rho = myJGLarglist$rho,
                       truncate = myJGLarglist$truncate)
        theta <- theta$concentrationMatrix
        InformationCriterion(theta = theta, S = S, n = n, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
      }
      if(ncores > 1)
      {
        cl <- makeCluster(ncores)
        clusterExport(cl, list("fun1", "S", "n", "l2cand", "myJGLarglist", "criterion", "count.unique", "gamma", "dec"), envir = environment())
        ICl1 <- parSapply(cl = cl, l1cand, fun1, lambda2 = l2cand, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
        stopCluster(cl)
      } else {
        ICl1 <- sapply(l1cand, fun1, lambda2 = l2cand, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
      }

      lambda1 <- l1cand[which.min(ICl1)]

      if(optimize)
      {
        l1candOp <- c(l1cand[1] * (1 - lambda1.min.ratio), l1cand, l1cand[nlambda1] * (1 + lambda1.min.ratio))
        l1min <- l1candOp[which.min(ICl1)]
        l1max <- l1candOp[which.min(ICl1) + 2]
        # see if the information criterion can be further improved in the neighborhood of the chosen value
        opt <- optimize(f = fun1, interval = c(l1min, l1max), lambda2 = l2cand, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)

        lambda1 <- ifelse(which.min(c(min(ICl1), opt$objective)) == 1,
                          lambda1,
                          opt$minimum)
      }

      ##################
      # Select lambda2 #
      ##################
      l2cand <- l2sequence(S, n, lambda1, nlambda2, lambda2.min.ratio, logseql2, l2max, ncores, myJGLarglist)

      # select the value of l2 with the minimal value of the information criterion among the candidates, given the selected value of l1
      fun2 <- function(lambda2, lambda1, S, n, myJGLarglist, criterion, count.unique, gamma, dec)
      {
        theta <- myJGL(S = S, n = n, lambda1 = lambda1, lambda2 = lambda2,
                       penalty = myJGLarglist$penalty, weights = myJGLarglist$weights,
                       penalize.diagonal = myJGLarglist$penalize.diagonal,
                       maxiter = myJGLarglist$maxiter, tol = myJGLarglist$tol, rho = myJGLarglist$rho,
                       truncate = myJGLarglist$truncate)
        theta <- theta$concentrationMatrix
        InformationCriterion(theta = theta, S = S, n = n, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
      }

      if(ncores > 1)
      {
        cl <- makeCluster(ncores)
        clusterExport(cl, list("fun2", "S", "n", "l2cand", "lambda1", "myJGLarglist", "criterion", "count.unique", "gamma", "dec"), envir = environment())
        ICl2 <- parSapply(cl = cl, l2cand, fun2, lambda1 = lambda1, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
        stopCluster(cl)
      } else {
        ICl2 <- sapply(l2cand, fun2, lambda1 = lambda1, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
      }

      lambda2 <- l2cand[which.min(ICl2)]

      if(optimize)
      {
        l2candOp <- c(l2cand[1] * (1 - lambda2.min.ratio), l2cand, l2cand[nlambda2] * (1 + lambda2.min.ratio))
        l2min <- l2candOp[which.min(ICl2)]
        maxl2 <- l2candOp[which.min(ICl2) + 2]
        # see if the information criterion can be further improved in the neighborhood of the chosen value
        opt <- optimize(f = fun2, interval = c(l2min, maxl2), lambda1 = lambda1, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)

        lambda2 <- ifelse(which.min(c(min(ICl2), opt$objective)) == 1,
                          lambda2,
                          opt$minimum)

      }
    } else if(strategy == "simultaneous")
    {
      # Candidate l1 and l2 values
      l1cand <- l1sequence(S, nlambda1, lambda1.min.ratio, logseql1)
      l2cand <- l2sequence(S, n, l1cand, nlambda2, lambda2.min.ratio, logseql2, l2max, ncores, myJGLarglist)

      # put all candidate lambda1 and lambda2 pairs in a matrix
      lambdas <- matrix(ncol = 2, nrow = nlambda1 * nlambda2)
      lambdas[,1] <- sort(rep(l1cand, nlambda2))
      lambdas[,2] <- l2cand

      # among each pair of l1 and l2 values, search for the one that results in the minimum value of the information criterion
      fun3 <- function(lambdas, S, n, myJGLarglist, criterion, count.unique, gamma, dec)
      {
        theta <- myJGL(S = S, n = n, lambda1 = lambdas[1], lambda2 = lambdas[2],
                       penalty = myJGLarglist$penalty, weights = myJGLarglist$weights,
                       penalize.diagonal = myJGLarglist$penalize.diagonal,
                       maxiter = myJGLarglist$maxiter, tol = myJGLarglist$tol, rho = myJGLarglist$rho,
                       truncate = myJGLarglist$truncate)
        theta <- theta$concentrationMatrix
        InformationCriterion(theta = theta, S = S, n = n, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
      }

      if(ncores > 1)
      {
        cl <- makeCluster(ncores)
        clusterExport(cl, list("fun3", "S", "n", "lambdas", "myJGLarglist", "criterion", "count.unique", "gamma", "dec"), envir = environment())
        IC <- parApply(cl = cl, lambdas, 1, fun3, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
        stopCluster(cl)
      } else
      {
        IC <- apply(lambdas, 1, fun3, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec)
      }

      lambda1 <- lambdas[which.min(IC), 1]
      lambda2 <- lambdas[which.min(IC), 2]

      if(optimize)
      {
        opt <- optim(c(lambda1, lambda2), fun3, S = S, n = n, myJGLarglist = myJGLarglist, criterion = criterion, count.unique = count.unique, gamma = gamma, dec = dec,
                     method = optmethod)

        if(opt$value < min(IC))
        {
          lambda1 <- opt$par[1]
          lambda2 <- opt$par[2]
        }

        if(lambda1 < 0) lambda1 <- 0
        if(lambda2 < 0) lambda2 <- 0
      }
    }
  }

  # Put the final output together
  theta <- myJGL(S = S, n = n, lambda1 = lambda1, lambda2 = lambda2,
                 penalty = myJGLarglist$penalty, weights = myJGLarglist$weights,
                 penalize.diagonal = myJGLarglist$penalize.diagonal,
                 maxiter = myJGLarglist$maxiter, tol = myJGLarglist$tol, rho = myJGLarglist$rho,
                 truncate = myJGLarglist$truncate)

  for(i in seq(length(theta$network)))
    colnames(theta$network[[i]]) <- rownames(theta$network[[i]]) <-
    colnames(theta$concentrationMatrix[[i]]) <- rownames(theta$concentrationMatrix[[i]]) <- labels
  out <- lapply(theta$network, as.matrix)
  names(out) <- groups

  if(!simplifyOutput)
  {
    out <- list()
    out$network <- lapply(theta$network, as.matrix)
    names(out$network) <- groups
    out$concentrationMatrix <- lapply(theta$concentrationMatrix, as.matrix)
    names(out$concentrationMatrix) <- groups
    out$correlationMatrix <- lapply(S, as.matrix)
    names(out$correlationMatrix) <- groups
    out$InformationCriteria <- c("AIC" = InformationCriterion(theta = theta$concentrationMatrix, S = S, n = n, criterion = "aic", count.unique = count.unique, gamma = gamma, dec = dec),
                                 "BIC" = InformationCriterion(theta = theta$concentrationMatrix, S = S, n = n, criterion = "bic", count.unique = count.unique, gamma = gamma, dec = dec),
                                 "eBIC" = InformationCriterion(theta = theta$concentrationMatrix, S = S, n = n, criterion = "ebic", count.unique = count.unique, gamma = gamma, dec = dec),
                                 "Count Unique" = count.unique,
                                 "gamma for eBIC" = gamma)
    out$TuningParameters <- c("lambda1" = lambda1,
                              "lambda2" = lambda2)
    out$n <- n
    out$miscellaneous <- c("method" = method,
                           "strategy" = strategy,
                           "k for crossvalidation" = k,
                           "seed for crossvalidation" = ifelse(missing("seed"), "none", seed),
                           "information criterion used" = ifelse(criterion == "InformationCriterion", criterion, "none"),
                           "nlambda1" = nlambda1,
                           "lambda1.min.ratio" = lambda1.min.ratio,
                           "logseql1" = logseql1,
                           "nlambda2" = nlambda2,
                           "lambda2.min.ratio" = lambda2.min.ratio,
                           "logseql2" = logseql2,
                           "optimize" = optimize,
                           "optmethod" = optmethod,
                           "type of penalty" = ifelse(penalty == "fused", "Fused Graphical Lasso", "Group Graphical Lasso"),
                           "weights" = wei,
                           "lambda 1 penalized diagonal?" = penalize.diagonal,
                           "maxiter" = maxiter,
                           "rho" = rho,
                           "truncate" = truncate)



  }

  out
}
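
# Illustrative usage sketch (not run; the simulated data, group names, and small lambda
# grids below are arbitrary choices for demonstration, not package defaults):
if (FALSE) {
  set.seed(1)
  d1 <- as.data.frame(matrix(rnorm(300 * 5), 300, 5))
  d2 <- as.data.frame(matrix(rnorm(250 * 5), 250, 5))
  nets <- EstimateGroupNetwork(list(g1 = d1, g2 = d2),
                               method = "InformationCriterion",
                               strategy = "sequential",
                               nlambda1 = 10, nlambda2 = 10,
                               criterion = "ebic",
                               simplifyOutput = FALSE)
  names(nets)               # network, concentrationMatrix, correlationMatrix, ...
  round(nets$network$g1, 2) # estimated network for group g1
}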


InformationCriterion <- function(theta, S, n, criterion = c("aic", "bic", "ebic"), count.unique = TRUE, gamma = .5, dec = 5)
{
  # theta = list of concentration matrices (the 'concentrationMatrix' element of the myJGL output)
  # n = vector of sample sizes corresponding to each element of theta (same order)
  # S = list of covariance matrices corresponding to each element of theta (same order)

  criterion <- match.arg(criterion)

  p <- ncol(theta[[1]])
  K <- length(theta)

  # log likelihoods
  Lik <- numeric(K)
  for (i in seq(K)){
    Lik[i] <- logGaus(S[[i]], theta[[i]], n[i])
  }
  L <- sum(Lik)

  # number of parameters
  k <- npar(theta, count.unique, dec)

  switch(criterion,
         aic = {
           out = 2 * k - 2 * L
         },
         bic = {
           out = k * log(sum(n)) - 2 * L
         },
         ebic = {
           out = k * log(sum(n)) - 2 * L + 4 * k * gamma * log(p)
         })
  out
}
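
# Toy sketch of InformationCriterion() on hand-built inputs (not run): with identity
# concentration and correlation matrices, logGaus() gives n/2 * (0 - p) per group and
# npar() counts no nonzero off-diagonal parameters, so eBIC reduces to -2 * sum(logGaus).
if (FALSE) {
  S_toy <- list(diag(3), diag(3))
  theta_toy <- list(diag(3), diag(3))
  InformationCriterion(theta = theta_toy, S = S_toy, n = c(100, 100),
                       criterion = "ebic", gamma = .5)   # 600 = -2 * (-150 - 150)
}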


npar <- function(theta, count.unique = TRUE, dec = 5)
{
  if(count.unique == FALSE) {
    E <- sum(sapply(theta, function(x) x[lower.tri(x)] != 0))
  } else if (count.unique == TRUE)
  {
    p <- ncol(theta[[1]])
    K <- length(theta)

    # Number of unique pars:
    arr <- array(unlist(theta), dim = c(p, p, K))
    arr <- round(arr, dec)
    Nuni <- apply(arr, 1:2, function(x) {
      un <- unique(x)
      length(un[un!=0])
    })

    # Number of pars:
    E <- sum(Nuni[upper.tri(Nuni)])
  }

  E
}
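
# Small sketch of the count.unique option (not run; the two precision matrices below are
# made up): an edge that takes the same value across groups is counted once when
# count.unique = TRUE, while every nonzero off-diagonal entry counts otherwise.
if (FALSE) {
  t1 <- matrix(c(1, .5, 0,  .5, 1, .3,  0, .3, 1), 3, 3)
  t2 <- matrix(c(1, .5, 0,  .5, 1, .4,  0, .4, 1), 3, 3)
  npar(list(t1, t2), count.unique = TRUE)    # 3: .5 counted once, .3 and .4 separately
  npar(list(t1, t2), count.unique = FALSE)   # 4: two nonzero lower-triangle entries per group
}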

logGaus <- function(S, theta, n)
{
  theta <- as.matrix(theta)
  S <- as.matrix(S)
  trThS <- sum(theta * S)
  return(n/2 * (log(det(theta)) - trThS))
}
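
# Minimal check of logGaus() (not run): the value is the Gaussian log likelihood up to the
# additive constant -n * p / 2 * log(2 * pi), with S the sample covariance used for fitting;
# for theta = solve(S) the trace term equals p, so it reduces to n/2 * (log det(solve(S)) - p).
if (FALSE) {
  S_ex <- matrix(c(1, .4, .4, 1), 2, 2)
  logGaus(S = S_ex, theta = solve(S_ex), n = 100)
  100 / 2 * (log(det(solve(S_ex))) - 2)   # same value
}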

l1sequence <- function(S, nlambda1, lambda1.min.ratio, logseql1)
{
  # find l1_theormax, the value of l1 that would cause all edges in at least one network to be missing
  maxl1 <- function(S) max(max(S - diag(nrow(S))), -min(S - diag(nrow(S))))
  l1_theormax  <- min(sapply(S, maxl1))

  # define the sequence of l1 values
  l1min <- l1_theormax * lambda1.min.ratio
  if(logseql1 == TRUE)
  {
    l1cand <- exp(seq(log(l1min), log(l1_theormax), length = nlambda1))
  } else
  {
    l1cand <- seq(from = 0, to = l1_theormax, length.out = nlambda1)
  }
  l1cand
}
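
# Toy sketch of the lambda1 grid (not run; S1 and S2 are made-up correlation matrices):
# the largest candidate is the smallest across-group maximum absolute off-diagonal
# correlation (.4 here), and the smallest candidate is lambda1.min.ratio times that value.
if (FALSE) {
  S1 <- matrix(c(1, .6, .2,  .6, 1, .1,  .2, .1, 1), 3, 3)
  S2 <- matrix(c(1, .4, .3,  .4, 1, .2,  .3, .2, 1), 3, 3)
  l1sequence(list(S1, S2), nlambda1 = 5, lambda1.min.ratio = .01, logseql1 = TRUE)
  # log-spaced grid from .004 to .4
}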


l2sequence <- function(S, n, l1cand, nlambda2, lambda2.min.ratio, logseql2, l2max, ncores, myJGLarglist)
{
  nlambda1 <- length(l1cand)
  # For each candidate l1, identify the lowest value of lambda2 that makes all the elements of the precision matrix
  # equal across the different classes (max difference allowed = tol), using the stats::optimize function

  fun4 <- function(lambda2, lambda1, S, n, myJGLarglist)
  {
    #cat(paste(lambda1, lambda2, "\n"))
    tol <- myJGLarglist$tol
    maxdiff <- c()
    thetas <- myJGL(S = S, n = n, lambda2 = lambda2, lambda1 = lambda1, penalty = myJGLarglist$penalty,
                    weights = myJGLarglist$weights, penalize.diagonal = myJGLarglist$penalize.diagonal,
                    maxiter = myJGLarglist$maxiter, tol = myJGLarglist$tol, rho = myJGLarglist$rho,
                    truncate = myJGLarglist$truncate)
    thetas <- thetas$concentrationMatrix

    for(i in seq(2, length(thetas)))
    {
      for(j in seq(i-1))
      {
        maxdiff <- c(maxdiff, max(abs(thetas[[i]] - thetas[[j]])))
      }
    }
    maxdiff <- max(maxdiff)

    ifelse(maxdiff > tol, l2max+1, lambda2)
  }

  if(ncores > 1 & nlambda1 > 1)
  {
    cl <- makeCluster(ncores)
    clusterExport(cl, list("fun4", "S", "n", "l2max", "l1cand", "myJGLarglist"), envir = environment())
    l2_theormax <- parSapply(cl = cl, seq(nlambda1), function(i) optimize(f = fun4, S = S, n = n, lambda1 = l1cand[i], myJGLarglist = myJGLarglist, interval = c(0, l2max))$minimum)
    stopCluster(cl)
  } else {
    l2_theormax <-  sapply(seq(nlambda1), function(i) optimize(f = fun4, lambda1 = l1cand[i], S= S, n = n, myJGLarglist = myJGLarglist, interval = c(0, l2max))$minimum)
  }

  l2min <- l2_theormax * lambda2.min.ratio

  if(logseql2 == TRUE)
  {
    logl2min <- log(l2min)
    logl2max <- log(l2_theormax)
    l2cand <- sapply(seq_along(l2min), function(i) exp(seq(logl2min[i], logl2max[i], length = nlambda2)))
  } else
  {
    l2cand <- sapply(seq_along(l2min), function(i) seq(l2min[i], l2_theormax[i], length = nlambda2))
  }
  l2cand
}
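
# Sketch of the expected return shape (not run; relies on the package-internal myJGL, and
# 'S_list' is a placeholder for the list of group correlation matrices built inside
# EstimateGroupNetwork): one column of nlambda2 candidate lambda2 values per candidate
# lambda1, so 3 lambda1 values with nlambda2 = 4 give a 4 x 3 matrix.
if (FALSE) {
  arglist <- list(penalty = "fused", weights = c(.5, .5), penalize.diagonal = FALSE,
                  maxiter = 500, tol = 1e-5, rho = 1, truncate = 1e-5)
  l2grid <- l2sequence(S = S_list, n = c(200, 200), l1cand = c(.05, .1, .2),
                       nlambda2 = 4, lambda2.min.ratio = .01, logseql2 = TRUE,
                       l2max = 1, ncores = 1, myJGLarglist = arglist)
  dim(l2grid)   # 4 3
}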

# this way of computing the covariance matrix was taken from function JGL::JGL
covNoBessel <- function(x, ...)
{
  n <- nrow(x)
  (cov(x, ...)*(n-1))/n
}
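
# Quick equivalence sketch (not run): the rescaled covariance equals the crossproduct of
# the column-centred data divided by n, i.e. the maximum-likelihood estimate without the
# Bessel correction.
if (FALSE) {
  x <- matrix(rnorm(50 * 3), 50, 3)
  all.equal(covNoBessel(x), crossprod(scale(x, scale = FALSE)) / nrow(x))   # TRUE
}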



crossval <- function(i, X, all.folds, K, ncores, lambdas, covfun, ...)
{
  # some of the code for the k-fold cross validation has been taken from the R package parcor, function adalasso

  # X = input data as a list of dataframes
  # for a given fold, function crossval computes Sigmas, the covariance matrices of the training set for each class,
  # and Omegas, a list that includes, for each pair of lambdas, the estimated concentration matrices for each class
  # finally, using function anpll, for each pair of lambdas it computes the average negative predictive log likelihood
  # (cost function) in each validation sample
  # i is the fold, an integer between 1 and k
  # ... -> arguments passed on to myJGL
  # Parallelization uses socket clusters (parallel::makeCluster), which also work on Windows
  omit <- lapply(all.folds, function(q) q[[i]]) # the indices of the observations in the ith fold in each class
  dat_train <- lapply(1:K, function(k) X[[k]][-omit[[k]], , drop = FALSE]) # for each class, the data after omitting the ith fold = training set
  dat_test <- lapply(1:K, function(k) X[[k]][omit[[k]], , drop = FALSE]) # for each class, the data after keeping only the ith fold = validation set
  SigmaTrain <- lapply(dat_train, covfun) # for each class, the covariance matrix of the training set
  SigmaTest <- lapply(dat_test, covfun) # for each class, the covariance matrix of the validation set

  # for each pair of l1 and l2, compute the concentration matrix of the training set and use anpll to
  # validate it against the sample covariance matrix of the validation set
  n <- sapply(dat_train, nrow)
  if(ncores > 1)
  {
    cl <- makeCluster(ncores)
    clusterExport(cl, list("SigmaTrain", "n", "lambdas", "myJGL"), envir=environment())
    Omegas <- parLapply(cl = cl, seq(nrow(lambdas)), function(la) myJGL(S = SigmaTrain, n = n, lambda1 = lambdas[la, 1], lambda2 = lambdas[la, 2], ...)$concentrationMatrix)
    stopCluster(cl)

  } else  {
    Omegas <- lapply(seq(nrow(lambdas)), function(la) myJGL(S = SigmaTrain, n = n, lambda1 = lambdas[la, 1], lambda2 = lambdas[la, 2], ...)$concentrationMatrix)
  }

  fits <- sapply(Omegas, function(O) anpll(S = SigmaTest, O = O))
  fits
}
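
# Sketch of a single-fold evaluation (not run; relies on the package internals myJGL and
# anpll, and the simulated data and lambda pairs are arbitrary): returns one predictive
# cost per row of 'lambdas'.
if (FALSE) {
  set.seed(1)
  Xsim <- list(as.data.frame(matrix(rnorm(100 * 4), 100, 4)),
               as.data.frame(matrix(rnorm(120 * 4), 120, 4)))
  nsim <- sapply(Xsim, nrow)
  k <- 5
  all.folds <- lapply(nsim, function(x) split(sample(seq(x)), rep(seq(k), length = x)))
  lambdas <- cbind(c(.1, .2), c(.05, .05))   # two (lambda1, lambda2) pairs
  crossval(1, X = Xsim, all.folds = all.folds, K = 2, ncores = 1, lambdas = lambdas,
           covfun = covNoBessel, weights = c(.5, .5), penalty = "fused",
           penalize.diagonal = FALSE, maxiter = 500, tol = 1e-5, rho = 1,
           truncate = 1e-5)
}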
