R/consensusTOM.R

Defines functions consensusTOM .mergeConsensusTOMInformationLists hierarchicalConsensusTOM individualTOMs newBlockInformation .consensusCalculation.base.FromList .consensusCalculation.base .qorder .checkAndDelete .emptyDist .vector2dist .loadObject .loadAsList .saveChunks .mtd.checkDimConsistencyAndGetDimnames .checkListDimConsistencyAndGetDimnames .shiftList .dimensions .useDiskCache .networkCalculation .corCalculation newNetworkOptions newCorrelationOptions newConsensusOptions consensusTreeInputs newConsensusTree .checkPower

Documented in consensusTOM consensusTreeInputs hierarchicalConsensusTOM individualTOMs newBlockInformation newConsensusOptions newConsensusTree newCorrelationOptions newNetworkOptions

# New multilevel specification of consensus: a hierarchical list.

# The consensus calculation needs to be general enough so it can be used for module merging (consensus
# eigengene network) as well as for calculation of consensus kMEs, and possibly for other stuff as well.

# Consensus specification for a single operation: 
#   - inputs: a list specifying the input of the consensus. This should be general enough to handle
#   blockwise data but also not specific to adjacencies.
#     The inut should be a multiData structure, with each 
#   - calibrationMethod: currently one of "none", "simple quantile", "full quantile"
#   - consensusQuantile
#   - saveCalibratedIndividualTOMs (= FALSE)
#   - calibratedIndividualTOMFilePattern (= "calibratedIndividualTOM-Set%s-Block%b.RData")

# The consensus calculation itself does not need information about correlation type etc; the consensus
# calculation is very general.

# For network analysis applications of the consensus: will also have to keep information about how the
# individual network adjacencies (TOMs) are to be created or were created - correlation type, network type,
# TOM type, correlation options etc.


# So we will keep 2 different types of information around: 
# 1. network construction options. A simple list giving the necessary network construction options.
# 2. ConsensusOptions: This will also be a list giving the options but not holding the actual consensus
# inputs or outputs. 

# outputs of network construction and consensus construction should be held in separate lists.


# Output of individualTOMs: 
#   - adjacency data, a list of blockwiseData instances
#   - block information
#   - network construction options, separately for each adjacency (network construction could be different)

# For consensusTOM:
#   Inputs: 
#      - adjacency data: either from individual TOMs or from other consensus TOMs 
#          . note that constructing a complicated consensus manually (i.e., using consensus TOMs 
#            as inputs to higher-level consensus) is of limited use 
#            since consensus modules would need the full consensus tree anyway.
#      - consensus tree
#   Not really needed: block information
#   outputs:
#      - consensus adjacency data
#      - copy of consensus options
#      - other (diagnostic etc) information

# For consensus modules
#   Inputs: 
#      - undelying expression data
#      - optional consensus TOM
#      - block information
#      - network options for each expression data set
#      - consensus tree


.checkPower = function(power) 
{
    if (any(!is.finite(power)) | (sum(power<1)>0) | (sum(power>50)>0) )  
        stop("power must be between 1 and 50.");
}


#==========================================================================================================
#
# Defining a single consensus operation
#
#==========================================================================================================

newConsensusTree = function(consensusOptions = newConsensusOptions(),
                            inputs,
                            analysisName = NULL)
{
  if (!inherits(consensusOptions, "ConsensusOptions"))
    stop("'consensusOptions' must be of class ConsensusOptions.");
  out = list(consensusOptions = consensusOptions,
       inputs = inputs,
       analysisName = analysisName);
  class(out) = c("ConsensusTree", class(out));
  out;
}

consensusTreeInputs = function(consensusTree, flatten = TRUE)
{
  out = lapply(consensusTree$inputs, function(inp) if (inherits(inp, "ConsensusTree")) consensusTreeInputs(inp) else inp)
  if (flatten) out = unlist(out);
  out;
}

newConsensusOptions = function(
      calibration = c("full quantile", "single quantile", "none"),

      # Simple quantile scaling options
      calibrationQuantile = 0.95,
      sampleForCalibration = TRUE, 
      sampleForCalibrationFactor = 1000,

      # Consensus definition
      consensusQuantile = 0,
      useMean = FALSE,
      setWeights = NULL,

      # Consensus post-processing
      suppressNegativeResults = FALSE,  # Potentially useful for Nowick-type signed TOM

      # Name to prevent clashing of files
      analysisName = "")
{
  calibration = match.arg(calibration);
  if (any(!is.finite(setWeights))) stop("Entries of 'setWeights' must all be finite.");
  if ( (consensusQuantile < 0) | (consensusQuantile > 1) ) 
      stop("'consensusQuantile' must be between 0 and 1.");
  out = list( calibration = calibration,
              calibrationQuantile = calibrationQuantile,
              sampleForCalibration = sampleForCalibration,
              sampleForCalibrationFactor = sampleForCalibrationFactor,
              consensusQuantile = consensusQuantile,
              useMean = useMean,
              setWeights = setWeights,
              suppressNegativeResults = suppressNegativeResults,
              analysisName = analysisName);
  class(out) = c("ConsensusOptions", class(out))
  out;
}

newCorrelationOptions = function(
      corType = c("pearson", "bicor"),
      maxPOutliers = 0.05,
      quickCor = 0,
      pearsonFallback = "individual",
      cosineCorrelation = FALSE,
      nThreads = 0,
      corFnc = if (corType=="bicor") "bicor" else "cor",
      corOptions = c(
        list(use = 'p',
             cosine = cosineCorrelation, 
             quick = quickCor,
             nThreads = nThreads), 
        if (corType=="bicor") 
           list(maxPOutliers = maxPOutliers,  
                pearsonFallback = pearsonFallback) else NULL))
{ 
  if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.");
  if (quickCor < 0) stop("quickCor must be positive.");
  if (nThreads < 0) stop("nThreads must be positive.");
  corType = match.arg(corType);
  if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.");
  if (quickCor < 0) stop("quickCor must be positive.");
  fallback = pmatch(pearsonFallback, .pearsonFallbacks)
  if (is.na(fallback))
      stop(paste("Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n",
           paste(.pearsonFallbacks, collapse = ", ")))
  out = list(
    corType = corType,
    maxPOutliers = maxPOutliers,
    quickCor = quickCor,
    pearsonFallback = pearsonFallback,
    pearsonFallback.code = match(pearsonFallback, .pearsonFallbacks),
    cosineCorrelation = cosineCorrelation,
    corFnc = corFnc,
    corOptions = corOptions,
    corType.code = match(corType, .corTypes),
    nThreads = nThreads);
  class(out) = c("CorrelationOptions", class(out));
  out;
}

newNetworkOptions = function(
    correlationOptions = newCorrelationOptions(),
 
    # Adjacency options
    replaceMissingAdjacencies = TRUE,
    power = 6,
    networkType = c("signed hybrid", "signed", "unsigned"),
    checkPower = TRUE,

    # Topological overlap options
    TOMType = c("signed", "signed Nowick", "unsigned", "none",
                "signed 2", "signed Nowick 2", "unsigned 2"),
    TOMDenom = c("mean", "min"),
    suppressTOMForZeroAdjacencies = FALSE,
    suppressNegativeTOM = FALSE,

    # Internal behavior options
    useInternalMatrixAlgebra = FALSE)
{
  if (checkPower) .checkPower(power);
  networkType = match.arg(networkType);
  TOMType = match.arg(TOMType);
  TOMDenom = match.arg(TOMDenom);
  out = c(correlationOptions,
      list(replaceMissingAdjacencies = replaceMissingAdjacencies,
           power = power,
           networkType = networkType,
           TOMType = TOMType,
           TOMDenom = TOMDenom,
           networkType.code = match(networkType, .networkTypes),
           TOMType.code = match(TOMType, .TOMTypes),
           TOMDenom.code = match(TOMDenom, .TOMDenoms),
           suppressTOMForZeroAdjacencies = suppressTOMForZeroAdjacencies,
           suppressNegativeTOM = suppressNegativeTOM,
           useInternalMatrixAlgebra = useInternalMatrixAlgebra));
  class(out) = c("NetworkOptions", class(correlationOptions));
  out;
}

#====================================================================================================
#
# cor, network, and consensus calculations
#
#====================================================================================================

.corCalculation = function(x, y = NULL, weights.x = NULL, weights.y = NULL, correlationOptions)
{
  if (!inherits(correlationOptions, "CorrelationOptions"))
    stop(".corCalculation: 'correlationOptions' does not have correct type.");
  do.call(match.fun(correlationOptions$corFnc), 
          c(list(x = x, y= y, weights.x = weights.x, weights.y = weights.y), 
            correlationOptions$corOptions));
}


# network calculation. Returns the resulting topological overlap or adjacency matrix.

.networkCalculation = function(data, networkOptions, weights = NULL, 
                verbose = 1, indent = 0)
{
  if (is.data.frame(data)) data = as.matrix(data);
  if (!inherits(networkOptions, "NetworkOptions"))
    stop(".networkCalculation: 'networkOptions' does not have correct type.");
 
   .checkAndScaleWeights(weights, data, scaleByMax = FALSE);

   callVerb = max(0, verbose - 1); callInd = indent + 2;
   CcorType = networkOptions$corType.code - 1;
   CnetworkType = networkOptions$networkType.code - 1;
   CTOMType = networkOptions$TOMType.code -1;
   CTOMDenom = networkOptions$TOMDenom.code -1;

   warn = as.integer(0);
   # For now return the full matrix; eventually we may return just the dissimilarity.
   # To make full use of the lower triagle space saving we'd have to modify the underlying C code
   # dramatically, otherwise it will still need to allocate the full matrix for the matrix multiplication.
   .Call("tomSimilarity_call", data, weights,
          as.integer(CcorType), as.integer(CnetworkType), 
          as.double(networkOptions$power), as.integer(CTOMType),
          as.integer(CTOMDenom),
          as.double(networkOptions$maxPOutliers),
          as.double(networkOptions$quickCor),
          as.integer(networkOptions$pearsonFallback.code),
          as.integer(networkOptions$cosineCorrelation),
          as.integer(networkOptions$replaceMissingAdjacencies),
          as.integer(networkOptions$suppressTOMForZeroAdjacencies),
          as.integer(networkOptions$suppressNegativeTOM),
          as.integer(networkOptions$useInternalMatrixAlgebra),
          warn, as.integer(min(1, networkOptions$nThreads)),
          as.integer(callVerb), as.integer(callInd), PACKAGE = "WGCNA");
}

# the following is contained in consensusOptions:
#  out = list( calibration = calibration,
#              calibrationQuantile = calibrationQuantile,
#              sampleForCalibration = sampleForCalibration,
#              sampleForCalibrationFactor = sampleForCalibrationFactor,
#              consensusQuantile = consensusQuantile,
#              useMean = useMean,
#              setWeights = setWeights);


#==============================================================================================
#
# general utility functions
#
#==============================================================================================

# Try to guess whether disk cache should be used
# this should work for both multiData as well as for simple lists of arrays.

.useDiskCache = function(multiExpr, blocks = NULL, chunkSize = NULL,
                         nSets = if (isMultiData(multiExpr)) length(multiExpr) else 1,
                         nGenes = checkSets(multiExpr)$nGenes)
{
  if (is.null(chunkSize)) chunkSize = as.integer(.largestBlockSize/(2*nSets))

  if (length(blocks) == 0) {
    blockLengths = nGenes;
  } else 
    blockLengths = as.numeric(table(blocks));

  max(blockLengths) > chunkSize;
}

.dimensions = function(x)
{
   if (is.null(dim(x))) return(length(x))
   return(dim(x))
}

.shiftList = function(c0, lst)
{
  if (length(lst)==0) return(list(c0));
  ll = length(lst)
  out = list();
  out[[1]] = c0;
  for (i in 1:ll)
    out[[i+1]] = lst[[i]];
  out;
}

.checkListDimConsistencyAndGetDimnames = function(dataList)
{
  nPars = length(dataList)
  dn = NULL
  for (p in 1:nPars)
  {
      if (mode(dataList[[p]])!="numeric")
          stop(paste("Argument number", p, " is not numeric."))
       if (p==1) {
          dim = .dimensions(dataList[[p]])
       } else {
          if (!isTRUE(all.equal(.dimensions(dataList[[p]]), dim)))
             stop("Argument dimensions are not consistent.");
       }
       if (prod(dim)==0) stop(paste("Argument has zero dimension."));
       if (is.null(dn)) dn = dimnames(dataList[[p]]);
   }
   dn;
}

.mtd.checkDimConsistencyAndGetDimnames = function(mtd)
{
  nPars = length(mtd)
  dn = NULL
  for (p in 1:nPars)
  {
      if (mode(mtd[[p]]$data)!="numeric")
          stop(paste("Argument number", p, " is not numeric."))
       if (p==1) {
          dim = .dimensions(mtd[[p]]$data)
       } else {
          if (!isTRUE(all.equal(.dimensions(mtd[[p]]$data), dim)))
             stop("Argument dimensions are not consistent.");
       }
       if (prod(dim)==0) stop(paste("Argument has zero dimension."));
       if (is.null(dn)) dn = dimnames(mtd[[p]]$data);
   }
   dn;
}



#==============================================================================================
#
# general utility functions for working with disk cache
#
#==============================================================================================

.saveChunks = function(data, chunkSize, fileBase, cacheDir, fileNames = NULL)
{
   ld = length(data);
   nChunks = ceiling(ld/chunkSize);
   if (is.null(fileNames))
   {
     if (length(fileBase)!=1)
       stop("Internal error: length of 'fileBase' must be 1.");

     fileNames = rep("", nChunks);
     x = 1;
     for (c in 1:nChunks)
     {
       fileNames[c] = tempfile(pattern = fileBase, tmpdir = cacheDir);
       # This effectively reserves the file name
       save(x, file = fileNames[c]);
     }
   } else {
     if (length(fileNames)!=nChunks)
       stop("Internal error: length of 'fileNames' must equal the number of chunks.");
   }

   chunkLengths = rep(0, nChunks);
   start = 1;
   for (c in 1:nChunks)
   {
     end = min(start + chunkSize-1, ld);
     chunkLengths[c] = end - start + 1;
     temp = data[start:end];
     save(temp, file = fileNames[c]);
     start = end + 1;
   }
   rm(temp); #gc();
   list(files = fileNames, chunkLengths = chunkLengths);
}

.loadAsList = function(file)
{
  env = new.env();
  load(file = file, envir = env);
  as.list(env);
};


.loadObject = function(file, name = NULL, size = NULL)
{
  x = .loadAsList(file);
  if (!is.null(name) && (names(x)[1]!=name))
    stop("File ", file, " does not contain object '", name, "'.")

  obj = x[[1]];
  if (!is.null(size) && (length(obj)!=size))
    stop("Object '", name, "' does not have the correct length.");
  obj;
}

.vector2dist = function(x)
{
  n = length(x);
  n1 = (1 + sqrt(1 + 8*n))/2
  if (floor(n1)!=n1) stop("Input length not consistent with a distance structure.");
  attributes(x) = list(Size = as.integer(n1), Diag = FALSE, Upper = FALSE);
  class(x) = "dist";
  x;
}

.emptyDist = function(nObjects, fill = 0)
{
  n = (nObjects * (nObjects-1))/2;
  .vector2dist(rep(fill, n));
}

.checkAndDelete = function(files)
{
  if (length(files)>0) lapply(as.character(files), function(file) if (file.exists(file)) file.remove(file));
  NULL;
}

.qorder = function(data)
{
  data = as.numeric(data);
  .Call("qorder", data, PACKAGE = "WGCNA")
}

# Actual consensus calculation distilled into one function. data is assumed to have sets in columns
# and samples/observations/whatever in rows. setWeightMat should be a matrix of dimensions (nSets, 1) 
# and be normalized to sum=1.

.consensusCalculation.base = function(data, useMean, setWeightMat, consensusQuantile)
{
  nSets = ncol(data);
  if (nSets==1)
  {
    out.list = list(consensus = as.vector(data));
  } else {
    if (useMean)
    {
       if (any(is.na(data)))
       {
         finiteMat = 1-is.na(data);
         data[is.na(data)] = 0;
         out = data %*% setWeightMat / finiteMat%*%setWeightMat;
       } else {
         out = data %*% setWeightMat;
       }
       out.list = list(consensus = out);
    } else if (consensusQuantile == 0) 
    {
        whichmin = .Call("minWhich_call", data, 1L, PACKAGE = "WGCNA");
        out.list = list(consensus = whichmin$min);
    } else {
       out.list = list(consensus = rowQuantileC(data, p = consensusQuantile));
    }
  }
  if (is.null(out.list$originCount))
  {
    out.list$originCount = apply(data, 2, function(x) sum(x <= out.list$consensus, na.rm = TRUE));
    names(out.list$originCount) = 1:nSets
  }
  out.list;
}

.consensusCalculation.base.FromList = function(dataList, useMean, setWeights, consensusQuantile)
{
  nSets = length(dataList);
  if (nSets==1)
  {
    out.list = list(consensus = dataList[[1]]);
  } else {
    if (useMean)
    {
       out.list = list(consensus = pmean(dataList, setWeights));
    } else if (consensusQuantile == 0)
    {
        whichmin = pminWhich.fromList(dataList);
        out.list = list(consensus = whichmin$min);
    } else {
       out.list = list(consensus = pquantile.fromList(dataList, prob = consensusQuantile));
    }
  }
  if (is.null(out.list$originCount))
  {
    out.list$originCount = sapply(dataList, function(x) sum(x <= out.list$consensus, na.rm = TRUE));
    names(out.list$originCount) = 1:nSets
  }
  out.list;
}

#==============================================================================================
#
# utility functions for working with multiple blockwise adjacencies.
#
#==============================================================================================

newBlockInformation = function(
    blocks,
    goodSamplesAndGenes)
{
  blockGenes = tapply(1:length(blocks), blocks, identity)
  names(blockGenes) = sort(unique(blocks));
  nGGenes = sum(goodSamplesAndGenes$goodGenes);
  gBlocks = blocks[goodSamplesAndGenes$goodGenes];
  out = list(blocks = blocks,
             blockGenes = blockGenes, 
             goodSamplesAndGenes = goodSamplesAndGenes,
             nGGenes = nGGenes,
             gBlocks = gBlocks);
  class(out) = c("BlockInformation", class(out));
  out;
}
  
#=======================================================================================================
#
# individualTOMs
# This is essentially a re-badged blockwiseIndividualTOMs with a different input and ouptut format.
#
#=======================================================================================================

# The following is contained in networkOptions:

    #corType = corType,
    #maxPOutliers = maxPOutliers,
    #quickCor = quickCor,
    #pearsonFallback = pearsonFallback,
    #cosineCorrelation = cosineCorrelation,
    #corFnc = corFnc,
    #corOptions = corOptions,
    #corType.code = match(corType, .corTypes),

    # Adjacency options
    #replaceMissingAdjacencies = TRUE,
    #power = 6,
    #networkType = c("signed hybrid", "signed", "unsigned"),

    # Topological overlap options

    #TOMType = c("signed", "unsigned"),
    #TOMDenom = c("mean", "min"))


individualTOMs = function(
   multiExpr,
   multiWeights = NULL,

   multiExpr.imputed = NULL,  ## Optional, useful for pre-clustering if preclustering is needed.

   # Data checking options

   checkMissingData = TRUE,

   # Blocking options

   blocks = NULL, 
   maxBlockSize = 5000, 
   blockSizePenaltyPower = 5,
   nPreclusteringCenters = NULL,
   randomSeed = 54321,

   # Network construction options. This can be a single object of class NetworkOptions, or a multiData
   # structure of NetworkOptions objects, one per element of multiExpr.

   networkOptions,

   # Save individual TOMs? This is equivalent to using external = TRUE in blockwiseData

   saveTOMs = TRUE,
   individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",

   # Behaviour options
   collectGarbage = TRUE,
   verbose = 2, indent = 0)
{
  spaces = indentSpaces(indent);

  dataSize = checkSets(multiExpr, checkStructure = TRUE);
  if (dataSize$structureOK)
  {
    nSets = dataSize$nSets;
    nGenes = dataSize$nGenes;
    multiFormat = TRUE;
  } else {
    multiExpr = multiData(multiExpr);
    if (!is.null(multiWeights)) multiWeights = multiData(multiWeights);
    nSets = dataSize$nSets;
    nGenes = dataSize$nGenes;
    multiFormat = FALSE;
  }

  .checkAndScaleMultiWeights(multiWeights, multiExpr, scaleByMax = FALSE);

  if (inherits(networkOptions, "NetworkOptions"))
  {
    networkOptions = list2multiData(.listRep(networkOptions, nSets));
  } else {
    if (!all(mtd.apply(networkOptions, inherits, "NetworkOptions", mdaSimplify = TRUE)))
       stop("'networkOptions' must be of class 'NetworkOptions' or a multiData structure\n",
            "   of objects of the class.\n",
            "   See newNetworkOptions for creating valid network options.");
  }

  if (is.null(names(multiExpr)))
    names(multiExpr) = spaste("Set", 1:nSets);

  if (!is.null(randomSeed))
  {
    if (exists(".Random.seed"))
    {
       savedSeed = .Random.seed
       on.exit(.Random.seed <<-savedSeed);
    } 
    set.seed(randomSeed);
  }

  #if (maxBlockSize >= floor(sqrt(2^31)) )
  #  stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")

  if (!is.null(blocks) && (length(blocks)!=nGenes))
    stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'.");

  if (verbose>0) 
     printFlush(paste(spaces, "Calculating topological overlaps block-wise from all genes"));

  nSamples = dataSize$nSamples;
  
  # Check data for genes and samples that have too many missing values

  # Check that multiExpr has valid (mtd)column names. If column names are missing, generate them.

  colIDs = mtd.colnames(multiExpr);
  if (is.null(colIDs)) colIDs = c(1:dataSize$nGenes);

  if (checkMissingData)
  {
    gsg = goodSamplesGenesMS(multiExpr, multiWeights = multiWeights, 
                             verbose = verbose - 1, indent = indent + 1)
    if (!gsg$allOK)
    {
      multiExpr = mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes);
      if (!is.null(multiWeights)) 
        multiWeights = mtd.subset(multiWeights, gsg$goodSamples, gsg$goodGenes);
      if (!is.null(multiExpr.imputed)) 
        multiExpr.imputed = mtd.subset(multiExpr.imputed, gsg$goodSamples, gsg$goodGenes);
      colIDs = colIDs[gsg$goodGenes];
    }
  } else {
    gsg = list(goodGenes = rep(TRUE, nGenes), goodSamples = lapply(nSamples, function(n) rep(TRUE, n)));
    gsg$allOK = TRUE;
  }
  nGGenes = sum(gsg$goodGenes);
  nGSamples = rep(0, nSets);
  for (set in 1:nSets) nGSamples[set] = sum(gsg$goodSamples[[set]]);

  if (is.null(blocks))
  {
    if (nGGenes > maxBlockSize)
    {
      if (verbose>1) printFlush(paste(spaces, "....pre-clustering genes to determine blocks.."));
      clustering = consensusProjectiveKMeans(
                       if (!is.null(multiExpr.imputed)) multiExpr.imputed else multiExpr, 
                       preferredSize = maxBlockSize,
                       sizePenaltyPower = blockSizePenaltyPower, checkData = FALSE,
                       nCenters = nPreclusteringCenters,
                       randomSeed = randomSeed,
                       verbose = verbose-2, indent = indent + 1);
      gBlocks = .orderLabelsBySize(clustering$clusters);
    } else 
      gBlocks = rep(1, nGGenes);
    blocks = rep(NA, nGenes);
    blocks[gsg$goodGenes] = gBlocks;
  } else {
    gBlocks = blocks[gsg$goodGenes];
  }

  blockLevels = as.numeric(levels(factor(gBlocks)));
  blockSizes = table(gBlocks)
  nBlocks = length(blockLevels);

  if (any(blockSizes > sqrt(2^31)-1))
    printFlush(spaste(spaces,
            "Found block(s) with size(s) larger than limit of 'int' indexing.\n",
            spaces, " Support for such large blocks is experimental; please report\n",
            spaces, " any issues to Peter.Langfelder@gmail.com."));

  # check file names for uniqueness

  actualFileNames = matrix("", nSets, nBlocks);
  if (saveTOMs) 
  {
    for (set in 1:nSets) for (b in 1:nBlocks)
      actualFileNames[set, b] = .processFileName(individualTOMFileNames, set, names(multiExpr), b);
  
    rownames(actualFileNames) = spaste("Set.", c(1:nSets));
    colnames(actualFileNames) = spaste("Block.", c(1:nBlocks));
    if (length(unique(as.vector(actualFileNames))) < nSets * nBlocks) 
    {
      printFlush("Error: File names for (some) set/block combinations are not unique:");
      print(actualFileNames);
      stop("File names must be unique.");
    }
  }
  
  # Initialize various variables

  blockGenes = list();
  blockNo = 1;
  setTomDS = replicate(nSets, list());
  # Here's where the analysis starts
  for (blockNo in 1:nBlocks)
  {
    if (verbose>1 && nBlocks > 1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
    # Select the block genes
    block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]];
    #nBlockGenes = length(block);
    #blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]];
    #errorOccurred = FALSE;

    # For each set: calculate and save TOM

    for (set in 1:nSets)
    {
      if (verbose>2) printFlush(paste(spaces, "....Working on set", set))
      selExpr = as.matrix(multiExpr[[set]]$data[, block]);
      if (!is.null(multiWeights)) {
         selWeights = as.matrix(multiWeights[[set]]$data[, block]) 
      } else
         selWeights = NULL;

      tomDS = as.dist(.networkCalculation(selExpr, networkOptions[[set]]$data, weights = selWeights, 
                      verbose = verbose -2, indent = indent+2));

      setTomDS[[set]]$data = addBlockToBlockwiseData(if (blockNo==1) NULL else setTomDS[[set]]$data, 
                            external = saveTOMs,
                            blockData = tomDS, 
                            blockFile = actualFileNames[set, blockNo],
                            recordAttributes = TRUE,
                            metaData = list(IDs = colIDs[block]))
    }
    if (collectGarbage) { rm(tomDS); gc(); }
  }

  names(setTomDS) = names(multiExpr);

  if (!multiFormat)
  {
    gsg$goodSamples = gsg$goodSamples[[1]];
    setTomDS = setTomDS[[1]]$data;
  }

  blockInformation = newBlockInformation(blocks, gsg);

  list(blockwiseAdjacencies = setTomDS,
       setNames = names(multiExpr),
       nSets = length(multiExpr),
       blockInfo = blockInformation,
       networkOptions = networkOptions
       )
}


#=====================================================================================================
#
# hierarchical consensus TOM
#
#=====================================================================================================

hierarchicalConsensusTOM = function(
      # Supply either ...
      # ... information needed to calculate individual TOMs

      multiExpr,
      multiWeights = NULL,

      # Data checking options
      checkMissingData = TRUE,

      # Blocking options
      blocks = NULL,
      maxBlockSize = 20000,
      blockSizePenaltyPower = 5,
      nPreclusteringCenters = NULL,
      randomSeed = 12345,

      # Network construction options. This can be a single object of class NetworkOptions, or a multiData
      # structure of NetworkOptions objects, one per element of multiExpr.

      networkOptions,

      # Save individual TOMs?

      keepIndividualTOMs = TRUE,
      individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",

      # ... or information about individual (more precisely, input) TOMs

      individualTOMInfo = NULL,

      # Consensus calculation options 
      consensusTree,

      useBlocks = NULL,

      # Save calibrated TOMs?
      saveCalibratedIndividualTOMs = FALSE,
      calibratedIndividualTOMFilePattern = "calibratedIndividualTOM-Set%s-Block%b.RData",

      # Return options
      saveConsensusTOM = TRUE,
      consensusTOMFilePattern = "consensusTOM-%a-Block%b.RData",
      getCalibrationSamples = FALSE,

      # Return the intermediate results as well?  
      keepIntermediateResults = saveConsensusTOM,

      # Internal handling of TOMs
      useDiskCache = NULL, chunkSize = NULL,
      cacheDir = ".",
      cacheBase = ".blockConsModsCache",

      # Behavior
      collectGarbage = TRUE,
      verbose = 1,
      indent = 0)
{
  if (!is.null(randomSeed))
  {
    if (exists(".Random.seed"))
    {
       savedSeed = .Random.seed
       on.exit(.Random.seed <<-savedSeed);
    } 
    set.seed(randomSeed);
  }

  localIndividualTOMCalculation = is.null(individualTOMInfo);


  if (is.null(individualTOMInfo))
  {
    if (missing(multiExpr)) stop("Either 'individualTOMInfo' or 'multiExpr' must be given.");
    if (is.null(useDiskCache)) useDiskCache = .useDiskCache(multiExpr, blocks, chunkSize);
    time = system.time({individualTOMInfo = individualTOMs(multiExpr = multiExpr, 
                         multiWeights = multiWeights,
                         checkMissingData = checkMissingData,

                         blocks = blocks,
                         maxBlockSize = maxBlockSize,
                         blockSizePenaltyPower = blockSizePenaltyPower,
                         nPreclusteringCenters = nPreclusteringCenters,
                         randomSeed = NULL,

                         networkOptions = networkOptions,

                         saveTOMs = useDiskCache | keepIndividualTOMs,
                         individualTOMFileNames = individualTOMFileNames,

                         collectGarbage = collectGarbage,
                         verbose = verbose, indent = indent);});
    if (verbose > 1) { printFlush("Timimg for individual TOMs:"); print(time); }
  }
  consensus = hierarchicalConsensusCalculation(individualTOMInfo$blockwiseAdjacencies,
           consensusTree,
           level = 1,
           useBlocks = useBlocks,
           randomSeed = NULL,
           saveCalibratedIndividualData = saveCalibratedIndividualTOMs,
           calibratedIndividualDataFilePattern = calibratedIndividualTOMFilePattern,
           saveConsensusData = saveConsensusTOM,
           consensusDataFileNames = consensusTOMFilePattern,
           getCalibrationSamples= getCalibrationSamples,
           keepIntermediateResults = keepIntermediateResults,
           useDiskCache = useDiskCache, 
           chunkSize = chunkSize,
           cacheDir = cacheDir,
           cacheBase = cacheBase,
           collectGarbage = collectGarbage,
           verbose = verbose,
           indent = indent);
           
  if (localIndividualTOMCalculation)
  {
     if (!keepIndividualTOMs)
     {
        # individual TOMs are contained in the individualTOMInfo list; remove them.
        mtd.apply(individualTOMInfo$blockwiseAdjacencies, BD.checkAndDeleteFiles);
        individualTOMInfo$blockwiseAdjacencies = NULL
     }
  } 
        
  c( consensus,
     list(individualTOMInfo = individualTOMInfo,
          consensusTree = consensusTree)
   );
}


#========================================================================================================
#
# Merge consensusTOMInfo lists
#
#========================================================================================================
#
# Caution: at present the function does not check that the inputs are compatible and compatible with the
# supplied blocks.

.mergeConsensusTOMInformationLists = function(blockInformation, consensusTOMInfoList)
{

  blocks = blockInformation$blocks;

  blockLevels = unique(blocks);
  nBlocks = length(blockLevels);

  out = consensusTOMInfoList[[1]];

  # Merging consensus information
  out$consensusData = do.call(mergeBlockwiseData, lapply(consensusTOMInfoList, getElement, "consensusData"));
  if (out$saveCalibratedIndividualData)
  {
    out$calibratedIndividualData = lapply(1:out$nSets, function(set)
           do.call(mergeBlockwiseData, 
                lapply(consensusTOMInfoList, function(ct) ct$calibratedIndividualData[[set]])));
  }

  if (!is.null(out$calibrationSamples))
  {
    out$calibrationSamples = do.call(c, lapply(consensusTOMInfoList, getElement, "calibrationSamples"));
  }
  out$originCount = rowSums(do.call(cbind, lapply(consensusTOMInfoList, getElement, "originCount")))

  # Merging information in individualTOMInfo

  out$individualTOMInfo$blockwiseAdjacencies = lapply(1:out$nSets, function(set)
      list(data = do.call(mergeBlockwiseData, lapply(consensusTOMInfoList, function(ct)
                     ct$individualTOMInfo$blockwiseAdjacencies[[set]]$data)))); 

  out$individualTOMInfo$blockInfo = blockInformation;

  out;
}

#========================================================================================================
#
# consensusTOM: old, single-layer consensus.
#
#========================================================================================================

consensusTOM = function(
      # Supply either ...
      # ... information needed to calculate individual TOMs

      multiExpr,

      # Data checking options
      checkMissingData = TRUE,

      # Blocking options
      blocks = NULL,
      maxBlockSize = 5000,
      blockSizePenaltyPower = 5,
      nPreclusteringCenters = NULL,
      randomSeed = 54321,

      # Network construction arguments: correlation options

      corType = "pearson",
      maxPOutliers = 1,
      quickCor = 0,
      pearsonFallback = "individual",
      cosineCorrelation = FALSE,
      replaceMissingAdjacencies = FALSE,

      # Adjacency function options

      power = 6,
      networkType = "unsigned",
      checkPower = TRUE,

      # Topological overlap options

      TOMType = "unsigned",
      TOMDenom = "min",
      suppressNegativeTOM = FALSE,

      # Save individual TOMs?

      saveIndividualTOMs = TRUE,
      individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",

      # ... or individual TOM information

      individualTOMInfo = NULL,
      useIndivTOMSubset = NULL,

   ##### Consensus calculation options 

      useBlocks = NULL,

      networkCalibration = c("single quantile", "full quantile", "none"),

      # Save calibrated TOMs?
      saveCalibratedIndividualTOMs = FALSE,
      calibratedIndividualTOMFilePattern = "calibratedIndividualTOM-Set%s-Block%b.RData",

      # Simple quantile scaling options
      calibrationQuantile = 0.95,
      sampleForCalibration = TRUE, sampleForCalibrationFactor = 1000,
      getNetworkCalibrationSamples = FALSE,

      # Consensus definition
      consensusQuantile = 0,
      useMean = FALSE,
      setWeights = NULL,

      # Return options
      saveConsensusTOMs = TRUE,
      consensusTOMFilePattern = "consensusTOM-Block%b.RData",
      returnTOMs = FALSE,

      # Internal handling of TOMs
      useDiskCache = NULL, chunkSize = NULL,
      cacheDir = ".",
      cacheBase = ".blockConsModsCache",

      nThreads = 1,

      # Diagnostic messages
      verbose = 1,
      indent = 0)
{
  spaces = indentSpaces(indent);
  networkCalibration = match.arg(networkCalibration);

  seedSaved = FALSE;
  if (!is.null(randomSeed))
  {
    if (exists(".Random.seed"))
    {
       savedSeed = .Random.seed
       on.exit(.Random.seed <<-savedSeed);
    } 
    set.seed(randomSeed);
  }

  if (any(!is.finite(setWeights))) stop("Entries of 'setWeights' must all be finite.");

  localIndividualTOMCalculation = is.null(individualTOMInfo);
  if (is.null(individualTOMInfo))
  {
    if (missing(multiExpr)) stop("Either 'individualTOMInfo' or 'multiExpr' must be given.");

    dataSize = checkSets(multiExpr);
    nSets.all = dataSize$nSets;
    nGenes = dataSize$nGenes;

    if (is.null(useDiskCache)) useDiskCache = .useDiskCache(multiExpr, blocks, chunkSize);
    if (length(power)!=1)
    {
      if (length(power)!=nSets.all)
        stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'.");
    } else {
      power = rep(power, nSets.all);
    }

    if ( (consensusQuantile < 0) | (consensusQuantile > 1) ) 
      stop("'consensusQuantile' must be between 0 and 1.");

    time = system.time({individualTOMInfo = blockwiseIndividualTOMs(multiExpr = multiExpr, 
                         checkMissingData = checkMissingData,
                         blocks = blocks,
                         maxBlockSize = maxBlockSize,
                         blockSizePenaltyPower = blockSizePenaltyPower,
                         nPreclusteringCenters = nPreclusteringCenters,
                         randomSeed = randomSeed,
                         corType = corType,
                         maxPOutliers = maxPOutliers,
                         quickCor = quickCor,
                         pearsonFallback = pearsonFallback,
                         cosineCorrelation = cosineCorrelation,
                         replaceMissingAdjacencies = replaceMissingAdjacencies,
                         power = power,
                         networkType = networkType, 
                         TOMType = TOMType,
                         TOMDenom = TOMDenom,
                         suppressNegativeTOM = suppressNegativeTOM,
                         saveTOMs = useDiskCache | saveIndividualTOMs,
                         individualTOMFileNames = individualTOMFileNames,
                         nThreads = nThreads,
                         verbose = verbose, indent = indent);});
    if (verbose > 1) { printFlush("Timimg for individual TOMs:"); print(time); }

    if (!saveIndividualTOMs & useDiskCache) 
       on.exit(.checkAndDelete(individualTOMInfo$actualTOMFileNames), add = TRUE)

  } else {
    nSets.all = if (individualTOMInfo$saveTOMs)
             nrow(individualTOMInfo$actualTOMFileNames) else ncol(individualTOMInfo$TOMSimilarities[[1]]);
    nGenes = length(individualTOMInfo$blocks);
    if (is.null(useDiskCache)) useDiskCache = .useDiskCache(NULL, blocks, chunkSize, 
               nSets = nSets.all, nGenes = nGenes);

  }

  setNames = individualTOMInfo$setNames;
  if (is.null(setNames)) setNames = spaste("Set.", 1:individualTOMInfo$nSets)
  
  nGoodGenes = length(individualTOMInfo$gBlocks);

  if (is.null(setWeights)) setWeights = rep(1, nSets.all);
  if (length(setWeights)!=nSets.all)
    stop("Length of 'setWeights' must equal the number of sets.");

  setWeightMat = as.matrix(setWeights/sum(setWeights));

  if (is.null(useIndivTOMSubset))
  {  
    if (individualTOMInfo$nSets != nSets.all)
      stop(paste("Number of sets in individualTOMInfo and in multiExpr do not agree.\n",
                 "  To use a subset of individualTOMInfo, set useIndivTOMSubset appropriately."));

    useIndivTOMSubset = c(1:nSets.all);
  }

  nSets = length(useIndivTOMSubset);


  if (length(unique(useIndivTOMSubset))!=nSets)
    stop("Entries of 'useIndivTOMSubset' must be unique");

  if (any(useIndivTOMSubset<1) | any(useIndivTOMSubset>individualTOMInfo$nSets))
    stop("All entries of 'useIndivTOMSubset' must be between 1 and the number of sets in individualTOMInfo");

  # if ( (minKMEtoJoin >1) | (minKMEtoJoin  <0) ) stop("minKMEtoJoin  must be between 0 and 1.");

  gsg = individualTOMInfo$goodSamplesAndGenes;

  # Restrict gsg to used sets

  gsg$goodSamples = gsg$goodSamples[useIndivTOMSubset];

  if (is.null(chunkSize)) chunkSize = as.integer(.largestBlockSize/(2*nSets))

  # Initialize various variables

  if (getNetworkCalibrationSamples)
  {
    if (!sampleForCalibration)
      stop(paste("Incompatible input options: networkCalibrationSamples can only be returned", 
                 "if sampleForCalibration is TRUE."));
    networkCalibrationSamples = list();
  }

  blockLevels = sort(unique(individualTOMInfo$gBlocks));
  nBlocks = length(blockLevels);

  if (is.null(useBlocks)) useBlocks = blockLevels;

  useBlockIndex = match(useBlocks, blockLevels);

  if (!all(useBlocks %in% blockLevels))
    stop("All entries of 'useBlocks' must be valid block levels.");

  if (any(duplicated(useBlocks)))
    stop("Entries of 'useBlocks' must be unique.");

  nUseBlocks = length(useBlocks);
  if (nUseBlocks==0)
    stop("'useBlocks' cannot be non-NULL and empty at the same time.");

  consensusTOM.out = list();

  TOMFiles = rep("", nUseBlocks);
  originCount = rep(0, nSets);

  calibratedIndividualTOMFileNames = NULL;
  if (saveCalibratedIndividualTOMs)
  {
    calibratedIndividualTOMFileNames = matrix("", nSets, nBlocks);
    for (set in 1:nSets) for (b in 1:nBlocks)
      calibratedIndividualTOMFileNames[set, b] = .processFileName(calibratedIndividualTOMFilePattern, 
                                 setNumber = set, setNames = setNames, blockNumber = b);

  }
  gc();

  # Here's where the analysis starts

  for (blockIndex in 1:nUseBlocks)
  {
    blockNo = useBlockIndex[blockIndex];

    if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
    # Select block genes
    block = c(1:nGoodGenes)[individualTOMInfo$gBlocks==blockLevels[blockNo]];
    nBlockGenes = length(block);
    # blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]];
    scaleQuant = rep(1, nSets);
    scalePowers = rep(1, nSets);

    # Set up file names or memory space to hold the set TOMs
    if (useDiskCache)
    {
      nChunks = ceiling(nBlockGenes * (nBlockGenes-1)/2/chunkSize);
      chunkFileNames = array("", dim = c(nChunks, nSets));
      on.exit(.checkAndDelete(chunkFileNames), add = TRUE);
    } else nChunks = 1;

    if (nChunks==1) useDiskCache = FALSE;
    if (!useDiskCache)
    {
      # Note: setTomDS will contained the scaled set TOM matrices.
      setTomDS = matrix(0, nBlockGenes*(nBlockGenes-1)/2, nSets);
      colnames(setTomDS) = setNames;
    } 

    # create an empty consTomDS distance structure.

    consTomDS = .emptyDist(nBlockGenes);

    # sample entry indices from the distance structure for TOM scaling, if requested

    if (networkCalibration=="single quantile" && sampleForCalibration)
    {
      qx = min(calibrationQuantile, 1-calibrationQuantile);
      nScGenes = min(sampleForCalibrationFactor * 1/qx, length(consTomDS));
      nTOMEntries = length(consTomDS)
      scaleSample = sample(nTOMEntries, nScGenes);
      if (getNetworkCalibrationSamples)
        networkCalibrationSamples[[blockIndex]] = list(sampleIndex = scaleSample,
                                            TOMSamples = matrix(NA, nScGenes, nSets));
    }
    if (networkCalibration %in% c("single quantile", "none"))
    {
      for (set in 1:nSets)
      {
        if (verbose>2) printFlush(paste(spaces, "....Working on set", useIndivTOMSubset[set]))
        if (individualTOMInfo$saveTOMs)
        {
           tomDS = .loadObject(individualTOMInfo$ actualTOMFileNames[useIndivTOMSubset[set], blockNo],
                               name = "tomDS", size = nBlockGenes*(nBlockGenes-1)/2);
        } else {
          tomDS = consTomDS;
          tomDS[] = individualTOMInfo$TOMSimilarities[[blockNo]] [, useIndivTOMSubset[set]]
        }
        
        if (networkCalibration=="single quantile")
        {
          # Scale TOMs so that calibrationQuantile agree in each set
          if (sampleForCalibration)
          {
            if (getNetworkCalibrationSamples)
            { 
              networkCalibrationSamples[[blockIndex]]$TOMSamples[, set] = tomDS[scaleSample];
              scaleQuant[set] = quantile(networkCalibrationSamples[[blockIndex]]$TOMSamples[, set], 
                                         probs = calibrationQuantile, type = 8);
            } else {
              scaleQuant[set] = quantile(tomDS[scaleSample], probs = calibrationQuantile, type = 8);
            }
          } else
            scaleQuant[set] = quantile(x = tomDS, probs = calibrationQuantile, type = 8);
          if (set>1) 
          {
             scalePowers[set] = log(scaleQuant[1])/log(scaleQuant[set]);
             tomDS = tomDS^scalePowers[set];
          }
          if (saveCalibratedIndividualTOMs)
             save(tomDS, file = calibratedIndividualTOMFileNames[set, blockNo]);
        } 

        # Save the calculated TOM either to disk in chunks or to memory.
      
        if (useDiskCache)
        {
          if (verbose > 3) printFlush(paste(spaces, "......saving TOM similarity to disk cache.."));
          sc = .saveChunks(tomDS, chunkSize, cacheBase, cacheDir = cacheDir);
          chunkFileNames[, set] = sc$files;
          chunkLengths = sc$chunkLengths;
        } else {
          setTomDS[, set] = tomDS[];
        }
        rm(tomDS); gc();
      }
    } else if (networkCalibration=="full quantile")
    {
      # Step 1: load each TOM, get order, split TOM into chunks according to order, and save.
      if (verbose>1) printFlush(spaste(spaces, "..working on quantile normalization"))
      if (useDiskCache)
      {
        orderFiles = rep("", nSets);
        on.exit(.checkAndDelete(orderFiles),add = TRUE);
      }
      for (set in 1:nSets)
      {
        if (verbose>2) printFlush(paste(spaces, "....Working on set", useIndivTOMSubset[set]))
        if (individualTOMInfo$saveTOMs)
        {
           tomDS = .loadObject(individualTOMInfo$ actualTOMFileNames[useIndivTOMSubset[set], blockNo],
                               name = "tomDS", size = nBlockGenes*(nBlockGenes-1)/2);
        } else {
          tomDS = consTomDS;
          tomDS[] = individualTOMInfo$TOMSimilarities[[blockNo]] [, useIndivTOMSubset[set]]
        }
        if (useDiskCache)
        {
          # Order TOM (this may take a long time...)
          if (verbose > 3) printFlush(spaste(spaces, "......ordering TOM"));
          time = system.time({order1 = .qorder(tomDS)});
          if (verbose > 3) { printFlush("Time to order TOM:"); print(time); }
          # save the order
          orderFiles[set] = tempfile(pattern = spaste(".orderForSet", set), tmpdir = cacheDir);
          if (verbose > 3) printFlush(spaste(spaces, "......saving order and ordered TOM"));
          save(order1, file = orderFiles[set]);
          # Save ordered tomDS into chunks
          tomDS.ordered = tomDS[order1];
          sc = .saveChunks(tomDS.ordered, chunkSize, cacheBase, cacheDir = cacheDir);
          chunkFileNames[, set] = sc$files;
          chunkLengths = sc$chunkLengths;
        } else {
          setTomDS[, set] = tomDS[]
        }
      }
      if (useDiskCache)
      {
        # Step 2: Load chunks one by one and quantile normalize
        if (verbose > 2) printFlush(spaste(spaces, "....quantile normalizing chunks"));
        for (c in 1:nChunks)
        {
          if (verbose > 3) printFlush(spaste(spaces, "......QN for chunk ", c, " of ", nChunks));
          chunkData = matrix(NA, chunkLengths[c], nSets);
          for (set in 1:nSets)
            chunkData[, set] = .loadObject(chunkFileNames[c, set]);

          time = system.time({ chunk.norm = normalize.quantiles(chunkData, copy = FALSE);});
          if (verbose > 1) { printFlush("Time to QN chunk:"); print(time); }
          # Save quantile normalized chunks
          for (set in 1:nSets)
          {
            temp = chunk.norm[, set];
            save(temp, file = chunkFileNames[c, set]);
          }
        }

        if (verbose > 2) printFlush(spaste(spaces, "....putting together full QN'ed TOMs"));
        # Put together full TOMs
        for (set in 1:nSets)
        {
           load(orderFiles[set]);
           start = 1;
           for (c in 1:nChunks)
           {
             end = start + chunkLengths[c] - 1;
             tomDS[order1[start:end]] = .loadObject(chunkFileNames[c, set], size = chunkLengths[c]);
             start = start + chunkLengths[c];
           }
           if (saveCalibratedIndividualTOMs)
              save(tomDS, file = calibratedIndividualTOMFileNames[set, blockNo]);
           .saveChunks(tomDS, chunkSize, fileNames = chunkFileNames[, set]);
           unlink(orderFiles[set]);
        }
      } else {
        # If disk cache is not being used, simply call normalize.quantiles on the full set.
        setTomDS = normalize.quantiles(setTomDS);
        if (saveCalibratedIndividualTOMs) for (set in 1:nSets)
        {
           tomDS = .vector2dist(setTomDS[, set]);
           save(tomDS, file = calibratedIndividualTOMFileNames[set, blockNo]);
        }
      }
    } else stop("Unrecognized value of 'networkCalibration': ", networkCalibration);

    # Calculate consensus network
    if (verbose > 2)
      printFlush(paste(spaces, "....Calculating consensus network"));
    if (useDiskCache)
    {
      start = 1;
      for (chunk in 1:nChunks)
      {
        if (verbose > 3) printFlush(paste(spaces, "......working on chunk", chunk));
        end = start + chunkLengths[chunk] - 1;
        setChunks = array(0, dim = c(chunkLengths[chunk], nSets));
        for (set in 1:nSets)
        {
          load(file = chunkFileNames[chunk, set]);
          setChunks[, set] = temp;
          file.remove(chunkFileNames[chunk, set]);
        }
        colnames(setChunks) = setNames;
        tmp = .consensusCalculation.base(setChunks, useMean = useMean, setWeightMat = setWeightMat,
                                           consensusQuantile = consensusQuantile);
        consTomDS[start:end] = tmp$consensus;
        countIndex = as.numeric(names(tmp$originCount));
        originCount[countIndex] = originCount[countIndex] + tmp$originCount; 
        rm(tmp);
        start = end + 1;
      }
    } else {
      tmp = .consensusCalculation.base(setTomDS, useMean = useMean, setWeightMat = setWeightMat,
                                         consensusQuantile = consensusQuantile);
      consTomDS[] = tmp$consensus;
      countIndex = as.numeric(names(tmp$originCount));
      originCount[countIndex] = originCount[countIndex] + tmp$originCount; 
      rm(tmp);
    }
    
    # Save the consensus TOM if requested

    if (saveConsensusTOMs)
    {
       TOMFiles[blockIndex] = .substituteTags(consensusTOMFilePattern, "%b", blockNo);
       if (TOMFiles[blockIndex]==consensusTOMFilePattern)
         stop(paste("File name for consensus TOM must contain the tag %b somewhere in the file name -\n",
                    "   - this tag will be replaced by the block number. "));
       save(consTomDS, file = TOMFiles[blockIndex]);
    }

    if (returnTOMs) consensusTOM.out[[blockIndex]] = consTomDS;

    gc();
  }

  if (!saveConsensusTOMs) TOMFiles = NULL;
  if (!returnTOMs)  consensusTOM.out = NULL;

  if (localIndividualTOMCalculation)
  {
     if (!individualTOMInfo$saveTOMs)
     {
        # individual TOMs are contained in the individualTOMInfo list; remove them.
        individualTOMInfo$TOMSimilarities = NULL;
     }
  } 
        
  list(consensusTOM = consensusTOM.out,
       TOMFiles = TOMFiles,
       saveConsensusTOMs = saveConsensusTOMs,

       individualTOMInfo = individualTOMInfo,
       useIndivTOMSubset = useIndivTOMSubset,
       goodSamplesAndGenes = gsg,
       nGGenes = nGoodGenes,
       nSets = nSets,

       saveCalibratedIndividualTOMs = saveCalibratedIndividualTOMs,
       calibratedIndividualTOMFileNames = calibratedIndividualTOMFileNames,
       networkCalibrationSamples = if (getNetworkCalibrationSamples) networkCalibrationSamples else NULL,

       consensusQuantile = consensusQuantile, 
       originCount = originCount

      )
}

Try the WGCNA package in your browser

Any scripts or data that you put into this service are public.

WGCNA documentation built on March 1, 2021, 1:05 a.m.