R/Functions.R

Defines functions replaceMissing plotMultiHist multiPlot addErrorBars.2sided multiGrepl multiGrep multiSub multiGSub shortenStrings listRep formatLabels effectiveNChar prependZeros multiIntersect multiUnion metaAnalysis sBinary hierarchicalConsensusKME consensusKME nterleave qvalue.restricted prepComma rankPvalue metaZfunction spaste standardScreeningNumericTrait standardScreeningCensoredTime scaleFreeFitIndex vectorizeMatrix heatmap goodSamplesGenesMS goodSamplesGenes goodSamplesMS goodGenesMS goodSamples goodGenes randIndex choosenew numbers2colors plotEigengeneNetworks preservationNetworkConnectivity setCorrelationPreservation correlationPreservation addTraitToMEs keepCommonProbes blueWhiteRed redWhiteGreen greenWhiteRed greenBlackRed sizeGrWindow labeledBarplot labeledHeatmap.multiPage labeledHeatmap extend reverseVector reverseRows networkScreening networkScreeningGS hubGeneSignificance automaticNetworkScreeningGS automaticNetworkScreening simulateDatExpr5Modules simulateMultiExpr simulateDatExpr simulateSmallLayer simulateModule simulateEigengeneNetwork causalChildren alignExpr relativeCorPredictionSuccess corPredictionSuccess plotMEpairs plotDendroAndColors updateProgInd initProgInd nearestNeighborConnectivityMS nearestNeighborConnectivity addGuideLines addGrid propVarExplained corPvalueStudent corPvalueFisher verboseBarplot verboseBoxplot verboseScatterplot dynamicMergeCut displayColors panel.cor panel.hist stdErr addErrorBars clusterCoef signedKME checkAdjMat nPresent intramodularConnectivity.fromExpr intramodularConnectivity plotModuleSignificance plotNetworkHeatmap TOMplot plotClusterTreeSamples plotOrderedColors plotColorUnderTree cutreeStaticColor cutreeStatic unsignedAdjacency adjacency subsetTOM vectorTOM GTOMdist scaleFreePlot pickSoftThreshold pickHardThreshold softConnectivity sigmoidAdjacencyFunction signumAdjacencyFunction hierarchicalMergeCloseModules mergeCloseModules multiSetMEs checkSets permissiveDim fixDataStructure moduleNumber labels2colors normalizeLabels standardColors hierarchicalConsensusMEDissimilarity consensusMEDissimilarity turnDistVectorIntoMatrix turnVectorIntoDist equalizeQuantiles hierarchicalConsensusMEDissimilarity consensusMEDissimilarity orderMEsByHierarchicalConsensus consensusOrderMEs clustOrder orderMEs collectGarbage removeGreyME moduleEigengenes moduleColor.getMEprefix

Documented in addErrorBars addGrid addGuideLines addTraitToMEs adjacency alignExpr automaticNetworkScreening automaticNetworkScreeningGS blueWhiteRed checkAdjMat checkSets clusterCoef collectGarbage consensusKME consensusMEDissimilarity consensusOrderMEs corPredictionSuccess corPvalueFisher corPvalueStudent correlationPreservation cutreeStatic cutreeStaticColor displayColors dynamicMergeCut fixDataStructure formatLabels goodGenes goodGenesMS goodSamples goodSamplesGenes goodSamplesGenesMS goodSamplesMS greenBlackRed greenWhiteRed GTOMdist hierarchicalConsensusKME hierarchicalConsensusMEDissimilarity hierarchicalMergeCloseModules hubGeneSignificance initProgInd intramodularConnectivity intramodularConnectivity.fromExpr keepCommonProbes labeledBarplot labeledHeatmap labeledHeatmap.multiPage labels2colors mergeCloseModules metaAnalysis metaZfunction moduleColor.getMEprefix moduleEigengenes moduleNumber multiGrep multiGrepl multiGSub multiIntersect multiSetMEs multiSub multiUnion nearestNeighborConnectivity nearestNeighborConnectivityMS networkScreening networkScreeningGS normalizeLabels nPresent numbers2colors orderMEs orderMEsByHierarchicalConsensus pickHardThreshold pickSoftThreshold plotClusterTreeSamples plotColorUnderTree plotDendroAndColors plotEigengeneNetworks plotMEpairs plotModuleSignificance plotMultiHist plotNetworkHeatmap plotOrderedColors prepComma prependZeros preservationNetworkConnectivity propVarExplained qvalue.restricted randIndex rankPvalue redWhiteGreen relativeCorPredictionSuccess removeGreyME replaceMissing scaleFreeFitIndex scaleFreePlot setCorrelationPreservation shortenStrings sigmoidAdjacencyFunction signedKME signumAdjacencyFunction simulateDatExpr simulateDatExpr5Modules simulateEigengeneNetwork simulateModule simulateMultiExpr simulateSmallLayer sizeGrWindow softConnectivity spaste standardColors standardScreeningCensoredTime standardScreeningNumericTrait stdErr subsetTOM TOMplot unsignedAdjacency updateProgInd vectorizeMatrix vectorTOM verboseBarplot verboseBoxplot verboseScatterplot

# Categories of functions:
# . network construction (including connectivity calculation)
# . module detection
# . gene screening
# . data simulation
# . general statistical functions
# . visualization



#-----------------------------------------------------------------------------------------------
#
# Overall options and settings for the package
#
#-----------------------------------------------------------------------------------------------

.moduleColorOptions = list(MEprefix = "ME")

moduleColor.getMEprefix = function()
{
  .moduleColorOptions$MEprefix;
}

# ===================================================
#The function moduleEigengenes finds the first principal component (eigengene) in each 
# module defined by the colors of the input vector "colors".
# The theoretical underpinnings are described in Horvath, Dong, Yip (2005)
# http://www.genetics.ucla.edu/labs/horvath/ModuleConformity/
# This requires the R library impute

moduleEigengenes = function(expr, colors, impute = TRUE, nPC = 1, align = "along average",
                            excludeGrey = FALSE, grey = if (is.numeric(colors))  0 else "grey",
                            subHubs = TRUE, trapErrors = FALSE, 
                            returnValidOnly = trapErrors,
                            softPower = 6, scale = TRUE,
                            verbose = 0, indent = 0)
{
  spaces = indentSpaces(indent);

  if (verbose==1) 
     printFlush(paste(spaces, "moduleEigengenes: Calculating", nlevels(as.factor(colors)), 
                              "module eigengenes in given set."));
  if (is.null(expr))
  {  
    stop("moduleEigengenes: Error: expr is NULL. ");
  }
  if (is.null(colors))
  {  
    stop("moduleEigengenes: Error: colors is NULL. ");
  }

  if (is.null(dim(expr)) || length(dim(expr))!=2)
    stop("moduleEigengenes: Error: expr must be two-dimensional.");

  if (dim(expr)[2]!=length(colors))
    stop("moduleEigengenes: Error: ncol(expr) and length(colors) must be equal (one color per gene).");

  if (is.factor(colors))
  {
    nl = nlevels(colors);
    nlDrop = nlevels(colors[, drop = TRUE]);
    if (nl > nlDrop)
     stop(paste("Argument 'colors' contains unused levels (empty modules). ", 
                "Use colors[, drop=TRUE] to get rid of them."));
  }

  if (softPower < 0) stop("softPower must be non-negative");

  alignRecognizedValues =  c("", "along average");
  if (!is.element(align, alignRecognizedValues)) {
    printFlush(paste("ModulePrincipalComponents: Error:",
                "parameter align has an unrecognised value:", 
                align, "; Recognized values are ", alignRecognizedValues));
    stop()
  }

  maxVarExplained = 10;
  if (nPC>maxVarExplained)
    warning(paste("Given nPC is too large. Will use value", maxVarExplained));

  nVarExplained = min(nPC, maxVarExplained);
  modlevels=levels(factor(colors))
  if (excludeGrey)
    if (sum(as.character(modlevels)!=as.character(grey))>0) {
      modlevels = modlevels[as.character(modlevels)!=as.character(grey)]
    } else {
      stop(paste("Color levels are empty. Possible reason: the only color is grey",
                 "and grey module is excluded from the calculation."));
    }
  PrinComps = data.frame(matrix(NA,nrow=dim(expr)[[1]], ncol= length(modlevels))) 
  averExpr = data.frame(matrix(NA,nrow=dim(expr)[[1]], ncol= length(modlevels))) 
  varExpl= data.frame(matrix(NA, nrow= nVarExplained, ncol= length(modlevels)))
  validMEs = rep(TRUE, length(modlevels));
  validAEs = rep(FALSE, length(modlevels));
  isPC = rep(TRUE, length(modlevels));
  isHub = rep(FALSE, length(modlevels));
  validColors = colors;
  names(PrinComps)=paste(moduleColor.getMEprefix(), modlevels, sep="")
  names(averExpr)=paste("AE",modlevels,sep="")
  for(i in c(1:length(modlevels)) )
  {
    if (verbose>1) 
      printFlush(paste(spaces, "moduleEigengenes : Working on ME for module", modlevels[i]));
    modulename = modlevels[i]
    restrict1 = as.character(colors)== as.character(modulename)
    if (verbose > 2)
       printFlush(paste(spaces, " ...", sum(restrict1), "genes"));
    datModule = as.matrix(t(expr[, restrict1]));
    n = dim(datModule)[1]; p = dim(datModule)[2];
    pc = try( 
      {
        if (nrow(datModule)>1 && impute)
        {
          seedSaved = FALSE;
          if (exists(".Random.seed")) {
             saved.seed = .Random.seed;
             seedSaved = TRUE;
          }
          if (any(is.na(datModule))) 
          {
             if (verbose > 5) printFlush(paste(spaces, " ...imputing missing data"));
             datModule = impute.knn(datModule, k = min(10, nrow(datModule)-1))
             # some versions of impute.knn return a list and we need the data component:
             try( { if (!is.null(datModule$data)) datModule = datModule$data; }, silent = TRUE )
          }
          # The <<- in the next line is extremely important. Using = or <- will create a local variable of
          # the name .Random.seed and will leave the important global .Random.seed untouched.
          if (seedSaved) .Random.seed <<- saved.seed;
        }
        if (verbose > 5) printFlush(paste(spaces, " ...scaling"));
        if (scale) datModule=t(scale(t(datModule)));
        if (verbose > 5) printFlush(paste(spaces, " ...calculating SVD"));
        svd1 = svd(datModule, nu = min(n, p, nPC), nv = min(n, p, nPC));
        # varExpl[,i]= (svd1$d[1:min(n,p,nVarExplained)])^2/sum(svd1$d^2)
        if (verbose > 5) printFlush(paste(spaces, " ...calculating PVE"));
        veMat = cor(svd1$v[, c(1:min(n,p,nVarExplained))], t(datModule), use = "p") 
        varExpl[c(1:min(n,p,nVarExplained)),i]= rowMeans(veMat^2, na.rm = TRUE)
        # this is the first principal component
        svd1$v[,1]
      }, silent = TRUE);
    if (class(pc)=='try-error')
    {
      if ( (!subHubs) && (!trapErrors) ) stop(pc);
      if (subHubs)
      {
        if (verbose>0)
        {
          printFlush(paste(spaces, " ..principal component calculation for module", 
                                   modulename, "failed with the following error:"));
          printFlush(paste(spaces, "     ", pc, spaces,
                           " ..hub genes will be used instead of principal components."));
        }
        isPC[i] = FALSE;
        pc = try( 
        {
          scaledExpr = scale(t(datModule));
          covEx = cov(scaledExpr, use = "p");
          covEx[!is.finite(covEx)] = 0;
          modAdj = abs(covEx)^softPower;
          kIM = (rowMeans(modAdj, na.rm = TRUE))^3;
          if (max(kIM, na.rm = TRUE) > 1) kIM = kIM-1;
          kIM[is.na(kIM)] = 0;
          hub = which.max(kIM)
          alignSign = sign(covEx[, hub]);
          alignSign[is.na(alignSign)] = 0;
          isHub[i] = TRUE;
          pcxMat = scaledExpr * 
                matrix(kIM * alignSign, nrow = nrow(scaledExpr), ncol = ncol(scaledExpr), byrow = TRUE) /
                sum(kIM);
          pcx = rowMeans(pcxMat, na.rm = TRUE);
          varExpl[1, i] = mean(cor(pcx, t(datModule), use = "p")^2, na.rm = TRUE)
          pcx
        }, silent = TRUE);
      }
    }
    
    if (class(pc)=='try-error')
    {
      if (!trapErrors) stop(pc);
      if (verbose>0)
      {
        printFlush(paste(spaces, " ..ME calculation of module", modulename, 
                                 "failed with the following error:"));
        printFlush(paste(spaces, "     ", pc, spaces,
                         " ..the offending module has been removed."));
      }
      warning(paste("Eigengene calculation of module", modulename, 
                    "failed with the following error \n     ", 
                    pc, "The offending module has been removed.\n"));
      validMEs[i] = FALSE; isPC[i] = FALSE; isHub[i] = FALSE;
      validColors[restrict1] = grey;
    } else {
      PrinComps[, i] = pc;
      ae = try( 
      {
        if (isPC[i]) scaledExpr = scale(t(datModule));
        averExpr[, i] = rowMeans(scaledExpr, na.rm = TRUE);
        if (align == "along average")
        {
          if (verbose>4) printFlush(paste(spaces,
                          " .. aligning module eigengene with average expression."))
          corAve = cor(averExpr[,i], PrinComps[,i], use = "p");
          if (!is.finite(corAve)) corAve = 0;
          if (corAve<0) PrinComps[,i] = -PrinComps[,i]
        }
        0;
      }, silent = TRUE);
      if (class(ae)=='try-error')
      {
        if (!trapErrors) stop(ae);
        if (verbose>0)
        {
          printFlush(paste(spaces, " ..Average expression calculation of module", modulename,
                                   "failed with the following error:"));
          printFlush(paste(spaces, "     ", ae, spaces,
                           " ..the returned average expression vector will be invalid."));
        }
        warning(paste("Average expression calculation of module", modulename,
                      "failed with the following error \n     ",
                      ae, "The returned average expression vector will be invalid.\n"));
      }
      validAEs[i] = !(class(ae)=='try-error');
    }
  } 
  allOK = (sum(!validMEs)==0)
  if (returnValidOnly && sum(!validMEs)>0) 
  {
    PrinComps = PrinComps[, validMEs]
    averExpr = averExpr[, validMEs];
    varExpl = varExpl[, validMEs];
    validMEs = rep(TRUE, times = ncol(PrinComps));
    isPC = isPC[validMEs];
    isHub = isHub[validMEs];
    validAEs = validAEs[validMEs];
  }
  allPC = (sum(!isPC)==0);
  allAEOK = (sum(!validAEs)==0)
  list(eigengenes = PrinComps, averageExpr = averExpr, varExplained = varExpl, nPC = nPC, 
       validMEs = validMEs, validColors = validColors, allOK = allOK, allPC = allPC, isPC = isPC,
       isHub = isHub, validAEs = validAEs, allAEOK = allAEOK)
}

#---------------------------------------------------------------------------------------------
#
# removeGrey
#
#---------------------------------------------------------------------------------------------
# This function removes the grey eigengene from supplied module eigengenes.

removeGreyME = function(MEs, greyMEName = paste(moduleColor.getMEprefix(), "grey", sep=""))
{
  newMEs = MEs;
  if (is.vector(MEs) & mode(MEs)=="list")
  {
    warned = 0;
    newMEs = vector(mode = "list", length = length(MEs));
    for (set in 1:length(MEs))
    {
      if (!is.data.frame(MEs[[set]]$data))
        stop("MEs is a vector list but the list structure is missing the correct 'data' component."); 
      newMEs[[set]] = MEs[[set]];
      if (greyMEName %in% names(MEs[[set]]$data))
      {
         newMEs[[set]]$data = MEs[[set]]$data[, names(MEs[[set]]$data)!=greyMEName];
      } else {
         if (warned==0)
         {
           warning("removeGreyME: The given grey ME name was not found among the names of given MEs.");
           warned = 1;
         }
      }
    }
  } else {
    if (length(dim(MEs))!=2) stop("Argument 'MEs' has incorrect dimensions.")
    MEs = as.data.frame(MEs);
    if (greyMEName %in% names(MEs))
    {
       newMEs = MEs[, names(MEs)!=greyMEName];
    } else {
       warning("removeGreyME: The given grey ME name was not found among the names of given MEs.");
    }
  } 
 
  newMEs;
}
#-------------------------------------------------------------------------------------
#
#  ModulePrincipalComponents
#
#-------------------------------------------------------------------------------------
# Has been superseded by moduleEigengenes above.

# ===================================================
# This function collects garbage

collectGarbage=function(){while (gc()[2,4] != gc()[2,4] | gc()[1,4] != gc()[1,4]){}}

#--------------------------------------------------------------------------------------
#
# orderMEs
#
#--------------------------------------------------------------------------------------
#
# performs hierarchical clustering on MEs and returns the order suitable for plotting.

orderMEs = function(MEs, greyLast = TRUE, 
                    greyName = paste(moduleColor.getMEprefix(), "grey", sep=""), 
                    orderBy = 1, order = NULL, 
                    useSets = NULL, verbose = 0, indent = 0)
{
  spaces = indentSpaces(indent);

  if ("eigengenes" %in% names(MEs))
  {
     if (is.null(order))
     {
       if (verbose>0) printFlush(paste(spaces, "orderMEs: order not given, calculating using given set", 
                                          orderBy));
       corPC = cor(MEs$eigengenes, use="p")
       disPC = 1-corPC;
       order = .clustOrder(disPC, greyLast = greyLast, greyName = greyName);
     } 
   
     if (length(order)!=dim(MEs$eigengenes)[2])
       stop("orderMEs: given MEs and order have incompatible dimensions.");
    
     orderedMEs = MEs;
     orderedMEs$eigengenes = as.data.frame(MEs$eigengenes[,order]);
     colnames(orderedMEs$eigengenes) = colnames(MEs$eigengenes)[order];
     if (!is.null(MEs$averageExpr))
     {
       orderedMEs$averageExpr = as.data.frame(MEs$averageExpr[, order])
       colnames(orderedMEs$averageExpr) = colnames(MEs$data)[order];
     }
     if (!is.null(MEs$varExplained))
     {
       orderedMEs$varExplained = as.data.frame(MEs$varExplained[, order])
       colnames(orderedMEs$varExplained) = colnames(MEs$data)[order];
     }
     return(orderedMEs);
  } else {
     check = checkSets(MEs, checkStructure = TRUE, useSets = useSets);
     if (check$structureOK)
     {
        multiSet = TRUE;
     } else {
        multiSet = FALSE;
        MEs = fixDataStructure(MEs);
        useSets = NULL; orderBy = 1;
     }
   
     if (!is.null(useSets)) 
       if (is.na(match(orderBy, useSets))) orderBy = useSets[1];
   
     if (is.null(order))
     {
       if (verbose>0) printFlush(paste(spaces, "orderMEs: order not given, calculating using given set", 
                                          orderBy));
       corPC = cor(MEs[[orderBy]]$data, use="p")
       disPC = 1-corPC;
       order = .clustOrder(disPC, greyLast = greyLast, greyName = greyName);
     } 
   
     if (length(order)!=dim(MEs[[orderBy]]$data)[2])
       stop("orderMEs: given MEs and order have incompatible dimensions.");
    
     nSets = length(MEs);
     orderedMEs = MEs;
     if (is.null(useSets)) useSets = c(1:nSets);
     for (set in useSets) 
     {
       orderedMEs[[set]]$data = as.data.frame(MEs[[set]]$data[,order]);
       colnames(orderedMEs[[set]]$data) = colnames(MEs[[set]]$data)[order];
       if (!is.null(MEs[[set]]$averageExpr))
       {
         orderedMEs[[set]]$averageExpr = as.data.frame(MEs[[set]]$averageExpr[, order])
         colnames(orderedMEs[[set]]$averageExpr) = colnames(MEs[[set]]$data)[order];
       }
       if (!is.null(MEs[[set]]$varExplained))
       {
         orderedMEs[[set]]$varExplained = as.data.frame(MEs[[set]]$varExplained[, order])
         colnames(orderedMEs[[set]]$varExplained) = colnames(MEs[[set]]$data)[order];
       }
     }
     if (multiSet) {
       return(orderedMEs);
     } else {
       return(orderedMEs[[1]]$data);
     }
  }
}

#---------------------------------------------------------------------------------------------
#
# .clustOrder
#
#---------------------------------------------------------------------------------------------

.clustOrder = function(distM, greyLast = TRUE, 
                       greyName = paste(moduleColor.getMEprefix(), "grey", sep=""))
{
  distM = as.matrix(distM);
  distNames = dimnames(distM)[[1]];
  greyInd = match(greyName, distNames);
  if (greyLast && !is.na(greyInd)) 
  {
     clusterMEs = (greyName!=distNames);
     if (sum(clusterMEs)>1)
     {
       h = fastcluster::hclust(as.dist(distM[clusterMEs, clusterMEs]), method = "average");
       order = h$order;
       if (sum(order>=greyInd)>0) order[order>=greyInd] = order[order>=greyInd]+1;
       order = c(order, greyInd);
     } else if (ncol(distM)>1) 
     {
       if (greyInd==1)
       {
         order = c(2, 1)
       } else order = c(1, 2);
     } else order = 1;
  } else {
     if (length(distM)>1)
     {
       h = fastcluster::hclust(as.dist(distM), method = "average");
       order = h$order;
     } else order = 1;
  }
  order;

 # print(paste("names:", names(distM), collapse = ", "));
 # print(paste("order:", order, collapse=", "))
}
 
#---------------------------------------------------------------------------------------------
#
# consensusOrderMEs
#
#---------------------------------------------------------------------------------------------
# Orders MEs by the dendrogram of their consensus dissimilarity.

consensusOrderMEs = function(MEs, useAbs = FALSE, useSets = NULL, greyLast = TRUE, 
                             greyName = paste(moduleColor.getMEprefix(), "grey", sep=""), 
                             method = "consensus")
{
  # Debugging code:
  #printFlush("consensusOrderMEs:");
  #size = checkSets(MEs);
  #print(size);
  # end debuging code
  Diss = consensusMEDissimilarity(MEs, useAbs = useAbs, useSets = useSets, method = method);
  order = .clustOrder(Diss, greyLast, greyName);
  #print(order)
  orderMEs(MEs, greyLast = greyLast, greyName = greyName, order = order, useSets = useSets);
} 

orderMEsByHierarchicalConsensus = function(MEs, networkOptions, consensusTree,
                             greyName = "ME0", 
                             calibrate = FALSE)
{
  Diss = .hierarchicalConsensusMEDissimilarity(MEs, networkOptions, consensusTree,
                      greyName = greyName, calibrate = calibrate);
  order = .clustOrder(Diss, greyLast = TRUE, greyName = greyName);
  mtd.subset(MEs, , order);
}

#---------------------------------------------------------------------------------------------
#
# consensusMEDissimilarity
#
#---------------------------------------------------------------------------------------------
# This function calcualtes a consensus dissimilarity (i.e., correlation) among sets of MEs (more generally,
# any sets of vectors). 
# CAUTION: when not using absolute value, the minimum similarity will favor the large negative values!

consensusMEDissimilarity = function(MEs, useAbs = FALSE, useSets = NULL, method = "consensus")
{
  methods = c("consensus", "majority");
  m = charmatch(method, methods);
  if (is.na(m))
    stop("Unrecognized method given. Recognized values are", paste(methods, collapse =", "));

  nSets = length(MEs);
  MEDiss = vector(mode="list", length = nSets);
  if (is.null(useSets)) useSets = c(1:nSets);
  for (set in useSets)
  {
    if (useAbs)
    {
        diss = 1-abs(cor(MEs[[set]]$data, use="p"));
    } else
    {
        diss = 1-cor(MEs[[set]]$data, use="p");
    }
    MEDiss[[set]] = list(Diss = diss);
  }

  for (set in useSets)
    if (set==useSets[1])
    {
      ConsDiss = MEDiss[[set]]$Diss;
    } else {
      if (m==1) {
         ConsDiss = pmax(ConsDiss, MEDiss[[set]]$Diss);
      } else {
         ConsDiss = ConsDiss + MEDiss[[set]]$Diss;
      }
    }

  if (m==2) ConsDiss = ConsDiss/nSets;

  ConsDiss = as.data.frame(ConsDiss);
  names(ConsDiss) = names(MEs[[useSets[1]]]$data);
  rownames(ConsDiss) = names(MEs[[useSets[1]]]$data);

  ConsDiss;
}


hierarchicalConsensusMEDissimilarity = function(MEs, networkOptions, consensusTree,
              greyName = "ME0", calibrate = FALSE)
                                        
{
  nSets = checkSets(MEs)$nSets;

  if (inherits(networkOptions, "NetworkOptions")) 
      networkOptions = list2multiData(.listRep(networkOptions, nSets));

  .hierarchicalConsensusMEDissimilarity(MEs, networkOptions, consensusTree,
              greyName = greyName, calibrate = calibrate)
}



# Quantile normalization
# normalize each column such that (column) quantiles are the same 
# The final value for each quantile is the 'summaryType' of the corresponding quantiles across the columns



.equalizeQuantiles = function(data, summaryType = c("median", "mean"))
{
  summaryType = match.arg(summaryType);
  data.sorted = apply(data, 2, sort);
 
  if (summaryType == "median")
  {
    refSample = rowMedians(data.sorted, na.rm = TRUE)
  } else if (summaryType == "mean")
    refSample = rowMeans(data.sorted, na.rm = TRUE);

  ranks = round(colRanks(data, ties.method = "average", preserveShape = TRUE))
  out = refSample [ ranks ];
  dim(out) = dim(data);
  dimnames(out) = dimnames(data);

  out;
}

.turnVectorIntoDist = function(x, size, Diag, Upper)
{
   attr(x, "Size") = size;
   attr(x, "Diag") = FALSE;
   attr(x, "Upper") = FALSE;
   class(x) = c("dist", class(x))
   x;
}

.turnDistVectorIntoMatrix = function(x, size, Diag, Upper, diagValue)
{
  mat = as.matrix(.turnVectorIntoDist(x, size, Diag, Upper));
  if (!Diag) diag(mat) = diagValue;
  mat;
}
  
# This function calculates consensus dissimilarity of module eigengenes

.consensusMEDissimilarity = function(multiMEs, 
                                     useSets = NULL, 
                                     corFnc = cor, corOptions = list(use = 'p'),
                                     equalizeQuantiles = FALSE,
                                     quantileSummary = "mean",
                                     consensusQuantile = 0, useAbs = FALSE, 
                                     greyName = "ME0")
{
  nSets = checkSets(multiMEs)$nSets;
  useMEs = c(1:ncol(multiMEs[[1]]$data))[names(multiMEs[[1]]$data)!=greyName]
  useNames = names(multiMEs[[1]]$data)[useMEs];
  nUseMEs = length(useMEs);
#  if (nUseMEs<2) 
#    stop("Something is wrong: there are two or more proper modules, but less than two proper",
#         "eigengenes. Please check that the grey color label and module eigengene label", 
#         "are correct.");

  if (is.null(useSets)) useSets = c(1:nSets);
  nUseSets = length(useSets);
  MEDiss = array(NA, dim = c(nUseMEs, nUseMEs, nUseSets));
  for (set in useSets)
  {
    corOptions$x = multiMEs[[set]]$data[, useMEs];
    if (useAbs)
    {
        diss = 1-abs(do.call(corFnc, corOptions));
    } else {
        diss = 1-do.call(corFnc, corOptions);
    }
    MEDiss[, , set] = diss;
  }

  if (equalizeQuantiles)
  {
    distMat = apply(MEDiss, 3, function(x) {as.numeric(as.dist(x))} )
    dim(distMat) = c( nUseMEs * (nUseMEs-1)/2, nUseSets);
    normalized = .equalizeQuantiles(distMat, summaryType = quantileSummary);
    MEDiss = apply(normalized, 2, .turnDistVectorIntoMatrix, size = nUseMEs, Diag = FALSE, Upper = FALSE,
                                                             diagValue = 0);
  }
 
  ConsDiss = apply(MEDiss, c(1:2), quantile, probs = 1-consensusQuantile, names = FALSE, na.rm = TRUE);
  colnames(ConsDiss) = rownames(ConsDiss) = useNames;
  ConsDiss;
}
     

.hierarchicalConsensusMEDissimilarity = function(multiMEs, 
                                           networkOptions,
                                           consensusTree,
                                           greyName,
                                           calibrate)
{
  nSets = checkSets(multiMEs)$nSets;
  useMEs = which(mtd.colnames(multiMEs)!=greyName);
  useNames = mtd.colnames(multiMEs)[useMEs];
  nUseMEs = length(useMEs);
#  if (nUseMEs<2) 
#    stop("Something is wrong: there are two or more proper modules, but less than two proper",
#         "eigengenes. Please check that the grey color label and module eigengene label", 
#         "are correct.");

  if (!isMultiData(networkOptions, strict = FALSE))
    stop("'networkOptions' must be either a single list of class 'NetworkOptions'\n",
         "or a MultiData structure containing one such list per input set. ");

  if (length(networkOptions)!=nSets) 
    stop("Number of sets in 'multiMEs' and 'networkOptions' must be the same.");

  MEDiss = mtd.mapply(function(me, netOpt)
  {
    cor.me = do.call(netOpt$corFnc, 
            c(list(x = me), netOpt$corOptions));
    if (!grepl("signed", netOpt$networkType)) cor.me = abs(cor.me);
    cor.me;
  }, mtd.subset(multiMEs, , useMEs), networkOptions, returnList = TRUE);

  if (calibrate)
  {
    cons = hierarchicalConsensusCalculation(MEDiss, 
                consensusTree = consensusTree, 
                level = 1,
                # Return options: the data can be either saved or returned but not both.
                saveConsensusData = FALSE,
                keepIntermediateResults = FALSE,
                # Internal handling of data
                useDiskCache = FALSE, 
                # Behaviour
                collectGarbage = FALSE,
                verbose = 0, indent = 0)$consensusData
     cons = BD.getData(cons, blocks = 1);
  } else
     cons = simpleHierarchicalConsensusCalculation(MEDiss, consensusTree)
  consDiss = 1-cons;
  colnames(consDiss) = rownames(consDiss) = useNames;
  consDiss;
}
     



#======================================================================================================
# ColorHandler.R
#======================================================================================================

# A set of global variables and functions that should help handling color names for some 400+ modules.
# A vector called .GlobalStandardColors is defined that holds color names with first few entries 
# being the well-known and -loved colors. The rest is randomly chosen from the color names of R,
# excluding grey colors.

#---------------------------------------------------------------------------------------------------------
#
# .GlobalStandardColors 
#
#---------------------------------------------------------------------------------------------------------
# This code forms a vector of color names in which the first entries are given by BaseColors and the rest
# is "randomly" chosen from the rest of R color names that do not contain "grey" nor "gray".

BaseColors = c("turquoise","blue","brown","yellow","green","red","black","pink","magenta",
                "purple","greenyellow","tan","salmon","cyan", "midnightblue", "lightcyan",
                "grey60", "lightgreen", "lightyellow", "royalblue", "darkred", "darkgreen",
                "darkturquoise", "darkgrey",
                "orange", "darkorange", "white", "skyblue", "saddlebrown", "steelblue", 
                "paleturquoise", "violet", "darkolivegreen", "darkmagenta" );

RColors = colors()[-grep("grey", colors())];
RColors = RColors[-grep("gray", RColors)];
InBase = match(BaseColors, RColors);
ExtraColors = RColors[-c(InBase[!is.na(InBase)])];
nExtras = length(ExtraColors);

# Here is the vector of colors that should be used by all functions:

.GlobalStandardColors = c(BaseColors, ExtraColors[rank(sin(13*c(1:nExtras) +sin(13*c(1:nExtras))) )] );

standardColors = function(n = NULL)
{
  if (is.null(n)) return(.GlobalStandardColors);
  if ((n>0) && (n<=length(.GlobalStandardColors))) 
  {
    return(.GlobalStandardColors[c(1:n)]);
  } else {
    stop("Invalid number of standard colors requested.");
  }
}

rm(BaseColors, RColors, ExtraColors, nExtras, InBase);

#---------------------------------------------------------------------------------------------------------
#
# normalizeLabels
#
#---------------------------------------------------------------------------------------------------------
# "Normalizes" numerical labels such that the largest group is labeled 1, the next largest 2 etc.
# If KeepZero == TRUE, label zero is preserved.

normalizeLabels = function(labels, keepZero = TRUE)
{
  if (keepZero)
  {
    NonZero = (labels!=0);
  }
  else
  {
    NonZero = rep(TRUE, length(labels));
  }
  f = as.numeric(factor(labels[NonZero]));
  t = table(labels[NonZero]);
  # print(t)
  r = rank(-as.vector(t), ties.method = "first");
  norm_labs = rep(0, times = length(labels));
  norm_labs[NonZero] = r[f];
  norm_labs;
}
  
#---------------------------------------------------------------------------------------------------------
#
# labels2colors
#
#---------------------------------------------------------------------------------------------------------
# This function converts integer numerical labels labels into color names in the order either given by
# colorSeq,
# or (if colorSeq==NULL) by standardColors(). If GreyIsZero == TRUE, labels 0 will be assigned
# the color grey; otherwise presence of labels below 1 will trigger an error.
# dimensions of labels (if present) are preserved.

labels2colors = function(labels, zeroIsGrey = TRUE, colorSeq = NULL, naColor = "grey",
                         commonColorCode = TRUE)
{
  if (is.null(colorSeq)) colorSeq = standardColors();

  if (is.numeric(labels))
  {
    if (zeroIsGrey) minLabel = 0 else minLabel = 1
    if (any(labels<0, na.rm = TRUE)) minLabel = min(c(labels), na.rm = TRUE)
    nLabels = labels;
  } else {
    
    if (commonColorCode)
    {
      factors = factor(c(as.matrix(as.data.frame(labels))))
      nLabels = as.numeric(factors)
      dim(nLabels)= dim(labels);
    } else {
      labels = as.matrix(as.data.frame(labels));
      factors = list();
      for (c in 1:ncol(labels))
        factors[[c]] = factor(labels[, c]);
      nLabels = sapply(factors, as.numeric)
    }
  }
      
  if (max(nLabels, na.rm = TRUE) > length(colorSeq))
  {
     nRepeats = as.integer((max(labels)-1)/length(colorSeq)) + 1;
     warning(paste("labels2colors: Number of labels exceeds number of avilable colors.", 
                   "Some colors will be repeated", nRepeats, "times."))
     extColorSeq = colorSeq;
     for (rep in 1:nRepeats) 
       extColorSeq = c(extColorSeq, paste(colorSeq, ".", rep, sep=""));
  } else {
     nRepeats = 1;
     extColorSeq = colorSeq;
  }
  colors = rep("grey", length(nLabels));
  fin = !is.na(nLabels);
  colors[!fin] = naColor;
  finLabels = nLabels[fin];
  colors[fin][finLabels!=0] = extColorSeq[finLabels[finLabels!=0]];
  if (!is.null(dim(labels)))
    dim(colors) = dim(labels);
  
  colors;
}

#========================================================================================
#
# MergeCloseModules
#
#========================================================================================

#---------------------------------------------------------------------------------
#
# moduleNumber
#
#---------------------------------------------------------------------------------
# Similar to modulecolor2 above, but returns numbers instead of colors, which is oftentimes more useful.
# 0 means unassigned.
# Return value is a simple vector, not a factor.
# Caution: the module numbers are neither sorted nor sequential, the only guarranteed fact is that grey
# probes are labeled by 0 and all probes belonging to the same module have the same number.

moduleNumber = function(dendro, cutHeight = 0.9, minSize = 50)
{
  Branches = cutree(dendro, h = cutHeight);
  NOnBranches = table(Branches);
  TrueBranch = NOnBranches >= minSize;
  Branches[!TrueBranch[Branches]] = 0;
  
  Branches;
}

#--------------------------------------------------------------------------------------
#
# fixDataStructure
#
#--------------------------------------------------------------------------------------
# Check input data: if they are not a vector of lists, put them into the form of a vector of lists.

fixDataStructure = function(data, verbose = 0, indent = 0)
{
  spaces = indentSpaces(indent);
  if ((class(data)!="list") || (class(data[[1]])!="list"))
  {
    if (verbose>0)
      printFlush(paste(spaces, 
                       "fixDataStructure: data is not a vector of lists: converting it into one."));
    x = data;
    data = vector(mode = "list", length = 1);
    data[[1]] = list(data = x);
    rm(x);
  }
  data;
}

#-------------------------------------------------------------------------------------------
#
# checkSets
#
#-------------------------------------------------------------------------------------------
# Checks sets for consistency and returns some diagnostics.

.permissiveDim = function(x)
{
  d = dim(x);
  if (is.null(d)) return( c(length(x), 1))
  return(d)
}

checkSets = function(data, checkStructure = FALSE, useSets = NULL)
{
  nSets = length(data);
  if (is.null(useSets)) useSets = c(1:nSets);
  if (nSets<=0) stop("No data given.");
  structureOK = TRUE;
  if ((class(data)!="list") || (class(data[[useSets[1]]])!="list"))
  {
    if (checkStructure)
    {
      structureOK = FALSE;
      nGenes = 0; nSamples = 0;
    } else {
      stop("data does not appear to have the correct format. Consider using fixDataStructure",
           "or setting checkStructure = TRUE when calling this function.");
    }
  } else {
    nSamples = vector(length = nSets);
    nGenes = .permissiveDim(data[[useSets[1]]]$data)[2];
    for (set in useSets) 
    {
      if (nGenes!=.permissiveDim(data[[set]]$data)[2])
      {
        if (checkStructure) 
        {
           structureOK = FALSE;
        } else {
           stop(paste("Incompatible number of genes in set 1 and", set));
        }
      }
      nSamples[set] = .permissiveDim(data[[set]]$data)[1];
    }
  }

  list(nSets = nSets, nGenes = nGenes, nSamples = nSamples, structureOK = structureOK);
}


#--------------------------------------------------------------------------------------
#
# multiSetMEs
#
#--------------------------------------------------------------------------------------

multiSetMEs = function(exprData, colors, universalColors = NULL, useSets = NULL, 
                       useGenes = NULL, impute = TRUE, 
                       nPC = 1, align = "along average", excludeGrey = FALSE, 
                       grey = if (is.null(universalColors)) {if(is.numeric(colors)) 0 else "grey"} else
                                     if (is.numeric(universalColors)) 0 else "grey",
                       subHubs = TRUE,
                       trapErrors = FALSE, 
                       returnValidOnly = trapErrors,
                       softPower = 6,
                       verbose = 1, indent = 0)
{
  spaces = indentSpaces(indent);
  nSets = length(exprData);
  setsize = checkSets(exprData, useSets = useSets);
  nGenes = setsize$nGenes;
  nSamples = setsize$nSamples;
  if (verbose>0) printFlush(paste(spaces,"multiSetMEs: Calculating module MEs."));
  MEs = vector(mode="list", length=nSets);
  consValidMEs = NULL;
  if (!is.null(universalColors))
    consValidColors = universalColors;
  if (is.null(useSets)) useSets = c(1:nSets);
  if (is.null(useGenes))
  { 
    for (set in useSets) {
      if (verbose>0) printFlush(paste(spaces,"  Working on set", as.character(set), "...")); 
      if (is.null(universalColors)) {
        setColors = colors[,set];
      } else {
        setColors = universalColors; 
      }
      setMEs = moduleEigengenes(expr = exprData[[set]]$data,
                            colors = setColors, impute = impute, nPC = nPC, align = align, 
                            excludeGrey = excludeGrey, grey = grey,
                            trapErrors = trapErrors, subHubs = subHubs,
                            returnValidOnly = FALSE, softPower = softPower,
                            verbose = verbose-1, indent = indent+1);
      if (!is.null(universalColors) && (!setMEs$allOK))
      {
        if (is.null(consValidMEs)) {
          consValidMEs = setMEs$validMEs;
        } else {
          consValidMEs = consValidMEs * setMEs$validMEs;
        }
        consValidColors[setMEs$validColors!=universalColors] = 
            setMEs$validColors[setMEs$validColors!=universalColors]
      }
      MEs[[set]] = setMEs;
      names(MEs[[set]])[names(setMEs)=='eigengenes'] = 'data';
      # Here's what moduleEigengenes returns:
      #
      #  list(eigengenes = PrinComps, averageExpr = averExpr, varExplained = varExpl, nPC = nPC, 
      #       validMEs = validMEs, validColors = validColors, allOK = allOK, allPC = allPC, isPC = isPC,
      #       isHub = isHub, validAEs = validAEs, allAEOK = allAEOK)
    }
  } else {
    for (set in useSets) {
      if (verbose>0) printFlush(paste(spaces,"  Working on set", as.character(set), "...")); 
      if (is.null(universalColors)) {
        setColors = colors[useGenes ,set];
      } else {
        setColors = universalColors[useGenes]; 
      }
      setMEs = moduleEigengenes(expr = exprData[[set]]$data[, useGenes],
                            colors = setColors, impute = impute, nPC = nPC, align = align, 
                            excludeGrey = excludeGrey, grey = grey,
                            trapErrors = trapErrors, subHubs = subHubs,
                            returnValidOnly = FALSE, softPower = softPower,
                            verbose = verbose-1, indent = indent+1);
      if (!is.null(universalColors) && (!setMEs$allOK))
      {
        if (is.null(consValidMEs)) {
          consValidMEs = setMEs$validMEs;
        } else {
          consValidMEs = consValidMEs * setMEs$validMEs;
        }
        consValidColors[setMEs$validColors!=universalColors[useGenes]] = 
            setMEs$validColors[setMEs$validColors!=universalColors[useGenes]]
      }
      MEs[[set]] = setMEs;
      names(MEs[[set]])[names(setMEs)=='eigengenes'] = 'data';
    }
  }
  if (!is.null(universalColors))
  {
    for (set in 1:nSets)
    {
      if (!is.null(consValidMEs)) MEs[[set]]$validMEs = consValidMEs;
      MEs[[set]]$validColors = consValidColors;
    }
  } 
  for (set in 1:nSets)
  {
    MEs[[set]]$allOK = (sum(!MEs[[set]]$validMEs)==0); 
    if (returnValidOnly)
    {
      valid = (MEs[[set]]$validMEs > 0);
      MEs[[set]]$data = MEs[[set]]$data[, valid];
      MEs[[set]]$averageExpr = MEs[[set]]$averageExpr[, valid];
      MEs[[set]]$varExplained = MEs[[set]]$varExplained[, valid];
      MEs[[set]]$isPC =  MEs[[set]]$isPC[valid];
      MEs[[set]]$allPC = (sum(!MEs[[set]]$isPC)==0)
      MEs[[set]]$isHub =  MEs[[set]]$isHub[valid];
      MEs[[set]]$validAEs =  MEs[[set]]$validAEs[valid];
      MEs[[set]]$allAEOK = (sum(!MEs[[set]]$validAEs)==0)
      MEs[[set]]$validMEs = rep(TRUE, times = ncol(MEs[[set]]$data));
    }
  }

  names(MEs) = names(exprData);
  
  MEs;
}

#---------------------------------------------------------------------------------------------
#
# MergeCloseModules
#
#---------------------------------------------------------------------------------------------
                  

mergeCloseModules = function(
  # input data
  exprData, colors,

  # Optional starting eigengenes
  MEs = NULL,  

  # Optional restriction to a subset of all sets
  useSets = NULL, 

  # If missing data are present, impute them?
  impute = TRUE,

  # Input handling options
  checkDataFormat = TRUE, 
  unassdColor = if (is.numeric(colors)) 0 else "grey", 

  # Options for eigengene network construction
  corFnc = cor, corOptions = list(use = 'p'),
  useAbs = FALSE, 

  # Options for constructing the consensus
  equalizeQuantiles = FALSE,
  quantileSummary = "mean",
  consensusQuantile = 0, 

  # Merging options
  cutHeight = 0.2, 
  iterate = TRUE,

  # Output options
  relabel = FALSE, 
  colorSeq = NULL, 
  getNewMEs = TRUE,
  getNewUnassdME = TRUE,

  # Options controlling behaviour of the function
  trapErrors = FALSE,
  verbose = 1, indent = 0)
{

  MEsInSingleFrame = FALSE;
  spaces = indentSpaces(indent);

  #numCols = is.numeric(colors);
  #facCols = is.factor(colors);
  #charCols = is.character(colors);

  origColors = colors;

  colors = colors[, drop = TRUE];

  greyName = paste(moduleColor.getMEprefix(), unassdColor, sep="");

  if (verbose>0) printFlush(paste(spaces, 
            "mergeCloseModules: Merging modules whose distance is less than", cutHeight));

  if (verbose>3) printFlush(paste(spaces, 
            "  .. will look for grey label", greyName));

  if (!checkSets(exprData, checkStructure = TRUE, useSets = useSets)$structureOK)
  {
    if (checkDataFormat)
    {
      exprData = fixDataStructure(exprData);
      MEsInSingleFrame = TRUE;
    } else {
      stop("Given exprData appear to be misformatted.");
    }
  }

  setsize = checkSets(exprData, useSets = useSets);
  nSets = setsize$nSets;
  
  if (!is.null(MEs))
  {
    checkMEs = checkSets(MEs, checkStructure = TRUE, useSets = useSets);
    if (checkMEs$structureOK)
    {
      if (nSets!=checkMEs$nSets)
        stop("Input error: numbers of sets in exprData and MEs differ.")
      for (set in 1:nSets)
      {
        if (checkMEs$nSamples[set]!=setsize$nSamples[set])
            stop(paste("Number of samples in MEs is incompatible with subset length for set", set));
      }
    } else {
      if (MEsInSingleFrame)
      {
        MEs = fixDataStructure(MEs);
        checkMEs = checkSets(MEs);
      } else {
        stop("MEs do not have the appropriate structure (same as exprData). ");
      }
    }
  }

  if (setsize$nGenes!=length(colors))
    stop("Number of genes in exprData is different from the length of original colors. They must equal.");

  if ((cutHeight <0) | (cutHeight>(1+as.integer(useAbs)))) 
    stop(paste("Given cutHeight is out of sensible range between 0 and", 1+as.integer(useAbs) ));

  done = FALSE; iteration = 1;

  MergedColors = colors;
  ok = try( 
  {
    while (!done)
    {
      if (is.null(MEs)) 
      {
        MEs = multiSetMEs(exprData, colors = NULL, universalColors = colors,
                        useSets = useSets, impute = impute,
                        subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE, 
                        grey = unassdColor,
                        verbose = verbose-1, indent = indent+1);
        MEs = consensusOrderMEs(MEs, useAbs = useAbs, useSets = useSets, greyLast = FALSE);
        collectGarbage();
      } else if (nlevels(as.factor(colors))!=checkMEs$nGenes)
      {
        if ((iteration==1) & (verbose>0)) printFlush(paste(spaces, "  Number of given module colors", 
                  "does not match number of given MEs => recalculating the MEs."))
        MEs = multiSetMEs(exprData, colors = NULL, universalColors = colors,
                        useSets = useSets, impute = impute,
                        subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE,
                        grey = unassdColor,
                        verbose = verbose-1, indent = indent+1);
        MEs = consensusOrderMEs(MEs, useAbs = useAbs, useSets = useSets, greyLast = FALSE);
        collectGarbage();
      }
      if (iteration==1) oldMEs = MEs;
  
      # Check colors for number of distinct colors that are not grey
  
      colLevs = as.character(levels(as.factor(colors)));
      if  ( length(colLevs[colLevs!=as.character(unassdColor)])<2 )
      {
        printFlush(paste(spaces, 
           "mergeCloseModules: less than two proper modules."));
        printFlush(paste(spaces, " ..color levels are",
            paste(colLevs, collapse = ", ")));
        printFlush(paste(spaces, " ..there is nothing to merge."));
        MergedNewColors = colors;
        MergedColors = colors;
        nOldMods = 1; nNewMods = 1;
        oldTree = NULL; Tree = NULL;
        break;
      }
  
      # Cluster the found module eigengenes and merge ones that are too close according to the specified
      # quantile.
  
      nOldMods = nlevels(as.factor(colors));

      ConsDiss = .consensusMEDissimilarity(MEs, equalizeQuantiles = equalizeQuantiles, 
                                           quantileSummary = quantileSummary,
                                           consensusQuantile = consensusQuantile, useAbs = useAbs,
                                           corFnc = corFnc, corOptions = corOptions, 
                                           useSets = useSets, greyName = greyName);

      Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
      if (iteration==1) oldTree = Tree;
      TreeBranches = as.factor(moduleNumber(dendro = Tree, cutHeight = cutHeight, minSize = 1));
      UniqueBranches = levels(TreeBranches);
      nBranches = nlevels(TreeBranches)
      NumberOnBranch = table(TreeBranches);
      MergedColors = colors;
      
      # Merge modules on the same branch
  
      for (branch in 1:nBranches) if (NumberOnBranch[branch]>1)
      {
        ModulesOnThisBranch = names(TreeBranches)[TreeBranches==UniqueBranches[branch]];
        ColorsOnThisBranch = substring(ModulesOnThisBranch, 3);
        if (is.numeric(origColors)) ColorsOnThisBranch = as.numeric(ColorsOnThisBranch);
        if (verbose>3) 
           printFlush(paste(spaces, "  Merging original colors", 
                            paste(ColorsOnThisBranch, collapse=", ")));
        for (color in 2:length(ColorsOnThisBranch))
          MergedColors[MergedColors==ColorsOnThisBranch[color]] = ColorsOnThisBranch[1];
      }

      MergedColors = MergedColors[, drop = TRUE];
      
      nNewMods = nlevels(as.factor(MergedColors));
  
      if (nNewMods<nOldMods & iterate)
      {
        colors = MergedColors;
        MEs = NULL;
      } else {
        done = TRUE;
      }
      iteration = iteration+1;
    } 
    if (relabel) 
    {
       RawModuleColors = levels(as.factor(MergedColors));
       # relabel the merged colors to the usual order based on the number of genes in each module
       if (is.null(colorSeq)) 
       {
         if (is.numeric(origColors)) {
           colorSeq = c(1:length(table(origColors)));
         } else {
           nNewColors = length(RawModuleColors);
           colorSeq = labels2colors(c(1:nNewColors))
         }
       }
       
      # nGenesInModule = rep(0, nNewMods);
      # for (mod in 1:nNewMods) nGenesInModule[mod] = sum(MergedColors==RawModuleColors[mod]);
       nGenesInModule = table(MergedColors);
     
       SortedRawModuleColors = RawModuleColors[order(-nGenesInModule)]
    
       # Change the color names to the standard sequence, but leave grey grey 
       # (that's why rank in general does not equal color)
       MergedNewColors = MergedColors;
       if (is.factor(MergedNewColors)) MergedNewColors = as.character(MergedNewColors);
       if (verbose>3) printFlush(paste(spaces, "   Changing original colors:"));
       rank = 0;
       for (color in 1:length(SortedRawModuleColors)) if (SortedRawModuleColors[color]!=unassdColor)
       {
         rank = rank + 1;
         if (verbose>3) printFlush(paste(spaces, "      ", SortedRawModuleColors[color], 
                                    "to ", colorSeq[rank]));
         MergedNewColors[MergedColors==SortedRawModuleColors[color]] = colorSeq[rank];
       }
       if (is.factor(MergedColors)) MergedNewColors = as.factor(MergedNewColors);
    } else {
       MergedNewColors = MergedColors; 
    }
    MergedNewColors = MergedNewColors[, drop = TRUE];

    if (getNewMEs)
    {
      if (nNewMods<nOldMods | relabel | getNewUnassdME)
      {
        if (verbose>0) printFlush(paste(spaces, "  Calculating new MEs..."));
        NewMEs = multiSetMEs(exprData, colors = NULL, universalColors = MergedNewColors,
                             useSets = useSets, impute = impute, subHubs = TRUE, trapErrors = FALSE,
                             excludeGrey = !getNewUnassdME, grey = unassdColor,
                             verbose = verbose-1, indent = indent+1);
        newMEs = consensusOrderMEs(NewMEs, useAbs = useAbs, useSets = useSets, greyLast = TRUE,
                                   greyName = greyName);

        ConsDiss = .consensusMEDissimilarity(newMEs, 
                                             equalizeQuantiles = equalizeQuantiles,
                                             quantileSummary = quantileSummary,
                                             consensusQuantile = consensusQuantile, useAbs = useAbs,
                                             corFnc = corFnc, corOptions = corOptions, 
                                             useSets = useSets, greyName = greyName);
        if (length(ConsDiss) > 1)
        {
          Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
        } else Tree = NULL;
      } else {
        newMEs = MEs;
      }
    } else {
       newMEs = NULL;
    }
    if (MEsInSingleFrame) 
    {
      newMEs = newMEs[[1]]$data;
      oldMEs = oldMEs[[1]]$data;
    }
  }, silent = TRUE);

  if (class(ok)=='try-error')
  {
    if (!trapErrors) stop(ok);
    if (verbose>0)
    {
      printFlush(paste(spaces, "Warning: merging of modules failed with the following error:"));
      printFlush(paste('   ', spaces, ok));
      printFlush(paste(spaces, " --> returning unmerged modules and *no* eigengenes."));
    }
    warning(paste("mergeCloseModules: merging of modules failed with the following error:\n",
                  "    ", ok, " --> returning unmerged modules and *no* eigengenes.\n"));
    list(colors = origColors, allOK = FALSE);
  } else {
    list(colors = MergedNewColors, dendro = Tree, oldDendro = oldTree, cutHeight = cutHeight, 
         oldMEs = oldMEs, newMEs = newMEs, allOK = TRUE);
  }
}

  
#---------------------------------------------------------------------------------------------
#
# hierarchicalMergeCloseModules
#
#---------------------------------------------------------------------------------------------
                  

hierarchicalMergeCloseModules = function(
  # input data
  multiExpr, labels,

  # Optional starting eigengenes
  MEs = NULL,  

  unassdColor = if (is.numeric(labels)) 0 else "grey", 
  # If missing data are present, impute them?
  impute = TRUE,


  # Options for eigengene network construction
  networkOptions,

  # Options for constructing the consensus
  consensusTree,
  calibrateMESimilarities = FALSE,

  # Merging options
  cutHeight = 0.2, 
  iterate = TRUE,

  # Output options
  relabel = FALSE, 
  colorSeq = NULL, 
  getNewMEs = TRUE,
  getNewUnassdME = TRUE,

  # Options controlling behaviour of the function
  trapErrors = FALSE,
  verbose = 1, indent = 0)
{

  MEsInSingleFrame = FALSE;
  spaces = indentSpaces(indent);

  #numCols = is.numeric(labels);
  #facCols = is.factor(labels);
  #charCols = is.character(labels);

  origColors = labels;

  labels = labels[, drop = TRUE];

  greyName = paste(moduleColor.getMEprefix(), unassdColor, sep="");

  if (verbose>0) printFlush(paste(spaces, 
            "mergeCloseModules: Merging modules whose distance is less than", cutHeight));

  if (verbose>3) printFlush(paste(spaces, 
            "  .. will use unassigned ME label", greyName));

  setsize = checkSets(multiExpr);
  nSets = setsize$nSets;
  
  if (!is.null(MEs))
  {
    checkMEs = checkSets(MEs, checkStructure = TRUE);
    if (checkMEs$structureOK)
    {
      if (nSets!=checkMEs$nSets)
        stop("Input error: numbers of sets in multiExpr and MEs differ.")
      for (set in 1:nSets)
      {
        if (checkMEs$nSamples[set]!=setsize$nSamples[set])
            stop(paste("Number of samples in MEs is incompatible with subset length for set", set));
      }
    } else {
      if (MEsInSingleFrame)
      {
        MEs = fixDataStructure(MEs);
        checkMEs = checkSets(MEs);
      } else {
        stop("MEs do not have the appropriate structure (same as multiExpr). ");
      }
    }
  }

  if (inherits(networkOptions, "NetworkOptions")) 
      networkOptions = list2multiData(.listRep(networkOptions, nSets));

  if (setsize$nGenes!=length(labels))
    stop("Number of genes in multiExpr is different from the length of original labels. They must equal.");

  done = FALSE; iteration = 1;

  MergedColors = labels;
  #ok = try( 
  #{
    while (!done)
    {
      if (is.null(MEs)) 
      {
        MEs = multiSetMEs(multiExpr, colors = NULL, universalColors = labels,
                        impute = impute,
                        subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE, 
                        grey = unassdColor,
                        verbose = verbose-1, indent = indent+1);
        #MEs = consensusOrderMEs(MEs, useAbs = useAbs, greyLast = FALSE);
        #collectGarbage();
      } else if (nlevels(as.factor(labels))!=checkMEs$nGenes)
      {
        if ((iteration==1) & (verbose>0)) printFlush(paste(spaces, "  Number of given module labels", 
                  "does not match number of given MEs => recalculating the MEs."))
        MEs = multiSetMEs(multiExpr, colors = NULL, universalColors = labels,
                        impute = impute,
                        subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE,
                        grey = unassdColor,
                        verbose = verbose-1, indent = indent+1);
        #MEs = consensusOrderMEs(MEs, useAbs = useAbs, greyLast = FALSE);
        #collectGarbage();
      }
      if (iteration==1) oldMEs = MEs;
  
      # Check labels for number of distinct labels that are not grey
  
      colLevs = as.character(levels(as.factor(labels)));
      if  ( length(colLevs[colLevs!=as.character(unassdColor)])<2 )
      {
        printFlush(paste(spaces, 
           "mergeCloseModules: less than two proper modules."));
        printFlush(paste(spaces, " ..color levels are",
            paste(colLevs, collapse = ", ")));
        printFlush(paste(spaces, " ..there is nothing to merge."));
        MergedNewColors = labels;
        MergedColors = labels;
        nOldMods = 1; nNewMods = 1;
        oldTree = NULL; Tree = NULL;
        break;
      }
  
      # Cluster the found module eigengenes and merge ones that are too close according to the specified
      # quantile.
  
      nOldMods = nlevels(as.factor(labels));

      ConsDiss = .hierarchicalConsensusMEDissimilarity(MEs, networkOptions = networkOptions,
                                           consensusTree = consensusTree, 
                                           greyName = greyName,
                                           calibrate = calibrateMESimilarities);

      Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
      if (iteration==1) oldTree = Tree;
      TreeBranches = as.factor(moduleNumber(dendro = Tree, cutHeight = cutHeight, minSize = 1));
      UniqueBranches = levels(TreeBranches);
      nBranches = nlevels(TreeBranches)
      NumberOnBranch = table(TreeBranches);
      MergedColors = labels;
      
      # Merge modules on the same branch
  
      for (branch in 1:nBranches) if (NumberOnBranch[branch]>1)
      {
        ModulesOnThisBranch = names(TreeBranches)[TreeBranches==UniqueBranches[branch]];
        ColorsOnThisBranch = substring(ModulesOnThisBranch, 3);
        if (is.numeric(origColors)) ColorsOnThisBranch = as.numeric(ColorsOnThisBranch);
        if (verbose>3) 
           printFlush(paste(spaces, "  Merging original labels", 
                            paste(ColorsOnThisBranch, collapse=", ")));
        for (color in 2:length(ColorsOnThisBranch))
          MergedColors[MergedColors==ColorsOnThisBranch[color]] = ColorsOnThisBranch[1];
      }

      MergedColors = MergedColors[, drop = TRUE];
      
      nNewMods = nlevels(as.factor(MergedColors));
  
      if (nNewMods<nOldMods & iterate)
      {
        labels = MergedColors;
        MEs = NULL;
      } else {
        done = TRUE;
      }
      iteration = iteration+1;
    } 
    if (relabel) 
    {
       RawModuleColors = levels(as.factor(MergedColors));
       # relabel the merged labels to the usual order based on the number of genes in each module
       if (is.null(colorSeq)) 
       {
         if (is.numeric(origColors)) {
           colorSeq = c(1:length(table(origColors)));
         } else {
           nNewColors = length(RawModuleColors);
           colorSeq = labels2colors(c(1:nNewColors))
         }
       }
       
      # nGenesInModule = rep(0, nNewMods);
      # for (mod in 1:nNewMods) nGenesInModule[mod] = sum(MergedColors==RawModuleColors[mod]);
       nGenesInModule = table(MergedColors);
     
       SortedRawModuleColors = RawModuleColors[order(-nGenesInModule)]
    
       # Change the color names to the standard sequence, but leave grey grey 
       # (that's why rank in general does not equal color)
       MergedNewColors = MergedColors;
       if (is.factor(MergedNewColors)) MergedNewColors = as.character(MergedNewColors);
       if (verbose>3) printFlush(paste(spaces, "   Changing original labels:"));
       rank = 0;
       for (color in 1:length(SortedRawModuleColors)) if (SortedRawModuleColors[color]!=unassdColor)
       {
         rank = rank + 1;
         if (verbose>3) printFlush(paste(spaces, "      ", SortedRawModuleColors[color], 
                                    "to ", colorSeq[rank]));
         MergedNewColors[MergedColors==SortedRawModuleColors[color]] = colorSeq[rank];
       }
       if (is.factor(MergedColors)) MergedNewColors = as.factor(MergedNewColors);
    } else {
       MergedNewColors = MergedColors; 
    }
    MergedNewColors = MergedNewColors[, drop = TRUE];

    if (getNewMEs)
    {
      if (nNewMods<nOldMods | relabel | getNewUnassdME)
      {
        if (verbose>0) printFlush(paste(spaces, "  Calculating new MEs..."));
        NewMEs = multiSetMEs(multiExpr, colors = NULL, universalColors = MergedNewColors,
                             impute = impute, subHubs = TRUE, trapErrors = FALSE,
                             excludeGrey = !getNewUnassdME, grey = unassdColor,
                             verbose = verbose-1, indent = indent+1);
        newMEs = orderMEsByHierarchicalConsensus(NewMEs, networkOptions, 
                                   consensusTree,
                                   greyName = greyName, calibrate = calibrateMESimilarities);

        ConsDiss = .hierarchicalConsensusMEDissimilarity(newMEs, networkOptions = networkOptions,
                                           consensusTree = consensusTree, 
                                           greyName = greyName,
                                           calibrate = calibrateMESimilarities);
        if (length(ConsDiss) > 1)
        {
          Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
        } else Tree = NULL;
      } else {
        newMEs = MEs;
      }
    } else {
       newMEs = NULL;
    }
  #}, silent = TRUE);

  #if (class(ok)=='try-error')
  #{
  #  if (!trapErrors) stop(ok);
  #  if (verbose>0)
  #  {
  #    printFlush(paste(spaces, "Warning: merging of modules failed with the following error:"));
  #    printFlush(paste('   ', spaces, ok));
  #    printFlush(paste(spaces, " --> returning unmerged modules and *no* eigengenes."));
  #  }
  #  warning(paste("mergeCloseModules: merging of modules failed with the following error:\n",
  #                "    ", ok, " --> returning unmerged modules and *no* eigengenes.\n"));
  #  list(labels = origColors, allOK = FALSE);
  #} else {
    list(labels = MergedNewColors, dendro = Tree, oldDendro = oldTree, cutHeight = cutHeight, 
         oldMEs = oldMEs, newMEs = newMEs, allOK = TRUE);
  #}
}

  

# ===================================================
#For hard thresholding, we use the signum (step) function

signumAdjacencyFunction = function(corMat, threshold)  
{
  adjmat= as.matrix(abs(corMat)>=threshold)
  dimnames(adjmat) <- dimnames(corMat)
  diag(adjmat) <- 0
  adjmat
}

# ===================================================
# For soft thresholding, one can use the sigmoid function 
# But we have focused on the power adjacency function in the tutorial...
sigmoidAdjacencyFunction = function(ss, mu=0.8, alpha=20) 
{
   1/(1+exp(-alpha*(ss-mu)))
}

#This function is useful for speeding up the connectivity calculation.
#The idea is to partition the adjacency matrix into consecutive baches of a #given size.
#In principle, the larger the block size the faster is the calculation. But #smaller blockSizes require #less memory...
# Input: gene expression data set where *rows* correspond to microarray samples #and columns correspond to genes. 
# If fewer than minNSamples contain gene expression information for a given
# gene, then its connectivity is set to missing. 

softConnectivity=function(datExpr, 
                          corFnc = "cor", corOptions = "use = 'p'", 
                          type = "unsigned", 
                          power = if (type == "signed") 15 else 6, 
                          blockSize = 1500, minNSamples = NULL,
                          verbose = 2, indent = 0) 
{
  spaces = indentSpaces(indent);
  nGenes=dim(datExpr)[[2]]

  if (blockSize * nGenes>.largestBlockSize) blockSize = as.integer(.largestBlockSize/nGenes);
  nSamples=dim(datExpr)[[1]]
  if (is.null(minNSamples))
  {
    minNSamples = max(..minNSamples, nSamples/3);
  }

  if (nGenes<..minNGenes | nSamples<minNSamples ) 
     stop(paste("Error: Something seems to be wrong. \n", 
          "   Make sure that the input data frame has genes as rows and array samples as columns.\n",
          "   Alternatively, there seem to be fewer than", ..minNGenes, "genes or fewer than", 
              minNSamples, "samples. ") )
  if (nGenes<nSamples ) 
    printFlush("Warning: There are fewer genes than samples in the function softConnectivity. Maybe you should transpose the data?")


  k=rep(NA,nGenes)
  start = 1;
  if (verbose>0) { 
     printFlush(paste(spaces, "softConnectivity: FYI: connecitivty of genes with less than", 
                               ceiling(minNSamples), "valid samples will be returned as NA.")); 
     cat(paste(spaces, "..calculating connectivities..")); 
     pind = initProgInd();
  }
  while (start < nGenes)
  {
    end = min(start + blockSize-1, nGenes);
    index1=start:end;
    ad1 = adjacency(datExpr, index1, power = power, type = type, 
                    corFnc = corFnc, corOptions = corOptions);
    k[index1]=colSums(ad1, na.rm = TRUE)-1;
    # If fewer than minNSamples contain gene expression information for a given
    # gene, then we set its connectivity to 0.
    NoSamplesAvailable=colSums(!is.na(datExpr[,index1]))
    k[index1][NoSamplesAvailable< minNSamples]=NA
    if (verbose>0) pind = updateProgInd(end/nGenes, pind);
    start = end + 1;
  } 
  if (verbose > 0) printFlush("");
  k
} # end of function


# ==============================================================================
# The function PickHardThreshold can help one to estimate the cut-off value 
# when using the signum (step) function.
# The first column lists the threshold ("cut"),
# the second column lists the corresponding p-value based on the Fisher Transform 
# of the correlation. 
# The third column reports the resulting scale free topology fitting index R^2.
# The fourth column reports the slope of the fitting line, it shoud be negative for 
# biologically meaningul networks.
# The fifth column reports the fitting index for the truncated exponential model. 
# Usually we ignore it.
# The remaining columns list the mean, median and maximum resulting connectivity.
# To pick a hard threshold (cut) with the scale free topology criterion:
# aim for high scale free R^2 (column 3), high connectivity (col 6) and negative slope 
# (around -1, col 4).
# The output is a list with 2 components. The first component lists a sugggested cut-off
# while the second component contains the whole table.
# The removeFirst option removes the first point (k=0, P(k=0)) from the regression fit.
# nBreaks specifies how many intervals used to estimate the frequency p(k) i.e. the no. of points in the 
# scale free topology plot.

pickHardThreshold=function (data, dataIsExpr = TRUE, RsquaredCut = 0.85, cutVector = seq(0.1, 0.9, 
    by = 0.05), moreNetworkConcepts=FALSE , removeFirst = FALSE, nBreaks = 10, corFnc = "cor", 
    corOptions = "use = 'p'") 
{
    nGenes = dim(data)[[2]]
    colname1 = c("Cut", "p-value", "SFT.R.sq", "slope=", 
        "truncated R^2", "mean(k)", "median(k)", "max(k)")
if(moreNetworkConcepts) {
colname1=c(colname1,"Density", "Centralization", "Heterogeneity")
}
    if (!dataIsExpr)
    {
      checkAdjMat(data);
      if (any(diag(data)!=1)) diag(data) = 1;
    } else
      nSamples = dim(data)[[1]]

    datout = data.frame(matrix(NA, nrow = length(cutVector), 
        ncol = length(colname1)))
    names(datout) = colname1
    datout[, 1] = cutVector
    if (dataIsExpr)
    {
      for (i in 1:length(cutVector)) 
      {
          cut1 = cutVector[i]
          datout[i, 2] = 2 * (1 - pt(sqrt(nSamples - 1) * cut1/sqrt(1 - 
              cut1^2), nSamples - 1))
      }
    } else 
       datout[, 2] = NA;

    fun1 = function(x, dataIsExpr) {
        if (dataIsExpr)
        {
          corExpr = parse(text = paste(corFnc, "(x, data", 
              prepComma(corOptions), ")"))
          corx = abs(eval(corExpr))
        } else 
          corx = x;
        out1 = rep(NA, length(cutVector))
        for (j in c(1:length(cutVector))) {
            out1[j] = sum(corx > cutVector[j], na.rm = TRUE)
        }
        out1
    }
    datk = t(apply(data, 2, fun1, dataIsExpr))
    for (i in c(1:length(cutVector))) {
        khelp= datk[, i] - 1
        SFT1=scaleFreeFitIndex(k=khelp,nBreaks=nBreaks,removeFirst=removeFirst)
        datout[i, 3] = SFT1$Rsquared.SFT  
        datout[i, 4] = SFT1$slope.SFT 
        datout[i, 5] = SFT1$truncatedExponentialAdjRsquared
        datout[i, 6] = mean(khelp,na.rm = TRUE)
        datout[i, 7] = median(khelp,na.rm = TRUE)
        datout[i, 8] = max(khelp,na.rm = TRUE)
if(moreNetworkConcepts) { 
Density = sum(khelp)/(nGenes * (nGenes - 1))
datout[i, 9] =Density
Centralization = nGenes*(max(khelp)-mean(khelp))/((nGenes-1)*(nGenes-2))
datout[i, 10] = Centralization
Heterogeneity = sqrt(nGenes * sum(khelp^2)/sum(khelp)^2 - 1)
datout[i, 11] = Heterogeneity
}
    }
    datout = as.data.frame(lapply(datout, as.numeric));
    print(signif(data.frame(datout),3))
    ind1 = datout[, 3] > RsquaredCut
    indcut = NA
    indcut = if (sum(ind1) > 0) min(c(1:length(ind1))[ind1]) else indcut;
    cutEstimate = cutVector[indcut][[1]]
    list(cutEstimate = cutEstimate, fitIndices = data.frame(datout))
} # end of function pickHardThreshold


#==============================================================================================
#
# pickSoftThreshold
#
#===============================================================================================
# The function pickSoftThreshold allows one to estimate the power parameter when using
# a soft thresholding approach with the use of the power function AF(s)=s^Power
# The removeFirst option removes the first point (k=1, P(k=1)) from the regression fit.
# PL: a rewrite that splits the data into a few blocks.
# SH: more netowkr concepts added.
# PL: re-written for parallel processing
# Alexey Sergushichev: speed up by pre-calculating correlation powers

pickSoftThreshold = function (data, dataIsExpr = TRUE, RsquaredCut = 0.85, 
    powerVector = c(seq(1, 10, by = 1), seq(12, 20, by = 2)), removeFirst = FALSE, nBreaks = 10, 
    blockSize = NULL, corFnc = cor, corOptions = list(use = 'p'), 
    networkType = "unsigned", moreNetworkConcepts=FALSE, 
    gcInterval = NULL, verbose = 0, indent = 0)
{
    powerVector = sort(powerVector)
    intType = charmatch(networkType, .networkTypes)
    if (is.na(intType)) 
        stop(paste("Unrecognized 'networkType'. Recognized values are", 
            paste(.networkTypes, collapse = ", ")))
    nGenes = ncol(data);
    if (nGenes<3) 
    { 
       stop("The input data data contain fewer than 3 rows (nodes).", 
            "\nThis would result in a trivial correlation network." )
    }
    if (!dataIsExpr) 
    {
      checkSimilarity(data);
      if (any(diag(data)!=1)) diag(data) = 1;
    }

    if (is.null(blockSize))
    {
      blockSize = blockSize(nGenes, rectangularBlocks = TRUE, maxMemoryAllocation = 2^30);
      if (verbose > 0) 
        printFlush(spaste("pickSoftThreshold: will use block size ", blockSize, "."))
    }
    if (length(gcInterval)==0) gcInterval = 4*blockSize;
   
    colname1 = c("Power", "SFT.R.sq", "slope", "truncated R.sq", 
                 "mean(k)", "median(k)", "max(k)")
    if(moreNetworkConcepts) 
    {
         colname1=c(colname1,"Density", "Centralization", "Heterogeneity")
    }
    datout = data.frame(matrix(666, nrow = length(powerVector), ncol = length(colname1)))
    names(datout) = colname1
    datout[, 1] = powerVector
    spaces = indentSpaces(indent)
    if (verbose > 0) {
        cat(paste(spaces, "pickSoftThreshold: calculating connectivity for given powers..."))
        if (verbose == 1) pind = initProgInd()
        else cat("\n")
    }

    # if we're using one of WGNCA's own correlation functions, set the number of threads to 1.
    corFnc = match.fun(corFnc);
    corFormals = formals(corFnc);
    if ("nThreads" %in% names(corFormals)) corOptions$nThreads = 1;

    # Resulting connectivities
    datk = matrix(0, nrow = nGenes, ncol = length(powerVector))

    # Number of threads. In this case I need this explicitly.
    nThreads = WGCNAnThreads();

    nPowers = length(powerVector);

    # Main loop
    startG = 1
    lastGC = 0;
    while (startG <= nGenes) 
    {
      endG = min (startG + blockSize - 1, nGenes)

      if (verbose > 1) 
          printFlush(paste(spaces, "  ..working on genes", startG, "through", endG, "of", nGenes))

      nBlockGenes = endG - startG + 1;
      jobs = allocateJobs(nBlockGenes, nThreads);
      # This assumes that the non-zero length allocations
      # precede the zero-length ones
      actualThreads = which(sapply(jobs, length) > 0); 

      datk[ c(startG:endG), ] = foreach(t = actualThreads, .combine = rbind) %dopar% 
      {
        useGenes = c(startG:endG)[ jobs[[t]] ]
        nGenes1 = length(useGenes);
        if (dataIsExpr)
        {
          corOptions$x = data;
          corOptions$y = data[ , useGenes];
          corx = do.call(corFnc, corOptions);
          if (intType == 1) {
              corx = abs(corx)
          } else if (intType == 2) {
              corx = (1 + corx)/2
          } else if (intType == 3) {
              corx[corx < 0] = 0
          }
          if (sum(is.na(corx)) != 0) 
              warning(paste("Some correlations are NA in block", 
                  startG, ":", endG, "."));
        } else {
          corx = data[, useGenes];
        }
        datk.local = matrix(NA, nGenes1, nPowers);
        corxPrev = matrix(1, nrow=nrow(corx), ncol=ncol(corx))
        powerVector1 <- c(0, head(powerVector, -1))
        powerSteps <- powerVector - powerVector1
        uniquePowerSteps <- unique(powerSteps)
        corxPowers <- lapply(uniquePowerSteps, function(p) corx^p)
        names(corxPowers) <- uniquePowerSteps
        for (j in 1:nPowers) {
            corxCur <- corxPrev * corxPowers[[as.character(powerSteps[j])]]
            datk.local[, j] = colSums(corxCur, na.rm = TRUE) - 1
            corxPrev <- corxCur
        };
        datk.local
      } # End of %dopar% evaluation
      # Move to the next block of genes.
      startG = endG + 1
      if ((gcInterval > 0) && (startG - lastGC > gcInterval)) { gc(); lastGC = startG; }
      if (verbose == 1) pind = updateProgInd(endG/nGenes, pind)
    }
    if (verbose == 1) printFlush("");

    for (i in c(1:length(powerVector))) 
    {
        khelp= datk[, i] 
        SFT1=scaleFreeFitIndex(k=khelp,nBreaks=nBreaks,removeFirst=removeFirst)
        datout[i, 2] = SFT1$Rsquared.SFT  
        datout[i, 3] = SFT1$slope.SFT 
        datout[i, 4] = SFT1$truncatedExponentialAdjRsquared
        datout[i, 5] = mean(khelp,na.rm = TRUE)
        datout[i, 6] = median(khelp,na.rm = TRUE)
        datout[i, 7] = max(khelp,na.rm = TRUE)
        if(moreNetworkConcepts) 
        { 
           Density = sum(khelp)/(nGenes * (nGenes - 1))
           datout[i, 8] =Density
           Centralization = nGenes*(max(khelp)-mean(khelp))/((nGenes-1)*(nGenes-2))
           datout[i, 9] = Centralization
           Heterogeneity = sqrt(nGenes * sum(khelp^2)/sum(khelp)^2 - 1)
           datout[i, 10] = Heterogeneity
         }
    }
    print(signif(data.frame(datout),3))
    ind1 = datout[, 2] > RsquaredCut
    indcut = NA
    indcut = if (sum(ind1) > 0) min(c(1:length(ind1))[ind1]) else indcut;
    powerEstimate = powerVector[indcut][[1]]
    collectGarbage()
    list(powerEstimate = powerEstimate, fitIndices = data.frame(datout))
}


# ===================================================
# The function ScaleFreePlot1 creates a plot for checking scale free topology
# when truncated1 = TRUE is specificed, it provides the R^2 measures for the following
# degree distributions: a) scale free topology, b) log-log R^2 and c) truncated exponential R^2

# The function ScaleFreePlot1 creates a plot for checking scale free topology

scaleFreePlot = function(connectivity, nBreaks=10, truncated = FALSE, removeFirst = FALSE, main = "", ...)
{
  k = connectivity
  discretized.k = cut(k, nBreaks)
  dk = tapply(k, discretized.k, mean)
  p.dk = as.vector(tapply(k, discretized.k, length)/length(k))
  breaks1 = seq(from = min(k), to = max(k),
      length = nBreaks + 1)
  hist1 = suppressWarnings(hist(k, breaks = breaks1, equidist = FALSE, plot = FALSE, right = TRUE, ...))
  dk2 = hist1$mids
  dk = ifelse(is.na(dk), dk2, dk)
  dk = ifelse(dk == 0, dk2, dk)
  p.dk = ifelse(is.na(p.dk), 0, p.dk)
  log.dk = as.vector(log10(dk))
  if (removeFirst) {
      p.dk = p.dk[-1]
      log.dk = log.dk[-1]
  }
  log.p.dk= as.numeric(log10(p.dk + 1e-09))
  lm1 = lm(log.p.dk ~ log.dk)
  if (truncated==TRUE) 
  { 
    lm2 = lm(log.p.dk ~ log.dk + I(10^log.dk))
    OUTPUT=data.frame(scaleFreeRsquared=round(summary(lm1)$adj.r.squared,2),
                      slope=round(lm1$coefficients[[2]],2),
    TruncatedRsquared=round(summary(lm2)$adj.r.squared,2))
    printFlush("the red line corresponds to the truncated exponential fit")
    title = paste(main, 
                " scale free R^2=",as.character(round(summary(lm1)$adj.r.squared,2)),
                ", slope=", round(lm1$coefficients[[2]],2),
                ", trunc.R^2=",as.character(round(summary(lm2)$adj.r.squared,2)))
  } else { 
    title = paste(main, " scale R^2=",as.character(round(summary(lm1)$adj.r.squared,2)),
                  ", slope=", round(lm1$coefficients[[2]],2))
    OUTPUT=data.frame(scaleFreeRsquared=round(summary(lm1)$adj.r.squared,2),
                      slope=round(lm1$coefficients[[2]],2))
  }

  suppressWarnings(plot(log.dk, log.p.dk, xlab="log10(k)", ylab="log10(p(k))", main = title, ... ))
  lines(log.dk,predict(lm1),col=1)
  if (truncated) lines(log.dk, predict(lm2), col = 2)
  OUTPUT
} # end of function 





##############################################################################################
##############################################################################################
# B) Computing the topological overlap matrix 
##############################################################################################
##############################################################################################



# ===================================================
#The function TOMdist computes a dissimilarity 
# based on the topological overlap matrix (Ravasz et al)
# Input: an Adjacency matrix with entries in [0,1]
#
#  ************* Removed: use 1-TOMsimilarity(adjMat). ***********************
#
#TOMdist=function(adjMat, useActualMax = FALSE) 
#{
  #diag(adjMat)=0;
  #adjMat[is.na(adjMat)]=0;
  #maxh1=max(as.dist(adjMat) ); minh1=min(as.dist(adjMat) ); 
  #if (maxh1>1 | minh1 < 0 ) 
    #stop(paste("The adjacency matrix contains entries that are larger than 1 or",
               #"smaller than 0: max =",maxh1,", min =",minh1)) 
  #if ( max(c(as.dist(abs(adjMat-t(adjMat)))))>10^(-12) ) 
    #stop("Non-symmetric adjacency matrix. ") 
  #adjMat= (adjMat+ t(adjMat) )/2
  #connectivity=apply(adjMat,2,sum)
  #maxADJconst=1
  #if (useActualMax==TRUE) maxADJconst=max(c(as.dist(adjMat ))) 
  #Dhelp1=matrix(connectivity,ncol=length(connectivity),nrow=length(connectivity))
  #denomTOM= pmin(as.dist(Dhelp1),as.dist(t(Dhelp1)))   +as.dist(maxADJconst-adjMat); 
  #gc();gc();
  #numTOM=as.dist(adjMat %*% adjMat +adjMat);
  ##TOMmatrix=numTOM/denomTOM
  ## this turns the TOM matrix into a dissimilarity 
  #out1=1-as.matrix(numTOM/denomTOM) 
  #diag(out1)=1 
  ## setting the diagonal to 1 is unconventional (it should be 0)
  ## but it leads to nicer looking TOM plots... 
  #out1
#}

##---------------------------------------------------------------------------
## This is a somewhat modified TOMdist - most checks are left out as they are
## often not necessary.
#
#  ******* This function is not necessary anymore. Left out. ***********
#
#TOMdistNoChecks = function(adjMat, useActualMax = FALSE)
#{
  #diag(adjMat)=0;
  #adjMat[is.na(adjMat)]=0;
  #connectivity=apply(adjMat,2,sum)
  #maxADJconst=1
  #if (useActualMax==TRUE) maxADJconst=max(c(as.dist(adjMat )))
  #Dhelp1 = matrix(connectivity,ncol=length(connectivity),nrow=length(connectivity))
  #denomTOM = pmin(as.dist(Dhelp1),as.dist(t(Dhelp1))) + as.dist(maxADJconst-adjMat);
  #rm(Dhelp1);
  #numTOM=as.dist(adjMat %*% adjMat +adjMat);
  ##TOMmatrix=numTOM/denomTOM
  ## this turns the TOM matrix into a dissimilarity 
  #out1=1-as.matrix(numTOM/denomTOM)
  #rm(numTOM); rm(denomTOM);
  #collectGarbage();
  #diag(out1)=1
  ## setting the diagonal to 1 is unconventional (it should be 0)
  ## but it leads to nicer looking TOM plots... 
  #out1
#}

#---------------------------------------------------------------------------
# exact equivalent of TOMdistNoChecks above, but returns similarity.
# This function works with a generalized adjacency that can be signed.
# If the adjacency is signed, returned TOM will be signed as well (use abs(TOM) to get the usual unsigned
# topological overlap)
# If checkDiag and na.rm are turned both off, the function saves a bit of memory overhead.

# ************* this function is replaced by TOMsimilarity that calls compiled code.

#TOMsimilarity = function(adjMat, useActualMax = FALSE, checkDiag = TRUE, na.rm = TRUE)
#{
  #if (checkDiag) diag(adjMat) = 1;
  #if (na.rm) adjMat[is.na(adjMat)]=0;
  #absAdj = abs(adjMat);
  #connectivity=apply(absAdj,2,sum)-1;
  #maxADJconst=1
  #if (useActualMax==TRUE) maxADJconst=max(c(as.dist(absAdj )))
  #Dhelp1 = matrix(connectivity,ncol=length(connectivity),nrow=length(connectivity))
  #denomTOM = pmin(as.dist(Dhelp1),as.dist(t(Dhelp1))) + as.dist(maxADJconst-absAdj);
  #rm(Dhelp1);
  #numTOM=as.dist(adjMat %*% adjMat - adjMat);
  ##TOMmatrix=numTOM/denomTOM
  ## this turns the TOM matrix into a dissimilarity 
  #out1=as.matrix(numTOM/denomTOM)
  #rm(numTOM); rm(denomTOM);
  #collectGarbage();
  #diag(out1)=1
  #out1
#}


# ===================================================
# This function computes a TOMk dissimilarity
# which generalizes the topological overlap matrix (Ravasz et al)
# Input: an Adjacency matrix with entries in [0,1]
# WARNING:  ONLY FOR UNWEIGHTED NETWORKS, i.e. the adjacency matrix contains binary entries...
# This function is explained in Yip and Horvath (2005)
# http://www.genetics.ucla.edu/labs/horvath/GTOM/
GTOMdist = function(adjMat, degree = 1)
{
  maxh1=max(as.dist(adjMat) ); minh1=min(as.dist(adjMat) );
  if (degree!=round(abs(degree))) 
    stop("'degree' must be a positive integer.");
  if (maxh1>1 | minh1 < 0 )
    stop(paste("Entries of the adjacency matrix are not between 0 and 1: max =",
                 maxh1,", min =",minh1))

  if (  max(c(as.dist(abs(adjMat-t(adjMat)))))>0   ) 
    stop("Given adjacency matrix is not symmetric.")

  B <- adjMat;
  if (degree>=2) for (i in 2:degree) 
  {
          diag(B) <- diag(B) + 1;
          B = B %*% adjMat;# this gives the number of paths with length at most degree connecting a pair
  }   
  B <- (B>0);   # this gives the degree-step reachability from a node to another
  diag(B) <- 0;   # exclude each node being its own neighbor
  B <- B %*% B   # this gives the number of common degree-step-neighbor that a pair of nodes share

  Nk <- diag(B);
  B <- B +adjMat;   # numerator
  diag(B) <- 1;
  denomTOM=outer(Nk,Nk,FUN="pmin")+1-adjMat;
  diag(denomTOM) <- 1;
  1 - B/denomTOM   # this turns the TOM matrix into a dissimilarity
}

#=============================================================================================
#
# vectorTOM: calculate TOM of a vector (or a 'small' matrix) with expression
# data. If the number of columns in vect is small (or 1), number of columns in
# datExpr can be large.
#
#============================================================================================

vectorTOM = function(datExpr, vect, subtract1 = FALSE, blockSize = 2000, 
                     corFnc = "cor", corOptions = "use = 'p'", networkType = "unsigned", power = 6,
                     verbose = 1, indent = 0)
{
  spaces = indentSpaces(indent);

  intType = charmatch(networkType, .networkTypes)
  if (is.na(intType))
    stop(paste("Unrecognized 'networkType'. Recognized values are", paste(.networkTypes, collapse = ", ")));

  if (is.null(dim(vect)))
  {
     vect = as.matrix(vect) 
     vectIsVector = TRUE;
  } else vectIsVector = FALSE;

  if (nrow(vect)!=nrow(datExpr)) 
    stop("Input error: numbers of samples in 'vect' and 'datExpr' must be the same.");

  if (ncol(vect)>blockSize) 
    stop(paste("Input error: number of columns in 'vect' is too large. ",
               "If you are certain you want to try anyway, increase 'blockSize' to at least",
               "the number of columns in 'vect'."));

  corEval = parse(text = paste(corFnc, "(datExpr, vect ", prepComma(corOptions), ")"));
  corVE = eval(corEval);
  if (intType==1)
  { corVE = abs(corVE);
  } else if (intType==2)
  { corVE = (1+corVE)/2;
  } else if (intType==3)
  { corVE[corVE < 0] = 0;
  } else 
    stop("Unrecognized networkType argument. Recognized values are 'unsigned', 'signed', and 'signed hybrid'.");

  corVE = corVE^power;

  subtract1 = as.numeric(subtract1);

  nVect = ncol(vect); nGenes = ncol(datExpr);
  TOM = matrix(NA, nrow = nGenes, ncol = nVect);

  if (verbose > 0) {
     if (verbose > 1) cat(paste(spaces, "Calculating TOM of a set of vectors with genes"));
     pind = initProgInd();
  }
  start = 1; 
  denomArr = array(0, dim = c(2, blockSize, nVect));
  while (start <= nGenes)
  {
    end = min(start + blockSize-1, nGenes); 
    blockInd = c(start:end);
    corEval = parse(text = paste(corFnc, "(datExpr[, blockInd], datExpr ", prepComma(corOptions), ")"));
    corEE = eval(corEval);
    if (intType==1)
    { corEE = abs(corEE);
    } else if (intType==2)
    { corEE = (1+corEE)/2;
    } else if (intType==3)
    { corEE[corEE < 0] = 0;
    } 
    corEE = corEE^power;
    num = corEE %*% corVE -subtract1 * corVE[blockInd, ]
    kV = apply(corVE, 2, sum, na.rm = TRUE) - subtract1
    kE = apply(corEE, 1, sum, na.rm = TRUE) - 1;
    denomArr[1, 1:(end-start+1), ] = matrix(kV, nrow = end-start+1, ncol = nVect, byrow = TRUE);
    denomArr[2, 1:(end-start+1), ] = matrix(kE, nrow = end-start+1, ncol = nVect);
    denom = apply(denomArr[, 1:(end-start+1), ], c(2,3), min) + 1 - corVE[blockInd, ];
    TOM[blockInd, ] = num/denom;
    if (verbose > 0) pind = updateProgInd(end/nGenes, pind);
    start = end + 1;
    collectGarbage();
  }
  if (verbose>0) printFlush(" ");

  TOM;
}

#=============================================================================================
#
# subsetTOM: calculate TOM of a subset of vectors with respect to a full set of vectors.
#
#============================================================================================


subsetTOM = function(datExpr, subset, 
                    corFnc = "cor", corOptions = "use = 'p'", networkType = "unsigned", power = 6,
                    verbose = 1, indent = 0)
{
  spaces = indentSpaces(indent);

  if (!is.null(dim(subset)))
    stop("'subset' must be a dimensionless vector.");

  if (is.null(dim(datExpr)))
    stop("'datExpr' must be a matrix or data frame.");
  if (length(dim(datExpr))!=2)
    stop("'datExpr' must be two-dimensional.");

  nGenes = ncol(datExpr);

  if (is.logical(subset))
    subset = c(1:nGenes)[subset];

  nBlock = length(subset);

  if (any(!is.finite(subset))) stop("Entries of 'subset' must all be finite.");

  if (min(subset) < 1 | max(subset) > nGenes)
    stop(paste("Some entries of 'subset' are out of range.", 
         "\nNote: 'subset' must contain indices of the subset for which the TOM is calculated."));

  intType = charmatch(networkType, .networkTypes)
  if (is.na(intType))
    stop(paste("Unrecognized 'networkType'. Recognized values are", paste(.networkTypes, collapse = ", ")));

  adj = adjacency(datExpr, subset, power = power, type = networkType, corFnc = corFnc, 
                  corOptions = corOptions);

  adj[is.na(adj)] = 0;
  num = t(adj) %*% adj - adj[subset, ];

  k = apply(adj, 2, sum);

  kMat = matrix(k, nBlock, nBlock);

  denom = pmin(kMat, t(kMat)) - adj[subset, ];

  TOM = num/denom;
  diag(TOM) = 1;

  TOM;
}

#---------------------------------------------------------------------
#
# adjacency
#
#---------------------------------------------------------------------
# Computes the adjacency from the expression data: takes cor, transforms it as appropriate and possibly
# adds a sign if requested. No subselection on datExpr is performed.
# A slighly reworked version that assumes one wants the adjacency matrix of data with itself or a
# subset. The data are given only once, and an additional selection index for columns is given.
# Caution: no checking of selectCols validity is performed.
# The probability method is removed as it's not used.
 
adjacency = function(datExpr, selectCols=NULL, type = "unsigned", power = if (type=="distance") 1 else 6,
                     corFnc = "cor", corOptions = "use = 'p'",
                     distFnc = "dist", distOptions = "method = 'euclidean'")
{
  intType = charmatch(type, .adjacencyTypes)
  if (is.na(intType))
    stop(paste("Unrecognized 'type'. Recognized values are", paste(.adjacencyTypes, collapse = ", ")));

  corFnc.fnc = match.fun(corFnc);

  if (intType < 4)
  {
    if (is.null(selectCols))
    {
      if (is.list(corOptions))
      {
         cor_mat = do.call(corFnc.fnc, c(list(x = datExpr), corOptions))
      } else {
         corExpr = parse(text = paste(corFnc, "(datExpr ", prepComma(corOptions), ")"));
         # cor_mat = cor(datExpr, use = "p");
         cor_mat = eval(corExpr);
      }
    } else {
      if (is.list(corOptions))
      {
         cor_mat = do.call(corFnc.fnc, c(list(x = datExpr, y = datExpr[, selectCols]), corOptions))
      } else {
        corExpr = parse(text = paste(corFnc, "(datExpr, datExpr[, selectCols] ", prepComma(corOptions), ")"));
        #cor_mat = cor(datExpr, datExpr[, selectCols], use="p");
        cor_mat = eval(corExpr);
      }
    }
  } else {
    if (!is.null(selectCols)) 
      stop("The argument 'selectCols' cannot be used for distance adjacency.");
    if (is.list(distOptions))
    {
      d = do.call(distFnc, c(list(x = t(datExpr)), distOptions));
    } else {
      corExpr = parse(text = paste(distFnc, "(t(datExpr) ", prepComma(distOptions), ")"));
      # cor_mat = cor(datExpr, use = "p");
      d = eval(corExpr);
    }
    if (any(d<0)) 
      warning("Function WGCNA::adjacency: Distance function returned (some) negative values.");
    cor_mat = 1-as.matrix( (d/max(d, na.rm = TRUE))^2 );
  }

  if (intType==1)
  { cor_mat = abs(cor_mat); 
  } else if (intType==2)
  { cor_mat = (1+cor_mat)/2; 
  } else if (intType==3)
  { cor_mat[cor_mat < 0] = 0; 
  }
  cor_mat^power;
}

# A presumably faster and less memory-intensive version, only for "unsigned" networks.

unsignedAdjacency = function(datExpr, datExpr2 = NULL, power = 6,
                             corFnc = "cor", corOptions = "use = 'p'")
{
  corExpr = parse(text = paste(corFnc, "(datExpr, datExpr2 ", prepComma(corOptions), ")"));
  # abs(cor(datExpr, datExpr2, use="p"))^power;
  abs(eval(corExpr))^power;
}

#####################################################################################################
#####################################################################################################
# C) Defining gene modules using clustering procedures
#####################################################################################################
#####################################################################################################


cutreeStatic = function(dendro, cutHeight = 0.9, minSize = 50)
{
  normalizeLabels(moduleNumber(dendro, cutHeight, minSize));
}

cutreeStaticColor = function(dendro, cutHeight = 0.9, minSize = 50)
{
  labels2colors(normalizeLabels(moduleNumber(dendro, cutHeight, minSize)));
}

 
plotColorUnderTree = function( 
  dendro, 
   colors,
   rowLabels = NULL,
   rowWidths = NULL,
   rowText = NULL,
   rowTextAlignment = c("left", "center", "right"),
   rowTextIgnore = NULL,
   textPositions = NULL,
   addTextGuide = TRUE,
   cex.rowLabels = 1,
   cex.rowText = 0.8,
   ...)
{
  plotOrderedColors(
   dendro$order,
   colors = colors,
   rowLabels = rowLabels,
   rowWidths = rowWidths,
   rowText = rowText,
   rowTextAlignment = rowTextAlignment,
   rowTextIgnore = rowTextIgnore,
   textPositions = textPositions,
   addTextGuide = addTextGuide,
   cex.rowLabels = cex.rowLabels,
   cex.rowText = cex.rowText,
   startAt = 0,
   ...);
}


plotOrderedColors = function(
   order, 
   colors, 
   rowLabels = NULL, 
   rowWidths = NULL,
   rowText = NULL, 
   rowTextAlignment = c("left", "center", "right"),
   rowTextIgnore = NULL,
   textPositions = NULL, 
   addTextGuide = TRUE,
   cex.rowLabels = 1,
   cex.rowText = 0.8,
   startAt = 0,
   ...)
{
  colors = as.matrix(colors);
  dimC = dim(colors)

  if (is.null(rowLabels) & (length(dimnames(colors)[[2]])==dimC[2])) 
     rowLabels = colnames(colors);


  sAF = options("stringsAsFactors")
  options(stringsAsFactors = FALSE);
  on.exit(options(stringsAsFactors = sAF[[1]]), TRUE)

  nColorRows = dimC[2];
  if (length(order) != dimC[1] ) 
    stop("ERROR: length of colors vector not compatible with number of objects in 'order'.");
  C = colors[order, , drop = FALSE]; 
  step = 1/(dimC[1]-1 + 2*startAt);
  #barplot(height=1, col = "white", border=FALSE, space=0, axes=FALSE, ...)
  barplot(height=1, col = "white", border=FALSE, space=0, axes=FALSE)
  charWidth = strwidth("W")/2;
  if (!is.null(rowText))
  {
     if (is.null(textPositions)) textPositions = c(1:nColorRows);
     if (is.logical(textPositions)) textPositions = c(1:nColorRows)[textPositions];
     nTextRows = length(textPositions);
  } else 
     nTextRows = 0;
  nRows = nColorRows + nTextRows;
  ystep = 1/nRows;
  if (is.null(rowWidths)) 
  { 
    rowWidths = rep(ystep, nColorRows + nTextRows)
  } else
  {
    if (length(rowWidths)!=nRows) 
      stop("plotOrderedColors: Length of 'rowWidths' must equal the total number of rows.")
    rowWidths = rowWidths/sum(rowWidths);
  }

  hasText = rep(0, nColorRows);
  hasText[textPositions] = 1;
  csPosition = cumsum(c(0, hasText[-nColorRows]));
  
  colorRows = c(1:nColorRows) + csPosition;
  rowType = rep(2, nRows);
  rowType[colorRows] = 1;

  physicalTextRow = c(1:nRows)[rowType==2];

  yBottom = c(0, cumsum(rowWidths[nRows:1])) ;  # Has one extra entry but that shouldn't hurt
  yTop = cumsum(rowWidths[nRows:1]) 

  if (!is.null(rowText))
  {
     rowTextAlignment = match.arg(rowTextAlignment);
     rowText = as.matrix(rowText)
     textPos = list();
     textPosY = list();
     textLevs = list();
     for (tr in 1:nTextRows) 
     {
       charHeight = max(strheight(rowText[, tr], cex = cex.rowText));
       width1 = rowWidths[ physicalTextRow[tr] ];
       nCharFit = floor(width1/charHeight/1.7/par("lheight"));
       if (nCharFit<1) stop("Rows are too narrow to fit text. Consider decreasing cex.rowText.");
       set = textPositions[tr];
       #colLevs = sort(unique(colors[, set]));
       #textLevs[[tr]] = rowText[match(colLevs, colors[, set]), tr];
       textLevs[[tr]] = sort(unique(rowText[, tr]));
       textLevs[[tr]] = textLevs[[tr]] [ !textLevs[[tr]] %in% rowTextIgnore ];
       nLevs = length(textLevs[[tr]]);
       textPos[[tr]] = rep(0, nLevs);
       orderedText = rowText[order, tr]
       for (cl in 1:nLevs)
       {
         ind = orderedText == textLevs[[tr]][cl];
         sind = ind[-1];
         ind1 = ind[-length(ind)];
         starts = c( if (ind[1]) 1 else NULL, which(!ind1 & sind)+1)
         ends = which(c(ind1 & !sind, ind[length(ind)] ));
         if (length(starts)==0) starts = 1;
         if (length(ends)==0) ends = length(ind);
         if (ends[1] < starts[1]) starts = c(1, starts);
         if (ends[length(ends)] < starts[length(starts)]) ends = c(ends, length(ind));
         lengths = ends - starts;
         long = which.max(lengths);
         textPos[[tr]][cl] = switch(rowTextAlignment, 
                    left = starts[long],
                    center = (starts[long] + ends[long])/2 + 0.5,
                    right = ends[long]+1);
       }
       if (rowTextAlignment=="left") {
          yPos = seq(from = 1, to=nCharFit, by=1) / (nCharFit+1);
       } else {
          yPos = seq(from = nCharFit, to=1, by=-1) / (nCharFit+1);
       }
       textPosY[[tr]] = rep(yPos, ceiling(nLevs/nCharFit)+5)[1:nLevs][rank(textPos[[tr]])];
     }
  } 
        
  jIndex = nRows;

  if (is.null(rowLabels)) rowLabels = c(1:nColorRows);
  C[is.na(C)] = "grey"
  for (j in 1:nColorRows)
  {
    jj = jIndex;
    ind = (1:dimC[1]);
    xl = (ind-1.5+startAt) * step; xr = (ind-0.5+startAt) * step; 
    yb = rep(yBottom[jj], dimC[1]); yt = rep(yTop[jj], dimC[1]);
    if (is.null(dim(C))) {
       rect(xl, yb, xr, yt, col = as.character(C), border = as.character(C));
    } else {
       rect(xl, yb, xr, yt, col = as.character(C[,j]), border = as.character(C[,j]));
    }
    text(rowLabels[j], pos=2, x= -charWidth/2 +xl[1], y= (yBottom[jj] + yTop[jj])/2, 
         cex=cex.rowLabels, xpd = TRUE);
    textRow = match(j, textPositions);
    if (is.finite(textRow))
    {
      jIndex = jIndex - 1;
      xt = (textPos[[textRow]] - 1.5) * step;
      
      xt[xt<par("usr")[1]] = par("usr")[1];
      xt[xt>par("usr")[2]] = par("usr")[2];
     
      #printFlush(spaste("jIndex: ", jIndex, ", yBottom: ", yBottom[jIndex],
      #                  ", yTop: ", yTop[jIndex], ", min(textPosY): ", min(textPosY[[textRow]]),
      #                  ", max(textPosY): ", max(textPosY[[textRow]])));
      yt = yBottom[jIndex] + (yTop[jIndex]-yBottom[jIndex]) * (textPosY[[textRow]] + 1/(2*nCharFit+2));
      nt = length(textLevs[[textRow]]);
      # Add guide lines
      if (addTextGuide)
        for (l in 1:nt) lines(c(xt[l], xt[l]), c(yt[l], yTop[jIndex]), col = "darkgrey", lty = 3);

      textAdj = c(0, 0.5, 1)[ match(rowTextAlignment, c("left", "center", "right")) ];
      text(textLevs[[textRow]], x = xt, y = yt, adj = c(textAdj, 1), xpd = TRUE, cex = cex.rowText)
      # printFlush("ok");
    }
    jIndex = jIndex - 1;
  }
  for (j in 0:(nColorRows + nTextRows)) lines(x=c(0,1), y=c(yBottom[j+1], yBottom[j+1]));
}

#========================================================================================================
# This function can be used to create an average linkage hierarchical
# clustering tree
# or the microarray samples. The rows of datExpr correspond to the samples and
# the columns to the genes
# You can optionally input a quantitative microarray sample trait.

plotClusterTreeSamples=function(datExpr, y = NULL, traitLabels = NULL, yLabels = NULL, 
         main = if (is.null(y)) "Sample dendrogram" else "Sample dendrogram and trait indicator",
         setLayout = TRUE, autoColorHeight = TRUE, colorHeight = 0.3,
         dendroLabels = NULL,
         addGuide = FALSE, guideAll = TRUE, guideCount = NULL,
         guideHang = 0.20, cex.traitLabels = 0.8,
         cex.dendroLabels = 0.9, marAll = c(1,5,3,1),  saveMar = TRUE,
         abHeight = NULL, abCol = "red", ...) 
{
  dendro = fastcluster::hclust( dist( datExpr  ), method="average" )
  if (is.null(y) ) 
  {
    oldMar = par("mar");
    par(mar = marAll);
    plot(dendro, main=main, sub="", xlab = "", labels = dendroLabels, cex = cex.dendroLabels)
    if (saveMar) par(oldMar);
  } else {
    if (is.null(traitLabels)) traitLabels = names(as.data.frame(y));
    y = as.matrix(y);
    if (!is.numeric(y) ) 
    {
       warning(paste("The microarray sample trait y will be transformed to numeric."));
       dimy = dim(y)
       y=as.numeric(y)
       dim(y) = dimy;
    } # end of if (!is.numeric(y) )
    if (  nrow(as.matrix(datExpr)) != nrow(y) ) 
      stop(paste("Input Error: dim(as.matrix(datExpr))[[1]] != length(y)\n", 
                 "  In plain English: The number of microarray sample arrays does not match the number",
                 "of samples for the trait.\n",
                 "   Hint: Make sure rows of 'datExpr' (and 'y', if it is a matrix) correspond to samples."))

    if (is.integer(y))
    {
      y = y-min(0, min(y, na.rm = TRUE)) + 1;
    } else {
      y = (y>=median(y, na.rm = TRUE)) + 1;
    }
    plotDendroAndColors(dendro, colors = y, groupLabels = traitLabels, rowText = yLabels, 
                        setLayout = setLayout, 
                        autoColorHeight = autoColorHeight, colorHeight = colorHeight,
                        addGuide = addGuide, guideAll = guideAll, guideCount = guideCount, 
                        guideHang = guideHang, cex.colorLabels = cex.traitLabels,
                        cex.dendroLabels = cex.dendroLabels, marAll = marAll, 
                        saveMar = saveMar, abHeight = abHeight, abCol = abCol,
                        main = main,
                        ...);
  }
}# end of function PlotClusterTreeSamples

# ===================================================
# The function TOMplot creates a TOM plot
# Inputs:  distance measure, hierarchical (hclust) object, color label=colors

TOMplot = function(dissim, dendro, Colors=NULL, ColorsLeft = Colors, terrainColors=FALSE, 
                   setLayout = TRUE, ...) 
{
  if ( is.null(Colors) ) Colors=rep("white", dim(as.matrix(dissim))[[1]] )
  if ( is.null(ColorsLeft)) ColorsLeft = Colors;
  nNodes=length(Colors)
  if (nNodes<2) {
     warning("You have only 1 or 2 genes in TOMplot. No plot will be produced")
  } else {
     if (nNodes != length(ColorsLeft)) 
       stop("ERROR: number of (top) color labels does not equal number of left color labels")
     if (nNodes != dim(dissim)[[1]] ) 
       stop(paste("ERROR: number of color labels does not equal number of nodes in dissim.\n",
                  "     nNodes != dim(dissim)[[1]] "))
     labeltree = as.character(Colors)
     labelrow  = as.character(ColorsLeft)
     #labelrow[dendro$order[length(labeltree):1]]=labelrow[dendro$order]
     options(expressions = 10000)
     dendro$height = (dendro$height - min(dendro$height))/(1.15 *
                                     (max(dendro$height)-min(dendro$height)))
     if (terrainColors) {
       .heatmap(as.matrix(dissim), Rowv=as.dendrogram(dendro, hang = 0.1), 
                Colv= as.dendrogram(dendro, hang = 0.1), 
                scale="none", revC = TRUE, ColSideColors=as.character(labeltree),
                RowSideColors=as.character(labelrow), labRow=FALSE, labCol=FALSE, 
                col = terrain.colors(100), setLayout = setLayout, ...) 
     } else {
       .heatmap(as.matrix(dissim), Rowv=as.dendrogram(dendro, hang = 0.1), 
                Colv= as.dendrogram(dendro, hang = 0.1), 
               scale="none",revC = TRUE, ColSideColors=as.character(labeltree),
               RowSideColors=as.character(labelrow), labRow=FALSE, labCol=FALSE, setLayout = setLayout,
               ...)
     } #end of if
  }
} #end of function


plotNetworkHeatmap = function(datExpr,  plotGenes, useTOM = TRUE, power = 6 , 
                              networkType = "unsigned", main = "Heatmap of the network") 
{
  match1=match( plotGenes ,names(data.frame(datExpr)) )
  match1=match1[ !is.na(match1)]
  nGenes=length(match1)
  if (  sum( !is.na(match1) )  != length(plotGenes) ) 
  {
    printFlush(paste("Warning: Not all gene names were recognized.", 
                     "Only the following genes were recognized. "));
    printFlush(paste("   ", names(data.frame(datExpr))[match1], collapse = ", " ))
  }
  if (nGenes< 3 ) 
  { 
    warning(paste("Since you have fewer than 3 genes, the network will not be visualized.\n",
                  "   Hint: please input more genes.")); plot(1,1)
  } else {
    datErest=datExpr[, match1 ]
    ADJ1 = adjacency(datErest, power = power, type = networkType)
    if (useTOM) {
       diss1= 1-TOMsimilarity(ADJ1)   
    } else {
       diss1 = 1-ADJ1;
    }
    diag(diss1)=NA
    hier1=fastcluster::hclust(as.dist(diss1), method="average" )
    colors1=rep("white", nGenes)
    labeltree = names(data.frame(datErest))
    labelrow  = names(data.frame(datErest))
    labelrow[hier1$order[length(labeltree):1]]=labelrow[hier1$order]
    options(expressions = 10000)
    heatmap(as.matrix(diss1),Rowv=as.dendrogram(hier1),Colv= as.dendrogram(hier1), scale="none", revC = TRUE, 
            labRow= labeltree, labCol= labeltree,main=main)
  } # end of if (nGenes> 2 )
} # end of function

#####################################################################################################
#####################################################################################################
# E) Relating a measure of gene significance to the modules 
#####################################################################################################
#####################################################################################################

# ===================================================
# The function ModuleEnrichment1 creates a bar plot that shows whether modules are enriched with
# significant genes.
# More specifically, it reports the mean gene significance for each module.
# The gene significance can be a binary variable or a quantitative variable.
# It also plots the 95% confidence interval of the mean (CI=mean +/- 1.96* standard error).
# It also reports a Kruskal Wallis P-value.

plotModuleSignificance = function(geneSignificance, colors, boxplot = FALSE, 
                                  main = "Gene significance across modules,",
                                  ylab = "Gene Significance", ...)
{
  if (length(geneSignificance) != length(colors) ) 
    stop("Error: 'geneSignificance' and 'colors' do not have the same lengths")
  no.colors=length(names(table(colors) ))
  if (no.colors==1) pp=NA
  if (no.colors>1) 
  {
    pp=try(kruskal.test(geneSignificance,factor(colors))$p.value)
    if (class(pp)=='try-error') pp=NA
  }
  title = paste(main," p-value=", signif(pp,2), sep = "")
  if (boxplot != TRUE) {
    means1=as.vector(tapply(geneSignificance,colors,mean, na.rm = TRUE));
    se1= as.vector(tapply(geneSignificance,colors,stdErr))
    # par(mfrow=c(1,1))
    barplot(means1, names.arg=names(table(colors) ),col= names(table(colors) ) ,ylab=ylab, 
            main = title, ...)
    addErrorBars(as.vector(means1), as.vector(1.96*se1), two.side=TRUE)
  } else {
    boxplot(split(geneSignificance,colors),notch = TRUE,varwidth = TRUE, col= names(table(colors) ),ylab=ylab,
            main = title, ...)
  }
} # end of function

#####################################################################################################
#####################################################################################################
# F) Carrying out a within module analysis (computing intramodular connectivity etc) 
#####################################################################################################
#####################################################################################################

# ===================================================
#The function DegreeInOut computes for each gene 
#a) the total number of connections, 
#b) the number of connections with genes within its module, 
#c) the number of connections with genes outside its module
# When scaleByMax=TRUE, the within module connectivities are scaled to 1, i.e. the max(K.Within)=1 for each module

intramodularConnectivity = function(adjMat, colors, scaleByMax = FALSE) 
{
  if (nrow(adjMat)!=ncol(adjMat)) 
    stop("'adjMat' is not a square matrix.");
  if (nrow(adjMat)!=length(colors)) 
    stop("Dimensions of 'adjMat' and length of 'colors' differ.");
  nNodes=length(colors)
  colorLevels=levels(factor(colors))
  nLevels=length(colorLevels)
  kWithin=rep(-666,nNodes )
  diag(adjMat)=0
  for (i in c(1:nLevels) ) 
  {
    rest1=colors==colorLevels[i];
    if (sum(rest1) <3 ) { 
       kWithin[rest1]=0 
    } else {
       kWithin[rest1]=apply(adjMat[rest1,rest1], 2, sum, na.rm = TRUE)
       if (scaleByMax) kWithin[rest1]=kWithin[rest1]/max(kWithin[rest1])
    }
  }
  kTotal= apply(adjMat, 2, sum, na.rm = TRUE) 
  kOut=kTotal-kWithin
  if (scaleByMax) kOut=rep(NA, nNodes);
  kDiff=kWithin-kOut
  data.frame(kTotal,kWithin,kOut,kDiff)
}


intramodularConnectivity.fromExpr = function(datExpr, colors, 
                          corFnc = "cor", corOptions = "use = 'p'",
                          distFnc = "dist", distOptions = "method = 'euclidean'",
                          networkType = "unsigned", power = if (networkType=="distance") 1 else 6,
                          scaleByMax = FALSE,
                          ignoreColors = if (is.numeric(colors)) 0 else "grey",
                          getWholeNetworkConnectivity = TRUE)
{
  if (ncol(datExpr) !=length(colors))
    stop("Number of columns (genes) in 'datExpr' and length of 'colors' differ.");
  nNodes=length(colors)
  colorLevels=levels(factor(colors))
  colorLevels = colorLevels[!colorLevels %in% ignoreColors];
  nLevels=length(colorLevels)
  kWithin=rep(NA,nNodes )
  for (i in c(1:nLevels) )
  {
    rest1=colors==colorLevels[i];
    if (sum(rest1) <3 ) {
       kWithin[rest1]=0
    } else {
       adjMat = adjacency(datExpr[, rest1], type = networkType, power = power,
                          corFnc = corFnc, corOptions = corOptions,
                          distFnc = distFnc, distOptions = distOptions);
       kWithin[rest1]=colSums(adjMat, na.rm = TRUE)-1;
       if (scaleByMax) kWithin[rest1]=kWithin[rest1]/max(kWithin[rest1], na.rm = TRUE)
    }
  }
  if (getWholeNetworkConnectivity)
  {
    kTotal= softConnectivity(datExpr, corFnc = corFnc, corOptions = corOptions,
                           type = networkType, power = power);
    kOut=kTotal-kWithin
    if (scaleByMax) kOut=rep(NA, nNodes);
    kDiff=kWithin-kOut
    data.frame(kTotal,kWithin,kOut,kDiff)
  } else kWithin;
}



nPresent = function(x) 
{
  sum(!is.na(x))
}

checkAdjMat = function(adjMat, min = 0, max = 1)
{
  dim = dim(adjMat)
  if (is.null(dim) || length(dim)!=2 )
    stop("adjacency is not two-dimensional");
  if (!is.numeric(adjMat))
    stop("adjacency is not numeric");
  if (dim[1]!=dim[2])
    stop("adjacency is not square");
  if (max(abs(adjMat - t(adjMat)), na.rm = TRUE) > 1e-12)
    stop("adjacency is not symmetric");
  if (min(adjMat, na.rm = TRUE) < min || max(adjMat, na.rm = TRUE) > max)
    stop("some entries are not between", min, "and", max)
}
  


#####################################################################################################
#####################################################################################################
# G) Miscellaneous other functions, e.g. for computing the cluster coefficient.
#####################################################################################################
#####################################################################################################


# The function signedKME computes the module eigengene based connectivity.
# Input: datExpr= a possibly very large gene expression data set where the rows
# correspond to samples and the columns represent genes
# datME=data frame of module eigengenes (columns correspond to module eigengenes or MEs)
# A module eigengene based connectivity KME value will be computed if the gene has 
# a non missing expression value in at least minNSamples arrays.
# Output a data frame where columns are the KME values 
# corresponding to different modules.
# By splitting the expression data into different blocks, the function can deal with 
# tens of thousands of gene expression data. 
# If there are many eigengenes (say hundreds) consider decreasing the block size.

signedKME = function(datExpr, datME, outputColumnName="kME",
                     corFnc = "cor", corOptions = "use = 'p'") 
{
  datExpr=data.frame(datExpr)
  datME=data.frame(datME)
  output=list()
  if (dim(as.matrix(datME))[[1]] != dim(as.matrix(datExpr))[[1]] ) 
     stop("Number of samples (rows) in 'datExpr' and 'datME' must be the same.")
  varianceZeroIndicatordatExpr=as.vector(apply(as.matrix(datExpr),2,var, na.rm = TRUE))==0
  varianceZeroIndicatordatME =as.vector(apply(as.matrix(datME),2,var, na.rm = TRUE))==0
  if (sum(varianceZeroIndicatordatExpr,na.rm = TRUE)>0 ) 
    warning("Some genes are constant. Hint: consider removing constant columns from datExpr." )
  if (sum(varianceZeroIndicatordatME,na.rm = TRUE)>0 ) 
    warning(paste("Some module eigengenes are constant, which is suspicious.\n",
            "    Hint: consider removing constant columns from datME." ))
  no.presentdatExpr=as.vector(apply(!is.na(as.matrix(datExpr)),2, sum) )
  if (min(no.presentdatExpr)<..minNSamples ) 
    warning(paste("Some gene expressions have fewer than 4 observations.\n",
            "    Hint: consider removing genes with too many missing values or collect more arrays."))

  #output=data.frame(cor(datExpr, datME, use="p"))
  corExpr = parse(text = paste("data.frame(", corFnc, "(datExpr, datME ", prepComma(corOptions), "))" ));
  output = eval(corExpr);

  output[no.presentdatExpr<..minNSamples, ]=NA
  names(output)=paste(outputColumnName, substring(names(datME), first=3, last=100), sep="")  
  dimnames(output)[[1]] = names(datExpr) 
  output
} # end of function signedKME
 
 


# ===================================================
# The function clusterCoef computes the cluster coefficients.
# Input is an adjacency matrix 

clusterCoef=function(adjMat) 
{
  checkAdjMat(adjMat);
  diag(adjMat)=0
  nNodes=dim(adjMat)[[1]]
  computeLinksInNeighbors <- function(x, imatrix){x %*% imatrix %*% x}
  nolinksNeighbors <- c(rep(-666,nNodes))
  total.edge <- c(rep(-666,nNodes))
  maxh1=max(as.dist(adjMat) ); minh1=min(as.dist(adjMat) ); 
  if (maxh1>1 | minh1 < 0 ) 
    stop(paste("The adjacency matrix contains entries that are larger than 1 or smaller than 0: max =",
                maxh1,", min =",minh1))
  nolinksNeighbors <- apply(adjMat, 1, computeLinksInNeighbors, imatrix=adjMat)
  plainsum  <- apply(adjMat, 1, sum)
  squaresum <- apply(adjMat^2, 1, sum)
  total.edge = plainsum^2 - squaresum
  CChelp=rep(-666, nNodes)
  CChelp=ifelse(total.edge==0,0, nolinksNeighbors/total.edge)
  CChelp
} # end of function



# ===================================================
# The function addErrorBars  is used to create error bars in a barplot
# usage: addErrorBars(as.vector(means), as.vector(stderrs), two.side=FALSE)
addErrorBars<-function(means, errors, two.side=FALSE)
{
 if(!is.numeric(means)) {
      stop("All arguments must be numeric")}

 if(is.null(dim(means)) || length(dim(means))==1){ 
    xval<-(cumsum(c(0.7,rep(1.2,length(means)-1)))) 
 }else{
    if (length(dim(means))==2){
      xval<-cumsum(array(c(1,rep(0,dim(means)[1]-1)),
dim=c(1,length(means))))+0:(length(means)-1)+.5
    }else{
      stop("First argument must either be a vector or a matrix") }
 }
 MW<-0.25*(max(xval)/length(xval)) 
 ERR1<-means+errors
 ERR2<-means-errors
 for(i in 1:length(means)){
    segments(xval[i],means[i],xval[i],ERR1[i])
    segments(xval[i]-MW,ERR1[i],xval[i]+MW,ERR1[i])
    if(two.side){
      segments(xval[i],means[i],xval[i],ERR2[i])
      segments(xval[i]-MW,ERR2[i],xval[i]+MW,ERR2[i])
    } 
 } 
} 

# ===================================================
# this function computes the standard error
stdErr <- function(x){ sqrt( var(x,na.rm = TRUE)/sum(!is.na(x))   ) }

# ===================================================
# The following two functions are for displaying the pair-wise correlation in a panel when using the command "pairs()"
# Typically, we use "pairs(DATA, upper.panel=panel.smooth, lower.panel=.panel.cor, diag.panel=panel.hist)" to
# put the correlation coefficients on the lower panel.

.panel.hist <- function(x, ...){
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}

# ===================================================
# This function is used in "pairs()" function. The problem of the original  panel.cor is that 
# when the correlation coefficient is very small, the lower panel will have a large font 
# instead of a mini-font in a saved .ps file. This new function uses a format for corr=0.2 
# when corr<0.2, but it still reports the original value of corr, with a minimum format.

.panel.cor=function(x, y, digits=2, prefix="", cex.cor){
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y))
    txt <- format(c(r, 0.123456789), digits=digits)[1]
    txt <- paste(prefix, txt, sep="")
    txt1=txt
    r1=r
    if (r<0.2) {
        r1=0.2
        txt1 <- format(c(r1, 0.123456789), digits=digits)[1]
        txt1 <- paste(prefix, txt1, sep="")
        }
    if(missing(cex.cor)) cex <- 0.8/strwidth(txt1)
    cex = cex * r1
    r <- round(r, digits)
    txt <- format(c(r, 0.123456789), digits=digits)[1]
    txt <- paste(prefix, txt, sep="")
    text(0.5, 0.5, txt, cex=cex)
}

# ===================================================
# This function collects garbage
# collect_garbage=function(){collectGarbage()}

 
#---------------------------------------------------------------------------------------------------------
# This function plots a barplot with all colors given. If Colors are not given, GlobalStandardColors are
# used, i.e. if you want to see the GlobalStandardColors, just call this function without parameters.
displayColors = function(colors = NULL)
{
  if (is.null(colors)) colors = standardColors();
  barplot(rep(1, length(colors)), col = colors, border = colors);
}


###############################################################################
# I) Functions for merging modules based on a high correlation of the module eigengenes
###############################################################################

#---------------------------------------------------------------------------------------------
#
# dynamicMergeCut
#
#---------------------------------------------------------------------------------------------

dynamicMergeCut = function(n, mergeCor=.9, Zquantile=2.35) 
{
  if (mergeCor>1 | mergeCor<0 ) stop("'mergeCor' must be between 0 and 1.")
  if (mergeCor==1) 
  { 
    printFlush("dynamicMergeCut: given mergeCor=1 will be set to .999."); 
    mergeCor=.999
  }
  if (n<4 ) 
  {
    printFlush(paste("Warning in function dynamicMergeCut: too few observations for the dynamic",
                "assignment of the merge threshold.\n    Will set the threshold to .35"));
    mergethreshold=.35
  } else {
    # Fisher transform of the true merge correlation
    FishermergeCor=.5*log((1+mergeCor)/(1-mergeCor))
    E=exp(2*( FishermergeCor -Zquantile/sqrt(n-3)))
    LowerBoundCIcor=(E-1)/(E+1)
    mergethreshold=1- LowerBoundCIcor
  }
  if (mergethreshold>1) 1 else mergethreshold
}# end of function dynamicMergeCut 



#======================================================================================================
#
# print.flush
#
# =====================================================================================================

#print.flush = function(...)
#{
#   printFlush(...);
#}


##############################################################################################
# I) GENERAL STATISTICAL FUNCTIONS
##############################################################################################

verboseScatterplot = function(x, y, 
                             sample = NULL,
                             corFnc = "cor", corOptions = "use = 'p'",
                             main ="", xlab = NA, ylab = NA, cex=1, cex.axis = 1.5,
                             cex.lab = 1.5, cex.main = 1.5, abline = FALSE, 
                             abline.color = 1, abline.lty = 1,
                             corLabel = corFnc, 
                             displayAsZero = 1e-5,
                             col = 1, bg = 0, pch = 1,
                             lmFnc = lm,
                             plotPriority = NULL,
                             ...) 
{
  if ( is.na(xlab) ) xlab= as.character(match.call(expand.dots = FALSE)$x)
  if ( is.na(ylab) ) ylab= as.character(match.call(expand.dots = FALSE)$y)
  x= as.numeric(as.character(x))
  y= as.numeric(as.character(y))
  corExpr = parse(text = paste(corFnc, "(x, y ", prepComma(corOptions), ")"));
  #cor=signif(cor(x,y,use="p",method=correlationmethod),2)
  cor=signif(eval(corExpr),2)
  if (abs(cor) < displayAsZero) cor = 0;
  corp = signif(corPvalueStudent(cor, sum(is.finite(x) & is.finite(y))), 2);
  #corpExpr = parse(text = paste("cor.test(x, y, ", corOptions, ")"));
  #corp=signif(cor.test(x,y,use="p",method=correlationmethod)$p.value,2)
  #corp=signif(eval(corpExpr)$p.value,2)
  if (corp<10^(-200) ) corp="<1e-200" else corp = paste("=", corp, sep="");
  if (!is.na(corLabel))
  {
     mainX = paste(main, " ", corLabel, "=", cor, ", p",corp, sep="");
  } else
     mainX = main;

  if (length(col)<length(x)) col = rep(col, ceiling(length(x)/length(col)));
  if (length(pch)<length(x)) pch = rep(pch, ceiling(length(x)/length(pch)));
  if (length(cex)<length(x)) cex = rep(cex, ceiling(length(x)/length(cex)));
  if (length(bg )<length(x))  bg = rep(bg,  ceiling(length(x)/length(bg)));

  if (is.null(plotPriority)) plotPriority = rep(1, length(x)); 
  if (length(plotPriority)!=length(x))
    stop("When given, length of 'plotPriority' must equal length of 'x'.");

  if (!is.null(sample))
  {
    if (length(sample) == 1)
    {
      sample = sample(length(x), sample)
    } 
    priority1 = plotPriority[sample];
    order1 = order(priority1, na.last = TRUE);
    plot(x[sample][order1], y[sample][order1], main=mainX, xlab=xlab, ylab=ylab, 
         cex.axis=cex.axis, cex.lab=cex.lab, cex.main=cex.main, 
         col = col[sample][order1], bg = bg[sample][order1], pch = pch[sample][order1],
         cex = cex[sample][order1],
         ...)
  } else {
    order1 = order(plotPriority, na.last = TRUE);
    plot(x[order1], y[order1], main=mainX, xlab=xlab, ylab=ylab, 
         cex.axis=cex.axis, cex.lab=cex.lab, cex.main=cex.main, col = col[order1], bg = bg[order1], 
         pch = pch[order1], cex = cex[order1], ...)
  }
  if (abline)
  {
    lmFnc = match.fun(lmFnc);
    fit = lmFnc(y~x);
    abline(reg = fit, col = abline.color, lty = abline.lty);
  }
  invisible(sample);
}

verboseBoxplot = function(x, g,
                          main ="", xlab = NA, ylab = NA, cex=1, cex.axis = 1.5,
                          cex.lab = 1.5, cex.main = 1.5, notch = TRUE, varwidth = TRUE, ...,
                          addScatterplot = FALSE,
                          pt.cex = 0.8, pch = 21, pt.col = "blue", pt.bg = "skyblue",
                          randomSeed = 31425, jitter = 0.6) 
{
  if ( is.na(xlab) ) xlab= as.character(match.call(expand.dots = FALSE)$g)
  #print(xlab1)
  if ( is.na(ylab) ) ylab= as.character( match.call(expand.dots = FALSE)$x)
  #print(ylab1)
  p1 = signif(kruskal.test(x, factor(g) )$p.value,2)
  #if (p1< 5.0*10^(-22) ) p1="< 5e-22"
  boxp = boxplot(x~factor(g), notch = notch, varwidth = varwidth,
          main=paste(main,"p =",p1 ),
          xlab=xlab, ylab=ylab, cex=cex, cex.axis=cex.axis,cex.lab=cex.lab, cex.main=cex.main, ...);

  if (exists(".Random.seed"))
  {
    savedSeed = .Random.seed;
    set.seed(randomSeed);
    on.exit(.Random.seed <<- savedSeed);
  }

  set.seed(randomSeed)  # so we can make the identical plot again
  points(jitter(rep(1:ncol(boxp$stats),boxp$n), jitter), unlist(tapply(x, g, identity)),
         pch=pch, col=pt.col, bg = pt.bg, cex = pt.cex)

  invisible(boxp);
}


verboseBarplot = function (x, g,  main = "",
    xlab = NA, ylab = NA, cex = 1, cex.axis = 1.5, cex.lab = 1.5,
    cex.main = 1.5, color="grey", numberStandardErrors=1,
    KruskalTest=TRUE,  AnovaTest=FALSE, two.sided=TRUE, 
    addCellCounts=FALSE, horiz = FALSE, ylim = NULL, ...,
    addScatterplot = FALSE,
    pt.cex = 0.8, pch = 21, pt.col = "blue", pt.bg = "skyblue",
    randomSeed = 31425, jitter = 0.6,
    adjustYLim = TRUE) 
{
   g.factor = as.factor(g);
   stderr1 = function(x) {
        sqrt(var(x, na.rm = TRUE)/sum(!is.na(x)))
    }
    SE = tapply(x, g.factor, stderr1)
    err.bp = function(dd, error, two.sided = FALSE, numberStandardErrors, 
        horiz = FALSE) {
        if (!is.numeric(dd)) {
            stop("All arguments must be numeric")
        }
        if (is.vector(dd)) {
            xval = (cumsum(c(0.7, rep(1.2, length(dd) - 1))))
        }
        else {
            if (is.matrix(dd)) {
                xval = cumsum(array(c(1, rep(0, dim(dd)[1] - 
                  1)), dim = c(1, length(dd)))) + 0:(length(dd) - 
                  1) + 0.5
            }
            else {
                stop("First argument must either be a vector or a matrix")
            }
        }
        MW = 0.25 * (max(xval)/length(xval))
        NoStandardErrors = 1
        ERR1 = dd + numberStandardErrors * error
        ERR2 = dd - numberStandardErrors * error
        if (horiz) {
            for (i in 1:length(dd)) {
                segments(dd[i], xval[i], ERR1[i], xval[i])
                segments(ERR1[i], xval[i] - MW, ERR1[i], xval[i] + 
                  MW)
                if (two.sided) {
                  segments(dd[i], xval[i], ERR2[i], xval[i])
                  segments(ERR2[i], xval[i] - MW, ERR2[i], xval[i] + 
                    MW)
                }
            }
        }
        else {
            for (i in 1:length(dd)) {
                segments(xval[i], dd[i], xval[i], ERR1[i])
                segments(xval[i] - MW, ERR1[i], xval[i] + MW, 
                  ERR1[i])
                if (two.sided) {
                  segments(xval[i], dd[i], xval[i], ERR2[i])
                  segments(xval[i] - MW, ERR2[i], xval[i] + MW, 
                    ERR2[i])
                }
            }
        }
    }
    if (is.na(ylab)) 
        ylab = as.character(match.call(expand.dots = FALSE)$x)
    if (is.na(xlab)) 
        xlab = as.character(match.call(expand.dots = FALSE)$g)
    Means1 = tapply(x, g.factor, mean, na.rm = TRUE)

    if (length(unique(x)) > 2) {
        p1 = signif(kruskal.test(x ~ g.factor)$p.value, 2)
        if (AnovaTest) 
            p1 = signif(anova(lm(x ~ g.factor))$Pr[[1]], 2)
    }
    else {
        p1 = tryCatch(signif(fisher.test(x, g, alternative = "two.sided")$p.value, 
            2), error = function(e) {
            NA
        })
    }
    if (AnovaTest | KruskalTest) 
        main = paste(main, "p =", p1)
    maxSE = max(as.vector(SE), na.rm = TRUE);
    if (is.null(ylim)) 
    {
      if (addScatterplot && adjustYLim) {
        ylim = range(x, na.rm = TRUE);
        d = ylim[2] -ylim[1];
        ylim = ylim + c(-d/50, d/50);
      } else {
        ylim = range(Means1,na.rm = TRUE) + c(-maxSE, maxSE) * numberStandardErrors * (numberStandardErrors>0);
        if (ylim[1] > 0) ylim[1] = 0;
        if (ylim[2] <0) ylim[2] = 0;
      }
    }
    ret = barplot(Means1, main = main, col = color, xlab = xlab, 
        ylab = ylab, cex = cex, cex.axis = cex.axis, cex.lab = cex.lab, 
        cex.main = cex.main, horiz = horiz, ylim = ylim,
        ...)
    if (addCellCounts) {
       cellCountsF = function(x) {  sum(!is.na(x)) }
       cellCounts=tapply(x, g.factor, cellCountsF)
       mtext(text=cellCounts,side=if(horiz) 2 else 1,outer=FALSE,at=ret, col="darkgrey",las=2,cex=.8,...)
    } # end of if (addCellCounts)
    abline(h = 0)
    if (numberStandardErrors > 0) {
        err.bp(as.vector(Means1), as.vector(SE), two.sided = two.sided, 
            numberStandardErrors = numberStandardErrors, horiz = horiz)
    }
    if (exists(".Random.seed"))
    {
      savedSeed = .Random.seed;
      set.seed(randomSeed);
      on.exit(.Random.seed <<- savedSeed);
    }

    x.list = tapply(x, g, identity);
    nPerGroup = sapply(x.list, length);
    set.seed(randomSeed)  # so we can make the identical plot again
    points(jitter(rep(ret, nPerGroup), jitter), unlist(x.list),
           pch=pch, col=pt.col, bg = pt.bg, cex = pt.cex)

    attr(ret, "height") = as.vector(Means1)
    attr(ret, "stdErr") = as.vector(SE)
    invisible(ret)
}

#=============================================================================================
#
# Correlation p-value for multiple correlation values
#
#=============================================================================================

corPvalueFisher = function(cor, nSamples, twoSided = TRUE)
{
  if (sum(abs(cor)>1, na.rm = TRUE)>0)
    stop("Some entries in 'cor' are out of normal range -1 to 1.");
  if (twoSided)
  {
     z = abs(0.5 * log((1+cor)/(1-cor)) * sqrt(nSamples-3));
     2 * pnorm(-z);
  } else {
     # return a small p-value for positive correlations
     z = -0.5 * log((1+cor)/(1-cor)) * sqrt(nSamples-3); 
     pnorm(-z);
  }
}

# this function compute an asymptotic p-value for a given correlation (r) and sample size (n) 
# Needs a new name before we commit it to the package.

corPvalueStudent = function(cor, nSamples) 
{
  T=sqrt(nSamples-2) * cor/sqrt(1-cor^2)
  2*pt(abs(T),nSamples-2, lower.tail = FALSE)
}


#########################################################################################

propVarExplained = function(datExpr, colors, MEs, corFnc = "cor", corOptions = "use = 'p'")
{
  fc = as.factor(colors);
  mods = levels(fc);
  nMods = nlevels(fc);
  nGenes = ncol(datExpr);
  if (nMods!=ncol(MEs))
    stop(paste("Input error: number of distinct 'colors' differs from\n", 
               "     the number of module eigengenes given in ME."));

  if (ncol(datExpr)!=length(colors))
    stop("Input error: number of probes (columns) in 'datExpr' differs from the length of goven 'colors'.");

  if (nrow(datExpr)!=nrow(MEs))
    stop("Input error: number of observations (rows) in 'datExpr' and 'MEs' differ.");

  PVE = rep(0, nMods);

  col2MEs = match(mods, substring(names(MEs), 3));

  if (sum(is.na(col2MEs))>0)
    stop("Input error: not all given colors could be matched to names of module eigengenes.");

  for (mod in 1:nMods)
  {
    modGenes = c(1:nGenes)[as.character(colors)==mods[mod]];
    corExpr = parse(text = paste(corFnc, "(datExpr[, modGenes], MEs[, col2MEs[mod]]",
                                 prepComma(corOptions), ")"));
    PVE[mod] = mean(as.vector(eval(corExpr)^2));
  }

  names(PVE) = paste("PVE", mods, sep = "");
  PVE
}
 

#===================================================================================
#
# addGrid
#
#===================================================================================
# This function adds a horizontal grid to a plot 

addGrid = function(linesPerTick = NULL, horiz = TRUE, vert = FALSE, col = "grey30", lty = 3)
{
  box = par("usr");
  if (horiz)
  {
    ticks = par("yaxp");
    nTicks = ticks[3];
    if (is.null(linesPerTick))
    {
       if (nTicks < 6) linesPerTick = 5 else linesPerTick = 2;
    }
    spacing = (ticks[2]-ticks[1])/(linesPerTick*nTicks);
    first = ceiling((box[3] - ticks[1])/spacing);
    last = floor((box[4] - ticks[1])/spacing);
    #print(paste("addGrid: first=", first, ", last =", last, "box = ", paste(signif(box,2), collapse = ", "), 
                #"ticks = ", paste(signif(ticks, 2), collapse = ", "), "spacing =", spacing ));
    for (k in first:last)
      lines(x = box[c(1,2)], y = rep(ticks[1] + spacing * k, 2), 
            col = col, lty = lty);
  }
  if (vert)
  {
    ticks = par("xaxp");
    nTicks = ticks[3];
    if (is.null(linesPerTick))
    {
       if (nTicks < 6) linesPerTick = 5 else linesPerTick = 2;
    }
    spacing = (ticks[2]-ticks[1])/(linesPerTick*ticks[3]);
    first = ceiling((box[1] - ticks[1])/spacing);
    last = floor((box[2] - ticks[1])/spacing);
    #print(paste("addGrid: first=", first, ", last =", last, "box = ", paste(signif(box,2), collapse = ", "), 
    #            "ticks = ", paste(signif(ticks, 2), collapse = ", "), "spacing =", spacing ));
    for (l in first:last)
      lines(x = rep(ticks[1] + spacing * l, 2), y = box[c(3,4)],
            col = col, lty = lty);
  }

}

#-----------------------------------------------------------------------------------------------
#
# Add vertical "guide" lines to a dendrogram to facilitate identification of clusters with color bars
#
#-----------------------------------------------------------------------------------------------

addGuideLines = function(dendro, all = FALSE, count = 50, positions = NULL, col = "grey30", lty = 3,
                         hang = 0)
{
  if (all)
  {
    positions = 1:(length(dendro$height)+1);
  } else {
    if (is.null(positions))
    {
      lineSpacing = (length(dendro$height)+1)/count;
      positions = (1:count)* lineSpacing;
    }
  }
  objHeights = rep(0, length(dendro$height+1));
  objHeights[-dendro$merge[dendro$merge[,1]<0,1]] = dendro$height[dendro$merge[,1]<0];
  objHeights[-dendro$merge[dendro$merge[,2]<0,2]] = dendro$height[dendro$merge[,2]<0];
  box = par("usr"); ymin = box[3]; ymax = box[4];
  objHeights = objHeights - hang*(ymax - ymin);
  objHeights[objHeights<ymin] = ymin;
  posHeights = pmin(objHeights[dendro$order][floor(positions)], objHeights[dendro$order][ceiling(positions)]);
  for (line in 1:length(positions)) # The last guide line is superfluous
    lines(x = rep(positions[line], 2), y = c(ymin, posHeights[line]), lty = 3, col = col);
}

#-------------------------------------------------------------------------------------------
#
# nearestNeighborConnectivity
#
#-------------------------------------------------------------------------------------------
# This function takes expression data (rows=samples, colummns=genes)
# and the power exponent used in weighting the
# correlations to get the network adjacency matrix, and returns an array of dimensions
# nGenes * nSets containing the connectivities of each gene in each subset.

nearestNeighborConnectivity = function(datExpr, nNeighbors = 50, power = 6,
                             type = "unsigned", corFnc = "cor", corOptions = "use = 'p'",
                             blockSize = 1000,  
                             sampleLinks = NULL, nLinks = 5000,
                             setSeed = 38457,
                             verbose=1, indent=0)
{
  spaces = indentSpaces(indent);
  nGenes = dim(datExpr)[2];
  nSamples = dim(datExpr)[1];

  if (is.null(sampleLinks))
  {
    sampleLinks = (nGenes > nLinks);
  }

  if (sampleLinks) nLinks = min(nLinks, nGenes) else nLinks = nGenes;
  
  #printFlush(paste("blockSize =", blockSize));
  #printFlush(paste("nGenes =", nGenes));
  #printFlush(paste(".largestBlockSize =", .largestBlockSize));

  if (blockSize * nLinks>.largestBlockSize) blockSize = as.integer(.largestBlockSize/nLinks);

  intNetworkType = charmatch(type, .networkTypes);
  if (is.na(intNetworkType))
    stop(paste("Unrecognized networkType argument. Recognized values are (unique abbreviations of)",
               paste(.networkTypes, collapse = ", ")));

  subtract = rep(1, nGenes);
  if (sampleLinks)
  {
    if (verbose > 0) 
      printFlush(paste(spaces, "nearestNeighborConnectivity: selecting sample pool of size",
                       nLinks, ".."))
    sd = apply(datExpr, 2, sd, na.rm = TRUE);
    order = order(-sd);
    saved = FALSE;
    if (exists(".Random.seed")) 
    {
      saved = TRUE;
      savedSeed = .Random.seed
      if (is.numeric(setSeed)) set.seed(setSeed);
    }
    samplePool = order[sample(x = nGenes, size = nLinks)]
    if (saved) .Random.seed <<- savedSeed;
    poolExpr = datExpr[, samplePool];
    subtract[-samplePool] = 0;
  } 
      
  if (verbose>0) 
  {
     printFlush(paste(spaces, "nearestNeighborConnectivity: received",
                      "dataset with nGenes =", as.character(nGenes)));
     cat(paste(spaces, "..using nNeighbors =", nNeighbors, "and blockSize =", blockSize, "  "));
     pind = initProgInd(trailStr = " done");
  }

  nearestNeighborConn = rep(0, nGenes);

  nBlocks = as.integer((nGenes-1)/blockSize);
  SetRestrConn = NULL;
  start = 1;
  if (sampleLinks)
  {
    corEval = parse(text = paste(corFnc, "(poolExpr, datExpr[, blockIndex] ", prepComma(corOptions), ")"))
  } else {
    corEval = parse(text = paste(corFnc, "(datExpr, datExpr[, blockIndex] ", prepComma(corOptions), ")"))
  }

  while (start <= nGenes)
  {
    end = start + blockSize-1;
    if (end>nGenes) end = nGenes;
    blockIndex = c(start:end);
    #if (verbose>1) printFlush(paste(spaces, "..working on genes", start, "through", end, "of", nGenes))
    c = eval(corEval);
    if (intNetworkType==1)
    { c = abs(c);
    } else if (intNetworkType==2)
    { c = (1+c)/2;
    } else if (intNetworkType==3)
    { c[c < 0] = 0;
    } else stop("Internal error: intNetworkType has wrong value:", intNetworkType, ". Sorry!");
    adj_mat = as.matrix(c^power);
    adj_mat[is.na(adj_mat)] = 0;
    sortedAdj = as.matrix(apply(adj_mat, 2, sort, decreasing = TRUE)[1:(nNeighbors+1), ]);
    nearestNeighborConn[blockIndex] = apply(sortedAdj, 2, sum)-subtract[blockIndex];
    start = end+1;
    if (verbose>0) pind = updateProgInd(end/nGenes, pind);
    collectGarbage();
  }
  if (verbose>0) printFlush(" ");
  nearestNeighborConn;
}


#Try to merge this with the single-set function.
#-------------------------------------------------------------------------------------------
#
# nearestNeighborConnectivityMS
#
#-------------------------------------------------------------------------------------------
# This function takes expression data (rows=samples, colummns=genes) in the multi-set format
# and the power exponent used in weighting the
# correlations to get the network adjacency matrix, and returns an array of dimensions
# nGenes * nSets containing the connectivities of each gene in each subset.

nearestNeighborConnectivityMS = function(multiExpr, nNeighbors = 50, power=6, 
                               type = "unsigned", corFnc = "cor", corOptions = "use = 'p'",
                               blockSize = 1000,
                               sampleLinks = NULL, nLinks = 5000, setSeed = 36492,
                               verbose=1, indent=0)
{
  spaces = indentSpaces(indent);
  setsize = checkSets(multiExpr);
  nGenes = setsize$nGenes;
  nSamples = setsize$nSamples;
  nSets = setsize$nSets;

  if (is.null(sampleLinks))
  {
    sampleLinks = (nGenes > nLinks);
  }

  if (sampleLinks) nLinks = min(nLinks, nGenes) else nLinks = nGenes;

  #printFlush(paste("blockSize =", blockSize));
  #printFlush(paste("nGenes =", nGenes));
  #printFlush(paste(".largestBlockSize =", .largestBlockSize));

  if (blockSize * nLinks>.largestBlockSize) blockSize = as.integer(.largestBlockSize/nLinks);

  if (length(power)==1)
  {
    power = rep(power, nSets);
  } else if (length(power)!=nSets) 
      stop("Invalid arguments: length of 'power' must equal number sets in 'multiExpr'");

  intNetworkType = charmatch(type, .networkTypes);
  if (is.na(intNetworkType))
    stop(paste("Unrecognized networkType argument. Recognized values are (unique abbreviations of)",
               paste(.networkTypes, collapse = ", ")));

  subtract = rep(1, nGenes);
  if (sampleLinks)
  { 
    if (verbose > 0) 
      printFlush(paste(spaces, "nearestNeighborConnectivityMS: selecting sample pool of size",
                       nLinks, ".."))
    sd = apply(multiExpr[[1]]$data, 2, sd, na.rm = TRUE);
    order = order(-sd);
    saved = FALSE;
    if (exists(".Random.seed")) 
    {
      saved = TRUE;
      savedSeed = .Random.seed
      if (is.numeric(setSeed)) set.seed(setSeed);
    }
    samplePool = order[sample(x = nGenes, size = nLinks)]
    if (saved) .Random.seed <<- savedSeed;
    subtract[-samplePool] = 0;
  }

  if (verbose>0) printFlush(paste(spaces, "nearestNeighborConnectivityMS: received&q