R/networkFunctions-extras-05.R

Defines functions SelectTraits EvaluateClusters bicov.original bicor.original bicovw bicorw meanw medianw propVarExplained cutreeDynamic.1 moduleEigenMatrices subsetMEMs corVMat corMatMat corMats orderMEMs checkMultiMEMs consensusOrderMEMs PlotCorMEMsAndDendros dendrogramOrderings outlierMeasure legendClean initLib spaste plotEnrichmentBarplot rowConsensus plotHistogramAndCF plotQTL distributionOnSNPs2 qtlPeaks labeledBoxplot labeledPlot labeledBarplot2 smoothGauss linDate linAge CausalChildren simulateCausal matrixPCor zeo verbosePairs invertEmpiricalDistribution evalInvDist grey2red toRadians toDegrees normalizeAngle scaleToRange pointToLabelDist labelPosition drawRectangle drawEllipse networkPlot circlePlot alternatingAngles networkPlot.hierarchical intervalOverlap allocateJobs eps barplotArray .reverseRows .reverseVector fixLabels shortenLabels stretchLimits nearestSNP peakRegions makeCross addErrorBars.xy linearizeDate verboseMultiplot moduleHeatmapAndEigengeneBarplot naiveAccuracyFnc accuraciesFnc table2.allLevels multiData.subset multiData2list list2multiData varNames multiData.apply multiData.simplify isMultiData multiData.mapply multiData.rbindSelf logTrafo multiLev2numeric outliers GOgenes topEnrichment chiSquare.table plotEnrichments plotEnrichments.barplot addOrnaments .extend validationSuccess validationSuccess.ext replaceNA minWhichMin brace.horizontal prettyNamesForMetaAnalysis GOLabels prependZeros entrez2geneSymbol

# Extras not contained in the R packages.

# Version 02: cleanup of old functions that have not been used in ages, add some new ones.

if (!exists("UseCpmax")) UseCpmax = FALSE;

#--------------------------------------------------------------------------------------------

# Load the requisite libraries

WorkingDirectory = getwd();

#if (!library(MASS, logical.return=TRUE)) { # standard, no need to install
#For some reason, MASS does not seem to be installed on Titan, so we'll try to load it 
# from my own library. If that fails as well, stop.
#  if (!library(MASS, logical.return=TRUE, lib.loc="M:/Work/RLibrary")) stop()
#}   

libraryLocs = c("")

loadPackages = c("MASS", "class", "cluster", "impute", "Hmisc", "survival",
                 "WGCNA") #, "mclust");

for (pack in loadPackages)
{
  if (!require(pack, character.only = TRUE, quietly = TRUE))
  {
    ok = FALSE;
    for (loc in libraryLocs) ok = ok | require(pack, lib.loc = loc, 
                                               character.only = TRUE, quietly = TRUE);
    if (!ok) stop(paste("Could not find package", pack, 
                        "either in the standard library nor in the personal libraries\n",
                        paste("     ", libraryLocs, collapse = "\n")));
  }
}


#library(MASS);
#library(class)  # standard, no need to install
#library(cluster)	
#library(impute)# install it for imputing missing value
#library(Hmisc)	# install it for the C-index calculations
#library(survival)
#library(fields);
#     
#oldwd = getwd();

if (exists("memory.limit"))
{
  # increase the available memory 
  memory.limit(size=4000)   
}

# kWithinModule = function(TOM, Colors, gene) -> intramodularConnectivity

options(stringsAsFactors = FALSE);

#-----------------------------------------------------------------------------------------------
# Trait selection based on independence and significance
# Assumes that, just like with the PCs, the Traits have the same columns in each dataset (though the
# sample sets need not be the same).

SelectTraits = function(Traits, BranchCut = 0.25, SelectOnSignificance = FALSE, PCs = NULL, 
                        SignifThres = 0.03, Impute = FALSE, verbose = 1, indent = 0)
{
  spaces = indentSpaces(indent);
  if (verbose>0) printFlush(paste(spaces, "SelectTraits: Selecting from ", dim(Traits[[1]]$data)[2],
                                  "traits."));
  No.Sets = length(Traits);
  TDiss = 1-cor(Traits[[1]]$data, use = "pairwise.complete.obs");
  if (No.Sets>1) for (set in 2:No.Sets)
  {
    TDiss = pmax(TDiss, 1-cor(Traits[[set]]$data, use = "pairwise.complete.obs"));
  }
  h = hclust(as.dist(TDiss), method = "average");
  TMods = ModuleNumber(h, CutHeight = BranchCut, MinSize = 1);
  No.TMods = nlevels(as.factor(TMods));
  SelTraits = vector(mode="list", length = No.Sets);
  for (set in 1:No.Sets)
  {
    TData = Traits[[set]]$data;
    TData[is.na(TData)] = 0;
    TPCs = ModulePrinComps1(TData, as.factor(TMods), Impute = Impute, verbose = 0, 
                            GetConformity = TRUE);
    SelTraits[[set]] = list(data = TPCs$PrinComps);
    for (tmod in 1:No.TMods)
    {
      if (sum(TMods==tmod)>1)
      {
        rnk = order(-TPCs$ModuleConformity[TMods==tmod]);
        SelTraits[[set]]$data[, tmod] = (TData[, TMods==tmod])[, rnk[1]];
        names(SelTraits[[set]]$data)[tmod] = (names(TData)[TMods==tmod])[rnk[1]];
      } else {
        SelTraits[[set]]$data[, tmod] = TData[, TMods==tmod];
        names(SelTraits[[set]]$data)[tmod] = names(TData)[TMods==tmod];
      }
    }
  }
  
  if (verbose>0) printFlush(paste(spaces, "SelectTraits: Clustering led to ", dim(SelTraits[[1]]$data)[2],
                                  "traits."));
  
  if (SelectOnSignificance)
  {
    # Reduce further: calculate cor.tests for each ME with each trait in each set; 
    # keep only traits that have at least one cor.test$p.value below a threshold
    
    if (is.null(PCs)) stop("PCs must be given when SelectOnSignificance is requested.");
    
    No.Mods = dim(PCs[[1]]$data)[2];
    if (is.null(No.Mods)) 
      stop("Given PCs do not appear to have the correct structure (vector of list",
           "with \'data\' component being a matrix whose columns are PC vectors");
    
    No.Traits = dim(SelTraits[[1]]$data)[2];
    
    SelectTrait = rep(FALSE, times = No.Traits);
    
    for (trait in 1:No.Traits)
      for (mod in 1:No.Mods)
      {
        Significant = TRUE;
        for (set in (1:No.Sets))
        {
          ct = cor.test(PCs[[set]]$data[, mod], SelTraits[[set]]$data[, trait]); 
          if (ct$p.value>SignifThres) Significant = FALSE;
        }
        if (Significant) SelectTrait[trait] = TRUE;
      }
    
    for (set in 1:No.Sets)
    {
      SelTraits[[set]]$data = SelTraits[[set]]$data[, SelectTrait];
    }
    
    # print(paste("No. of selected traits expected by chance:", No.Mods * No.Traits * TraitThres));
  }
  
  # Re-cluster the significant traits for diagnostic purposes
  if (sum(SelectTrait)>1)
  {
    TDiss = 1-cor(SelTraits[[1]]$data, use = "pairwise.complete.obs");
    if (No.Sets>1) for (set in 2:No.Sets)
    {
      TDiss = pmax(TDiss, 1-cor(SelTraits[[set]]$data, use = "pairwise.complete.obs"));
    }
    newh = hclust(as.dist(TDiss), method = "average");
  } else {
    newh = NULL;
  }
  
  if (verbose>0) 
  {
    printFlush(paste(spaces, "SelectTraits: Selected", sum(SelectTrait), "traits: "));
    printFlush(paste(spaces, paste(names(SelTraits[[1]]$data), collapse = ", ")));
  }
  
  
  list(No.SelectedTraits = sum(SelectTrait), Traits = SelTraits, ClusterTree = h, NewClusterTree = newh);
}

#----------------------------------------------------------------------------------------------
# 
# EvaluateClusters
#
#----------------------------------------------------------------------------------------------
# Input: distance matrix and cluster labels. 0 in Labels means the particular point is a
# singleton (not assigned to a cluster).
# Output: a list of cluster quality indicators.

EvaluateClusters = function(DistM, Labels, RefLabels = NULL, Sample = FALSE, SampleProp = 1,
                            verbose = 2, indent = 0)
{
  
  spaces = indentSpaces(indent);
  
  d = dim(DistM);
  No.Points = length(Labels);
  
  if (d[1]!=d[2]) stop("Distance matrix DistM must be square.");
  
  if (d[1]!=No.Points) stop("Dimension of DistM incompatible with number of cluster labels");
  
  if (verbose>0) print(paste(spaces, "Evaluating clusters...")); 
  
  # Calculate indices relating to the internal and external links
  
  if (Sample)
  {
    if ( (SampleProp<=0) | (SampleProp>=1) )
      stop(paste("Incorrect SampleProp parameter given:", SampleProp));
    No.Sampled = as.integer(No.Points * SampleProp);
    Sampled = sample(x = No.Points, size = No.Sampled);
    DistM = DistM[Sampled, Sampled];
    Labels = Labels[Sampled];
    RefLabels = RefLabels[Sampled];
    No.Points = No.Sampled;
  }
  
  InternalLinks = matrix(0, nrow = No.Points, ncol = No.Points);
  for (point in 1:No.Points) if (Labels[point]!=0)
    InternalLinks[, point] = ifelse(Labels==Labels[point], Labels, 0);
  
  IntLinks = as.vector(InternalLinks[upper.tri(InternalLinks)]);
  Dist = as.vector(DistM[upper.tri(DistM)]);
  No.Internal = sum(IntLinks!=0);
  No.External = length(IntLinks) - No.Internal;
  
  if ( (No.Internal > 0) & (No.External>0) )
  {
    ord = order(Dist, c(1:length(Dist)));
    
    # ...number of "misplaced" internal and external links
    
    s1 = Dist[ord[No.Internal]];
    s2 = Dist[ord[No.Internal+1]];
    if (s1==s2)
    {
      MaxInt = s1 + 1e-10;
      MinExt = s1 - 1e-10;
    } else {
      MaxInt = s1;
      MinExt = s2;
    }
    No.Misplaced = sum( ( (IntLinks>0) & (Dist>MaxInt) ) | ( (IntLinks==0) & (Dist<MinExt) ) );
    
    # ...weight index of the internal and external links
    
    DistInt = sum(Dist[IntLinks>0]);
    DistExt = sum(Dist[IntLinks==0]);
    DistSm = sum(Dist[ord <= No.Internal]);
    DistLg = sum(Dist[ord > No.Internal]);
    DistIndex = DistInt/DistSm * DistLg/DistExt;
  } else {
    No.Misplaced = 0;
    DistIndex = 0;
  }
  
  if (!is.null(RefLabels))
  {
    if (length(RefLabels)!=No.Points)
      stop("Length of given RefLabels incompatible with number of points.");
    
    RefInternalLinks = matrix(0, nrow = No.Points, ncol = No.Points);
    for (point in 1:No.Points) if (RefLabels[point]!=0)
      RefInternalLinks[, point] = ifelse(RefLabels==RefLabels[point], RefLabels, 0);
    
    RefIntLinks = as.vector(RefInternalLinks[upper.tri(RefInternalLinks)]);
    
    No.Agreed = sum( xor(IntLinks>0, RefIntLinks==0));
    No.Disagreed = sum( xor(IntLinks==0, RefIntLinks==0));
    
    RandIndex = No.Agreed/(No.Agreed+No.Disagreed);
    
    # Make each unassigned label unique
    XLabels = Labels;
    NUnassigned = sum(XLabels==0);
    start = max(XLabels);
    XLabels[XLabels==0] = start + c(1:NUnassigned);
    
    # Same for the reference labels 
    XRefLabels = RefLabels;
    NRefUnassigned = sum(XRefLabels==0);
    start = max(XRefLabels);
    XRefLabels[XRefLabels==0] = start + c(1:NRefUnassigned);
    
    AdjRandIndex = adjustedRandIndex(XLabels, XRefLabels);  # This requires package mclust
  } else {
    RandIndex = 0;
    AdjRandIndex = 0;
  }
  
  list(No.Misplaced = No.Misplaced, DistIndex = DistIndex, RandIndex = RandIndex, 
       AdjRandIndex = AdjRandIndex);
}


# Biweight correlation. The original functions are from
# http://www.unt.edu/benchmarks/archives/2001/december01/rss.htm and that in
# turn seems to be based on Wilcox (1997, page 197).
# The actually useful versions are PL's rewrites that use block code so the
# functions can be used with matrices as inputs. PL's function could be
# improved since it does some calculations unnecessarily twice when y==NULL.

bicov.original<-function(x, y, na.rm = FALSE){
  mx <- median(x, na.rm = na.rm)
  my <-median(y, na.rm = na.rm)
  ux <- abs((x - mx)/(9 * qnorm(0.75) * mad(x, na.rm = na.rm)))
  uy <- abs((y - my)/(9 * qnorm(0.75) * mad(y, na.rm = na.rm)))
  aval <- ifelse(ux <= 1, 1, 0)
  bval <- ifelse(uy <= 1, 1, 0)
  top <- sum(aval * (x - mx) * (1 - ux^2)^2 * bval * (y - my) * (1 - uy^2)^2, na.rm = na.rm)
  top <- sum(!is.na(x) & !is.na(y)) * top
  botx <- sum(aval * (1 - ux^2) * (1 - 5 * ux^2), na.rm = na.rm)
  boty <- sum(bval * (1 - uy^2) * (1 - 5 * uy^2), na.rm = na.rm)
  bi <- top/(botx * boty)
  bi
}

bicor.original<-function(x, y, na.rm = FALSE){
  x <-as.numeric(x)
  y<-as.numeric(y)
  bicov(x,y, na.rm = na.rm)/(sqrt(bicov(x,x, na.rm = na.rm)*bicov(y,y, na.rm = na.rm)))
}

# This function should now work for both vector and matrix x and y (all combinations);
# Big WARNING: The use = pairwise.complete.obs simply works by removing NAs independently in x and
# y!!!!!

# Weighted version of bicov and bicor

bicovw = function(x, xWeight, y = NULL, yWeight = NULL, robustX = TRUE, robustY = TRUE, 
                  use = "all.obs", diagOnly = FALSE)
{
  x = as.matrix(x);
  xWeight = as.matrix(xWeight);
  if (!all.equal(dim(x), dim(xWeight)))
    stop("x and xWeight must have the same dimensions.");
  
  na.method <- pmatch(use, c("all.obs", "pairwise.complete.obs"))
  if (is.na(na.method)) 
    stop(paste("Unrecognized parameter 'use'. Recognized values are \n",
               "'all.obs', 'pairwise.complete.obs'"))
  na.rm = (na.method==2);
  if (is.null(y)) {
    if (robustX)
    {
      #mx <- apply(x, 2, median, na.rm = na.rm)
      mx = medianw(x, xWeight);
      mxMat = matrix(mx, nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
      madxMat = matrix(apply(x, 2, mad, na.rm = na.rm), nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
      ux <- as.matrix(abs((x - mxMat)/(9 * qnorm(0.75) * madxMat)))
    } else {
      mx = meanw(x, xWeight)
      mxMat = matrix(mx, nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
      ux = matrix(0, nrow = nrow(mxMat), ncol = ncol(mxMat));
    }
    aval  = ifelse(ux <= 1, 1, 0) * xWeight;
    botx <- apply(as.matrix(aval * (1 - ux^2) * (1 - 5 * ux^2)), 2, sum, na.rm = na.rm);
    ux[is.na(ux)] = 1;
    aval[is.na(aval)] = 0;
    if (diagOnly)	
    {
      # diagOnly makes sense particularly for y=NULL because it is used in the
      # normalization in bicor.
      topFact = apply(!is.na(x), 2, sum)
      x[is.na(x)] = mxMat[is.na(x)];
      top <- apply(as.matrix(aval * (x - mxMat) * (1 - ux^2)^2)^2, 2, sum); 
      top <- top * topFact;
      bi <- top/((botx^2) * (1-1/nrow(x)));
    } else { 
      topFact = t(!is.na(x)) %*% !is.na(x) 
      x[is.na(x)] = mxMat[is.na(x)];
      top <- t(as.matrix(aval * (x - mxMat) * (1 - ux^2)^2)) %*% 
        as.matrix((aval * (x - mxMat) * (1 - ux^2)^2))
      top <- top * topFact;
      bi <- top/(botx %o% (botx *(1-1/nrow(x)) ))
    }
    bi
  } else {
    y = as.matrix(y);
    yWeight = as.matrix(yWeight);
    if (robustX)
    {
      #mx <- apply(x, 2, median, na.rm = na.rm)
      mx = medianw(x, xWeight)
      mxMat = matrix(mx, nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
      madxMat = matrix(apply(x, 2, mad, na.rm = na.rm), nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
      ux <- as.matrix(abs((x - mxMat)/(9 * qnorm(0.75) * madxMat)))
    } else {
      mx = meanw(x, xWeight)
      mxMat = matrix(mx, nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
      ux = matrix(0, nrow = nrow(mxMat), ncol = ncol(mxMat));
    }
    if (robustY)
    {
      #my <- apply(y, 2, median, na.rm = na.rm)
      my = medianw(y, yWeight)
      myMat = matrix(my, nrow = nrow(y), ncol = ncol(y), byrow = TRUE);
      madyMat = matrix(apply(y, 2, mad, na.rm = na.rm), nrow = nrow(y), ncol = ncol(y), byrow = TRUE);
      uy <- as.matrix(abs((y - myMat)/(9 * qnorm(0.75) * madyMat)))
    } else {
      my = meanw(y, yWeight);
      myMat = matrix(my, nrow = nrow(y), ncol = ncol(y), byrow = TRUE);
      uy = matrix(0, nrow = nrow(myMat), ncol = ncol(myMat));
    }
    aval <- ifelse(ux <= 1, 1, 0) * xWeight;
    bval <- ifelse(uy <= 1, 1, 0) * yWeight;
    botx <- apply(as.matrix(aval * (1 - ux^2) * (1 - 5 * ux^2)), 2, sum, na.rm = na.rm)
    boty <- apply(as.matrix(bval * (1 - uy^2) * (1 - 5 * uy^2)), 2, sum, na.rm = na.rm)
    ux[is.na(ux)] = 1;
    uy[is.na(uy)] = 1;
    aval[is.na(aval)] = 0;
    bval[is.na(bval)] = 0;
    topFact = t(!is.na(x)) %*% !is.na(y) 
    x[is.na(x)] = mxMat[is.na(x)];
    y[is.na(y)] = myMat[is.na(y)];
    top <- t(as.matrix(aval * (x - mxMat) * (1 - ux^2)^2)) %*% as.matrix((bval * (y - myMat) * (1 - uy^2)^2))
    top <- top * topFact;
    bi <- top/(botx %o% (boty * (1-1/nrow(x))))
    bi
  }
}

# weighted version of biweight mid-corellation
#
bicorw<-function(x, xWeight, y = NULL, yWeight = NULL, robustX = TRUE, robustY = TRUE, use = "all.obs")
{
  na.method <- pmatch(use, c("all.obs", "pairwise.complete.obs"))
  if (is.na(na.method)) 
    stop(paste("Unrecognized parameter 'use'. Recognized values are \n",
               "'all.obs', 'pairwise.complete.obs'"))
  if (!all.equal(dim(x), dim(xWeight)))
    stop("x and xWeight must have the same dimensions");
  if (is.null(y)) 
  {
    bcx = sqrt(bicov(x, robustX = robustX, use = use, diagOnly = TRUE));
    bicov(x, robustX = robustX, use = use)/(bcx %o% bcx);
  } else
  {
    if (!all.equal(dim(y), dim(yWeight)))
      stop("y and yWeight must have the same dimensions");
    bicov(x, y, robustX = robustX, robustY = robustY, use = use)/
      (sqrt(bicov(x, robustX = robustX, use = use, diagOnly = TRUE)) %o%
         sqrt(bicov(y, robustX = robustY, use = use, diagOnly = TRUE)))
  }
}

meanw = function(x, w, na.rm = FALSE)
{
  x = as.matrix(x);
  w = as.matrix(w);
  if (!all.equal(dim(x), dim(w)))
    stop("Dimensions of x and w must be the same.");
  
  if (na.rm)
  {
    w[is.na(w)] = 0;
    w[is.na(x)] = 0;
  }
  s = apply(x*w, 2, sum, na.rm = TRUE);
  sw = apply(w, 2, sum, na.rm = TRUE);
  
  if (sum(sw==0)>0)
    stop("Sum of weights is zero in some columns.")
  
  s/sw;
}


medianw = function(x, w, na.rm = FALSE)
{
  x = as.matrix(x);
  w = as.matrix(w);
  if (!all.equal(dim(x), dim(w)))
    stop("Dimensions of x and w must be the same.");
  
  if (na.rm)
  {
    w[is.na(w)] = 0;
    w[is.na(x)] = 0;
  }
  
  if (sum(w<0)>0) stop("Weights w must be non-negative.");
  
  order = apply(x, 2, order, na.last = TRUE);
  for (i in 1:ncol(x)) w[, i] = w[order[, i], i];
  
  nSamples = nrow(w);
  csw = apply(w, 2, cumsum)
  sw = apply(w, 2, sum);
  if (sum(sw==0)>0)
    stop("Some columns of w have no non-zero entries.")
  # csw are cumulative sums of weights
  csw = csw * matrix(nSamples/csw[nSamples, ], nrow = nSamples, ncol = ncol(w), byrow = TRUE);
  
  midw = nSamples/2;
  # midh will be the index in each column where csw rises above midw. Can be anywehere between 1 and
  # nSamples.
  midh = apply(csw-midw>0, 2, match, x = TRUE)
  
  # Couldn't figure out a way to do this in block notation.
  medians = rep(0, ncol(x));
  for (i in 1:ncol(x))
    if (midh[i]==1) {
      medians[i] = x[order[1, i], i];
    } else {
      w0 = abs(csw[midh[i]-1]); x0 = x[order[midh[i]-1, i], i];
      w1 = csw[midh[i]]; x1 = x[order[midh[i], i], i];
      medians[i] = (w0*x1 + w1* x0)/(w1+w0);
    }
  medians;
}

propVarExplained = function(datExpr, colors, MEs, corFnc = "cor", corOptions = "use = 'p'")
{
  fc = as.factor(colors);
  mods = levels(fc);
  nMods = nlevels(fc);
  if (nMods!=ncol(MEs))
    stop(paste("Input error: number of distinct 'colors' differs from\n", 
               "the number of module eigengenes given in ME."));
  
  if (ncol(datExpr)!=length(colors))
    stop("Input error: number of probes (columns) in 'datExpr' differs from the length of goven 'colors'.");
  
  if (nrow(datExpr)!=nrow(MEs))
    stop("Input error: number of observations (rows) in 'datExpr' and 'MEs' differ.");
  
  PVE = rep(0, nMods);
  
  col2MEs = match(mods, substring(names(MEs), 3));
  
  if (sum(is.na(col2MEs))>0)
    stop("Input error: not all given colors could be matched to names of module eigengenes.");
  
  for (mod in 1:nMods)
  {
    modGenes = c(1:nGenes)[as.character(colors)==mods[mod]];
    corExpr = parse(text = paste(corFnc, "(datExpr[, modGenes], MEs[, col2MEs[mod]],",
                                 corOptions, ")"));
    PVE[mod] = mean(as.vector(eval(corExpr)^2));
  }
  
  names(PVE) = paste("PVE", mods, sep = "");
  PVE
}


cat("\n");
if (exists("cutreeDynamic", inherits = FALSE, mode = "function"))
{
  cat("\n");
  cat(paste("Warning: a function of the name 'cutreeDynamic' appears to be defined", 
            "in the main environment.\n")); 
  cat(paste("  It is likely that an old version of NetworkFunctions has been loaded.\n"));
  cat(paste("  Please do not use old NetworkFunctions, they are not compatible with this code.\n"));
  cat("\n");
  cat(paste("  The old definition has been removed; if the old functionality of cutreeDynamic is needed, \n",
            "  replace calls to the old cutreeDynamic(...) by cutreeDynamic.1(...).\n",
            "  Arguments to the function can be left unchanged.\n"));
  cat("\n");
  cat(paste("  After loading the dynamicTreeCut package, type help(cutreeDynamic) to see information\n",
            "  about the new dynamic tree cutting function and its use.\n"));
  cat("\n");
  
  rm("cutreeDynamic", inherits = FALSE);
}

cutreeDynamic.1 = function(hierclust, maxTreeHeight=1, deepSplit=TRUE, minModuleSize=50, 
                           minAttachModuleSize=100, nameallmodules=FALSE, useblackwhite=FALSE)
{
  if ( (minAttachModuleSize!=100) | (nameallmodules!=FALSE) | (useblackwhite!=FALSE) )
    waring("cutreeDynamic.1: Parameters minAttachModuleSize, nameallmodules, useblackwhite are ignored.");
  labels2colors(cutreeDynamicTree(hierclust, maxTreeHeight, deepSplit, minModuleSize));
}

#===================================================================================================
#
# moduleEigenMatrices.R
#
#===================================================================================================


#==========================================================================================================
#
# Module Eigenmatrix functions
#
#==========================================================================================================
#
# input: expr: Expression data of a single set
#	colors: module colors
#	impute: use imputation to replace NA's in data?
#	nPC: optional explicit specification of the number of PCs to keep
#	minVarExpl: the number of kept PCs will be such that at least minVarExpl variance will be
#	explained.
#	align: specification of alignment of the first PC.
#	saveAll: keep all eigenvectors and eigenvalues? Should only be useful for diagnostic purposes.
# output:
# Eigen-information will be stored in a list with the following components: 
# modName: name of module
# d: singular values
# v: singular vectors
# mat: singular matrix ( = sum{ labda_i sv_i sv_i^T })
# NSq: normalization constant = sum lamba_i^2
# varExpl: variance explained by the singular vectors
# averageExpr: average expression of the module as a vector

moduleEigenMatrices=function(expr, colors, excludeGrey = TRUE, 
                             grey = if (is.numeric(colors)) 0 else "grey", 
                             impute = TRUE, nPC = NULL, minVarExpl = 0.9, 
                             align = "along average", saveAll = FALSE, scaleExpr = TRUE,
                             maxNPC = 10, 
                             verbose = 0, indent = 0)
{
  nSamples = nrow(expr);
  spaces = indentSpaces(indent);
  
  if (verbose>0) 
    printFlush(paste(spaces, "moduleEigenMatrices: Calculating", nlevels(as.factor(colors)), 
                     "module eigenMatrices in given set."));
  
  if (is.null(expr)) stop("moduleEigenMatrices: Error: expr is NULL. ");
  if (is.null(colors)) stop("moduleEigenMatrices: Error: colors is NULL. ");
  
  alignRecognizedValues =  c("", "along average");
  if (!is.element(align, alignRecognizedValues))
    stop(paste("moduleEigenMatrices: Error:",
               "parameter align has an unrecognised value:", 
               align, "; Recognized values are ", alignRecognizedValues));
  
  if ( (minVarExpl < 0) | (minVarExpl > 1))
    stop("Incorrect minVarExpl given. Must be between 0 and 1.");
  
  modlevels=levels(factor(colors))
  if (excludeGrey) modlevels = modlevels[modlevels != as.character(grey)];
  
  nMods = length(modlevels);
  
  if (!is.null(nPC)) maxNPC = nPC;
  
  eigenMatrices = list(modNames = modlevels, 
                       nPC = rep(NA, nMods), 
                       d = matrix(NA, maxNPC, nMods),
                       PCs = array(NA, dim = c(nSamples, maxNPC, nMods)), 
                       mat = array(NA, dim = c(nSamples, nSamples, nMods)),
                       NSq = rep(NA, nMods),
                       varExpl = matrix(NA, maxNPC, nMods),
                       averageExpr = matrix(NA, nSamples, nMods),
                       nSamples = nSamples);
  # names(PrinComps)=paste("ME",modlevels,sep="")
  # names(aveExpr)=paste("AE",modlevels,sep="")
  
  #print(paste("nPC:", nPC))
  for(i in c(1:length(modlevels)) ) 
  {
    if (verbose>1) 
      printFlush(paste(spaces, "moduleEigengenes : Working on ME for module", modlevels[i]));
    modulename = modlevels[i]
    restrict1 = as.character(colors)== modulename
    datModule = t(expr[, restrict1])
    if (impute)
    {
      seedSaved = FALSE;
      if (exists(".Random.seed")) {
        saved.seed = .Random.seed;
        seedSaved = TRUE;
      }
      datModule = impute.knn(as.matrix(datModule), k = min(10, nrow(datModule)-1))
      if (seedSaved) .Random.seed = saved.seed;
    }
    if (scaleExpr) datModule=t(scale(t(datModule)));
    svd1=svd(datModule);
    #print(paste("sum(d^2):", sum(svd1$d^2)))
    varExpl= svd1$d^2/sum(svd1$d^2);
    varExplCum = cumsum(varExpl);
    if (is.null(nPC))
    {
      modNPC = min(maxNPC, match(TRUE, minVarExpl <= varExplCum));
      if (is.na(modNPC)) stop ("Internal error: modNPC is NA. Sorry!");
    } else {
      modNPC = min(nPC, length(varExpl));
    }
    #print(paste("modNPC:", modNPC))
    aveExpr = apply(datModule, 2, mean);  #datModule is transposed...
    if (align == "along average")
    {
      if (verbose>4) printFlush(paste(spaces,
                                      " .. aligning module eigengene with average expression."))
      if (cor(aveExpr, svd1$v[,1])<0) svd1$v[,1] = -svd1$v[,1];
    }
    modEMat = matrix(0, nrow = nSamples, ncol = nSamples);
    for (pc in 1:modNPC)
    {
      pcMat = svd1$v[, pc] %o%svd1$v[, pc];
      modEMat = modEMat + abs(svd1$d[pc]) * pcMat
    }
    #eigenMatrices[[i]] = list(
    #    modName = paste("ME", modulename, sep = ""),
    #    n = modNPC,
    #    d = svd1$d[1:modNPC],
    #    PCs = scale(svd1$v[, c(1:modNPC)]),
    #    mat = modEMat,
    #    NSq = sum((svd1$d[1:modNPC])^2),
    #    varExpl = varExpl[1:modNPC],
    #    averageExpr = aveExpr,
    #    nSamples = ncol(datModule));
    eigenMatrices$modNames[i] = paste("ME", modulename, sep = "");
    eigenMatrices$nPC[i] = modNPC;
    eigenMatrices$d[c(1:modNPC), i] = svd1$d[1:modNPC];
    eigenMatrices$PCs[, c(1:modNPC), i] = scale(svd1$v[, c(1:modNPC)]);
    eigenMatrices$mat[,,i] = modEMat;
    eigenMatrices$NSq[i] =  sum((svd1$d[1:modNPC])^2);
    eigenMatrices$averageExpr[, i] = aveExpr;
    eigenMatrices$varExpl[1:modNPC, i] = varExpl[1:modNPC];
    #if (saveAll)
    #{
    #  eigenMatrices[[i]]$dAll = svd1$d;
    #  eigenMatrices[[i]]$vAll = svd1$v;
    #}
  }
  class(eigenMatrices) = 'eigenMatrices'
  eigenMatrices;
  
}

subsetMEMs = function(MEMs, subset, drop = TRUE)
{
  nMods = length(MEMs$modNames);
  if (sum(abs(subset)>nMods) > 0)
    stop("Non-existing eigenmatrices selected")
  
  nNewMods = length(eigenMatrices$modNames[subset]);
  if (nNewMods==0)
    stop("Selection is empty.");
  
  newMEMs = list(modNames = MEMs$modNames[subset, drop = drop], 
                 nPC = MEMs$nPC[subset, drop = drop], 
                 d = MEMs$d[, subset, drop = drop],
                 PCs = MEMs$PCs[, , subset, drop = drop], 
                 mat = MEMs$mat[, , subset, drop = drop],
                 NSq = MEMs$NSq[subset, drop = drop],
                 varExpl = MEMs$varExpl[, subset, drop = drop],
                 averageExpr = MEMs$averageExpr[, subset, drop = drop],
                 nSamples = MEMs$nSamples);
  class(newMEMs) = 'eigenMatrices';
  newMEMs;
}

corVMat = function(vec, MEMs, vecSelect = NULL, memSelect = NULL, na.rm = FALSE)
{
  vec = scale(as.matrix(vec))
  vecNames = dimnames(vec)[[2]];
  nMods = length(MEMs$nPC);
  nSamples = nrow(vec);
  if (is.null(vecSelect)) vecSelect = c(1:ncol(vec));
  if (is.null(memSelect)) memSelect = c(1:nMods);
  
  vecSelect = c(1:ncol(vec))[vecSelect];
  memSelect = c(1:nMods)[memSelect];
  
  cor = matrix(NA, length(vecSelect), length(memSelect));
  
  for (v in 1:length(vecSelect)) for (m in 1:length(memSelect))
  {
    vvec = vec[, vecSelect[v]];
    if (na.rm)
    {
      keep = c(1:nSamples)[!is.na(vec)];
    } else {
      keep = c(1:nSamples); 
    }
    proj = MEMs$mat[keep, keep, memSelect[m]] %*% vec[keep];
    cor[v, m] = sign(cor(MEMs$PCs[,1,memSelect[m]], vvec, use = 'p')) * 
      sqrt( sum(proj^2)/sum(vec[keep]^2)/MEMs$NSq[memSelect[m]] );
  }
  dimnames(cor) = list(vecNames[vecSelect], MEMs$modNames[memSelect]);
  cor;
}

# This function may not work!

corMatMat = function(eigenMat1, eigenMat2)
{
  n1 = nrow(eigenMat1$mat);
  n2 = nrow(eigenMat2$mat);
  if (n1!=n2) stop("Matrices in eigenMat1 and eigenMat2 must have the same dimension.");
  
  Trace = sum(diag(eigenMat1$mat %*% eigenMat2$mat));
  sqrt(Trace/sqrt(eigenMat1$NSq * eigenMat2$NSq));
}

corMats = function(MEMs1, MEMs2 = NULL, select1 = NULL, select2 = NULL)
{
  nMods1 = length(MEMs1$nPC);
  if (is.null(select1)) {
    select1 = c(1:nMods1)
  } else select1 = c(1:nMods1)[select1];
  n1 = length(select1);
  if (is.null(MEMs2))
  {
    cm = matrix(1, n1, n1);
    dimnames(cm) = list(MEMs1$modNames[select1], MEMs1$modNames[select1]);
    for (i in 1:(n1-1)) for (j in (i+1):n1)
    {
      ii = select1[i];
      jj = select1[j];
      Trace = sum(diag(MEMs1$mat[,,ii] %*% MEMs1$mat[,,jj]));
      sign = sign(cov(MEMs1$PC[, 1, ii], MEMs1$PC[, 1, jj]))
      cm[i,j] = sign * sqrt(Trace/sqrt(MEMs1$NSq[ii] * MEMs1$NSq[jj]));
      cm[j,i] = cm[i,j]
    }
  } else
  {
    d1 = dim(MEMs1$mat)[1]
    d2 = dim(MEMs2$mat)[1]
    if (d1!=d2) stop("Eigenmatrices in MEMs1 and MEMs2 have incompatible dimensions.");
    n2 = length(select2);
    cm = matrix(0, n1, n2);
    dimnames(cm) = list(MEMs1$modNames[select1], MEMs2$modNames[select2]);
    for (i in 1:n1) for (j in 1:n2)
    {
      ii = select1[i];
      jj = select2[j];
      Trace = sum(diag(MEMs1$mat[,,ii] %*% MEMs2$mat[,,jj]));
      sign = sign(cov(MEMs1$PC[, 1, ii], MEMs2$PC[, 1, jj]))
      cm[i,j] = sign * sqrt(Trace/sqrt(MEMs1$NSq[ii] * MEMs2$NSq[jj]));
    }
  }
  cm;
}

#--------------------------------------------------------------------------------------
#
# orderMEMs
#
#--------------------------------------------------------------------------------------

orderMEMs = function(MEMs)
{
  nMats = length(MEMs);
  if (nMats==0) stop("Given 'MEMs' is empty.");
  dist = as.dist(1-corMats(MEMs))
  h = hclust(dist, method = "average");
  order = h$order
  subsetMEMs(MEMs, order);
}

checkMultiMEMs = function(multiMEMs)
{
  if (!is.list(multiMEMs))
    stop("Given 'multiMEMs' must be a list.");
  
  nSets = length(multiMEMs)
  if (nSets==0) stop("Given 'multiMEMs' is empty.");
  
  for (set in 1:nSets)
  {
    if (class(multiMEMs[[set]])!='eigenMatrices')
      stop("Not all entries in 'multiMEMs' have class 'eigenMatrices'");
  }
  nMods = length(multiMEMs[[1]]$nPC);
  nSamples = rep(NA, nSets)
  for (set in 1:nSets)
  {
    if (length(multiMEMs[[set]]$nPC) != nMods)
      stop('Numbers of modules not consistent across sets');
    nSamples[set] = MEMs[[set]]$nSamples;
  } 
  modNames = MEMs[[1]]$modNames;
  
  list(nSets = nSets, nMods = nMods, nSamples = nSamples, modNames = modNames);
}

consensusOrderMEMs = function(multiMEMs, order = NULL)
{
  size = checkMultiMEMs(multiMEMs);
  
  if (is.null(order))
  {
    dist = as.dist(1-corMats(multiMEMs[[1]]))
    if (size$nSets>1)
      for (set in 2:size$nSets)
        dist = pmax(dist, as.dist(1-corMats(multiMEMs[[set]])));
    h = hclust(dist, method = "average");
    order = h$order
  } else {
    if (length(order)!=size$nMods) 
      stop("Number of modules in 'multiMEMs' must match the length of 'order'.");
  }
  ordMEMs =  vector(mode = "list", length = size$nSets);
  for (set in 1:size$nSets)
  {
    ordMEMs[[set]] = subsetMEMs(multiMEMs[[set]], order);
  }
  ordMEMs;
}

#--------------------------------------------------------------------------------------
#
# PlotCorMEMsAndDendros
#
#--------------------------------------------------------------------------------------
# Plots a matrix plot of the correlation of module eigenmatrices. 
# On the diagonal the heatmaps show correlation of MEMs in the
# particular subset; off-diagonal are differences in the correlation matrix. 
# Titles is a vector of titles for the diagonal diagrams; the off-diagonal will have no title
# for now.

PlotCorMEMsAndDendros = function(MEMs, Titles, ColorLabels = TRUE, colors = NULL, IncludeSign = TRUE, 
                                 ColoredBarPlot = TRUE, LetterSubPlots = FALSE, Letters = NULL, IncludeGrey = FALSE, 
                                 setMargins = TRUE, plotCPMeasure = FALSE, plotMeans = TRUE, CPzlim = c(0,1),
                                 printAdj = FALSE, printCPVals = FALSE, CPcex = 0.9, plotErrors = FALSE, 
                                 marDendro = NULL,
                                 marHeatmap = NULL, PlotDiagAdj = TRUE, invertColors = FALSE,
                                 ...)
{
  #Letters = "abcdefghijklmnopqrstuvwxyz";
  if (is.null(Letters)) Letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  
  if (is.null(colors)) 
    if (IncludeSign)
    {
      colors = GreenWhiteRed(50);
    } else {
      colors = heat.colors(30);
    }
  size = checkMultiMEMs(MEMs);
  nSets = size$nSets;
  cex = par("cex");
  mar = par("mar");
  par(mfrow = c(nSets+1, nSets));
  par(cex = cex);
  cors = list();
  for (set in 1:nSets) cors[[set]] = corMats(MEMs[[set]]);
  #if (!IncludeGrey)
  #{
  #  for (set in 1:nSets)
  #    PCs[[set]]$data = PCs[[set]]$data[ , substring(names(PCs[[set]]$data),3)!="grey"]
  #}
  letter.ind = 1;
  for (set in 1:nSets)
  {
    #par(cex = StandardCex/1.4);
    par(mar = marDendro);
    labels = size$modNames;
    uselabels = labels # [substring(labels,3)!="grey"];
    disPC = as.dist(1-cors[[set]]);
    clust = hclust(disPC, method = "average");
    if (LetterSubPlots) {
      main = paste(substring(Letters, letter.ind, letter.ind), ". ", Titles[set], sep="");
    } else {
      main = Titles[set];
    }
    #validColors = is.na(match(uselabels, colors()));
    #plotLabels = ifelse(validColors, substring(uselabels[validColors], 3), uselabels[!validColors]);
    plotLabels = uselabels;
    plot(clust, main = main, sub="", xlab="", 
         labels = plotLabels, ylab="", ylim=c(0,1));
    letter.ind = letter.ind + 1;
  }
  
  for (i.row in (1:nSets))
  {
    for (i.col in (1:nSets))
    {
      letter.ind = i.row * nSets + i.col;
      if (LetterSubPlots) 
      {
        #letter = paste("(", substring(Letters, first = letter.ind, last = letter.ind), ")", sep = "");
        letter = paste( substring(Letters, first = letter.ind, last = letter.ind), ".  ", sep = "");
      } else {
        letter = NULL;
      }
      par(cex = cex);
      if (setMargins) {
        if (is.null(marHeatmap))
        {
          if (ColorLabels) {
            par(mar = c(1,2,3,4)+0.2);
          } else {
            par(mar = c(6,7,3,5)+0.2);
          }
        } else {
          par(mar = marHeatmap);
        }
      }
      No.Modules = size$nMods;
      if (i.row==i.col)
      {
        if (IncludeSign)
        {
          if (PlotDiagAdj) {
            HeatmapWithTextLabels((1+cors[[i.row]])/2, dimnames(cors[[i.row]])[[2]],
                                  dimnames(cors[[i.row]])[[2]],
                                  main=paste(letter, Titles[[i.col]]), InvertColors=invertColors, 
                                  zlim=c(0,1.0),
                                  NumMatrix = if (printAdj) signif((1+cors[[i.row]])/2, 2) else NULL,
                                  cex.Num = 0.6,
                                  ColorLabels = ColorLabels, colors = colors, SetMargins = FALSE, ...);
          } else {
            HeatmapWithTextLabels(cors[[i.row]], dimnames(cors[[i.row]])[[2]], 
                                  dimnames(cors[[i.row]])[[2]],
                                  main=paste(letter, Titles[[i.col]]), InvertColors=invertColors, 
                                  zlim=c(-1,1.0),
                                  NumMatrix = if (printAdj) signif(cors[[i.row]], 2) else NULL,
                                  cex.Num = 0.6,
                                  ColorLabels = ColorLabels, colors = colors, SetMargins = FALSE, ...);
          }
        } else {
          HeatmapWithTextLabels(abs(cors[[i.row]]), dimnames(cors[[i.row]])[[2]],
                                dimnames(cors[[i.row]])[[2]],
                                main=paste(letter, Titles[[i.col]]), InvertColors=invertColors, 
                                zlim=c(0,1.0),
                                NumMatrix = if (printAdj) signif(abs(cors[[i.row]]), 2) else NULL,
                                cex.Num = 0.6,
                                ColorLabels = ColorLabels, colors = colors, SetMargins = FALSE, ...);
        }
      } else
      {
        cor1 = cors[[i.col]];
        cor2 = cors[[i.row]];
        cor.dif = (cor1 - cor2)/2;
        d = tanh((cor1 - cor2) / (abs(cor1) + abs(cor2))^2);
        # d = abs(corPC1 - corPC2) / (abs(corPC1) + abs(corPC2));
        dispd = cor.dif;
        if (plotCPMeasure) dispd[upper.tri(d)] = d[upper.tri(d)];
        if (i.row>i.col)
        {
          if (IncludeSign)
          {
            half = as.integer(length(colors)/2);
            if (invertColors) { range = c(1:(half+1));} else {range = c(half:length(colors)); }
            halfColors = colors[range];
          } else {
            halfColors = colors;
          }
          if (printCPVals) {
            printMtx = matrix(paste(".", as.integer((1-abs(dispd))*100), sep = ""), 
                              nrow = nrow(dispd), ncol = ncol(dispd));
            printMtx[printMtx==".100"] = "1";
          } else { 
            printMtx = NULL; 
          }
          if (sum( (1-abs(dispd)<CPzlim[1]) | (1-abs(dispd)>CPzlim[2]) )>0)
            warning("PlotCorPCs: Correlation preservation data out of zlim range!");
          if (plotCPMeasure) {
            HeatmapWithTextLabels(1-abs(dispd), dimnames(cor1)[[2]], dimnames(cor2)[[2]], 
                                  main=paste(letter, "UT: Cor.Pres\nLT: 1-Cor.Diff"),
                                  InvertColors=invertColors, 
                                  ColorLabels = ColorLabels, zlim = CPzlim, colors = halfColors,
                                  SetMargins = FALSE, 
                                  NumMatrix = printMtx, cex.Num = CPcex, ...);
          } else {
            HeatmapWithTextLabels(1-abs(dispd), dimnames(cor1)[[2]], dimnames(cor2)[[2]], 
                                  main=paste(letter, "Preservation"), InvertColors=invertColors, 
                                  ColorLabels = ColorLabels, zlim = CPzlim, colors = halfColors,
                                  SetMargins = FALSE,  NumMatrix= printMtx, cex.Num = CPcex, ...);
          }
        } else {
          if (plotCPMeasure) {
            dp = 1-abs(d);
            method = "Cor.Pres.:";
          } else {
            dp = 1-abs(cor.dif); 
            method = "Preservation:";
          }
          diag(dp) = 0;
          if (plotMeans) {
            sum_dp = mean(dp[upper.tri(dp)]);
            means = apply(dp, 2, sum)/(ncol(dp)-1);
            if (plotErrors) {
              Errors = sqrt( (apply(dp^2, 2, sum)/(ncol(dp)-1) - means^2)/(ncol(dp)-2));
            } else {
              Errors = NULL; 
            }
            BarplotWithTextLabels(means, dimnames(cor1)[[2]], 
                                  main=paste(letter, "D=", signif(sum_dp,2)), 
                                  ylim=c(0,1),
                                  ColorLabels = ColorLabels, Colored = ColoredBarPlot,
                                  SetMargins = FALSE, Errors = Errors, ... )
          } else {
            sum_dp = sum(dp[upper.tri(dp)]);
            BarplotWithTextLabels(dp, names(cor1),
                                  main=paste(letter, method, "sum = ", signif(sum_dp,3)), 
                                  ylim=c(0,dim(dp)[[1]]),
                                  ColorLabels = ColorLabels, Colored = ColoredBarPlot, 
                                  SetMargins = FALSE, ... )
          }
        }
      }
    }
  }
}



#=================================================================================================
#
# Dendrogram orderings
#
#=================================================================================================


dendrogramOrderings = function(dendro, maxMerge = 18, verbose = 1, indent = 0)
{
  
  spaces = indentSpaces(indent)
  
  if (verbose>0) printFlush(paste(spaces, "Calculating dendrogramOrderings.."));
  nMerge = length(dendro$height);
  nSingl = nMerge + 1;
  mleft = dendro$merge[,1];
  mright = dendro$merge[,2];
  
  if ((verbose>0) & (nMerge>maxMerge)) 
    printFlush(paste("..FYI: limiting number of flipped merges to", maxMerge)); 
  
  # First step: get the list of L and R singletons below each merge, i.e. for each merge and each
  # singleton the array sides[, merge, side] gives singletons to the side side of merge merge.
  # The number of those sigletons is recorded in nSides[merge, side]
  
  sides = array(0, dim = c(nSingl, ncol = nMerge, 2));
  
  nSides = matrix(0, nrow = nMerge, ncol = 2);
  
  traceBack = rep(0, times = nMerge);
  curSide = rep(1, times = nMerge);	# 1=left, 2=right, 3=all done.
  
  nTrace = 1;		# Number of traces includes current merge.
  traceBack[1] = nMerge;
  curMerge = nMerge;
  
  if (verbose>1) printFlush(paste(spaces, " ..finding `sides' of each merge.."));
  while ((curSide[nMerge]<3) | (nTrace > 1))
  {
    # print(paste("nTrace:", nTrace, "traceBack:", paste(traceBack, collapse = ", ")));
    # print(paste("curMerge:", curMerge, "curSide", paste(curSide, collapse = ", ")));
    if (curSide[curMerge] < 3)
    { 
      # print(paste("Processing side", curSide[curMerge]));
      # Both sides not yet processed.
      if (dendro$merge[curMerge, curSide[curMerge]] < 0)
      {
        # Have a sigleton on the current side. Record the singleton as being on the current side of the
        # current merge and all merges in the traceback.
        # print("Singleton.");
        singleton = -dendro$merge[curMerge, curSide[curMerge]];
        curSide[curMerge] = curSide[curMerge] + 1;
        iTrace = nTrace;
        while (iTrace > 0)
        {
          merge = traceBack[iTrace];
          mergeSide = curSide[merge] - 1;
          i = nSides[merge, mergeSide] + 1;
          sides[i, merge, mergeSide] = singleton;
          nSides[merge, mergeSide] = i;
          iTrace = iTrace - 1;
        }
        # print("nSides after:"); print(nSides);
        # print("sides[,,1]:"); print(sides[,,1]);
        # print("sides[,,2]:"); print(sides[,,2]);
      } else {
        # Record traceback and do the next merge.
        # print("Branch.")
        x = curSide[curMerge];
        curSide[curMerge] = x+1;
        curMerge = dendro$merge[curMerge, x];
        nTrace = nTrace + 1;
        traceBack[nTrace] = curMerge;
      }
    } else {
      # print("Taking one step back.");
      # Both sides were already processed. Return one step back.
      if (nTrace > 1)
      {
        nTrace = nTrace - 1;
        curMerge = traceBack[nTrace];
      } else {
        printFlush("All done, apparently!");
      }
    }
    # print("Press enter"); scan();
  }
  
  # Generate all orderings by recursively flipping left and right sides of each merge.
  
  # It is much easier to flip rankings than orderings.
  
  nRankings = 2**(min(nMerge, maxMerge)-1);
  ranks = matrix(0, nrow = nSingl, ncol = nRankings);
  
  cuRanking = 1;  # Ranking being currently worked on
  
  # Convert the dendorgram order into ranks
  dendrRanking = rep(0, nSingl);
  dendrRanking[dendro$order] = c(1:nSingl);
  
  # Rankings used as a base before flipping for every merge
  
  baseRankings = matrix(rep(dendrRanking, nMerge), nrow = nSingl, ncol = nMerge);
  
  flipState = rep(0, nMerge);
  
  # The very last merge can be left out.
  merge = nMerge-1;
  
  if (verbose>1) printFlush(paste(spaces, " ..generating rankings.."));
  counter = 0; countedMerge = nMerge - 5;
  
  while (merge<nMerge) 
  {
    # print(paste("merge:", merge, "flipState:", paste(flipState, collapse = ", ")));
    if (flipState[merge]==0)
    {
      # First pass through this merge.
      flipState[merge] = 1;
      # Copy base order from above
      if (merge<nMerge) baseRankings[,merge] = baseRankings[,merge+1];
      if ((merge>1) & (merge>nMerge-maxMerge+1))
      {
        # Reset everything below
        flipState[1:(merge-1)] = 0;
        # ...and go one step below
        merge = merge-1;
      } else {
        # No merges below: save current ranking
        ranks[,cuRanking] = baseRankings[, merge];
        cuRanking = cuRanking + 1;
        # Perform flip at the merge
        nLeft = nSides[merge, 1];
        nRight = nSides[merge, 2];
        left = sides[1:nLeft, merge, 1];
        right = sides[1:nRight, merge, 2];
        flippedRanking = baseRankings[, merge];
        #print(paste("Flip: left=", paste(left, collapse = ","), "; right=", 
        #paste(right, collapse = ", ")));
        #print(paste("Ranks before:", paste( flippedRanking, collapse = ", ")));
        flippedRanking[left] = baseRankings[left, merge] + nRight;
        flippedRanking[right] = baseRankings[right, merge] - nLeft;
        #print(paste("Ranks after", paste( flippedRanking, collapse = ", ")));
        # Save the flipped order
        ranks[, cuRanking] = flippedRanking;
        cuRanking = cuRanking + 1;
        flipState[merge] = 2;
      }
    } else if (flipState[merge]==1)
    {
      if ((verbose > 2) & (merge == countedMerge))
      { 
        printFlush(paste(spaces, "  ..counter on counted merge:", counter, "of",
                         2**(nMerge-countedMerge-1)));
        counter = counter + 1;
      }
      flipState[merge] = 2;
      # perform the flip of this merge
      nLeft = nSides[merge, 1];
      nRight = nSides[merge, 2];
      left = sides[1:nLeft, merge, 1];
      right = sides[1:nRight, merge, 2];
      flippedRanking = baseRankings[, merge];
      #print(paste("Flip: left=", paste(left, collapse = ","), "; right=", 
      #paste(right, collapse = ", ")));
      #print(paste("Ranks before:", paste( flippedRanking, collapse = ", ")));
      flippedRanking[left] = baseRankings[left, merge] + nRight;
      flippedRanking[right] = baseRankings[right, merge] - nLeft;
      #print(paste("Ranks after", paste( flippedRanking, collapse = ", ")));
      # Reset all flips below
      flipState[1:(merge-1)] = 0;
      # Set new base order
      baseRankings[, merge] = flippedRanking;
      # and go to next lower merge
      merge = merge -1;
    } else {
      # Both flips on this merge were performed and saved... go one step back.
      merge = merge + 1;
    }
  }
  
  collect_garbage();
  
  if (verbose>1) printFlush(paste(spaces, " ..converting rankings into oderings.."));
  
  
  # The following lines do not work for large dendrograms, presumably because subsetting creates a copy of
  # the whole object.
  
  orders = ranks;
  for (i in 1:nRankings) orders[ranks[, i], i] = c(1:nSingl);
  
  # Replace it with slightly more complicated but block code.
  # Want to convert everything into a flat vector.
  
  #BatchSize = as.integer(1000000/nSingl);
  
  #nBatch = as.integer( (nRankings-1)/BatchSize ) + 1;
  #for (batch in 1:nBatch)
  
  #nData = nRankings * nSingl;
  #flatRanks = as.integer( c(0:(nData-1)) / nSingl ) * nSingl + as.vector(ranks);
  #orders = flatRanks;
  #orders[flatRanks] = rep(c(1:nSingl), times = nRankings);
  #dim(orders) = c(nSingl, nRankings);
  
  list(orders = orders, ranks = ranks);
}

#-----------------------------------------------------------------------------------------------
#
# outlierMeasure
#
#-----------------------------------------------------------------------------------------------

# This function calculates the measure of being an outlier for each sample in each column and averages
# them. It returns a vector of the length = nrow(x) giving the mean measure of outlierishness for each
# sample.   

outlierMeasure = function(x)
{
  x = as.matrix(x);
  mx = apply(x, 2, median, na.rm = TRUE)
  mxMat = matrix(mx, nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
  madxMat = matrix(apply(x, 2, mad, na.rm = TRUE), nrow = nrow(x), ncol = ncol(x), byrow = TRUE);
  ux = as.matrix(abs((x - mxMat)/(9 * qnorm(0.75) * madxMat)))
  aval = ifelse(ux <= 1, 1, 0)
  outMeas = ux^2;   # Could multiply by 1-aval as well 
  apply(outMeas, 1, mean, na.rm = TRUE);
}


# Related: this function plots a legend but cleans up the legend rectangle
# first. Do not give it a plot argument, it will cause an error.

legendClean = function(...)
{
  box = legend(..., plot = FALSE)$rect
  rect(box$left, box$top-box$h, box$left +box$w, box$top, col = "white", border = "white");
  legend(...);
}

initLib = function(libname, force = FALSE)
{
  os = R.Version()$os;
  recOS = c("linux-gnu", "mingw32");
  ios = match(os, recOS);
  if (is.na(ios)) stop("Unrecognized OS.");
  exts = c(".so", ".dll");
  filename = paste(libname, exts[ios], sep="");
  if (force && is.loaded(filename)) dyn.unload(filename);
  if (!is.loaded(filename)) dyn.load(filename);
}


# spaste: paste without spaces.

spaste = function(...)
{
  paste(..., sep="");
}


# Plot enrichment barplot of modules in a yes/no indicator variable

plotEnrichmentBarplot = function(indicator, moduleLabels, moduleLevels, moduleColors,
                                 xlab = "Module number",
                                 ylab = "-log(p-value)",
                                 cex.lab = 1,
                                 ...)
{
  nGenes = length(moduleLabels);
  geneColors = moduleLabels[indicator];
  presentMods = sort(unique(geneColors))
  nPresMods = length(presentMods)
  pValues = rep(NA, nPresMods)
  nIntGenes = rep(0, nPresMods)
  xGeneColors = rep(-1, nGenes)
  xGeneColors[indicator] = geneColors;
  for (imod in 1:nPresMods)
  {
    mod = presentMods[imod]
    testMatrix = matrix( c(
      sum(xGeneColors>-1 & moduleLabels==mod),
      sum(xGeneColors==-1 & moduleLabels==mod),
      sum(xGeneColors>-1 & moduleLabels!=mod),
      sum(xGeneColors==-1 & moduleLabels!=mod)), 2, 2)
    pValues[imod] = fisher.test(testMatrix, alternative = "greater")$p.value;
    nIntGenes[imod] = testMatrix[1,1];
  }
  
  pValues[pValues==0] = 1e-300;
  
  max = max(-log10(pValues), -log10(0.01 / nPresMods) + 1);
  min = 0;
  
  mp = barplot(-log10(pValues), col = moduleColors[match(presentMods, moduleLevels)], names.arg = FALSE,
               xlab = xlab, ylab = ylab, ylim = c(min, max), ...)
  text(x = mp, y = pmax(-log10(pValues) + 1, rep(-log10(0.01 / nPresMods) + 1, length(pValues))),
       labels =  nIntGenes, adj = c(0.5, 0), xpd = TRUE, cex = 0.8 )
  text(x = mp, y = min - (max-min)*0.05, labels =presentMods, adj = c(0.5, 1), xpd = TRUE, cex = cex.lab )
  abline(h = -log10(0.01 / nPresMods), col = "red")
}


# Row-wise consensus: must have equal signs
# i.e. consensus across columns, returning one value for each row (for higher-dimensional arrays, returns
# an array of one dimension lower of consensi across the last dimension)
# attempts to ignore NA's 

rowConsensus = function(data)
{
  nDim = length(dim(data))
  if (nDim < 2) stop("'data' must be a matrix or array with at least 2 dimensions.");
  if (nDim==2) data = as.matrix(data);
  dimd = dim(data);
  nc = dimd[length(dimd)];
  sign = apply(sign(data), c(1:(nDim-1)), sum, na.rm = TRUE)
  sign[is.na(sign)] = 0;
  sign[abs(sign) < nc] = 0;
  consensus = apply(abs(data), c(1:(nDim-1)), min, na.rm = TRUE) * sign/nc;
  consensus;
}

plotHistogramAndCF = function(data, nTicks = NULL, ...)
{
  h = hist(data, ...);
  ymax = par("yaxp")[2];
  n = par("yaxp")[3];
  #if (!(n %in% c(2,4,5,10)))
  if (is.null(nTicks)) nTicks = n;
  sum = sum(h$counts);
  lines(h$mids, cumsum(h$counts) / sum * ymax, col="red");
  axis(side = 4, at = c(0:nTicks)/nTicks * ymax, labels = signif(c(0:nTicks)/nTicks, 2))
  addGrid();
  invisible(h);
}

# The [bi]corAndPvalue functions have been moved to WGCNA package

#===============================================================================================
#
# Plot of a set of variables vs. genomic location.
#
#===============================================================================================

.plotQTLtypes = c("lines", "points");

plotQTL = function(data, chr, bp, colors = 1, type = "lines", bpBasedSpacing = FALSE,
                   lty = 1, lwd = 1, ylim = NULL, xLabOffset = 0.04, 
                   gridColor = "grey", grid.lty = 2, ... )
{
  
  itype = charmatch(type, .plotQTLtypes);
  if (is.na(itype))
    stop(paste("Unrecognized 'type'. Recognized values are", paste(.plotQTLtypes, collapse = ", ")));
  
  nchr = as.numeric(chr);
  order = order(nchr, bp);
  data = as.matrix(data);
  nCols = ncol(data);
  nSNPs = length(order)
  x = data[order, , drop = FALSE];
  if (bpBasedSpacing)
  {
    chromosomeNumbers = sort(unique(nchr));
    nChromo = length(chromosomeNumbers);
    chromoLength = tapply(bp, nchr, max);
    dividers = cumsum(chromoLength);
    totalLength = sum(chromoLength);
    snpGlobalPositions = bp;
    addLength = 0; index = 1;
    for (ch in chromosomeNumbers)
    {
      snpGlobalPositions[ch==nchr] = bp[ch==nchr] + addLength;
      addLength = addLength + chromoLength[index];
      index = index + 1;
    }
    plotX = snpGlobalPositions;
    breaksX = c(0.5, dividers + 0.5);
    breaks = dividers[-length(dividers)] + 0.5;
  } else {
    plotX = c(1:nSNPs);
    cx = chr[order];
    cx2 = cx[-1];
    breaks = which(cx[-nSNPs]!=cx2) + 0.5;
    breaksX = c(0.5, breaks, nSNPs + 0.5);
  }
  mids = (breaksX[-1] + breaksX[-length(breaksX)])/2;
  
  if (is.null(colors)) colors = matrix(1, nSNPs, nCols);
  if ((length(colors)==nCols) || (length(colors)==1)) 
    colors = matrix(colors, nSNPs, nCols, byrow = TRUE);
  if (length(lty==1)) lty = rep(lty, nCols);
  if (length(lwd==1)) lwd = rep(lwd, nCols);
  
  
  if (is.null(ylim))
  {
    min = min(data, na.rm = TRUE);
    max = max(data, na.rm = TRUE);
    ylim = c(min, max);
  }
  
  if (itype==1)
  {
    plot(plotX, x[, 1], col = colors[, 1], type = "l", lty = lty[1], lwd = lwd[1], 
         ylim = ylim, xaxt = "none", xlab = "Chromosome", xaxs = "i", ...)
    
    if (nCols > 1) for (col in 2:nCols)
      lines(plotX, x[, col], col = colors[, col], lty = lty[col], lwd = lwd[col]);
  } else {
    plot(plotX, x[, 1], col = colors[, 1], pch = ".",
         ylim = ylim, xaxt = "none", xlab = "Chromosome", xaxs = "i", ...)
    
    if (nCols > 1) for (col in 2:nCols)
      points(plotX, x[, col], col = colors[, col], pch = ".");
  }
  
  
  box = par("usr");
  for (b in breaks)
    lines(x = rep(b, 2), y = box[3:4], col = gridColor, lty = grid.lty );
  
  text(mids, rep(box[3] - xLabOffset * (box[4]-box[3]), length(mids)), sort(unique(chr)), xpd = TRUE);
  
  invisible(plotX);
}

distributionOnSNPs2 = function(dataList, snpChr, snpBp, window, filterType = "gaussian", fun = sum, 
                               normalize = FALSE)
{
  nData = length(dataList);
  nSNPs= length(snpChr);
  distribution = matrix(0, nSNPs, nData);
  snpChrLevs = sort(unique(snpchr));
  
  ft = charmatch(filterType, .filterTypes);
  if (is.na(ft))
    stop(paste("Unrecognized filter type: ", filterType,
               "\nRecognized values are ", paste(.filterTypes, collapse = ", ")))
  
  for (m in 1:nData)
  {
    sourceChr = dataList[[m]]$chr;
    sourceBp = dataList[[m]]$bp;
    for (c in snpChrLevs)
    {
      chrSnps = snpChr==c;
      
      targetBp = snpBp[chrSnps];
      sourceBpC = sourceBp[sourceChr==c];
      finiteBp = is.finite(sourceBpC);
      sourceBpC = sourceBpC[finiteBp];
      
      # outer(x,y,`-`)[i,j] = x[i]-y[j]
      distMat = outer(sourceBpC, targetBp, `-`);
      dataMat = matrix(dataList[[m]]$data[sourceChr==c][finiteBp], nrow(distMat), ncol(distMat));
      
      if (ft==1)
      {
        weight = exp(-distMat^2/window^2);
      } else {
        weight = matrix(as.numeric(abs(distMat) <= window), nrow(distMat), ncol(distMat));
      }
      d = apply(weight * dataMat, 2, fun);
      if (normalize)
      {
        sw = apply(weight, 2, fun);
        d = d/sw;
        d[sw==0] = NA;
      }
      distribution[chrSnps, m] = d;
    }
  }
  distribution;
}

#================================================================================================
#
# qtlPeaks
#
#================================================================================================

# Find peaks in data (qtl) that have no near higher QTL (within tol). 
# Returns a logical vector that indicates whether
# each entry is a peak or not.

qtlPeaks = function(qtl, chr, bp, minQTL, window, tol = 0.001)
{
  order = order(chr, bp)
  nSNPs = length(qtl);
  qtlL = c(qtl[-1], 0);
  qtlR = c(0, qtl[-nSNPs]);
  
  chrL = c(chr[-1], chr[nSNPs]);
  chrR = c(chr[1], chr[-nSNPs]);
  
  peaks = (qtl >= qtlL | chrL != chr) & (qtl >= qtlR | chr != chrR)
  
  peaks[is.na(peaks)] = FALSE;
  peaks[peaks][qtl[peaks] < minQTL] = FALSE;
  
  # remove neighboring peaks that have the exact same LOD
  nPeaks2 = sum(peaks);
  delPeaks2 = rep(FALSE, nPeaks2);
  qtl2 = qtl[peaks];
  delPeaks2[qtl2[-1]==qtl2[-nPeaks2]] = TRUE;
  # This looks a bit confusing :)
  peaks[peaks][delPeaks2] = FALSE;
  
  peakP = qtl[peaks];
  nPeaks = sum(peaks);
  peakchr = chr[peaks];
  peakBp = bp[peaks];
  
  # Remove peaks that are not highest in their neighborhood
  delPeaks = rep(FALSE, nPeaks);
  for (p in 1:nPeaks)
  {
    near = chr==peakchr[p] & abs(peakBp[p] - bp) <= window
    maxNearQTL = max(qtl[near], na.rm = TRUE);
    if (maxNearQTL > peakP[p] + tol) delPeaks[p] = TRUE;
  }
  
  peaks[peaks][delPeaks] = FALSE;
  
  peaks;
}

#===============================================================================================
#
# labeled boxplot
#
#===============================================================================================

labeledBoxplot = function(x, names, srt = 45, adj = c(1, 0.5), namesColor = 1, cex.names = 1, 
                          verbose = FALSE, ...)
{
  x = as.list(x);
  if (verbose)
  {
    lengths = lapply(x, length);
    n = length(x);
    g = rep(1:n, lengths);
    xx = as.vector(unlist(x));
    out = verboseBoxplot(xx, g, names = FALSE, ...);
  } else 
    out = boxplot(x, names = FALSE, ...);
  n = length(x);
  box = par("usr");
  xText = c(1:n);
  yText = rep(box[3] - 0.02 * (box[4] - box[3]), n);
  
  text(xText, yText, names, srt = srt, adj = adj, col = namesColor, cex = cex.names, xpd = TRUE);
  out;
}


#===============================================================================================
#
# labeled plot
#
#===============================================================================================

labeledPlot = function(x, y, names, srt = 45, adj = c(1, 0.5), namesColor = 1, cex.names = 1, ...)
{
  plot(x, y, xaxt = "n", ...);
  n = length(x);
  box = par("usr");
  xText = c(1:n);
  yText = rep(box[3] - 0.02 * (box[4] - box[3]), n);
  
  text(xText, yText, names, srt = srt, adj = adj, col = namesColor, cex = cex.names, xpd = TRUE);
}


#===============================================================================================
#
# labeled barplot, version 2 (somewhat different aim than the WGCNA function)
#
#===============================================================================================

labeledBarplot2 = function(x, names, g = NULL, horiz = FALSE, 
                           srt = if (horiz) 0 else 45, 
                           adj = if (horiz) c(1, 0.5) else c(1, 0.5), 
                           namesColor = 1, cex.names = 1, 
                           addGuide = TRUE, guideColor = "grey30", guideLty = 2, ...)
{
  
  if (!is.null(g)) {
    mp = verboseBarplot(x, g, names.arg = NA, horiz = horiz, ...);
    heights = attr(mp, "height");
  } else {
    mp = barplot(x, names.arg = NA, horiz= horiz, ...);
    heights = x;
    attr(mp, "height") = x;
    attr(mp, "stdErr") = rep(0, length(x));
  }
  
  box = par("usr");
  
  if (horiz)
  {
    yText = mp;
    xText = rep(box[1] - 0.01 * (box[2] - box[1]), length(mp));
  } else {
    xText = mp;
    yText = rep(box[3] - 0.02 * (box[4] - box[3]), length(mp));
  }
  
  text(xText, yText, names, srt = srt, adj = adj, col = namesColor, cex = cex.names, xpd = TRUE);
  if (addGuide) for (i in 1:length(mp))
    if (horiz)
    {
      lines(c(min(0, heights[i]), box[1] - 0.02 * (box[2] - box[1])),  rep(mp[i], 2), 
            col = guideColor, lty = guideLty);
    } else {
      lines(rep(mp[i], 2), c(min(0, heights[i]), box[3] - 0.02 * (box[4] - box[3])), 
            col = guideColor, lty = guideLty);
    }
  
  invisible(mp);
  
}


#===============================================================================================
#
# Gaussian smoothing filter
#
#===============================================================================================

smoothGauss = function(x,y, xtest, width, normalize = TRUE)
{
  dist = outer(x, xtest, "-");
  weight = exp(-dist^2/(2*width^2));
  yMat = matrix(y, length(x), length(xtest));
  
  if (normalize)
  {
    val = apply(yMat * weight, 2, sum)/apply(weight, 2, sum)
  } else
    val = apply(yMat * weight, 2, sum)
  val;
}

#===============================================================================================
#
# Convert date in the form month/day/year into number of days
# Accepts the separator (usually /, -, or .) and the position of day, month, and year in the date
#
#===============================================================================================

linDate = function(dateStr, separator= "/", day = 2, month = 1, year = 3)
{
  # Here we assume date specified as month/day/year
  
  daysInMonth = c(31,28,31,30,31,30,31,31,30,31,30,31);
  sumDIM = c(0, cumsum(daysInMonth));
  daysInYear = sum(daysInMonth);
  
  split = strsplit(dateStr, split = separator, fixed = TRUE);
  nSubj = length(split);
  date = rep(NA, nSubj);
  for (s in 1:nSubj)
  {
    date[s] = as.numeric(split[[s]][day]) + sumDIM[as.numeric(split[[s]][month])] +
      as.numeric(split[[s]][year]) * daysInYear;
  }
  date;
}

#===============================================================================================
#
# Convert age in the form xxyxxm to number of years. Assumes all months have the same length.
#
#===============================================================================================

linAge = function(ageStr)
{
  # Assume ageStr contains age in the form xxyxxm where xx are numbers and y and m are year and month
  # delimiters
  
  split = strsplit(ageStr, split = c("y"), fixed = TRUE)
  
  nSubj = length(split);
  age = rep(NA, nSubj);
  for (s in 1:nSubj)
  {
    age[s] = as.numeric(split[[s]][1]);
    if (length(split[[s]])==2)
    {
      months = strsplit(split[[s]][[2]], split = "m", fixed = TRUE)[[1]];
      age[s] = age[s] + as.numeric(months)/12;
    }
  }
  age;
}

#=========================================================================================================
#
# Simulation of a causal model. From Old/GeneExpressionSimulation/.
# Note: the causal model simulation has been modified to produce a consistent interpretation of the path
# coefficients.
#
#=========================================================================================================

#----------------------------------------------------------------------------
#
# CausalChildren
#
#----------------------------------------------------------------------------
# Note: The returned vector may contain multiple occurences of the same child.

CausalChildren = function(Parents, Cause)
{
  nNodes = dim(Cause)[[1]];
  
  # print(paste("Length of Parents: ",length(Parents)));
  if (length(Parents)==0) return(NULL);
  
  Child_ind = apply(as.matrix(abs(Cause[, Parents])), 1, sum)>0;
  if (sum(Child_ind)>0)
  {
    children = c(1:nNodes)[Child_ind] 
  } else {
    children = NULL;
  }
  children;
}

#----------------------------------------------------------------------------
#
# simulateCausal
#
#----------------------------------------------------------------------------
#
# Given a set of causal anchors, this function creates a network of vectors that should satisfy the
# causal relations encoded in the causal matrix Cause, i.e. Cause[j,i] is the causal effect of vector i on
# vector j. (This is Jason's convention.)

# The function starts by initializing all vectors to noise given in the noise specification. (The noise
# can be specified for each vector separately.) Then it runs the standard causal network signal
# propagation and returns the resulting vectors.

simulateCausal = function(Cause, AnchorInd, AnchorVecs, Noise, verbose = 2, indent = 0)
{
  spaces = indentSpaces(indent);
  
  if (verbose>0) printFlush(paste(spaces, "Creating seed vectors..."));
  nNodes = dim(Cause)[[1]];
  nSamples = dim(AnchorVecs)[[1]];
  
  if (length(AnchorInd)!=dim(AnchorVecs)[[2]])
  {
    printFlush(paste("CreateSeedVectors: Error: Length of AnchorInd must equal",
                     "the number of vectors in AnchorVecs."));
    stop();
  }
  if (length(Noise)!=nNodes)
  {
    printFlush(paste("CreateSeedVectors: Error: Length of Noise must equal",
                     "the number of nodes as given by the dimension of the Cause matrix."));
    stop();
  }
  
  # Initialize all node vectors to noise with given standard deviation
  
  NodeVectors = matrix(0, nrow = nSamples, ncol = nNodes);
  for (i in 1:nNodes)
  {
    NodeVectors[,i] = rnorm(n=nSamples, mean=0, sd=Noise[i]);
  }
  
  Levels = rep(0, times = nNodes);
  
  # Calculate levels for all nodes: start from anchors and go through each successive level of children
  
  level = 0;
  Parents = AnchorInd;
  Children = CausalChildren(Parents = Parents, Cause = Cause);
  if (verbose>1) printFlush(paste(spaces, "..Determining level structure..."));
  while (!is.null(Children))
  {
    # print(paste("level:", level));
    # print(paste("   Parents:", Parents));
    # print(paste("   Children:", Children));
    level = level + 1;
    if ((verbose>1) & (level/10 == as.integer(level/10))) 
      printFlush(paste(spaces, "  ..Detected level", level));
    #printFlush(paste("Detected level", level));
    Levels[Children] = level;
    Parents = Children;
    Children = CausalChildren(Parents = Parents, Cause = Cause);
  }
  
  HighestLevel = level;
  
  # Generate the whole network
  
  if (verbose>1) printFlush(paste(spaces, "..Calculating network..."));
  NodeVectors[,AnchorInd] = NodeVectors[,AnchorInd] + AnchorVecs;
  for (level in (1:HighestLevel))
  {
    if ( (verbose>1) & (level/10 == as.integer(level/10)) ) 
      printFlush(paste(spaces, " .Working on level", level));
    #printFlush(paste("Working on level", level));
    LevelChildren = c(1:nNodes)[Levels==level]
    for (child in LevelChildren) 
    {
      LevelParents = c(1:nNodes)[Cause[child, ]!=0]
      parentEffect = rep(0, nSamples);
      for (parent in LevelParents)
        parentEffect = parentEffect + Cause[child, parent]*scale(NodeVectors[,parent]);
      NodeVectors[, child] = NodeVectors[, child] + parentEffect;
    }
  }
  
  Nodes = list(Vectors = scale(NodeVectors), Cause = Cause, Levels = Levels, AnchorInd = AnchorInd);
  Nodes;
} 



#===================================================================================================== 
#
# partial correlation of columns of x conditioned on A
#
#=====================================================================================================

matrixPCor = function(x, A, corFnc = "cor", corOptions = "use = 'p'")
{
  cx = eval(parse(text = spaste(corFnc, "(x, ", corOptions, ")")));
  ca = as.numeric(eval(parse(text = spaste(corFnc, "(x, A, ", corOptions, ")"))));
  
  pc = ( cx - ca %*% t(ca)) /  sqrt( (1-ca^2) %*% t(1-ca^2) )
  pc;
};

zeo = function(x, A, corFnc = "cor", corOptions = "use = 'p'")
{
  n = ncol(x);
  cx = eval(parse(text = spaste(corFnc, "(x, ", corOptions, ")")));
  ca = as.numeric(eval(parse(text = spaste(corFnc, "(x, A, ", corOptions, ")"))));
  caMatCol = matrix(ca, n, n);
  caMatRow = matrix(ca, n, n, byrow = TRUE);
  zeo = (caMatCol - cx * caMatRow) / sqrt( (1-caMatRow^2) * (1-cx^2) );
  diag(zeo) = NA;
  zeo;
}

#===================================================================================================
#
# verbosePairs
#
#===================================================================================================

verbosePairs = function(x, mar1 = c(2,2,1,1), names = colnames(x), cex.names = 3, sample = NULL,
                        cex.lab = 1, cex.main = 1, cex.axis = 1, breaks = 100, corFnc = "cor", ...)
{
  x = as.matrix(x);
  nSets = ncol(x);
  par(mfrow = c(nSets, nSets));
  par(mar = mar1)
  if (is.null(names)) names = c(1:nSets);
  
  for (s1 in 1:nSets) for (s2 in 1:nSets)
  {
    if (s1==s2)
    {
      #plot(c(0,1), c(0,1), type = "n", xaxt = "n", yaxt = "n");
      #text(0.5, 0.5, adj = c(0.5, 0.5), cex = cex.names, labels = names[s1]);
      if (corFnc=="bicor")
      {
        mean = median(x[, s1], na.rm = TRUE);
        sd = mad(x[, s1], na.rm = TRUE);
        mainLine = spaste("\nmedian: ", signif(mean, 2), ", mad: ", signif(sd, 2));
      } else {
        mean = mean(x[, s1], na.rm = TRUE);
        sd = sd(x[, s1], na.rm = TRUE);
        mainLine = spaste("\nmean: ", signif(mean, 2), ", sd: ", signif(sd, 2));
      }
      par(mar = mar1 + c(0,0,2,0))
      hist(x[, s1], breaks = breaks, main = spaste(names[s1], mainLine), xlab = "", cex.lab = cex.lab,
           cex.axis = cex.axis, cex.main = cex.main);
    } else {
      par(mar = mar1)
      verboseScatterplot(x[, s2], x[, s1], sample = sample, cex.lab = cex.lab,
                         cex.axis = cex.axis, cex.main = cex.main, xlab = "", ylab = "",
                         corFnc = corFnc, ...);
    }
  }
}


#=======================================================================================================
#
# invertEmpiricalDistribution
#
#=======================================================================================================

# Inverts an empirical distribution

invertEmpiricalDistribution = function(data, nBreaks = 100, smoothWindow = 3, plot = FALSE)
{
  if (!is.null(dim(data))) stop("'data' must be a vector.");
  n = length(data);
  h = hist(data, breaks = nBreaks, plot = plot);
  
  
  step = h$mids[2] - h$mids[1];
  smooth = smoothGauss(h$mids, h$counts, h$mids, smoothWindow * step);
  smooth[smooth < 0] = 0;
  sums = c(0, cumsum(smooth))/max(cumsum(smooth));
  
  if (plot) lines(h$mids, smooth, col = "red")
  if (plot) lines(h$breaks, sums * max(h$counts), col = "blue")
  
  y = h$breaks
  x = sums;
  # fit = lm(y~ns(x, df = nBreaks));
  
  if (FALSE)
  {
    nInvBreaks = nBreaks;  
    invX = seq(from = 0, to=1, length.out = nInvBreaks + 1)
    inverse = predict(fit, newdata = data.frame(x=invX))
    
    inverse[1] = h$breaks[1];
    inverse[nInvBreaks + 1] = h$breaks[length(h$breaks)];
    
    if (plot) lines(inverse, invX*max(h$counts), col = "green");
    
    plot(sums, h$breaks, type = "l", col = 1);
    lines(invX, inverse+1, col = 2);
  }
  
  #fit;
  
  list(x = x, y = y);
}

# Make a slightly modified version of the function approx to supress a warning

approx = function (x, y = NULL, xout, method = "linear", n = 50, yleft, 
                   yright, rule = 1, f = 0, ties = mean) 
{
  x <- xy.coords(x, y)
  y <- x$y
  x <- x$x
  nx <- length(x)
  method <- pmatch(method, c("linear", "constant"))
  if (is.na(method)) 
    stop("invalid interpolation method")
  stopifnot(is.numeric(rule), (lenR <- length(rule)) >= 1, 
            lenR <= 2)
  if (lenR == 1) 
    rule <- rule[c(1, 1)]
  if (any(na <- is.na(x) | is.na(y))) {
    ok <- !na
    x <- x[ok]
    y <- y[ok]
    nx <- length(x)
  }
  if (!identical(ties, "ordered")) {
    if (length(ux <- unique(x)) < nx) {
      #            if (missing(ties)) 
      #                warning("collapsing to unique 'x' values")
      y <- as.vector(tapply(y, x, ties))
      x <- sort(ux)
      nx <- length(x)
    }
    else {
      o <- order(x)
      x <- x[o]
      y <- y[o]
    }
  }
  if (nx <= 1) {
    if (method == 1) 
      stop("need at least two non-NA values to interpolate")
    if (nx == 0) 
      stop("zero non-NA points")
  }
  if (missing(yleft)) 
    yleft <- if (rule[1] == 1) 
      NA
  else y[1L]
  if (missing(yright)) 
    yright <- if (rule[2] == 1) 
      NA
  else y[length(y)]
  stopifnot(length(yleft) == 1, length(yright) == 1, length(f) == 
              1)
  if (missing(xout)) {
    if (n <= 0) 
      stop("'approx' requires n >= 1")
    xout <- seq.int(x[1L], x[nx], length.out = n)
  }
  y <- .C("R_approx", as.double(x), as.double(y), as.integer(nx), 
          xout = as.double(xout), as.integer(length(xout)), as.integer(method), 
          as.double(yleft), as.double(yright), as.double(f), NAOK = TRUE, 
          PACKAGE = "stats")$xout
  list(x = xout, y = y)
}

# convenience function

evalInvDist = function(invDist, xnew)
{
  approx(invDist$x, invDist$y, xout = xnew)$y;
}


#===============================================================================================
#
# grey2red
#
#===============================================================================================

grey2red = function(n, base, gamma)
{
  red = seq(from=base^gamma, to=1, length.out = n)^(1/gamma)
  green = blue = seq(from = base^gamma, to=0, length.out = n)^(1/gamma);
  col = rgb(red, green, blue, maxColorValue = 1); 
}


# Example of grey2red:

if (FALSE)
{
  par(mfrow = c(5,1))
  par(mar = c(1,3,1,1))
  n= 100
  barplot(rep(1, n), col = grey2red(n, 0, 1))
  barplot(rep(1, n), col = grey2red(n, 1, 1))
  barplot(rep(1, n), col = grey2red(n, 0.5, 1))
  barplot(rep(1, n), col = grey2red(n, 0.5, 0.2))
  barplot(rep(1, n), col = grey2red(n, 0.5, 5.0))
}



#===============================================================================================
#
# Network plot for generating VisANT-like plots
#
#===============================================================================================

#transform to radians

toRadians = function(angle, degrees = TRUE)
{
  if (degrees) angle = angle * pi/180;
  angle;
}

toDegrees = function(angle, radians = TRUE)
{
  if (radians) angle = angle / pi * 180;
  angle;
}

normalizeAngle = function(angle, degrees = TRUE)
{
  if (degrees)
  {
    step = 360; min = 0;
  } else {
    step = 2*pi; min = 0;
  }
  
  for (i in 1:length(angle))
  {
    while (angle[i] < min) angle[i] = angle[i] + step;
    while (angle[i] > step + min) angle[i] = angle[i] - step;
  }
  angle;
}

scaleToRange = function(x, min, max, scale = TRUE)
{
  if (scale)
  {
    y = (x-min(x, na.rm = TRUE))/(max(x, na.rm = TRUE) - min(x, na.rm = TRUE)) * (max - min) + min;
  } else {
    y = x;
    y[!is.na(x)] = min;
  }
  y;
}

# Angles here are assumed in degrees
# This should return the distance from center of a symbol to the center of a label
pointToLabelDist = function(labelWidth, labelHeight, pointRadius, alignAngle, labelAngle, degrees = TRUE)
{
  alignAngle = toRadians(alignAngle, degrees);
  labelAngle = toRadians(labelAngle, degrees);
  
  # Align and label angles cannot go against each other since that would put the label into the point
  # symbol.
  if (cos(alignAngle - labelAngle) < 0) labelAngle = labelAngle + pi;
  
  al.m.lab = alignAngle - labelAngle
  ratio = labelHeight/labelWidth;
  epsilon = atan(ratio) * sign( sin(al.m.lab) );
  gamma = al.m.lab - epsilon; 
  
  if (abs(tan(al.m.lab)) > ratio + 2*pointRadius/labelWidth)
  {
    # "Scenario A": long (width) side touches the circle
    return ( (pointRadius + 0.5 * labelHeight)/abs(sin(al.m.lab)) );
  } else if (abs(tan(al.m.lab)) > ratio) {
    # "Scenario B": corner touches the circle
    a = 0.5 * sqrt(labelWidth^2 + labelHeight^2);
    delta.m.al = asin( a*sin(gamma) / pointRadius );
    kappa = pi - gamma - delta.m.al;
    return ( pointRadius * sin(kappa) / sin(gamma) );
  } else {
    # Scenario C: short (height) side touches
    return ( (pointRadius + 0.5 * labelWidth) / abs(cos(gamma)) );
  }
}

labelPosition = function(labelWidth, labelHeight, pointRadius, alignAngle, labelAngle, degrees = TRUE)
{
  dst = pointToLabelDist(labelWidth, labelHeight, pointRadius, alignAngle, labelAngle, degrees);
  alignAngle = toRadians(alignAngle, degrees);
  c( dst * cos(alignAngle), dst * sin(alignAngle) );
}

drawRectangle = function(center.x, center.y, width, height, angle, degrees = TRUE, ...)
{
  corners = matrix( c(-width/2, -height/2, 
                      width/2, -height/2,
                      width/2, height/2,
                      -width/2, height/2,
                      -width/2, -height/2), 2, 5)
  angle = toRadians(angle, degrees);
  rotMat = matrix(c( cos(angle), sin(angle), -sin(angle), cos(angle)), 2, 2);
  
  points = rotMat %*% corners + matrix(c(center.x, center.y), 2, 5);
  lines(points[1, ], points[2, ], ...);
}

drawEllipse = function(center.x, center.y, width, height = width, angle = 0, startAngle = 0, 
                       stopAngle = 360, degrees = TRUE, nSteps = 100,...)
{
  pixelAngles = toRadians(seq(from=startAngle, to = stopAngle, length.out = nSteps + 1), degrees)
  pixels = matrix( c(width * cos(pixelAngles), 
                     height * sin(pixelAngles)), 2, nSteps + 1, byrow = TRUE);
  
  angle = toRadians(angle, degrees);
  rotMat = matrix(c( cos(angle), sin(angle), -sin(angle), cos(angle)), 2, 2)
  points = rotMat %*% pixels +
    matrix(c(center.x, center.y), 2, nSteps + 1);
  lines(points[1, ], points[2, ], ...);
}





# This function needs x and y for the points.

networkPlot = function(
  adjacency,
  labels,
  pos.x, pos.y, 
  alignAngles,
  labelAngles,
  cex.labels,
  cex.points,
  lineAngles = NULL, # Not used for now
  maxAdj = NULL,
  colors = NULL,
  startNewPlot = TRUE,
  plotBox = c(-1, 1, -1, 1),
  variable.line.width = TRUE,
  min.line.width = 1,
  max.line.width = 5,
  nPlotLines = NULL,
  pch = 21,
  labelColors = "black",
  pointColors = "black",
  pointBg = "black",
  xLabelOffset = 0.01,
  yLabelOffset = 0.01,
  degrees = TRUE,
  ...)
{
  
  if (startNewPlot)
    plot(plotBox[1:2], plotBox[3:4], axes = FALSE, type = "n", xlab = "", ylab = "", ...);
  
  # plot(c(-1-xMargin,1+xMargin), c(-1-yMargin,1+yMargin), axes = FALSE, type = "n", xlab = "", ylab = "", ...) 
  checkAdjMat(adjacency, min = -1)
  n = length(labels);
  
  adjx = adjacency
  adjx[is.na(adjx)] = 0;
  
  if (length(pch)==1) pch = rep(pch, n);
  if (length(labelColors)==1) labelColors = rep(labelColors, n);
  if (length(pointColors)==1) pointColors = rep(pointColors, n);
  if (length(pointBg)==1) pointBg = rep(pointBg, n);
  if (length(xLabelOffset)==1) xLabelOffset = rep(xLabelOffset, n);
  if (length(yLabelOffset)==1) yLabelOffset = rep(yLabelOffset, n);
  if (length(cex.labels)==1) cex.labels = rep(cex.labels, n);
  if (length(cex.points)==1) cex.points = rep(cex.points, n);
  if (length(alignAngles)==1) alignAngles = rep(alignAngles, n);
  if (length(labelAngles)==1) labelAngles = rep(labelAngles, n);
  
  diag(adjx) = 0;
  diag(adjacency) = 0;
  maxA = max(abs(adjx));
  if (!is.null(maxAdj)) if (maxA<maxAdj) maxA = maxAdj;
  if (sum(adjx < 0) > 0)
  {
    if (is.null(colors)) colors = greenWhiteRed(100);
    adjCol = numbers2colors(adjacency, signed = TRUE, colors = colors, lim = c(-maxA, maxA));
  } else {
    if (is.null(colors)) colors = greenWhiteRed(100)[50:100];
    adjCol = numbers2colors(adjacency, signed = FALSE, colors = colors, lim = c(0, maxA));
  }
  
  
  ltA = adjacency;
  diag(ltA) = NA;
  ltA[upper.tri(ltA)] = NA;
  
  adjOrder = order(c(abs(ltA)))
  rows = row(adjacency)[adjOrder];
  cols = col(adjacency)[adjOrder];
  
  nLines = n*(n-1)/2;
  if (is.null(nPlotLines)) 
  {
    startLine = 1
  } else { 
    startLine = nLines - nPlotLines + 1;
  }
  for (line in startLine:nLines)
  {
    n1 = rows[line];
    n2 = cols[line];
    a = adjacency[n1, n2];
    normA = abs(a)/maxA;
    
    w = min.line.width;
    if (variable.line.width)
      w = min.line.width + (max.line.width - min.line.width) * normA;
    
    #pRadius1 = par("cxy") * cex.points[n1]/35;  # Emprical fudge factor..
    #pRadius2 = par("cxy") * cex.points[n2]/35;
    lineLen = sqrt( (pos.x[n1] - pos.x[n2])^2 + (pos.y[n1] - pos.y[n2])^2);
    x1 = pos.x[n1] #+ pRadius1[1] * (x[n2] - x[n1]) / lineLen
    y1 = pos.y[n1] #+ pRadius1[1] * (y[n2] - y[n1]) / lineLen
    x2 = pos.x[n2] #+ pRadius2[1] * (x[n1] - x[n2]) / lineLen
    y2 = pos.y[n2] #+ pRadius2[1] * (y[n1] - y[n2]) / lineLen
    
    lines(c(x1,x2),c(y1, y2), lwd = w, col = adjCol[n1, n2]);
  }
  
  x = pos.x;
  y = pos.y;
  
  for (node in 1:n)
    points(x[node], y[node], pch = pch[node], cex = cex.points[node], 
           bg = pointBg[node], col = pointColors[node]);
  
  for (node in 1:n)
  {
    cex = cex.labels[node];
    textWidth = strwidth(labels[node], cex = cex);
    textHeight = strheight(labels[node], cex = cex);
    pRadius = par("cxy") * cex.points[node]/5  ;  # Emprical fudge factor..
    effPointRadius = sqrt(mean(pRadius^2));
    labelShift = labelPosition(textWidth+2*xLabelOffset[node], 
                               textHeight + 2*yLabelOffset[node], effPointRadius, 
                               alignAngles[node], labelAngles[node], degrees);
    #printFlush(paste("Node:", node, " labelShift: ", paste(signif(labelShift, 2), collapse = ", "),
    #               ", width, height: ", paste(signif(c(textWidth, textHeight), 2), collapse = ",")));
    #  Make sure the label will read from left to right
    labAng = toDegrees(normalizeAngle(labelAngles[node], degrees), !degrees);
    
    if (labAng > 90 & labAng < 270) labAng = labAng - 180;
    
    text(x[node] + labelShift[1], y[node] + labelShift[2],
         labels = labels[node], adj = c(0.5, 0.5), 
         cex = cex, col = labelColors[node], srt = labAng, xpd = TRUE);
  }
  
}


# Debug
if (FALSE)
{
  labelWidth = textWidth+2*xLabelOffset[node];
  labelHeight = textHeight + 2*yLabelOffset[node];
  pointRadius = effPointRadius;
  alignAngle= alignAngles[node];
  labelAngle = labelAngles[node];
}

#===============================================================================================
#
# Circle plot for generating VisANT-like plots
#
#===============================================================================================

circlePlot = function(
  adjacency,
  labels,
  order = NULL,
  maxAdj = NULL,
  colors = NULL,
  startNewPlot = TRUE,
  plotBox = c(-1, 1, -1, 1),
  center = c(0,0), 
  radii = c(0.8, 0.8),
  startAngle = 0,
  variable.cex.labels = TRUE,
  min.cex.labels = 1,
  max.cex.labels = 1.5,
  variable.cex.points = TRUE,
  min.cex.points = 1,
  max.cex.points = 3,
  variable.line.width = TRUE,
  min.line.width = 1,
  max.line.width = 5,
  nPlotLines = NULL,
  pch = 21,
  labelColors = "black",
  pointColors = "black",
  pointBg = "black",
  # [x,y]Margin arguments have been taken out. Use radii instead.
  xLabelOffset = 0.01,
  yLabelOffset = 0.01,
  variableLabelAngle = TRUE,
  labelAngleMultiplier = 0.5,
  
  ...)
{
  
  checkAdjMat(adjacency, min = -1)
  n = length(labels);
  angles = seq(from = startAngle, to = startAngle + 2*pi * (1-1/n), length.out = n);
  x = center[1] + radii[1] * sin(angles);  # This is intentional; top should correspond to angle=0
  y = center[2] + radii[2] * cos(angles);
  
  adjx = adjacency
  adjx[is.na(adjx)] = 0;
  connectivity = colSums(abs(adjx))-diag(adjx)
  minConn = min(connectivity, na.rm = TRUE);
  maxConn = max(connectivity, na.rm = TRUE);
  
  if (is.null(order)) order = order(-connectivity, na.last = TRUE)
  
  if (length(pch)==1) pch = rep(pch, n);
  if (length(labelColors)==1) labelColors = rep(labelColors, n);
  if (length(pointColors)==1) pointColors = rep(pointColors, n);
  if (length(pointBg)==1) pointBg = rep(pointBg, n);
  if (length(xLabelOffset)==1) xLabelOffset = rep(xLabelOffset, n);
  if (length(yLabelOffset)==1) yLabelOffset = rep(yLabelOffset, n);
  
  oLabs = labels[order]
  oLColors = labelColors[order];
  oPColors = pointColors[order];
  oPBg = pointBg[order];
  oConn = connectivity[order];
  oAdj = adjx[order, order];
  oPch = pch[order];
  
  alignAngles = normalizeAngle(toDegrees(-angles) + 90); 
  alignAngles[alignAngles > 270] = alignAngles[alignAngles > 270] - 360;
  if (variableLabelAngle)
  {
    labelAngles = ifelse(alignAngles<=90, alignAngles*labelAngleMultiplier, 
                         180 + (alignAngles - 180)*labelAngleMultiplier)
    alignAngles = labelAngles;
  } else
    labelAngles = 0;
  
  actualCexPts = scaleToRange(oConn, min.cex.points, max.cex.points, variable.cex.points)
  #actualCexPts = rep(min.cex.points, n);
  #if (variable.cex.points)
  #     actualCexPts = min.cex.points + (max.cex.points - min.cex.points) * 
  #                                (oConn - minConn)/(maxConn - minConn)
  
  cex.labels = scaleToRange(oConn, min.cex.labels, max.cex.labels, variable.cex.labels);
  #cex.labels = rep(min.cex.labels, n);
  #if (variable.cex.labels)
  #     cex.labels[node] = min.cex.labels + (max.cex.labels - min.cex.labels) *
  #                               (oConn - minConn)/(maxConn - minConn)
  
  networkPlot(adjacency[order, order],
              oLabs, 
              x, y,
              alignAngles,
              labelAngles,
              cex.labels = cex.labels,
              cex.points = actualCexPts,
              maxAdj = maxAdj,
              colors = colors,
              startNewPlot = startNewPlot,
              variable.line.width = variable.line.width,
              min.line.width = min.line.width,
              max.line.width = max.line.width,
              nPlotLines = nPlotLines,
              pch = oPch,
              labelColors = oLColors,
              pointColors = oPColors,
              pointBg = oPBg,
              xLabelOffset = xLabelOffset[order],
              yLabelOffset = yLabelOffset[order],
              degrees = TRUE, ...);
  
}

# Example of circle plot:

if (FALSE)
{
  
  sizeGrWindow(8,8)
  par(mfrow = c(1,1));
  nS = 100;
  nn = 30;
  mod = simulateModule(rnorm(nS), nn);
  adjacency = cor(mod)^3;
  
  order = NULL
  
  labels = paste("Gene", c(1:nn));
  circlePlot(adjacency, labels, order, variable.cex.labels = FALSE, radii = c(0.6, 0.6));
  
  # Plot two circles in one plot
  
  circlePlot(adjacency, labels, order, variable.cex.labels = FALSE, center = c(-0.5, -0.5), 
             radii = c(0.35, 0.35));
  
  circlePlot(adjacency, labels, order, startNewPlot = FALSE, 
             variable.cex.labels = FALSE, center = c(0.5, 0.5), 
             radii = c(0.35, 0.35));
  
  
}


#=======================================================================================================
#
# Hierarchical circle network plot
#
#=======================================================================================================

alternatingAngles = function(n, startAngle, degrees = TRUE)
{
  nm = n-1;
  steps = c(0, rep(c(1:ceil(nm/2)), rep(2, ceil(nm/2))) * rep(c(1, -1), ceil(nm/2)))
  steps = steps[1:n];
  
  startAngle = toDegrees(startAngle, !degrees);
  angles = normalizeAngle(startAngle + 360/n * steps);
  if (!degrees) angles = toRadians(angles);
  angles 
}


networkPlot.hierarchical = function(
  adjacency,
  labels,
  clusteringAdjacency = adjacency,
  maxAdj = NULL,
  colors = NULL,
  startNewPlot = TRUE,
  plotBox = c(-1, 1, -1, 1),
  center = c(0,0),
  outerRadii = c(0.5, 0.5),
  
  variableInnerRadius = TRUE,
  minInnerRadii = c(0.1, 0.1),
  maxInnerRadii = c(0.25, 0.25),
  
  minClusterSize = 3,
  deepSplit = 1,
  
  angleOfLargestCluster = -90,
  angularSizeOffset = 3,
  variable.cex.labels = TRUE,
  min.cex.labels = 1,
  max.cex.labels = 1.5,
  variable.cex.points = TRUE,
  min.cex.points = 1,
  max.cex.points = 3,
  variable.line.width = TRUE,
  min.line.width = 1,
  max.line.width = 5,
  nPlotLines = NULL,
  pch = 21,
  labelColors = "black",
  pointColors = "black",
  pointBg = "black",
  # [x,y]Margin arguments have been taken out. Use radii instead.
  xLabelOffset = 0.01,
  yLabelOffset = 0.01,
  variableLabelAngle = TRUE,
  variableLabelAngleMinSize = 4,
  showSkeleton = FALSE,
  ...)
{
  
  checkAdjMat(adjacency, min = -1)
  n = length(labels);
  
  adjx = clusteringAdjacency
  adjx[is.na(adjx)] = 0;
  tree = hclust(as.dist(1-adjx), method = "a");
  
  # Get cluster order around the big circle. 
  # Room for improvement: instead of this could get eigennodes, cluster them and order their dendrogram
  # using dendrogramOrderings 
  clusters.noPam = cutreeDynamic(tree, distM = 1-adjx,
                                 minClusterSize = minClusterSize, 
                                 deepSplit = deepSplit,
                                 pamStage = FALSE);
  clusterOrder = unique(clusters.noPam[tree$order]);
  clusterOrder = clusterOrder[clusterOrder!=0];
  
  
  # Here I want all points in a cluster
  clusters = cutreeDynamic(tree, distM = 1-adjx,
                           minClusterSize = minClusterSize,
                           deepSplit = deepSplit,
                           pamStage = TRUE, cutHeight = 2);
  
  #plotDendroAndColors(tree, cbind(clusters.noPam, clusters))
  clusterSizes = table(clusters)
  nClusters = length(clusterSizes);
  
  if (nClusters==1)
  {
    circlePlot(
      adjacency,
      labels,
      order = NULL,
      maxAdj = maxAdj,
      colors = colors,
      startNewPlot = startNewPlot,
      plotBox = plotBox,
      center = center,
      radii = radii,
      startAngle = startAngle,
      variable.cex.labels = variable.cex.labels,
      min.cex.labels = min.cex.labels,
      max.cex.labels = max.cex.labels,
      variable.cex.points = variable.cex.points,
      min.cex.points = min.cex.points,
      max.cex.points = max.cex.points,
      variable.line.width = variable.line.width,
      min.line.width = min.line.width,
      max.line.width = max.line.width,
      nPlotLines = nPlotLines,
      pch = pch,
      labelColors = labelColors,
      pointColors = pointColors,
      pointBg = pointBg,
      xLabelOffset = xLabelOffset,
      yLabelOffset = yLabelOffset,
      variableLabelAngle = variableLabelAngle,
      showSkeleton = FALSE,
      ...);
    return ;
  }
  
  effectiveSizes = clusterSizes + angularSizeOffset 
  if (variableInnerRadius & (max(clusterSizes) > min(clusterSizes))) 
  {
    ratios = effectiveSizes / max(effectiveSizes);
    if (min(ratios) < max(minInnerRadii/maxInnerRadii))
      ratios = scaleToRange(ratios, max(minInnerRadii/maxInnerRadii), 1);
    innerRadii = matrix( ratios, nClusters, 2)  * 
      matrix( maxInnerRadii, nClusters, 2, byrow = TRUE);
  } else
    innerRadii = matrix( minInnerRadii, nClusters, 2, byrow = TRUE);
  
  # This may not work well in certain cases but for now will do...
  meanApproxInnerRadius = sqrt(rowMeans(innerRadii^2));
  
  clusterSlot = match(names(clusterSizes), clusterOrder);
  
  angleRanges = meanApproxInnerRadius/sum(meanApproxInnerRadius) * 360;
  angleRanges.ordered = angleRanges[ clusterOrder ]
  circleAngles1 = c(0, cumsum(angleRanges.ordered)) + c(angleRanges.ordered/2, 0);
  circleAngles = circleAngles1[1:nClusters] - circleAngles1[2]/2;
  
  largestCluster = which.max(clusterSizes);
  circleAngles = circleAngles - circleAngles[ clusterSlot[largestCluster] ] + angleOfLargestCluster;
  
  if (showSkeleton) 
  {
    addGrid(v = TRUE)
    abline(h=center[2]);
    abline(v=center[1])
  }
  
  n = ncol(adjacency);
  x = rep(NA, n);
  y = rep(NA, n);
  kIn = rep(0, n);
  alignAngles = rep(0, n);
  
  if (showSkeleton) drawEllipse(center[1], center[2], outerRadii[1], outerRadii[2]);
  clusterLevels = names(clusterSizes);
  for (c in 1:nClusters)
  {
    inCluster = clusters==clusterLevels[c];
    kIn[inCluster] = colSums(abs(adjacency[inCluster, inCluster]));
    clusterAngle = circleAngles[ clusterSlot[c] ];
    incOrder = rank(-kIn[inCluster]);
    incAngles = alternatingAngles( sum(inCluster), 180 +clusterAngle)[incOrder];
    clustCenter = center + outerRadii* c(cos(toRadians(clusterAngle)), sin(toRadians(clusterAngle)));
    x[inCluster] = clustCenter[1] + innerRadii[c, 1] * cos(toRadians(incAngles));  
    y[inCluster] = clustCenter[2] + innerRadii[c, 2] * sin(toRadians(incAngles));
    alignAngles[inCluster] = normalizeAngle(incAngles);
    if (showSkeleton) 
    {
      drawEllipse(clustCenter[1], clustCenter[2], innerRadii[c, 1], innerRadii[c, 2]);
      points(clustCenter[1], clustCenter[2], pch = 5)
    }
  }
  
  cex.points = scaleToRange(kIn, min.cex.points, max.cex.points, variable.cex.points)
  cex.labels = scaleToRange(kIn, min.cex.labels, max.cex.labels, variable.cex.labels);
  
  alignAngles[alignAngles > 270] = alignAngles[alignAngles > 270] - 360;
  labelAngles = rep(0, n);
  for (c in 1:nClusters)
  {
    inCluster = clusters==clusterLevels[c];
    if (variableLabelAngle & sum(inCluster) >= variableLabelAngleMinSize)
    {
      alignAngles[inCluster] = ifelse(alignAngles[inCluster]<=90, alignAngles[inCluster]/2, 
                                      180 + (alignAngles[inCluster] - 180)/2)
      labelAngles[inCluster] = alignAngles[inCluster];
    } 
  }
  
  networkPlot(
    adjacency,
    labels,
    pos.x = x, pos.y = y,
    alignAngles = alignAngles,
    labelAngles = labelAngles,
    cex.labels = cex.labels,
    cex.points = cex.points,
    lineAngles = NULL, # Not used for now
    maxAdj = maxAdj,
    colors = colors,
    startNewPlot = startNewPlot & !showSkeleton,
    plotBox = plotBox,
    variable.line.width = variable.line.width,
    min.line.width = min.line.width,
    max.line.width = max.line.width,
    nPlotLines = nPlotLines,
    pch = pch,
    labelColors = labelColors,
    pointColors = pointColors,
    pointBg = pointBg,
    xLabelOffset = xLabelOffset,
    yLabelOffset = yLabelOffset,
    degrees = TRUE,
    ...);
  invisible(list(x = x, y = y, clusters = clusters, tree =tree));
}

#=======================================================================================================
#
# Interval overlap
#
#=======================================================================================================

# Interval overlap: 
#    return -1 if [xl, xh] does not intersect with [yl, yh]
#    return 1 if there's partial overlap between the intervals
#    return 2 if there's partial overlap and the ymid is in the [xl, xh] interval
#    return 3 if [yl, yh] is a subset of [xl, xh]
#    return 4 if [xl, xh] is a subset of [yl, yh]
#    return 5 if [xl, xh] is a subset of [yl, yh] and ymid is in [xl, xh]

intervalOverlap = function(xl, xh, yl, yh, ymid = NULL)
{
  n = length(xl);
  m = length(yl);
  result = matrix(-1, m, n);
  xl = as.double(xl);
  xh = as.double(xh);
  yl = as.double(yl);
  yh = as.double(yh);
  
  a = outer(yl, xl, `-`) * outer(yl, xh, `-`) <= 0; 
  if (!is.null(ymid))
  {
    ymid = as.double(ymid);
    b = outer(ymid, xl, `-`) * outer(ymid, xh, `-`) <= 0;
  } else 
    b = array(FALSE, dim = dim(a));
  
  c = outer(yh, xl, `-`) * outer(yh, xh, `-`) <= 0;
  d = outer(yh, xl, `-`) * outer(yl, xl, `-`) <= 0;
  e = outer(yh, xh, `-`) * outer(yl, xh, `-`) <= 0;
  
  result[xor(a,c)] = 1 + as.numeric(b[xor(a,c)]);
  result[a&c] = 3;
  result[d&e] = 4 + as.numeric(b[d&e]);
  t(result);
}

#========================================================================================================
#
# allocateJobs
#
#========================================================================================================

# Facilitates multi-threading by producing an even allocation of jobs 

allocateJobs = function(nJobs, nThreads)
{
  if (nJobs<nThreads)
    stop("Fewer jobs than threads... please decrease nThreads."); 
  n1 = floor(nJobs/nThreads);
  n2 = nJobs - nThreads*n1;
  allocation = list();
  start = 1;
  for (t in 1:nThreads)
  {
    end = start + n1 - 1 + as.numeric(t<=n2);
    allocation[[t]] = c(start:end);
    start = end+1;
  }
  
  allocation;
}

#=======================================================================================================
#
# eps - produce eps plots
#
#=======================================================================================================

eps = function(...) { postscript(..., horizontal = FALSE, onefile = FALSE, paper = "special"); }

#========================================================================================================
#
# Array of vertical barplots
#
#=======================================================================================================

barplotArray = function(mat, 
                        lim = NULL,
                        commonScale = FALSE, 
                        colors = "grey40",
                        border = "black",
                        barGap = 0,
                        ablines = NULL,
                        abColors = NULL,
                        ablines.lty = NULL,
                        textMat = NULL,
                        cex.text = 0.8,
                        leftThreshold = 0.8,
                        alignRight = TRUE,
                        col.text = 1,
                        textMargin = 0.04/ncol(mat),
                        margin = 0.1, 
                        textMat.minXPosition = 0,
                        main = "",
                        xlab = "", ylab = "",
                        cex.main = 1.4
                        
)
{
  nCol = ncol(mat);
  nRow = nrow(mat);
  
  if (length(colors)==1 | length(colors)==nRow)
  {
    colors = matrix(colors, nRow, nCol)
  } else if (length(colors)==nCol) {
    colors = matrix(colors, nRow, nCol, byrow = TRUE)
  } else if (!isTRUE(all.equal(dim(colors), c(nRow, nCol))))
    stop(spaste("'colors' must be either a single color, a vector of length nrow or ncol(mat),", 
                "or a matrix of the same dimensions as mat."));
  
  if (length(border)==1 | length(border)==nRow)
  {
    border = matrix(border, nRow, nCol)
  } else if (length(border)==nCol) {
    border = matrix(border, nRow, nCol, byrow = TRUE)
  } else if (!isTRUE(all.equal(dim(border), c(nRow, nCol))))
    stop(spaste("'border' must be either a single color, a vector of length nrow or ncol(mat),", 
                "or a matrix of the same dimensions as mat."));
  
  plot(c(0,1), c(0,1), xlim = c(0,1), ylim = c(0,1), axes = FALSE, main = main, cex.main = cex.main,
       type = "n", xlab = xlab, ylab = ylab);
  
  box = par("usr");
  xMin = box[1];
  xMax = box[2];
  yMin = box[3];
  yMax = box[4];
  
  if (is.null(lim))
  {
    if (commonScale)
    {
      matMax = rep(max(mat, na.rm = TRUE), nCol);
      matMin = rep(min(mat, na.rm = TRUE), nCol);
    } else {
      matMax = apply(mat, 2, max, na.rm = TRUE);
      matMin = apply(mat, 2, min, na.rm = TRUE);
    }
  } else { 
    if (length(lim)==2) lim = matrix(lim, 2, nCol);
    if (!all.equal(dim(lim), c(2, nCol)))
      stop(paste("if 'lim' is given, it must be either a vector of length 2 or a matrix of 2 rows and",
                 "same number of columns as 'mat'"));
    matMax = lim[2, ];
    matMin = lim[1, ];
  }
  
  matMin[matMin > 0] = 0; 
  mat[is.na(mat)] = 0;
  
  x0 = matrix(c(0:(nCol-1))/nCol * (xMax - xMin) + xMin, nRow,nCol, byrow = TRUE);
  x1 = matrix(c(1:nCol)/nCol * (xMax - xMin) + xMin, nRow,nCol, byrow = TRUE);
  
  y0 = matrix(c((nRow-1):0)/nRow * (yMax - yMin)+ yMin, nRow,nCol);
  y1 = matrix(c(nRow:1)/nRow * (yMax - yMin) + yMin, nRow,nCol);
  
  y1 = y1 - (y1-y0) * barGap;
  
  if (!is.null(ablines))
  {
    if (is.null(abColors)) abColors = rep(1, length(ablines))
    if (is.null(ablines.lty)) ablines.lty = rep(1, length(ablines))
  }
  
  out = list(xleft = x0, xright = x1, ytop = y0, ybottom = y1,
             x0 = rep(0, nCol),
             box = par("usr"),
             xMid = (x0[1, ] + x1[1, ])/2,
             yMid = (y0[, 1] + y1[, 1])/2);
  
  for (c in 1:nCol)
  {
    scale = (x1[1, c] - x0[1, c])/(matMax[c] - matMin[c])/(1+margin);
    cx0 = x0[, c] - matMin[c] * scale; # Position of the zero line. Note that matMin by def. cannot positive.
    cx1 = x0[, c] + mat[, c] * scale; # Position of the end of the bar.
    rect(cx0, y1[, c], cx1, y0[, c], col = colors[, c], border = border[, c]);
    
    if (!is.null(ablines))
      for (al in 1:length(ablines))
        if (ablines[al] >= matMin[c] && ablines[al] <= matMax[c])
          lines(rep(x0[1, c] + ablines[al] * scale, 2), c(y0[nRow, c], y1[1, c]), col = abColors[al],
                lty = ablines.lty[al])
    if (c > 1) lines(x = c(x0[1, c], x0[1, c]), y = c(y0[1, 1], y1[nRow, 1]), col = 1);
    out$x0[c] = cx0[1];
    if (!is.null(textMat))
    {
      onLeft = (cx1-x0[, c])/(x1[, c]-x0[, c]) > leftThreshold
      ytext = (y1[, c]  + y0[, c])/2
      if (alignRight)
      {
        xtext = x1[, c] - textMargin;
        xtext[onLeft] = pmax(cx0, cx1)[onLeft]  - textMargin
        adj.x = rep(1, nRow)
      } else {
        adj.x = ifelse(onLeft, 1, 0);
        xtext = pmax(cx0, cx1) + ifelse(onLeft, -1, 1) * textMargin;
        scaledMinXPosition = x0[1, c] + textMat.minXPosition * scale
        xtext[xtext < scaledMinXPosition + textMargin] = scaledMinXPosition + textMargin
      }
      for (r in 1:nRow)
        text(xtext[r], ytext[r], textMat[r, c], col = col.text, cex = cex.text, adj = c(adj.x[r], 0.5));
    }
  }
  
  invisible(out);
}


#---------------------------------------------------------------------------------------------------------
# labeledBarplotArray.R
#---------------------------------------------------------------------------------------------------------

#--------------------------------------------------------------------------
#
# .reverseRows = function(Matrix)
#
#--------------------------------------------------------------------------
#


.reverseRows = function(Matrix)
{
  ind = seq(from=dim(Matrix)[1], to=1, by=-1);
  Matrix[ind,];
  #Matrix
}

.reverseVector = function(Vector)
{
  ind = seq(from=length(Vector), to=1, by=-1);
  Vector[ind];
  #Vector
}

#--------------------------------------------------------------------------
#
# labeledBarplotArray = function ( Matrix, xLabels, yLabels, ... ) { 
#
#--------------------------------------------------------------------------
# This function plots a heatmap of the specified matrix 
# and labels the x and y axes wit the given labels.
# It is assumed that the number of entries in xLabels and yLabels is consistent 
# with the dimensions in.
# If colorLabels==TRUE, the labels are not printed and instead interpreted as colors --
#  -- a simple symbol with the appropriate color is printed instead of the label.
# The x,yLabels are expected to have the form "..color" as in "MEgrey" or "PCturquoise".
# xSymbol, ySymbols are additional markers that can be placed next to color labels

#barplotArray = function(mat,
#                        lim = NULL,
#                        commonScale = FALSE,
#                        colors = "grey40",
#                        border = "black",
#                        ablines = NULL,
#                        abColors = NULL,
#                        ablines.lty = NULL,
#                        textMat = NULL,
#                        cex.text = 0.8,
#                        leftThreshold = 0.8,
#                        col.text = 1,
#                        textMargin = 0.04/ncol(mat),
#                        margin = 0.1,
#                        main = "",
#                        xlab = "", ylab = ""
#
#                        )


labeledBarplotArray = function (mat, 
                                signed = FALSE,
                                xLabels, yLabels = NULL, 
                                xSymbols = NULL, ySymbols = NULL, 
                                colorLabels = NULL, 
                                xColorLabels = FALSE, yColorLabels = FALSE,
                                checkColorsValid = TRUE,
                                xLabelsPosition = "bottom",
                                xLabelsAngle = 45,
                                xLabelsAdj = 1,
                                xColorWidth = 0.05,
                                yColorWidth = 0.05,
                                colors = if (signed) greenWhiteRed(100) else greenWhiteRed(100)[50:100],
                                invertColors = FALSE, 
                                cex.lab = NULL, 
                                cex.lab.x = cex.lab,
                                cex.lab.y = cex.lab,
                                colors.lab.x = 1,
                                colors.lab.y = 1,
                                ... ) 
{
  if (!is.null(colorLabels)) {xColorLabels = colorLabels; yColorLabels = colorLabels; }
  
  if (is.null(yLabels) & (!is.null(xLabels)) & (dim(mat)[1]==dim(mat)[2])) 
    yLabels = xLabels; 
  
  if (checkColorsValid)
  {
    xValidColors = !is.na(match(substring(xLabels, 3), colors()));
    yValidColors = !is.na(match(substring(yLabels, 3), colors()));
  } else {
    xValidColors = rep(TRUE, length(xLabels));
    yValidColors = rep(TRUE, length(yLabels));
  }
  
  if (sum(xValidColors)>0) xColorLabInd = c(1:length(xLabels))[xValidColors]
  if (sum(!xValidColors)>0) xTextLabInd = c(1:length(xLabels))[!xValidColors]
  
  if (sum(yValidColors)>0) yColorLabInd = c(1:length(yLabels))[yValidColors]
  if (sum(!yValidColors)>0) yTextLabInd = c(1:length(yLabels))[!yValidColors]
  
  xLabPos = charmatch(xLabelsPosition, c("bottom", "top"));
  if (is.na(xLabPos))
    stop("Argument 'xLabelsPosition' must be (a unique abbreviation of) 'bottom', 'top'");
  
  if (is.null(colors)) colors = heat.colors(30);
  if (invertColors) colors = .reverseVector(colors);
  
  # Call the barplotArray function
  
  labPos = barplotArray(mat, 
                        colors = numbers2colors(mat, colors = colors, signed = signed,
                                                lim = c(0, max(mat))), 
                        ...)
  
  # Draw the labels
  
  nxlabels = length(xLabels)
  plotbox = labPos$box;
  xmin = plotbox[1]; xmax = plotbox[2]; ymin = plotbox[3]; yrange = plotbox[4]-ymin;
  ymax = plotbox[4]; xrange = xmax - xmin;
  
  xspacing = labPos$xMid[2] - labPos$xMid[1];
  yspacing = abs(labPos$yMid[2] - labPos$yMid[1]);
  
  nylabels = length(yLabels)
  offsetx = par("cxy")[1] / 3;
  offsety = par("cxy")[2] / 3;
  # Transform fractional widths into coordinate widths
  xColW = min(xmax - xmin, ymax - ymin) * xColorWidth;
  yColW = min(xmax - xmin, ymax - ymin) * yColorWidth;
  if (sum(!xValidColors)>0)
  {
    xLabYPos = ifelse(xLabPos==1, ymin - offsety, ymax + offsety)
    if (is.null(cex.lab)) cex.lab = 1;
    text(labPos$xMid[xTextLabInd] , xLabYPos, srt = xLabelsAngle, 
         adj = xLabelsAdj, labels = xLabels[xTextLabInd], xpd = TRUE, cex = cex.lab.x, col = colors.lab.x)
  }
  if (sum(xValidColors)>0)
  {
    baseY = ifelse(xLabPos==1, ymin-offsety-xColW, ymax + offsety + xColW);
    deltaY = ifelse(xLabPos==1, xColW, -xColW);
    rect(xleft = labPos$xMid[xColorLabInd] - xspacing/2, ybottom = baseY,
         xright = labPos$xMid[xColorLabInd] + xspacing/2, ytop = baseY + deltaY,
         density = -1,  col = substring(xLabels[xColorLabInd], 3), 
         border = substring(xLabels[xColorLabInd], 3), xpd = TRUE)
    if (!is.null(xSymbols))
      text ( labPos$xMid[xColorLabInd], baseY - sign(deltaY)* offsety, xSymbols[xColorLabInd], 
             adj = xLabelsAdj, 
             xpd = TRUE, srt = xLabelsAngle, cex = cex.lab.x, col = colors.lab.x);
  }
  if (sum(!yValidColors)>0)
  {
    if (is.null(cex.lab)) cex.lab = 1;
    text(xmin - offsetx, labPos$yMid[yTextLabInd], srt = 0, 
         adj = c(1, 0.5), labels = yLabels[yTextLabInd], xpd = TRUE, cex = cex.lab.y, col = colors.lab.y )
  } 
  if (sum(yValidColors)>0)
  {
    rect(xleft = xmin- yColW - offsetx, ybottom = labPos$yMid[yColorLabInd] - yspacing/2,
         xright = xmin- offsetx, ytop = labPos$yMid[yColorLabInd] + yspacing/2, 
         density = -1,  col = substring(yLabels[yColorLabInd], 3), 
         border = substring(yLabels[yColorLabInd], 3), xpd = TRUE)
    if (!is.null(ySymbols))
      text (xmin- yColW - 2*offsetx, 
            labPos$yMid[yColorLabInd], ySymbols[yColorLabInd], 
            adj = c(1, 0.5), xpd = TRUE, cex = cex.lab.y, col = colors.lab.y);
  }
  
  axis(1, labels = FALSE, tick = FALSE)
  axis(2, labels = FALSE, tick = FALSE)
  axis(3, labels = FALSE, tick = FALSE)
  axis(4, labels = FALSE, tick = FALSE)
}

#==================================================================================================
#
# fixLabels
#
#=================================================================================================

# Replace (some) spaces in the given labels by newlines so that the length of each line is no more than
# maxCharPerLine


fixLabels = function(labels, maxCharPerLine = 14, split = " ", fixed = TRUE, newsplit = split)
{
  n = length(labels);
  splitX = strsplit(labels, split = split, fixed = fixed);
  newLabels= rep("", n);
  for (l in 1:n)
  {
    nl = "";
    line = "";
    for (s in 1:length(splitX[[l]]))
    {
      newLen = nchar(line) + nchar(splitX [[l]] [s]);
      if (nchar(line) < 5 | newLen < maxCharPerLine)
      {
        nl = paste(nl, splitX[[l]] [s], sep = newsplit)
        line = paste(line, splitX[[l]] [s], sep = newsplit);
      } else {
        nl = paste(nl, splitX[[l]] [s], sep = "\n");
        line = splitX[[l]] [s];
      }
    }
    newLabels[l] = nl;
  }
  substring(newLabels, 2);
}

#==================================================================================================
#
# shortenLabels
#
#=================================================================================================

# Truncate labels at the last space before given maximum length, add ... if the label is shortened.
# maxCharPerLine


shortenLabels = function(labels, maxLength = 25, minLength = 10)
{
  n = length(labels);
  split = strsplit(labels, split = " ", fixed = TRUE);
  newLabels= rep("", n);
  for (l in 1:n)
  {
    nl = "";
    s = 1; len = 0;
    while (s <= length(split[[l]]) && len <= maxLength)
    {
      newLen = len + nchar(split [[l]] [s]);
      if (len < minLength | newLen < maxLength)
      {
        nl = paste(nl, split[[l]] [s]);
        len = nchar(nl);
        s = s+1;
      } else {
        nl = spaste(nl, "...");
        len = maxLength + 1;
      }
    }
    newLabels[l] = nl;
  }
  substring(newLabels, 2);
}


#===============================================================================================
#
# stretchLimits
#
#===============================================================================================

# Calculate minimum and maximum and stretch the limits by a given factor

stretchLimits = function(x, y, rx = 0.03, ry = 0.03)
{
  minx = min(x, na.rm = TRUE);
  maxx = max(x, na.rm = TRUE);
  xlim = c(minx - rx * (maxx-minx), maxx + rx * (maxx-minx));
  miny = min(y, na.rm = TRUE);
  maxy = max(y, na.rm = TRUE);
  ylim = c(miny - ry * (maxy-miny), maxy + ry * (maxy-miny));
  list(xlim = xlim, ylim = ylim);
}


#===============================================================================================
#
# nearestSNP
#
#===============================================================================================

# Find the nearest SNP and the corresponding distance to given gene positions.

nearestSNP = function(snpChr, snpBp, geneChr, geneBp)
{
  snpHasAnno = !is.na(snpChr) & !is.na(snpBp);
  geneHasAnno = !is.na(geneChr) & !is.na(geneBp);
  
  snpChr[!snpHasAnno] = -131;  # Hopefully no one will ever have chromosome -131 in their data. 
  geneChr[!geneHasAnno] = -132;  # Same for -132
  nGenes = length(geneChr);
  validChr = snpChr[snpChr!=-131]
  chrNames = sort(unique(validChr));
  nChr = length(chrNames);
  nSNPs = length(snpChr);
  nearestSNP = rep(NA, nGenes);
  for (chr in 1:nChr)
  {
    print(paste("chromosome", chrNames[chr]));
    snps = snpChr==chrNames[chr]
    pos = snpBp[snps];
    pos1 = pos[-1];
    breaks = (pos[-length(pos)] + pos1)/2;
    
    genes = c(1:nGenes)[geneChr==chrNames[chr] ];
    genePos = geneBp[genes];
    nearest = findInterval(genePos, breaks);
    nearestSNP[genes] = c(1:nSNPs)[snps][nearest+1];
  }
  nearestSNPdist = geneBp - snpBp[nearestSNP];
  list(nearestSNP = nearestSNP, nearestSNPdist = nearestSNPdist);
}

#===============================================================================================
#
# peakRegions
#
#===============================================================================================

# This function finds peaks and peak regions (defined as areas around the peak where the function drops less
# than a certain amount). Meant for QTL analysis.


peakRegions = function(qtl, chromo, basePair, minLOD, lodDrop, minPeakDistance)
{
  nSNPs = length(qtl);
  peaks = qtlPeaks(qtl, chromo, basePair, minQTL = minLOD, window = minPeakDistance);
  peakLocs = c(1:nSNPs)[peaks];
  nPeaks = sum(peaks)
  
  # For each peak, find the nearest base pairs up- and down-stream where the LOD score goes down by 1.
  
  peakChromo = chromo[peakLocs];
  peakBp = basePair[peakLocs];
  
  fromBp = rep(0, nPeaks);
  toBp = rep(0, nPeaks);
  fromLoc = rep(0, nPeaks);
  toLoc = rep(0, nPeaks);
  zzzzz = 10123;
  for (p in 1:nPeaks)
  {
    loc = peakLocs[p];
    baseLod = qtl[loc];
    while (loc > 0 && chromo[loc]==peakChromo[p] && qtl[loc] > baseLod - lodDrop)
      loc = loc -1;
    
    if (loc < 1) loc = 1;
    
    if (chromo[loc]!=peakChromo[p]) fromBp[p] = 0 else fromBp[p] = basePair[loc];
    fromLoc[p] = loc;
    
    loc = peakLocs[p];
    while (loc <= nSNPs && chromo[loc]==peakChromo[p] && qtl[loc] > baseLod - lodDrop)
      loc = loc +1;
    
    if (loc > nSNPs) loc = nSNPs;
    toLoc[p] = loc;
    if (chromo[loc]!=peakChromo[p]) toBp[p] = 1e10 else toBp[p] = basePair[loc];
  }
  
  list(peakIndicator = peaks, peakIndex = peakLocs, 
       peakChromo = peakChromo, peakBp = peakBp,
       peakStartInd = fromLoc,
       peakEndInd = toLoc,
       peakStartBp = fromBp,
       peakEndBp = toBp);
}


#===============================================================================================
#
# makeCross
#
#===============================================================================================

# prepare cross data
# First column in both genotypes and phenotypes must be sample id
# Second column in phenotypes must be sex

makeCross = function(genotypes, traits, markerNames = NULL, 
                     chromo = NULL, basePair = NULL, fileNameBase = NULL,
                     outlierZ = 3, genoCodes = c(1,2,3), alleles = c("B", "C"))
{
  
  commonSamples = intersect(genotypes[, 1], traits[, 1]);
  if (length(commonSamples) < 10)
    stop("Something's wrong: less than 10 samples common between genotypes and traits.");
  genotypes = genotypes[match(commonSamples, genotypes[, 1]), ];
  traits = traits[match(commonSamples, traits[, 1]), ];
  
  if (is.null(fileNameBase))
  {
    delete = TRUE
    genoFile = tempfile("genotypes", getwd());
    phenFile = tempfile("phenotypes", getwd());
  } else {
    delete = FALSE
    genoFile = spaste(fileNameBase, "-genotypes.csv");
    phenFile = spaste(fileNameBase, "-phenotypes.csv");
  }
  
  if (is.null(markerNames))
  {
    # Determine marker names from colnames of genotypes
    markerNames = colnames(genotypes);
    if (is.null(chromo)) # Assume marker names have the form marker.chrN.BpNNNNNN
    {
      split = sapply(strsplit(markerNames[-1], split = ".", fixed = TRUE), I);
      markerNames = split[1, ];
      chromo = as.numeric(substring( split[2, ], 4));
      basePair = as.numeric(substring( split[3, ], 3));
    }
  }
  
  SNPinfo = rbind( c("", chromo), c("", basePair));
  colnames(SNPinfo) =c(colnames(genotypes)[1], markerNames);
  colnames(genotypes) = colnames(SNPinfo);
  
  genoForQTL = rbind(SNPinfo, genotypes);
  
  write.csv(genoForQTL, genoFile, row.names = FALSE, quote = FALSE);
  
  if (outlierZ > 0)
  {
    stdTraits = scale(apply(traits[, -c(1:2)], 2, as.numeric)); # Do not scale mouse ID and sex:)
    outliers = abs(stdTraits) > outlierZ
    traits2 = as.data.frame(traits);
    traits2[, -c(1:2)][outliers] = NA;
  } else 
    traits2 = traits;
  
  write.csv(traits2, phenFile, row.names = FALSE, quote = FALSE);
  
  cross = read.cross(format = "csvs",
                     genfile = genoFile,
                     phefile = phenFile,
                     genotypes = genoCodes, alleles = alleles);
  
  if (delete)
  {
    unlink(genoFile);
    unlink(phenFile);
  }
  list(cross = cross, markerNames = markerNames, chromo = chromo, basePair = basePair);
}


#================================================================================================
#
# add x- and y- error bars to a plot
#
#================================================================================================

addErrorBars.xy = function(x, y, sdx, sdy, barWidth = 0.01)
{
  box = par("usr");
  barWidth.x = (box[2] - box[1])*barWidth;
  barWidth.y = (box[4] - box[3])*barWidth;
  
  n = length(x);
  
  if (length(y)!=n) stop("Lengths of x and y must be the same.");
  if (length(sdx)!=n) stop("Lengths of x and sdx must be the same.");
  if (length(sdy)!=n) stop("Lengths of x and sdy must be the same.");
  
  for (i in 1:n) if ( is.finite(x[i]) && is.finite(y[i]))
  {
    if (is.finite(sdx[i]))
    {
      segments(x[i]-sdx[i], y[i], x[i] + sdx[i], y[i]);
      segments(x[i]-sdx[i], y[i] - barWidth.y/2, x[i]-sdx[i], y[i] + barWidth.y/2);
      segments(x[i]+sdx[i], y[i] - barWidth.y/2, x[i]+sdx[i], y[i] + barWidth.y/2);
    }
    if (is.finite(sdy[i]))
    {
      segments(x[i], y[i]-sdy[i], x[i], y[i]+sdy[i]);
      segments(x[i]-barWidth.x/2, y[i] - sdy[i], x[i]+barWidth.x/2, y[i] - sdy[i]);
      segments(x[i]-barWidth.x/2, y[i] + sdy[i], x[i]+barWidth.x/2, y[i] + sdy[i]);
    }
  }
}

#================================================================================================
#
# linearize date
#
#================================================================================================

linearizeDate = function(dates, sep = "-", subtractMin = TRUE, yearPosition = 3, monthPosition = 1)
{
  daysInMonth = c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
  add = c(0, cumsum(daysInMonth));
  splitDOB = strsplit(dates, split = sep, fixed = TRUE);
  tab = as.matrix(as.data.frame(splitDOB));
  
  dayPosition = c(1,2,3)[ -c(yearPosition, monthPosition)];
  
  month = suppressWarnings(as.numeric(as.character(tab[monthPosition, ])));
  
  if (any(is.na(month)))
  {
    monthcode = substring(as.character(tab[monthPosition, ]), 1, 3);
    month = match(tolower(monthcode), 
                  c("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"));
  }
  
  lin = as.numeric(as.character(tab[yearPosition, ])) * 365 +
    add[month] + as.numeric(as.character(tab[dayPosition, ]));
  lin - as.numeric(subtractMin) * min(lin, na.rm = TRUE);
}


#================================================================================================
#
# verboseMultiplot: overlay several verboseScatterplots
#
#================================================================================================

verboseMultiplot = function(x, y, sample = NULL, corFnc = "cor", corOptions = "use = 'p'", 
                            colors = c(1:length(x)),
                            main = "", xlab = NA, ylab = NA, cex = 1, cex.axis = 1.5, 
                            cex.lab = 1.5, cex.main = 1.5, abline = FALSE, abline.color = colors, 
                            abline.lty = 1, corLabel = corFnc, xlim = NULL, ylim = NULL, 
                            pch = rep(1, length(x)), ...) 
{
  if (is.na(xlab)) 
    xlab = as.character(match.call(expand.dots = FALSE)$x)
  if (is.na(ylab)) 
    ylab = as.character(match.call(expand.dots = FALSE)$y)
  if (mode(x)!="list") { x = list(x); y = list(y); }
  nSets = length(x);
  if (nSets!=length(y))
    stop("Number of components in 'x' and 'y' must be the same.");
  cor = corp = rep(NA, nSets);
  for (set in 1:nSets)
  {
    corExpr = parse(text = paste(corFnc, "(x[[set]], y[[set]] ", prepComma(corOptions), ")"))
    cor[set] = signif(eval(corExpr), 2)
    corp1 = signif(corPvalueStudent(cor[set], sum(is.finite(x[[set]]) & is.finite(y[[set]]))), 2)
    if (corp1 < 10^(-200)) 
      corp[set] = "<1e-200"
    else corp[set] = spaste("=", corp1);
  }
  if (!is.na(corLabel)) {
    mainX = paste(main, " ", corLabel, "=", cor, ", p", corp, 
                  sep = "")
  } else 
    mainX = main
  
  x1 = unlist(x);
  y1 = unlist(y);
  fin = is.finite(x1) & is.finite(y1);
  if (is.null(xlim))
  {
    minx = min(x1[fin]);
    maxx = max(x1[fin]);
    xlim = c(minx, maxx);
  }
  if (is.null(ylim))
  {
    miny = min(y1[fin]);
    maxy = max(y1[fin]);
    ylim = c(miny, maxy);
  }
  
  for (set in 1:nSets)
  {
    if (!is.null(sample)) 
    {
      if (length(sample) == 1) {
        sample = sample(length(x[[set]]), sample)
      }
    } else
      sample = c(1:length(x[[set]]));
    if (set==1)
    {
      plot(x[[set]] [sample], y[[set]] [sample], main = mainX, xlab = xlab, 
           ylab = ylab, cex = cex, cex.axis = cex.axis, cex.lab = cex.lab, 
           cex.main = cex.main, xlim = xlim, ylim = ylim, col = colors[set], 
           pch = pch[set], ...)
    } else 
      points(x[[set]] [sample], y[[set]] [sample], col = colors[set], pch = pch[set])
    
    if (abline) 
    {
      fit = lm(y[[set]] ~ x[[set]])
      abline(reg = fit, col = abline.color[set], lty = abline.lty)
    }
  }
}


# Test:

if (FALSE)
{
  x = list(rnorm(20), rnorm(20)+2, rnorm(10)-1);
  y = list(rnorm(20), rnorm(20)+2, rnorm(10)-1);
  y[[1]] = y[[1]] +x[[1]];
  y[[2]] = 0.5 * y[[2]] +x[[2]];
  y[[3]] = y[[3]] - x[[3]];
  
  verboseMultiplot(x, y, abline = TRUE)
  
}

#=========================================================================================================
#
# Plot module heatmap and eigengene barplot
#
#=========================================================================================================

# This needs finishing

moduleHeatmapAndEigengeneBarplot = function(
  expr, colors = greenWhiteRed(50, gamma = 0.3), geneOrder = NULL, sampleOrder = NULL, 
  setLayout = TRUE, layoutHeights = c(0.6, 0.4), marHeatmap = c(0,3.8,3,1), marBarplot = c(2,3.8,0,1),
  maxNShowGenes = 500, scale = TRUE, main = "" )
{
  if (setLayout) layout(matrix(c(1,2), 2, 1), heights= layoutHeights);
  nGenes = ncol(expr);
  if (nGenes > maxNShowGenes )
  {
    sample = sample(c(1:nGenes), size = maxNShowGenes, replace = FALSE);
    modExpr = expr[, sample];
    nGenes = maxNShowGenes;
  } else
    modExpr = expr;
  if (scale) modExpr = scale(modExpr);
  if (is.null(geneOrder)) geneOrder = hclust(as.dist(1-cor(modExpr, use="p")), method = "average")$order
  if (is.null(sampleOrder)) sampleOrder = hclust(dist(modExpr), method = "average")$order;
  maxZ = max(abs(modExpr), na.rm = TRUE);
  exprColors = numbers2colors(modExpr, signed = TRUE, lim = c(-maxZ, maxZ), colors = colors);
  par(mar = marHeatmap);
  eigengene = moduleEigengenes(modExpr, rep(1, nGenes))$eigengenes[, 1];
  mp = barplot(as.vector(eigengene[sampleOrder]), col = "white", border = "white", axisnames = FALSE,
               main = main, 
               axes = FALSE);
  plotbox = par("usr");
  xstep = mp[2]-mp[1]; xLeft = mp - xstep/2; xRight = mp + xstep/2;
  
  nrows = ncol(modExpr);
  yRange = plotbox[4]-plotbox[3];
  yBot = plotbox[3] + c(0:(nrows-1)) * yRange/nrows;
  yTop = yBot + yRange/nrows;
  
  for (sample in 1:nrow(modExpr))
  {
    rect(xleft = rep(xLeft[sample], nrows), xright = rep(xRight[sample], nrows),
         ybottom = yBot, ytop = yTop, col = exprColors[ sampleOrder[sample], geneOrder], 
         border = exprColors[ sampleOrder[sample], geneOrder]);
  }
  par(mar = marBarplot);
  eigengene[is.na(eigengene)] = 0;
  colors = numbers2colors(eigengene, signed = TRUE, colors = colors)
  barplot(as.vector(eigengene[sampleOrder]), col = colors[sampleOrder],
          xlab = "", ylab = "Eigengene expression");
}


#========================================================================================================
#
# Convenient prediction accuracy functions
#
#========================================================================================================

naiveAccuracyFnc = function(response)
{
  max(table(response))/sum(table(response))
}

accuraciesFnc = function(confusion)
{
  c (diag(confusion) / rowSums(confusion), sum(diag(confusion))/sum(confusion) );
}


table2.allLevels = function(x, y, levels.x = sort(unique(x)), levels.y = sort(unique(y)), setNames = FALSE)
{
  nx = length(levels.x);
  ny = length(levels.y);
  t = table(x, y);
  
  out = matrix(0, nx, ny);
  if (setNames)
  {
    rownames(out) = levels.x;
    colnames(out) = levels.y;
  }
  out[ match(rownames(t), levels.x), match(colnames(t), levels.y) ] = t;
  out;
}

#========================================================================================================
#
# Convenience functions for manipulating multiData structures
#
#========================================================================================================

# Note: many of these function would be simpler to use if I used some sort of class/method technique to keep
# track of the class of each object internally. For example, I could then write a generic function "subset" that
# would work consistently on lists and multiData objects. Similarly, multiData2list would simply become a
# method of as.list, and as.list would be safe to use both on lists and on multiData objects. 

multiData.subset = function(multiData, colIndex = NULL, rowIndex = NULL)
{
  size = checkSets(multiData);
  if (is.null(colIndex)) colIndex = c(1:size$nGenes);
  if (is.null(rowIndex)) rowIndex = lapply(size$nSamples, function(n) {c(1:n)})
  if (length(rowIndex)!=size$nSets) 
    stop("If given, 'rowIndex' must be a list of the same length as 'multiData'.");
  out = list();
  for (set in 1:size$nSets)
    out[[set]] = list(data = multiData[[set]]$data[rowIndex[[set]], colIndex, drop = FALSE]);
  names(out) = names(multiData);
  out;
}

multiData2list = function(multiData)
{
  lapply(multiData, `[[`, 'data');
}

list2multiData = function(data)
{
  out = list();
  for (set in 1:length(data))
    out[[set]] = list(data = data[[set]]);
  names(out) = names(data);
  out;
}

varNames = function(multiData)
{
  colnames(multiData[[1]]$data);
}

multiData.apply = function(multiData, ..., mdaSimplify = TRUE)
{
  size = checkSets(multiData);
  res = lapply(multiData, lapply, ...)
  if (mdaSimplify) return (multiData.simplify(res));
  
  return(res);
}


multiData.simplify = function(multiData)
{
  len = length(multiData[[1]]$data);
  dim = dim(multiData[[1]]$data);
  simplifiable = TRUE;
  nSets = length(multiData);
  for (set in 1:nSets)
  {
    if (len!=length(multiData[[set]]$data)) simplifiable = FALSE;
    if (!isTRUE(all.equal( dim, dim(multiData[[set]]$data)))) simplifiable = FALSE;
  }
  if (simplifiable)
  {
    if (is.null(dim)) {
      innerDim = len;
      innerNames = names(multiData[[1]]$data);
      if (is.null(innerNames)) innerNames = spaste("X", c(1:len));
    } else {
      innerDim = dim;
      innerNames = dimnames(multiData[[1]]$data);
      if (is.null(innerNames)) 
        innerNames = lapply(innerDim, function(x) {spaste("X", 1:x)})
      nullIN = sapply(innerNames, is.null);
      if (any(nullIN))
        innerNames[nullIN] = lapply(innerDim[nullIN], function(x) {spaste("X", 1:x)})
    }
    setNames = names(multiData);
    if (is.null(setNames)) setNames = spaste("Set_", 1:nSets);
    multiData.s = matrix(NA, prod(innerDim), nSets);
    for (set in 1:nSets)
      multiData.s[, set] = as.vector(multiData[[set]]$data);
    
    dim(multiData.s) = c(innerDim, nSets);
    if (!is.null(innerNames))
      dimnames(multiData.s) = c (if (is.list(innerNames)) innerNames else list(innerNames), list(setNames));
    return(multiData.s);
  }
  return(multiData);
}

isMultiData = function(x)
{
  !inherits(try(checkSets(x), silent = TRUE), 'try-error');
}

multiData.mapply = function(FUN, ..., MoreArgs = NULL, mdmaSimplify = TRUE, mda.doCollectGarbage = FALSE)
{
  dots = list(...);
  if (length(dots)==0) 
    stop("No arguments were specified. Please type ?multiData.mapply to see the help page.");
  dotLengths = sapply(dots, length);
  if (any(dotLengths!=dotLengths[1]))
    stop(spaste("All arguments to vectorize over must have the same length.\n", 
                "Scalar arguments should be put into the 'MoreArgs' argument.\n",
                "Note: lengths of '...' arguments are: ", paste(dotLengths, collapse = ", ")));
  nArgs = length(dots);
  res = list();
  isMultiSet = sapply(dots, isMultiData);
  
  FUN = match.fun(FUN);
  nSets = dotLengths[1];
  for (set in 1:nSets)
  {
    localArgs = list();
    for (arg in 1:nArgs)
      localArgs[[arg]] = if (isMultiSet[arg]) dots[[arg]] [[set]] $ data else dots[[arg]] [[set]];
    names(localArgs) = names(dots);
    res[[set]] = list(data = do.call(FUN, c(localArgs, MoreArgs)));
    if (mda.doCollectGarbage) collectGarbage();
  }
  
  names(res) = names(dots[[1]]);
  
  if (mdmaSimplify)
    return(multiData.simplify(res));
  
  return(res);
}


multiData.rbindSelf = function(multiData)
{
  size = checkSets(multiData);
  out = NULL;
  colnames = varNames(multiData);
  for (set in 1:nSets)
  {
    if (!is.null(colnames(multiData[[set]]$data)) && 
          !isTRUE(all.equal(colnames, colnames(multiData[[set]]$data))) )
      colnames(multiData[[set]]$data) = colnames;
    out = rbind(out, multiData[[set]]$data);
  }
  out;
}


#========================================================================================================
#
# Log-transformation of data
#
#========================================================================================================

logTrafo = function(data, base = 2, zeroOrNegativeAction = c("NA", "min"), minCoeff = 0.5)
{
  origDim = dim(data);
  origNames = dimnames(data);
  data = as.matrix(data)
  action = match.arg(zeroOrNegativeAction)
  missing = is.na(data);
  data[data<=0] = NA;
  if (action=="min")
  {
    data[data<=0] = NA;
    mins = apply(data,2,min, na.rm = TRUE);
    minMat = matrix(mins*minCoeff, nrow(data), ncol(data), byrow = TRUE);
    data[is.na(data)] = minMat[is.na(data)];
  }
  out = logb(data, base);
  out[missing] = NA;
  dim(out) = origDim;
  dimnames(out) = origNames;
  out;
}

#======================================================================================================
#
# Convert a multi-level factor variable to a set of binary variables
#
#======================================================================================================

multiLev2numeric = function(x, nameBase, nameSep = ".", minFraction = 0.05, exclude = c("", "NA"))
{
  t = table(x);
  minn = ceil(length(x) * minFraction);
  keepLevels = names(t)[t >= minn & ! (names(t) %in% exclude)];
  if (sum(t<minn) > 0) addOthers = TRUE else addOthers = FALSE;
  nLevels = length(keepLevels)
  nVars = nLevels;
  
  numer = matrix(NA, length(x), nVars + addOthers)
  
  for (l in 1:nLevels)
    numer[, l] = as.numeric(x==keepLevels[l]);
  
  if (addOthers) numer[, nVars + 1] = as.numeric(!(x%in%keepLevels));
  
  numer[x %in% exclude, ] = NA;
  
  if (addOthers) keepLevelsX = c(keepLevels, "other") else keepLevelsX = keepLevels;
  
  colnames(numer) = gsub("[ /]", ".", spaste(nameBase, nameSep, keepLevelsX));
  
  numer;
}


#=====================================================================================================
#
# removeOutliers (using inter-sample connectivity)
#
#=====================================================================================================

outliers = function(expr, adjacencyFnc = adjacency, adjacencyOptions = list(),
                    Z.min = -3, returnZ = FALSE)
{
  adjacencyFnc = match.fun(adjacency);
  adjacencyOptions$datExpr = t(expr);
  adj = do.call(adjacencyFnc, adjacencyOptions)
  Z = scale(colSums(adj)-1);
  if (returnZ)
  {
    list(outliers = Z<Z.min, Z = Z)
  } else 
    Z<Z.min
}

#=====================================================================================================
#
# goGenes
#
#=====================================================================================================

# Returns a list of gene identifiers in the given GO category.

GOgenes = function(
  termNames = NULL,
  organism = "human",
  ontologies = c("BP", "CC", "MF"),
  evidence = c("IMP", "IGI", "IPI", "ISS", "IDA", "IEA", "TAS", "NAS", "ND", "IC"),
  includeOffspring = TRUE, 
  verbose = 2, indent = 0)
{
  organisms = c("human", "mouse", "rat", "malaria", "yeast", "fly", "bovine", "worm", "canine",
                "zebrafish", "chicken");
  allEvidence =  c("IMP", "IGI", "IPI", "ISS", "IDA", "IEA", "TAS", "NAS", "ND", "IC");
  allOntologies = c("BP", "CC", "MF");
  
  spaces = indentSpaces(indent);
  orgInd = pmatch(organism, organisms);
  if (is.na(orgInd))
    stop(paste("Unrecognized 'organism' given. Recognized values are ",
               paste(organisms, collapse = ", ")));
  
  if (length(evidence)==0)
    stop("At least one valid evidence code must be given in 'evidence'.");
  if (length(ontologies)==0)
    stop("At least one valid ontology code must be given in 'ontology'.");
  
  evidInd = pmatch(evidence, allEvidence);
  if (sum(is.na(evidInd))!=0)
    stop(paste("Unrecognized 'evidence' given. Recognized values are ",
               paste(allEvidence, collapse = ", ")));
  
  ontoInd = pmatch(ontologies, allOntologies);
  if (sum(is.na(ontoInd))!=0)
    stop(paste("Unrecognized 'ontologies' given. Recognized values are ",
               paste(allEvidence, collapse = ", ")));
  
  orgCodes = c("Hs", "Mm", "Rn", "Pf", "Sc", "Dm", "Bt", "Ce", "Cf", "Dr", "Gg");
  orgExtensions = c(rep(".eg", 4), ".sgd", rep(".eg", 6));
  reverseMap = c(rep(".egGO2EG", 4), ".sgdGO2ORF", rep(".egGO2EG", 6))
  
  missingPacks = NULL;
  packageName = paste("org.", orgCodes[orgInd], orgExtensions[orgInd], ".db", sep="");
  if (!require(packageName, character.only = TRUE))
    missingPacks = c(missingPacks, packageName);
  
  if (!require(GO.db))
    missingPacks = c(missingPacks, "GO.db");
  
  if (!is.null(missingPacks))
    stop(paste("Could not load the requisite package(s)",
               paste(missingPacks, collapse = ", "), ". Please install the package(s)."))
  
  if (verbose > 0)
  {
    printFlush(paste(spaces, "goGenesInCategory: loading annotation data..."));
  }
  
  egGO = eval(parse(text = paste(packageName, ":::org.", orgCodes[orgInd], orgExtensions[orgInd],
                                 "GO", sep = "")));
  
  Go2eg = eval(parse(text = paste("AnnotationDbi::as.list(", packageName, ":::org.", orgCodes[orgInd],
                                  reverseMap[orgInd],")", sep = "")));
  nTerms = length(Go2eg);
  
  goInfo = eval(parse(text = "AnnotationDbi::as.list(GO.db:::GOTERM)"));
  if (length(goInfo) > 0)
  {
    orgGoNames = names(Go2eg);
    dbGoIDs = as.character(sapply(goInfo, GOID));
    dbGoOntologies = as.character(sapply(goInfo, Ontology));
    dbGoTerm = as.character(sapply(goInfo, Term));
  } else {
    dbGoIDs = "";
  }
  
  goOffSpr = list();
  if (includeOffspring)
  {
    goOffSpr[[1]] = eval(parse(text = "AnnotationDbi::as.list(GO.db:::GOBPOFFSPRING)"));
    goOffSpr[[2]] = eval(parse(text = "AnnotationDbi::as.list(GO.db:::GOCCOFFSPRING)"));
    goOffSpr[[3]] = eval(parse(text = "AnnotationDbi::as.list(GO.db:::GOMFOFFSPRING)"));
  }
  term2info = match(names(Go2eg), names(goInfo));
  orgOntologies = dbGoOntologies[term2info];
  orgIDs = dbGoIDs[term2info];
  orgTerms = dbGoTerm[term2info];
  
  if (is.null(termNames)) termNames = orgTerms;
  
  term2orgTerm = match(tolower(termNames), tolower(orgTerms));
  if (any(is.na(term2orgTerm)))
  {
    missingTermNames = term2orgTerm[is.na(term2orgTerm)];
    term2orgID = match(missingTermNames, dbGoIDs);
    if (any(is.na(term2orgID)))
      printFlush(spaste("Warning in GOGenesInCategory: the following terms were found ",
                        "in neither GO names nor GO term IDs:\n",
                        paste(missingTermNames[is.na(term2orgID)], collapse = ", ")));
    term2orgTerm[is.na(term2orgTerm)] = term2orgID;
  }
  
  keepInputTerms = is.finite(term2orgTerm);
  nInputTerms = length(termNames);
  
  termCodes = vector(mode="list", length = nInputTerms);
  for (t in 1:nInputTerms) termCodes[[t]] = NA;
  names(termCodes) = termNames;
  
  termIDs = rep(NA, nInputTerms);
  termIDs[keepInputTerms] = orgIDs[term2orgTerm[keepInputTerms]];
  
  collectGarbage();
  nExpandLength = 0;
  blockSize = 1000; # For a more efficient concatenating of offspring genes
  nAllInTerm = rep(0, nTerms);
  
  if (verbose > 0)
  {
    cat(paste(spaces, " ..preparing term lists..."));
    if (nInputTerms > 10) pind = initProgInd();
  }
  
  for (t in 1:nInputTerms) if (keepInputTerms[t] && !is.na(Go2eg[[ term2orgTerm[t] ]] [[1]]))
  {
    c = term2orgTerm[t];
    te = as.character(names(Go2eg[[c]])); # Term evidence codes
    tc = Go2eg[[c]]; # Gene codes
    if (includeOffspring)
    {
      termOffspring = NULL;
      for (ont in 1:length(goOffSpr))
      {
        term2off = match(names(Go2eg)[c], names(goOffSpr[[ont]]))
        if (!is.na(term2off))
          termOffspring = c(termOffspring, goOffSpr[[ont]][[term2off]]);
      }
      if (length(termOffspring)>0)
      {
        maxLen = blockSize;
        tex = rep("", maxLen);
        tcx = rep("", maxLen);
        ind = 1;
        len = length(te);
        tex[ ind:len ] = te;
        tcx[ ind:len ] = tc;
        ind = len + 1;
        o2go = match(termOffspring, as.character(names(Go2eg)));
        o2go = o2go[is.finite(o2go)]
        if (length(o2go)>0) for (o in 1:length(o2go)) if (!is.na(Go2eg[[o2go[o]]][[1]]))
        {
          #printFlush(paste("Have offspring for term", c, ": ", names(Go2eg)[c], 
          #           Term(goInfo[[term2info[c]]])));
          newc = Go2eg[[o2go[o]]];
          newe = names(newc);
          newl = length(newe);
          if ((len + newl) > maxLen)
          {
            nl = len + newl;
            maxLen = blockSize * ceiling( nl/blockSize);
            tex = c(tex, rep("", maxLen - length(tex)));
            tcx = c(tcx, rep("", maxLen - length(tex)));
            nExpandLength = nExpandLength + 1;
          }
          tex[ind:(len + newl)] = newe;
          tcx[ind:(len + newl)] = newc;
          ind = ind + newl;
          len = len + newl;
        }
        te = tex[1:len];
        tc = tcx[1:len];
      }
    }
    use = is.finite(match(te, evidence));
    termCodes[[t]] = unique(as.character(tc[use]));
    if (nInputTerms > 10 && verbose > 0) pind = updateProgInd(t/nInputTerms, pind);
  }
  
  if (nInputTerms > 10 && verbose > 0)
  {
    pind = updateProgInd(1, pind);
    printFlush("");
  }
  
  attr(termCodes, "GOtermID") = termIDs
  
  termCodes;
}


#====================================================================================================
#
# topEnrichment
#
#====================================================================================================

# Enrichment of top genes in a given list.

topEnrichment = function(ranking, referenceList, 
                         nTopGenes = NULL, geneIDs = NULL, direction = c("increasing", "decreasing"),
                         chiSquare = FALSE,
                         verbose = 1)
{
  direction = match.arg(direction);
  ranking = as.matrix(ranking);
  if (is.null(geneIDs)) geneIDs = rownames(ranking);
  if (is.null(geneIDs)) stop("geneIDs must be specified if 'ranking' does not have row names.");
  if (length(intersect(referenceList, geneIDs))==0)
    stop("There are no common genes between referenceList and geneIDs.");
  
  if (is.null(nTopGenes)) nTopGenes = c(10, seq(from=20, to=1000, by=20))
  sign = ifelse(direction=="increasing", 1, -1);
  ranking = apply(sign * ranking, 2, rank, na.last = TRUE);
  
  nRankings = ncol(ranking);
  nNumbers = length(nTopGenes);
  
  inReference = is.finite(match(geneIDs, referenceList));
  if (length(unique(inReference))==0) 
    stop("referenceList contains either no or all genes in 'ranking'.");
  
  enrichmentStat = matrix(NA, nNumbers, nRankings);
  
  if (verbose > 0) pind = initProgInd();
  for (r in 1:nRankings)
    for (n in 1:nNumbers)
    {
      inTop = ranking[, r] <= nTopGenes[n];
      if (length(unique(inTop)) > 1)
      {
        if (chiSquare) {
          enrichmentStat[n, r] = chiSquare.table(table(inReference, inTop));
        } else {
          enrichmentStat[n, r] = -log10(fisher.test(table(inReference, inTop), 
                                                    alternative = "greater")$p.value);
        }
      }
      if (verbose > 0) pind = updateProgInd(((r-1)*nNumbers + n)/(nRankings * nNumbers), pind);
      
    }
  
  if (verbose > 0) printFlush("");
  
  colnames(enrichmentStat) = colnames(ranking);
  rownames(enrichmentStat) = spaste("nTopGenes.", nTopGenes);
  attr(enrichmentStat, "nTopGenes") = nTopGenes;
  enrichmentStat;
}

#====================================================================================================
#
# chiSquare.table
#
#====================================================================================================

chiSquare.table = function(tab)
{
  if (sum(tab<5) > 0)
    return(qchisq(fisher.test(tab, alt = "greater")$p.value), df = 1, lower.tail = FALSE);
  
  n = sum(tab);
  expected = outer(rowSums(tab), colSums(tab))/n;
  chsq = sum( (expected - tab) ^2 / expected );
  chsq;
}

#====================================================================================================
#
# topEnrichment
#
#====================================================================================================

# Enrichment of top genes in a given list.




#====================================================================================================
#
# plotEnrichments
#
#====================================================================================================

plotEnrichments = function(enrichments, counts = attr(enrichments, "nTopGenes"),
                           colors = c(1:ncol(enrichments)),
                           ylim = NULL, pch = c(1:ncol(enrichments)), 
                           lty = rep(1, ncol(enrichments)), 
                           plotLegend = TRUE,
                           leg.position = "bottomright",
                           legend = colnames(enrichments),
                           cex.legend = 1,
                           legend.ncol = 1,
                           ...)
{
  max = max(enrichments, na.rm = TRUE);
  if (is.null(ylim)) ylim = c(0, max)
  plot(counts, enrichments[, 1], col = colors[1], type = 'l', lty = lty[1], ylim = ylim, ...);
  points(counts, enrichments[, 1], col = colors[1], pch = pch[1], bg = colors[1]);
  nPlots = ncol(enrichments);
  if (nPlots > 1) for (c in 2:nPlots)
  {
    lines(counts, enrichments[, c], col = colors[c], lty = lty[c]);
    points(counts, enrichments[, c], col = colors[c], pch = pch[c], bg = colors[c]);
  }
  
  if (plotLegend)
    legendClean(leg.position, legend = legend, lty = lty, pch = pch, col = colors, pt.bg = colors, 
                cex = cex.legend, ncol = legend.ncol);
}

plotEnrichments.barplot = function(enrichments, counts = attr(enrichments, "nTopGenes"),
                                   nTop = as.integer(length(counts)/5),
                                   colors = c(1:ncol(enrichments)),
                                   ylim = NULL, 
                                   legend = colnames(enrichments),
                                   barplot = TRUE,
                                   top = c("highest", "lowest"),
                                   reverseYAxis = NULL,
                                   horiz = FALSE,
                                   # Ornaments: braced and delimeted groups
                                   # Note: ornaments won't work for horizontal barplots
                                   braceEdges = NULL,
                                   braceText = NULL,
                                   braceText.col = 1,
                                   braceText.cex = 1,
                                   braceSepLine = TRUE,
                                   braceSepLine.lty = 2,
                                   braceSepLine.col = "darkgrey",
                                   braceHeight = 0.05,
                                   braceSpace = 0.25,
                                   bracePower = 13,
                                   braceCol = braceText.col,
                                   # Simple groups
                                   groupEdges = NULL,
                                   groupText = NULL, 
                                   groupText.col = 1,
                                   groupText.cex = 1,
                                   groupSpace = 0.1,
                                   ...)
{
  
  top = match.arg(top);
  if (is.null(reverseYAxis)) reverseYAxis = top=="lowest";
  
  ordEnr = apply(enrichments, 2, sort, decreasing = (top=="highest"));
  topEnr = ordEnr[1:nTop, ];
  means = colMeans(topEnr);
  stdErr = apply(topEnr, 2, stdErr)
  nBars = ncol(enrichments);
  
  max = max(means + stdErr, na.rm = TRUE);
  if (is.null(ylim))
  {
    if (barplot) 
    {
      ylim = c(0, max)
    } else {
      ylim = range(topEnr);
      if (reverseYAxis) ylim = rev(ylim);
    }
  }
  
  yLimExt = 0;
  
  if (!is.null(braceEdges))
  {
    yLimExt = yLimExt + braceSpace;
    braceStart = 1/(1+braceSpace);
  } else 
    braceSpace = 0;
  
  if (!is.null(groupEdges))
  {
    yLimExt = yLimExt + groupSpace;
    groupMiddle = (1 + 0.5 * groupSpace)/(1+groupSpace+braceSpace);
    braceStart = (1+groupSpace)/(1+groupSpace + braceSpace);
  } else
    groupSpace = 0;
  
  ylim[2] = ylim[2] + (ylim[2]-ylim[1]) * yLimExt;
  
  
  x = c(topEnr);
  g = rep(c(1:nBars), rep(nTop, nBars));
  
  if (barplot) {
    if (horiz)
    {
      mp = labeledBarplot2(x, g, col = colors,  xlim = ylim, names = legend, horiz = horiz, ...);
    } else {
      mp = labeledBarplot2(x, g, col = colors,  ylim = ylim, names = legend, horiz = horiz, ...);
    }
    yTop = means+stdErr;
  } else {
    bxp = labeledBoxplot(as.list(as.data.frame(topEnr)), names = legend, namesColor = colors, verbose = TRUE, 
                         border = colors, ylim = ylim, ...);
    yTop = apply(bxp$stats, 1, max, na.rm = TRUE);
    mp = c(1:ncol(topEnr));
  }
  
  addOrnaments(mp, yTop,
               braceEdges = braceEdges,
               braceText = braceText,      
               braceText.col = braceText.col,     
               braceText.cex = braceText.cex,     
               braceSepLine = braceSepLine,   
               braceSepLine.lty = braceSepLine.lty,  
               braceSepLine.col = braceSepLine.col,
               braceHeight = braceHeight,    
               braceSpace = braceSpace,     
               bracePower = bracePower,       
               braceCol = braceCol,
               # Simple groups        
               groupEdges = groupEdges,     
               groupText = groupText,      
               groupText.col = groupText.col,     
               groupText.cex = groupText.cex,     
               groupSpace = groupSpace);
  
  invisible(list(topEnrichment = topEnr, midpoints = mp, ylim = ylim));
}

# Add ornaments to a barplot/enrichment barplot

addOrnaments = function(
  midPoints,
  heights,
  braceEdges = NULL,
  braceText = NULL,
  braceText.col = 1,
  braceText.cex = 1,
  braceSepLine = TRUE,
  braceSepLine.lty = 2,
  braceSepLine.col = "darkgrey",
  braceHeight = 0.05,
  braceSpace = 0.25,
  bracePower = 13,
  braceCol = braceText.col,
  # Simple groups
  groupEdges = NULL,
  groupText = NULL,
  groupText.col = 1,
  groupText.cex = 1,
  groupSpace = 0.1)
{
  
  if (!is.null(braceEdges))
  {
    braceStart = 1/(1+braceSpace);
  } else
    braceSpace = 0;
  
  if (!is.null(groupEdges))
  {
    groupMiddle = (1 + 0.5 * groupSpace)/(1+groupSpace+braceSpace);
    braceStart = (1+groupSpace)/(1+groupSpace + braceSpace);
  } else
    groupSpace = 0;
  
  nBars = length(midPoints);
  
  box = par("usr");
  ymin = box[3];
  ymax = box[4]
  yrange = ymax - ymin;
  barSpace = midPoints[2]-midPoints[1];
  if (!is.null(braceEdges))
  {
    braceText.cex = .extend(braceText.cex, nBars);
    braceText.col = .extend(braceText.col, nBars);
    nBraces = ncol(braceEdges);
    for (b in 1:nBraces)
    {
      st = braceEdges[1, b]; en = braceEdges[2, b]; 
      if (!is.null(braceCol))
        brace.horizontal(midPoints[ st ]-0.3*barSpace, midPoints[ en ] + 0.3*barSpace, 
                         ymin + yrange * braceStart, ymin + yrange * (braceStart + braceHeight), 
                         power = bracePower, col = braceCol);
      if (braceSepLine && braceEdges[2, b] < nBars)
        abline(v = (midPoints[en] + midPoints[en +1])/2, col = braceSepLine.col, lty = braceSepLine.lty);
      text((midPoints[st] + midPoints[en])/2, ymax, braceText[b], adj = c(0.5, 1), cex = braceText.cex[b],
           col = braceText.col[b]);
    }
  }
  
  if (!is.null(groupEdges))
  {
    groupText.col = .extend(groupText.col, nBars)
    groupText.cex = .extend(groupText.cex, nBars)
    nGroups = ncol(groupEdges);
    for (gr in 1:nGroups)
    {
      st = groupEdges[1, gr]; en = groupEdges[2, gr];
      text((midPoints[st] + midPoints[en])/2, 
           max(0, heights[st:en]) + 0.5 * yrange * groupSpace/(1+groupSpace + braceSpace), 
           groupText[gr], adj = c(0.5, 0.5),
           cex = groupText.cex[gr], col = groupText.col[gr]);
    }
  }
  
}


.extend = function(x, n)
{
  nRep = ceil(n/length(x));
  rep(x, nRep)[1:n];
}

#======================================================================================================
#
# Validation success
#
#======================================================================================================

# This form of the function returns the mean observed value and mean rank of observed values for a given
# vector of top number of genes in predicted.
# In other words: for each column of predicted: the predicted values are ranked and nTopGenes is selected;
# the mean of observed and rank(observed) for these values are calculated. 

validationSuccess = function(predicted, observed, nTopGenes, rankPredicted = TRUE,
                             direction = c("increasing", "decreasing"))
{
  direction = match.arg(direction);
  rankSign = ifelse(direction=="decreasing", -1, 1);
  nNGenes = length(nTopGenes)
  nMethods = 2
  methodNames = c("AverageObserved", "AverageObservedRank");
  
  predicted = as.matrix(predicted);
  nRankings = ncol(predicted);
  if (is.null(colnames(predicted)))
  {  
    rankingNames= spaste("Ranking.", c(1:nRankings));
  } else
    rankingNames = colnames(predicted);
  
  success = array(NA, dim = c(nNGenes, nRankings, nMethods));
  observedRank = rank(observed * rankSign, na.last = "keep");
  
  if (rankPredicted) {
    predictedRank = apply(predicted * rankSign, 2, rank);
  } else 
    predictedRank = predicted;
  
  for (ing in 1:nNGenes) for (r in 1:nRankings) 
  {
    ng = nTopGenes[ing];
    keep = predictedRank[, r] <=ng;
    success[ing, r, 1] = mean(observed[keep], na.rm = TRUE);
    success[ing, r, 2] = mean(observedRank[keep], na.rm = TRUE);
  }
  
  dimnames(success) = list(spaste("nTopGenes.", nTopGenes), rankingNames, methodNames);
  success = success[ , , , drop = TRUE];
  attr(success, "nTopGenes") = nTopGenes
  success;
}

# The second form of the function returns only the success measured on correlations but also returns the
# actual values that were averaged. 

validationSuccess.ext = function(predicted, observed, nTopGenes, rankPredicted = TRUE,
                                 direction = c("increasing", "decreasing"))
{
  direction = match.arg(direction);
  rankSign = ifelse(direction=="decreasing", -1, 1);
  nNGenes = length(nTopGenes)
  
  predicted = as.matrix(predicted);
  nRankings = ncol(predicted);
  if (is.null(colnames(predicted)))
  {
    rankingNames= spaste("Ranking.", c(1:nRankings));
  } else
    rankingNames = colnames(predicted);
  
  success = array(NA, dim = c(nNGenes, nRankings));
  
  if (rankPredicted) {
    predictedRank = apply(predicted * rankSign, 2, rank);
  } else
    predictedRank = predicted;
  
  topObservedValues = matrix(0,max(nTopGenes), nRankings);
  
  for (r in 1:nRankings)
  {
    for (ing in 1:nNGenes) 
    {
      ng = nTopGenes[ing];
      keep = predictedRank[, r] <=ng;
      success[ing, r] = mean(observed[keep], na.rm = TRUE);
    }
    ng = max(nTopGenes);
    order = order(predicted[, r] * rankSign);
    topObservedValues[, r] = observed[order[1:ng]];
  }
  
  dimnames(success) = list(spaste("nTopGenes.", nTopGenes), rankingNames);
  colnames(topObservedValues) = rankingNames;
  #success = success[ , , drop = TRUE];
  attr(success, "nTopGenes") = nTopGenes
  list(success = success, topObservedValues = topObservedValues, nTopGenes = nTopGenes);
}

#====================================================================================================#
#
# replace NAs by a finite value
#
#====================================================================================================

replaceNA = function(x, replaceBy=-9)
{
  x[is.na(x)] = replaceBy;
  x;
}

#=====================================================================================================
#
# minWhichMin
#
#=====================================================================================================

# This is a wrapper around my C-level function. 
minWhichMin = function(x)
{
  x = as.matrix(x);
  nc= ncol(x);
  nr = nrow(x);
  min = rep(0, nc);
  which = rep(0, nc);
  whichmin = .C("minWhichMin", as.double(x),
                as.integer(nr), as.integer(nc),
                as.double(min), as.double(which), DUP = FALSE, NAOK = TRUE);
  cbind( min = whichmin[[4]], which = whichmin[[5]] + 1);
}

#====================================================================================================
#
# brace
#
#====================================================================================================

# draw a brace in a plot area
# For now only a horizontal brace

brace.horizontal = function(xLeft, xRight, yBottom, yTop, color=1, lwd=3, fine = 100, power = 9)
{
  xMid = (xLeft + xRight)/2
  xMM1 = (3*xLeft + xRight)/4;
  xMM2 = (xLeft + 3*xRight)/4;
  yMid = (yBottom + yTop)/2;
  
  xx = seq(from = -1, to=1, length.out = fine);
  yy = xx^power;
  
  x = c( xMM1 + (xMM1-xLeft) * xx, xMM2 + (xMM2 - xMid) * xx);
  y = yMid + (yMid-yBottom) * c(yy, rev(yy));
  
  lines(x, y, col = color, lwd = lwd);
} 

#=====================================================================================================
#
# Pretty names for meta-analysis output
#
#=====================================================================================================

prettyNamesForMetaAnalysis = function(names)
{
  translation = matrix( c(
    "Z.equalWeights",  "Stouffer (equal wts)",
    "Z.RootDoFWeights",  "Stouffer (sqrt wts)",
    "Z.DoFWeights", "Stouffer (dof wts)",
    "pValueHighScale.equalWeights", "Scale (equal wts)",
    "pValueHighScale.RootDoFWeights", "Scale (sqrt wts)",
    "pValueHighScale.DoFWeights", "Scale (dof wts)",
    "consensus", "Consensus",
    "weightedAverage.equalWeights", "Mean (equal wts)",
    "weightedAverage.RootDoFWeights", "Mean (sqrt wts)",
    "weightedAverage.DoFWeights", "Mean (dof wts)",
    "meta.Z.equalWeights", "Stouffer (equal wts)",
    "meta.Z.RootDoFWeights", "Stouffer (sqrt wts)",
    "meta.Z.DoFWeights", "Stouffer (dof wts)",
    "pValueHighScale.equalWeights", "Scale (equal wts)",
    "pValueHighScale.RootDoFWeights", "Scale (sqrt wts)",
    "pValueHighScale.DoFWeights", "Scale (dof wts)",
    "pValueHigh.equalWeights", "Rank (equal wts)",
    "pValueHigh.RootDoFWeights", "Rank (sqrt wts)",
    "pValueHigh.DoFWeights", "Rank (dof wts)"
  ), ncol = 2, byrow = TRUE)
  
  names2trans = match(names, translation[, 1]);
  out = names;
  out[is.finite(names2trans)] = translation[ names2trans[is.finite(names2trans)], 2];
  out;
}

#=====================================================================================================
#
# GOLabels
#
#=====================================================================================================

GOLabels = function(enrichmentTable, moduleCol = "Mod", pValueCol = "p.Bonf", 
                    labelCol = "termName", pValueThreshold = 0.05, numericModLabels = TRUE)
{
  if (!is.numeric(moduleCol))
    moduleCol = match(moduleCol, colnames(enrichmentTable));
  if (is.na(moduleCol)) stop("Undefined module column.")
  
  if (!is.numeric(pValueCol))
    pValueCol = match(pValueCol, colnames(enrichmentTable));
  if (is.na(pValueCol)) stop("Undefined pValue column.")
  
  if (!is.numeric(labelCol))
    labelCol = match(labelCol, colnames(enrichmentTable));
  if (is.na(labelCol)) stop("Undefined label column.")
  
  order = order(enrichmentTable[, moduleCol], enrichmentTable[, pValueCol] )
  enr.ord = enrichmentTable[order, ];
  module = enr.ord[, moduleCol];
  
  n = length(module);
  bestIndex = c(1, c(2:n)[ module[-1]!=module[-n] ]);
  
  moduleLabels = module[bestIndex];
  bestP = enr.ord[bestIndex, pValueCol];
  bestLabel = enr.ord[bestIndex, labelCol];
  
  GOlabels0 = spaste(": ", bestLabel);
  GOlabels0[bestP > pValueThreshold] = "";
  GOlabels = spaste(moduleLabels, GOlabels0);
  
  out = data.frame(GOLabels = GOlabels, modules = moduleLabels, bestP = bestP, bestLabels = bestLabel);
  if (numericModLabels)
    out = out[order(as.numeric(as.character(moduleLabels))), ]
  
  out;
}


#=====================================================================================================
#
# prependZeros
#
#=====================================================================================================
# prepend as many zeros as necessary to fill number to a certain width. Assumes an integer input.

prependZeros = function(x, len = max(nchar(x)))
{
  lengths = nchar(x);
  if (len < max(lengths)) stop("Some entries of 'x' are too long.");
  out = as.character(x);
  n = length(x);
  for (i in 1:n) if (lengths[i] < len)
    out[i] = spaste( paste(rep("0", len-lengths[i]), collapse = ""),
                     x[i]);
  
  out;
}

#=====================================================================================================
#
# entrez2geneSymbol
#
#=====================================================================================================
# convert entrez gene IDs to gene symbols

entrez2geneSymbol = function(entrezCodes, organism = "human")
{
  organisms = c("human", "mouse", "rat", "malaria", "fly", "bovine", "worm", "canine",
                "zebrafish", "chicken");
  orgInd = pmatch(organism, organisms);
  if (is.na(orgInd))
    stop(paste("Unrecognized 'organism' given. Recognized values are ",
               paste(organisms, collapse = ", ")));
  
  orgCodes = c("Hs", "Mm", "Rn", "Pf", "Dm", "Bt", "Ce", "Cf", "Dr", "Gg");
  orgExtensions = rep(".eg", 10)
  
  missingPacks = NULL;
  packageName = paste("org.", orgCodes[orgInd], orgExtensions[orgInd], ".db", sep="");
  if (!require(packageName, character.only = TRUE))
    missingPacks = c(missingPacks, packageName);
  
  if (!is.null(missingPacks))
    stop(paste("Could not load the requisite package(s)",
               paste(missingPacks, collapse = ", "), ". Please install the package(s)."))
  
  egSymbol = eval(parse(text = paste(packageName, ":::org.", orgCodes[orgInd], orgExtensions[orgInd],
                                     "SYMBOL", sep = "")));
  
  mapped_entrez = as.numeric(as.character(mappedkeys(egSymbol)));
  mapped_symbols = unlist(as.list(egSymbol[mappedkeys(egSymbol)]));
  
  entrezCodes = as.matrix(entrezCodes);
  nSets = ncol(entrezCodes);
  
  symbols = array("NA", dim = dim(entrezCodes));
  
  for (s in 1:nSets)
  {
    codes2mapped = match(as.numeric(entrezCodes), mapped_entrez);
    fin = is.finite(codes2mapped);
    symbols[fin, s] = mapped_symbols[ codes2mapped[fin] ];
  }
  
  symbols[, , drop = TRUE];
}
mzinkgraf/ConsensusCoExpression documentation built on May 23, 2019, noon