# R/Functions.R In WGCNA: Weighted Correlation Network Analysis

```# Categories of functions:
# . network construction (including connectivity calculation)
# . module detection
# . gene screening
# . data simulation
# . general statistical functions
# . visualization

#-----------------------------------------------------------------------------------------------
#
# Overall options and settings for the package
#
#-----------------------------------------------------------------------------------------------

.moduleColorOptions = list(MEprefix = "ME")

moduleColor.getMEprefix = function()
{
.moduleColorOptions\$MEprefix;
}

# ===================================================
#The function moduleEigengenes finds the first principal component (eigengene) in each
# module defined by the colors of the input vector "colors".
# The theoretical underpinnings are described in Horvath, Dong, Yip (2005)
# http://www.genetics.ucla.edu/labs/horvath/ModuleConformity/
# This requires the R library impute

moduleEigengenes = function(expr, colors, impute = TRUE, nPC = 1, align = "along average",
excludeGrey = FALSE, grey = if (is.numeric(colors))  0 else "grey",
subHubs = TRUE, trapErrors = FALSE,
returnValidOnly = trapErrors,
softPower = 6, scale = TRUE,
verbose = 0, indent = 0)
{
spaces = indentSpaces(indent);

if (verbose==1)
printFlush(paste(spaces, "moduleEigengenes: Calculating", nlevels(as.factor(colors)),
"module eigengenes in given set."));
if (is.null(expr))
{
stop("moduleEigengenes: Error: expr is NULL. ");
}
if (is.null(colors))
{
stop("moduleEigengenes: Error: colors is NULL. ");
}

if (is.null(dim(expr)) || length(dim(expr))!=2)
stop("moduleEigengenes: Error: expr must be two-dimensional.");

if (dim(expr)[2]!=length(colors))
stop("moduleEigengenes: Error: ncol(expr) and length(colors) must be equal (one color per gene).");

if (is.factor(colors))
{
nl = nlevels(colors);
nlDrop = nlevels(colors[, drop = TRUE]);
if (nl > nlDrop)
stop(paste("Argument 'colors' contains unused levels (empty modules). ",
"Use colors[, drop=TRUE] to get rid of them."));
}

if (softPower < 0) stop("softPower must be non-negative");

alignRecognizedValues =  c("", "along average");
if (!is.element(align, alignRecognizedValues)) {
printFlush(paste("ModulePrincipalComponents: Error:",
"parameter align has an unrecognised value:",
align, "; Recognized values are ", alignRecognizedValues));
stop()
}

maxVarExplained = 10;
if (nPC>maxVarExplained)
warning(paste("Given nPC is too large. Will use value", maxVarExplained));

nVarExplained = min(nPC, maxVarExplained);
modlevels=levels(factor(colors))
if (excludeGrey)
if (sum(as.character(modlevels)!=as.character(grey))>0) {
modlevels = modlevels[as.character(modlevels)!=as.character(grey)]
} else {
stop(paste("Color levels are empty. Possible reason: the only color is grey",
"and grey module is excluded from the calculation."));
}
PrinComps = data.frame(matrix(NA,nrow=dim(expr)[[1]], ncol= length(modlevels)))
averExpr = data.frame(matrix(NA,nrow=dim(expr)[[1]], ncol= length(modlevels)))
varExpl= data.frame(matrix(NA, nrow= nVarExplained, ncol= length(modlevels)))
validMEs = rep(TRUE, length(modlevels));
validAEs = rep(FALSE, length(modlevels));
isPC = rep(TRUE, length(modlevels));
isHub = rep(FALSE, length(modlevels));
validColors = colors;
names(PrinComps)=paste(moduleColor.getMEprefix(), modlevels, sep="")
names(averExpr)=paste("AE",modlevels,sep="")
if (!is.null(rownames(expr))) rownames(PrinComps) = rownames(averExpr) = make.unique(rownames(expr))
for(i in c(1:length(modlevels)) )
{
if (verbose>1)
printFlush(paste(spaces, "moduleEigengenes : Working on ME for module", modlevels[i]));
modulename = modlevels[i]
restrict1 = as.character(colors)== as.character(modulename)
if (verbose > 2)
printFlush(paste(spaces, " ...", sum(restrict1), "genes"));
datModule = as.matrix(t(expr[, restrict1]));
n = dim(datModule)[1]; p = dim(datModule)[2];
pc = try(
{
if (nrow(datModule)>1 && impute)
{
seedSaved = FALSE;
if (exists(".Random.seed")) {
saved.seed = .Random.seed;
seedSaved = TRUE;
}
if (any(is.na(datModule)))
{
if (verbose > 5) printFlush(paste(spaces, " ...imputing missing data"));
datModule = impute.knn(datModule, k = min(10, nrow(datModule)-1))
# some versions of impute.knn return a list and we need the data component:
try( { if (!is.null(datModule\$data)) datModule = datModule\$data; }, silent = TRUE )
}
# The <<- in the next line is extremely important. Using = or <- will create a local variable of
# the name .Random.seed and will leave the important global .Random.seed untouched.
if (seedSaved) .Random.seed <<- saved.seed;
}
if (verbose > 5) printFlush(paste(spaces, " ...scaling"));
if (scale) datModule=t(scale(t(datModule)));
if (verbose > 5) printFlush(paste(spaces, " ...calculating SVD"));
svd1 = svd(datModule, nu = min(n, p, nPC), nv = min(n, p, nPC));
# varExpl[,i]= (svd1\$d[1:min(n,p,nVarExplained)])^2/sum(svd1\$d^2)
if (verbose > 5) printFlush(paste(spaces, " ...calculating PVE"));
veMat = cor(svd1\$v[, c(1:min(n,p,nVarExplained))], t(datModule), use = "p")
varExpl[c(1:min(n,p,nVarExplained)),i]= rowMeans(veMat^2, na.rm = TRUE)
# this is the first principal component
svd1\$v[,1]
}, silent = TRUE);
if (inherits(pc, 'try-error'))
{
if ( (!subHubs) && (!trapErrors) ) stop(pc);
if (subHubs)
{
if (verbose>0)
{
printFlush(paste(spaces, " ..principal component calculation for module",
modulename, "failed with the following error:"));
printFlush(paste(spaces, "     ", pc, spaces,
" ..hub genes will be used instead of principal components."));
}
isPC[i] = FALSE;
pc = try(
{
scaledExpr = scale(t(datModule));
covEx = cov(scaledExpr, use = "p");
covEx[!is.finite(covEx)] = 0;
kIM = (rowMeans(modAdj, na.rm = TRUE))^3;
if (max(kIM, na.rm = TRUE) > 1) kIM = kIM-1;
kIM[is.na(kIM)] = 0;
hub = which.max(kIM)
alignSign = sign(covEx[, hub]);
alignSign[is.na(alignSign)] = 0;
isHub[i] = TRUE;
pcxMat = scaledExpr *
matrix(kIM * alignSign, nrow = nrow(scaledExpr), ncol = ncol(scaledExpr), byrow = TRUE) /
sum(kIM);
pcx = rowMeans(pcxMat, na.rm = TRUE);
varExpl[1, i] = mean(cor(pcx, t(datModule), use = "p")^2, na.rm = TRUE)
pcx
}, silent = TRUE);
}
}

if (inherits(pc, 'try-error'))
{
if (!trapErrors) stop(pc);
if (verbose>0)
{
printFlush(paste(spaces, " ..ME calculation of module", modulename,
"failed with the following error:"));
printFlush(paste(spaces, "     ", pc, spaces,
" ..the offending module has been removed."));
}
warning(paste("Eigengene calculation of module", modulename,
"failed with the following error \n     ",
pc, "The offending module has been removed.\n"));
validMEs[i] = FALSE; isPC[i] = FALSE; isHub[i] = FALSE;
validColors[restrict1] = grey;
} else {
PrinComps[, i] = pc;
ae = try(
{
if (isPC[i]) scaledExpr = scale(t(datModule));
averExpr[, i] = rowMeans(scaledExpr, na.rm = TRUE);
if (align == "along average")
{
if (verbose>4) printFlush(paste(spaces,
" .. aligning module eigengene with average expression."))
corAve = cor(averExpr[,i], PrinComps[,i], use = "p");
if (!is.finite(corAve)) corAve = 0;
if (corAve<0) PrinComps[,i] = -PrinComps[,i]
}
0;
}, silent = TRUE);
if (inherits(ae, 'try-error'))
{
if (!trapErrors) stop(ae);
if (verbose>0)
{
printFlush(paste(spaces, " ..Average expression calculation of module", modulename,
"failed with the following error:"));
printFlush(paste(spaces, "     ", ae, spaces,
" ..the returned average expression vector will be invalid."));
}
warning(paste("Average expression calculation of module", modulename,
"failed with the following error \n     ",
ae, "The returned average expression vector will be invalid.\n"));
}
validAEs[i] = !inherits(ae, 'try-error')
}
}
allOK = (sum(!validMEs)==0)
if (returnValidOnly && sum(!validMEs)>0)
{
PrinComps = PrinComps[, validMEs, drop = FALSE]
averExpr = averExpr[, validMEs, drop = FALSE];
varExpl = varExpl[, validMEs, drop = FALSE];
validMEs = rep(TRUE, times = ncol(PrinComps));
isPC = isPC[validMEs];
isHub = isHub[validMEs];
validAEs = validAEs[validMEs];
}
allPC = (sum(!isPC)==0);
allAEOK = (sum(!validAEs)==0)
list(eigengenes = PrinComps, averageExpr = averExpr, varExplained = varExpl, nPC = nPC,
validMEs = validMEs, validColors = validColors, allOK = allOK, allPC = allPC, isPC = isPC,
isHub = isHub, validAEs = validAEs, allAEOK = allAEOK)
}

#---------------------------------------------------------------------------------------------
#
# removeGrey
#
#---------------------------------------------------------------------------------------------
# This function removes the grey eigengene from supplied module eigengenes.

removeGreyME = function(MEs, greyMEName = paste(moduleColor.getMEprefix(), "grey", sep=""))
{
newMEs = MEs;
if (is.vector(MEs) & mode(MEs)=="list")
{
warned = 0;
newMEs = vector(mode = "list", length = length(MEs));
for (set in 1:length(MEs))
{
if (!is.data.frame(MEs[[set]]\$data))
stop("MEs is a vector list but the list structure is missing the correct 'data' component.");
newMEs[[set]] = MEs[[set]];
if (greyMEName %in% names(MEs[[set]]\$data))
{
newMEs[[set]]\$data = MEs[[set]]\$data[, names(MEs[[set]]\$data)!=greyMEName];
} else {
if (warned==0)
{
warning("removeGreyME: The given grey ME name was not found among the names of given MEs.");
warned = 1;
}
}
}
} else {
if (length(dim(MEs))!=2) stop("Argument 'MEs' has incorrect dimensions.")
MEs = as.data.frame(MEs);
if (greyMEName %in% names(MEs))
{
newMEs = MEs[, names(MEs)!=greyMEName];
} else {
warning("removeGreyME: The given grey ME name was not found among the names of given MEs.");
}
}

newMEs;
}
#-------------------------------------------------------------------------------------
#
#  ModulePrincipalComponents
#
#-------------------------------------------------------------------------------------
# Has been superseded by moduleEigengenes above.

# ===================================================
# This function collects garbage

collectGarbage=function(){while (gc()[2,4] != gc()[2,4] | gc()[1,4] != gc()[1,4]){}}

#--------------------------------------------------------------------------------------
#
# orderMEs
#
#--------------------------------------------------------------------------------------
#
# performs hierarchical clustering on MEs and returns the order suitable for plotting.

orderMEs = function(MEs, greyLast = TRUE,
greyName = paste(moduleColor.getMEprefix(), "grey", sep=""),
orderBy = 1, order = NULL,
useSets = NULL, verbose = 0, indent = 0)
{
spaces = indentSpaces(indent);

if ("eigengenes" %in% names(MEs))
{
if (is.null(order))
{
if (verbose>0) printFlush(paste(spaces, "orderMEs: order not given, calculating using given set",
orderBy));
corPC = cor(MEs\$eigengenes, use="p")
disPC = 1-corPC;
order = .clustOrder(disPC, greyLast = greyLast, greyName = greyName);
}

if (length(order)!=dim(MEs\$eigengenes)[2])
stop("orderMEs: given MEs and order have incompatible dimensions.");

orderedMEs = MEs;
orderedMEs\$eigengenes = as.data.frame(MEs\$eigengenes[,order]);
colnames(orderedMEs\$eigengenes) = colnames(MEs\$eigengenes)[order];
if (!is.null(MEs\$averageExpr))
{
orderedMEs\$averageExpr = as.data.frame(MEs\$averageExpr[, order])
colnames(orderedMEs\$averageExpr) = colnames(MEs\$data)[order];
}
if (!is.null(MEs\$varExplained))
{
orderedMEs\$varExplained = as.data.frame(MEs\$varExplained[, order])
colnames(orderedMEs\$varExplained) = colnames(MEs\$data)[order];
}
return(orderedMEs);
} else {
check = checkSets(MEs, checkStructure = TRUE, useSets = useSets);
if (check\$structureOK)
{
multiSet = TRUE;
} else {
multiSet = FALSE;
MEs = fixDataStructure(MEs);
useSets = NULL; orderBy = 1;
}

if (!is.null(useSets))
if (is.na(match(orderBy, useSets))) orderBy = useSets[1];

if (is.null(order))
{
if (verbose>0) printFlush(paste(spaces, "orderMEs: order not given, calculating using given set",
orderBy));
corPC = cor(MEs[[orderBy]]\$data, use="p")
disPC = 1-corPC;
order = .clustOrder(disPC, greyLast = greyLast, greyName = greyName);
}

if (length(order)!=dim(MEs[[orderBy]]\$data)[2])
stop("orderMEs: given MEs and order have incompatible dimensions.");

nSets = length(MEs);
orderedMEs = MEs;
if (is.null(useSets)) useSets = c(1:nSets);
for (set in useSets)
{
orderedMEs[[set]]\$data = as.data.frame(MEs[[set]]\$data[,order]);
colnames(orderedMEs[[set]]\$data) = colnames(MEs[[set]]\$data)[order];
if (!is.null(MEs[[set]]\$averageExpr))
{
orderedMEs[[set]]\$averageExpr = as.data.frame(MEs[[set]]\$averageExpr[, order])
colnames(orderedMEs[[set]]\$averageExpr) = colnames(MEs[[set]]\$data)[order];
}
if (!is.null(MEs[[set]]\$varExplained))
{
orderedMEs[[set]]\$varExplained = as.data.frame(MEs[[set]]\$varExplained[, order])
colnames(orderedMEs[[set]]\$varExplained) = colnames(MEs[[set]]\$data)[order];
}
}
if (multiSet) {
return(orderedMEs);
} else {
return(orderedMEs[[1]]\$data);
}
}
}

#---------------------------------------------------------------------------------------------
#
# .clustOrder
#
#---------------------------------------------------------------------------------------------

.clustOrder = function(distM, greyLast = TRUE,
greyName = paste(moduleColor.getMEprefix(), "grey", sep=""))
{
distM = as.matrix(distM);
distNames = dimnames(distM)[[1]];
greyInd = match(greyName, distNames);
if (greyLast && !is.na(greyInd))
{
clusterMEs = (greyName!=distNames);
if (sum(clusterMEs)>1)
{
h = fastcluster::hclust(as.dist(distM[clusterMEs, clusterMEs]), method = "average");
order = h\$order;
if (sum(order>=greyInd)>0) order[order>=greyInd] = order[order>=greyInd]+1;
order = c(order, greyInd);
} else if (ncol(distM)>1)
{
if (greyInd==1)
{
order = c(2, 1)
} else order = c(1, 2);
} else order = 1;
} else {
if (length(distM)>1)
{
h = fastcluster::hclust(as.dist(distM), method = "average");
order = h\$order;
} else order = 1;
}
order;

# print(paste("names:", names(distM), collapse = ", "));
# print(paste("order:", order, collapse=", "))
}

#---------------------------------------------------------------------------------------------
#
# consensusOrderMEs
#
#---------------------------------------------------------------------------------------------
# Orders MEs by the dendrogram of their consensus dissimilarity.

consensusOrderMEs = function(MEs, useAbs = FALSE, useSets = NULL, greyLast = TRUE,
greyName = paste(moduleColor.getMEprefix(), "grey", sep=""),
method = "consensus")
{
# Debugging code:
#printFlush("consensusOrderMEs:");
#size = checkSets(MEs);
#print(size);
# end debuging code
Diss = consensusMEDissimilarity(MEs, useAbs = useAbs, useSets = useSets, method = method);
order = .clustOrder(Diss, greyLast, greyName);
#print(order)
orderMEs(MEs, greyLast = greyLast, greyName = greyName, order = order, useSets = useSets);
}

orderMEsByHierarchicalConsensus = function(MEs, networkOptions, consensusTree,
greyName = "ME0",
calibrate = FALSE)
{
Diss = .hierarchicalConsensusMEDissimilarity(MEs, networkOptions, consensusTree,
greyName = greyName, calibrate = calibrate);
order = .clustOrder(Diss, greyLast = TRUE, greyName = greyName);
mtd.subset(MEs, , order);
}

#---------------------------------------------------------------------------------------------
#
# consensusMEDissimilarity
#
#---------------------------------------------------------------------------------------------
# This function calcualtes a consensus dissimilarity (i.e., correlation) among sets of MEs (more generally,
# any sets of vectors).
# CAUTION: when not using absolute value, the minimum similarity will favor the large negative values!

consensusMEDissimilarity = function(MEs, useAbs = FALSE, useSets = NULL, method = "consensus")
{
methods = c("consensus", "majority");
m = charmatch(method, methods);
if (is.na(m))
stop("Unrecognized method given. Recognized values are", paste(methods, collapse =", "));

nSets = length(MEs);
MEDiss = vector(mode="list", length = nSets);
if (is.null(useSets)) useSets = c(1:nSets);
for (set in useSets)
{
if (useAbs)
{
diss = 1-abs(cor(MEs[[set]]\$data, use="p"));
} else
{
diss = 1-cor(MEs[[set]]\$data, use="p");
}
MEDiss[[set]] = list(Diss = diss);
}

for (set in useSets)
if (set==useSets[1])
{
ConsDiss = MEDiss[[set]]\$Diss;
} else {
if (m==1) {
ConsDiss = pmax(ConsDiss, MEDiss[[set]]\$Diss);
} else {
ConsDiss = ConsDiss + MEDiss[[set]]\$Diss;
}
}

if (m==2) ConsDiss = ConsDiss/nSets;

ConsDiss = as.data.frame(ConsDiss);
names(ConsDiss) = names(MEs[[useSets[1]]]\$data);
rownames(ConsDiss) = make.unique(names(MEs[[useSets[1]]]\$data));

ConsDiss;
}

hierarchicalConsensusMEDissimilarity = function(MEs, networkOptions, consensusTree,
greyName = "ME0", calibrate = FALSE)

{
nSets = checkSets(MEs)\$nSets;

if (inherits(networkOptions, "NetworkOptions"))
networkOptions = list2multiData(.listRep(networkOptions, nSets));

.hierarchicalConsensusMEDissimilarity(MEs, networkOptions, consensusTree,
greyName = greyName, calibrate = calibrate)
}

# Quantile normalization
# normalize each column such that (column) quantiles are the same
# The final value for each quantile is the 'summaryType' of the corresponding quantiles across the columns

.equalizeQuantiles = function(data, summaryType = c("median", "mean"))
{
summaryType = match.arg(summaryType);
data.sorted = apply(data, 2, sort);

if (summaryType == "median")
{
refSample = rowMedians(data.sorted, na.rm = TRUE)
} else if (summaryType == "mean")
refSample = rowMeans(data.sorted, na.rm = TRUE);

ranks = round(colRanks(data, ties.method = "average", preserveShape = TRUE))
out = refSample [ ranks ];
dim(out) = dim(data);
dimnames(out) = dimnames(data);

out;
}

.turnVectorIntoDist = function(x, size, Diag, Upper)
{
attr(x, "Size") = size;
attr(x, "Diag") = FALSE;
attr(x, "Upper") = FALSE;
class(x) = c("dist", class(x))
x;
}

.turnDistVectorIntoMatrix = function(x, size, Diag, Upper, diagValue)
{
mat = as.matrix(.turnVectorIntoDist(x, size, Diag, Upper));
if (!Diag) diag(mat) = diagValue;
mat;
}

# This function calculates consensus dissimilarity of module eigengenes

.consensusMEDissimilarity = function(multiMEs,
useSets = NULL,
corFnc = cor, corOptions = list(use = 'p'),
equalizeQuantiles = FALSE,
quantileSummary = "mean",
consensusQuantile = 0, useAbs = FALSE,
greyName = "ME0")
{
nSets = checkSets(multiMEs)\$nSets;
useMEs = c(1:ncol(multiMEs[[1]]\$data))[names(multiMEs[[1]]\$data)!=greyName]
useNames = names(multiMEs[[1]]\$data)[useMEs];
nUseMEs = length(useMEs);
#  if (nUseMEs<2)
#    stop("Something is wrong: there are two or more proper modules, but less than two proper",
#         "eigengenes. Please check that the grey color label and module eigengene label",
#         "are correct.");

if (is.null(useSets)) useSets = c(1:nSets);
nUseSets = length(useSets);
MEDiss = array(NA, dim = c(nUseMEs, nUseMEs, nUseSets));
for (set in useSets)
{
corOptions\$x = multiMEs[[set]]\$data[, useMEs];
if (useAbs)
{
diss = 1-abs(do.call(corFnc, corOptions));
} else {
diss = 1-do.call(corFnc, corOptions);
}
MEDiss[, , set] = diss;
}

if (equalizeQuantiles)
{
distMat = apply(MEDiss, 3, function(x) {as.numeric(as.dist(x))} )
dim(distMat) = c( nUseMEs * (nUseMEs-1)/2, nUseSets);
normalized = .equalizeQuantiles(distMat, summaryType = quantileSummary);
MEDiss = apply(normalized, 2, .turnDistVectorIntoMatrix, size = nUseMEs, Diag = FALSE, Upper = FALSE,
diagValue = 0);
}

ConsDiss = apply(MEDiss, c(1:2), quantile, probs = 1-consensusQuantile, names = FALSE, na.rm = TRUE);
colnames(ConsDiss) = rownames(ConsDiss) = make.unique(useNames);
ConsDiss;
}

.hierarchicalConsensusMEDissimilarity = function(multiMEs,
networkOptions,
consensusTree,
greyName,
calibrate)
{
nSets = checkSets(multiMEs)\$nSets;
useMEs = which(mtd.colnames(multiMEs)!=greyName);
useNames = mtd.colnames(multiMEs)[useMEs];
nUseMEs = length(useMEs);
if (nUseMEs == 0) return(matrix(numeric(0), 0, 0));
#  if (nUseMEs<2)
#    stop("Something is wrong: there are two or more proper modules, but less than two proper",
#         "eigengenes. Please check that the grey color label and module eigengene label",
#         "are correct.");

if (!isMultiData(networkOptions, strict = FALSE))
stop("'networkOptions' must be either a single list of class 'NetworkOptions'\n",
"or a MultiData structure containing one such list per input set. ");

if (length(networkOptions)!=nSets)
stop("Number of sets in 'multiMEs' and 'networkOptions' must be the same.");

MEDiss = mtd.mapply(function(me, netOpt)
{
cor.me = do.call(netOpt\$corFnc,
c(list(x = me), netOpt\$corOptions));
if (!grepl("signed", netOpt\$networkType)) cor.me = abs(cor.me);
cor.me;
}, mtd.subset(multiMEs, , useMEs), networkOptions, returnList = TRUE);

if (calibrate)
{
cons = hierarchicalConsensusCalculation(MEDiss,
consensusTree = consensusTree,
level = 1,
# Return options: the data can be either saved or returned but not both.
saveConsensusData = FALSE,
keepIntermediateResults = FALSE,
# Internal handling of data
useDiskCache = FALSE,
# Behaviour
collectGarbage = FALSE,
verbose = 0, indent = 0)\$consensusData
cons = BD.getData(cons, blocks = 1);
} else
cons = simpleHierarchicalConsensusCalculation(MEDiss, consensusTree)
consDiss = 1-cons;
colnames(consDiss) = rownames(consDiss) = make.unique(useNames);
consDiss;
}

#======================================================================================================
# ColorHandler.R
#======================================================================================================

# A set of global variables and functions that should help handling color names for some 400+ modules.
# A vector called .GlobalStandardColors is defined that holds color names with first few entries
# being the well-known and -loved colors. The rest is randomly chosen from the color names of R,
# excluding grey colors.

#---------------------------------------------------------------------------------------------------------
#
# .GlobalStandardColors
#
#---------------------------------------------------------------------------------------------------------
# This code forms a vector of color names in which the first entries are given by BaseColors and the rest
# is "randomly" chosen from the rest of R color names that do not contain "grey" nor "gray".

BaseColors = c("turquoise","blue","brown","yellow","green","red","black","pink","magenta",
"purple","greenyellow","tan","salmon","cyan", "midnightblue", "lightcyan",
"grey60", "lightgreen", "lightyellow", "royalblue", "darkred", "darkgreen",
"darkturquoise", "darkgrey",
"orange", "darkorange", "white", "skyblue", "saddlebrown", "steelblue",
"paleturquoise", "violet", "darkolivegreen", "darkmagenta" );

RColors = colors()[-grep("grey", colors())];
RColors = RColors[-grep("gray", RColors)];
InBase = match(BaseColors, RColors);
ExtraColors = RColors[-c(InBase[!is.na(InBase)])];
nExtras = length(ExtraColors);

# Here is the vector of colors that should be used by all functions:

.GlobalStandardColors = c(BaseColors, ExtraColors[rank(sin(13*c(1:nExtras) +sin(13*c(1:nExtras))) )] );

standardColors = function(n = NULL)
{
if (is.null(n)) return(.GlobalStandardColors);
if ((n>0) && (n<=length(.GlobalStandardColors)))
{
return(.GlobalStandardColors[c(1:n)]);
} else {
stop("Invalid number of standard colors requested.");
}
}

rm(BaseColors, RColors, ExtraColors, nExtras, InBase);

#---------------------------------------------------------------------------------------------------------
#
# normalizeLabels
#
#---------------------------------------------------------------------------------------------------------
# "Normalizes" numerical labels such that the largest group is labeled 1, the next largest 2 etc.
# If KeepZero == TRUE, label zero is preserved.

normalizeLabels = function(labels, keepZero = TRUE)
{
if (keepZero)
{
NonZero = (labels!=0);
}
else
{
NonZero = rep(TRUE, length(labels));
}
f = as.numeric(factor(labels[NonZero]));
t = table(labels[NonZero]);
# print(t)
r = rank(-as.vector(t), ties.method = "first");
norm_labs = rep(0, times = length(labels));
norm_labs[NonZero] = r[f];
norm_labs;
}

#---------------------------------------------------------------------------------------------------------
#
# labels2colors
#
#---------------------------------------------------------------------------------------------------------
# This function converts integer numerical labels labels into color names in the order either given by
# colorSeq,
# or (if colorSeq==NULL) by standardColors(). If GreyIsZero == TRUE, labels 0 will be assigned
# the color grey; otherwise presence of labels below 1 will trigger an error.
# dimensions of labels (if present) are preserved.

labels2colors = function(labels, zeroIsGrey = TRUE, colorSeq = NULL, naColor = "grey",
commonColorCode = TRUE)
{
if (is.null(colorSeq)) colorSeq = standardColors();

if (is.numeric(labels))
{
if (zeroIsGrey) minLabel = 0 else minLabel = 1
if (any(labels<0, na.rm = TRUE)) minLabel = min(c(labels), na.rm = TRUE)
nLabels = labels;
} else {

if (commonColorCode)
{
factors = factor(c(as.matrix(as.data.frame(labels))))
nLabels = as.numeric(factors)
dim(nLabels)= dim(labels);
} else {
labels = as.matrix(as.data.frame(labels));
factors = list();
for (c in 1:ncol(labels))
factors[[c]] = factor(labels[, c]);
nLabels = sapply(factors, as.numeric)
}
}

if (max(nLabels, na.rm = TRUE) > length(colorSeq))
{
nRepeats = as.integer((max(labels)-1)/length(colorSeq)) + 1;
warning(paste("labels2colors: Number of labels exceeds number of avilable colors.",
"Some colors will be repeated", nRepeats, "times."))
extColorSeq = colorSeq;
for (rep in 1:nRepeats)
extColorSeq = c(extColorSeq, paste(colorSeq, ".", rep, sep=""));
} else {
nRepeats = 1;
extColorSeq = colorSeq;
}
colors = rep("grey", length(nLabels));
fin = !is.na(nLabels);
colors[!fin] = naColor;
finLabels = nLabels[fin];
colors[fin][finLabels!=0] = extColorSeq[finLabels[finLabels!=0]];
if (!is.null(dim(labels)))
dim(colors) = dim(labels);

colors;
}

#========================================================================================
#
# MergeCloseModules
#
#========================================================================================

#---------------------------------------------------------------------------------
#
# moduleNumber
#
#---------------------------------------------------------------------------------
# Similar to modulecolor2 above, but returns numbers instead of colors, which is oftentimes more useful.
# 0 means unassigned.
# Return value is a simple vector, not a factor.
# Caution: the module numbers are neither sorted nor sequential, the only guarranteed fact is that grey
# probes are labeled by 0 and all probes belonging to the same module have the same number.

moduleNumber = function(dendro, cutHeight = 0.9, minSize = 50)
{
Branches = cutree(dendro, h = cutHeight);
NOnBranches = table(Branches);
TrueBranch = NOnBranches >= minSize;
Branches[!TrueBranch[Branches]] = 0;

Branches;
}

#--------------------------------------------------------------------------------------
#
# fixDataStructure
#
#--------------------------------------------------------------------------------------
# Check input data: if they are not a vector of lists, put them into the form of a vector of lists.

fixDataStructure = function(data, verbose = 0, indent = 0)
{
spaces = indentSpaces(indent);
if (!inherits(data, "list"))
{
if (verbose>0)
printFlush(paste(spaces,
"fixDataStructure: data is not a vector of lists: converting it into one."));
x = data;
data = vector(mode = "list", length = 1);
data[[1]] = list(data = x);
rm(x);
}
data;
}

#-------------------------------------------------------------------------------------------
#
# checkSets
#
#-------------------------------------------------------------------------------------------
# Checks sets for consistency and returns some diagnostics.

.permissiveDim = function(x)
{
d = dim(x);
if (is.null(d)) return( c(length(x), 1))
return(d)
}

checkSets = function(data, checkStructure = FALSE, useSets = NULL)
{
nSets = length(data);
if (is.null(useSets)) useSets = c(1:nSets);
if (nSets<=0) stop("No data given.");
structureOK = TRUE;
if (!inherits(data, "list"))
{
if (checkStructure)
{
structureOK = FALSE;
nGenes = 0; nSamples = 0;
} else {
stop("data does not appear to have the correct format. Consider using fixDataStructure",
"or setting checkStructure = TRUE when calling this function.");
}
} else {
nSamples = vector(length = nSets);
nGenes = .permissiveDim(data[[useSets[1]]]\$data)[2];
for (set in useSets)
{
if (nGenes!=.permissiveDim(data[[set]]\$data)[2])
{
if (checkStructure)
{
structureOK = FALSE;
} else {
stop(paste("Incompatible number of genes in set 1 and", set));
}
}
nSamples[set] = .permissiveDim(data[[set]]\$data)[1];
}
}

list(nSets = nSets, nGenes = nGenes, nSamples = nSamples, structureOK = structureOK);
}

#--------------------------------------------------------------------------------------
#
# multiSetMEs
#
#--------------------------------------------------------------------------------------

multiSetMEs = function(exprData, colors, universalColors = NULL, useSets = NULL,
useGenes = NULL, impute = TRUE,
nPC = 1, align = "along average", excludeGrey = FALSE,
grey = if (is.null(universalColors)) {if(is.numeric(colors)) 0 else "grey"} else
if (is.numeric(universalColors)) 0 else "grey",
subHubs = TRUE,
trapErrors = FALSE,
returnValidOnly = trapErrors,
softPower = 6,
verbose = 1, indent = 0)
{
spaces = indentSpaces(indent);
nSets = length(exprData);
setsize = checkSets(exprData, useSets = useSets);
nGenes = setsize\$nGenes;
nSamples = setsize\$nSamples;
if (verbose>0) printFlush(paste(spaces,"multiSetMEs: Calculating module MEs."));
MEs = vector(mode="list", length=nSets);
consValidMEs = NULL;
if (!is.null(universalColors))
consValidColors = universalColors;
if (is.null(useSets)) useSets = c(1:nSets);
if (is.null(useGenes))
{
for (set in useSets) {
if (verbose>0) printFlush(paste(spaces,"  Working on set", as.character(set), "..."));
if (is.null(universalColors)) {
setColors = colors[,set];
} else {
setColors = universalColors;
}
setMEs = moduleEigengenes(expr = exprData[[set]]\$data,
colors = setColors, impute = impute, nPC = nPC, align = align,
excludeGrey = excludeGrey, grey = grey,
trapErrors = trapErrors, subHubs = subHubs,
returnValidOnly = FALSE, softPower = softPower,
verbose = verbose-1, indent = indent+1);
if (!is.null(universalColors) && (!setMEs\$allOK))
{
if (is.null(consValidMEs)) {
consValidMEs = setMEs\$validMEs;
} else {
consValidMEs = consValidMEs * setMEs\$validMEs;
}
consValidColors[setMEs\$validColors!=universalColors] =
setMEs\$validColors[setMEs\$validColors!=universalColors]
}
MEs[[set]] = setMEs;
names(MEs[[set]])[names(setMEs)=='eigengenes'] = 'data';
# Here's what moduleEigengenes returns:
#
#  list(eigengenes = PrinComps, averageExpr = averExpr, varExplained = varExpl, nPC = nPC,
#       validMEs = validMEs, validColors = validColors, allOK = allOK, allPC = allPC, isPC = isPC,
#       isHub = isHub, validAEs = validAEs, allAEOK = allAEOK)
}
} else {
for (set in useSets) {
if (verbose>0) printFlush(paste(spaces,"  Working on set", as.character(set), "..."));
if (is.null(universalColors)) {
setColors = colors[useGenes ,set];
} else {
setColors = universalColors[useGenes];
}
setMEs = moduleEigengenes(expr = exprData[[set]]\$data[, useGenes],
colors = setColors, impute = impute, nPC = nPC, align = align,
excludeGrey = excludeGrey, grey = grey,
trapErrors = trapErrors, subHubs = subHubs,
returnValidOnly = FALSE, softPower = softPower,
verbose = verbose-1, indent = indent+1);
if (!is.null(universalColors) && (!setMEs\$allOK))
{
if (is.null(consValidMEs)) {
consValidMEs = setMEs\$validMEs;
} else {
consValidMEs = consValidMEs * setMEs\$validMEs;
}
consValidColors[setMEs\$validColors!=universalColors[useGenes]] =
setMEs\$validColors[setMEs\$validColors!=universalColors[useGenes]]
}
MEs[[set]] = setMEs;
names(MEs[[set]])[names(setMEs)=='eigengenes'] = 'data';
}
}
if (!is.null(universalColors))
{
for (set in 1:nSets)
{
if (!is.null(consValidMEs)) MEs[[set]]\$validMEs = consValidMEs;
MEs[[set]]\$validColors = consValidColors;
}
}
for (set in 1:nSets)
{
MEs[[set]]\$allOK = (sum(!MEs[[set]]\$validMEs)==0);
if (returnValidOnly)
{
valid = (MEs[[set]]\$validMEs > 0);
MEs[[set]]\$data = MEs[[set]]\$data[, valid, drop = FALSE];
MEs[[set]]\$averageExpr = MEs[[set]]\$averageExpr[, valid, drop = FALSE];
MEs[[set]]\$varExplained = MEs[[set]]\$varExplained[, valid, drop = FALSE];
MEs[[set]]\$isPC =  MEs[[set]]\$isPC[valid];
MEs[[set]]\$allPC = (sum(!MEs[[set]]\$isPC)==0)
MEs[[set]]\$isHub =  MEs[[set]]\$isHub[valid];
MEs[[set]]\$validAEs =  MEs[[set]]\$validAEs[valid];
MEs[[set]]\$allAEOK = (sum(!MEs[[set]]\$validAEs)==0)
MEs[[set]]\$validMEs = rep(TRUE, times = ncol(MEs[[set]]\$data));
}
}

names(MEs) = names(exprData);

MEs;
}

#---------------------------------------------------------------------------------------------
#
# MergeCloseModules
#
#---------------------------------------------------------------------------------------------

mergeCloseModules = function(
# input data
exprData, colors,

# Optional starting eigengenes
MEs = NULL,

# Optional restriction to a subset of all sets
useSets = NULL,

# If missing data are present, impute them?
impute = TRUE,

# Input handling options
checkDataFormat = TRUE,
unassdColor = if (is.numeric(colors)) 0 else "grey",

# Options for eigengene network construction
corFnc = cor, corOptions = list(use = 'p'),
useAbs = FALSE,

# Options for constructing the consensus
equalizeQuantiles = FALSE,
quantileSummary = "mean",
consensusQuantile = 0,

# Merging options
cutHeight = 0.2,
iterate = TRUE,

# Output options
relabel = FALSE,
colorSeq = NULL,
getNewMEs = TRUE,
getNewUnassdME = TRUE,

# Options controlling behaviour of the function
trapErrors = FALSE,
verbose = 1, indent = 0)
{

MEsInSingleFrame = FALSE;
spaces = indentSpaces(indent);

#numCols = is.numeric(colors);
#facCols = is.factor(colors);
#charCols = is.character(colors);

origColors = colors;

colors = colors[, drop = TRUE];

greyName = paste(moduleColor.getMEprefix(), unassdColor, sep="");

if (verbose>0) printFlush(paste(spaces,
"mergeCloseModules: Merging modules whose distance is less than", cutHeight));

if (verbose>3) printFlush(paste(spaces,
"  .. will look for grey label", greyName));

if (!checkSets(exprData, checkStructure = TRUE, useSets = useSets)\$structureOK)
{
if (checkDataFormat)
{
exprData = fixDataStructure(exprData);
MEsInSingleFrame = TRUE;
} else {
stop("Given exprData appear to be misformatted.");
}
}

setsize = checkSets(exprData, useSets = useSets);
nSets = setsize\$nSets;

if (!is.null(MEs))
{
checkMEs = checkSets(MEs, checkStructure = TRUE, useSets = useSets);
if (checkMEs\$structureOK)
{
if (nSets!=checkMEs\$nSets)
stop("Input error: numbers of sets in exprData and MEs differ.")
for (set in 1:nSets)
{
if (checkMEs\$nSamples[set]!=setsize\$nSamples[set])
stop(paste("Number of samples in MEs is incompatible with subset length for set", set));
}
} else {
if (MEsInSingleFrame)
{
MEs = fixDataStructure(MEs);
checkMEs = checkSets(MEs);
} else {
stop("MEs do not have the appropriate structure (same as exprData). ");
}
}
}

if (setsize\$nGenes!=length(colors))
stop("Number of genes in exprData is different from the length of original colors. They must equal.");

if ((cutHeight <0) | (cutHeight>(1+as.integer(useAbs))))
stop(paste("Given cutHeight is out of sensible range between 0 and", 1+as.integer(useAbs) ));

done = FALSE; iteration = 1;

MergedColors = colors;
ok = try(
{
while (!done)
{
if (is.null(MEs))
{
MEs = multiSetMEs(exprData, colors = NULL, universalColors = colors,
useSets = useSets, impute = impute,
subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE,
grey = unassdColor,
verbose = verbose-1, indent = indent+1);
MEs = consensusOrderMEs(MEs, useAbs = useAbs, useSets = useSets, greyLast = FALSE);
} else if (nlevels(as.factor(colors))!=checkMEs\$nGenes)
{
if ((iteration==1) & (verbose>0)) printFlush(paste(spaces, "  Number of given module colors",
"does not match number of given MEs => recalculating the MEs."))
MEs = multiSetMEs(exprData, colors = NULL, universalColors = colors,
useSets = useSets, impute = impute,
subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE,
grey = unassdColor,
verbose = verbose-1, indent = indent+1);
MEs = consensusOrderMEs(MEs, useAbs = useAbs, useSets = useSets, greyLast = FALSE);
}
if (iteration==1) oldMEs = MEs;

# Check colors for number of distinct colors that are not grey

colLevs = as.character(levels(as.factor(colors)));
if  ( length(colLevs[colLevs!=as.character(unassdColor)])<2 )
{
printFlush(paste(spaces,
"mergeCloseModules: less than two proper modules."));
printFlush(paste(spaces, " ..color levels are",
paste(colLevs, collapse = ", ")));
printFlush(paste(spaces, " ..there is nothing to merge."));
MergedNewColors = colors;
MergedColors = colors;
nOldMods = 1; nNewMods = 1;
oldTree = NULL; Tree = NULL;
break;
}

# Cluster the found module eigengenes and merge ones that are too close according to the specified
# quantile.

nOldMods = nlevels(as.factor(colors));

ConsDiss = .consensusMEDissimilarity(MEs, equalizeQuantiles = equalizeQuantiles,
quantileSummary = quantileSummary,
consensusQuantile = consensusQuantile, useAbs = useAbs,
corFnc = corFnc, corOptions = corOptions,
useSets = useSets, greyName = greyName);

Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
if (iteration==1) oldTree = Tree;
TreeBranches = as.factor(moduleNumber(dendro = Tree, cutHeight = cutHeight, minSize = 1));
UniqueBranches = levels(TreeBranches);
nBranches = nlevels(TreeBranches)
NumberOnBranch = table(TreeBranches);
MergedColors = colors;

# Merge modules on the same branch

for (branch in 1:nBranches) if (NumberOnBranch[branch]>1)
{
ModulesOnThisBranch = names(TreeBranches)[TreeBranches==UniqueBranches[branch]];
ColorsOnThisBranch = substring(ModulesOnThisBranch, 3);
if (is.numeric(origColors)) ColorsOnThisBranch = as.numeric(ColorsOnThisBranch);
if (verbose>3)
printFlush(paste(spaces, "  Merging original colors",
paste(ColorsOnThisBranch, collapse=", ")));
for (color in 2:length(ColorsOnThisBranch))
MergedColors[MergedColors==ColorsOnThisBranch[color]] = ColorsOnThisBranch[1];
}

MergedColors = MergedColors[, drop = TRUE];

nNewMods = nlevels(as.factor(MergedColors));

if (nNewMods<nOldMods & iterate)
{
colors = MergedColors;
MEs = NULL;
} else {
done = TRUE;
}
iteration = iteration+1;
}
if (relabel)
{
RawModuleColors = levels(as.factor(MergedColors));
# relabel the merged colors to the usual order based on the number of genes in each module
if (is.null(colorSeq))
{
if (is.numeric(origColors)) {
colorSeq = c(1:length(table(origColors)));
} else {
nNewColors = length(RawModuleColors);
colorSeq = labels2colors(c(1:nNewColors))
}
}

# nGenesInModule = rep(0, nNewMods);
# for (mod in 1:nNewMods) nGenesInModule[mod] = sum(MergedColors==RawModuleColors[mod]);
nGenesInModule = table(MergedColors);

SortedRawModuleColors = RawModuleColors[order(-nGenesInModule)]

# Change the color names to the standard sequence, but leave grey grey
# (that's why rank in general does not equal color)
MergedNewColors = MergedColors;
if (is.factor(MergedNewColors)) MergedNewColors = as.character(MergedNewColors);
if (verbose>3) printFlush(paste(spaces, "   Changing original colors:"));
rank = 0;
for (color in 1:length(SortedRawModuleColors)) if (SortedRawModuleColors[color]!=unassdColor)
{
rank = rank + 1;
if (verbose>3) printFlush(paste(spaces, "      ", SortedRawModuleColors[color],
"to ", colorSeq[rank]));
MergedNewColors[MergedColors==SortedRawModuleColors[color]] = colorSeq[rank];
}
if (is.factor(MergedColors)) MergedNewColors = as.factor(MergedNewColors);
} else {
MergedNewColors = MergedColors;
}
MergedNewColors = MergedNewColors[, drop = TRUE];

if (getNewMEs)
{
if (nNewMods<nOldMods | relabel | getNewUnassdME)
{
if (verbose>0) printFlush(paste(spaces, "  Calculating new MEs..."));
NewMEs = multiSetMEs(exprData, colors = NULL, universalColors = MergedNewColors,
useSets = useSets, impute = impute, subHubs = TRUE, trapErrors = FALSE,
excludeGrey = !getNewUnassdME, grey = unassdColor,
verbose = verbose-1, indent = indent+1);
newMEs = consensusOrderMEs(NewMEs, useAbs = useAbs, useSets = useSets, greyLast = TRUE,
greyName = greyName);

ConsDiss = .consensusMEDissimilarity(newMEs,
equalizeQuantiles = equalizeQuantiles,
quantileSummary = quantileSummary,
consensusQuantile = consensusQuantile, useAbs = useAbs,
corFnc = corFnc, corOptions = corOptions,
useSets = useSets, greyName = greyName);
if (length(ConsDiss) > 1)
{
Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
} else Tree = NULL;
} else {
newMEs = MEs;
}
} else {
newMEs = NULL;
}
if (MEsInSingleFrame)
{
newMEs = newMEs[[1]]\$data;
oldMEs = oldMEs[[1]]\$data;
}
}, silent = TRUE);

if (inherits(ok, 'try-error'))
{
if (!trapErrors) stop(ok);
if (verbose>0)
{
printFlush(paste(spaces, "Warning: merging of modules failed with the following error:"));
printFlush(paste('   ', spaces, ok));
printFlush(paste(spaces, " --> returning unmerged modules and *no* eigengenes."));
}
warning(paste("mergeCloseModules: merging of modules failed with the following error:\n",
"    ", ok, " --> returning unmerged modules and *no* eigengenes.\n"));
list(colors = origColors, allOK = FALSE);
} else {
list(colors = MergedNewColors, dendro = Tree, oldDendro = oldTree, cutHeight = cutHeight,
oldMEs = oldMEs, newMEs = newMEs, allOK = TRUE);
}
}

#---------------------------------------------------------------------------------------------
#
# hierarchicalMergeCloseModules
#
#---------------------------------------------------------------------------------------------

hierarchicalMergeCloseModules = function(
# input data
multiExpr,
multiExpr.imputed = NULL,
labels,

# Optional starting eigengenes
MEs = NULL,

unassdColor = if (is.numeric(labels)) 0 else "grey",
# If missing data are present, impute them?
impute = TRUE,

# Options for eigengene network construction
networkOptions,

# Options for constructing the consensus
consensusTree,
calibrateMESimilarities = FALSE,

# Merging options
cutHeight = 0.2,
iterate = TRUE,

# Output options
relabel = FALSE,
colorSeq = NULL,
getNewMEs = TRUE,
getNewUnassdME = TRUE,

# Options controlling behaviour of the function
trapErrors = FALSE,
verbose = 1, indent = 0)
{

MEsInSingleFrame = FALSE;
spaces = indentSpaces(indent);

#numCols = is.numeric(labels);
#facCols = is.factor(labels);
#charCols = is.character(labels);

origColors = labels;

useSets = consensusTreeInputs(consensusTree);

labels = labels[, drop = TRUE];

if (all(replaceMissing(labels==unassdColor, TRUE)))
return( list(labels = labels, allOK = FALSE));

greyName = paste(moduleColor.getMEprefix(), unassdColor, sep="");

if (verbose>0) printFlush(paste(spaces,
"mergeCloseModules: Merging modules whose distance is less than", cutHeight));

if (verbose>3) printFlush(paste(spaces,
"  .. will use unassigned ME label", greyName));

setsize = checkSets(multiExpr[useSets]);
nUseSets = setsize\$nSets;

if (is.null(multiExpr.imputed))
{
if (impute) {
multiExpr.imputed = mtd.apply(multiExpr[useSets], imputeByModule,
labels = labels,
excludeUnassigned = FALSE, unassignedLabel = unassdColor,
scale = TRUE)
} else
multiExpr.imputed = multiExpr[useSets];
} else
stopifnot(isTRUE(all.equal(checkSets(multiExpr.imputed), setsize)));

if (!is.null(MEs))
{
checkMEs = checkSets(MEs[useSets], checkStructure = TRUE);
if (checkMEs\$structureOK)
{
if (nUseSets!=checkMEs\$nSets)
stop("Input error: numbers of sets in multiExpr and MEs differ.")
for (set in 1:nUseSets)
{
if (checkMEs\$nSamples[set]!=setsize\$nSamples[set])
stop(paste("Number of samples in MEs is incompatible with subset length for set", set));
}
} else {
if (MEsInSingleFrame)
{
MEs = fixDataStructure(MEs);
checkMEs = checkSets(MEs);
} else {
stop("MEs do not have the appropriate structure (same as multiExpr). ");
}
}
}

if (inherits(networkOptions, "NetworkOptions"))
networkOptions = list2multiData(.listRep(networkOptions, nUseSets));

if (setsize\$nGenes!=length(labels))
stop("Number of genes in multiExpr is different from the length of original labels. They must equal.");

done = FALSE; iteration = 1;

MergedColors = labels;
#ok = try(
#{
while (!done)
{
if (is.null(MEs))
{
MEs = multiSetMEs(multiExpr.imputed, colors = NULL, universalColors = labels,
impute = impute,
subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE,
grey = unassdColor,
verbose = verbose-1, indent = indent+1);
#MEs = consensusOrderMEs(MEs, useAbs = useAbs, greyLast = FALSE);
#collectGarbage();
} else if (nlevels(as.factor(labels))!=checkMEs\$nGenes)
{
if ((iteration==1) & (verbose>0)) printFlush(paste(spaces, "  Number of given module labels",
"does not match number of given MEs => recalculating the MEs."))
MEs = multiSetMEs(multiExpr.imputed, colors = NULL, universalColors = labels,
impute = impute,
subHubs = TRUE, trapErrors = FALSE, excludeGrey = TRUE,
grey = unassdColor,
verbose = verbose-1, indent = indent+1);
#MEs = consensusOrderMEs(MEs, useAbs = useAbs, greyLast = FALSE);
#collectGarbage();
}
if (iteration==1) oldMEs = MEs;

# Check labels for number of distinct labels that are not grey

colLevs = as.character(levels(as.factor(labels)));
if  ( length(colLevs[colLevs!=as.character(unassdColor)])<2 )
{
printFlush(paste(spaces,
"mergeCloseModules: less than two proper modules."));
printFlush(paste(spaces, " ..color levels are",
paste(colLevs, collapse = ", ")));
printFlush(paste(spaces, " ..there is nothing to merge."));
MergedNewColors = labels;
MergedColors = labels;
nOldMods = 1; nNewMods = 1;
oldTree = NULL; Tree = NULL;
break;
}

# Cluster the found module eigengenes and merge ones that are too close according to the specified
# quantile.

nOldMods = nlevels(as.factor(labels));

ConsDiss = .hierarchicalConsensusMEDissimilarity(MEs, networkOptions = networkOptions,
consensusTree = consensusTree,
greyName = greyName,
calibrate = calibrateMESimilarities);

Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
if (iteration==1) oldTree = Tree;
TreeBranches = as.factor(moduleNumber(dendro = Tree, cutHeight = cutHeight, minSize = 1));
UniqueBranches = levels(TreeBranches);
nBranches = nlevels(TreeBranches)
NumberOnBranch = table(TreeBranches);
MergedColors = labels;

# Merge modules on the same branch

for (branch in 1:nBranches) if (NumberOnBranch[branch]>1)
{
ModulesOnThisBranch = names(TreeBranches)[TreeBranches==UniqueBranches[branch]];
ColorsOnThisBranch = substring(ModulesOnThisBranch, 3);
if (is.numeric(origColors)) ColorsOnThisBranch = as.numeric(ColorsOnThisBranch);
if (verbose>3)
printFlush(paste(spaces, "  Merging original labels",
paste(ColorsOnThisBranch, collapse=", ")));
for (color in 2:length(ColorsOnThisBranch))
MergedColors[MergedColors==ColorsOnThisBranch[color]] = ColorsOnThisBranch[1];
}

MergedColors = MergedColors[, drop = TRUE];

nNewMods = nlevels(as.factor(MergedColors));

if (nNewMods<nOldMods & iterate)
{
labels = MergedColors;
MEs = NULL;
} else {
done = TRUE;
}
iteration = iteration+1;
}
if (relabel)
{
RawModuleColors = levels(as.factor(MergedColors));
# relabel the merged labels to the usual order based on the number of genes in each module
if (is.null(colorSeq))
{
if (is.numeric(origColors)) {
colorSeq = c(1:length(table(origColors)));
} else {
nNewColors = length(RawModuleColors);
colorSeq = labels2colors(c(1:nNewColors))
}
}

# nGenesInModule = rep(0, nNewMods);
# for (mod in 1:nNewMods) nGenesInModule[mod] = sum(MergedColors==RawModuleColors[mod]);
nGenesInModule = table(MergedColors);

SortedRawModuleColors = RawModuleColors[order(-nGenesInModule)]

# Change the color names to the standard sequence, but leave grey grey
# (that's why rank in general does not equal color)
MergedNewColors = MergedColors;
if (is.factor(MergedNewColors)) MergedNewColors = as.character(MergedNewColors);
if (verbose>3) printFlush(paste(spaces, "   Changing original labels:"));
rank = 0;
for (color in 1:length(SortedRawModuleColors)) if (SortedRawModuleColors[color]!=unassdColor)
{
rank = rank + 1;
if (verbose>3) printFlush(paste(spaces, "      ", SortedRawModuleColors[color],
"to ", colorSeq[rank]));
MergedNewColors[MergedColors==SortedRawModuleColors[color]] = colorSeq[rank];
}
if (is.factor(MergedColors)) MergedNewColors = as.factor(MergedNewColors);
} else {
MergedNewColors = MergedColors;
}
MergedNewColors = MergedNewColors[, drop = TRUE];

if (getNewMEs)
{
if (nNewMods<nOldMods | relabel | getNewUnassdME)
{
if (verbose>0) printFlush(paste(spaces, "  Calculating new MEs..."));
NewMEs = multiSetMEs(multiExpr.imputed, colors = NULL, universalColors = MergedNewColors,
impute = impute, subHubs = TRUE, trapErrors = FALSE,
excludeGrey = !getNewUnassdME, grey = unassdColor,
verbose = verbose-1, indent = indent+1);
newMEs = orderMEsByHierarchicalConsensus(NewMEs, networkOptions,
consensusTree,
greyName = greyName, calibrate = calibrateMESimilarities);

ConsDiss = .hierarchicalConsensusMEDissimilarity(newMEs, networkOptions = networkOptions,
consensusTree = consensusTree,
greyName = greyName,
calibrate = calibrateMESimilarities);
if (length(ConsDiss) > 1)
{
Tree = fastcluster::hclust(as.dist(ConsDiss), method = "average");
} else Tree = NULL;
} else {
newMEs = MEs;
}
} else {
newMEs = NULL;
}
#}, silent = TRUE);

#if (class(ok)=='try-error')
#{
#  if (!trapErrors) stop(ok);
#  if (verbose>0)
#  {
#    printFlush(paste(spaces, "Warning: merging of modules failed with the following error:"));
#    printFlush(paste('   ', spaces, ok));
#    printFlush(paste(spaces, " --> returning unmerged modules and *no* eigengenes."));
#  }
#  warning(paste("mergeCloseModules: merging of modules failed with the following error:\n",
#                "    ", ok, " --> returning unmerged modules and *no* eigengenes.\n"));
#  list(labels = origColors, allOK = FALSE);
#} else {
list(labels = MergedNewColors, dendro = Tree, oldDendro = oldTree, cutHeight = cutHeight,
oldMEs = oldMEs, newMEs = newMEs, allOK = TRUE);
#}
}

# ===================================================
#For hard thresholding, we use the signum (step) function

{
}

# ===================================================
# For soft thresholding, one can use the sigmoid function
# But we have focused on the power adjacency function in the tutorial...
{
1/(1+exp(-alpha*(ss-mu)))
}

#This function is useful for speeding up the connectivity calculation.
#The idea is to partition the adjacency matrix into consecutive baches of a #given size.
#In principle, the larger the block size the faster is the calculation. But #smaller blockSizes require #less memory...
# Input: gene expression data set where *rows* correspond to microarray samples #and columns correspond to genes.
# If fewer than minNSamples contain gene expression information for a given
# gene, then its connectivity is set to missing.

softConnectivity=function(datExpr,
corFnc = "cor", corOptions = "use = 'p'",
weights = NULL,
type = "unsigned",
power = if (type == "signed") 15 else 6,
blockSize = 1500, minNSamples = NULL,
verbose = 2, indent = 0)
{
spaces = indentSpaces(indent);
nGenes=dim(datExpr)[[2]]

if (blockSize * nGenes>.largestBlockSize) blockSize = as.integer(.largestBlockSize/nGenes);
nSamples=dim(datExpr)[[1]]
if (is.null(minNSamples))
{
minNSamples = max(..minNSamples, nSamples/3);
}

if (nGenes<..minNGenes | nSamples<minNSamples )
stop(paste("Error: Something seems to be wrong. \n",
"   Make sure that the input data frame has genes as rows and array samples as columns.\n",
"   Alternatively, there seem to be fewer than", ..minNGenes, "genes or fewer than",
minNSamples, "samples. ") )
if (nGenes<nSamples )
printFlush("Warning: There are fewer genes than samples in the function softConnectivity. Maybe you should transpose the data?")

k=rep(NA,nGenes)
start = 1;
if (verbose>0) {
printFlush(paste(spaces, "softConnectivity: FYI: connecitivty of genes with less than",
ceiling(minNSamples), "valid samples will be returned as NA."));
cat(paste(spaces, "..calculating connectivities.."));
pind = initProgInd();
}
while (start < nGenes)
{
end = min(start + blockSize-1, nGenes);
index1=start:end;
ad1 = adjacency(datExpr, weights = weights, selectCols = index1, power = power, type = type,
corFnc = corFnc, corOptions = corOptions);
# If fewer than minNSamples contain gene expression information for a given
# gene, then we set its connectivity to 0.
NoSamplesAvailable=colSums(!is.na(datExpr[,index1]))
k[index1][NoSamplesAvailable< minNSamples]=NA
if (verbose>0) pind = updateProgInd(end/nGenes, pind);
start = end + 1;
}
if (verbose > 0) printFlush("");
k
} # end of function

# ==============================================================================
# The function PickHardThreshold can help one to estimate the cut-off value
# when using the signum (step) function.
# The first column lists the threshold ("cut"),
# the second column lists the corresponding p-value based on the Fisher Transform
# of the correlation.
# The third column reports the resulting scale free topology fitting index R^2.
# The fourth column reports the slope of the fitting line, it shoud be negative for
# biologically meaningul networks.
# The fifth column reports the fitting index for the truncated exponential model.
# Usually we ignore it.
# The remaining columns list the mean, median and maximum resulting connectivity.
# To pick a hard threshold (cut) with the scale free topology criterion:
# aim for high scale free R^2 (column 3), high connectivity (col 6) and negative slope
# (around -1, col 4).
# The output is a list with 2 components. The first component lists a sugggested cut-off
# while the second component contains the whole table.
# The removeFirst option removes the first point (k=0, P(k=0)) from the regression fit.
# nBreaks specifies how many intervals used to estimate the frequency p(k) i.e. the no. of points in the
# scale free topology plot.

pickHardThreshold=function (data, dataIsExpr = TRUE, RsquaredCut = 0.85, cutVector = seq(0.1, 0.9,
by = 0.05), moreNetworkConcepts=FALSE , removeFirst = FALSE, nBreaks = 10, corFnc = "cor",
corOptions = "use = 'p'")
{
nGenes = dim(data)[[2]]
colname1 = c("Cut", "p-value", "SFT.R.sq", "slope=",
"truncated R^2", "mean(k)", "median(k)", "max(k)")
if(moreNetworkConcepts) {
colname1=c(colname1,"Density", "Centralization", "Heterogeneity")
}
if (!dataIsExpr)
{
if (any(diag(data)!=1)) diag(data) = 1;
} else
nSamples = dim(data)[[1]]

datout = data.frame(matrix(NA, nrow = length(cutVector),
ncol = length(colname1)))
names(datout) = colname1
datout[, 1] = cutVector
if (dataIsExpr)
{
for (i in 1:length(cutVector))
{
cut1 = cutVector[i]
datout[i, 2] = 2 * (1 - pt(sqrt(nSamples - 1) * cut1/sqrt(1 -
cut1^2), nSamples - 1))
}
} else
datout[, 2] = NA;

fun1 = function(x, dataIsExpr) {
if (dataIsExpr)
{
corExpr = parse(text = paste(corFnc, "(x, data",
prepComma(corOptions), ")"))
corx = abs(eval(corExpr))
} else
corx = x;
out1 = rep(NA, length(cutVector))
for (j in c(1:length(cutVector))) {
out1[j] = sum(corx > cutVector[j], na.rm = TRUE)
}
out1
}
datk = t(apply(data, 2, fun1, dataIsExpr))
for (i in c(1:length(cutVector))) {
khelp= datk[, i] - 1
SFT1=scaleFreeFitIndex(k=khelp,nBreaks=nBreaks,removeFirst=removeFirst)
datout[i, 3] = SFT1\$Rsquared.SFT
datout[i, 4] = SFT1\$slope.SFT
datout[i, 6] = mean(khelp,na.rm = TRUE)
datout[i, 7] = median(khelp,na.rm = TRUE)
datout[i, 8] = max(khelp,na.rm = TRUE)
if(moreNetworkConcepts) {
Density = sum(khelp)/(nGenes * (nGenes - 1))
datout[i, 9] =Density
Centralization = nGenes*(max(khelp)-mean(khelp))/((nGenes-1)*(nGenes-2))
datout[i, 10] = Centralization
Heterogeneity = sqrt(nGenes * sum(khelp^2)/sum(khelp)^2 - 1)
datout[i, 11] = Heterogeneity
}
}
datout = as.data.frame(lapply(datout, as.numeric));
print(signif(data.frame(datout),3))
ind1 = datout[, 3] > RsquaredCut
indcut = NA
indcut = if (sum(ind1) > 0) min(c(1:length(ind1))[ind1]) else indcut;
cutEstimate = cutVector[indcut][[1]]
list(cutEstimate = cutEstimate, fitIndices = data.frame(datout))
} # end of function pickHardThreshold

#==============================================================================================
#
# pickSoftThreshold
#
#===============================================================================================
# The function pickSoftThreshold allows one to estimate the power parameter when using
# a soft thresholding approach with the use of the power function AF(s)=s^Power
# The removeFirst option removes the first point (k=1, P(k=1)) from the regression fit.
# PL: a rewrite that splits the data into a few blocks.
# SH: more netowkr concepts added.
# PL: re-written for parallel processing
# Alexey Sergushichev: speed up by pre-calculating correlation powers

pickSoftThreshold = function (
data,
dataIsExpr = TRUE,
weights = NULL,
RsquaredCut = 0.85,
powerVector = c(seq(1, 10, by = 1), seq(12, 20, by = 2)),
removeFirst = FALSE, nBreaks = 10, blockSize = NULL,
corFnc = cor, corOptions = list(use = 'p'),
networkType = "unsigned",
moreNetworkConcepts = FALSE,
gcInterval = NULL,
verbose = 0, indent = 0)
{
powerVector = sort(powerVector)
intType = charmatch(networkType, .networkTypes)
if (is.na(intType))
stop(paste("Unrecognized 'networkType'. Recognized values are",
paste(.networkTypes, collapse = ", ")))
nGenes = ncol(data);
if (nGenes<3)
{
stop("The input data data contain fewer than 3 rows (nodes).",
"\nThis would result in a trivial correlation network." )
}
if (!dataIsExpr)
{
checkSimilarity(data);
if (any(diag(data)!=1)) diag(data) = 1;
}

if (is.null(blockSize))
{
blockSize = blockSize(nGenes, rectangularBlocks = TRUE, maxMemoryAllocation = 2^30);
if (verbose > 0)
printFlush(spaste("pickSoftThreshold: will use block size ", blockSize, "."))
}
if (length(gcInterval)==0) gcInterval = 4*blockSize;

colname1 = c("Power", "SFT.R.sq", "slope", "truncated R.sq",
"mean(k)", "median(k)", "max(k)")
if(moreNetworkConcepts)
{
colname1=c(colname1,"Density", "Centralization", "Heterogeneity")
}
datout = data.frame(matrix(666, nrow = length(powerVector), ncol = length(colname1)))
names(datout) = colname1
datout[, 1] = powerVector
spaces = indentSpaces(indent)
if (verbose > 0) {
cat(paste(spaces, "pickSoftThreshold: calculating connectivity for given powers..."))
if (verbose == 1) pind = initProgInd()
else cat("\n")
}

# if we're using one of WGNCA's own correlation functions, set the number of threads to 1.
corFnc = match.fun(corFnc);
corFormals = formals(corFnc);

# Resulting connectivities
datk = matrix(0, nrow = nGenes, ncol = length(powerVector))

# Number of threads. In this case I need this explicitly.

nPowers = length(powerVector);

# Main loop
startG = 1
lastGC = 0;
corOptions\$x = data;
if (!is.null(weights))
{
if (!dataIsExpr)
stop("Weights can only be used when 'data' represents expression data ('dataIsExpr' must be TRUE).");
if (!isTRUE(all.equal(dim(data), dim(weights))))
stop("When 'weights' are given, dimensions of 'data' and 'weights' must be the same.");
corOptions\$weights.x = weights;
}
while (startG <= nGenes)
{
endG = min (startG + blockSize - 1, nGenes)

if (verbose > 1)
printFlush(paste(spaces, "  ..working on genes", startG, "through", endG, "of", nGenes))

nBlockGenes = endG - startG + 1;
# This assumes that the non-zero length allocations
# precede the zero-length ones
actualThreads = which(sapply(jobs, length) > 0);

datk[ c(startG:endG), ] = foreach(t = actualThreads, .combine = rbind) %dopar%
{
useGenes = c(startG:endG)[ jobs[[t]] ]
nGenes1 = length(useGenes);
if (dataIsExpr)
{
corOptions\$y = data[ , useGenes];
if (!is.null(weights))
corOptions\$weights.y = weights[ , useGenes];
corx = do.call(corFnc, corOptions);
if (intType == 1) {
corx = abs(corx)
} else if (intType == 2) {
corx = (1 + corx)/2
} else if (intType == 3) {
corx[corx < 0] = 0
}
if (sum(is.na(corx)) != 0)
warning(paste("Some correlations are NA in block",
startG, ":", endG, "."));
} else {
corx = data[, useGenes];
}
# Set the diagonal elements of corx to exactly 1. Possible small numeric errors can in extreme cases lead to
# negative connectivities.
ind = cbind(useGenes, 1:length(useGenes));
corx[ind] = 1;
datk.local = matrix(NA, nGenes1, nPowers);
corxPrev = matrix(1, nrow=nrow(corx), ncol=ncol(corx))
powerSteps <- powerVector - powerVector1
uniquePowerSteps <- unique(powerSteps)
corxPowers <- lapply(uniquePowerSteps, function(p) corx^p)
names(corxPowers) <- uniquePowerSteps
for (j in 1:nPowers) {
corxCur <- corxPrev * corxPowers[[as.character(powerSteps[j])]]
datk.local[, j] = colSums(corxCur, na.rm = TRUE) - 1
corxPrev <- corxCur
};
datk.local
} # End of %dopar% evaluation
# Move to the next block of genes.
startG = endG + 1
if ((gcInterval > 0) && (startG - lastGC > gcInterval)) { gc(); lastGC = startG; }
if (verbose == 1) pind = updateProgInd(endG/nGenes, pind)
}
if (verbose == 1) printFlush("");

for (i in c(1:length(powerVector)))
{
khelp= datk[, i]
if (any(khelp < 0)) browser();
SFT1=scaleFreeFitIndex(k=khelp,nBreaks=nBreaks,removeFirst=removeFirst)
datout[i, 2] = SFT1\$Rsquared.SFT
datout[i, 3] = SFT1\$slope.SFT
datout[i, 5] = mean(khelp,na.rm = TRUE)
datout[i, 6] = median(khelp,na.rm = TRUE)
datout[i, 7] = max(khelp,na.rm = TRUE)
if(moreNetworkConcepts)
{
Density = sum(khelp)/(nGenes * (nGenes - 1))
datout[i, 8] =Density
Centralization = nGenes*(max(khelp)-mean(khelp))/((nGenes-1)*(nGenes-2))
datout[i, 9] = Centralization
Heterogeneity = sqrt(nGenes * sum(khelp^2)/sum(khelp)^2 - 1)
datout[i, 10] = Heterogeneity
}
}
print(signif(data.frame(datout),3))
ind1 = datout[, 2] > RsquaredCut
indcut = NA
indcut = if (sum(ind1) > 0) min(c(1:length(ind1))[ind1]) else indcut;
powerEstimate = powerVector[indcut][[1]]
gc();
list(powerEstimate = powerEstimate, fitIndices = data.frame(datout))
}

# ===================================================
# The function ScaleFreePlot1 creates a plot for checking scale free topology
# when truncated1 = TRUE is specificed, it provides the R^2 measures for the following
# degree distributions: a) scale free topology, b) log-log R^2 and c) truncated exponential R^2

# The function ScaleFreePlot1 creates a plot for checking scale free topology

scaleFreePlot = function(connectivity, nBreaks=10, truncated = FALSE, removeFirst = FALSE, main = "", ...)
{
k = connectivity
discretized.k = cut(k, nBreaks)
dk = tapply(k, discretized.k, mean)
p.dk = as.vector(tapply(k, discretized.k, length)/length(k))
breaks1 = seq(from = min(k), to = max(k),
length = nBreaks + 1)
hist1 = suppressWarnings(hist(k, breaks = breaks1, equidist = FALSE, plot = FALSE, right = TRUE, ...))
dk2 = hist1\$mids
dk = ifelse(is.na(dk), dk2, dk)
dk = ifelse(dk == 0, dk2, dk)
p.dk = ifelse(is.na(p.dk), 0, p.dk)
log.dk = as.vector(log10(dk))
if (removeFirst) {
p.dk = p.dk[-1]
log.dk = log.dk[-1]
}
log.p.dk= as.numeric(log10(p.dk + 1e-09))
lm1 = lm(log.p.dk ~ log.dk)
if (truncated==TRUE)
{
lm2 = lm(log.p.dk ~ log.dk + I(10^log.dk))
slope=round(lm1\$coefficients[[2]],2),
printFlush("the red line corresponds to the truncated exponential fit")
title = paste(main,
", slope=", round(lm1\$coefficients[[2]],2),
} else {
title = paste(main, " scale R^2=",as.character(round(summary(lm1)\$adj.r.squared,2)),
", slope=", round(lm1\$coefficients[[2]],2))
slope=round(lm1\$coefficients[[2]],2))
}

suppressWarnings(plot(log.dk, log.p.dk, xlab="log10(k)", ylab="log10(p(k))", main = title, ... ))
lines(log.dk,predict(lm1),col=1)
if (truncated) lines(log.dk, predict(lm2), col = 2)
OUTPUT
} # end of function

##############################################################################################
##############################################################################################
# B) Computing the topological overlap matrix
##############################################################################################
##############################################################################################

# ===================================================
#The function TOMdist computes a dissimilarity
# based on the topological overlap matrix (Ravasz et al)
# Input: an Adjacency matrix with entries in [0,1]
#
#  ************* Removed: use 1-TOMsimilarity(adjMat). ***********************
#
#{
#if (maxh1>1 | minh1 < 0 )
#stop(paste("The adjacency matrix contains entries that are larger than 1 or",
#"smaller than 0: max =",maxh1,", min =",minh1))
#Dhelp1=matrix(connectivity,ncol=length(connectivity),nrow=length(connectivity))
#gc();gc();
##TOMmatrix=numTOM/denomTOM
## this turns the TOM matrix into a dissimilarity
#out1=1-as.matrix(numTOM/denomTOM)
#diag(out1)=1
## setting the diagonal to 1 is unconventional (it should be 0)
## but it leads to nicer looking TOM plots...
#out1
#}

##---------------------------------------------------------------------------
## This is a somewhat modified TOMdist - most checks are left out as they are
## often not necessary.
#
#  ******* This function is not necessary anymore. Left out. ***********
#
#TOMdistNoChecks = function(adjMat, useActualMax = FALSE)
#{
#Dhelp1 = matrix(connectivity,ncol=length(connectivity),nrow=length(connectivity))
#rm(Dhelp1);
##TOMmatrix=numTOM/denomTOM
## this turns the TOM matrix into a dissimilarity
#out1=1-as.matrix(numTOM/denomTOM)
#rm(numTOM); rm(denomTOM);
#collectGarbage();
#diag(out1)=1
## setting the diagonal to 1 is unconventional (it should be 0)
## but it leads to nicer looking TOM plots...
#out1
#}

#---------------------------------------------------------------------------
# exact equivalent of TOMdistNoChecks above, but returns similarity.
# This function works with a generalized adjacency that can be signed.
# If the adjacency is signed, returned TOM will be signed as well (use abs(TOM) to get the usual unsigned
# topological overlap)
# If checkDiag and na.rm are turned both off, the function saves a bit of memory overhead.

# ************* this function is replaced by TOMsimilarity that calls compiled code.

#TOMsimilarity = function(adjMat, useActualMax = FALSE, checkDiag = TRUE, na.rm = TRUE)
#{
#Dhelp1 = matrix(connectivity,ncol=length(connectivity),nrow=length(connectivity))
#rm(Dhelp1);
##TOMmatrix=numTOM/denomTOM
## this turns the TOM matrix into a dissimilarity
#out1=as.matrix(numTOM/denomTOM)
#rm(numTOM); rm(denomTOM);
#collectGarbage();
#diag(out1)=1
#out1
#}

# ===================================================
# This function computes a TOMk dissimilarity
# which generalizes the topological overlap matrix (Ravasz et al)
# Input: an Adjacency matrix with entries in [0,1]
# WARNING:  ONLY FOR UNWEIGHTED NETWORKS, i.e. the adjacency matrix contains binary entries...
# This function is explained in Yip and Horvath (2005)
# http://www.genetics.ucla.edu/labs/horvath/GTOM/
GTOMdist = function(adjMat, degree = 1)
{
if (degree!=round(abs(degree)))
stop("'degree' must be a positive integer.");
if (maxh1>1 | minh1 < 0 )
stop(paste("Entries of the adjacency matrix are not between 0 and 1: max =",
maxh1,", min =",minh1))

stop("Given adjacency matrix is not symmetric.")

if (degree>=2) for (i in 2:degree)
{
diag(B) <- diag(B) + 1;
B = B %*% adjMat;# this gives the number of paths with length at most degree connecting a pair
}
B <- (B>0);   # this gives the degree-step reachability from a node to another
diag(B) <- 0;   # exclude each node being its own neighbor
B <- B %*% B   # this gives the number of common degree-step-neighbor that a pair of nodes share

Nk <- diag(B);
B <- B +adjMat;   # numerator
diag(B) <- 1;
diag(denomTOM) <- 1;
1 - B/denomTOM   # this turns the TOM matrix into a dissimilarity
}

#=============================================================================================
#
# vectorTOM: calculate TOM of a vector (or a 'small' matrix) with expression
# data. If the number of columns in vect is small (or 1), number of columns in
# datExpr can be large.
#
#============================================================================================

vectorTOM = function(datExpr, vect, subtract1 = FALSE, blockSize = 2000,
corFnc = "cor", corOptions = "use = 'p'", networkType = "unsigned", power = 6,
verbose = 1, indent = 0)
{
spaces = indentSpaces(indent);

intType = charmatch(networkType, .networkTypes)
if (is.na(intType))
stop(paste("Unrecognized 'networkType'. Recognized values are", paste(.networkTypes, collapse = ", ")));

if (is.null(dim(vect)))
{
vect = as.matrix(vect)
vectIsVector = TRUE;
} else vectIsVector = FALSE;

if (nrow(vect)!=nrow(datExpr))
stop("Input error: numbers of samples in 'vect' and 'datExpr' must be the same.");

if (ncol(vect)>blockSize)
stop(paste("Input error: number of columns in 'vect' is too large. ",
"If you are certain you want to try anyway, increase 'blockSize' to at least",
"the number of columns in 'vect'."));

corEval = parse(text = paste(corFnc, "(datExpr, vect ", prepComma(corOptions), ")"));
corVE = eval(corEval);
if (intType==1)
{ corVE = abs(corVE);
} else if (intType==2)
{ corVE = (1+corVE)/2;
} else if (intType==3)
{ corVE[corVE < 0] = 0;
} else
stop("Unrecognized networkType argument. Recognized values are 'unsigned', 'signed', and 'signed hybrid'.");

corVE = corVE^power;

subtract1 = as.numeric(subtract1);

nVect = ncol(vect); nGenes = ncol(datExpr);
TOM = matrix(NA, nrow = nGenes, ncol = nVect);

if (verbose > 0) {
if (verbose > 1) cat(paste(spaces, "Calculating TOM of a set of vectors with genes"));
pind = initProgInd();
}
start = 1;
denomArr = array(0, dim = c(2, blockSize, nVect));
while (start <= nGenes)
{
end = min(start + blockSize-1, nGenes);
blockInd = c(start:end);
corEval = parse(text = paste(corFnc, "(datExpr[, blockInd], datExpr ", prepComma(corOptions), ")"));
corEE = eval(corEval);
if (intType==1)
{ corEE = abs(corEE);
} else if (intType==2)
{ corEE = (1+corEE)/2;
} else if (intType==3)
{ corEE[corEE < 0] = 0;
}
corEE = corEE^power;
num = corEE %*% corVE -subtract1 * corVE[blockInd, ]
kV = apply(corVE, 2, sum, na.rm = TRUE) - subtract1
kE = apply(corEE, 1, sum, na.rm = TRUE) - 1;
denomArr[1, 1:(end-start+1), ] = matrix(kV, nrow = end-start+1, ncol = nVect, byrow = TRUE);
denomArr[2, 1:(end-start+1), ] = matrix(kE, nrow = end-start+1, ncol = nVect);
denom = apply(denomArr[, 1:(end-start+1), ], c(2,3), min) + 1 - corVE[blockInd, ];
TOM[blockInd, ] = num/denom;
if (verbose > 0) pind = updateProgInd(end/nGenes, pind);
start = end + 1;
gc()
}
if (verbose>0) printFlush(" ");

TOM;
}

#=============================================================================================
#
# subsetTOM: calculate TOM of a subset of vectors with respect to a full set of vectors.
#
#============================================================================================

subsetTOM = function(datExpr, subset,
corFnc = "cor", corOptions = "use = 'p'",
weights = NULL, networkType = "unsigned", power = 6,
verbose = 1, indent = 0)
{
spaces = indentSpaces(indent);

if (!is.null(dim(subset)))
stop("'subset' must be a dimensionless vector.");

if (is.null(dim(datExpr)))
stop("'datExpr' must be a matrix or data frame.");
if (length(dim(datExpr))!=2)
stop("'datExpr' must be two-dimensional.");

nGenes = ncol(datExpr);

if (is.logical(subset))
subset = c(1:nGenes)[subset];

nBlock = length(subset);

if (any(!is.finite(subset))) stop("Entries of 'subset' must all be finite.");

if (min(subset) < 1 | max(subset) > nGenes)
stop(paste("Some entries of 'subset' are out of range.",
"\nNote: 'subset' must contain indices of the subset for which the TOM is calculated."));

intType = charmatch(networkType, .networkTypes)
if (is.na(intType))
stop(paste("Unrecognized 'networkType'. Recognized values are", paste(.networkTypes, collapse = ", ")));

adj = adjacency(datExpr, weights = weights, selectCols = subset, power = power,
type = networkType, corFnc = corFnc,
corOptions = corOptions);

kMat = matrix(k, nBlock, nBlock);

denom = pmin(kMat, t(kMat)) - adj[subset, ];

TOM = num/denom;
diag(TOM) = 1;

TOM;
}

#---------------------------------------------------------------------
#
#
#---------------------------------------------------------------------
# Computes the adjacency from the expression data: takes cor, transforms it as appropriate and possibly
# adds a sign if requested. No subselection on datExpr is performed.
# A slighly reworked version that assumes one wants the adjacency matrix of data with itself or a
# subset. The data are given only once, and an additional selection index for columns is given.
# Caution: no checking of selectCols validity is performed.
# The probability method is removed as it's not used.

type = "unsigned", power = if (type=="distance") 1 else 6,
corFnc = "cor", corOptions = list(use = 'p'), weights = NULL,
distFnc = "dist", distOptions = "method = 'euclidean'",
weightArgNames = c("weights.x", "weights.y"))
{
if (is.na(intType))
stop(paste("Unrecognized 'type'. Recognized values are", paste(.adjacencyTypes, collapse = ", ")));

corFnc.fnc = match.fun(corFnc);

.checkAndScaleWeights(weights, datExpr, scaleByMax = FALSE);

if (length(weights) > 0)
{
if (is.null(selectCols))
{
if (is.list(corOptions))
{
weightOpt = list(weights.x = weights);
names(weightOpt) = weightArgNames[1];
} else weightOpt = spaste(weightArgNames[1], " = weights");
} else {
if (is.list(corOptions))
{
weightOpt = list(weights.x = weights, weights.y = weights[, selectCols]);
names(weightOpt) = weightArgNames[c(1,2)];
} else weightOpt = spaste(weightArgNames[1], " = weights, ", weightArgNames[2], " = weights[, selectCols]");
}
} else {
weightOpt = if (is.list(corOptions)) list() else ""
}

if (intType < 4)
{
if (is.null(selectCols))
{
if (is.list(corOptions))
{
cor_mat = do.call(corFnc.fnc, c(list(x = datExpr), weightOpt, corOptions))
} else {
corExpr = parse(text = paste(corFnc, "(datExpr ", prepComma(weightOpt), prepComma(corOptions), ")"));
# cor_mat = cor(datExpr, use = "p");
cor_mat = eval(corExpr);
}
} else {
if (is.list(corOptions))
{
cor_mat = do.call(corFnc.fnc, c(list(x = datExpr, y = datExpr[, selectCols]), weightOpt, corOptions))
} else {
corExpr = parse(text = paste(corFnc, "(datExpr, datExpr[, selectCols] ", prepComma(weightOpt),
prepComma(corOptions), ")"));
#cor_mat = cor(datExpr, datExpr[, selectCols], use="p");
cor_mat = eval(corExpr);
}
}
} else {
if (!is.null(selectCols))
stop("The argument 'selectCols' cannot be used for distance adjacency.");
if (is.list(distOptions))
{
d = do.call(distFnc, c(list(x = t(datExpr)), distOptions));
} else {
corExpr = parse(text = paste(distFnc, "(t(datExpr) ", prepComma(distOptions), ")"));
# cor_mat = cor(datExpr, use = "p");
d = eval(corExpr);
}
if (any(d<0))
warning("Function WGCNA::adjacency: Distance function returned (some) negative values.");
cor_mat = 1-as.matrix( (d/max(d, na.rm = TRUE))^2 );
}

if (intType==1)
{ cor_mat = abs(cor_mat);
} else if (intType==2)
{ cor_mat = (1+cor_mat)/2;
} else if (intType==3)
{ cor_mat[cor_mat < 0] = 0;
}
cor_mat^power;
}

# A presumably faster and less memory-intensive version, only for "unsigned" networks.

unsignedAdjacency = function(datExpr, datExpr2 = NULL, power = 6,
corFnc = "cor", corOptions = "use = 'p'")
{
corExpr = parse(text = paste(corFnc, "(datExpr, datExpr2 ", prepComma(corOptions), ")"));
# abs(cor(datExpr, datExpr2, use="p"))^power;
abs(eval(corExpr))^power;
}

#####################################################################################################
#####################################################################################################
# C) Defining gene modules using clustering procedures
#####################################################################################################
#####################################################################################################

cutreeStatic = function(dendro, cutHeight = 0.9, minSize = 50)
{
normalizeLabels(moduleNumber(dendro, cutHeight, minSize));
}

cutreeStaticColor = function(dendro, cutHeight = 0.9, minSize = 50)
{
labels2colors(normalizeLabels(moduleNumber(dendro, cutHeight, minSize)));
}

plotColorUnderTree = function(
dendro,
colors,
rowLabels = NULL,
rowWidths = NULL,
rowText = NULL,
rowTextAlignment = c("left", "center", "right"),
rowTextIgnore = NULL,
textPositions = NULL,
cex.rowLabels = 1,
cex.rowText = 0.8,
separatorLine.col = "black",
...)
{
plotOrderedColors(
dendro\$order,
colors = colors,
rowLabels = rowLabels,
rowWidths = rowWidths,
rowText = rowText,
rowTextAlignment = rowTextAlignment,
rowTextIgnore = rowTextIgnore,
textPositions = textPositions,
cex.rowLabels = cex.rowLabels,
cex.rowText = cex.rowText,
startAt = 0,
align = "center",
separatorLine.col = separatorLine.col,
...);
}

plotOrderedColors = function(
order,
colors,
main = "",
rowLabels = NULL,
rowWidths = NULL,
rowText = NULL,
rowTextAlignment = c("left", "center", "right"),
rowTextIgnore = NULL,
textPositions = NULL,
cex.rowLabels = 1,
cex.rowText = 0.8,
startAt = 0,
align = c("center", "edge"),
separatorLine.col = "black",
...)
{
sAF = options("stringsAsFactors")
options(stringsAsFactors = FALSE);
on.exit(options(stringsAsFactors = sAF[[1]]), TRUE)
barplot(height=1, col = "white", border=FALSE, space=0, axes=FALSE, main = main)
align = match.arg(align);
.plotOrderedColorSubplot(
order = order, colors = colors,
rowLabels = rowLabels,
rowWidths = rowWidths,
rowText = rowText,
rowTextAlignment = rowTextAlignment,
rowTextIgnore = rowTextIgnore,
textPositions = textPositions,
cex.rowLabels = cex.rowLabels,
cex.rowText = cex.rowText,
startAt = startAt,
horizontal = TRUE,
align = align,
separatorLine.col = separatorLine.col,
...);
}

.transformCoordinates = function(x, y, angle, oldBox = c(0, 1, 0, 1), newBox = c(0, 1, 0, 1))
{
xt0 = x * cos(angle) - y * sin(angle);
yt0 = x * sin(angle) + y * cos(angle);

trBox.x = oldBox[c(1, 2)] * cos(angle) - oldBox[c(3,4)] * sin(angle)
trBox.y = oldBox[c(1, 2)] * sin(angle) + oldBox[c(3,4)] * cos(angle);

# the shift calculation basically assumes rotations only in multiples of 90 degrees...
scale.x = (newBox[2] - newBox[1])/(trBox.x[2] - trBox.x[1])
scale.y = (newBox[4] - newBox[3])/(trBox.y[2] - trBox.y[1]);

list(x = (xt0 - trBox.x[1]) * scale.x + newBox[1],
y = (yt0 - trBox.y[1]) * scale.y + newBox[3]);
}

.plotOrderedColorSubplot = function(
order,
colors,
rowLabels = NULL,
rowWidths = NULL,
rowText = NULL,
rowTextAlignment = c("left", "center", "right"),
rowTextIgnore = NULL,
textPositions = NULL,
textGuide.col = "darkgrey",
textGuide.lty = 3,
cex.rowLabels = 1,
cex.rowText = 0.8,
startAt = 0,
plotBox = NULL,  # Defaults to user-coordinate limits rotated according to "horizontal"
horizontal = TRUE,
rowLabelsAngle = NULL,   ## Defaults to the angle of the colors
rowLabelsPosition = "left",
align = c("center", "edge"),
limExpansionFactor.x = if (align=="center") 0.04 else 0,
limExpansionFactor.y = limExpansionFactor.x,
separatorLine.col = "black",
checkOrderLength = TRUE,
...)
{
if (length(colors)==0) return(NULL);
align = match.arg(align);
colors = as.matrix(colors);
dimC = dim(colors)
if (is.null(rowLabels) & (length(dimnames(colors)[[2]])==dimC[2]))
rowLabels = colnames(colors);
nColorRows = dimC[2];
if (checkOrderLength && (length(order) != dimC[1]) )
stop("Length of colors vector not compatible with number of objects in 'order'.");
C = colors[order, , drop = FALSE];
nColumns = dimC[1];

# Old plot box. could in principle be anything but the current value allows me to also get scaling of inches to user
# coordinates and the character width and height.
plotBox.full = par("usr");
pin = par("pin");
inchToUsr.x = (plotBox.full[2] - plotBox.full[1])/pin[1];
inchToUsr.y = (plotBox.full[4] - plotBox.full[3])/pin[2];
charWidth = strwidth("W", units = "inches") * (if (horizontal) inchToUsr.x else inchToUsr.y);

plotBox.contracted = plotBox.full;
fullRange.x = plotBox.full[2] - plotBox.full[1];
fullRange.y = plotBox.full[4] - plotBox.full[3];
limContractionFactor.x = limExpansionFactor.x/(1+2*limExpansionFactor.x);
plotBox.contracted[1] = plotBox.contracted[1] + limContractionFactor.x * fullRange.x;
plotBox.contracted[2] = plotBox.contracted[2] - limContractionFactor.x * fullRange.x;
range.x = plotBox.contracted[2] - plotBox.contracted[1];
limContractionFactor.y = limExpansionFactor.y/(1+2*limExpansionFactor.y);
plotBox.contracted[3] = plotBox.contracted[3] + limContractionFactor.y * fullRange.y;
plotBox.contracted[4] = plotBox.contracted[4] - limContractionFactor.y * fullRange.y;
range.x = plotBox.contracted[2] - plotBox.contracted[1];
range.y = plotBox.contracted[4] - plotBox.contracted[3];

step = range.x/(dimC[1] - (align=="center") + 2*startAt);

if (is.null(plotBox))
{
plotBox = par("usr");
if (!horizontal) plotBox = plotBox[c(3,4,1,2)];
}

if (!is.null(rowText))
{
if (is.null(textPositions)) textPositions = c(1:nColorRows);
if (is.logical(textPositions)) textPositions = c(1:nColorRows)[textPositions];
nTextRows = length(textPositions);
} else
nTextRows = 0;
nRows = nColorRows + nTextRows;
if (is.null(rowWidths))
{
ystep = range.y/nRows;
rowWidths = rep(ystep, nColorRows + nTextRows)
} else {
if (length(rowWidths)!=nRows)
stop("plotOrderedColors: Length of 'rowWidths' must equal the total number of rows.")
rowWidths = range.y * rowWidths/sum(rowWidths);
}

hasText = rep(0, nColorRows);
hasText[textPositions] = 1;
csPosition = cumsum(c(0, hasText[-nColorRows]));

colorRows = c(1:nColorRows) + csPosition;
rowType = rep(2, nRows);
rowType[colorRows] = 1;

physicalTextRow = c(1:nRows)[rowType==2];

yBottom = c(plotBox.contracted[3], plotBox.contracted[3] + cumsum(rowWidths[nRows:1])) ;
## Has one extra entry but that shouldn't hurt
yTop = plotBox.contracted[3] + cumsum(rowWidths[nRows:1])

if (!is.null(rowText))
{
rowTextAlignment = match.arg(rowTextAlignment);
rowText = as.matrix(rowText)
textPos = list();
textPosY = list();
textLevs = list();
for (tr in 1:nTextRows)
{
charHeight.in = max(strheight(rowText[, tr], units = "inches", cex = cex.rowText));
charHeight.scaled = charHeight.in * (if (horizontal) 1/pin[2] else dimC[1]/pin[1])
charHeight.scaled = charHeight.scaled * ( if (horizontal) range.y / abs(plotBox[4] - plotBox[3]) else
range.x / abs(plotBox[2] - plotBox[1]));
width1 = rowWidths[ physicalTextRow[tr] ];
nCharFit = floor(width1/charHeight.scaled/1.7/par("lheight"));
if (nCharFit<1) stop("Rows are too narrow to fit text. Consider decreasing cex.rowText.");
set = textPositions[tr];
#colLevs = sort(unique(colors[, set]));
#textLevs[[tr]] = rowText[match(colLevs, colors[, set]), tr];
textLevs[[tr]] = sort(unique(rowText[, tr]));
textLevs[[tr]] = textLevs[[tr]] [ !textLevs[[tr]] %in% rowTextIgnore ];
nLevs = length(textLevs[[tr]]);
textPos[[tr]] = rep(0, nLevs);
orderedText = rowText[order, tr]
for (cl in 1:nLevs)
{
ind = orderedText == textLevs[[tr]][cl];
sind = ind[-1];
ind1 = ind[-length(ind)];
starts = c( if (ind[1]) 1 else NULL, which(!ind1 & sind)+1)
ends = which(c(ind1 & !sind, ind[length(ind)] ));
if (length(starts)==0) starts = 1;
if (length(ends)==0) ends = length(ind);
if (ends[1] < starts[1]) starts = c(1, starts);
if (ends[length(ends)] < starts[length(starts)]) ends = c(ends, length(ind));
lengths = ends - starts;
long = which.max(lengths);
textPos[[tr]][cl] = switch(rowTextAlignment,
left = starts[long],
center = (starts[long] + ends[long])/2 + 0.5,
right = ends[long]+1);
}
if (rowTextAlignment=="left") {
yPos = seq(from = 1, to=nCharFit, by=1) / (nCharFit+1);
} else {
yPos = seq(from = nCharFit, to=1, by=-1) / (nCharFit+1);
}
textPosY[[tr]] = rep(yPos, ceiling(nLevs/nCharFit)+5)[1:nLevs][rank(textPos[[tr]])];
}
}
jIndex = nRows;
colorRectangles = list();
if (is.null(rowLabels)) rowLabels = c(1:nColorRows);
C[is.na(C)] = "grey"
if (align=="edge") alignShift = 0 else alignShift = 0.5;
angle.deg = if (horizontal) 0 else 90;
angle = angle.deg * pi/180;
if (is.null(rowLabelsAngle)) rowLabelsAngle = angle.deg;
for (j in 1:nColorRows)
{
jj = jIndex;
ind = 1:nColumns;
xl = plotBox.contracted[1] + (ind- 1 - alignShift + startAt) * step; xr = xl + step;
xl[xl < plotBox.full[1]] = plotBox.full[1];
xr[xr > plotBox.full[2]] = plotBox.full[2];
yb = rep(yBottom[jj], dimC[1]); yt = rep(yTop[jj], dimC[1]);
trafo1 = .transformCoordinates(xl, yb, angle = angle, oldBox = plotBox.full, newBox = plotBox)
trafo2 = .transformCoordinates(xr, yt, angle = angle, oldBox = plotBox.full, newBox = plotBox)
if (is.null(dim(C))) {
rect(trafo1\$x, trafo1\$y, trafo2\$x, trafo2\$y, col = as.character(C), border = as.character(C), xpd = TRUE);
} else {
rect(trafo1\$x, trafo1\$y, trafo2\$x, trafo2\$y, col = as.character(C[,j]), border = as.character(C[,j]), xpd = TRUE);
}
colorRectangles[[j]] = list(xl = trafo1\$x, yb = trafo1\$y, xr = trafo2\$x, yt = trafo2\$y);
rowLabelPos = .transformCoordinates(
x= if (rowLabelsPosition=="left") xl[1] else xr[nColumns],
y= (yBottom[jj] + yTop[jj])/2, angle = angle, oldBox = plotBox.full, newBox = plotBox);
xs1 = if (horizontal) charWidth/2 else 0;
ys1 = if (horizontal) 0 else charWidth/2;
if (rowLabelsPosition!="left") { xs1 = -xs1; ys1 = -ys1; }
text(rowLabels[j], adj = c(if (rowLabelsPosition=="left") 1 else 0, 0.5), x= rowLabelPos\$x-xs1, y= rowLabelPos\$y-ys1,
srt = rowLabelsAngle, cex=cex.rowLabels, xpd = TRUE);
textRow = match(j, textPositions);
if (is.finite(textRow))
{
jIndex = jIndex - 1;
xt = (textPos[[textRow]] - 1 - alignShift + startAt) * step + plotBox.contracted[1];
xt[xt<plotBox.full[1]] = plotBox.full[1];
xt[xt>plotBox.full[2]] = plotBox.full[2];
yt = yBottom[jIndex] + (yTop[jIndex]-yBottom[jIndex]) * (textPosY[[textRow]] + 1/(2*nCharFit+2));
nt = length(textLevs[[textRow]]);
trafo1 = .transformCoordinates(xt, yt, angle = angle, oldBox = plotBox.full, newBox = plotBox);
trafo2 = .transformCoordinates(xt, yTop[jIndex], angle = angle, oldBox = plotBox.full, newBox = plotBox);
for (l in 1:nt)
lines(c(trafo1\$x[l], trafo2\$x[l]), c(trafo1\$y[l], trafo2\$y[l]), col = textGuide.col, lty = textGuide.lty);
textAdj = c(0, 0.5, 1)[ match(rowTextAlignment, c("left", "center", "right")) ];
text(textLevs[[textRow]], x = trafo1\$x, y = trafo1\$y, adj = c(textAdj, 1), xpd = TRUE, cex = cex.rowText)
# printFlush("ok");
}
jIndex = jIndex - 1;
}
if (!is.na(separatorLine.col))
{
trafo1 = .transformCoordinates(min(xl), yBottom, angle = angle, oldBox = plotBox.full, newBox = plotBox);
trafo2 = .transformCoordinates(max(xr), yBottom, angle = angle, oldBox = plotBox.full, newBox = plotBox);
for (j in 1:(nColorRows + nTextRows+1))
lines(x=c(trafo1\$x[j], trafo2\$x[j]), y=c(trafo1\$y[j], trafo2\$y[j]), col = separatorLine.col);
}
invisible(list(colorRectangles = colorRectangles));
}

#========================================================================================================
# This function can be used to create an average linkage hierarchical
# clustering tree
# or the microarray samples. The rows of datExpr correspond to the samples and
# the columns to the genes
# You can optionally input a quantitative microarray sample trait.

plotClusterTreeSamples=function(datExpr, y = NULL, traitLabels = NULL, yLabels = NULL,
main = if (is.null(y)) "Sample dendrogram" else "Sample dendrogram and trait indicator",
setLayout = TRUE, autoColorHeight = TRUE, colorHeight = 0.3,
dendroLabels = NULL,
addGuide = FALSE, guideAll = TRUE, guideCount = NULL,
guideHang = 0.20, cex.traitLabels = 0.8,
cex.dendroLabels = 0.9, marAll = c(1,5,3,1),  saveMar = TRUE,
abHeight = NULL, abCol = "red", ...)
{
dendro = fastcluster::hclust( dist( datExpr  ), method="average" )
if (is.null(y) )
{
oldMar = par("mar");
par(mar = marAll);
plot(dendro, main=main, sub="", xlab = "", labels = dendroLabels, cex = cex.dendroLabels)
if (saveMar) par(oldMar);
} else {
if (is.null(traitLabels)) traitLabels = names(as.data.frame(y));
y = as.matrix(y);
if (!is.numeric(y) )
{
warning(paste("The microarray sample trait y will be transformed to numeric."));
dimy = dim(y)
y=as.numeric(y)
dim(y) = dimy;
} # end of if (!is.numeric(y) )
if (  nrow(as.matrix(datExpr)) != nrow(y) )
stop(paste("Input Error: dim(as.matrix(datExpr))[[1]] != length(y)\n",
"  In plain English: The number of microarray sample arrays does not match the number",
"of samples for the trait.\n",
"   Hint: Make sure rows of 'datExpr' (and 'y', if it is a matrix) correspond to samples."))

if (is.integer(y))
{
y = y-min(0, min(y, na.rm = TRUE)) + 1;
} else {
y = (y>=median(y, na.rm = TRUE)) + 1;
}
plotDendroAndColors(dendro, colors = y, groupLabels = traitLabels, rowText = yLabels,
setLayout = setLayout,
autoColorHeight = autoColorHeight, colorHeight = colorHeight,
guideHang = guideHang, cex.colorLabels = cex.traitLabels,
cex.dendroLabels = cex.dendroLabels, marAll = marAll,
saveMar = saveMar, abHeight = abHeight, abCol = abCol,
main = main,
...);
}
}# end of function PlotClusterTreeSamples

# ===================================================
# The function TOMplot creates a TOM plot
# Inputs:  distance measure, hierarchical (hclust) object, color label=colors

TOMplot = function(dissim, dendro, Colors=NULL, ColorsLeft = Colors, terrainColors=FALSE,
setLayout = TRUE, ...)
{
if ( is.null(Colors) ) Colors=rep("white", dim(as.matrix(dissim))[[1]] )
if ( is.null(ColorsLeft)) ColorsLeft = Colors;
nNodes=length(Colors)
if (nNodes<2) {
warning("You have only 1 or 2 genes in TOMplot. No plot will be produced")
} else {
if (nNodes != length(ColorsLeft))
stop("ERROR: number of (top) color labels does not equal number of left color labels")
if (nNodes != dim(dissim)[[1]] )
stop(paste("ERROR: number of color labels does not equal number of nodes in dissim.\n",
"     nNodes != dim(dissim)[[1]] "))
labeltree = as.character(Colors)
labelrow  = as.character(ColorsLeft)
#labelrow[dendro\$order[length(labeltree):1]]=labelrow[dendro\$order]
options(expressions = 10000)
dendro\$height = (dendro\$height - min(dendro\$height))/(1.15 *
(max(dendro\$height)-min(dendro\$height)))
if (terrainColors) {
.heatmap(as.matrix(dissim), Rowv=dendro,
Colv= dendro,
scale="none", revC = TRUE, ColSideColors=as.character(labeltree),
RowSideColors=as.character(labelrow), labRow=FALSE, labCol=FALSE,
col = terrain.colors(100), setLayout = setLayout, ...)
} else {
.heatmap(as.matrix(dissim), Rowv=dendro,
Colv= dendro,
scale="none",revC = TRUE, ColSideColors=as.character(labeltree),
RowSideColors=as.character(labelrow), labRow=FALSE, labCol=FALSE, setLayout = setLayout,
...)
} #end of if
}
} #end of function

plotNetworkHeatmap = function(datExpr,  plotGenes, weights = NULL, useTOM = TRUE, power = 6 ,
networkType = "unsigned", main = "Heatmap of the network")
{
match1=match( plotGenes ,colnames(datExpr) )
match1=match1[ !is.na(match1)]
nGenes=length(match1)
if (  sum( !is.na(match1) )  != length(plotGenes) )
{
printFlush(paste("Warning: Not all gene names were recognized.",
"Only the following genes were recognized. "));
printFlush(paste("   ", colnames(datExpr)[match1], collapse = ", " ))
}
if (nGenes< 3 )
{
warning(paste("Since you have fewer than 3 genes, the network will not be visualized.\n",
"   Hint: please input more genes.")); plot(1,1)
} else {
datErest=datExpr[, match1 ]
if (!is.null(weights)) weights = weights[, match1];
ADJ1 = adjacency(datErest, weights = weights, power = power, type = networkType)
if (useTOM) {
} else {
}
diag(diss1)=NA
hier1=fastcluster::hclust(as.dist(diss1), method="average" )
colors1=rep("white", nGenes)
labeltree = names(data.frame(datErest))
labelrow  = names(data.frame(datErest))
labelrow[hier1\$order[length(labeltree):1]]=labelrow[hier1\$order]
options(expressions = 10000)
heatmap(as.matrix(diss1),Rowv=as.dendrogram(hier1),Colv= as.dendrogram(hier1), scale="none", revC = TRUE,
labRow= labeltree, labCol= labeltree,main=main)
} # end of if (nGenes> 2 )
} # end of function

#####################################################################################################
#####################################################################################################
# E) Relating a measure of gene significance to the modules
#####################################################################################################
#####################################################################################################

# ===================================================
# The function ModuleEnrichment1 creates a bar plot that shows whether modules are enriched with
# significant genes.
# More specifically, it reports the mean gene significance for each module.
# The gene significance can be a binary variable or a quantitative variable.
# It also plots the 95% confidence interval of the mean (CI=mean +/- 1.96* standard error).
# It also reports a Kruskal Wallis P-value.

plotModuleSignificance = function(geneSignificance, colors, boxplot = FALSE,
main = "Gene significance across modules,",
ylab = "Gene Significance", ...)
{
if (length(geneSignificance) != length(colors) )
stop("Error: 'geneSignificance' and 'colors' do not have the same lengths")
no.colors=length(names(table(colors) ))
if (no.colors==1) pp=NA
if (no.colors>1)
{
pp=try(kruskal.test(geneSignificance,factor(colors))\$p.value)
if (inherits(pp, "try-error")) pp=NA
}
title = paste(main," p-value=", signif(pp,2), sep = "")
if (boxplot != TRUE) {
means1=as.vector(tapply(geneSignificance,colors,mean, na.rm = TRUE));
se1= as.vector(tapply(geneSignificance,colors,stdErr))
# par(mfrow=c(1,1))
barplot(means1, names.arg=names(table(colors) ),col= names(table(colors) ) ,ylab=ylab,
main = title, ...)
} else {
boxplot(split(geneSignificance,colors),notch = TRUE,varwidth = TRUE, col= names(table(colors) ),ylab=ylab,
main = title, ...)
}
} # end of function

#####################################################################################################
#####################################################################################################
# F) Carrying out a within module analysis (computing intramodular connectivity etc)
#####################################################################################################
#####################################################################################################

# ===================================================
#The function DegreeInOut computes for each gene
#a) the total number of connections,
#b) the number of connections with genes within its module,
#c) the number of connections with genes outside its module
# When scaleByMax=TRUE, the within module connectivities are scaled to 1, i.e. the max(K.Within)=1 for each module

intramodularConnectivity = function(adjMat, colors, scaleByMax = FALSE)
{
stop("'adjMat' is not a square matrix.");
stop("Dimensions of 'adjMat' and length of 'colors' differ.");
nNodes=length(colors)
colorLevels=levels(factor(colors))
nLevels=length(colorLevels)
kWithin=rep(-666,nNodes )
for (i in c(1:nLevels) )
{
rest1=colors==colorLevels[i];
if (sum(rest1) <3 ) {
kWithin[rest1]=0
} else {
kWithin[rest1]=apply(adjMat[rest1,rest1], 2, sum, na.rm = TRUE)
if (scaleByMax) kWithin[rest1]=kWithin[rest1]/max(kWithin[rest1])
}
}
kTotal= apply(adjMat, 2, sum, na.rm = TRUE)
kOut=kTotal-kWithin
if (scaleByMax) kOut=rep(NA, nNodes);
kDiff=kWithin-kOut
data.frame(kTotal,kWithin,kOut,kDiff)
}

intramodularConnectivity.fromExpr = function(datExpr, colors,
corFnc = "cor", corOptions = "use = 'p'",
weights = NULL,
distFnc = "dist", distOptions = "method = 'euclidean'",
networkType = "unsigned", power = if (networkType=="distance") 1 else 6,
scaleByMax = FALSE,
ignoreColors = if (is.numeric(colors)) 0 else "grey",
getWholeNetworkConnectivity = TRUE)
{
if (ncol(datExpr) !=length(colors))
stop("Number of columns (genes) in 'datExpr' and length of 'colors' differ.");
nNodes=length(colors)
colorLevels=levels(factor(colors))
colorLevels = colorLevels[!colorLevels %in% ignoreColors];
nLevels=length(colorLevels)
kWithin=rep(NA,nNodes )
for (i in c(1:nLevels) )
{
rest1=colors==colorLevels[i];
weights1 = if (is.null(weights)) weights else weights[, rest1];
if (sum(rest1) <3 ) {
kWithin[rest1]=0
} else {
adjMat = adjacency(datExpr[, rest1], weights = weights1, type = networkType, power = power,
corFnc = corFnc, corOptions = corOptions,
distFnc = distFnc, distOptions = distOptions);
if (scaleByMax) kWithin[rest1]=kWithin[rest1]/max(kWithin[rest1], na.rm = TRUE)
}
}
if (getWholeNetworkConnectivity)
{
kTotal= softConnectivity(datExpr, weights = weights, corFnc = corFnc, corOptions = corOptions,
type = networkType, power = power);
kOut=kTotal-kWithin
if (scaleByMax) kOut=rep(NA, nNodes);
kDiff=kWithin-kOut
data.frame(kTotal,kWithin,kOut,kDiff)
} else kWithin;
}

nPresent = function(x)
{
sum(!is.na(x))
}

{
if (is.null(dim) || length(dim)!=2 )
if (dim[1]!=dim[2])
if (min(adjMat, na.rm = TRUE) < min || max(adjMat, na.rm = TRUE) > max)
stop("some entries are not between", min, "and", max)
}

#####################################################################################################
#####################################################################################################
# G) Miscellaneous other functions, e.g. for computing the cluster coefficient.
#####################################################################################################
#####################################################################################################

# The function signedKME computes the module eigengene based connectivity.
# Input: datExpr= a possibly very large gene expression data set where the rows
# correspond to samples and the columns represent genes
# datME=data frame of module eigengenes (columns correspond to module eigengenes or MEs)
# A module eigengene based connectivity KME value will be computed if the gene has
# a non missing expression value in at least minNSamples arrays.
# Output a data frame where columns are the KME values
# corresponding to different modules.
# By splitting the expression data into different blocks, the function can deal with
# tens of thousands of gene expression data.
# If there are many eigengenes (say hundreds) consider decreasing the block size.

signedKME = function(datExpr, datME,
exprWeights = NULL,
MEWeights = NULL,
outputColumnName="kME",
corFnc = "cor", corOptions = "use = 'p'")
{
if (dim(datME)[[1]] != dim(datExpr)[[1]] )
stop("Number of samples (rows) in 'datExpr' and 'datME' must be the same.")
datExpr=as.matrix(datExpr)
datME=as.matrix(datME)
if (is.null(colnames(datExpr))) colnames(datExpr) = spaste("Gene.", 1:ncol(datExpr));
if (any(duplicated(colnames(datExpr)))) colnames(datExpr) = make.unique(colnames(datExpr));
if (!is.null(exprWeights))
exprWeights = .checkAndScaleWeights(exprWeights, datExpr, scaleByMax = FALSE, verbose = 0);
if (!is.null(MEWeights))
MEWeights = .checkAndScaleWeights(exprWeights, datME, scaleByMax = FALSE, verbose = 0);
output=list()
varianceZeroIndicatordatExpr=colVars(datExpr, na.rm = TRUE)==0
varianceZeroIndicatordatME =colVars(datME, na.rm = TRUE)==0
if (sum(varianceZeroIndicatordatExpr,na.rm = TRUE)>0 )
warning("Some genes are constant. Hint: consider removing constant columns from datExpr." )
if (sum(varianceZeroIndicatordatME,na.rm = TRUE)>0 )
warning(paste("Some module eigengenes are constant, which is suspicious.\n",
"    Hint: consider removing constant columns from datME." ))
no.presentdatExpr=colSums(!is.na(datExpr))
if (min(no.presentdatExpr)<..minNSamples )
warning(paste("Some gene expressions have fewer than 4 observations.\n",
"    Hint: consider removing genes with too many missing values or collect more arrays."))

if (!is.null(MEWeights)) corOptions = spaste("weights.y = MEWeights, ", corOptions);
if (!is.null(exprWeights)) corOptions = spaste("weights.x = exprWeights, ", corOptions);

#output=data.frame(cor(datExpr, datME, use="p"))
corExpr = parse(text = paste("data.frame(", corFnc, "(datExpr, datME, ", prepComma(corOptions), "))" ));
output = eval(corExpr);

output[no.presentdatExpr<..minNSamples, ]=NA
names(output)=paste(outputColumnName, substring(colnames(datME), first=3), sep="")
rownames(output) = make.unique(colnames(datExpr));
output
} # end of function signedKME

# ===================================================
# The function clusterCoef computes the cluster coefficients.
# Input is an adjacency matrix

{
computeLinksInNeighbors <- function(x, imatrix){x %*% imatrix %*% x}
total.edge <- c(rep(-666,nNodes))
if (maxh1>1 | minh1 < 0 )
stop(paste("The adjacency matrix contains entries that are larger than 1 or smaller than 0: max =",
maxh1,", min =",minh1))
total.edge = plainsum^2 - squaresum
CChelp=rep(-666, nNodes)
CChelp
} # end of function

# ===================================================
# The function addErrorBars  is used to create error bars in a barplot
{
if(!is.numeric(means)) {
stop("All arguments must be numeric")}

if(is.null(dim(means)) || length(dim(means))==1){
xval<-(cumsum(c(0.7,rep(1.2,length(means)-1))))
}else{
if (length(dim(means))==2){
xval<-cumsum(array(c(1,rep(0,dim(means)[1]-1)),
dim=c(1,length(means))))+0:(length(means)-1)+.5
}else{
stop("First argument must either be a vector or a matrix") }
}
MW<-0.25*(max(xval)/length(xval))
ERR1<-means+errors
ERR2<-means-errors
for(i in 1:length(means)){
segments(xval[i],means[i],xval[i],ERR1[i])
segments(xval[i]-MW,ERR1[i],xval[i]+MW,ERR1[i])
if(two.side){
segments(xval[i],means[i],xval[i],ERR2[i])
segments(xval[i]-MW,ERR2[i],xval[i]+MW,ERR2[i])
}
}
}

# ===================================================
# this function computes the standard error
stdErr <- function(x){ sqrt( var(x,na.rm = TRUE)/sum(!is.na(x))   ) }

# ===================================================
# The following two functions are for displaying the pair-wise correlation in a panel when using the command "pairs()"
# Typically, we use "pairs(DATA, upper.panel=panel.smooth, lower.panel=.panel.cor, diag.panel=panel.hist)" to
# put the correlation coefficients on the lower panel.

.panel.hist <- function(x, ...){
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h\$breaks; nB <- length(breaks)
y <- h\$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}

# ===================================================
# This function is used in "pairs()" function. The problem of the original  panel.cor is that
# when the correlation coefficient is very small, the lower panel will have a large font
# instead of a mini-font in a saved .ps file. This new function uses a format for corr=0.2
# when corr<0.2, but it still reports the original value of corr, with a minimum format.

.panel.cor=function(x, y, digits=2, prefix="", cex.cor){
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
txt1=txt
r1=r
if (r<0.2) {
r1=0.2
txt1 <- format(c(r1, 0.123456789), digits=digits)[1]
txt1 <- paste(prefix, txt1, sep="")
}
if(missing(cex.cor)) cex <- 0.8/strwidth(txt1)
cex = cex * r1
r <- round(r, digits)
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
text(0.5, 0.5, txt, cex=cex)
}

# ===================================================
# This function collects garbage
# collect_garbage=function(){collectGarbage()}

#---------------------------------------------------------------------------------------------------------
# This function plots a barplot with all colors given. If Colors are not given, GlobalStandardColors are
# used, i.e. if you want to see the GlobalStandardColors, just call this function without parameters.
displayColors = function(colors = NULL)
{
if (is.null(colors)) colors = standardColors();
barplot(rep(1, length(colors)), col = colors, border = colors);
}

###############################################################################
# I) Functions for merging modules based on a high correlation of the module eigengenes
###############################################################################

#---------------------------------------------------------------------------------------------
#
# dynamicMergeCut
#
#---------------------------------------------------------------------------------------------

dynamicMergeCut = function(n, mergeCor=.9, Zquantile=2.35)
{
if (mergeCor>1 | mergeCor<0 ) stop("'mergeCor' must be between 0 and 1.")
if (mergeCor==1)
{
printFlush("dynamicMergeCut: given mergeCor=1 will be set to .999.");
mergeCor=.999
}
if (n<4 )
{
printFlush(paste("Warning in function dynamicMergeCut: too few observations for the dynamic",
"assignment of the merge threshold.\n    Will set the threshold to .35"));
mergethreshold=.35
} else {
# Fisher transform of the true merge correlation
FishermergeCor=.5*log((1+mergeCor)/(1-mergeCor))
E=exp(2*( FishermergeCor -Zquantile/sqrt(n-3)))
LowerBoundCIcor=(E-1)/(E+1)
mergethreshold=1- LowerBoundCIcor
}
if (mergethreshold>1) 1 else mergethreshold
}# end of function dynamicMergeCut

#======================================================================================================
#
# print.flush
#
# =====================================================================================================

#print.flush = function(...)
#{
#   printFlush(...);
#}

##############################################################################################
# I) GENERAL STATISTICAL FUNCTIONS
##############################################################################################

verboseScatterplot = function(x, y,
sample = NULL,
corFnc = "cor", corOptions = "use = 'p'",
main ="", xlab = NA, ylab = NA, cex=1, cex.axis = 1.5,
cex.lab = 1.5, cex.main = 1.5, abline = FALSE,
abline.color = 1, abline.lty = 1,
corLabel = corFnc,
displayAsZero = 1e-5,
col = 1, bg = 0, pch = 1,
lmFnc = lm,
plotPriority = NULL,
showPValue = TRUE,
...)
{
if ( is.na(xlab) ) xlab= as.character(match.call(expand.dots = FALSE)\$x)
if ( is.na(ylab) ) ylab= as.character(match.call(expand.dots = FALSE)\$y)
x= as.numeric(as.character(x))
y= as.numeric(as.character(y))
corExpr = parse(text = paste(corFnc, "(x, y ", prepComma(corOptions), ")"));
#cor=signif(cor(x,y,use="p",method=correlationmethod),2)
cor=signif(eval(corExpr),2)
if (is.finite(cor)) if (abs(cor) < displayAsZero) cor = 0;
corp = signif(corPvalueStudent(cor, sum(is.finite(x) & is.finite(y))), 2);
#corpExpr = parse(text = paste("cor.test(x, y, ", corOptions, ")"));
#corp=signif(cor.test(x,y,use="p",method=correlationmethod)\$p.value,2)
#corp=signif(eval(corpExpr)\$p.value,2)
if (is.finite(corp) && corp<10^(-200) ) corp="<1e-200" else corp = paste("=", corp, sep="");
if (!is.na(corLabel))
{
mainX = paste(main, " ", corLabel, "=", cor, if(is.finite(cor) && showPValue) spaste(", p",corp) else "", sep="");
} else
mainX = main;

if (length(col)<length(x)) col = rep(col, ceiling(length(x)/length(col)));
if (length(pch)<length(x)) pch = rep(pch, ceiling(length(x)/length(pch)));
if (length(cex)<length(x)) cex = rep(cex, ceiling(length(x)/length(cex)));
if (length(bg )<length(x))  bg = rep(bg,  ceiling(length(x)/length(bg)));

if (is.null(plotPriority)) plotPriority = rep(1, length(x));
if (length(plotPriority)!=length(x))
stop("When given, length of 'plotPriority' must equal length of 'x'.");

if (!is.null(sample))
{
if (length(sample) == 1)
{
sample = sample(length(x), sample)
}
priority1 = plotPriority[sample];
order1 = order(priority1, na.last = TRUE);
plot(x[sample][order1], y[sample][order1], main=mainX, xlab=xlab, ylab=ylab,
cex.axis=cex.axis, cex.lab=cex.lab, cex.main=cex.main,
col = col[sample][order1], bg = bg[sample][order1], pch = pch[sample][order1],
cex = cex[sample][order1],
...)
} else {
order1 = order(plotPriority, na.last = TRUE);
plot(x[order1], y[order1], main=mainX, xlab=xlab, ylab=ylab,
cex.axis=cex.axis, cex.lab=cex.lab, cex.main=cex.main, col = col[order1], bg = bg[order1],
pch = pch[order1], cex = cex[order1], ...)
}
if (abline)
{
lmFnc = match.fun(lmFnc);
fit = lmFnc(y~x);
abline(reg = fit, col = abline.color, lty = abline.lty);
}
invisible(sample);
}

verboseBoxplot = function(x, g,
main ="", xlab = NA, ylab = NA, cex=1, cex.axis = 1.5,
cex.lab = 1.5, cex.main = 1.5, notch = TRUE, varwidth = TRUE, ...,
pt.cex = 0.8, pch = 21, pt.col = "blue", pt.bg = "skyblue",
randomSeed = 31425, jitter = 0.6)
{
if ( is.na(xlab) ) xlab= as.character(match.call(expand.dots = FALSE)\$g)
#print(xlab1)
if ( is.na(ylab) ) ylab= as.character( match.call(expand.dots = FALSE)\$x)
#print(ylab1)
p1 = signif(kruskal.test(x, factor(g) )\$p.value,2)
#if (p1< 5.0*10^(-22) ) p1="< 5e-22"
boxp = boxplot(x~factor(g), notch = notch, varwidth = varwidth,
main=paste(main,"p =",p1 ),
xlab=xlab, ylab=ylab, cex=cex, cex.axis=cex.axis,cex.lab=cex.lab, cex.main=cex.main, ...);

{
if (exists(".Random.seed"))
{
savedSeed = .Random.seed;
set.seed(randomSeed);
on.exit(.Random.seed <<- savedSeed);
}

n = length(g);
pch = unlist(tapply(.extend(pch, n), g, identity));
pt.col = unlist(tapply(.extend(pt.col, n), g, identity));
pt.bg = unlist(tapply(.extend(pt.bg, n), g, identity));
pt.cex = unlist(tapply(.extend(pt.cex, n), g, identity));

set.seed(randomSeed)  # so we can make the identical plot again
points(jitter(rep(1:ncol(boxp\$stats),```