Nothing
#Copyright (C) 2008 Peter Langfelder
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# In this version the blocks are chosen by pre-clustering.
#==========================================================================================================
#
# TOM similarity via a call to a compiled code.
#
#==========================================================================================================
TOMsimilarityFromExpr = function(
datExpr, weights = NULL,
corType = "pearson", networkType = "unsigned",
power = 6, TOMType = "signed", TOMDenom = "min",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
replaceMissingAdjacencies = FALSE,
suppressTOMForZeroAdjacencies = FALSE,
suppressNegativeTOM = FALSE,
useInternalMatrixAlgebra = FALSE,
nThreads = 0,
verbose = 1, indent = 0)
{
corTypeC = as.integer(pmatch(corType, .corTypes)-1);
if (is.na(corTypeC))
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
TOMTypeC = as.integer(pmatch(TOMType, .TOMTypes)-1);
if (is.na(TOMTypeC))
stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", ")))
TOMDenomC = as.integer(pmatch(TOMDenom, .TOMDenoms)-1);
if (is.na(TOMDenomC))
stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", ")))
if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.");
if (quickCor < 0) stop("quickCor must be positive.");
if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.");
fallback = as.integer(pmatch(pearsonFallback, .pearsonFallbacks));
if (is.na(fallback))
stop(paste("Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n",
paste(.pearsonFallbacks, collapse = ", ")))
if (nThreads < 0) stop("nThreads must be positive.");
if (is.null(nThreads) || (nThreads==0)) nThreads = .useNThreads();
if ( (power<1) | (power>30) ) stop("power must be between 1 and 30.");
networkTypeC = as.integer(charmatch(networkType, .networkTypes)-1);
if (is.na(networkTypeC))
stop(paste("Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")));
dimEx = dim(datExpr);
if (length(dimEx)!=2) stop("datExpr has incorrect dimensions.")
if (length(weights) > 0)
{
if (!isTRUE(all.equal(dimEx, dim(weights))))
stop("When 'weights' are given, they must have the same dimensions as 'datExpr'.")
if (any(!is.finite(weights)))
{
if (verbose > 0)
warning("Found non-finite weights. The corresponding data points will be removed.");
weights[!is.finite(weights)] = NA;
}
}
nGenes = dimEx[2];
nSamples = dimEx[1];
warn = as.integer(0);
datExpr = as.matrix(datExpr);
if (length(weights) > 0) weights = as.matrix(weights) else weights = NULL;
tom = .Call("tomSimilarity_call", datExpr, weights,
as.integer(corTypeC), as.integer(networkTypeC), as.double(power),
as.integer(TOMTypeC), as.integer(TOMDenomC),
as.double(maxPOutliers), as.double(quickCor),
as.integer(fallback), as.integer(cosineCorrelation),
as.integer(replaceMissingAdjacencies),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(suppressNegativeTOM),
as.integer(useInternalMatrixAlgebra),
warn,
as.integer(nThreads), as.integer(verbose), as.integer(indent), PACKAGE = "WGCNA");
diag(tom) = 1;
return (tom);
}
#======================================================================================================
#
# TOMsimilarity (from adjacency)
#
#===================================================================================================
TOMsimilarity = function(adjMat, TOMType = "unsigned", TOMDenom = "min",
suppressTOMForZeroAdjacencies = FALSE, suppressNegativeTOM = FALSE,
useInternalMatrixAlgebra = FALSE, verbose = 1, indent = 0)
{
TOMTypeC = pmatch(TOMType, .TOMTypes)-1;
if (is.na(TOMTypeC))
stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", ")))
if (TOMTypeC == 0)
stop("'TOMType' cannot be 'none' for this function.");
TOMDenomC = pmatch(TOMDenom, .TOMDenoms)-1;
if (is.na(TOMDenomC))
stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", ")))
checkAdjMat(adjMat, min = if (TOMTypeC==2) -1 else 0, max = 1);
if (any(is.na(adjMat))) adjMat[is.na(adjMat)] = 0;
#if (any(diag(adjMat)!=1)) diag(adjMat) = 1; <--- this is now done in compiled code
#nGenes = dim(adjMat)[1];
#tom = matrix(0, nGenes, nGenes);
#tomResult = .C("tomSimilarityFromAdj", as.double(as.matrix(adjMat)), as.integer(nGenes),
# as.integer(TOMTypeC),
# as.integer(TOMDenomC),
# tom = as.double(tom), as.integer(verbose), as.integer(indent), PACKAGE = "WGCNA")
#tom[,] = tomResult$tom;
#diag(tom) = 1;
#rm(tomResult); collectGarbage();
tom = .Call("tomSimilarityFromAdj_call", as.matrix(adjMat),
as.integer(TOMTypeC),
as.integer(TOMDenomC),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(suppressNegativeTOM),
as.integer(useInternalMatrixAlgebra),
as.integer(verbose), as.integer(indent), PACKAGE = "WGCNA")
gc();
tom;
}
#==========================================================================================================
#
# TOMdist (from adjacency)
#
#==========================================================================================================
TOMdist = function(adjMat, TOMType = "unsigned", TOMDenom = "min",
suppressTOMForZeroAdjacencies = FALSE, suppressNegativeTOM = FALSE,
useInternalMatrixAlgebra = FALSE, verbose = 1, indent = 0)
{
1-TOMsimilarity(adjMat, TOMType, TOMDenom, suppressTOMForZeroAdjacencies = suppressTOMForZeroAdjacencies,
suppressNegativeTOM = suppressNegativeTOM,
useInternalMatrixAlgebra = useInternalMatrixAlgebra, verbose = verbose, indent = indent)
}
#==========================================================================================================
#
# blockwiseModules
#
#==========================================================================================================
# Function to calculate modules and eigengenes from all genes.
blockwiseModules = function(
# Input data
datExpr,
weights = NULL,
# Data checking options
checkMissingData = TRUE,
# Options for splitting data into blocks
blocks = NULL,
maxBlockSize = 5000,
blockSizePenaltyPower = 5,
nPreclusteringCenters = as.integer(min(ncol(datExpr)/20, 100*ncol(datExpr)/maxBlockSize)),
randomSeed = 54321,
# load TOM from previously saved file?
loadTOM = FALSE,
# Network construction arguments: correlation options
corType = "pearson",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
# Adjacency function options
power = 6,
networkType = "unsigned",
replaceMissingAdjacencies = FALSE,
# Topological overlap options
TOMType = "signed",
TOMDenom = "min",
suppressTOMForZeroAdjacencies = FALSE,
suppressNegativeTOM = FALSE,
# Saving or returning TOM
getTOMs = NULL,
saveTOMs = FALSE,
saveTOMFileBase = "blockwiseTOM",
# Basic tree cut options
deepSplit = 2,
detectCutHeight = 0.995,
minModuleSize = min(20, ncol(datExpr)/2 ),
# Advanced tree cut options
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
stabilityLabels = NULL,
stabilityCriterion = c("Individual fraction", "Common fraction"),
minStabilityDissim = NULL,
pamStage = TRUE, pamRespectsDendro = TRUE,
# Gene reassignment, module trimming, and module "significance" criteria
reassignThreshold = 1e-6,
minCoreKME = 0.5,
minCoreKMESize = minModuleSize/3,
minKMEtoStay = 0.3,
# Module merging options
mergeCutHeight = 0.15,
impute = TRUE,
trapErrors = FALSE,
# Output options
numericLabels = FALSE,
# Options controlling behaviour
nThreads = 0,
useInternalMatrixAlgebra = FALSE,
useCorOptionsThroughout = TRUE,
verbose = 0, indent = 0,
...)
{
spaces = indentSpaces(indent);
if (verbose>0)
printFlush(paste(spaces, "Calculating module eigengenes block-wise from all genes"));
seedSaved = FALSE;
if (!is.null(randomSeed))
{
if (exists(".Random.seed"))
{
seedSaved = TRUE;
savedSeed = .Random.seed
}
set.seed(randomSeed);
}
intCorType = pmatch(corType, .corTypes);
if (is.na(intCorType))
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
intTOMType = pmatch(TOMType, .TOMTypes);
if (is.na(intTOMType))
stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", ")))
TOMDenomC = pmatch(TOMDenom, .TOMDenoms)-1;
if (is.na(TOMDenomC))
stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", ")))
if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.");
if (quickCor < 0) stop("quickCor must be positive.");
if (nThreads < 0) stop("nThreads must be positive.");
if (is.null(nThreads) || (nThreads==0)) nThreads = .useNThreads();
if ( (power<1) | (power>30) ) stop("power must be between 1 and 30.");
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType = charmatch(networkType, .networkTypes);
if (is.na(intNetworkType))
stop(paste("Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")));
fallback = pmatch(pearsonFallback, .pearsonFallbacks)
if (is.na(fallback))
stop(spaste("Unrecognized value '", pearsonFallback, "' of argument 'pearsonFallback'.",
"Recognized values are (unique abbreviations of)\n",
paste(.pearsonFallbacks, collapse = ", ")))
datExpr = as.matrix(datExpr);
dimEx = dim(datExpr);
if (length(dimEx)!=2) stop("datExpr has incorrect dimensions.")
weights = .checkAndScaleWeights(weights, datExpr, scaleByMax = FALSE);
haveWeights = length(weights)>0;
nGenes = dimEx[2];
nSamples = dimEx[1];
allLabels = rep(0, nGenes);
AllMEs = NULL;
allLabelIndex = NULL;
originalSampleNames = rownames(datExpr);
if (is.null(originalSampleNames)) originalSampleNames = spaste("Row.", 1:nrow(datExpr));
originalGeneNames = colnames(datExpr);
if (is.null(originalGeneNames)) originalGeneNames = spaste("Column.", 1:ncol(datExpr));
#if (maxBlockSize >= floor(sqrt(2^31)) )
# stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (!is.null(blocks) && (length(blocks)!=nGenes))
stop("Input error: the length of 'geneRank' does not equal the number of genes in given 'datExpr'.");
if (!is.null(getTOMs))
warning("getTOMs is deprecated, please use saveTOMs instead.");
# Check data for genes and samples that have too many missing values
if (checkMissingData)
{
gsg = goodSamplesGenes(datExpr, weights = weights, verbose = verbose - 1, indent = indent + 1)
if (!gsg$allOK)
{
datExpr = datExpr[gsg$goodSamples, gsg$goodGenes];
if (haveWeights) weights = weights[gsg$goodSamples, gsg$goodGenes];
}
nGGenes = sum(gsg$goodGenes);
nGSamples = sum(gsg$goodSamples);
} else {
nGGenes = nGenes;
nGSamples = nSamples;
gsg = list(goodSamples = rep(TRUE, nSamples), goodGenes = rep(TRUE, nGenes), allOK = TRUE);
}
if (any(is.na(datExpr)))
{
datExpr.scaled.imputed = t(impute.knn(t(scale(datExpr)))$data)
} else
datExpr.scaled.imputed = scale(datExpr);
corFnc = .corFnc[intCorType];
corOptions = .corOptionList[[intCorType]];
corFncAcceptsWeights = intCorType==1;
if (useCorOptionsThroughout)
corOptions = c(corOptions, list(cosine = cosineCorrelation));
# Do not add the quick argument here. The calculations carried out here will not be slow anyway.
if (intCorType==2 && useCorOptionsThroughout )
corOptions = c(corOptions, list(maxPOutliers = maxPOutliers, pearsonFallback = pearsonFallback));
signed = networkType %in% c("signed", "signed hybrid");
# Set up advanced tree cut methods
otherArgs = list(...);
if (useBranchEigennodeDissim)
{
branchSplitFnc = list("branchEigengeneDissim");
externalSplitOptions = list(list( corFnc = corFnc, corOptions = corOptions,
signed = signed));
nExternalBranchSplitFnc = 1;
externalSplitFncNeedsDistance = FALSE;
minExternalSplit = minBranchEigennodeDissim;
} else {
branchSplitFnc = list();
externalSplitOptions = list()
externalSplitFncNeedsDistance = logical(0);
nExternalBranchSplitFnc = 0;
minExternalSplit = numeric(0);
}
if (!is.null(stabilityLabels))
{
stabilityCriterion = match.arg(stabilityCriterion);
branchSplitFnc = c(branchSplitFnc,
if (stabilityCriterion=="Individual fraction")
"branchSplitFromStabilityLabels.individualFraction" else "branchSplitFromStabilityLabels");
minExternalSplit = c(minExternalSplit, minStabilityDissim);
externalSplitFncNeedsDistance = c(externalSplitFncNeedsDistance, FALSE);
print(dim(stabilityLabels));
externalSplitOptions = c(externalSplitOptions, list(list(stabilityLabels = stabilityLabels)))
}
if ("useBranchSplit" %in% names(otherArgs))
{
if (otherArgs$useBranchSplit)
{
nExternalBranchSplitFnc = nExternalBranchSplitFnc + 1;
branchSplitFnc[[nExternalBranchSplitFnc]] = "branchSplit"
externalSplitOptions[[nExternalBranchSplitFnc]] = list(discardProp = 0.08, minCentralProp = 0.75,
nConsideredPCs = 3, signed = signed, getDetails = FALSE);
externalSplitFncNeedsDistance[nExternalBranchSplitFnc] = FALSE;
minExternalSplit[ nExternalBranchSplitFnc] = otherArgs$minBranchSplit;
}
}
# Split data into blocks if needed
if (is.null(blocks))
{
if (nGGenes > maxBlockSize)
{
if (verbose>1) printFlush(paste(spaces, "....pre-clustering genes to determine blocks.."));
clustering = projectiveKMeans(datExpr, preferredSize = maxBlockSize,
checkData = FALSE,
sizePenaltyPower = blockSizePenaltyPower,
nCenters = nPreclusteringCenters,
randomSeed = randomSeed,
verbose = verbose-2, indent = indent + 1);
gBlocks = .orderLabelsBySize(clustering$clusters)
if (verbose > 2) { printFlush("Block sizes:"); print(table(gBlocks)); }
} else
gBlocks = rep(1, nGGenes);
blocks = rep(NA, nGenes);
blocks[gsg$goodGenes] = gBlocks;
} else {
gBlocks = blocks[gsg$goodGenes];
}
blockLevels = as.numeric(levels(factor(gBlocks)));
blockSizes = table(gBlocks)
if (any(blockSizes > sqrt(2^31)-1))
printFlush(spaste(spaces,
"Found block(s) with size(s) larger than limit of 'int' indexing.\n",
spaces, " Support for such large blocks is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."));
nBlocks = length(blockLevels);
# Initialize various variables
dendros = list();
TOMFiles = rep("", nBlocks);
blockGenes = list();
maxUsedLabel = 0;
for (blockNo in 1:nBlocks)
{
if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]];
block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]];
selExpr = as.matrix(datExpr[, block]);
if (haveWeights) selWeights = weights[, block];
nBlockGenes = length(block);
TOMFiles[blockNo] = spaste(saveTOMFileBase, "-block.", blockNo, ".RData");
if (loadTOM)
{
if (verbose > 2)
printFlush(paste(spaces, " ..loading TOM for block", blockNo, "from file", TOMFiles[blockNo]));
x = try(load(file =TOMFiles[blockNo]), silent = TRUE);
if (x!="TOM")
{
loadTOM = FALSE
printFlush(spaste("Loading of TOM in block ", blockNo, " failed:\n file ",
TOMFiles[blockNo],
"\n either does not exist or does not contain the object 'TOM'.\n",
" Will recalculate TOM."));
} else if (!inherits(TOM, "dist")) {
printFlush(spaste("TOM file ", TOMFiles[blockNo],
" does not contain object of the right type or size.\n",
" Will recalculate TOM."))
} else {
size.1 = attr(TOM, "Size");
if (length(size.1)!=1 || size.1!=nBlockGenes)
{
printFlush(spaste("TOM file ", TOMFiles[blockNo],
" does not contain object of the right type or size.\n",
" Will recalculate TOM."))
loadTOM = FALSE
} else {
tom = as.matrix(TOM);
rm(TOM);
collectGarbage();
}
}
}
if (!loadTOM)
{
# Calculate TOM by calling a custom C function:
callVerb = max(0, verbose - 1); callInd = indent + 2;
CcorType = intCorType - 1;
CnetworkType = intNetworkType - 1;
CTOMType = intTOMType -1;
warn = as.integer(0);
tom = .Call("tomSimilarity_call", selExpr, weights,
as.integer(CcorType), as.integer(CnetworkType), as.double(power), as.integer(CTOMType),
as.integer(TOMDenomC),
as.double(maxPOutliers),
as.double(quickCor),
as.integer(fallback),
as.integer(cosineCorrelation),
as.integer(replaceMissingAdjacencies),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(suppressNegativeTOM),
as.integer(useInternalMatrixAlgebra),
warn, as.integer(nThreads),
as.integer(callVerb), as.integer(callInd), PACKAGE = "WGCNA");
# FIXME: warn if necessary
if (saveTOMs)
{
TOM = as.dist(tom);
TOMFiles[blockNo] = paste(saveTOMFileBase, "-block.", blockNo, ".RData", sep="");
if (verbose > 2)
printFlush(paste(spaces, " ..saving TOM for block", blockNo, "into file", TOMFiles[blockNo]));
save(TOM, file =TOMFiles[blockNo]);
rm (TOM)
collectGarbage();
}
}
dissTom = 1-tom;
rm(tom);
collectGarbage();
if (verbose>2) printFlush(paste(spaces, "....clustering.."));
dendros[[blockNo]] = fastcluster::hclust(as.dist(dissTom), method = "average")
if (verbose>2) printFlush(paste(spaces, "....detecting modules.."));
datExpr.scaled.imputed.block = datExpr.scaled.imputed[, block];
if (nExternalBranchSplitFnc > 0) for (extBSFnc in 1:nExternalBranchSplitFnc)
externalSplitOptions[[extBSFnc]]$expr = datExpr.scaled.imputed.block;
collectGarbage();
blockLabels = try(cutreeDynamic(dendro = dendros[[blockNo]],
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method = "hybrid", distM = dissTom,
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = branchSplitFnc,
minExternalSplit = minExternalSplit,
externalSplitOptions = externalSplitOptions,
externalSplitFncNeedsDistance = externalSplitFncNeedsDistance,
assumeSimpleExternalSpecification = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
verbose = verbose-3, indent = indent + 2), silent = FALSE);
collectGarbage();
if (verbose > 8)
{
labels0 = blockLabels
if (interactive())
plotDendroAndColors(dendros[[blockNo]], labels2colors(blockLabels), dendroLabels = FALSE,
main = paste("Block", blockNo),
rowText = blockLabels, textPositions = 1, rowTextAlignment = "center");
if (FALSE)
plotDendroAndColors(dendros[[blockNo]], labels2colors(allLabels), dendroLabels = FALSE,
main = paste("Block", blockNo));
}
if (inherits(blockLabels, 'try-error'))
{
if (verbose>0)
{
printFlush(paste(spaces, "*** cutreeDynamic returned the following error:\n",
spaces, blockLabels, spaces,
"Stopping the module detection here."));
} else
warning(paste("blockwiseModules: cutreeDynamic returned the following error:\n",
" ", blockLabels, "---> Continuing with next block. "));
next;
}
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, "No modules detected in block", blockNo));
}
blockNo = blockNo + 1;
next;
}
blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel;
maxUsedLabel = max(blockLabels);
if (verbose>2) printFlush(paste(spaces, "....calculating module eigengenes.."));
MEs = try(moduleEigengenes(selExpr[, blockLabels!=0], blockLabels[blockLabels!=0], impute = impute,
# subHubs = TRUE, trapErrors = FALSE,
verbose = verbose - 3, indent = indent + 2), silent = TRUE);
if (inherits(MEs, 'try-error'))
{
if (trapErrors)
{
if (verbose>0) {
printFlush(paste(spaces, "*** moduleEigengenes failed with the following message:"));
printFlush(paste(spaces, " ", MEs));
printFlush(paste(spaces, " ---> Stopping module detection here."));
} else
warning(paste("blockwiseModules: moduleEigengenes failed with the following message:",
"\n ", MEs, "---> Continuing with next block. "));
next;
} else stop(MEs);
}
#propMEs = as.data.frame(MEs$eigengenes[, names(MEs$eigengenes)!="ME0"]);
propMEs = MEs$eigengenes;
blockLabelIndex = as.numeric(substring(names(propMEs), 3));
deleteModules = NULL;
changedModules = NULL;
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff.
if (verbose>2) printFlush(paste(spaces, "....checking kME in modules.."));
for (mod in 1:ncol(propMEs))
{
modGenes = (blockLabels==blockLabelIndex[mod]);
KME = do.call(corFnc,
c(list(selExpr[, modGenes], propMEs[, mod]),
if (corFncAcceptsWeights) list(
weights.x = if (haveWeights) weights[, modGenes] else NULL,
weights.y = NULL) else NULL,
corOptions));
if (intNetworkType==1) KME = abs(KME);
if (sum(KME>minCoreKME) < minCoreKMESize)
{
blockLabels[modGenes] = 0;
deleteModules = c(deleteModules, mod);
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ", mod, ": of ", sum(modGenes),
" total genes in the module\n only ", sum(KME>minCoreKME),
" have the requisite high correlation with the eigengene.", sep=""));
} else if (sum(KME<minKMEtoStay)>0)
{
# Remove genes whose KME is too low:
if (verbose > 2)
printFlush(paste(spaces, " ..removing", sum(KME<minKMEtoStay),
"genes from module", mod, "because their KME is too low."));
blockLabels[modGenes][KME < minKMEtoStay] = 0;
if (sum(blockLabels[modGenes]>0) < minModuleSize)
{
deleteModules = c(deleteModules, mod);
blockLabels[modGenes] = 0;
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod],
": not enough genes in the module after removal of low KME genes.", sep=""));
} else {
changedModules = union(changedModules, blockLabelIndex[mod]);
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules))
{
propMEs = propMEs[, -deleteModules, drop = FALSE];
modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules]));
blockLabels[modGenes] = 0;
modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules]));
allLabels[modAllGenes] = 0;
blockLabelIndex = blockLabelIndex[-deleteModules];
}
# Check if any modules are left
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, "No significant modules detected in block", blockNo));
}
blockNo = blockNo + 1;
next;
}
# Update allMEs and allLabels
if (is.null(AllMEs))
{
AllMEs = propMEs;
} else
AllMEs = cbind(AllMEs, propMEs);
allLabelIndex = c(allLabelIndex, blockLabelIndex);
assigned = block[blockLabels!=0];
allLabels[gsg$goodGenes][assigned] = blockLabels[blockLabels!=0];
rm(dissTom);
collectGarbage();
#if (blockNo < nBlocks)
#{
# leftoverBlockGenes = block[allLabels[block]==0];
# nLeftBG = length(leftoverBlockGenes);
# blocksLeft = c((blockNo+1):nBlocks);
# blockSizes = as.vector(table(blocks))[blocksLeft];
# blocksOpen = blocksLeft[blockSizes < maxBlockSize];
# nBlocksOpen = length(blocksOpen);
# if ((nLeftBG>0) && (nBlocksOpen>0))
# {
# openSizes = blockSizes[blocksOpen];
# centers = matrix(0, nGSamples, nBlocksOpen);
# for (cen in 1:nBlocksOpen)
# centers[, cen] = svd(datExpr[, blocks==blocksOpen[cen]], nu=1, nv=0)$u[,1];
# dst = geneCenterDist(datExpr[, block], centers);
# rowMat = matrix(c(1:nrow(dst)), nrow(dst), ncol(dst));
# colMat = matrix(c(1:ncol(dst)), nrow(dst), ncol(dst), byrow = TRUE);
# dstOrder = order(as.vector(dst));
# while ((nLeftBG>0) && (nBlocksOpen>0))
# {
# gene = colMat[dstOrder[1]];
# rowMatV = as.vector(rowMat);
# colMatV = as.vector(colMat);
blockNo = blockNo + 1;
}
# Check whether any of the already assigned genes should be re-assigned
deleteModules = NULL;
goodLabels = allLabels[gsg$goodGenes];
reassignIndex = rep(FALSE, length(goodLabels));
if (sum(goodLabels!=0) > 0)
{
propLabels = goodLabels[goodLabels!=0];
assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0];
KME = do.call(match.fun(corFnc),
c(list(datExpr[, goodLabels!=0], AllMEs),
if (corFncAcceptsWeights) list(
weights.x = if(haveWeights) weights[, goodLabels!=0] else NULL,
weights.y = NULL) else NULL,
corOptions));
if (intNetworkType == 1) KME = abs(KME)
nMods = ncol(AllMEs);
for (mod in 1:nMods)
{
modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]];
KMEmodule = KME[modGenes, mod];
KMEbest = apply(KME[modGenes, , drop = FALSE], 1, max);
candidates = (KMEmodule < KMEbest);
candidates[!is.finite(candidates)] = FALSE;
if (FALSE)
{
modDiss = dissTom[goodLabels==allLabelIndex[mod], goodLabels==allLabelIndex[mod]];
mod.k = colSums(modDiss);
boxplot(mod.k~candidates)
}
if (sum(candidates) > 0)
{
pModule = corPvalueFisher(KMEmodule[candidates], nSamples);
whichBest = apply(KME[modGenes[candidates], , drop = FALSE], 1, which.max);
pBest = corPvalueFisher(KMEbest[candidates], nSamples);
reassign = ifelse(is.finite(pBest/pModule), (pBest/pModule < reassignThreshold), FALSE);
if (sum(reassign)>0)
{
if (verbose > 2)
printFlush(paste(spaces, " ..reassigning", sum(reassign),
"genes from module", mod, "to modules with higher KME."));
allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign];
changedModules = union(changedModules, whichBest[reassign]);
if (length(modGenes)-sum(reassign) < minModuleSize)
{
deleteModules = c(deleteModules, mod);
} else
changedModules = union(changedModules, mod);
}
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules))
{
AllMEs = AllMEs[, -deleteModules, drop = FALSE];
genes = is.finite(match(allLabels, allLabelIndex[deleteModules]));
allLabels[genes] = 0;
allLabelIndex = allLabelIndex[-deleteModules];
goodLabels = allLabels[gsg$goodGenes];
}
if (verbose>1) printFlush(paste(spaces, "..merging modules that are too close.."));
if (numericLabels) {
colors = allLabels
} else {
colors = labels2colors(allLabels)
}
mergedAllColors = colors;
MEsOK = TRUE;
mergedMods = try(mergeCloseModules(datExpr, colors[gsg$goodGenes], cutHeight = mergeCutHeight,
relabel = TRUE, # trapErrors = FALSE,
corFnc = corFnc, corOptions = corOptions,
impute = impute,
verbose = verbose-2, indent = indent + 2), silent = TRUE);
if (inherits(mergedMods, 'try-error'))
{
warning(paste("blockwiseModules: mergeCloseModules failed with the following error message:\n ",
mergedMods, "\n--> returning unmerged colors.\n"));
MEs = try(moduleEigengenes(datExpr, colors[gsg$goodGenes], # subHubs = TRUE, trapErrors = FALSE,
impute = impute,
verbose = verbose-3, indent = indent+3), silent = TRUE);
if (inherits(MEs, 'try-error'))
{
if (!trapErrors) stop(MEs);
if (verbose>0)
{
printFlush(paste(spaces, "*** moduleEigengenes failed with the following error message:"));
printFlush(paste(spaces, " ", MEs));
printFlush(paste(spaces, "*** returning no module eigengenes.\n"));
} else
warning(paste("blockwiseModules: moduleEigengenes failed with the following error message:\n ",
MEs, "\n--> returning no module eigengenes.\n"));
allSampleMEs = NULL;
MEsOK = FALSE;
} else {
if (sum(!MEs$validMEs)>0)
{
mergedAllColors[gsg$goodGenes] = MEs$validColors;
MEs = MEs$eigengenes[, MEs$validMEs];
} else MEs = MEs$eigengenes;
allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(MEs)));
allSampleMEs[gsg$goodSamples, ] = MEs[,];
names(allSampleMEs) = names(MEs);
rownames(allSampleMEs) = make.unique(originalSampleNames)
}
} else {
mergedAllColors[gsg$goodGenes] = mergedMods$colors;
allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(mergedMods$newMEs)));
allSampleMEs[gsg$goodSamples, ] = mergedMods$newMEs[,];
names(allSampleMEs) = names(mergedMods$newMEs);
rownames(allSampleMEs) = make.unique(originalSampleNames);
}
if (seedSaved) .Random.seed <<- savedSeed;
if (!saveTOMs) TOMFiles = NULL;
names(colors) = originalGeneNames;
names(mergedAllColors) = originalGeneNames;
list(colors = mergedAllColors,
unmergedColors = colors,
MEs = allSampleMEs,
goodSamples = gsg$goodSamples,
goodGenes = gsg$goodGenes,
dendrograms = dendros,
TOMFiles = TOMFiles,
blockGenes = blockGenes,
blocks = blocks,
MEsOK = MEsOK);
}
#==================================================================================
#
# Helper functions
#
#==================================================================================
# order labels by size
.orderLabelsBySize = function(labels, exclude = NULL)
{
levels.0 = sort(unique(labels));
levels = levels.0[ !levels.0 %in% exclude]
levels.excl = levels.0 [levels.0 %in% exclude]
rearrange = labels %in% levels;
tab = table(labels [ rearrange ]);
rank = rank(-tab, ties.method = "first");
oldOrder = c(levels.excl, names(tab));
newOrder = c(levels.excl, names(tab)[rank]);
if (is.numeric(labels)) newOrder = as.numeric(newOrder);
newOrder[ match(labels, oldOrder) ]
}
#======================================================================================================
#
# Re-cut trees for blockwiseModules
#
#======================================================================================================
recutBlockwiseTrees = function(datExpr,
goodSamples, goodGenes,
blocks,
TOMFiles,
dendrograms,
corType = "pearson",
networkType = "unsigned",
deepSplit = 2,
detectCutHeight = 0.995, minModuleSize = min(20, ncol(datExpr)/2 ),
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
pamStage = TRUE, pamRespectsDendro = TRUE,
# minKMEtoJoin =0.7,
minCoreKME = 0.5, minCoreKMESize = minModuleSize/3,
minKMEtoStay = 0.3,
reassignThreshold = 1e-6,
mergeCutHeight = 0.15, impute = TRUE,
trapErrors = FALSE, numericLabels = FALSE,
verbose = 0, indent = 0, ...)
{
spaces = indentSpaces(indent);
#if (verbose>0)
# printFlush(paste(spaces, "Calculating module eigengenes block-wise from all genes"));
cutreeLabels = list()
intCorType = pmatch(corType, .corTypes);
if (is.na(intCorType))
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType = charmatch(networkType, .networkTypes);
if (is.na(intNetworkType))
stop(paste("Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")));
dimEx = dim(datExpr);
if (length(dimEx)!=2) stop("datExpr has incorrect dimensions.")
nGenes = dimEx[2];
nSamples = dimEx[1];
allLabels = rep(0, nGenes);
AllMEs = NULL;
allLabelIndex = NULL;
originalSampleNames = rownames(datExpr);
if (is.null(originalSampleNames)) originalSampleNames = spaste("Row.", 1:nrow(datExpr));
originalGeneNames = colnames(datExpr);
if (is.null(originalGeneNames)) originalGeneNames = spaste("Column.", 1:ncol(datExpr));
if (length(blocks)!=nGenes)
stop("Input error: the length of 'geneRank' does not equal the number of genes in given 'datExpr'.");
# Check data for genes and samples that have too many missing values
nGGenes = sum(goodGenes)
nGSamples = sum(goodSamples);
gsg = list(goodSamples = goodSamples, goodGenes = goodGenes,
allOK =(sum(!goodSamples) + sum(!goodGenes) == 0));
if (!gsg$allOK) datExpr = datExpr[goodSamples, goodGenes];
gBlocks = blocks[gsg$goodGenes];
blockLevels = as.numeric(levels(factor(gBlocks)));
blockSizes = table(gBlocks)
nBlocks = length(blockLevels);
datExpr.scaled.imputed = t(impute.knn(t(scale(datExpr)))$data)
if (any(is.na(datExpr)))
datExpr.scaled.imputed = t(impute.knn(t(scale(datExpr)))$data)
corFnc = .corFnc[intCorType];
corOptions = list(use = 'p');
signed = networkType %in% c("signed", "signed hybrid");
# Set up advanced tree cut methods
otherArgs = list(...);
if (useBranchEigennodeDissim)
{
branchSplitFnc = list("branchEigengeneDissim");
externalSplitOptions = list(list( corFnc = corFnc, corOptions = corOptions,
signed = signed));
externalSplitFncNeedsDistance = FALSE;
nExternalBranchSplitFnc = 1;
minExternalSplit = minBranchEigennodeDissim;
} else {
branchSplitFnc = list();
externalSplitOptions = list(list())
externalSplitFncNeedsDistance = logical(0);
nExternalBranchSplitFnc = 0;
minExternalSplit = numeric(0);
}
if ("useBranchSplit" %in% names(otherArgs))
{
if (otherArgs$useBranchSplit)
{
nExternalBranchSplitFnc = nExternalBranchSplitFnc + 1;
branchSplitFnc[[nExternalBranchSplitFnc]] = "branchSplit"
externalSplitOptions[[nExternalBranchSplitFnc]] = list(discardProp = 0.08, minCentralProp = 0.75,
nConsideredPCs = 3, signed = signed, getDetails = FALSE);
minExternalSplit[ nExternalBranchSplitFnc] = otherArgs$minBranchSplit;
externalSplitFncNeedsDistance[ nExternalBranchSplitFnc] = FALSE;
}
}
# Initialize various variables
blockNo = 1;
maxUsedLabel = 0;
while (blockNo <= nBlocks)
{
if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]];
selExpr = as.matrix(datExpr[, block]);
nBlockGenes = length(block);
TOM = NULL; # Will be loaded below; this gets rid of warnings form Rcheck.
xx = try(load(TOMFiles[blockNo]), silent = TRUE);
if (inherits(xx, 'try-error'))
{
printFlush(paste("************\n File name", TOMFiles[blockNo],
"appears invalid: the load function returned the following error:\n ",
xx));
stop();
}
if (xx!='TOM')
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate variable."));
if (!inherits(TOM, "dist"))
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate distance structure."));
dissTom = as.matrix(1-TOM);
if (verbose>2) printFlush(paste(spaces, "....detecting modules.."));
datExpr.scaled.imputed.block = datExpr.scaled.imputed[, block];
if (nExternalBranchSplitFnc > 0) for (extBSFnc in 1:nExternalBranchSplitFnc)
externalSplitOptions[[extBSFnc]]$expr = datExpr.scaled.imputed.block;
blockLabels = try(cutreeDynamic(dendro = dendrograms[[blockNo]],
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method ="hybrid",
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = branchSplitFnc,
minExternalSplit = minExternalSplit,
externalSplitOptions = externalSplitOptions,
externalSplitFncNeedsDistance = externalSplitFncNeedsDistance,
assumeSimpleExternalSpecification = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
distM = dissTom,
verbose = verbose-3, indent = indent + 2), silent = TRUE);
collectGarbage();
cutreeLabels[[blockNo]] = blockLabels;
if (inherits(blockLabels, 'try-error'))
{
if (verbose>0)
{
printFlush(paste(spaces, "*** cutreeDynamic returned the following error:\n",
spaces, blockLabels, spaces,
"Stopping the module detection here."));
} else
warning(paste("blockwiseModules: cutreeDynamic returned the following error:\n",
" ", blockLabels, "---> Continuing with next block. "));
next;
}
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, "No modules detected in block", blockNo));
}
blockNo = blockNo + 1;
next;
}
blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel;
maxUsedLabel = max(blockLabels);
if (verbose>2) printFlush(paste(spaces, "....calculating module eigengenes.."));
MEs = try(moduleEigengenes(selExpr[, blockLabels!=0], blockLabels[blockLabels!=0], impute = impute,
# subHubs = TRUE, trapErrors = FALSE,
verbose = verbose - 3, indent = indent + 2), silent = TRUE);
if (inherits(MEs, 'try-error'))
{
if (trapErrors)
{
if (verbose>0) {
printFlush(paste(spaces, "*** moduleEigengenes failed with the following message:"));
printFlush(paste(spaces, " ", MEs));
printFlush(paste(spaces, " ---> Stopping module detection here."));
} else
warning(paste("blockwiseModules: moduleEigengenes failed with the following message:",
"\n ", MEs, "---> Continuing with next block. "));
next;
} else stop(MEs);
}
#propMEs = as.data.frame(MEs$eigengenes[, names(MEs$eigengenes)!="ME0"]);
propMEs = MEs$eigengenes;
blockLabelIndex = as.numeric(substring(names(propMEs), 3));
deleteModules = NULL;
changedModules = NULL;
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff.
if (verbose>2) printFlush(paste(spaces, "....checking modules for statistical meaningfulness.."));
for (mod in 1:ncol(propMEs))
{
modGenes = (blockLabels==blockLabelIndex[mod]);
corEval = parse(text = paste(corFnc, "(selExpr[, modGenes], propMEs[, mod]",
prepComma(.corOptions[intCorType]), ")"));
KME = as.vector(eval(corEval));
if (intNetworkType==1) KME = abs(KME);
if (sum(KME>minCoreKME) < minCoreKMESize)
{
blockLabels[modGenes] = 0;
deleteModules = c(deleteModules, mod);
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ", mod, ": of ", sum(modGenes),
" total genes in the module\n only ", sum(KME>minCoreKME),
" have the requisite high correlation with the eigengene.", sep=""));
} else if (sum(KME<minKMEtoStay)>0)
{
# Remove genes whose KME is too low:
if (verbose > 2)
printFlush(paste(spaces, " ..removing", sum(KME<minKMEtoStay),
"genes from module", mod, "because their KME is too low."));
blockLabels[modGenes][KME < minKMEtoStay] = 0;
if (sum(blockLabels[modGenes]>0) < minModuleSize)
{
deleteModules = c(deleteModules, mod);
blockLabels[modGenes] = 0;
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod],
": not enough genes in the module after removal of low KME genes.", sep=""));
} else {
changedModules = union(changedModules, blockLabelIndex[mod]);
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules))
{
propMEs = propMEs[, -deleteModules, drop = FALSE];
modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules]));
blockLabels[modGenes] = 0;
modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules]));
allLabels[modAllGenes] = 0;
blockLabelIndex = blockLabelIndex[-deleteModules];
}
# Check if any modules are left
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, "No significant modules detected in block", blockNo));
}
blockNo = blockNo + 1;
next;
}
# Update allMEs and allLabels
if (is.null(AllMEs))
{
AllMEs = propMEs;
} else
AllMEs = cbind(AllMEs, propMEs);
allLabelIndex = c(allLabelIndex, blockLabelIndex);
assigned = block[blockLabels!=0];
allLabels[assigned] = blockLabels[blockLabels!=0];
rm(dissTom);
collectGarbage();
blockNo = blockNo + 1;
}
# Check whether any of the already assigned genes should be re-assigned
deleteModules = NULL;
goodLabels = allLabels[gsg$goodGenes];
if (sum(goodLabels!=0) > 0)
{
propLabels = goodLabels[goodLabels!=0];
assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0];
corEval = parse(text = paste(corFnc, "(datExpr[, goodLabels!=0], AllMEs",
prepComma(.corOptions[intCorType]), ")"));
KME = eval(corEval);
if (intNetworkType == 1) KME = abs(KME)
nMods = ncol(AllMEs);
for (mod in 1:nMods)
{
modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]];
KMEmodule = KME[modGenes, mod];
KMEbest = apply(KME[modGenes, , drop = FALSE], 1, max);
candidates = (KMEmodule < KMEbest);
candidates[!is.finite(candidates)] = FALSE;
if (sum(candidates) > 0)
{
pModule = corPvalueFisher(KMEmodule[candidates], nSamples);
whichBest = apply(KME[modGenes[candidates], , drop = FALSE], 1, which.max);
pBest = corPvalueFisher(KMEbest[candidates], nSamples);
reassign = ifelse(is.finite(pBest/pModule), (pBest/pModule < reassignThreshold), FALSE);
if (sum(reassign)>0)
{
if (verbose > 2)
printFlush(paste(spaces, " ..reassigning", sum(reassign),
"genes from module", mod, "to modules with higher KME."));
allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign];
changedModules = union(changedModules, whichBest[reassign]);
if (length(modGenes)-sum(reassign) < minModuleSize)
{
deleteModules = c(deleteModules, mod);
} else
changedModules = union(changedModules, mod);
}
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules))
{
AllMEs = AllMEs[, -deleteModules, drop = FALSE];
genes = is.finite(match(allLabels, allLabelIndex[deleteModules]));
allLabels[genes] = 0;
allLabelIndex = allLabelIndex[-deleteModules];
goodLabels = allLabels[gsg$goodGenes];
}
if (verbose>1) printFlush(paste(spaces, "..merging modules that are too close.."));
if (numericLabels) {
colors = allLabels
} else {
colors = labels2colors(allLabels)
}
mergedAllColors = colors;
MEsOK = TRUE;
mergedMods = try(mergeCloseModules(datExpr, colors[gsg$goodGenes], cutHeight = mergeCutHeight,
relabel = TRUE, # trapErrors = FALSE,
impute = impute,
verbose = verbose-2, indent = indent + 2), silent = TRUE);
if (inherits(mergedMods, 'try-error'))
{
warning(paste("blockwiseModules: mergeCloseModules failed with the following error message:\n ",
mergedMods, "\n--> returning unmerged colors.\n"));
MEs = try(moduleEigengenes(datExpr, colors[gsg$goodGenes], # subHubs = TRUE, trapErrors = FALSE,
impute = impute,
verbose = verbose-3, indent = indent+3), silent = TRUE);
if (inherits(MEs, 'try-error'))
{
if (!trapErrors) stop(MEs);
if (verbose>0)
{
printFlush(paste(spaces, "*** moduleEigengenes failed with the following error message:"));
printFlush(paste(spaces, " ", MEs));
printFlush(paste(spaces, "*** returning no module eigengenes.\n"));
} else
warning(paste("blockwiseModules: moduleEigengenes failed with the following error message:\n ",
MEs, "\n--> returning no module eigengenes.\n"));
allSampleMEs = NULL;
MEsOK = FALSE;
} else {
if (sum(!MEs$validMEs)>0)
{
colors[gsg$goodGenes] = MEs$validColors;
MEs = MEs$eigengenes[, MEs$validMEs];
} else MEs = MEs$eigengenes;
allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(MEs)));
allSampleMEs[gsg$goodSamples, ] = MEs[,];
names(allSampleMEs) = names(MEs);
}
} else {
mergedAllColors[gsg$goodGenes] = mergedMods$colors;
allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(mergedMods$newMEs)));
allSampleMEs[gsg$goodSamples, ] = mergedMods$newMEs[,];
names(allSampleMEs) = names(mergedMods$newMEs);
rownames(allSampleMEs) = make.unique(originalSampleNames);
}
names(colors) = originalGeneNames;
names(mergedAllColors) = originalGeneNames;
list(colors = mergedAllColors,
unmergedColors = colors,
cutreeLabels = cutreeLabels,
MEs = allSampleMEs,
#goodSamples = gsg$goodSamples,
#goodGenes = gsg$goodGenes,
#dendrograms = dendrograms,
#TOMFiles = TOMFiles,
#blockGenes = blockGenes,
MEsOK = MEsOK);
}
#==========================================================================================================
#
# blockwiseIndividualTOMs
#
#==========================================================================================================
# This function calculates and saves blockwise topological overlaps for a given multi expression data. The
# argument blocks can be given to specify blocks, or the blocks can be omitted and will be calculated if
# necessary.
# Note on naming of output files: %s will translate into set number, %N into set name (if given in
# multiExpr), %b into block number.
.substituteTags = function(format, tags, replacements)
{
nTags = length(tags);
if (length(replacements)!= nTags) stop("Length of tags and replacements must be the same.");
for (t in 1:nTags)
format = gsub(as.character(tags[t]), as.character(replacements[t]), format, fixed = TRUE);
format;
}
.processFileName = function(format, setNumber, setNames, blockNumber, analysisName = "")
{
# The following is a workaround around empty (NULL) setNames. Replaces the name with the setNumber.
if (is.null(setNames)) setNames = rep(setNumber, setNumber)
.substituteTags(format, c("%s", "%N", "%b", "%a"),
c(setNumber, setNames[setNumber], blockNumber, analysisName));
}
blockwiseIndividualTOMs = function(
multiExpr,
multiWeights = NULL,
# Data checking options
checkMissingData = TRUE,
# Blocking options
blocks = NULL,
maxBlockSize = 5000,
blockSizePenaltyPower = 5,
nPreclusteringCenters = NULL,
randomSeed = 54321,
# Network construction arguments: correlation options
corType = "pearson",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
# Adjacency function options
power = 6,
networkType = "unsigned",
checkPower = TRUE,
replaceMissingAdjacencies = FALSE,
# Topological overlap options
TOMType = "unsigned",
TOMDenom = "min",
suppressTOMForZeroAdjacencies = FALSE,
suppressNegativeTOM = FALSE,
# Save individual TOMs? If not, they will be returned in the session.
saveTOMs = TRUE,
individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",
# General options
nThreads = 0,
useInternalMatrixAlgebra = FALSE,
verbose = 2, indent = 0)
{
spaces = indentSpaces(indent);
dataSize = checkSets(multiExpr, checkStructure = TRUE);
if (dataSize$structureOK)
{
nSets = dataSize$nSets;
nGenes = dataSize$nGenes;
multiFormat = TRUE;
} else {
multiExpr = multiData(multiExpr);
multiWeights = multiData(multiWeights);
nSets = dataSize$nSets;
nGenes = dataSize$nGenes;
multiFormat = FALSE;
}
.checkAndScaleMultiWeights(multiWeights, multiExpr, scaleByMax = FALSE);
if (length(power)!=1)
{
if (length(power)!=nSets)
stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'.");
} else {
power = rep(power, nSets);
}
#if (maxBlockSize >= floor(sqrt(2^31)) )
# stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (!is.null(blocks) && (length(blocks)!=nGenes))
stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'.");
if (verbose>0)
printFlush(paste(spaces, "Calculating topological overlaps block-wise from all genes"));
intCorType = pmatch(corType, .corTypes);
if (is.na(intCorType))
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
intTOMType = pmatch(TOMType, .TOMTypes);
if (is.na(intTOMType))
stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", ")))
TOMDenomC = pmatch(TOMDenom, .TOMDenoms)-1;
if (is.na(TOMDenomC))
stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", ")))
if ( checkPower & ((sum(power<1)>0) | (sum(power>50)>0) ) ) stop("power must be between 1 and 50.");
intNetworkType = charmatch(networkType, .networkTypes);
if (is.na(intNetworkType))
stop(paste("Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")));
if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.");
if (quickCor < 0) stop("quickCor must be positive.");
fallback = pmatch(pearsonFallback, .pearsonFallbacks)
if (is.na(fallback))
stop(paste("Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n",
paste(.pearsonFallbacks, collapse = ", ")))
if (nThreads < 0) stop("nThreads must be positive.");
if (is.null(nThreads) || (nThreads==0)) nThreads = .useNThreads();
nSamples = dataSize$nSamples;
# Check data for genes and samples that have too many missing values
if (checkMissingData)
{
gsg = goodSamplesGenesMS(multiExpr, verbose = verbose - 1, indent = indent + 1)
if (!gsg$allOK)
{
multiExpr = mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes);
if (!is.null(multiWeights)) multiWeights = mtd.subset(multiWeights, gsg$goodSamples, gsg$goodGenes);
}
} else {
gsg = list(goodGenes = rep(TRUE, nGenes), goodSamples = lapply(nSamples, function(n) rep(TRUE, n)),
allOK = TRUE);
}
nGGenes = sum(gsg$goodGenes);
nGSamples = rep(0, nSets);
for (set in 1:nSets) nGSamples[set] = sum(gsg$goodSamples[[set]]);
if (is.null(blocks))
{
if (nGGenes > maxBlockSize)
{
if (verbose>1) printFlush(paste(spaces, "....pre-clustering genes to determine blocks.."));
clustering = consensusProjectiveKMeans(multiExpr, preferredSize = maxBlockSize,
sizePenaltyPower = blockSizePenaltyPower, checkData = FALSE,
nCenters = nPreclusteringCenters,
randomSeed = randomSeed,
verbose = verbose-2, indent = indent + 1);
gBlocks = .orderLabelsBySize(clustering$clusters);
} else
gBlocks = rep(1, nGGenes);
blocks = rep(NA, nGenes);
blocks[gsg$goodGenes] = gBlocks;
} else {
gBlocks = blocks[gsg$goodGenes];
}
blockLevels = as.numeric(levels(factor(gBlocks)));
blockSizes = table(gBlocks)
nBlocks = length(blockLevels);
if (any(blockSizes > sqrt(2^31)-1))
printFlush(spaste(spaces,
"Found block(s) with size(s) larger than limit of 'int' indexing.\n",
spaces, " Support for such large blocks is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."));
# check file names for uniqueness
actualFileNames = NULL;
if (saveTOMs)
{
actualFileNames = matrix("", nSets, nBlocks);
for (set in 1:nSets) for (b in 1:nBlocks)
actualFileNames[set, b] = .processFileName(individualTOMFileNames, set, names(multiExpr), b);
rownames(actualFileNames) = spaste("Set.", c(1:nSets));
colnames(actualFileNames) = spaste("Block.", c(1:nBlocks));
if (length(unique(as.vector(actualFileNames))) < nSets * nBlocks)
{
printFlush("Error: File names for (some) set/block combinations are not unique:");
print(actualFileNames);
stop("File names must be unique.");
}
}
# Initialize various variables
blockGenes = list();
blockNo = 1;
collectGarbage();
setTomDS = list();
# Here's where the analysis starts
for (blockNo in 1:nBlocks)
{
if (verbose>1 && nBlocks > 1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
# Select the block genes
block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]];
nBlockGenes = length(block);
blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]];
errorOccurred = FALSE;
# Set up file names or memory space to hold the set TOMs
if (!saveTOMs)
{
setTomDS[[blockNo]] = array(0, dim = c(nBlockGenes*(nBlockGenes-1)/2, nSets));
}
# For each set: calculate and save TOM
for (set in 1:nSets)
{
if (verbose>2) printFlush(paste(spaces, "....Working on set", set))
selExpr = as.matrix(multiExpr[[set]]$data[, block]);
if (!is.null(multiWeights)) selWeights = as.matrix(multiWeights[[set]]$data[, block]) else
selWeights = NULL;
# Calculate TOM by calling a custom C function:
callVerb = max(0, verbose - 1); callInd = indent + 2;
CcorType = intCorType - 1;
CnetworkType = intNetworkType - 1;
CTOMType = intTOMType - 1;
# tempExpr = as.double(as.matrix(selExpr));
warn = 0L;
tom = .Call("tomSimilarity_call", selExpr, selWeights,
as.integer(CcorType), as.integer(CnetworkType), as.double(power[set]), as.integer(CTOMType),
as.integer(TOMDenomC),
as.double(maxPOutliers),
as.double(quickCor),
as.integer(fallback),
as.integer(cosineCorrelation),
as.integer(replaceMissingAdjacencies),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(suppressNegativeTOM),
as.integer(useInternalMatrixAlgebra),
warn, as.integer(nThreads),
as.integer(callVerb), as.integer(callInd), PACKAGE = "WGCNA");
# FIXME: warn if necessary
tomDS = as.dist(tom);
# dim(tom) = c(nBlockGenes, nBlockGenes);
rm(tom);
# Save the calculated TOM either to disk in chunks or to memory.
if (saveTOMs)
{
save(tomDS, file = actualFileNames[set, blockNo]);
} else {
setTomDS[[blockNo]] [, set] = tomDS[];
}
}
rm(tomDS); collectGarbage();
}
if (!multiFormat)
{
gsg$goodSamples = gsg$goodSamples[[1]];
}
list(actualTOMFileNames = actualFileNames,
TOMSimilarities = if(!saveTOMs) setTomDS else NULL,
blocks = blocks,
blockGenes = blockGenes,
goodSamplesAndGenes = gsg,
nGGenes = nGGenes,
gBlocks = gBlocks,
nThreads = nThreads,
saveTOMs = saveTOMs,
intNetworkType = intNetworkType,
intCorType = intCorType,
nSets = nSets,
setNames = names(multiExpr)
)
}
#==========================================================================================================
#
# lowerTri2matrix
#
#==========================================================================================================
lowerTri2matrix = function(x, diag = 1)
{
if (inherits(x, "dist"))
{
mat = as.matrix(x)
} else {
n = length(x);
n1 = (1 + sqrt(1 + 8*n))/2
if (floor(n1)!=n1) stop("Input length does not translate into matrix");
mat = matrix(0, n1, n1);
mat[lower.tri(mat)] = x;
mat = mat + t(mat);
}
diag(mat) = diag;
mat;
}
#==========================================================================================================
#
# blockwiseConsensusModules
#
#==========================================================================================================
.checkComponents = function(object, names)
{
objNames = names(object);
inObj = names %in% objNames;
if (!all(inObj))
stop(".checkComponents: object is missing the following components:\n",
paste(names[!inObj], collapse = ", "));
}
# Function to calculate consensus modules and eigengenes from all genes.
blockwiseConsensusModules = function(
multiExpr,
# Data checking options
checkMissingData = TRUE,
# Blocking options
blocks = NULL,
maxBlockSize = 5000,
blockSizePenaltyPower = 5,
nPreclusteringCenters = NULL,
randomSeed = 54321,
# individual TOM information
individualTOMInfo = NULL,
useIndivTOMSubset = NULL,
# Network construction arguments: correlation options
corType = "pearson",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
# Adjacency function options
power = 6,
networkType = "unsigned",
checkPower = TRUE,
replaceMissingAdjacencies = FALSE,
# Topological overlap options
TOMType = "unsigned",
TOMDenom = "min",
suppressNegativeTOM = FALSE,
# Save individual TOMs?
saveIndividualTOMs = TRUE,
individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",
# Consensus calculation options: network calibration
networkCalibration = c("single quantile", "full quantile", "none"),
## Save scaled TOMs? <-- leave this option for users willing to run consensusTOM on its own
#saveScaledIndividualTOMs = FALSE,
#scaledIndividualTOMFilePattern = "scaledIndividualTOM-Set%s-Block%b.RData",
# Simple quantile calibration options
calibrationQuantile = 0.95,
sampleForCalibration = TRUE, sampleForCalibrationFactor = 1000,
getNetworkCalibrationSamples = FALSE,
# Consensus definition
consensusQuantile = 0,
useMean = FALSE,
setWeights = NULL,
# Saving the consensus TOM
saveConsensusTOMs = FALSE,
consensusTOMFilePattern = "consensusTOM-block.%b.RData",
# Internal handling of TOMs
useDiskCache = TRUE, chunkSize = NULL,
cacheBase = ".blockConsModsCache",
cacheDir = ".",
# Alternative consensus TOM input from a previous calculation
consensusTOMInfo = NULL,
# Basic tree cut options
deepSplit = 2,
detectCutHeight = 0.995, minModuleSize = 20,
checkMinModuleSize = TRUE,
# Advanced tree cut opyions
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
stabilityLabels = NULL,
minStabilityDissim = NULL,
pamStage = TRUE, pamRespectsDendro = TRUE,
# Gene joining and removal from a module, and module "significance" criteria
reassignThresholdPS = 1e-4,
trimmingConsensusQuantile = consensusQuantile,
# minKMEtoJoin =0.7,
minCoreKME = 0.5, minCoreKMESize = minModuleSize/3,
minKMEtoStay = 0.2,
# Module eigengene calculation options
impute = TRUE,
trapErrors = FALSE,
# Module merging options
equalizeQuantilesForModuleMerging = FALSE,
quantileSummaryForModuleMerging = "mean",
mergeCutHeight = 0.15,
mergeConsensusQuantile = consensusQuantile,
# Output options
numericLabels = FALSE,
# General options
nThreads = 0,
verbose = 2, indent = 0,
...)
{
spaces = indentSpaces(indent);
dataSize = checkSets(multiExpr);
nSets = dataSize$nSets;
nGenes = dataSize$nGenes;
if (length(power)!=1)
{
if (length(power)!=nSets)
stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'.");
} else {
power = rep(power, nSets);
}
seedSaved = FALSE;
if (!is.null(randomSeed))
{
if (exists(".Random.seed"))
{
seedSaved = TRUE;
savedSeed = .Random.seed
}
set.seed(randomSeed);
}
if ( (consensusQuantile < 0) | (consensusQuantile > 1) )
stop("'consensusQuantile' must be between 0 and 1.");
if (checkMinModuleSize & (minModuleSize > nGenes/2))
{
minModuleSize = nGenes/2;
warning("blockwiseConsensusModules: minModuleSize appeared too large and was lowered to",
minModuleSize,
". If you insist on keeping the original minModuleSize, set checkMinModuleSize = FALSE.");
}
if (verbose>0)
printFlush(paste(spaces, "Calculating consensus modules and module eigengenes",
"block-wise from all genes"));
originalGeneNames = mtd.colnames(multiExpr);
if (is.null(originalGeneNames)) originalGeneNames = spaste("Column.", 1:nGenes)
originalSampleNames = mtd.apply(multiExpr, function(x)
{
out = rownames(x);
if (is.null(out)) out = spaste("Row.", 1:nrow(x));
out;
});
branchSplitFnc = NULL;
minBranchDissimilarities = numeric(0);
externalSplitFncNeedsDistance = logical(0);
if (useBranchEigennodeDissim)
{
branchSplitFnc = "mtd.branchEigengeneDissim";
minBranchDissimilarities = minBranchEigennodeDissim;
externalSplitFncNeedsDistance = FALSE;
}
if (!is.null(stabilityLabels))
{
branchSplitFnc = c(branchSplitFnc, "branchSplitFromStabilityLabels");
minBranchDissimilarities = c(minBranchDissimilarities, minStabilityDissim);
externalSplitFncNeedsDistance = c(externalSplitFncNeedsDistance, FALSE);
}
# Basic checks on consensusTOMInfo
if (!is.null(consensusTOMInfo))
{
.checkComponents(consensusTOMInfo, c("saveConsensusTOMs", "individualTOMInfo", "goodSamplesAndGenes"));
if (length(consensusTOMInfo$individualTOMInfo$blocks)!=nGenes)
stop("Inconsistent number of genes in 'consensusTOMInfo$individualTOMInfo$blocks'.");
if (!is.null(consensusTOMInfo$consensusQuantile) &&
(consensusQuantile!=consensusTOMInfo$consensusQuantile) )
warning(immediate. = TRUE,
"blockwiseConsensusModules: given (possibly default) 'consensusQuantile' is different\n",
"from the value recorded in 'consensusTOMInfo'. This is normally undesirable and may\n",
"indicate a mistake in the function call.");
}
# Handle "other arguments"
args = list(...);
if (is.null(args$reproduceBranchEigennodeQuantileError))
{
reproduceBranchEigennodeQuantileError = FALSE;
} else reproduceBranchEigennodeQuantileError = args$reproduceBranchEigennodeQuantileError;
# If topological overlaps weren't calculated yet, calculate them.
removeIndividualTOMsOnExit = FALSE;
nBlocks.0 = length(unique(blocks));
if (is.null(individualTOMInfo))
{
if (is.null(consensusTOMInfo))
{
individualTOMInfo = blockwiseIndividualTOMs(multiExpr = multiExpr,
checkMissingData = checkMissingData,
blocks = blocks,
maxBlockSize = maxBlockSize,
blockSizePenaltyPower = blockSizePenaltyPower,
nPreclusteringCenters = nPreclusteringCenters,
randomSeed = randomSeed,
corType = corType,
maxPOutliers = maxPOutliers,
quickCor = quickCor,
pearsonFallback = pearsonFallback,
cosineCorrelation = cosineCorrelation,
power = power,
networkType = networkType,
replaceMissingAdjacencies= replaceMissingAdjacencies,
TOMType = TOMType,
TOMDenom = TOMDenom,
suppressNegativeTOM = suppressNegativeTOM,
saveTOMs = useDiskCache | nBlocks.0>1,
individualTOMFileNames = individualTOMFileNames,
nThreads = nThreads,
verbose = verbose, indent = indent);
removeIndividualTOMsOnExit = TRUE;
} else
individualTOMInfo = consensusTOMInfo$individualTOMInfo;
}
if (is.null(useIndivTOMSubset))
{
if (individualTOMInfo$nSets != nSets)
stop(paste("Number of sets in individualTOMInfo and in multiExpr do not agree.\n",
" To use a subset of individualTOMInfo, set useIndivTOMSubset appropriately."));
useIndivTOMSubset = c(1:nSets);
}
if (length(useIndivTOMSubset)!=nSets)
stop("Length of 'useIndivTOMSubset' must equal the number of sets in 'multiExpr'");
if (length(unique(useIndivTOMSubset))!=nSets)
stop("Entries of 'useIndivTOMSubset' must be unique");
if (any(useIndivTOMSubset<1) | any(useIndivTOMSubset>individualTOMInfo$nSets))
stop("All entries of 'useIndivTOMSubset' must be between 1 and the number of sets in individualTOMInfo");
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType = individualTOMInfo$intNetworkType;
intCorType = individualTOMInfo$intCorType;
corFnc = match.fun(.corFnc[intCorType]);
corOptions = list(use = 'p');
fallback = pmatch(pearsonFallback, .pearsonFallbacks)
nSamples = dataSize$nSamples;
allLabels = rep(0, nGenes);
allLabelIndex = NULL;
# Restrict data to goodSamples and goodGenes
gsg = individualTOMInfo$goodSamplesAndGenes;
# Restrict gsg to used sets
gsg$goodSamples = gsg$goodSamples[useIndivTOMSubset];
if (!gsg$allOK)
multiExpr = mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes);
# prepare scaled and imputed multiExpr.
multiExpr.scaled = mtd.apply(multiExpr, scale);
hasMissing = unlist(multiData2list(mtd.apply(multiExpr, function(x) { any(is.na(x)) })));
# Impute those that have missing data
multiExpr.scaled.imputed = mtd.mapply(function(x, doImpute)
{ if (doImpute) t(impute.knn(t(x))$data) else x },
multiExpr.scaled, hasMissing);
nGGenes = sum(gsg$goodGenes);
nGSamples = rep(0, nSets);
for (set in 1:nSets) nGSamples[set] = sum(gsg$goodSamples[[ set ]]);
blocks = individualTOMInfo$blocks;
gBlocks = individualTOMInfo$gBlocks;
blockLevels = sort(unique(gBlocks));
blockSizes = table(gBlocks)
nBlocks = length(blockLevels);
if (is.null(chunkSize)) chunkSize = as.integer(.largestBlockSize/nSets)
reassignThreshold = reassignThresholdPS^nSets;
consMEs = vector(mode = "list", length = nSets);
dendros = list();
maxUsedLabel = 0;
collectGarbage();
# Here's where the analysis starts
removeConsensusTOMOnExit = FALSE;
if (is.null(consensusTOMInfo) && (nBlocks==1 || saveConsensusTOMs || getNetworkCalibrationSamples))
{
consensusTOMInfo = consensusTOM(
individualTOMInfo = individualTOMInfo,
useIndivTOMSubset = useIndivTOMSubset,
networkCalibration = networkCalibration,
saveCalibratedIndividualTOMs = FALSE,
calibrationQuantile = calibrationQuantile,
sampleForCalibration = sampleForCalibration,
sampleForCalibrationFactor = sampleForCalibrationFactor,
getNetworkCalibrationSamples = getNetworkCalibrationSamples,
consensusQuantile = consensusQuantile,
useMean = useMean,
setWeights = setWeights,
# Return options
saveConsensusTOMs = saveConsensusTOMs,
consensusTOMFilePattern = consensusTOMFilePattern,
returnTOMs = nBlocks==1,
# Internal handling of TOMs
useDiskCache = useDiskCache,
chunkSize = chunkSize,
cacheBase = cacheBase,
cacheDir = cacheDir,
verbose = verbose, indent = indent);
removeConsensusTOMOnExit = !saveConsensusTOMs;
}
blockwiseConsensusCalculation = is.null(consensusTOMInfo);
for (blockNo in 1:nBlocks)
{
if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
# Select block genes
block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]];
nBlockGenes = length(block);
selExpr = mtd.subset(multiExpr, , block);
errorOccurred = FALSE;
if (blockwiseConsensusCalculation)
{
# This code is only reached if input saveConsensusTOMs is FALSE and there are at least 2 blocks.
consensusTOMInfo = consensusTOM(
individualTOMInfo = individualTOMInfo,
useIndivTOMSubset = useIndivTOMSubset,
useBlocks = blockNo,
networkCalibration = networkCalibration,
saveCalibratedIndividualTOMs = FALSE,
calibrationQuantile = calibrationQuantile,
sampleForCalibration = sampleForCalibration,
sampleForCalibrationFactor = sampleForCalibrationFactor,
getNetworkCalibrationSamples = FALSE,
consensusQuantile = consensusQuantile,
useMean = useMean,
setWeights = setWeights,
saveConsensusTOMs = FALSE,
returnTOMs = TRUE,
useDiskCache = useDiskCache,
chunkSize = chunkSize,
cacheBase = cacheBase,
cacheDir = cacheDir);
consTomDS = consensusTOMInfo$consensusTOM[[1]];
# Remove the consensus TOM from the structure.
consensusTOMInfo$consensusTOM[[1]] = NULL;
consensusTOMInfo$consensusTOM = NULL;
} else {
if (consensusTOMInfo$saveConsensusTOMs)
{
consTomDS = .loadObject(file = consensusTOMInfo$TOMFiles[blockNo],
size = nBlockGenes * (nBlockGenes-1)/2);
} else
consTomDS = consensusTOMInfo$consensusTOM[[blockNo]];
}
# Temporary "cast" so fastcluster::hclust doesn't complain about non-integer size.
attr(consTomDS, "Size") = as.integer(attr(consTomDS, "Size"));
consTomDS = 1-consTomDS;
collectGarbage();
if (verbose>2) printFlush(paste(spaces, "....clustering and detecting modules.."));
errorOccured = FALSE;
dendros[[blockNo]] = fastcluster::hclust(consTomDS, method = "average");
if (verbose > 8)
{
if (interactive())
plot(dendros[[blockNo]], labels = FALSE, main = paste("Block", blockNo));
}
externalSplitOptions = list();
e.index = 1;
if (useBranchEigennodeDissim)
{
externalSplitOptions[[e.index]] = list(multiExpr = mtd.subset(multiExpr.scaled.imputed,, block),
corFnc = corFnc, corOptions = corOptions,
consensusQuantile = consensusQuantile,
signed = networkType %in% c("signed", "signed hybrid"),
reproduceQuantileError = reproduceBranchEigennodeQuantileError);
e.index = e.index +1;
}
if (!is.null(stabilityLabels))
{
externalSplitOptions[[e.index]] = list(stabilityLabels = stabilityLabels);
e.index = e.index + 1;
}
collectGarbage();
#blockLabels = try(cutreeDynamic(dendro = dendros[[blockNo]],
blockLabels = cutreeDynamic(dendro = dendros[[blockNo]],
distM = as.matrix(consTomDS),
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method ="hybrid",
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = branchSplitFnc,
minExternalSplit = minBranchDissimilarities,
externalSplitOptions = externalSplitOptions,
externalSplitFncNeedsDistance = externalSplitFncNeedsDistance,
assumeSimpleExternalSpecification = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
verbose = verbose-3, indent = indent + 2)
#verbose = verbose-3, indent = indent + 2), silent = TRUE);
if (verbose > 8)
{
print(table(blockLabels));
if (interactive())
plotDendroAndColors(dendros[[blockNo]], labels2colors(blockLabels), dendroLabels = FALSE,
main = paste("Block", blockNo));
}
if (inherits(blockLabels, 'try-error'))
{
(if (verbose>0) printFlush else warning)
(paste(spaces, "blockwiseConsensusModules: cutreeDynamic failed:\n ", spaces,
blockLabels, "\n", spaces, " Error occured in block", blockNo, "\n",
spaces, " Continuing with next block. "));
next;
} else {
blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel;
maxUsedLabel = max(blockLabels);
}
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, "No modules detected in block", blockNo,
"--> continuing with next block."))
}
next;
}
# Calculate eigengenes for this batch
if (verbose>2) printFlush(paste(spaces, "....calculating eigengenes.."));
blockAssigned = c(1:nBlockGenes)[blockLabels!=0];
blockLabelIndex = as.numeric(levels(as.factor(blockLabels[blockAssigned])));
blockConsMEs = try(multiSetMEs(selExpr, universalColors = blockLabels,
excludeGrey = TRUE, grey = 0, impute = impute,
# trapErrors = TRUE, returnValidOnly = TRUE,
verbose = verbose-4, indent = indent + 3), silent = TRUE);
if (inherits(blockConsMEs, 'try-error'))
{
if (verbose>0)
{
printFlush(paste(spaces, "*** multiSetMEs failed with the message:"));
printFlush(paste(spaces, " ", blockConsMEs));
printFlush(paste(spaces, "*** --> Ending module detection here"));
} else warning(paste("blocwiseConsensusModules: multiSetMEs failed with the message: \n",
" ", blockConsMEs, "\n--> continuing with next block."));
next;
}
deleteModules = NULL;
changedModules = NULL;
# find genes whose closest module eigengene has cor higher than minKMEtoJoin and assign them
# Removed - should not change blocks before clustering them
#unassGenes = c(c(1:nGGenes)[-block][allLabels[-block]==0], block[blockLabels==0]);
#if (length(unassGenes) > 0)
#{
#blockKME = array(0, dim = c(length(unassGenes), ncol(blockConsMEs[[1]]$data), nSets));
#corEval = parse(text = paste(.corFnc[intCorType],
#"( multiExpr[[set]]$data[, unassGenes], blockConsMEs[[set]]$data,",
#.corOptions[intCorType], ")"))
#for (set in 1:nSets) blockKME[, , set] = eval(corEval);
#if (intNetworkType==1) blockKME = abs(blockKME);
#consKME = as.matrix(apply(blockKME, c(1,2), min));
#consKMEmax = apply(consKME, 1, max);
#closestModule = blockLabelIndex[apply(consKME, 1, which.max)];
#assign = (consKMEmax >= minKMEtoJoin );
#if (sum(assign>0))
#{
#allLabels[unassGenes[assign]] = closestModule[assign];
#changedModules = union(changedModules, closestModule[assign]);
#}
#rm(blockKME, consKME, consKMEmax);
#}
collectGarbage();
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff, and that all member genes have
# the required minimum consensus KME
if (verbose>2)
printFlush(paste(spaces, "....checking consensus modules for statistical meaningfulness.."));
for (mod in 1:ncol(blockConsMEs[[1]]$data))
{
modGenes = (blockLabels==blockLabelIndex[mod]);
KME = matrix(0, nrow = sum(modGenes), ncol = nSets);
corEval = parse(text = paste(.corFnc[intCorType],
"( selExpr[[set]]$data[, modGenes], blockConsMEs[[set]]$data[, mod]",
prepComma(.corOptions[intCorType]), ")"))
for (set in 1:nSets) KME[, set] = as.vector(eval(corEval));
if (intNetworkType==1) KME = abs(KME);
consKME = apply(KME, 1, quantile, probs = trimmingConsensusQuantile, names = FALSE, na.rm = TRUE);
if (sum(consKME>minCoreKME) < minCoreKMESize)
{
blockLabels[modGenes] = 0;
deleteModules = union(deleteModules, mod);
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod],
": of ", sum(modGenes),
" total genes in the module only ", sum(consKME>minCoreKME),
" have the requisite high correlation with the eigengene in all sets.", sep=""));
} else if (sum(consKME<minKMEtoStay)>0)
{
if (verbose > 3)
printFlush(paste(spaces, " ..removing", sum(consKME<minKMEtoStay),
"genes from module", blockLabelIndex[mod], "because their KME is too low."));
blockLabels[modGenes][consKME < minKMEtoStay] = 0;
if (sum(blockLabels[modGenes]>0) < minModuleSize)
{
deleteModules = union(deleteModules, mod);
blockLabels[modGenes] = 0;
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod],
": not enough genes in the module after removal of low KME genes.", sep=""));
} else {
changedModules = union(changedModules, blockLabelIndex[mod]);
}
}
}
# Remove marked modules
if (!is.null(deleteModules))
{
for (set in 1:nSets) blockConsMEs[[set]]$data = blockConsMEs[[set]]$data[, -deleteModules];
modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules]));
blockLabels[modGenes] = 0;
modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules]));
allLabels[modAllGenes] = 0;
blockLabelIndex = blockLabelIndex[-deleteModules];
}
# Check whether there's anything left
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, " ..No significant modules detected in block", blockNo))
printFlush(paste(spaces, " ..continuing with next block."));
}
next;
}
# Update module eigengenes
for (set in 1:nSets)
if (is.null(dim(blockConsMEs[[set]]$data)))
dim(blockConsMEs[[set]]$data) = c(length(blockConsMEs[[set]]$data), 1);
if (is.null(consMEs[[1]]))
{
for (set in 1:nSets) consMEs[[set]] = list(data = blockConsMEs[[set]]$data);
} else for (set in 1:nSets)
consMEs[[set]]$data = cbind(consMEs[[set]]$data, blockConsMEs[[set]]$data);
# Update allLabels
allLabelIndex = c(allLabelIndex, blockLabelIndex);
allLabels[gsg$goodGenes][block[blockAssigned]] = blockLabels[blockAssigned];
collectGarbage();
}
# Check whether any of the already assigned genes (in this or previous blocks) should be re-assigned
if (verbose>2)
printFlush(paste(spaces, "....checking for genes that should be reassigned.."));
deleteModules = NULL;
goodLabels = allLabels[gsg$goodGenes];
if (sum(goodLabels!=0) > 0)
{
propLabels = goodLabels[goodLabels!=0];
assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0];
corEval = parse(text = paste(.corFnc[intCorType],
"(multiExpr[[set]]$data[, goodLabels!=0], consMEs[[set]]$data",
prepComma(.corOptions[intCorType]), ")"));
nMods = ncol(consMEs[[1]]$data);
lpValues = array(0, dim = c(length(propLabels), nMods, nSets));
sumSign = array(0, dim = c(length(propLabels), nMods));
if (verbose>3)
printFlush(paste(spaces, "......module membership p-values.."));
for (set in 1:nSets)
{
KME = eval(corEval);
if (intNetworkType == 1) KME = abs(KME)
lpValues[,,set] = -2*log(corPvalueFisher(KME, nGSamples[set], twoSided = FALSE));
sumSign = sumSign + sign(KME);
}
if (verbose>3)
printFlush(paste(spaces, "......module membership scores.."));
scoreAll = as.matrix(apply(lpValues, c(1,2), sum)) * (nSets + sumSign)/(2*nSets);
scoreAll[!is.finite(scoreAll)] = 0.001 # This low should be enough
bestScore = apply(scoreAll, 1, max);
if (intNetworkType==1) sumSign = abs(sumSign);
if (verbose>3)
{
cat(paste(spaces, "......individual modules.."));
pind = initProgInd();
}
for (mod in 1:nMods)
{
modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]];
scoreMod = scoreAll[modGenes, mod];
candidates = (bestScore[modGenes] > scoreMod);
candidates[!is.finite(candidates)] = FALSE;
if (sum(candidates) > 0)
{
pModule = pchisq(scoreMod[candidates], nSets, log.p = TRUE)
whichBest = apply(scoreAll[modGenes[candidates], ], 1, which.max);
pBest = pchisq(bestScore[modGenes[candidates]], nSets, log.p = TRUE);
reassign = ifelse(is.finite(pBest - pModule),
( (pBest - pModule) < log(reassignThreshold) ),
FALSE);
if (sum(reassign)>0)
{
allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign];
changedModules = union(changedModules, whichBest[reassign]);
if (length(modGenes)-sum(reassign) < minModuleSize)
{
deleteModules = union(deleteModules, mod);
} else
changedModules = union(changedModules, mod);
}
}
if (verbose > 3) pind = updateProgInd(mod/nMods, pind);
}
rm(lpValues, sumSign, scoreAll);
if (verbose > 3) printFlush("");
}
# Remove marked modules
if (!is.null(deleteModules))
{
# for (set in 1:nSets) consMEs[[set]]$data = consMEs[[set]]$data[, -deleteModules];
modGenes = is.finite(match(allLabels, allLabelIndex[deleteModules]));
allLabels[modGenes] = 0;
# allLabelIndex = allLabelIndex[-deleteModules];
}
if (verbose>1) printFlush(paste(spaces, "..merging consensus modules that are too close.."));
#print(table(allLabels));
#print(is.numeric(allLabels))
if (numericLabels) {
colors = allLabels
} else {
colors = labels2colors(allLabels)
}
mergedColors = colors;
mergedMods = try(mergeCloseModules(multiExpr, colors[gsg$goodGenes],
equalizeQuantiles = equalizeQuantilesForModuleMerging,
quantileSummary = quantileSummaryForModuleMerging,
consensusQuantile = mergeConsensusQuantile,
cutHeight = mergeCutHeight,
relabel = TRUE, impute = impute,
verbose = verbose-2, indent = indent + 2), silent = TRUE);
if (inherits(mergedMods, 'try-error'))
{
if (verbose>0)
{
printFlush(paste(spaces, 'blockwiseConsensusModules: mergeCloseModule failed with this message:\n',
spaces, ' ', mergedMods, spaces,
'---> returning unmerged consensus modules'));
} else warning(paste('blockwiseConsensusModules: mergeCloseModule failed with this message:\n ',
mergedMods, '---> returning unmerged consensus modules'));
MEs = try(multiSetMEs(multiExpr, universalColors = colors[gsg$goodGenes]
# trapErrors = TRUE, returnValidOnly = TRUE
), silent = TRUE);
if (inherits(MEs, 'try-error'))
{
warning(paste('blockwiseConsensusModules: ME calculation failed with this message:\n ',
MEs, '---> returning empty module eigengenes'));
allSampleMEs = NULL;
} else {
if (!MEs[[1]]$allOK) mergedColors[gsg$goodGenes] = MEs[[1]]$validColors;
allSampleMEs = vector(mode = "list", length = nSets);
names(allSampleMEs) = names(multiExpr);
for (set in 1:nSets)
{
allSampleMEs[[set]] =
list(data = as.data.frame(matrix(NA, nrow = nSamples[set], ncol = ncol(MEs[[set]]$data))));
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = MEs[[set]]$data[,];
names(allSampleMEs[[set]]$data) = names(MEs[[set]]$data);
rownames(allSampleMEs[[set]]$data) = make.unique(originalSampleNames[[set]]$data)
}
}
} else {
mergedColors[gsg$goodGenes] = mergedMods$colors;
allSampleMEs = vector(mode = "list", length = nSets);
names(allSampleMEs) = names(multiExpr);
for (set in 1:nSets)
{
allSampleMEs[[set]] =
list(data = as.data.frame(matrix(NA, nrow = nSamples[set],
ncol = ncol(mergedMods$newMEs[[1]]$data))));
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = mergedMods$newMEs[[set]]$data[,];
names(allSampleMEs[[set]]$data) = names(mergedMods$newMEs[[set]]$data);
rownames(allSampleMEs[[set]]$data) = make.unique(originalSampleNames[[set]]$data);
}
}
if (seedSaved) .Random.seed <<- savedSeed;
if (removeConsensusTOMOnExit)
{
.checkAndDelete(consensusTOMInfo$TOMFiles);
consensusTOMInfo$TOMFiles = NULL;
}
if (removeIndividualTOMsOnExit)
{
.checkAndDelete(individualTOMInfo$actualTOMFileNames);
individualTOMInfo$actualTOMFileNames = NULL;
}
# Under no circumstances return consensus TOM or individual TOM similarities within the returned list.
consensusTOMInfo$consensusTOM = NULL;
individualTOMInfo$TOMSimilarities = NULL
names(mergedColors) = names(colors) = originalGeneNames;
list(colors = mergedColors,
unmergedColors = colors,
multiMEs = allSampleMEs,
goodSamples = gsg$goodSamples,
goodGenes = gsg$goodGenes,
dendrograms = dendros,
TOMFiles = consensusTOMInfo$TOMFiles,
blockGenes = individualTOMInfo$blockGenes,
blocks = blocks,
originCount = consensusTOMInfo$originCount,
networkCalibrationSamples = consensusTOMInfo$networkCalibrationSamples,
individualTOMInfo = individualTOMInfo,
consensusTOMInfo = if (saveConsensusTOMs) consensusTOMInfo else NULL,
consensusQuantile = consensusQuantile
)
}
#==========================================================================================================
#
# recutConsensusTrees
#
#==========================================================================================================
recutConsensusTrees = function(multiExpr,
goodSamples, goodGenes,
blocks,
TOMFiles,
dendrograms,
corType = "pearson",
networkType = "unsigned",
deepSplit = 2,
detectCutHeight = 0.995, minModuleSize = 20,
checkMinModuleSize = TRUE,
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
pamStage = TRUE, pamRespectsDendro = TRUE,
# minKMEtoJoin =0.7,
trimmingConsensusQuantile = 0,
minCoreKME = 0.5, minCoreKMESize = minModuleSize/3,
minKMEtoStay = 0.2,
reassignThresholdPS = 1e-4,
mergeCutHeight = 0.15,
mergeConsensusQuantile = trimmingConsensusQuantile,
impute = TRUE,
trapErrors = FALSE,
numericLabels = FALSE,
verbose = 2, indent = 0)
{
spaces = indentSpaces(indent);
dataSize = checkSets(multiExpr);
nSets = dataSize$nSets;
nGenes = dataSize$nGenes;
nSamples = dataSize$nSamples;
originalGeneNames = mtd.colnames(multiExpr);
if (is.null(originalGeneNames)) originalGeneNames = spaste("Column.", 1:nGenes);
originalSampleNames = mtd.apply(multiExpr, function(x)
{
out = rownames(x);
if (is.null(out)) out = spaste("Row.", 1:nrow(x));
out;
});
if (length(blocks)!=nGenes)
stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'.");
#if (verbose>0)
# printFlush(paste(spaces, "Calculating consensus modules and module eigengenes",
# "block-wise from all genes"));
intCorType = pmatch(corType, .corTypes);
if (is.na(intCorType))
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType = charmatch(networkType, .networkTypes);
if (is.na(intNetworkType))
stop(paste("Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")));
allLabels = rep(0, nGenes);
allLabelIndex = NULL;
corFnc = match.fun(.corFnc[intCorType]);
corOptions = list(use = 'p');
# Get rid of bad genes and bad samples
gsg = list(goodGenes = goodGenes, goodSamples = goodSamples, allOK = TRUE);
gsg$allOK = (sum(!gsg$goodGenes)==0);
nGGenes = sum(gsg$goodGenes);
nGSamples = rep(0, nSets);
for (set in 1:nSets)
{
nGSamples[set] = sum(gsg$goodSamples[[set]]);
gsg$allOK = gsg$allOK && (sum(!gsg$goodSamples[[set]])==0);
}
if (!gsg$allOK)
for (set in 1:nSets)
multiExpr[[set]]$data = multiExpr[[set]]$data[gsg$goodSamples[[set]], gsg$goodGenes];
gBlocks = blocks[gsg$goodGenes];
blockLevels = as.numeric(levels(factor(gBlocks)));
blockSizes = table(gBlocks)
nBlocks = length(blockLevels);
reassignThreshold = reassignThresholdPS^nSets;
# prepare scaled and imputed multiExpr.
multiExpr.scaled = mtd.apply(multiExpr, scale);
hasMissing = unlist(multiData2list(mtd.apply(multiExpr, function(x) { any(is.na(x)) })));
# Impute those that have missing data
multiExpr.scaled.imputed = mtd.mapply(function(x, doImpute)
{ if (doImpute) t(impute.knn(t(x))$data) else x },
multiExpr.scaled, hasMissing);
if (useBranchEigennodeDissim)
{
branchSplitFnc = "mtd.branchEigengeneDissim";
} else
branchSplitFnc = NULL;
# Initialize various variables
consMEs = vector(mode = "list", length = nSets);
blockNo = 1;
maxUsedLabel = 0;
collectGarbage();
# Here's where the analysis starts
while (blockNo <= nBlocks)
{
if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
# Select most connected genes
block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]];
nBlockGenes = length(block);
selExpr = vector(mode = "list", length = nSets);
for (set in 1:nSets)
selExpr[[set]] = list(data = multiExpr[[set]]$data[, block]);
# This is how TOMs are saved:
#if (saveTOMs)
#{
# TOMFiles[blockNo] = paste(saveTOMFileBase, "-block.", blockNo, ".RData", sep="");
# save(consTomDS, file = TOMFiles[blockNo]);
#}
#consTomDS = 1-consTomDS;
#collectGarbage();
xx = try(load(TOMFiles[blockNo]), silent = TRUE);
if (inherits(xx, 'try-error'))
{
printFlush(paste("************\n File name", TOMFiles[blockNo],
"appears invalid: the load function returned the following error:\n ",
xx));
stop();
}
if (xx!='consTomDS')
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate variable."));
if (!inherits(consTomDS, "dist"))
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate distance structure."));
consTomDS = 1-consTomDS;
collectGarbage();
if (verbose>2) printFlush(paste(spaces, "....clustering and detecting modules.."));
errorOccured = FALSE;
blockLabels = try(cutreeDynamic(dendro = dendrograms[[blockNo]],
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method ="hybrid",
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = if (useBranchEigennodeDissim)
branchSplitFnc else NULL,
minExternalSplit = minBranchEigennodeDissim,
externalSplitOptions = list(multiExpr = mtd.subset(multiExpr.scaled.imputed, , block),
corFnc = corFnc, corOptions = corOptions,
consensusQuantile = mergeConsensusQuantile,
signed = networkType %in% c("signed", "signed hybrid")),
externalSplitFncNeedsDistance = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
distM = as.matrix(consTomDS),
verbose = verbose-3, indent = indent + 2), silent = TRUE);
if (inherits(blockLabels, 'try-error'))
{
(if (verbose>0) printFlush else warning)
(paste(spaces, "blockwiseConsensusModules: cutreeDynamic failed:\n ",
blockLabels, "\nError occured in block", blockNo, "\nContinuing with next block."));
next;
} else {
blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel;
maxUsedLabel = max(blockLabels);
}
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, "No modules detected in block", blockNo))
printFlush(paste(spaces, " Continuing with next block."))
}
next;
}
# Calculate eigengenes for this batch
if (verbose>2) printFlush(paste(spaces, "....calculating eigengenes.."));
blockAssigned = c(1:nBlockGenes)[blockLabels!=0];
blockLabelIndex = as.numeric(levels(as.factor(blockLabels[blockAssigned])));
blockConsMEs = try(multiSetMEs(selExpr, universalColors = blockLabels,
excludeGrey = TRUE, grey = 0, impute = impute,
# trapErrors = TRUE, returnValidOnly = TRUE,
verbose = verbose-4, indent = indent + 3), silent = TRUE);
if (inherits(blockConsMEs, 'try-error'))
{
if (verbose>0)
{
printFlush(paste(spaces, "*** multiSetMEs failed with the message:"));
printFlush(paste(spaces, " ", blockConsMEs));
printFlush(paste(spaces, "*** --> Ending module detection here"));
} else warning(paste("blocwiseConsensusModules: multiSetMEs failed with the message: \n",
" ", blockConsMEs, "\n--> Continuing with next block."));
next;
}
deleteModules = NULL;
changedModules = NULL;
# find genes whose closest module eigengene has cor higher than minKMEtoJoin and assign them
# Removed - should not change blocks before clustering them
#unassGenes = c(c(1:nGGenes)[-block][allLabels[-block]==0], block[blockLabels==0]);
#if (length(unassGenes) > 0)
#{
#blockKME = array(0, dim = c(length(unassGenes), ncol(blockConsMEs[[1]]$data), nSets));
#corEval = parse(text = paste(.corFnc[intCorType],
#"( multiExpr[[set]]$data[, unassGenes], blockConsMEs[[set]]$data,",
#.corOptions[intCorType], ")"))
#for (set in 1:nSets) blockKME[, , set] = eval(corEval);
#if (intNetworkType==1) blockKME = abs(blockKME);
#consKME = as.matrix(apply(blockKME, c(1,2), min));
#consKMEmax = apply(consKME, 1, max);
#closestModule = blockLabelIndex[apply(consKME, 1, which.max)];
#assign = (consKMEmax >= minKMEtoJoin );
#if (sum(assign>0))
#{
#allLabels[unassGenes[assign]] = closestModule[assign];
#changedModules = union(changedModules, closestModule[assign]);
#}
#rm(blockKME, consKME, consKMEmax);
#}
collectGarbage();
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff, and that all member genes have
# the required minimum consensus KME
if (verbose>2)
printFlush(paste(spaces, "....checking consensus modules for statistical meaningfulness.."));
for (mod in 1:ncol(blockConsMEs[[1]]$data))
{
modGenes = (blockLabels==blockLabelIndex[mod]);
KME = matrix(0, nrow = sum(modGenes), ncol = nSets);
corEval = parse(text = paste(.corFnc[intCorType],
"( selExpr[[set]]$data[, modGenes], blockConsMEs[[set]]$data[, mod]",
prepComma(.corOptions[intCorType]), ")"))
for (set in 1:nSets) KME[, set] = as.vector(eval(corEval));
if (intNetworkType==1) KME = abs(KME);
consKME = apply(KME, 1, quantile, probs = trimmingConsensusQuantile, na.rm = TRUE);
if (sum(consKME>minCoreKME) < minCoreKMESize)
{
blockLabels[modGenes] = 0;
deleteModules = union(deleteModules, mod);
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod],
": of ", sum(modGenes),
" total genes in the module only ", sum(consKME>minCoreKME),
" have the requisite high correlation with the eigengene in all sets.", sep=""));
} else if (sum(consKME<minKMEtoStay)>0)
{
if (verbose > 3)
printFlush(paste(spaces, " ..removing", sum(consKME<minKMEtoStay),
"genes from module", blockLabelIndex[mod], "because their KME is too low."));
blockLabels[modGenes][consKME < minKMEtoStay] = 0;
if (sum(blockLabels[modGenes]>0) < minModuleSize)
{
deleteModules = union(deleteModules, mod);
blockLabels[modGenes] = 0;
if (verbose>3)
printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod],
": not enough genes in the module after removal of low KME genes.", sep=""));
} else {
changedModules = union(changedModules, blockLabelIndex[mod]);
}
}
}
# Remove marked modules
if (!is.null(deleteModules))
{
for (set in 1:nSets) blockConsMEs[[set]]$data = blockConsMEs[[set]]$data[, -deleteModules];
modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules]));
blockLabels[modGenes] = 0;
modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules]));
allLabels[modAllGenes] = 0;
blockLabelIndex = blockLabelIndex[-deleteModules];
}
# Check whether there's anything left
if (sum(blockLabels>0)==0)
{
if (verbose>1)
{
printFlush(paste(spaces, " No significant modules detected in block", blockNo))
printFlush(paste(spaces, " Continuing with next block."));
}
next;
}
# Update module eigengenes
for (set in 1:nSets)
if (is.null(dim(blockConsMEs[[set]]$data)))
dim(blockConsMEs[[set]]$data) = c(length(blockConsMEs[[set]]$data), 1);
if (is.null(consMEs[[1]]))
{
for (set in 1:nSets) consMEs[[set]] = list(data = blockConsMEs[[set]]$data);
} else for (set in 1:nSets)
consMEs[[set]]$data = cbind(consMEs[[set]]$data, blockConsMEs[[set]]$data);
# Update allLabels
allLabelIndex = c(allLabelIndex, blockLabelIndex);
allLabels[block[blockAssigned]] = blockLabels[blockAssigned];
collectGarbage();
blockNo = blockNo + 1;
}
# Check whether any of the already assigned genes (in this or previous blocks) should be re-assigned
if (verbose>2)
printFlush(paste(spaces, "....checking for genes that should be reassigned.."));
deleteModules = NULL;
goodLabels = allLabels[gsg$goodGenes];
if (sum(goodLabels!=0) > 0)
{
propLabels = goodLabels[goodLabels!=0];
assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0];
corEval = parse(text = paste(.corFnc[intCorType],
"(multiExpr[[set]]$data[, goodLabels!=0], consMEs[[set]]$data",
prepComma(.corOptions[intCorType]), ")"));
nMods = ncol(consMEs[[1]]$data);
lpValues = array(0, dim = c(length(propLabels), nMods, nSets));
sumSign = array(0, dim = c(length(propLabels), nMods));
if (verbose>3)
printFlush(paste(spaces, "......module membership p-values.."));
for (set in 1:nSets)
{
KME = eval(corEval);
if (intNetworkType == 1) KME = abs(KME)
lpValues[,,set] = -2*log(corPvalueFisher(KME, nGSamples[set], twoSided = FALSE));
sumSign = sumSign + sign(KME);
}
if (verbose>3)
printFlush(paste(spaces, "......module membership scores.."));
scoreAll = as.matrix(apply(lpValues, c(1,2), sum)) * (nSets + sumSign)/(2*nSets);
scoreAll[!is.finite(scoreAll)] = 0.001 # This low should be enough
bestScore = apply(scoreAll, 1, max);
if (intNetworkType==1) sumSign = abs(sumSign);
if (verbose>3)
{
cat(paste(spaces, "......individual modules.."));
pind = initProgInd();
}
for (mod in 1:nMods)
{
modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]];
scoreMod = scoreAll[modGenes, mod];
candidates = (bestScore[modGenes] > scoreMod);
candidates[!is.finite(candidates)] = FALSE;
if (sum(candidates) > 0)
{
pModule = pchisq(scoreMod[candidates], nSets, log.p = TRUE)
whichBest = apply(scoreAll[modGenes[candidates], ], 1, which.max);
pBest = pchisq(bestScore[modGenes[candidates]], nSets, log.p = TRUE);
reassign = ifelse(is.finite(pBest - pModule),
( (pBest - pModule) < log(reassignThreshold) ),
FALSE);
if (sum(reassign)>0)
{
allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign];
changedModules = union(changedModules, whichBest[reassign]);
if (length(modGenes)-sum(reassign) < minModuleSize)
{
deleteModules = union(deleteModules, mod);
} else
changedModules = union(changedModules, mod);
}
}
if (verbose > 3) pind = updateProgInd(mod/nMods, pind);
}
rm(lpValues, sumSign, scoreAll);
if (verbose > 3) printFlush("");
}
# Remove marked modules
if (!is.null(deleteModules))
{
# for (set in 1:nSets) consMEs[[set]]$data = consMEs[[set]]$data[, -deleteModules];
modGenes = is.finite(match(allLabels, allLabelIndex[deleteModules]));
allLabels[modGenes] = 0;
# allLabelIndex = allLabelIndex[-deleteModules];
}
if (verbose>1) printFlush(paste(spaces, "..merging consensus modules that are too close.."));
#print(table(allLabels));
#print(is.numeric(allLabels))
if (numericLabels) {
colors = allLabels
} else {
colors = labels2colors(allLabels)
}
mergedColors = colors;
mergedMods = try(mergeCloseModules(multiExpr, colors[gsg$goodGenes],
consensusQuantile = mergeConsensusQuantile,
cutHeight = mergeCutHeight,
relabel = TRUE, impute = impute,
verbose = verbose-2, indent = indent + 2), silent = TRUE);
if (inherits(mergedMods, 'try-error'))
{
if (verbose>0)
{
printFlush(paste(spaces, 'blockwiseConsensusModules: mergeCloseModule failed with this message:\n',
spaces, ' ', mergedMods, spaces,
'---> returning unmerged consensus modules'));
} else warning(paste('blockwiseConsensusModules: mergeCloseModule failed with this message:\n ',
mergedMods, '---> returning unmerged consensus modules'));
MEs = try(multiSetMEs(multiExpr, universalColors = colors[gsg$goodGenes]
# trapErrors = TRUE, returnValidOnly = TRUE
), silent = TRUE);
if (inherits(MEs, 'try-error'))
{
warning(paste('blockwiseConsensusModules: ME calculation failed with this message:\n ',
MEs, '---> returning empty module eigengenes'));
allSampleMEs = NULL;
} else {
if (!MEs[[1]]$allOK) mergedColors[gsg$goodGenes] = MEs[[1]]$validColors;
allSampleMEs = vector(mode = "list", length = nSets);
names(allSampleMEs) = names(multiExpr);
for (set in 1:nSets)
{
allSampleMEs[[set]] =
list(data = as.data.frame(matrix(NA, nrow = nSamples[set], ncol = ncol(MEs[[set]]$data))));
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = MEs[[set]]$data[,];
names(allSampleMEs[[set]]$data) = names(MEs[[set]]$data);
rownames(allSampleMEs[[set]]$data) = make.unique(originalSampleNames[[set]]$data);
}
}
} else {
mergedColors[gsg$goodGenes] = mergedMods$colors;
allSampleMEs = vector(mode = "list", length = nSets);
names(allSampleMEs) = names(multiExpr);
for (set in 1:nSets)
{
allSampleMEs[[set]] =
list(data = as.data.frame(matrix(NA, nrow = nSamples[set],
ncol = ncol(mergedMods$newMEs[[1]]$data))));
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = mergedMods$newMEs[[set]]$data[,];
names(allSampleMEs[[set]]$data) = names(mergedMods$newMEs[[set]]$data);
rownames(allSampleMEs[[set]]$data) = make.unique(originalSampleNames[[set]]$data);
}
}
names(mergedColors) = names(colors) = originalGeneNames;
list(colors = mergedColors,
unmergedColors = colors,
multiMEs = allSampleMEs
# goodSamples = gsg$goodSamples,
# goodGenes = gsg$goodGenes,
# dendrograms = dendros,
# blockGenes = blockGenes,
# originCount = originCount,
# TOMFiles = TOMFiles
);
}
#===================================================================================================================
#
# Preliminary clustering via projective k-means
#
#===================================================================================================================
projectiveKMeans = function (
datExpr,
preferredSize = 5000,
nCenters = as.integer(min(ncol(datExpr)/20, preferredSize^2/ncol(datExpr))),
sizePenaltyPower = 4,
networkType = "unsigned",
randomSeed = 54321,
checkData = TRUE,
imputeMissing = TRUE,
maxIterations = 1000,
verbose = 0, indent = 0 )
{
centerGeneDist = function(centers, oldDst = NULL, changed = c(1:nCenters),
blockSize = 50000, verbose = 0, spaces = "")
{
if (is.null(oldDst))
{
oldDst = array(0, c(nCenters, nGenes));
changed = c(1:nCenters);
}
dstAll = oldDst;
nChanged = length(changed)
nBlocks = ceiling(ncol(datExpr)/blockSize);
blocks = allocateJobs(ncol(datExpr), nBlocks);
if (verbose > 5)
pind = initProgInd(spaste(spaces, " ..centerGeneDist: "));
for (b in 1:nBlocks)
{
if (intNetworkType==1)
{
dst = 1-abs(cor(centers[, changed], datExpr[, blocks[[b]] ]));
} else {
dst = 1-cor(centers[, changed], datExpr[, blocks[[b]] ]);
}
dstAll[changed, blocks[[b]]] = dst;
if (verbose > 5) pind = updateProgInd(b/nBlocks, pind);
}
dstAll;
}
memberCenterDist = function(dst, membership, sizes = NULL, changed = c(1:nCenters), oldMCDist = NULL)
{
if (is.null(oldMCDist))
{
changed = c(1:nCenters);
oldMCDist = rep(0, nGenes);
}
centerDist = oldMCDist;
if (!is.null(changed))
{
if (is.null(sizes)) sizes = table(membership)
if (length(sizes)!=nCenters)
{
sizes2 = rep(0, nCenters);
sizes2[as.numeric(names(sizes))] = sizes;
sizes = sizes2;
}
if (is.finite(sizePenaltyPower))
{
sizeCorrections = (sizes/preferredSize)^sizePenaltyPower;
sizeCorrections[sizeCorrections < 1] = 1;
} else {
sizeCorrections = rep(1, length(sizes));
sizeCorrections[sizes > preferredSize] = Inf;
}
for (cen in changed) if (sizes[cen]!=0)
{
if (is.finite(sizeCorrections[cen]))
{
centerDist[membership==cen] = dst[cen, membership==cen] * sizeCorrections[cen];
} else
centerDist[membership==cen] = 10 + dst[cen, membership==cen];
}
}
centerDist;
}
spaces = indentSpaces(indent);
if (verbose > 0)
printFlush(paste(spaces, "Projective K-means:"));
datExpr = scale(as.matrix(datExpr));
if (any(is.na(datExpr)))
{
if (imputeMissing)
{
printFlush(spaste(spaces, "projectiveKMeans: imputing missing data in 'datExpr'.\n",
"To reproduce older results, use 'imputeMissing = FALSE'. "));
datExpr = t(impute.knn(t(datExpr))$data);
} else {
printFlush(spaste(spaces, "projectiveKMeans: there are missing data in 'datExpr'.\n",
"SVD will not work; will use a weighted mean approximation."));
}
}
#if (preferredSize >= floor(sqrt(2^31)) )
# stop("'preferredSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (preferredSize >= sqrt(2^31)-1)
printFlush(spaste(
spaces, " Support for blocks larger than sqrt(2^31) is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."));
if (exists(".Random.seed"))
{
seedSaved = TRUE;
savedSeed = .Random.seed
} else seedSaved = FALSE;
set.seed(randomSeed);
nAllSamples = nrow(datExpr);
nAllGenes = ncol(datExpr);
intNetworkType = charmatch(networkType, .networkTypes);
if (is.na(intNetworkType))
stop(paste("Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")));
if (verbose > 1)
printFlush(paste(spaces, "..using", nCenters, "centers."));
# Check data for genes and samples that have too many missing values
if (checkData)
{
if (verbose > 0)
printFlush(paste(spaces, "..checking data for excessive number of missing values.."));
gsg = goodSamplesGenes(datExpr, verbose = verbose -1, indent = indent + 1)
if (!gsg$allOK) datExpr = datExpr[gsg$goodSamples, gsg$goodGenes];
}
nGenes = ncol(datExpr);
nSamples = nrow(datExpr);
datExpr[is.na(datExpr)] = 0;
centers = matrix(0, nSamples, nCenters);
randGeneIndex = sample(nGenes, size = nGenes);
temp = rep(c(1:nCenters), times = ceiling(nGenes/nCenters));
membership = temp[randGeneIndex];
if (verbose > 0)
printFlush(paste(spaces, "..k-means clustering.."));
changed = c(1:nCenters); dst = NULL; centerDist = NULL;
iteration = 0;
while (!is.null(changed) && (iteration < maxIterations))
{
iteration = iteration + 1;
if (verbose > 1) printFlush(paste(spaces, " ..iteration", iteration));
clusterSizes = table(membership);
if (verbose > 5) pind = initProgInd(paste(spaces, " ....calculating centers: "))
for (cen in sort(changed))
{
centers[, cen] = .alignedFirstPC(datExpr[, membership==cen], verbose = verbose-2,
indent = indent+2);
if (verbose > 5) pind = updateProgInd(cen/nCenters, pind);
}
if (verbose > 5) { pind = updateProgInd(1, pind); printFlush("")}
if (verbose > 5) printFlush(paste(spaces, " ....calculating center to gene distances"));
dst = centerGeneDist(centers, dst, changed, verbose = verbose, spaces = spaces);
centerDist = memberCenterDist(dst, membership, clusterSizes, changed, centerDist);
nearestDist = rep(0, nGenes);
nearest = rep(0, nGenes);
if (verbose > 5) printFlush(paste(spaces, " ....finding nearest center for each gene"));
minRes = .Call("minWhich_call", dst, 0L, PACKAGE = "WGCNA")
nearestDist = minRes$min;
nearest = minRes$which;
if (sum(centerDist>nearestDist)>0)
{
proposedMemb = nearest;
accepted = FALSE;
while (!accepted && (sum(proposedMemb!=membership)>0))
{
if (verbose > 2)
cat(paste(spaces, " ..proposing to move", sum(proposedMemb!=membership), "genes"));
moved = c(1:nGenes)[proposedMemb!=membership];
newCentDist = memberCenterDist(dst, proposedMemb);
gotWorse = newCentDist[moved] > centerDist[moved]
if (sum(!is.finite(gotWorse))>0)
warning("Have NAs in gotWorse.");
if (sum(gotWorse)==0)
{
accepted = TRUE;
if (verbose > 2) printFlush(paste("..move accepted."));
} else {
if (verbose > 2) printFlush(paste("..some genes got worse. Trying again."));
ord = order(centerDist[moved[gotWorse]] - newCentDist[moved[gotWorse]])
n = ceiling(length(ord)*3/5);
proposedMemb[moved[gotWorse][ord[c(1:n)]]] = membership[moved[gotWorse][ord[c(1:n)]]];
}
}
if (accepted)
{
propSizes = table(proposedMemb);
keep = as.numeric(names(propSizes));
centers = centers[, keep];
dst = dst[keep, ];
changedAll = union(membership[moved], proposedMemb[moved]);
changedKeep = changedAll[is.finite(match(changedAll, keep))];
changed = rank(changedKeep); # This is a way to make say 1,3,4 into 1,2,3
membership = as.numeric(as.factor(proposedMemb));
if ( (verbose > 1) && (sum(keep) < nCenters))
printFlush(paste(spaces, " ..dropping", nCenters - sum(keep),
"centers because ther clusters are empty."));
nCenters = length(keep);
} else {
changed = NULL;
if (verbose > 2) printFlush(paste("Could not find a suitable move to improve the clustering."));
}
} else {
changed = NULL;
if (verbose > 2)
printFlush(paste("Clustering is stable: all genes are closest to their assigned center."));
}
if (verbose > 5)
{
printFlush("Sizes of biggest preliminary clusters:");
order = order(-as.numeric(clusterSizes));
print(as.numeric(clusterSizes)[order[1:min(100, length(order))]]);
}
}
if (verbose > 2 & verbose <6)
{
printFlush("Sizes of preliminary clusters:");
print(clusterSizes);
}
merged = sizeRestrictedClusterMerge(
datExpr,
clusters = membership,
clusterSizes = clusterSizes,
centers = centers,
maxSize = preferredSize,
networkType = networkType,
verbose = verbose,
indent = indent);
list(clusters = merged$clusters, centers = merged$centers);
}
sizeRestrictedClusterMerge = function(
datExpr,
clusters,
clusterSizes = NULL,
centers = NULL,
maxSize,
networkType = "unsigned",
verbose = 0,
indent = 0)
{
if (ncol(datExpr)!=length(clusters))
stop("Number of variables (columns) in 'datExpr' and length of 'clusters' are not the same.");
spaces = indentSpaces(indent);
intNetworkType = charmatch(networkType, .networkTypes);
if (is.null(clusterSizes))
{
clusterSizes = table(clusters);
centers = NULL;
}
nCenters = length(clusterSizes);
nSamples = nrow(datExpr);
if (is.null(centers))
{
centers = matrix(0, nSamples, nCenters);
for (i in 1:nCenters)
{
if (clusterSizes[i] > 1)
{
centers[, i] = .alignedFirstPC(datExpr[, clusters==i], verbose = verbose-2,
indent = indent+2)
} else {
#xx = try({centers[, i] = scale(datExpr[, clusters==i, drop = FALSE])/(sum(is.finite(datExpr[, clusters==i]))-1)});
#if (inherits(xx, "try-error")) browser();
centers[, i] = scale(datExpr[, clusters==i, drop = FALSE])/(sum(is.finite(datExpr[, clusters==i]))-1);
}
}
}
if (verbose > 0) printFlush(paste(spaces, "..merging smaller clusters..."));
small = (clusterSizes < maxSize); # typically all clusters will be below the max size, but is not a requirement.
if (intNetworkType==1)
{
clustDist = 1-abs(cor(centers[, small]));
} else {
clustDist = 1-cor(centers[, small]);
}
diag(clustDist) = 10;
# merge nearby clusters if their sizes allow merging
done = FALSE;
while (!done & (sum(small)>1))
{
smallIndex = c(1:nCenters)[small]
nSmall = sum(small);
distOrder = order(as.vector(clustDist))[seq(from=2, to = nSmall * (nSmall-1), by=2)];
i = 1; canMerge = FALSE;
while (i <= length(distOrder) && (!canMerge))
{
jj = as.integer( (distOrder[i]-1)/nSmall + 1);
ii = distOrder[i] - (jj-1) * nSmall
whichI = smallIndex[ii]
whichJ = smallIndex[jj];
canMerge = sum(clusterSizes[c(whichI, whichJ)]) <= maxSize;
i = i + 1;
}
if (canMerge)
{
clusters[clusters==whichJ] = whichI;
clusterSizes[whichI] = clusterSizes[whichI] + clusterSizes[whichJ];
centers[, whichI] = .alignedFirstPC(datExpr[, clusters==whichI], verbose = verbose-2,
indent = indent+2);
nCenters = nCenters -1;
if (verbose > 3)
printFlush(paste(spaces, " ..merged clusters", whichI, "and", whichJ,
"whose combined size is", clusterSizes[whichI]));
clusters[clusters>whichJ] = clusters[clusters>whichJ] - 1;
centers = centers[ , -whichJ, drop = FALSE];
clusterSizes = clusterSizes[-whichJ];
small = clusterSizes < maxSize;
if (sum(small) > 1) # Only update if there are at least 2 small clusters left
{
clustDist = clustDist[-jj, -jj, drop = FALSE];
if (clusterSizes[whichI] < maxSize)
{
cr1 = c(cor(centers[, small], centers[, whichI]));
clustDist[, ii] = clustDist[ii, ] = if (intNetworkType==1) 1-abs(cr1) else 1-cr1;
clustDist[ii, ii] = 10;
} else {
clustDist = clustDist[-ii, -ii, drop = FALSE]
}
}
} else done = TRUE;
}
list(clusters = clusters, centers = centers);
}
#======================================================================================================
#
# Consensus preliminary partitioning
#
#======================================================================================================
consensusProjectiveKMeans = function (
multiExpr,
preferredSize = 5000,
nCenters = NULL,
sizePenaltyPower = 4,
networkType = "unsigned",
randomSeed = 54321,
checkData = TRUE,
imputeMissing = TRUE,
useMean = (length(multiExpr) > 3),
maxIterations = 1000,
verbose = 0, indent = 0 )
{
centerGeneDist = function(centers, oldDst = NULL, changed = c(1:nCenters))
{
if (is.null(oldDst))
{
oldDst = array(0, c(nCenters, nGenes));
changed = c(1:nCenters);
}
dstAll = oldDst;
nChanged = length(changed)
if (nChanged!=0)
{
dstX = array(0, c(nSets, nChanged, nGenes));
for (set in 1:nSets)
if (intNetworkType==1)
{
dstX[set, , ] = 1-abs(cor(centers[[set]]$data[, changed], multiExpr[[set]]$data));
} else {
dstX[set, , ] = 1-cor(centers[[set]]$data[, changed], multiExpr[[set]]$data);
}
if (nSets==1)
{
dstAll[changed, ] = dstX[1, , ];
} else {
#dst = array(0, c(nChanged, nGenes));
if (useMean)
{
dstAll[changed, ] = base::colMeans(dstX, dims = 1);
} else {
dstAll[changed, ] = -minWhichMin(-dstX, byRow = FALSE, dims = 1)$min;
}
}
}
dstAll;
}
memberCenterDist = function(dst, membership, sizes = NULL, changed = c(1:nCenters), oldMCDist = NULL)
{
if (is.null(oldMCDist))
{
changed = c(1:nCenters);
oldMCDist = rep(0, nGenes);
}
centerDist = oldMCDist;
if (!is.null(changed))
{
if (is.null(sizes)) sizes = table(membership)
if (length(sizes)!=nCenters)
{
sizes2 = rep(0, nCenters);
sizes2[as.numeric(names(sizes))] = sizes;
sizes = sizes2;
}
if (is.finite(sizePenaltyPower))
{
sizeCorrections = (sizes/preferredSize)^sizePenaltyPower;
sizeCorrections[sizeCorrections < 1] = 1;
} else {
sizeCorrections = rep(1, length(sizes));
sizeCorrections[sizes > preferredSize] = Inf;
}
for (cen in changed) if (sizes[cen]!=0)
{
if (is.finite(sizeCorrections[cen]))
{
centerDist[membership==cen] = dst[cen, membership==cen] * sizeCorrections[cen];
} else
centerDist[membership==cen] = 10 + dst[cen, membership==cen];
}
}
centerDist;
}
spaces = indentSpaces(indent);
if (verbose > 0)
printFlush(paste(spaces, "Consensus projective K-means:"));
allSize = checkSets(multiExpr);
nSamples = allSize$nSamples;
nGenes = allSize$nGenes;
nSets = allSize$nSets;
#if (preferredSize >= floor(sqrt(2^31)) )
# stop("'preferredSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (preferredSize >= sqrt(2^31)-1)
printFlush(spaste(
spaces, " Support for blocks larger than sqrt(2^31) is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."));
if (exists(".Random.seed"))
{
seedSaved = TRUE;
savedSeed = .Random.seed
} else seedSaved = FALSE;
set.seed(randomSeed);
if (is.null(nCenters)) nCenters = as.integer(min(nGenes/20, 100 * nGenes/preferredSize));
if (verbose > 1)
printFlush(paste(spaces, "..using", nCenters, "centers."));
intNetworkType = charmatch(networkType, .networkTypes);
if (is.na(intNetworkType))
stop(paste("Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")));
for (set in 1:nSets)
multiExpr[[set]]$data = scale(as.matrix(multiExpr[[set]]$data));
# Check data for genes and samples that have too many missing values
if (checkData)
{
if (verbose > 0)
printFlush(paste(spaces, "..checking data for excessive number of missing values.."));
gsg = goodSamplesGenesMS(multiExpr, verbose = verbose - 1, indent = indent + 1);
for (set in 1:nSets)
{
if (!gsg$allOK)
multiExpr[[set]]$data = scale(multiExpr[[set]]$data[gsg$goodSamples[[set]], gsg$goodGenes]);
}
}
anyNA = mtd.apply(multiExpr, function(x) any(is.na(x)), mdaSimplify = TRUE);
if (imputeMissing && any(anyNA))
{
if (verbose > 0)
printFlush(paste(spaces, "..imputing missing data.."));
multiExpr[anyNA] = mtd.apply(multiExpr[anyNA], function(x) t(impute.knn(t(x))$data), mdaVerbose = verbose>1)
} else if (any(anyNA))
{
printFlush(paste(spaces, "Found missing data. These will be replaced by zeros;\n",
spaces, " for a better replacement, use 'imputeMissing=TRUE'."));
for (set in 1:nSets)
multiExpr[[set]]$data[is.na(multiExpr[[set]]$data)] = 0;
}
setSize = checkSets(multiExpr);
nGenes = setSize$nGenes;
nSamples = setSize$nSamples;
centers = vector(mode="list", length = nSets);
for (set in 1:nSets)
centers[[set]] = list(data = matrix(0, nSamples[set], nCenters));
randGeneIndex = sample(nGenes, size = nGenes);
temp = rep(c(1:nCenters), times = ceiling(nGenes/nCenters));
membership = temp[randGeneIndex];
if (verbose > 0)
printFlush(paste(spaces, "..consensus k-means clustering.."));
changed = c(1:nCenters); dst = NULL;
iteration = 0;
centerDist = NULL;
while (!is.null(changed) && (iteration < maxIterations))
{
iteration = iteration + 1;
if (verbose > 1) printFlush(paste(spaces, " ..iteration", iteration));
clusterSizes = table(membership);
for (set in 1:nSets) for (cen in changed)
centers[[set]]$data[, cen] = .alignedFirstPC(multiExpr[[set]]$data[, membership==cen],
verbose = verbose-2, indent = indent+2);
dst = centerGeneDist(centers, dst, changed);
centerDist = memberCenterDist(dst, membership, clusterSizes, changed, centerDist);
changed = NULL;
minRes = minWhichMin(dst);
nearestDist = minRes$min;
nearest = minRes$which;
if (sum(centerDist>nearestDist)>0)
{
proposedMemb = nearest;
accepted = FALSE;
while (!accepted && (sum(proposedMemb!=membership)>0))
{
if (verbose > 2)
cat(paste(spaces, " ..proposing to move", sum(proposedMemb!=membership), "genes"));
moved = c(1:nGenes)[proposedMemb!=membership];
newCentDist = memberCenterDist(dst, proposedMemb);
gotWorse = newCentDist[moved] > centerDist[moved]
if (sum(gotWorse)==0)
{
accepted = TRUE;
if (verbose > 2) printFlush(paste("..move accepted."));
} else {
if (verbose > 2) printFlush(paste("..some genes got worse. Trying again."));
ord = order(centerDist[moved[gotWorse]] - newCentDist[moved[gotWorse]])
n = ceiling(length(ord)*3/5);
proposedMemb[moved[gotWorse][ord[c(1:n)]]] = membership[moved[gotWorse][ord[c(1:n)]]];
}
}
if (accepted)
{
propSizes = table(proposedMemb);
keep = as.numeric(names(propSizes));
for (set in 1:nSets)
centers[[set]]$data = centers[[set]]$data[, keep];
dst = dst[keep, ];
changedAll = union(membership[moved], proposedMemb[moved]);
changedKeep = changedAll[is.finite(match(changedAll, keep))];
changed = rank(changedKeep); # This is a way to make say 1,3,4 into 1,2,3
membership = as.numeric(as.factor(proposedMemb));
if ( (verbose > 1) && (sum(keep) < nCenters))
printFlush(paste(spaces, " ..dropping", nCenters - sum(keep),
"centers because ther clusters are empty."));
nCenters = length(keep);
} else {
changed = NULL;
if (verbose > 2) printFlush(paste("Could not find a suitable move to improve the clustering."));
}
} else {
changed = NULL;
if (verbose > 2)
printFlush(paste("Clustering is stable: all genes are closest to their assigned center."));
}
}
consCenterDist = function(centers, select)
{
if (is.logical(select))
{
nC = sum(select);
} else {
nC = length(select);
}
distX = array(0, dim=c(nSets, nC, nC));
for (set in 1:nSets)
{
if (intNetworkType==1)
{
distX[set, , ] = 1-abs(cor(centers[[set]]$data[, select]));
} else {
distX[set, , ] = 1-cor(centers[[set]]$data[, select]);
}
}
if (nSets==1) {
distX[1, , ]
} else
-minWhichMin(-distX, byRow = FALSE, dims = 1)$min;
}
unmergedMembership = membership;
unmergedCenters = centers;
# merge nearby clusters if their sizes allow merging
if (verbose > 0) printFlush(paste(spaces, "..merging smaller clusters..."));
clusterSizes = as.vector(table(membership));
small = (clusterSizes < preferredSize);
done = FALSE;
while (!done & (sum(small)>1))
{
smallIndex = c(1:nCenters)[small]
nSmall = sum(small);
clustDist = consCenterDist(centers, smallIndex);
diag(clustDist) = 10;
distOrder = order(as.vector(clustDist))[seq(from=2, to = nSmall * (nSmall-1), by=2)];
i = 1; canMerge = FALSE;
while (i <= length(distOrder) && (!canMerge))
{
whichJ = smallIndex[as.integer( (distOrder[i]-1)/nSmall + 1)];
whichI = smallIndex[distOrder[i] - (whichJ-1) * nSmall];
canMerge = sum(clusterSizes[c(whichI, whichJ)]) < preferredSize;
i = i + 1;
}
if (canMerge)
{
membership[membership==whichJ] = whichI;
clusterSizes[whichI] = sum(clusterSizes[c(whichI, whichJ)]);
#if (verbose > 1)
# printFlush(paste(spaces, " ..merged clusters", whichI, "and", whichJ,
# "whose combined size is", clusterSizes[whichI]));
for (set in 1:nSets)
centers[[set]]$data[, whichI] = .alignedFirstPC(multiExpr[[set]]$data[, membership==whichI],
verbose = verbose-2, indent = indent+2);
for (set in 1:nSets)
centers[[set]]$data = centers[[set]]$data[,-whichJ];
membership[membership>whichJ] = membership[membership>whichJ] - 1;
nCenters = nCenters -1;
clusterSizes = as.vector(table(membership));
small = (clusterSizes < preferredSize);
if (verbose > 3)
printFlush(paste(spaces, " ..merged clusters", whichI, "and", whichJ,
"whose combined size is", clusterSizes[whichI]));
} else done = TRUE;
}
if (checkData)
{
membershipAll = rep(NA, allSize$nGenes);
membershipAll[gsg$goodGenes] = membership;
} else
membershipAll = membership;
if (seedSaved) .Random.seed <<- savedSeed;
return( list(clusters = membershipAll, centers = centers, unmergedClusters = unmergedMembership,
unmergedCenters = unmergedCenters) );
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.