# (stray token "Nothing" commented out; a bare symbol here would error when the file is sourced)
# New multilevel specification of consensus: a hierarchical list.
# The consensus calculation needs to be general enough so it can be used for module merging (consensus
# eigengene network) as well as for calculation of consensus kMEs, and possibly for other stuff as well.
# Consensus specification for a single operation:
# - inputs: a list specifying the input of the consensus. This should be general enough to handle
# blockwise data but also not specific to adjacencies.
# The input should be a multiData structure, with each element corresponding to one input data set.
# - calibrationMethod: currently one of "none", "single quantile", "full quantile"
# - consensusQuantile
# - saveCalibratedIndividualTOMs (= FALSE)
# - calibratedIndividualTOMFilePattern (= "calibratedIndividualTOM-Set%s-Block%b.RData")
# The consensus calculation itself does not need information about correlation type etc; the consensus
# calculation is very general.
# For network analysis applications of the consensus: will also have to keep information about how the
# individual network adjacencies (TOMs) are to be created or were created - correlation type, network type,
# TOM type, correlation options etc.
# So we will keep 2 different types of information around:
# 1. network construction options. A simple list giving the necessary network construction options.
# 2. ConsensusOptions: This will also be a list giving the options but not holding the actual consensus
# inputs or outputs.
# outputs of network construction and consensus construction should be held in separate lists.
# Output of individualTOMs:
# - adjacency data, a list of blockwiseData instances
# - block information
# - network construction options, separately for each adjacency (network construction could be different)
# For consensusTOM:
# Inputs:
# - adjacency data: either from individual TOMs or from other consensus TOMs
# . note that constructing a complicated consensus manually (i.e., using consensus TOMs
# as inputs to higher-level consensus) is of limited use
# since consensus modules would need the full consensus tree anyway.
# - consensus tree
# Not really needed: block information
# outputs:
# - consensus adjacency data
# - copy of consensus options
# - other (diagnostic etc) information
# For consensus modules
# Inputs:
# - underlying expression data
# - optional consensus TOM
# - block information
# - network options for each expression data set
# - consensus tree
.checkPower = function(power)
{
  # Validate soft-thresholding power(s): every entry must be finite and lie
  # in [1, 50]. Stops with an error otherwise; returns NULL invisibly on success.
  # Uses short-circuiting || with any() instead of the non-idiomatic
  # elementwise `|` combined with sum(...) > 0; the finiteness check comes
  # first so NA entries never reach the range comparisons.
  if (any(!is.finite(power)) || any(power < 1) || any(power > 50))
    stop("power must be between 1 and 50.");
}
#==========================================================================================================
#
# Defining a single consensus operation
#
#==========================================================================================================
newConsensusTree = function(consensusOptions = newConsensusOptions(),
                            inputs,
                            analysisName = NULL)
{
  # Create a ConsensusTree: a single consensus operation (its options) bundled
  # with its inputs. Inputs may themselves be ConsensusTree objects, which is
  # what makes the specification hierarchical.
  if (!inherits(consensusOptions, "ConsensusOptions"))
    stop("'consensusOptions' must be of class ConsensusOptions.");
  structure(
    list(consensusOptions = consensusOptions,
         inputs = inputs,
         analysisName = analysisName),
    class = c("ConsensusTree", "list"));
}
consensusTreeInputs = function(consensusTree, flatten = TRUE)
{
  # Collect all inputs of a consensus tree, descending into nested
  # ConsensusTree nodes. Note that nested trees are always flattened by the
  # recursive call; 'flatten' only controls flattening at the top level.
  expand = function(inp)
  {
    if (inherits(inp, "ConsensusTree")) consensusTreeInputs(inp) else inp
  }
  result = lapply(consensusTree$inputs, expand);
  if (flatten) result = unlist(result);
  result;
}
newConsensusOptions = function(
      calibration = c("full quantile", "single quantile", "none"),
      # Single-quantile calibration options
      calibrationQuantile = 0.95,
      sampleForCalibration = TRUE,
      sampleForCalibrationFactor = 1000,
      # Consensus definition
      consensusQuantile = 0,
      useMean = FALSE,
      setWeights = NULL,
      # Consensus post-processing
      suppressNegativeResults = FALSE, # Potentially useful for Nowick-type signed TOM
      # Name to prevent clashing of files
      analysisName = "")
{
  # Create a ConsensusOptions object holding the options (but not the inputs
  # or outputs) of a single consensus calculation.
  calibration = match.arg(calibration);
  # NULL setWeights passes this check (any() on a zero-length logical is FALSE).
  if (any(!is.finite(setWeights))) stop("Entries of 'setWeights' must all be finite.");
  if ( (consensusQuantile < 0) | (consensusQuantile > 1) )
    stop("'consensusQuantile' must be between 0 and 1.");
  structure(
    list(calibration = calibration,
         calibrationQuantile = calibrationQuantile,
         sampleForCalibration = sampleForCalibration,
         sampleForCalibrationFactor = sampleForCalibrationFactor,
         consensusQuantile = consensusQuantile,
         useMean = useMean,
         setWeights = setWeights,
         suppressNegativeResults = suppressNegativeResults,
         analysisName = analysisName),
    class = c("ConsensusOptions", "list"));
}
newCorrelationOptions = function(
      corType = c("pearson", "bicor"),
      maxPOutliers = 0.05,
      quickCor = 0,
      pearsonFallback = "individual",
      cosineCorrelation = FALSE,
      nThreads = 0,
      corFnc = if (corType=="bicor") "bicor" else "cor",
      corOptions = c(
        list(use = 'p',
             cosine = cosineCorrelation,
             quick = quickCor,
             nThreads = nThreads),
        if (corType=="bicor")
          list(maxPOutliers = maxPOutliers,
               pearsonFallback = pearsonFallback) else NULL))
{
  # Create a CorrelationOptions object describing how correlations are to be
  # calculated: correlation type/function, its options, and the numeric codes
  # used by the compiled WGCNA routines.
  # Validate numeric options. (A previous version duplicated the maxPOutliers
  # and quickCor checks; the duplicates have been removed.)
  if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.");
  if (quickCor < 0) stop("quickCor must be positive.");
  if (nThreads < 0) stop("nThreads must be positive.");
  corType = match.arg(corType);
  # 'pearsonFallback' may be a unique abbreviation; resolve it via pmatch.
  fallback = pmatch(pearsonFallback, .pearsonFallbacks)
  if (is.na(fallback))
    stop(paste("Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n",
         paste(.pearsonFallbacks, collapse = ", ")))
  out = list(
    corType = corType,
    maxPOutliers = maxPOutliers,
    quickCor = quickCor,
    pearsonFallback = pearsonFallback,
    # Use the pmatch-ed index: the old exact match() returned NA whenever the
    # user supplied a valid abbreviation of 'pearsonFallback'.
    pearsonFallback.code = fallback,
    cosineCorrelation = cosineCorrelation,
    corFnc = corFnc,
    corOptions = corOptions,
    corType.code = match(corType, .corTypes),
    nThreads = nThreads);
  class(out) = c("CorrelationOptions", class(out));
  out;
}
newNetworkOptions = function(
    correlationOptions = newCorrelationOptions(),
    # Adjacency options
    replaceMissingAdjacencies = TRUE,
    power = 6,
    networkType = c("signed hybrid", "signed", "unsigned"),
    checkPower = TRUE,
    # Topological overlap options
    TOMType = c("signed", "signed Nowick", "unsigned", "none",
                "signed 2", "signed Nowick 2", "unsigned 2"),
    TOMDenom = c("mean", "min"),
    suppressTOMForZeroAdjacencies = FALSE,
    suppressNegativeTOM = FALSE,
    # Internal behavior options
    useInternalMatrixAlgebra = FALSE)
{
  # Create a NetworkOptions object: correlation options plus adjacency and
  # topological overlap settings, including the numeric codes expected by the
  # compiled WGCNA routines.
  if (checkPower) .checkPower(power);
  networkType = match.arg(networkType);
  TOMType = match.arg(TOMType);
  TOMDenom = match.arg(TOMDenom);
  networkSettings = list(
    replaceMissingAdjacencies = replaceMissingAdjacencies,
    power = power,
    networkType = networkType,
    TOMType = TOMType,
    TOMDenom = TOMDenom,
    networkType.code = match(networkType, .networkTypes),
    TOMType.code = match(TOMType, .TOMTypes),
    TOMDenom.code = match(TOMDenom, .TOMDenoms),
    suppressTOMForZeroAdjacencies = suppressTOMForZeroAdjacencies,
    suppressNegativeTOM = suppressNegativeTOM,
    useInternalMatrixAlgebra = useInternalMatrixAlgebra);
  out = c(correlationOptions, networkSettings);
  # The result inherits both from NetworkOptions and from the class(es) of the
  # supplied correlation options.
  class(out) = c("NetworkOptions", class(correlationOptions));
  out;
}
#====================================================================================================
#
# cor, network, and consensus calculations
#
#====================================================================================================
.corCalculation = function(x, y = NULL, weights.x = NULL, weights.y = NULL, correlationOptions)
{
  # Evaluate the configured correlation function on x (and optionally y),
  # forwarding the stored correlation options.
  if (!inherits(correlationOptions, "CorrelationOptions"))
    stop(".corCalculation: 'correlationOptions' does not have correct type.");
  callArgs = c(list(x = x, y = y, weights.x = weights.x, weights.y = weights.y),
               correlationOptions$corOptions);
  do.call(match.fun(correlationOptions$corFnc), callArgs);
}
# network calculation. Returns the resulting topological overlap or adjacency matrix.
.networkCalculation = function(data, networkOptions, weights = NULL,
                               verbose = 1, indent = 0)
{
  # Compute the adjacency/TOM for one data set by delegating to the compiled
  # WGCNA routine "tomSimilarity_call".
  # 'data': numeric matrix (or data frame, converted here) of expression data.
  # 'networkOptions': object of class NetworkOptions (see newNetworkOptions).
  # 'weights': optional observation weights; validated (not scaled) below.
  if (is.data.frame(data)) data = as.matrix(data);
  if (!inherits(networkOptions, "NetworkOptions"))
    stop(".networkCalculation: 'networkOptions' does not have correct type.");
  .checkAndScaleWeights(weights, data, scaleByMax = FALSE);
  # The C-level call logs at one verbosity level lower and indents deeper.
  callVerb = max(0, verbose - 1); callInd = indent + 2;
  # The stored *.code values are 1-based R indices; the C code expects 0-based codes.
  CcorType = networkOptions$corType.code - 1;
  CnetworkType = networkOptions$networkType.code - 1;
  CTOMType = networkOptions$TOMType.code -1;
  CTOMDenom = networkOptions$TOMDenom.code -1;
  warn = as.integer(0);
  # For now return the full matrix; eventually we may return just the dissimilarity.
  # To make full use of the lower triagle space saving we'd have to modify the underlying C code
  # dramatically, otherwise it will still need to allocate the full matrix for the matrix multiplication.
  # NOTE(review): the argument order below must match the C signature of
  # tomSimilarity_call exactly; do not reorder.
  .Call("tomSimilarity_call", data, weights,
        as.integer(CcorType), as.integer(CnetworkType),
        as.double(networkOptions$power), as.integer(CTOMType),
        as.integer(CTOMDenom),
        as.double(networkOptions$maxPOutliers),
        as.double(networkOptions$quickCor),
        as.integer(networkOptions$pearsonFallback.code),
        as.integer(networkOptions$cosineCorrelation),
        as.integer(networkOptions$replaceMissingAdjacencies),
        as.integer(networkOptions$suppressTOMForZeroAdjacencies),
        as.integer(networkOptions$suppressNegativeTOM),
        as.integer(networkOptions$useInternalMatrixAlgebra),
        warn, as.integer(min(1, networkOptions$nThreads)),
        as.integer(callVerb), as.integer(callInd), PACKAGE = "WGCNA");
}
# the following is contained in consensusOptions:
# out = list( calibration = calibration,
# calibrationQuantile = calibrationQuantile,
# sampleForCalibration = sampleForCalibration,
# sampleForCalibrationFactor = sampleForCalibrationFactor,
# consensusQuantile = consensusQuantile,
# useMean = useMean,
# setWeights = setWeights);
#==============================================================================================
#
# general utility functions
#
#==============================================================================================
# Try to guess whether disk cache should be used
# this should work for both multiData as well as for simple lists of arrays.
.useDiskCache = function(multiExpr, blocks = NULL, chunkSize = NULL,
                         nSets = if (isMultiData(multiExpr)) length(multiExpr) else 1,
                         nGenes = checkSets(multiExpr)$nGenes)
{
  # Heuristic for whether the disk cache should be used: cache when the
  # largest block exceeds the chunk size. Works for multiData structures as
  # well as simple lists of arrays (via the default arguments).
  if (is.null(chunkSize)) chunkSize = as.integer(.largestBlockSize/(2*nSets))
  # With no block assignment, the whole data set is a single block.
  blockLengths = if (length(blocks) == 0) nGenes else as.numeric(table(blocks));
  max(blockLengths) > chunkSize;
}
.dimensions = function(x)
{
  # Dimensions of x: dim() when present, otherwise length() (plain vectors).
  d = dim(x);
  if (is.null(d)) length(x) else d;
}
.shiftList = function(c0, lst)
{
  # Prepend 'c0' to the list 'lst' and return the result as a new list.
  # Replaces the previous element-by-element copy loop (which grew the output
  # list one entry at a time) with a single c() call. unname() preserves the
  # historical behavior of the loop, which dropped the names of 'lst'.
  if (length(lst)==0) return(list(c0));
  c(list(c0), unname(lst));
}
.checkListDimConsistencyAndGetDimnames = function(dataList)
{
  # Check that all entries of 'dataList' are numeric, have consistent
  # dimensions and non-zero size; return the dimnames of the first entry that
  # carries them (or NULL).
  nPars = length(dataList)
  dn = NULL
  dim = NULL
  # seq_len() handles an empty list gracefully; the previous 1:nPars produced
  # c(1, 0) for nPars==0 and failed with a cryptic subscript error.
  for (p in seq_len(nPars))
  {
    if (mode(dataList[[p]])!="numeric")
      stop(paste("Argument number", p, " is not numeric."))
    if (p==1) {
      dim = .dimensions(dataList[[p]])
    } else {
      if (!isTRUE(all.equal(.dimensions(dataList[[p]]), dim)))
        stop("Argument dimensions are not consistent.");
    }
    if (prod(dim)==0) stop(paste("Argument has zero dimension."));
    if (is.null(dn)) dn = dimnames(dataList[[p]]);
  }
  dn;
}
.mtd.checkDimConsistencyAndGetDimnames = function(mtd)
{
  # Check that the $data components of all entries of the multiData structure
  # 'mtd' are numeric, have consistent dimensions and non-zero size; return
  # the dimnames of the first entry that carries them (or NULL).
  nPars = length(mtd)
  dn = NULL
  dim = NULL
  # seq_len() handles an empty structure gracefully; the previous 1:nPars
  # produced c(1, 0) for nPars==0 and failed with a cryptic subscript error.
  for (p in seq_len(nPars))
  {
    if (mode(mtd[[p]]$data)!="numeric")
      stop(paste("Argument number", p, " is not numeric."))
    if (p==1) {
      dim = .dimensions(mtd[[p]]$data)
    } else {
      if (!isTRUE(all.equal(.dimensions(mtd[[p]]$data), dim)))
        stop("Argument dimensions are not consistent.");
    }
    if (prod(dim)==0) stop(paste("Argument has zero dimension."));
    if (is.null(dn)) dn = dimnames(mtd[[p]]$data);
  }
  dn;
}
#==============================================================================================
#
# general utility functions for working with disk cache
#
#==============================================================================================
.saveChunks = function(data, chunkSize, fileBase, cacheDir, fileNames = NULL)
{
  # Split 'data' into chunks of at most 'chunkSize' elements and save each
  # chunk (as an object named 'temp') into its own file under 'cacheDir'.
  # Returns list(files = <file names>, chunkLengths = <length of each chunk>).
  ld = length(data);
  nChunks = ceiling(ld/chunkSize);
  if (is.null(fileNames))
  {
    if (length(fileBase)!=1)
      stop("Internal error: length of 'fileBase' must be 1.");
    fileNames = rep("", nChunks);
    x = 1;
    for (chunk in 1:nChunks)
    {
      # tempfile() picks an unused name; saving a placeholder immediately
      # reserves that name on disk.
      fileNames[chunk] = tempfile(pattern = fileBase, tmpdir = cacheDir);
      save(x, file = fileNames[chunk]);
    }
  } else if (length(fileNames)!=nChunks)
    stop("Internal error: length of 'fileNames' must equal the number of chunks.");
  chunkLengths = rep(0, nChunks);
  start = 1;
  for (chunk in 1:nChunks)
  {
    end = min(start + chunkSize-1, ld);
    chunkLengths[chunk] = end - start + 1;
    temp = data[start:end];
    save(temp, file = fileNames[chunk]);
    start = end + 1;
  }
  rm(temp);
  list(files = fileNames, chunkLengths = chunkLengths);
}
.loadAsList = function(file)
{
  # Load an .RData file into a fresh environment and return its contents as a
  # named list, avoiding any pollution of the caller's environment.
  holder = new.env();
  load(file = file, envir = holder);
  as.list(holder);
};
.loadObject = function(file, name = NULL, size = NULL)
{
  # Load the first object stored in 'file', optionally verifying its name and
  # its length before returning it.
  contents = .loadAsList(file);
  if (!is.null(name) && (names(contents)[1]!=name))
    stop("File ", file, " does not contain object '", name, "'.")
  obj = contents[[1]];
  if (!is.null(size) && (length(obj)!=size))
    stop("Object '", name, "' does not have the correct length.");
  obj;
}
.vector2dist = function(x)
{
  # Interpret a plain vector as the lower triangle of a 'dist' object.
  # The lower triangle of an n x n matrix has n*(n-1)/2 entries; invert that
  # relation to recover n and verify it is a whole number.
  n = length(x);
  nObjects = (1 + sqrt(1 + 8*n))/2;
  if (floor(nObjects)!=nObjects) stop("Input length not consistent with a distance structure.");
  attributes(x) = list(Size = as.integer(nObjects), Diag = FALSE, Upper = FALSE);
  class(x) = "dist";
  x;
}
.emptyDist = function(nObjects, fill = 0)
{
  # Build a 'dist' structure for 'nObjects' objects with every entry equal to
  # 'fill' (the lower triangle has nObjects*(nObjects-1)/2 entries).
  nEntries = (nObjects * (nObjects-1))/2;
  .vector2dist(rep(fill, nEntries));
}
.checkAndDelete = function(files)
{
  # Delete each existing file listed in 'files'; silently skip files that do
  # not exist. Always returns NULL.
  if (length(files)>0)
    for (f in as.character(files)) if (file.exists(f)) file.remove(f);
  NULL;
}
.qorder = function(data)
{
  # Ordering permutation of a numeric vector, computed by the compiled WGCNA
  # routine "qorder". NOTE(review): presumably a faster equivalent of base
  # order() for numeric input — confirm against the C implementation.
  data = as.numeric(data);
  .Call("qorder", data, PACKAGE = "WGCNA")
}
# Actual consensus calculation distilled into one function. data is assumed to have sets in columns
# and samples/observations/whatever in rows. setWeightMat should be a matrix of dimensions (nSets, 1)
# and be normalized to sum=1.
.consensusCalculation.base = function(data, useMean, setWeightMat, consensusQuantile)
{
  # Returns a list with components:
  # - consensus: the consensus values (one per row of 'data');
  # - originCount: per set, the number of rows whose value is at or below the
  #   consensus value.
  nSets = ncol(data);
  if (nSets==1)
  {
    # Trivial case: consensus of a single set is the set itself.
    out.list = list(consensus = as.vector(data));
  } else {
    if (useMean)
    {
      if (any(is.na(data)))
      {
        # Weighted mean ignoring missing entries: zero out NAs and
        # re-normalize each row by the total weight of its finite entries.
        finiteMat = 1-is.na(data);
        data[is.na(data)] = 0;
        out = data %*% setWeightMat / finiteMat%*%setWeightMat;
      } else {
        out = data %*% setWeightMat;
      }
      out.list = list(consensus = out);
    } else if (consensusQuantile == 0)
    {
      # Quantile 0 equals the row-wise (parallel) minimum; the compiled
      # routine returns the minima (and which set attains them).
      whichmin = .Call("minWhich_call", data, 1L, PACKAGE = "WGCNA");
      out.list = list(consensus = whichmin$min);
    } else {
      # General case: row-wise quantile across sets.
      out.list = list(consensus = rowQuantileC(data, p = consensusQuantile));
    }
  }
  if (is.null(out.list$originCount))
  {
    # Count, for each set, how often its value lies at or below the consensus.
    # (In the useMean branch with NAs, 'data' has been modified above.)
    out.list$originCount = apply(data, 2, function(x) sum(x <= out.list$consensus, na.rm = TRUE));
    names(out.list$originCount) = 1:nSets
  }
  out.list;
}
.consensusCalculation.base.FromList = function(dataList, useMean, setWeights, consensusQuantile)
{
  # Same consensus calculation as .consensusCalculation.base, but taking a
  # list of numeric vectors (one per set) instead of a matrix with sets in
  # columns. Returns list(consensus, originCount) as in the matrix version.
  nSets = length(dataList);
  if (nSets==1)
  {
    # Trivial case: consensus of a single set is the set itself.
    out.list = list(consensus = dataList[[1]]);
  } else {
    if (useMean)
    {
      # Weighted parallel mean across sets.
      out.list = list(consensus = pmean(dataList, setWeights));
    } else if (consensusQuantile == 0)
    {
      # Quantile 0 equals the parallel minimum across sets.
      whichmin = pminWhich.fromList(dataList);
      out.list = list(consensus = whichmin$min);
    } else {
      # General case: parallel quantile across sets.
      out.list = list(consensus = pquantile.fromList(dataList, prob = consensusQuantile));
    }
  }
  if (is.null(out.list$originCount))
  {
    # Count, for each set, how often its value lies at or below the consensus.
    out.list$originCount = sapply(dataList, function(x) sum(x <= out.list$consensus, na.rm = TRUE));
    names(out.list$originCount) = 1:nSets
  }
  out.list;
}
#==============================================================================================
#
# utility functions for working with multiple blockwise adjacencies.
#
#==============================================================================================
newBlockInformation = function(
   blocks,
   goodSamplesAndGenes)
{
  # Bundle the block assignment and the good-samples/genes filtering
  # information into a single BlockInformation object.
  # 'blocks': vector of block labels, one per gene.
  # 'goodSamplesAndGenes': list with (at least) a logical component
  #   'goodGenes' flagging genes retained after filtering.
  # Indices of the genes in each block, named by block label.
  # seq_along() replaces the 1:length(blocks) anti-pattern (which yields
  # c(1, 0) for empty input).
  blockGenes = tapply(seq_along(blocks), blocks, identity)
  names(blockGenes) = sort(unique(blocks));
  nGGenes = sum(goodSamplesAndGenes$goodGenes);
  # Block labels restricted to the retained ("good") genes.
  gBlocks = blocks[goodSamplesAndGenes$goodGenes];
  out = list(blocks = blocks,
             blockGenes = blockGenes,
             goodSamplesAndGenes = goodSamplesAndGenes,
             nGGenes = nGGenes,
             gBlocks = gBlocks);
  class(out) = c("BlockInformation", class(out));
  out;
}
#=======================================================================================================
#
# individualTOMs
# This is essentially a re-badged blockwiseIndividualTOMs with a different input and output format.
#
#=======================================================================================================
# The following is contained in networkOptions:
#corType = corType,
#maxPOutliers = maxPOutliers,
#quickCor = quickCor,
#pearsonFallback = pearsonFallback,
#cosineCorrelation = cosineCorrelation,
#corFnc = corFnc,
#corOptions = corOptions,
#corType.code = match(corType, .corTypes),
# Adjacency options
#replaceMissingAdjacencies = TRUE,
#power = 6,
#networkType = c("signed hybrid", "signed", "unsigned"),
# Topological overlap options
#TOMType = c("signed", "unsigned"),
#TOMDenom = c("mean", "min"))
individualTOMs = function(
  multiExpr,
  multiWeights = NULL,
  multiExpr.imputed = NULL, ## Optional, useful for pre-clustering if preclustering is needed.
  # Data checking options
  checkMissingData = TRUE,
  # Blocking options
  blocks = NULL,
  maxBlockSize = 5000,
  blockSizePenaltyPower = 5,
  nPreclusteringCenters = NULL,
  randomSeed = 54321,
  # Network construction options. This can be a single object of class NetworkOptions, or a multiData
  # structure of NetworkOptions objects, one per element of multiExpr.
  networkOptions,
  # Save individual TOMs? This is equivalent to using external = TRUE in blockwiseData
  saveTOMs = TRUE,
  individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",
  # Behaviour options
  collectGarbage = TRUE,
  verbose = 2, indent = 0)
{
  # Calculate individual (per-data-set) topological overlap/adjacency
  # matrices, block by block, for all sets in 'multiExpr'.
  # Returns a list with the blockwise adjacencies (as blockwiseData), set
  # names, number of sets, block information, and the per-set network options.
  spaces = indentSpaces(indent);
  # Determine whether the input is a proper multiData structure; if not, wrap
  # the single data set into one and remember to unwrap the output at the end.
  dataSize = checkSets(multiExpr, checkStructure = TRUE);
  if (dataSize$structureOK)
  {
    nSets = dataSize$nSets;
    nGenes = dataSize$nGenes;
    multiFormat = TRUE;
  } else {
    multiExpr = multiData(multiExpr);
    if (!is.null(multiWeights)) multiWeights = multiData(multiWeights);
    nSets = dataSize$nSets;
    nGenes = dataSize$nGenes;
    multiFormat = FALSE;
  }
  .checkAndScaleMultiWeights(multiWeights, multiExpr, scaleByMax = FALSE);
  if (is.null(names(multiExpr)))
    names(multiExpr) = spaste("Set", 1:nSets);
  # A single NetworkOptions object is replicated for every set; otherwise
  # 'networkOptions' must be a multiData structure of NetworkOptions objects.
  if (inherits(networkOptions, "NetworkOptions"))
  {
    networkOptions = setNames(list2multiData(.listRep(networkOptions, nSets)), names(multiExpr));
  } else {
    if (!all(mtd.apply(networkOptions, inherits, "NetworkOptions", mdaSimplify = TRUE)))
      stop("'networkOptions' must be of class 'NetworkOptions' or a multiData structure\n",
           " of objects of the class.\n",
           " See newNetworkOptions for creating valid network options.");
  }
  # Seed the RNG for reproducibility; restore any pre-existing seed on exit.
  if (!is.null(randomSeed))
  {
    if (exists(".Random.seed"))
    {
      savedSeed = .Random.seed
      on.exit(.Random.seed <<-savedSeed);
    }
    set.seed(randomSeed);
  }
  #if (maxBlockSize >= floor(sqrt(2^31)) )
  #  stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
  if (!is.null(blocks) && (length(blocks)!=nGenes))
    stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'.");
  if (verbose>0)
    printFlush(paste(spaces, "Calculating topological overlaps block-wise from all genes"));
  nSamples = dataSize$nSamples;
  # Check data for genes and samples that have too many missing values
  # Check that multiExpr has valid (mtd)column names. If column names are missing, generate them.
  colIDs = mtd.colnames(multiExpr);
  if (is.null(colIDs)) colIDs = c(1:dataSize$nGenes);
  if (checkMissingData)
  {
    gsg = goodSamplesGenesMS(multiExpr, multiWeights = multiWeights,
                             verbose = verbose - 1, indent = indent + 1)
    if (!gsg$allOK)
    {
      # Restrict all inputs (expression, weights, imputed data, column IDs)
      # to the good samples and genes.
      multiExpr = mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes);
      if (!is.null(multiWeights))
        multiWeights = mtd.subset(multiWeights, gsg$goodSamples, gsg$goodGenes);
      if (!is.null(multiExpr.imputed))
        multiExpr.imputed = mtd.subset(multiExpr.imputed, gsg$goodSamples, gsg$goodGenes);
      colIDs = colIDs[gsg$goodGenes];
    }
  } else {
    # No filtering requested: mark all genes and samples as good.
    gsg = list(goodGenes = rep(TRUE, nGenes), goodSamples = lapply(nSamples, function(n) rep(TRUE, n)));
    gsg$allOK = TRUE;
  }
  nGGenes = sum(gsg$goodGenes);
  nGSamples = rep(0, nSets);
  for (set in 1:nSets) nGSamples[set] = sum(gsg$goodSamples[[set]]);
  # Determine block assignment for the good genes: keep the supplied one, or
  # derive one (pre-clustering if the data exceed maxBlockSize).
  if (is.null(blocks))
  {
    if (nGGenes > maxBlockSize)
    {
      if (verbose>1) printFlush(paste(spaces, "....pre-clustering genes to determine blocks.."));
      # Imputed data (when supplied) are used for pre-clustering only.
      clustering = consensusProjectiveKMeans(
        if (!is.null(multiExpr.imputed)) multiExpr.imputed else multiExpr,
        preferredSize = maxBlockSize,
        sizePenaltyPower = blockSizePenaltyPower, checkData = FALSE,
        nCenters = nPreclusteringCenters,
        randomSeed = randomSeed,
        verbose = verbose-2, indent = indent + 1);
      gBlocks = .orderLabelsBySize(clustering$clusters);
    } else
      gBlocks = rep(1, nGGenes);
    # Expand the good-gene block labels to all genes (bad genes get NA).
    blocks = rep(NA, nGenes);
    blocks[gsg$goodGenes] = gBlocks;
  } else {
    gBlocks = blocks[gsg$goodGenes];
  }
  blockLevels = as.numeric(levels(factor(gBlocks)));
  blockSizes = table(gBlocks)
  nBlocks = length(blockLevels);
  if (any(blockSizes > sqrt(2^31)-1))
    printFlush(spaste(spaces,
          "Found block(s) with size(s) larger than limit of 'int' indexing.\n",
          spaces, " Support for such large blocks is experimental; please report\n",
          spaces, " any issues to Peter.Langfelder@gmail.com."));
  # check file names for uniqueness
  actualFileNames = matrix("", nSets, nBlocks);
  if (saveTOMs)
  {
    for (set in 1:nSets) for (b in 1:nBlocks)
      actualFileNames[set, b] = .processFileName(individualTOMFileNames, set, names(multiExpr), b);
    rownames(actualFileNames) = spaste("Set.", c(1:nSets));
    colnames(actualFileNames) = spaste("Block.", c(1:nBlocks));
    if (length(unique(as.vector(actualFileNames))) < nSets * nBlocks)
    {
      printFlush("Error: File names for (some) set/block combinations are not unique:");
      print(actualFileNames);
      stop("File names must be unique.");
    }
  }
  # Initialize various variables
  blockGenes = list();
  blockNo = 1;
  setTomDS = replicate(nSets, list());
  # Here's where the analysis starts
  for (blockNo in 1:nBlocks)
  {
    if (verbose>1 && nBlocks > 1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
    # Select the block genes
    block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]];
    #nBlockGenes = length(block);
    #blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]];
    #errorOccurred = FALSE;
    # For each set: calculate and save TOM
    for (set in 1:nSets)
    {
      if (verbose>2) printFlush(paste(spaces, "....Working on set", set))
      selExpr = as.matrix(multiExpr[[set]]$data[, block]);
      if (!is.null(multiWeights)) {
        selWeights = as.matrix(multiWeights[[set]]$data[, block])
      } else
        selWeights = NULL;
      tomDS = as.dist(.networkCalculation(selExpr, networkOptions[[set]]$data, weights = selWeights,
                                          verbose = verbose -2, indent = indent+2));
      # Append this block's TOM to the set's blockwiseData; the first block
      # starts a new structure. With saveTOMs, data are held externally in
      # the corresponding file.
      setTomDS[[set]]$data = addBlockToBlockwiseData(if (blockNo==1) NULL else setTomDS[[set]]$data,
                                                     external = saveTOMs,
                                                     blockData = tomDS,
                                                     blockFile = actualFileNames[set, blockNo],
                                                     recordAttributes = TRUE,
                                                     metaData = list(IDs = colIDs[block]))
    }
    if (collectGarbage) { rm(tomDS); gc(); }
  }
  names(setTomDS) = names(multiExpr);
  # If the input was a single (non-multiData) data set, unwrap the output.
  if (!multiFormat)
  {
    gsg$goodSamples = gsg$goodSamples[[1]];
    setTomDS = setTomDS[[1]]$data;
  }
  blockInformation = newBlockInformation(blocks, gsg);
  list(blockwiseAdjacencies = setTomDS,
       setNames = names(multiExpr),
       nSets = length(multiExpr),
       blockInfo = blockInformation,
       networkOptions = networkOptions
  )
}
#=====================================================================================================
#
# hierarchical consensus TOM
#
#=====================================================================================================
hierarchicalConsensusTOM = function(
  # Supply either ...
  # ... information needed to calculate individual TOMs
  multiExpr,
  multiWeights = NULL,
  # Data checking options
  checkMissingData = TRUE,
  # Blocking options
  blocks = NULL,
  maxBlockSize = 20000,
  blockSizePenaltyPower = 5,
  nPreclusteringCenters = NULL,
  randomSeed = 12345,
  # Network construction options. This can be a single object of class NetworkOptions, or a multiData
  # structure of NetworkOptions objects, one per element of multiExpr.
  networkOptions,
  # Save individual TOMs?
  keepIndividualTOMs = TRUE,
  individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",
  # ... or information about individual (more precisely, input) TOMs
  individualTOMInfo = NULL,
  # Consensus calculation options
  consensusTree,
  useBlocks = NULL,
  # Save calibrated TOMs?
  saveCalibratedIndividualTOMs = FALSE,
  calibratedIndividualTOMFilePattern = "calibratedIndividualTOM-Set%s-Block%b.RData",
  # Return options
  saveConsensusTOM = TRUE,
  consensusTOMFilePattern = "consensusTOM-%a-Block%b.RData",
  getCalibrationSamples = FALSE,
  # Return the intermediate results as well?
  keepIntermediateResults = saveConsensusTOM,
  # Internal handling of TOMs
  useDiskCache = NULL, chunkSize = NULL,
  cacheDir = ".",
  cacheBase = ".blockConsModsCache",
  # Behavior
  collectGarbage = TRUE,
  verbose = 1,
  indent = 0)
{
  # Top-level driver for hierarchical consensus TOM calculation: either
  # computes individual TOMs from 'multiExpr' or re-uses a supplied
  # 'individualTOMInfo', then runs the (possibly nested) consensus
  # calculation described by 'consensusTree'. Returns the consensus
  # calculation results augmented with the individual TOM information and a
  # copy of the consensus tree.
  # Seed the RNG for reproducibility; restore any pre-existing seed on exit.
  if (!is.null(randomSeed))
  {
    if (exists(".Random.seed"))
    {
      savedSeed = .Random.seed
      on.exit(.Random.seed <<-savedSeed);
    }
    set.seed(randomSeed);
  }
  # Remember whether the individual TOMs are calculated here; only locally
  # calculated TOMs may be deleted at the end (when keepIndividualTOMs=FALSE).
  localIndividualTOMCalculation = is.null(individualTOMInfo);
  if (is.null(individualTOMInfo))
  {
    if (missing(multiExpr)) stop("Either 'individualTOMInfo' or 'multiExpr' must be given.");
    if (is.null(useDiskCache)) useDiskCache = .useDiskCache(multiExpr, blocks, chunkSize);
    # Note: TOMs must be saved to disk if the disk cache will be used below
    # (hence saveTOMs = useDiskCache | keepIndividualTOMs).
    # The seed is set above, so individualTOMs is called with randomSeed=NULL.
    time = system.time({individualTOMInfo = individualTOMs(multiExpr = multiExpr,
                              multiWeights = multiWeights,
                              checkMissingData = checkMissingData,
                              blocks = blocks,
                              maxBlockSize = maxBlockSize,
                              blockSizePenaltyPower = blockSizePenaltyPower,
                              nPreclusteringCenters = nPreclusteringCenters,
                              randomSeed = NULL,
                              networkOptions = networkOptions,
                              saveTOMs = useDiskCache | keepIndividualTOMs,
                              individualTOMFileNames = individualTOMFileNames,
                              collectGarbage = collectGarbage,
                              verbose = verbose, indent = indent);});
    if (verbose > 1) { printFlush("Timimg for individual TOMs:"); print(time); }
  }
  # Run the hierarchical consensus calculation on the blockwise adjacencies.
  consensus = hierarchicalConsensusCalculation(individualTOMInfo$blockwiseAdjacencies,
                    consensusTree,
                    level = 1,
                    useBlocks = useBlocks,
                    randomSeed = NULL,
                    saveCalibratedIndividualData = saveCalibratedIndividualTOMs,
                    calibratedIndividualDataFilePattern = calibratedIndividualTOMFilePattern,
                    saveConsensusData = saveConsensusTOM,
                    consensusDataFileNames = consensusTOMFilePattern,
                    getCalibrationSamples= getCalibrationSamples,
                    keepIntermediateResults = keepIntermediateResults,
                    useDiskCache = useDiskCache,
                    chunkSize = chunkSize,
                    cacheDir = cacheDir,
                    cacheBase = cacheBase,
                    collectGarbage = collectGarbage,
                    verbose = verbose,
                    indent = indent);
  if (localIndividualTOMCalculation)
  {
    if (!keepIndividualTOMs)
    {
      # individual TOMs are contained in the individualTOMInfo list; remove them.
      mtd.apply(individualTOMInfo$blockwiseAdjacencies, BD.checkAndDeleteFiles);
      individualTOMInfo$blockwiseAdjacencies = NULL
    }
  }
  # Return the consensus results together with the individual TOM information
  # and the consensus tree that was used.
  c( consensus,
     list(individualTOMInfo = individualTOMInfo,
          consensusTree = consensusTree)
  );
}
#========================================================================================================
#
# Merge consensusTOMInfo lists
#
#========================================================================================================
#
# Caution: at present the function does not check that the inputs are compatible and compatible with the
# supplied blocks.
.mergeConsensusTOMInformationLists = function(blockInformation, consensusTOMInfoList)
{
  # Merge several consensusTOMInfo lists (e.g. one per group of blocks) into a
  # single one covering all blocks described by 'blockInformation'.
  # Caution (see header comment above): inputs are NOT checked for mutual
  # compatibility or for compatibility with the supplied blocks.
  blocks = blockInformation$blocks;
  # NOTE(review): blockLevels/nBlocks are computed but not used below.
  blockLevels = unique(blocks);
  nBlocks = length(blockLevels);
  # Start from the first list and overwrite the components that need merging.
  out = consensusTOMInfoList[[1]];
  # Merging consensus information
  out$consensusData = do.call(mergeBlockwiseData, lapply(consensusTOMInfoList, getElement, "consensusData"));
  if (out$saveCalibratedIndividualData)
  {
    # Merge the calibrated individual data, set by set.
    out$calibratedIndividualData = lapply(1:out$nSets, function(set)
      do.call(mergeBlockwiseData,
              lapply(consensusTOMInfoList, function(ct) ct$calibratedIndividualData[[set]])));
  }
  if (!is.null(out$calibrationSamples))
  {
    # Calibration samples are simply concatenated across the input lists.
    out$calibrationSamples = do.call(c, lapply(consensusTOMInfoList, getElement, "calibrationSamples"));
  }
  # Origin counts are additive across the merged lists.
  out$originCount = rowSums(do.call(cbind, lapply(consensusTOMInfoList, getElement, "originCount")))
  # Merging information in individualTOMInfo
  out$individualTOMInfo$blockwiseAdjacencies = lapply(1:out$nSets, function(set)
    list(data = do.call(mergeBlockwiseData, lapply(consensusTOMInfoList, function(ct)
           ct$individualTOMInfo$blockwiseAdjacencies[[set]]$data))));
  out$individualTOMInfo$blockInfo = blockInformation;
  out;
}
#========================================================================================================
#
# consensusTOM: old, single-layer consensus.
#
#========================================================================================================
consensusTOM = function(
# Supply either ...
# ... information needed to calculate individual TOMs
multiExpr,
# Data checking options
checkMissingData = TRUE,
# Blocking options
blocks = NULL,
maxBlockSize = 5000,
blockSizePenaltyPower = 5,
nPreclusteringCenters = NULL,
randomSeed = 54321,
# Network construction arguments: correlation options
corType = "pearson",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
replaceMissingAdjacencies = FALSE,
# Adjacency function options
power = 6,
networkType = "unsigned",
checkPower = TRUE,
# Topological overlap options
TOMType = "unsigned",
TOMDenom = "min",
suppressNegativeTOM = FALSE,
# Save individual TOMs?
saveIndividualTOMs = TRUE,
individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",
# ... or individual TOM information
individualTOMInfo = NULL,
useIndivTOMSubset = NULL,
##### Consensus calculation options
useBlocks = NULL,
networkCalibration = c("single quantile", "full quantile", "none"),
# Save calibrated TOMs?
saveCalibratedIndividualTOMs = FALSE,
calibratedIndividualTOMFilePattern = "calibratedIndividualTOM-Set%s-Block%b.RData",
# Simple quantile scaling options
calibrationQuantile = 0.95,
sampleForCalibration = TRUE, sampleForCalibrationFactor = 1000,
getNetworkCalibrationSamples = FALSE,
# Consensus definition
consensusQuantile = 0,
useMean = FALSE,
setWeights = NULL,
# Return options
saveConsensusTOMs = TRUE,
consensusTOMFilePattern = "consensusTOM-Block%b.RData",
returnTOMs = FALSE,
# Internal handling of TOMs
useDiskCache = NULL, chunkSize = NULL,
cacheDir = ".",
cacheBase = ".blockConsModsCache",
nThreads = 1,
# Diagnostic messages
verbose = 1,
indent = 0)
{
spaces = indentSpaces(indent);
networkCalibration = match.arg(networkCalibration);
seedSaved = FALSE;
if (!is.null(randomSeed))
{
if (exists(".Random.seed"))
{
savedSeed = .Random.seed
on.exit(.Random.seed <<-savedSeed);
}
set.seed(randomSeed);
}
if (any(!is.finite(setWeights))) stop("Entries of 'setWeights' must all be finite.");
localIndividualTOMCalculation = is.null(individualTOMInfo);
if (is.null(individualTOMInfo))
{
if (missing(multiExpr)) stop("Either 'individualTOMInfo' or 'multiExpr' must be given.");
dataSize = checkSets(multiExpr);
nSets.all = dataSize$nSets;
nGenes = dataSize$nGenes;
if (is.null(useDiskCache)) useDiskCache = .useDiskCache(multiExpr, blocks, chunkSize);
if (length(power)!=1)
{
if (length(power)!=nSets.all)
stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'.");
} else {
power = rep(power, nSets.all);
}
if ( (consensusQuantile < 0) | (consensusQuantile > 1) )
stop("'consensusQuantile' must be between 0 and 1.");
time = system.time({individualTOMInfo = blockwiseIndividualTOMs(multiExpr = multiExpr,
checkMissingData = checkMissingData,
blocks = blocks,
maxBlockSize = maxBlockSize,
blockSizePenaltyPower = blockSizePenaltyPower,
nPreclusteringCenters = nPreclusteringCenters,
randomSeed = randomSeed,
corType = corType,
maxPOutliers = maxPOutliers,
quickCor = quickCor,
pearsonFallback = pearsonFallback,
cosineCorrelation = cosineCorrelation,
replaceMissingAdjacencies = replaceMissingAdjacencies,
power = power,
networkType = networkType,
TOMType = TOMType,
TOMDenom = TOMDenom,
suppressNegativeTOM = suppressNegativeTOM,
saveTOMs = useDiskCache | saveIndividualTOMs,
individualTOMFileNames = individualTOMFileNames,
nThreads = nThreads,
verbose = verbose, indent = indent);});
if (verbose > 1) { printFlush("Timimg for individual TOMs:"); print(time); }
if (!saveIndividualTOMs & useDiskCache)
on.exit(.checkAndDelete(individualTOMInfo$actualTOMFileNames), add = TRUE)
} else {
nSets.all = if (individualTOMInfo$saveTOMs)
nrow(individualTOMInfo$actualTOMFileNames) else ncol(individualTOMInfo$TOMSimilarities[[1]]);
nGenes = length(individualTOMInfo$blocks);
if (is.null(useDiskCache)) useDiskCache = .useDiskCache(NULL, blocks, chunkSize,
nSets = nSets.all, nGenes = nGenes);
}
setNames = individualTOMInfo$setNames;
if (is.null(setNames)) setNames = spaste("Set.", 1:individualTOMInfo$nSets)
nGoodGenes = length(individualTOMInfo$gBlocks);
if (is.null(setWeights)) setWeights = rep(1, nSets.all);
if (length(setWeights)!=nSets.all)
stop("Length of 'setWeights' must equal the number of sets.");
setWeightMat = as.matrix(setWeights/sum(setWeights));
if (is.null(useIndivTOMSubset))
{
if (individualTOMInfo$nSets != nSets.all)
stop(paste("Number of sets in individualTOMInfo and in multiExpr do not agree.\n",
" To use a subset of individualTOMInfo, set useIndivTOMSubset appropriately."));
useIndivTOMSubset = c(1:nSets.all);
}
nSets = length(useIndivTOMSubset);
if (length(unique(useIndivTOMSubset))!=nSets)
stop("Entries of 'useIndivTOMSubset' must be unique");
if (any(useIndivTOMSubset<1) | any(useIndivTOMSubset>individualTOMInfo$nSets))
stop("All entries of 'useIndivTOMSubset' must be between 1 and the number of sets in individualTOMInfo");
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
gsg = individualTOMInfo$goodSamplesAndGenes;
# Restrict gsg to used sets
gsg$goodSamples = gsg$goodSamples[useIndivTOMSubset];
if (is.null(chunkSize)) chunkSize = as.integer(.largestBlockSize/(2*nSets))
# Initialize various variables
if (getNetworkCalibrationSamples)
{
if (!sampleForCalibration)
stop(paste("Incompatible input options: networkCalibrationSamples can only be returned",
"if sampleForCalibration is TRUE."));
networkCalibrationSamples = list();
}
blockLevels = sort(unique(individualTOMInfo$gBlocks));
nBlocks = length(blockLevels);
if (is.null(useBlocks)) useBlocks = blockLevels;
useBlockIndex = match(useBlocks, blockLevels);
if (!all(useBlocks %in% blockLevels))
stop("All entries of 'useBlocks' must be valid block levels.");
if (any(duplicated(useBlocks)))
stop("Entries of 'useBlocks' must be unique.");
nUseBlocks = length(useBlocks);
if (nUseBlocks==0)
stop("'useBlocks' cannot be non-NULL and empty at the same time.");
consensusTOM.out = list();
TOMFiles = rep("", nUseBlocks);
originCount = rep(0, nSets);
calibratedIndividualTOMFileNames = NULL;
if (saveCalibratedIndividualTOMs)
{
calibratedIndividualTOMFileNames = matrix("", nSets, nBlocks);
for (set in 1:nSets) for (b in 1:nBlocks)
calibratedIndividualTOMFileNames[set, b] = .processFileName(calibratedIndividualTOMFilePattern,
setNumber = set, setNames = setNames, blockNumber = b);
}
gc();
# Here's where the analysis starts
for (blockIndex in 1:nUseBlocks)
{
blockNo = useBlockIndex[blockIndex];
if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, "."));
# Select block genes
block = c(1:nGoodGenes)[individualTOMInfo$gBlocks==blockLevels[blockNo]];
nBlockGenes = length(block);
# blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]];
scaleQuant = rep(1, nSets);
scalePowers = rep(1, nSets);
# Set up file names or memory space to hold the set TOMs
if (useDiskCache)
{
nChunks = ceiling(nBlockGenes * (nBlockGenes-1)/2/chunkSize);
chunkFileNames = array("", dim = c(nChunks, nSets));
on.exit(.checkAndDelete(chunkFileNames), add = TRUE);
} else nChunks = 1;
if (nChunks==1) useDiskCache = FALSE;
if (!useDiskCache)
{
# Note: setTomDS will contain the scaled set TOM matrices.
setTomDS = matrix(0, nBlockGenes*(nBlockGenes-1)/2, nSets);
colnames(setTomDS) = setNames;
}
# create an empty consTomDS distance structure.
consTomDS = .emptyDist(nBlockGenes);
# sample entry indices from the distance structure for TOM scaling, if requested
if (networkCalibration=="single quantile" && sampleForCalibration)
{
qx = min(calibrationQuantile, 1-calibrationQuantile);
nScGenes = min(sampleForCalibrationFactor * 1/qx, length(consTomDS));
nTOMEntries = length(consTomDS)
scaleSample = sample(nTOMEntries, nScGenes);
if (getNetworkCalibrationSamples)
networkCalibrationSamples[[blockIndex]] = list(sampleIndex = scaleSample,
TOMSamples = matrix(NA, nScGenes, nSets));
}
if (networkCalibration %in% c("single quantile", "none"))
{
for (set in 1:nSets)
{
if (verbose>2) printFlush(paste(spaces, "....Working on set", useIndivTOMSubset[set]))
if (individualTOMInfo$saveTOMs)
{
tomDS = .loadObject(individualTOMInfo$ actualTOMFileNames[useIndivTOMSubset[set], blockNo],
name = "tomDS", size = nBlockGenes*(nBlockGenes-1)/2);
} else {
tomDS = consTomDS;
tomDS[] = individualTOMInfo$TOMSimilarities[[blockNo]] [, useIndivTOMSubset[set]]
}
if (networkCalibration=="single quantile")
{
# Scale TOMs so that calibrationQuantile agree in each set
if (sampleForCalibration)
{
if (getNetworkCalibrationSamples)
{
networkCalibrationSamples[[blockIndex]]$TOMSamples[, set] = tomDS[scaleSample];
scaleQuant[set] = quantile(networkCalibrationSamples[[blockIndex]]$TOMSamples[, set],
probs = calibrationQuantile, type = 8);
} else {
scaleQuant[set] = quantile(tomDS[scaleSample], probs = calibrationQuantile, type = 8);
}
} else
scaleQuant[set] = quantile(x = tomDS, probs = calibrationQuantile, type = 8);
if (set>1)
{
scalePowers[set] = log(scaleQuant[1])/log(scaleQuant[set]);
tomDS = tomDS^scalePowers[set];
}
if (saveCalibratedIndividualTOMs)
save(tomDS, file = calibratedIndividualTOMFileNames[set, blockNo]);
}
# Save the calculated TOM either to disk in chunks or to memory.
if (useDiskCache)
{
if (verbose > 3) printFlush(paste(spaces, "......saving TOM similarity to disk cache.."));
sc = .saveChunks(tomDS, chunkSize, cacheBase, cacheDir = cacheDir);
chunkFileNames[, set] = sc$files;
chunkLengths = sc$chunkLengths;
} else {
setTomDS[, set] = tomDS[];
}
rm(tomDS); gc();
}
} else if (networkCalibration=="full quantile")
{
# Step 1: load each TOM, get order, split TOM into chunks according to order, and save.
if (verbose>1) printFlush(spaste(spaces, "..working on quantile normalization"))
if (useDiskCache)
{
orderFiles = rep("", nSets);
on.exit(.checkAndDelete(orderFiles),add = TRUE);
}
for (set in 1:nSets)
{
if (verbose>2) printFlush(paste(spaces, "....Working on set", useIndivTOMSubset[set]))
if (individualTOMInfo$saveTOMs)
{
tomDS = .loadObject(individualTOMInfo$ actualTOMFileNames[useIndivTOMSubset[set], blockNo],
name = "tomDS", size = nBlockGenes*(nBlockGenes-1)/2);
} else {
tomDS = consTomDS;
tomDS[] = individualTOMInfo$TOMSimilarities[[blockNo]] [, useIndivTOMSubset[set]]
}
if (useDiskCache)
{
# Order TOM (this may take a long time...)
if (verbose > 3) printFlush(spaste(spaces, "......ordering TOM"));
time = system.time({order1 = .qorder(tomDS)});
if (verbose > 3) { printFlush("Time to order TOM:"); print(time); }
# save the order
orderFiles[set] = tempfile(pattern = spaste(".orderForSet", set), tmpdir = cacheDir);
if (verbose > 3) printFlush(spaste(spaces, "......saving order and ordered TOM"));
save(order1, file = orderFiles[set]);
# Save ordered tomDS into chunks
tomDS.ordered = tomDS[order1];
sc = .saveChunks(tomDS.ordered, chunkSize, cacheBase, cacheDir = cacheDir);
chunkFileNames[, set] = sc$files;
chunkLengths = sc$chunkLengths;
} else {
setTomDS[, set] = tomDS[]
}
}
if (useDiskCache)
{
# Step 2: Load chunks one by one and quantile normalize
if (verbose > 2) printFlush(spaste(spaces, "....quantile normalizing chunks"));
for (c in 1:nChunks)
{
if (verbose > 3) printFlush(spaste(spaces, "......QN for chunk ", c, " of ", nChunks));
chunkData = matrix(NA, chunkLengths[c], nSets);
for (set in 1:nSets)
chunkData[, set] = .loadObject(chunkFileNames[c, set]);
time = system.time({ chunk.norm = normalize.quantiles(chunkData, copy = FALSE);});
if (verbose > 1) { printFlush("Time to QN chunk:"); print(time); }
# Save quantile normalized chunks
for (set in 1:nSets)
{
temp = chunk.norm[, set];
save(temp, file = chunkFileNames[c, set]);
}
}
if (verbose > 2) printFlush(spaste(spaces, "....putting together full QN'ed TOMs"));
# Put together full TOMs
for (set in 1:nSets)
{
load(orderFiles[set]);
start = 1;
for (c in 1:nChunks)
{
end = start + chunkLengths[c] - 1;
tomDS[order1[start:end]] = .loadObject(chunkFileNames[c, set], size = chunkLengths[c]);
start = start + chunkLengths[c];
}
if (saveCalibratedIndividualTOMs)
save(tomDS, file = calibratedIndividualTOMFileNames[set, blockNo]);
.saveChunks(tomDS, chunkSize, fileNames = chunkFileNames[, set]);
unlink(orderFiles[set]);
}
} else {
# If disk cache is not being used, simply call normalize.quantiles on the full set.
setTomDS = normalize.quantiles(setTomDS);
if (saveCalibratedIndividualTOMs) for (set in 1:nSets)
{
tomDS = .vector2dist(setTomDS[, set]);
save(tomDS, file = calibratedIndividualTOMFileNames[set, blockNo]);
}
}
} else stop("Unrecognized value of 'networkCalibration': ", networkCalibration);
# Calculate consensus network
if (verbose > 2)
printFlush(paste(spaces, "....Calculating consensus network"));
if (useDiskCache)
{
start = 1;
for (chunk in 1:nChunks)
{
if (verbose > 3) printFlush(paste(spaces, "......working on chunk", chunk));
end = start + chunkLengths[chunk] - 1;
setChunks = array(0, dim = c(chunkLengths[chunk], nSets));
for (set in 1:nSets)
{
load(file = chunkFileNames[chunk, set]);
setChunks[, set] = temp;
file.remove(chunkFileNames[chunk, set]);
}
colnames(setChunks) = setNames;
tmp = .consensusCalculation.base(setChunks, useMean = useMean, setWeightMat = setWeightMat,
consensusQuantile = consensusQuantile);
consTomDS[start:end] = tmp$consensus;
countIndex = as.numeric(names(tmp$originCount));
originCount[countIndex] = originCount[countIndex] + tmp$originCount;
rm(tmp);
start = end + 1;
}
} else {
tmp = .consensusCalculation.base(setTomDS, useMean = useMean, setWeightMat = setWeightMat,
consensusQuantile = consensusQuantile);
consTomDS[] = tmp$consensus;
countIndex = as.numeric(names(tmp$originCount));
originCount[countIndex] = originCount[countIndex] + tmp$originCount;
rm(tmp);
}
# Save the consensus TOM if requested
if (saveConsensusTOMs)
{
TOMFiles[blockIndex] = .substituteTags(consensusTOMFilePattern, "%b", blockNo);
if (TOMFiles[blockIndex]==consensusTOMFilePattern)
stop(paste("File name for consensus TOM must contain the tag %b somewhere in the file name -\n",
" - this tag will be replaced by the block number. "));
save(consTomDS, file = TOMFiles[blockIndex]);
}
if (returnTOMs) consensusTOM.out[[blockIndex]] = consTomDS;
gc();
}
if (!saveConsensusTOMs) TOMFiles = NULL;
if (!returnTOMs) consensusTOM.out = NULL;
if (localIndividualTOMCalculation)
{
if (!individualTOMInfo$saveTOMs)
{
# individual TOMs are contained in the individualTOMInfo list; remove them.
individualTOMInfo$TOMSimilarities = NULL;
}
}
list(consensusTOM = consensusTOM.out,
TOMFiles = TOMFiles,
saveConsensusTOMs = saveConsensusTOMs,
individualTOMInfo = individualTOMInfo,
useIndivTOMSubset = useIndivTOMSubset,
goodSamplesAndGenes = gsg,
nGGenes = nGoodGenes,
nSets = nSets,
saveCalibratedIndividualTOMs = saveCalibratedIndividualTOMs,
calibratedIndividualTOMFileNames = calibratedIndividualTOMFileNames,
networkCalibrationSamples = if (getNetworkCalibrationSamples) networkCalibrationSamples else NULL,
consensusQuantile = consensusQuantile,
originCount = originCount
)
}
# NOTE(review): the following three lines are website boilerplate (a scraping
# artifact from the page this file was copied from), not R code. They are
# commented out here so the file remains syntactically valid.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.