consensusCalculation = function(
# a list or multiData structure of either numeric vectors (possibly arrays) or blockwiseAdj objects
individualData,
consensusOptions,
useBlocks = NULL,
randomSeed = NULL,
saveCalibratedIndividualData = FALSE,
calibratedIndividualDataFilePattern = "calibratedIndividualData-%a-Set%s-Block%b.RData",
# Return options: the data can be either saved or returned but not both.
saveConsensusData = TRUE,
consensusDataFileNames = "consensusData-%a-Block%b.RData",
getCalibrationSamples= FALSE,
# Internal handling of data
useDiskCache = NULL, chunkSize = NULL,
cacheDir = ".",
cacheBase = ".blockConsModsCache",
# Behaviour
collectGarbage = FALSE,
verbose = 1, indent = 0)
{
nSets = length(individualData);
if (! isMultiData(individualData))
individualData = list2multiData(individualData);
setNames = names(individualData);
if (is.null(setNames)) setNames = rep("", nSets);
blockwise = inherits(individualData[[1]]$data, "BlockwiseData");
if (!blockwise)
{
blockDimnames = .mtd.checkDimConsistencyAndGetDimnames(individualData);
blockLengths = length(individualData[[1]]$data);
blockAttributes = attributes(individualData[[1]]$data);
metaData = list();
} else {
blockLengths = BD.blockLengths(individualData[[1]]$data);
blockAttributes = individualData[[1]]$data$attributes;
metaData = BD.getMetaData(individualData[[1]]$data, blocks = 1);
}
nBlocks = length(blockLengths);
spaces = indentSpaces(indent);
if (!is.null(randomSeed))
{
if (exists(".Random.seed"))
{
savedSeed = .Random.seed
on.exit(.Random.seed <<-savedSeed);
}
set.seed(randomSeed);
}
setWeights = consensusOptions$setWeights;
if (is.null(setWeights)) setWeights = rep(1, nSets);
if (length(setWeights)!=nSets)
stop("Length of 'setWeights' must equal the number of sets.");
if (any(!is.finite(setWeights)))
stop("setWeights must all be finite.");
setWeightMat = as.matrix(setWeights)/sum(setWeights)
if (is.null(chunkSize)) chunkSize = as.integer(.largestBlockSize/(2*nSets))
if (is.null(useDiskCache)) useDiskCache = .useDiskCache(individualData, chunkSize = chunkSize);
# Initialize various variables
if (getCalibrationSamples)
{
if (!consensusOptions$sampleForCalibration)
stop(paste("Incompatible input options: calibrationSamples can only be returned",
"if sampleForCalibration is TRUE."));
calibrationSamples = list();
}
blockLevels = 1:nBlocks;
if (is.null(useBlocks)) useBlocks = blockLevels;
useBlockIndex = match(useBlocks, blockLevels);
if (!all(useBlocks %in% blockLevels))
stop("All entries of 'useBlocks' must be valid block levels.");
if (any(duplicated(useBlocks)))
stop("Entries of 'useBlocks' must be unique.");
nUseBlocks = length(useBlocks);
if (nUseBlocks==0)
stop("'useBlocks' cannot be non-NULL and empty at the same time.");
consensus.out = list();
consensusFiles = rep("", nUseBlocks);
originCount = rep(0, nSets);
calibratedIndividualDataFileNames = NULL;
if (saveCalibratedIndividualData)
{
calibratedIndividualDataFileNames = matrix("", nSets, nBlocks);
for (set in 1:nSets) for (b in 1:nBlocks)
calibratedIndividualDataFileNames[set, b] =
.processFileName(calibratedIndividualDataFilePattern, setNumber = set, setNames = setNames,
blockNumber = b, analysisName = consensusOptions$analysisName);
}
if (collectGarbage) gc();
calibratedIndividualData.saved = vector(mode = "list", length = nSets);
consensusData = NULL;
dataFiles = character(nUseBlocks);
# Here's where the analysis starts
for (blockIndex in 1:nUseBlocks)
{
block = useBlockIndex[blockIndex];
if (verbose>1) printFlush(spaste(spaces, "..Working on block ", block, "."));
scaleQuant = rep(1, nSets);
scalePowers = rep(1, nSets);
useDiskCache1 = useDiskCache && nSets > 1; ### No need to use disk cache when there is only 1 set.
# Set up file names or memory space to hold the set Data
if (useDiskCache1)
{
nChunks = ceiling(blockLengths[block]/chunkSize);
chunkFileNames = array("", dim = c(nChunks, nSets));
on.exit(.checkAndDelete(chunkFileNames), add = TRUE);
} else nChunks = 1;
if (nChunks==1) useDiskCache1 = FALSE;
if (!useDiskCache1)
{
# Note: setTomDS will contained the scaled set Data matrices.
calibratedData = array(0, dim = c(blockLengths[block], nSets));
}
# sample entry indices from the distance structure for Data scaling, if requested
if (consensusOptions$calibration=="single quantile" &&
consensusOptions$sampleForCalibration)
{
qx = min(consensusOptions$calibrationQuantile, 1-consensusOptions$calibrationQuantile);
nScGenes = min(consensusOptions$sampleForCalibrationFactor * 1/qx, blockLengths[block]);
scaleSample = sample(blockLengths[block], nScGenes);
if (getCalibrationSamples)
calibrationSamples[[blockIndex]] = list(sampleIndex = scaleSample,
TOMSamples = matrix(NA, nScGenes, nSets));
}
if (consensusOptions$calibration %in% c("single quantile", "none"))
{
for (set in 1:nSets)
{
if (verbose>2) printFlush(spaste(spaces, "....Working on set ", set, " (", setNames[set], ")"))
# We need to drop dimensions here but we may need them later. Keep that in mind.
tomDS = as.numeric(BD.getData(individualData[[set]]$data, block, simplify = TRUE));
if (consensusOptions$calibration=="single quantile")
{
# Scale Data so that calibrationQuantile agree in each set
if (consensusOptions$sampleForCalibration)
{
if (getCalibrationSamples)
{
calibrationSamples[[blockIndex]]$dataSamples[, set] = tomDS[scaleSample];
scaleQuant[set] = quantile(calibrationSamples[[blockIndex]]$dataSamples[, set],
probs = consensusOptions$calibrationQuantile, type = 8);
} else {
scaleQuant[set] = quantile(tomDS[scaleSample], probs = consensusOptions$calibrationQuantile,
type = 8);
}
} else
scaleQuant[set] = quantile(x = tomDS, probs = consensusOptions$calibrationQuantile, type = 8);
if (set>1)
{
scalePowers[set] = log(scaleQuant[1])/log(scaleQuant[set]);
tomDS = tomDS^scalePowers[set];
}
if (saveCalibratedIndividualData)
calibratedIndividualData.saved[[set]] =
addBlockToBlockwiseData(
calibratedIndividualData.saved[[set]],
.setAttrFromList(tomDS, blockAttributes[[blockIndex]]),
external = TRUE,
recordAttributes = TRUE,
metaData = metaData,
blockFile = calibratedIndividualDataFileNames[set, block])
}
# Save the calculated Data either to disk in chunks or to memory.
if (useDiskCache1)
{
if (verbose > 3) printFlush(paste(spaces, "......saving Data similarity to disk cache.."));
sc = .saveChunks(tomDS, chunkSize, cacheBase, cacheDir = cacheDir);
chunkFileNames[, set] = sc$files;
chunkLengths = sc$chunkLengths;
} else {
calibratedData[, set] = tomDS;
}
}
if (collectGarbage) gc();
} else if (consensusOptions$calibration=="full quantile")
{
# Step 1: load each data set, get order, split Data into chunks according to order, and save.
if (verbose>1) printFlush(spaste(spaces, "..working on quantile normalization"))
if (useDiskCache1)
{
orderFiles = rep("", nSets);
on.exit(.checkAndDelete(orderFiles),add = TRUE);
}
for (set in 1:nSets)
{
if (verbose>2) printFlush(spaste(spaces, "....Working on set ", set, " (", setNames[set], ")"))
tomDS = as.numeric(BD.getData(individualData[[set]]$data, block, simplify = TRUE));
if (useDiskCache1)
{
# Order Data (this may take a long time...)
if (verbose > 3) printFlush(spaste(spaces, "......ordering Data"));
time = system.time({order1 = .qorder(tomDS)});
if (verbose > 1) { printFlush("Time to order Data:"); print(time); }
# save the order
orderFiles[set] = tempfile(pattern = spaste(".orderForSet", set), tmpdir = cacheDir);
if (verbose > 3) printFlush(spaste(spaces, "......saving order and ordered Data"));
save(order1, file = orderFiles[set]);
# Save ordered tomDS into chunks
tomDS.ordered = tomDS[order1];
sc = .saveChunks(tomDS.ordered, chunkSize, cacheBase, cacheDir = cacheDir);
chunkFileNames[, set] = sc$files;
chunkLengths = sc$chunkLengths;
} else {
calibratedData[, set] = tomDS
}
}
if (useDiskCache1)
{
# Step 2: Load chunks one by one and quantile normalize
if (verbose > 2) printFlush(spaste(spaces, "....quantile normalizing chunks"));
for (c in 1:nChunks)
{
if (verbose > 3) printFlush(spaste(spaces, "......QN for chunk ", c, " of ", nChunks));
chunkData = matrix(NA, chunkLengths[c], nSets);
for (set in 1:nSets)
chunkData[, set] = .loadObject(chunkFileNames[c, set]);
time = system.time({ chunk.norm = normalize.quantiles(chunkData, copy = FALSE);});
if (verbose > 1) { printFlush("Time to QN chunk:"); print(time); }
# Save quantile normalized chunks
for (set in 1:nSets)
{
temp = chunk.norm[, set];
save(temp, file = chunkFileNames[c, set]);
}
}
if (verbose > 2) printFlush(spaste(spaces, "....putting together full QN'ed Data"));
# Put together full Data
for (set in 1:nSets)
{
load(orderFiles[set]);
start = 1;
for (c in 1:nChunks)
{
end = start + chunkLengths[c] - 1;
tomDS[order1[start:end]] = .loadObject(chunkFileNames[c, set], size = chunkLengths[c]);
start = start + chunkLengths[c];
}
if (saveCalibratedIndividualData)
calibratedIndividualData.saved[[set]] = addBlockToBlockwiseData(
calibratedIndividualData.saved[[set]],
.setAttrFromList(tomDS, blockAttributes[[blockIndex]]),
external = TRUE,
recordAttributes = TRUE,
metaData = metaData,
blockFile = calibratedIndividualDataFileNames[set, blockIndex]);
.saveChunks(tomDS, chunkSize, fileNames = chunkFileNames[, set]);
unlink(orderFiles[set]);
}
} else {
# If disk cache is not being used, simply call normalize.quantiles on the full set.
if (nSets > 1) calibratedData = normalize.quantiles(calibratedData);
if (saveCalibratedIndividualData) for (set in 1:nSets)
{
calibratedIndividualData.saved[[set]] = addBlockToBlockwiseData(
calibratedIndividualData.saved[[set]],
.setAttrFromList(calibratedData[, set], blockAttributes[[blockIndex]]),
external = TRUE,
recordAttributes = TRUE,
metaData = metaData,
blockFile = calibratedIndividualDataFileNames[set, blockIndex]);
}
}
} else stop("Unrecognized value of 'calibration' in consensusOptions: ", consensusOptions$calibration);
# Calculate consensus
if (verbose > 2)
printFlush(paste(spaces, "....Calculating consensus"));
# create an empty consTomDS distance structure.
consTomDS = numeric(blockLengths[block]);
if (useDiskCache1)
{
start = 1;
for (chunk in 1:nChunks)
{
if (verbose > 3) printFlush(paste(spaces, "......working on chunk", chunk));
end = start + chunkLengths[chunk] - 1;
setChunks = array(0, dim = c(chunkLengths[chunk], nSets));
for (set in 1:nSets)
{
load(file = chunkFileNames[chunk, set]);
setChunks[, set] = temp;
file.remove(chunkFileNames[chunk, set]);
}
tmp = .consensusCalculation.base(setChunks, useMean = consensusOptions$useMean,
setWeightMat = setWeightMat,
consensusQuantile = consensusOptions$consensusQuantile);
consTomDS[start:end] = tmp$consensus;
if (!is.null(tmp$originCount))
{
countIndex = as.numeric(names(tmp$originCount));
originCount[countIndex] = originCount[countIndex] + tmp$originCount;
}
start = end + 1;
}
} else {
tmp = .consensusCalculation.base(calibratedData, useMean = consensusOptions$useMean,
setWeightMat = setWeightMat,
consensusQuantile = consensusOptions$consensusQuantile);
consTomDS[] = tmp$consensus;
if (!is.null(tmp$originCount))
{
countIndex = as.numeric(names(tmp$originCount));
originCount[countIndex] = originCount[countIndex] + tmp$originCount;
}
}
# Save the consensus Data if requested
if (saveConsensusData)
{
if (!grepl("%b", consensusDataFileNames))
stop(paste("File name for consensus data must contain the tag %b somewhere in the file name -\n",
" - this tag will be replaced by the block number. "));
dataFiles[blockIndex] = .substituteTags(consensusDataFileNames, c("%b", "%a"),
c(block, consensusOptions$analysisName[1]));
}
consensusData = addBlockToBlockwiseData(
consensusData,
.setAttrFromList(consTomDS, blockAttributes[[blockIndex]]),
external = saveConsensusData,
recordAttributes = TRUE,
metaData = metaData,
blockFile = if (saveConsensusData) dataFiles[blockIndex] else NULL)
if (collectGarbage) gc();
}
list(
#blockwiseData
consensusData = consensusData,
# int
nSets = nSets,
# Logical
saveCalibratedIndividualData = saveCalibratedIndividualData,
# List of blockwise data of length nSets
calibratedIndividualData = calibratedIndividualData.saved,
# List with one component per block
calibrationSamples = if (getCalibrationSamples) calibrationSamples else NULL,
# Numeric vector with nSets components
originCount = originCount,
consensusOptions = consensusOptions
)
}
#==========================================================================================================
#
# Hierarchical consensus calculation
#
#==========================================================================================================
# hierarchical consensus tree: a list with the following components:
# inputs: either an atomic character vector whose entries match names of individualData, or a list in
# which each component can either be a single character string giving a name in individualDara, or
# another hierarchical consensus tree.
# consensusOptions: a list of class ConsensusOptions
# analysisName: optional, analysis name used for naming files when saving to disk.
# Here individualData is a list or multiData in which every component is either a blockwiseData instance or
# a numeric object (matrix, vector etc). Function consensusCalculation handles both.
hierarchicalConsensusCalculation = function(
individualData,
consensusTree,
level = 1,
useBlocks = NULL,
randomSeed = NULL,
saveCalibratedIndividualData = FALSE,
calibratedIndividualDataFilePattern = "calibratedIndividualData-%a-Set%s-Block%b.RData",
# Return options: the data can be either saved or returned but not both.
saveConsensusData = TRUE,
consensusDataFileNames = "consensusData-%a-Block%b.RData",
getCalibrationSamples= FALSE,
# Return the intermediate results as well?
keepIntermediateResults = FALSE,
# Internal handling of data
useDiskCache = NULL, chunkSize = NULL,
cacheDir = ".",
cacheBase = ".blockConsModsCache",
# Behaviour
collectGarbage = FALSE,
verbose = 1, indent = 0)
{
spaces = indentSpaces(indent);
individualNames = names(individualData);
if (is.null(individualNames))
stop("'individualData' must be a named list.");
if (!isMultiData(individualData)) individualData = list2multiData(individualData);
if (!"inputs" %in% names(consensusTree))
stop("'consensusTree' must contain component 'inputs'.");
if (!"consensusOptions" %in% names(consensusTree))
stop("'consensusTree' must contain component 'consensusOptions'.");
if (!is.null(randomSeed))
{
if (exists(".Random.seed"))
{
savedSeed = .Random.seed
on.exit(.Random.seed <<-savedSeed);
}
set.seed(randomSeed);
}
# Set names for consensusTree$inputs so that the names are informative.
if (is.null(names(consensusTree$inputs)))
{
names(consensusTree$inputs) = spaste("Level.", level, ".Input.",
1:length(consensusTree$inputs));
validInputNames = FALSE;
} else
validInputNames = TRUE;
isChar = sapply(consensusTree$inputs, is.character);
names(consensusTree$inputs)[isChar] = consensusTree$inputs[isChar];
if (!is.null(consensusTree$analysisName))
consensusTree$consensusOptions$analysisName = consensusTree$analysisName;
# Recursive step if necessary
if (verbose > 0)
printFlush(spaste(spaces, "------------------------------------------------------------------\n",
spaces, " Working on ", consensusTree$consensusOptions$analysisName,
"\n", spaces, "------------------------------------------------------------------"));
names(consensusTree$inputs) = make.unique(make.names(names(consensusTree$inputs)));
inputs0 = mtd.mapply(function(inp1, name)
{
if (is.character(inp1))
{
if (!inp1 %in% names(individualData))
stop("Element '", inp1, "' is not among names of 'individualData'.");
inp1;
} else {
if ("analysisName" %in% names(inp1)) name1 = inp1$analysisName else name1 = name;
inp1$consensusOptions$analysisName = name1;
hierarchicalConsensusCalculation(individualData, inp1,
useBlocks = useBlocks,
level = level + 1,
randomSeed = NULL,
saveCalibratedIndividualData = saveCalibratedIndividualData,
calibratedIndividualDataFilePattern =calibratedIndividualDataFilePattern,
saveConsensusData = saveConsensusData,
consensusDataFileNames = consensusDataFileNames,
getCalibrationSamples = getCalibrationSamples,
keepIntermediateResults = keepIntermediateResults,
useDiskCache = useDiskCache,
chunkSize = chunkSize,
cacheDir = cacheDir,
cacheBase = cacheBase,
collectGarbage = collectGarbage,
verbose = verbose -2, indent = indent + 2);
}
}, consensusTree$inputs, names(consensusTree$inputs));
names(inputs0) = names(consensusTree$inputs)
inputData = mtd.apply(inputs0, function(inp1)
{
if (is.character(inp1))
{
individualData[[inp1]]$data
} else
inp1$consensusData;
});
inputIsIntermediate = !sapply(consensusTree$inputs, is.character);
# Need to check that all inputData have the same format. In particular, some could be plain numeric data and
# some could be BlockwiseData.
nInputs1 = length(inputData);
isBlockwise = mtd.apply(inputData, inherits, "BlockwiseData", mdaSimplify = TRUE);
if (any(!isBlockwise)) for (i in which(!isBlockwise))
inputData[[i]]$data = newBlockwiseData(list(inputData[[i]]$data), external = FALSE)
names(inputData) = names(consensusTree$inputs)
# Calculate the consensus
if (verbose > 0)
printFlush(spaste(spaces, "..Final consensus calculation.."));
consensus = consensusCalculation(
individualData = inputData,
consensusOptions = consensusTree$consensusOptions,
randomSeed = NULL,
saveCalibratedIndividualData = saveCalibratedIndividualData,
calibratedIndividualDataFilePattern =calibratedIndividualDataFilePattern,
saveConsensusData = saveConsensusData,
consensusDataFileNames = consensusDataFileNames,
getCalibrationSamples = getCalibrationSamples,
useDiskCache = useDiskCache,
chunkSize = chunkSize,
cacheDir = cacheDir,
cacheBase = cacheBase,
collectGarbage = collectGarbage,
verbose = verbose-1, indent = indent+1);
if (saveConsensusData && !keepIntermediateResults && any(inputIsIntermediate))
mtd.apply(inputData[inputIsIntermediate], BD.checkAndDeleteFiles);
out = c(consensus, if (keepIntermediateResults) list(inputs = inputs0) else NULL);
out;
}
#==========================================================================================================
#
# Simple hierarchical consensus calculation from numeric data, with minimum checking and no calibration.
#
#==========================================================================================================
# Simpler version of consensus calculation, suitable for small data where calibration is not
# necessary.
simpleConsensusCalculation = function(
# multiData or list of numeric vectors
individualData,
consensusOptions,
verbose = 1, indent = 0)
{
nSets = length(individualData);
if (isMultiData(individualData))
individualData = multiData2list(individualData);
if (consensusOptions$useMean)
{
setWeights = consensusOptions$setWeights;
if (is.null(setWeights)) setWeights = rep(1, nSets);
if (length(setWeights)!=nSets)
stop("Length of 'setWeights' must equal the number of sets.");
} else setWeights = NULL;
.consensusCalculation.base.FromList(individualData, useMean = consensusOptions$useMean,
setWeights = setWeights,
consensusQuantile = consensusOptions$consensusQuantile)$consensus;
}
# Simple hierarchical consensus
simpleHierarchicalConsensusCalculation = function(
# multiData or list of numeric vectors
individualData,
consensusTree,
level = 1)
{
individualNames = names(individualData);
if (is.null(individualNames))
stop("'individualData' must be named.");
if (is.null(names(consensusTree$inputs)))
names(consensusTree$inputs) = spaste("Level.", level, ".Input.",
1:length(consensusTree$inputs));
if (isMultiData(individualData))
individualData = multiData2list(individualData);
isChar = sapply(consensusTree$inputs, is.character);
names(consensusTree$inputs)[isChar] = consensusTree$inputs[isChar];
# Recursive step if necessary
names(consensusTree$inputs) = make.unique(make.names(names(consensusTree$inputs)));
inputData = mapply(function(inp1, name)
{
if (is.character(inp1))
{
if (!inp1 %in% names(individualData))
stop("Element '", inp1, "' is not among names of 'individualData'.");
individualData[[inp1]];
} else {
if ("analysisName" %in% names(inp1)) name1 = inp1$analysisName else name1 = name;
inp1$consensusOptions$analysisName = name1;
simpleHierarchicalConsensusCalculation(individualData, inp1,
level = level + 1)
}
}, consensusTree$inputs, names(consensusTree$inputs), SIMPLIFY = FALSE);
# Calculate the consensus
simpleConsensusCalculation(
individualData = inputData,
consensusOptions = consensusTree$consensusOptions)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.