# Call blockwise modules several times with sampled data, collect the results
# -02: first entry in the result will be the base modules.
# Also: add functions for determining module stability and whether modules should be merged.
# ===================================================================================================
#
# sampledBlockwiseModules
#
# ===================================================================================================
sampledBlockwiseModules <- function(
datExpr,
nRuns,
startRunIndex = 1,
endRunIndex = startRunIndex + nRuns - 1,
replace = FALSE,
fraction = if (replace) 1.0 else 0.63,
randomSeed = 12345,
checkSoftPower = TRUE,
nPowerCheckSamples = 2000,
skipUnsampledCalculation = FALSE,
corType = "pearson",
power = 6,
networkType = "unsigned",
saveTOMs = FALSE,
saveTOMFileBase = "TOM",
...,
verbose = 2, indent = 0) {
spaces <- indentSpaces(indent)
result <- list()
runTOMFileBase <- saveTOMFileBase
nSamples <- nrow(datExpr)
nGenes <- ncol(datExpr)
corTypeI <- pmatch(corType, .corTypes)
if (is.na(corTypeI)) {
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
}
corFnc <- .corFnc[corTypeI]
seedSaved <- FALSE
if (!is.null(randomSeed)) {
if (exists(".Random.seed")) {
seedSaved <- TRUE
savedSeed <- .Random.seed
}
set.seed(randomSeed)
}
if (checkSoftPower) {
if (verbose > 0) printFlush(paste(spaces, "...calculating reference mean adjacencies.."))
useGenes <- sample(nGenes, nPowerCheckSamples, replace = FALSE)
adj <- adjacency(datExpr[, useGenes],
power = power, type = networkType,
corFnc = corFnc
)
refAdjMeans <- mean(as.dist(adj))
}
for (run in startRunIndex:endRunIndex)
{
set.seed(randomSeed + 2 * run + 1)
if (verbose > 0) printFlush(paste(spaces, "...working on run", run, ".."))
if (saveTOMs) {
runTOMFileBase <- paste(saveTOMFileBase, "-run-", run, sep = "")
}
if (run > startRunIndex || skipUnsampledCalculation) {
useSamples <- sample(nSamples, as.integer(nSamples * fraction), replace = replace)
} else {
useSamples <- c(1:nSamples)
}
if (verbose > 2) {
printFlush(paste(spaces, "Using the following samples: "))
print(useSamples)
}
samExpr <- as.matrix(datExpr[useSamples, ])
samPowers <- power
if (checkSoftPower) {
if (verbose > 1) printFlush(paste(spaces, " ...calculating mean adjacencies in sampled data.."))
adj <- adjacency(samExpr[, useGenes],
power = power, type = networkType,
corFnc = corFnc
)
sampledAdjMeans <- mean(as.dist(adj))
samPowers <- power * log(refAdjMeans) / log(sampledAdjMeans)
if (!is.finite(samPowers)) samPowers <- power
}
mods <- blockwiseModules(
datExpr = samExpr,
randomSeed = NULL,
power = samPowers,
corType = corType,
networkType = networkType,
saveTOMs = saveTOMs,
saveTOMFileBase = runTOMFileBase,
...,
verbose = verbose - 2, indent = indent + 2
)
result[[run]] <- list(mods = mods, samples = useSamples, powers = samPowers)
}
if (seedSaved) .Random.seed <<- savedSeed
result
}
# ===================================================================================================
#
# sampledHierarchicalConsensusModules
#
# ===================================================================================================
sampledHierarchicalConsensusModules <- function(
multiExpr,
multiWeights = NULL,
networkOptions,
consensusTree,
nRuns,
startRunIndex = 1,
endRunIndex = startRunIndex + nRuns - 1,
replace = FALSE,
fraction = if (replace) 1.0 else 0.63,
randomSeed = 12345,
checkSoftPower = TRUE,
nPowerCheckSamples = 2000,
individualTOMFilePattern = "individualTOM-Run.%r-Set%s-Block.%b.RData",
keepConsensusTOMs = FALSE,
consensusTOMFilePattern = "consensusTOM-Run.%r-%a-Block.%b.RData",
skipUnsampledCalculation = FALSE,
...,
verbose = 2, indent = 0,
saveRunningResults = TRUE,
runningResultsFile = "results.tmp.RData") {
spaces <- indentSpaces(indent)
result <- list()
exprSize <- checkSets(multiExpr)
nSets <- exprSize$nSets
nSamples <- exprSize$nSamples
.checkAndScaleMultiWeights(multiWeights, multiExpr, scaleByMax = FALSE)
if (inherits(networkOptions, "NetworkOptions")) {
networkOptions <- list2multiData(replicate(nSets, networkOptions, simplify = FALSE))
}
if (!is.null(randomSeed)) {
if (exists(".Random.seed")) {
savedSeed <- .Random.seed
on.exit(
{
.Random.seed <<- savedSeed
},
add = FALSE
)
}
set.seed(randomSeed)
}
powers <- unlist(mtd.apply(networkOptions, getElement, "power"))
if (nPowerCheckSamples > exprSize$nGenes) nPowerCheckSamples <- exprSize$nGenes
if (checkSoftPower) {
if (verbose > 0) printFlush(paste(spaces, "...calculating reference mean adjacencies.."))
useGenes <- sample(exprSize$nGenes, nPowerCheckSamples, replace = FALSE)
refAdjMeans <- rep(0, nSets)
for (set in 1:nSets)
{
adj <- adjacency(multiExpr[[set]]$data[, useGenes],
weights = if (is.null(multiWeights)) NULL else multiWeights[[set]]$data[, useGenes],
power = networkOptions[[set]]$data$power, type = networkOptions[[set]]$data$networkType,
corFnc = networkOptions[[set]]$data$corFnc,
corOptions = networkOptions[[set]]$data$corOptions
)
refAdjMeans[set] <- mean(as.dist(adj))
}
}
for (run in startRunIndex:endRunIndex)
{
runTOMFileBase <- .substituteTags(consensusTOMFilePattern, "%r", run)
individualTOMFiles1 <- .substituteTags(individualTOMFilePattern, "%r", run)
set.seed(randomSeed + 2 * run + 1)
if (verbose > 0) printFlush(paste(spaces, "Working on run", run, ".."))
useSamples <- list()
for (set in 1:nSets)
{
if (run > startRunIndex - skipUnsampledCalculation) {
printFlush("This run will be on sampled data.")
useSamples[[set]] <- sample(nSamples[set], as.integer(nSamples[set] * fraction), replace = replace)
} else {
useSamples[[set]] <- c(1:nSamples[set])
}
}
samExpr <- mtd.subset(multiExpr, useSamples)
if (!is.null(multiWeights)) {
samWeights <- mtd.subset(multiWeights, useSamples)
} else {
samWeights <- NULL
}
samPowers <- powers
if (checkSoftPower) {
if (verbose > 1) printFlush(paste(spaces, " ...calculating mean adjacencies in sampled data.."))
sampledAdjMeans <- rep(0, nSets)
for (set in 1:nSets)
{
adj <- adjacency(samExpr[[set]]$data[, useGenes],
weights = if (is.null(multiWeights)) NULL else samWeights[[set]]$data[, useGenes],
power = networkOptions[[set]]$data$power, type = networkOptions[[set]]$data$networkType,
corFnc = networkOptions[[set]]$data$corFnc,
corOptions = networkOptions[[set]]$data$corOptions
)
sampledAdjMeans[set] <- mean(as.dist(adj))
}
samPowers <- powers * log(refAdjMeans) / log(sampledAdjMeans)
samPowers[!is.finite(samPowers)] <- powers[!is.finite(samPowers)]
}
networkOptions1 <- mtd.mapply(
function(x, power) {
x$power <- power
x
},
networkOptions, samPowers
)
collectGarbage()
mods <- hierarchicalConsensusModules(
multiExpr = samExpr,
multiWeights = samWeights,
randomSeed = NULL,
networkOptions = networkOptions1,
consensusTree = consensusTree,
saveConsensusTOM = TRUE,
consensusTOMFilePattern = runTOMFileBase,
saveIndividualTOMs = TRUE,
individualTOMFileNames = individualTOMFiles1,
keepIndividualTOMs = FALSE,
keepConsensusTOM = keepConsensusTOMs,
...,
verbose = verbose - 2, indent = indent + 2
)
result[[run - startRunIndex + 1]] <- list(mods = mods, samples = useSamples, powers = samPowers)
if (saveRunningResults) save(result, file = runningResultsFile)
print(lapply(mods, object.size))
print(gc())
}
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.