# Copyright (C) 2008 Peter Langfelder
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# In this version the blocks are chosen by pre-clustering.
# ==========================================================================================================
#
# TOM similarity via a call to a compiled code.
#
# ==========================================================================================================
TOMsimilarityFromExpr <- function(datExpr,
weights = NULL,
corType = "pearson",
networkType = "unsigned",
power = 6,
TOMType = "signed",
TOMDenom = "min",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
replaceMissingAdjacencies = FALSE,
suppressTOMForZeroAdjacencies = FALSE,
useInternalMatrixAlgebra = FALSE,
nThreads = 0,
verbose = 1,
indent = 0) {
corTypeC <- as.integer(pmatch(corType, .corTypes) - 1)
if (is.na(corTypeC)) {
stop(glue("Invalid 'corType'. Recognized values are {.corTypes}"))
}
TOMTypeC <- as.integer(pmatch(TOMType, .TOMTypes) - 1)
if (is.na(TOMTypeC)) {
stop(glue("Invalid 'TOMType'. Recognized values are {.TOMTypes}"))
}
TOMDenomC <- as.integer(pmatch(TOMDenom, .TOMDenoms) - 1)
if (is.na(TOMDenomC)) {
stop(glue("Invalid 'TOMDenom'. Recognized values are {.TOMDenoms}"))
}
if ((maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.")
if (quickCor < 0) stop("quickCor must be positive.")
if ((maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.")
fallback <- as.integer(pmatch(pearsonFallback, .pearsonFallbacks))
if (is.na(fallback)) {
stop(glue("Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n {.pearsonFallbacks}"))
}
if (nThreads < 0) stop("nThreads must be positive.")
if (is.null(nThreads) || (nThreads == 0)) nThreads <- .useNThreads()
if ((power < 1) | (power > 30)) stop("power must be between 1 and 30.")
networkTypeC <- as.integer(charmatch(networkType, .networkTypes) - 1)
if (is.na(networkTypeC)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
dimEx <- dim(datExpr)
if (length(dimEx) != 2) stop("datExpr has incorrect dimensions.")
if (length(weights) > 0) {
if (!isTRUE(all.equal(dimEx, dim(weights)))) {
stop("When 'weights' are given, they must have the same dimensions as 'datExpr'.")
}
if (any(!is.finite(weights))) {
if (verbose > 0) {
warning("Found non-finite weights. The corresponding data points will be removed.")
}
weights[!is.finite(weights)] <- NA
}
}
nGenes <- dimEx[2]
nSamples <- dimEx[1]
warn <- as.integer(0)
datExpr <- as.matrix(datExpr)
if (length(weights) > 0) weights <- as.matrix(weights) else weights <- NULL
tom <- .Call("tomSimilarity_call", datExpr, weights,
as.integer(corTypeC), as.integer(networkTypeC), as.double(power),
as.integer(TOMTypeC), as.integer(TOMDenomC),
as.double(maxPOutliers), as.double(quickCor),
as.integer(fallback), as.integer(cosineCorrelation),
as.integer(replaceMissingAdjacencies),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(useInternalMatrixAlgebra),
warn,
as.integer(nThreads), as.integer(verbose), as.integer(indent),
PACKAGE = "WGCNA"
)
diag(tom) <- 1
return(tom)
}
# ======================================================================================================
#
# TOMsimilarity (from adjacency)
#
# ===================================================================================================
TOMsimilarity <- function(adjMat, TOMType = "unsigned", TOMDenom = "min",
suppressTOMForZeroAdjacencies = FALSE, useInternalMatrixAlgebra = FALSE, verbose = 1, indent = 0) {
TOMTypeC <- pmatch(TOMType, .TOMTypes) - 1
if (is.na(TOMTypeC)) {
stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", ")))
}
if (TOMTypeC == 0) {
stop("'TOMType' cannot be 'none' for this function.")
}
TOMDenomC <- pmatch(TOMDenom, .TOMDenoms) - 1
if (is.na(TOMDenomC)) {
stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", ")))
}
checkAdjMat(adjMat, min = if (TOMTypeC == 2) -1 else 0, max = 1)
if (any(is.na(adjMat))) adjMat[is.na(adjMat)] <- 0
tom <- .Call("tomSimilarityFromAdj_call", as.matrix(adjMat),
as.integer(TOMTypeC),
as.integer(TOMDenomC),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(useInternalMatrixAlgebra),
as.integer(verbose), as.integer(indent),
PACKAGE = "WGCNA"
)
tom
}
#' @title TOMdist
#' @description FUNCTION_DESCRIPTION
#' @param adjMat PARAM_DESCRIPTION
#' @param TOMType PARAM_DESCRIPTION, Default: unsigned
#' @param TOMDenom PARAM_DESCRIPTION, Default: min
#' @param suppressTOMForZeroAdjacencies PARAM_DESCRIPTION, Default: FALSE
#' @param useInternalMatrixAlgebra PARAM_DESCRIPTION, Default: FALSE
#' @param verbose PARAM_DESCRIPTION, Default: 1
#' @param indent PARAM_DESCRIPTION, Default: 0
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples
#' \dontrun{
#' if (interactive()) {
#' # EXAMPLE1
#' }
#'
#' @export
TOMdist <- function(adjMat,
TOMType = unsigned,
TOMDenom = min,
suppressTOMForZeroAdjacencies = FALSE,
useInternalMatrixAlgebra = FALSE,
verbose = 1,
indent = 0) {
1 - TOMsimilarity(adjMat, TOMType, TOMDenom,
suppressTOMForZeroAdjacencies = suppressTOMForZeroAdjacencies,
useInternalMatrixAlgebra = useInternalMatrixAlgebra, verbose = verbose, indent = indent
)
}
#' @title blockwiseModules
#' @description Function to calculate modules and eigengenes from all genes.
#'
#' @param datExpr
#' @param weights
#' @param checkMissingData
#' @param blocks
#' @param maxBlockSize
#' @param blockSizePenaltyPower
#' @param nPreclusteringCenters
#' @param randomSeed
#' @param loadTOM
#' @param corType
#' @param maxPOutliers
#' @param quickCor
#' @param pearsonFallback
#' @param cosineCorrelation
#' @param power
#' @param networkType
#' @param replaceMissingAdjacencies
#' @param suppressTOMForZeroAdjacencies
#' @param TOMType
#' @param TOMDenom
#' @param getTOMs
#' @param saveTOMs
#' @param saveTOMFileBase
#' @param deepSplit
#' @param detectCutHeight
#' @param minModuleSize
#' @param maxCoreScatter
#' @param minGap
#' @param maxAbsCoreScatter
#' @param minAbsGap
#' @param minSplitHeight
#' @param minAbsSplitHeight
#' @param useBranchEigennodeDissim
#' @param minBranchEigennodeDissim
#' @param stabilityLabels
#' @param stabilityCriterion
#' @param minStabilityDissim
#' @param pamStage
#' @param pamRespectsDendro
#' @param reassignThreshold
#' @param minCoreKME
#' @param minCoreKMESize
#' @param minKMEtoStay
#' @param mergeCutHeight
#' @param impute
#' @param trapErrors
#' @param numericLabels
#' @param nThreads
#' @param useInternalMatrixAlgebra
#' @param useCorOptionsThroughout
#' @param verbose
#' @param indent
#' @param ...
#'
#' @return
#' @examples
#' \dontrun{
#' if (interactive()) {
#' # EXAMPLE1
#' }
#' }
#' @seealso
#' \code{\link[fastcluster]{hclust}}
#' @export
#' @importFrom fastcluster hclust
#' @importFrom glue glue
#'
blockwiseModules <- function(
# Input data
datExpr,
weights = NULL,
# Data checking options
checkMissingData = TRUE,
# Options for splitting data into blocks
blocks = NULL,
maxBlockSize = 5000,
blockSizePenaltyPower = 5,
nPreclusteringCenters = as.integer(min(ncol(datExpr) / 20, 100 * ncol(datExpr) / maxBlockSize)),
randomSeed = 12345,
# load TOM from previously saved file?
loadTOM = FALSE,
# Network construction arguments: correlation options
corType = "pearson",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
# Adjacency function options
power = 6,
networkType = "unsigned",
replaceMissingAdjacencies = FALSE,
suppressTOMForZeroAdjacencies = FALSE,
# Topological overlap options
TOMType = "signed",
TOMDenom = "min",
# Saving or returning TOM
getTOMs = NULL,
saveTOMs = FALSE,
saveTOMFileBase = "blockwiseTOM",
# Basic tree cut options
deepSplit = 2,
detectCutHeight = 0.995,
minModuleSize = min(20, ncol(datExpr) / 2),
# Advanced tree cut options
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
stabilityLabels = NULL,
stabilityCriterion = c("Individual fraction", "Common fraction"),
minStabilityDissim = NULL,
pamStage = TRUE, pamRespectsDendro = TRUE,
# Gene reassignment, module trimming, and module "significance" criteria
reassignThreshold = 1e-6,
minCoreKME = 0.5,
minCoreKMESize = minModuleSize / 3,
minKMEtoStay = 0.3,
# Module merging options
mergeCutHeight = 0.15,
impute = TRUE,
trapErrors = FALSE,
# Output options
numericLabels = FALSE,
# Options controlling behaviour
nThreads = 0,
useInternalMatrixAlgebra = FALSE,
useCorOptionsThroughout = TRUE,
verbose = 0, indent = 0,
...) {
spaces <- indentSpaces(indent)
if (verbose > 0) {
message(paste(spaces, "Calculating module eigengenes block-wise from all genes"))
}
seedSaved <- FALSE
if (!is.null(randomSeed)) {
if (exists(".Random.seed")) {
seedSaved <- TRUE
savedSeed <- .Random.seed
}
set.seed(randomSeed)
}
intCorType <- pmatch(corType, .corTypes)
if (is.na(intCorType)) {
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
}
intTOMType <- pmatch(TOMType, .TOMTypes)
if (is.na(intTOMType)) {
stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", ")))
}
TOMDenomC <- pmatch(TOMDenom, .TOMDenoms) - 1
if (is.na(TOMDenomC)) {
stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", ")))
}
if ((maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.")
if (quickCor < 0) stop("quickCor must be positive.")
if (nThreads < 0) stop("nThreads must be positive.")
if (is.null(nThreads) || (nThreads == 0)) nThreads <- .useNThreads()
if ((power < 1) | (power > 30)) stop("power must be between 1 and 30.")
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType <- charmatch(networkType, .networkTypes)
if (is.na(intNetworkType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
fallback <- pmatch(pearsonFallback, .pearsonFallbacks)
if (is.na(fallback)) {
stop(spaste(
"Unrecognized value '", pearsonFallback, "' of argument 'pearsonFallback'.",
"Recognized values are (unique abbreviations of)\n",
paste(.pearsonFallbacks, collapse = ", ")
))
}
datExpr <- as.matrix(datExpr)
dimEx <- dim(datExpr)
if (length(dimEx) != 2) stop("datExpr has incorrect dimensions.")
weights <- .checkAndScaleWeights(weights, datExpr, scaleByMax = FALSE)
haveWeights <- length(weights) > 0
nGenes <- dimEx[2]
nSamples <- dimEx[1]
allLabels <- rep(0, nGenes)
AllMEs <- NULL
allLabelIndex <- NULL
# if (maxBlockSize >= floor(sqrt(2^31)) )
# stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (!is.null(blocks) && (length(blocks) != nGenes)) {
stop("Input error: the length of 'geneRank' does not equal the number of genes in given 'datExpr'.")
}
if (!is.null(getTOMs)) {
warning("getTOMs is deprecated, please use saveTOMs instead.")
}
# Check data for genes and samples that have too many missing values
if (checkMissingData) {
gsg <- goodSamplesGenes(datExpr, weights = weights, verbose = verbose - 1, indent = indent + 1)
if (!gsg$allOK) {
datExpr <- datExpr[gsg$goodSamples, gsg$goodGenes]
if (haveWeights) weights <- weights[gsg$goodSamples, gsg$goodGenes]
}
nGGenes <- sum(gsg$goodGenes)
nGSamples <- sum(gsg$goodSamples)
} else {
nGGenes <- nGenes
nGSamples <- nSamples
gsg <- list(goodSamples = rep(TRUE, nSamples), goodGenes = rep(TRUE, nGenes), allOK = TRUE)
}
if (any(is.na(datExpr))) {
datExpr.scaled.imputed <- t(impute.knn(t(scale(datExpr)))$data)
} else {
datExpr.scaled.imputed <- scale(datExpr)
}
corFnc <- .corFnc[intCorType]
corOptions <- .corOptionList[[intCorType]]
corFncAcceptsWeights <- intCorType == 1
if (useCorOptionsThroughout) {
corOptions <- c(corOptions, list(cosine = cosineCorrelation))
}
# Do not add the quick argument here. The calculations carried out here will not be slow anyway.
if (intCorType == 2 && useCorOptionsThroughout) {
corOptions <- c(corOptions, list(maxPOutliers = maxPOutliers, pearsonFallback = pearsonFallback))
}
signed <- networkType %in% c("signed", "signed hybrid")
# Set up advanced tree cut methods
otherArgs <- list(...)
if (useBranchEigennodeDissim) {
branchSplitFnc <- list("branchEigengeneDissim")
externalSplitOptions <- list(list(
corFnc = corFnc, corOptions = corOptions,
signed = signed
))
nExternalBranchSplitFnc <- 1
externalSplitFncNeedsDistance <- FALSE
minExternalSplit <- minBranchEigennodeDissim
} else {
branchSplitFnc <- list()
externalSplitOptions <- list()
externalSplitFncNeedsDistance <- logical(0)
nExternalBranchSplitFnc <- 0
minExternalSplit <- numeric(0)
}
if (!is.null(stabilityLabels)) {
stabilityCriterion <- match.arg(stabilityCriterion)
branchSplitFnc <- c(
branchSplitFnc,
if (stabilityCriterion == "Individual fraction") {
"branchSplitFromStabilityLabels.individualFraction"
} else {
"branchSplitFromStabilityLabels"
}
)
minExternalSplit <- c(minExternalSplit, minStabilityDissim)
externalSplitFncNeedsDistance <- c(externalSplitFncNeedsDistance, FALSE)
print(dim(stabilityLabels))
externalSplitOptions <- c(externalSplitOptions, list(list(stabilityLabels = stabilityLabels)))
}
if ("useBranchSplit" %in% names(otherArgs)) {
if (otherArgs$useBranchSplit) {
nExternalBranchSplitFnc <- nExternalBranchSplitFnc + 1
branchSplitFnc[[nExternalBranchSplitFnc]] <- "branchSplit"
externalSplitOptions[[nExternalBranchSplitFnc]] <- list(
discardProp = 0.08, minCentralProp = 0.75,
nConsideredPCs = 3, signed = signed, getDetails = FALSE
)
externalSplitFncNeedsDistance[nExternalBranchSplitFnc] <- FALSE
minExternalSplit[nExternalBranchSplitFnc] <- otherArgs$minBranchSplit
}
}
# Split data into blocks if needed
if (is.null(blocks)) {
if (nGGenes > maxBlockSize) {
if (verbose > 1) message(paste(spaces, "....pre-clustering genes to determine blocks.."))
clustering <- projectiveKMeans(datExpr,
preferredSize = maxBlockSize,
checkData = FALSE,
sizePenaltyPower = blockSizePenaltyPower,
nCenters = nPreclusteringCenters,
verbose = verbose - 2, indent = indent + 1
)
gBlocks <- .orderLabelsBySize(clustering$clusters)
if (verbose > 2) {
message("Block sizes:")
print(table(gBlocks))
}
} else {
gBlocks <- rep(1, nGGenes)
}
blocks <- rep(NA, nGenes)
blocks[gsg$goodGenes] <- gBlocks
} else {
gBlocks <- blocks[gsg$goodGenes]
}
blockLevels <- as.numeric(levels(factor(gBlocks)))
blockSizes <- table(gBlocks)
if (any(blockSizes > sqrt(2^31) - 1)) {
message(spaste(
spaces,
"Found block(s) with size(s) larger than limit of 'int' indexing.\n",
spaces, " Support for such large blocks is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."
))
}
nBlocks <- length(blockLevels)
# Initialize various variables
dendros <- list()
TOMFiles <- rep("", nBlocks)
blockGenes <- list()
maxUsedLabel <- 0
for (blockNo in 1:nBlocks) {
if (verbose > 1) {
message(glue(" ..Working on block {blockNo}."))
}
blockGenes[[blockNo]] <- c(1:nGenes)[gsg$goodGenes][gBlocks == blockLevels[blockNo]]
block <- c(1:nGGenes)[gBlocks == blockLevels[blockNo]]
selExpr <- as.matrix(datExpr[, block])
if (haveWeights) selWeights <- weights[, block]
nBlockGenes <- length(block)
TOMFiles[blockNo] <- glue("{saveTOMFileBase}-block. {blockNo}.RData")
if (loadTOM) {
if (verbose > 2) {
message(glue(" ..loading TOM for block {blockNo} from file {TOMFiles[blockNo]}"))
}
x <- try(load(file = TOMFiles[blockNo]), silent = TRUE)
if (x != "TOM") {
loadTOM <- FALSE
message(glue("Loading of TOM in block {blockNo} failed:\n file {TOMFiles[blockNo]}\n either does not exist or does not contain the object 'TOM'.\nWill recalculate TOM."))
} else if (!inherits(TOM, "dist")) {
message(glue("TOM file {TOMFiles[blockNo]} does not contain object of the right type or size.\n", " Will recalculate TOM."))
} else {
size.1 <- attr(TOM, "Size")
if (length(size.1) != 1 || size.1 != nBlockGenes) {
message(spaste("TOM file {TOMFiles[blockNo]} does not contain object of the right type or size.\nWill recalculate TOM."))
loadTOM <- FALSE
} else {
tom <- as.matrix(TOM)
rm(TOM)
gc()
}
}
}
if (!loadTOM) {
# Calculate TOM by calling a custom C function:
callVerb <- max(0, verbose - 1)
callInd <- indent + 2
CcorType <- intCorType - 1
CnetworkType <- intNetworkType - 1
CTOMType <- intTOMType - 1
warn <- as.integer(0)
tom <- .Call("tomSimilarity_call", selExpr, weights,
as.integer(CcorType),
as.integer(CnetworkType),
as.double(power),
as.integer(CTOMType),
as.integer(TOMDenomC),
as.double(maxPOutliers),
as.double(quickCor),
as.integer(fallback),
as.integer(cosineCorrelation),
as.integer(replaceMissingAdjacencies),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(useInternalMatrixAlgebra),
warn, as.integer(nThreads),
as.integer(callVerb), as.integer(callInd),
PACKAGE = "WGCNA"
)
# FIXME: warn if necessary
if (saveTOMs) {
TOM <- as.dist(tom)
TOMFiles[blockNo] <- glue("{saveTOMFileBase}-block.{blockNo}.RData")
if (verbose > 2) {
message(glue(spaces, " ..saving TOM for block {blockNo} into file {TOMFiles[blockNo]}"))
}
save(TOM, file = TOMFiles[blockNo])
rm(TOM)
gc()
}
}
dissTom <- 1 - tom
rm(tom)
gc()
if (verbose > 2) message(" ....clustering..")
dendros[[blockNo]] <- fastcluster::hclust(as.dist(dissTom), method = "average")
if (verbose > 2) message(" ....detecting modules..")
datExpr.scaled.imputed.block <- datExpr.scaled.imputed[, block]
if (nExternalBranchSplitFnc > 0) {
for (extBSFnc in 1:nExternalBranchSplitFnc) {
externalSplitOptions[[extBSFnc]]$expr <- datExpr.scaled.imputed.block
}
}
gc()
blockLabels <- try(cutreeDynamic(
dendro = dendros[[blockNo]],
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method = "hybrid", distM = dissTom,
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = branchSplitFnc,
minExternalSplit = minExternalSplit,
externalSplitOptions = externalSplitOptions,
externalSplitFncNeedsDistance = externalSplitFncNeedsDistance,
assumeSimpleExternalSpecification = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
verbose = verbose - 3, indent = indent + 2
), silent = FALSE)
gc()
if (verbose > 8) {
labels0 <- blockLabels
if (interactive()) {
plotDendroAndColors(dendros[[blockNo]], labels2colors(blockLabels),
dendroLabels = FALSE,
main = paste("Block", blockNo),
rowText = blockLabels, textPositions = 1, rowTextAlignment = "center"
)
}
if (FALSE) {
plotDendroAndColors(dendros[[blockNo]], labels2colors(allLabels),
dendroLabels = FALSE,
main = paste("Block", blockNo)
)
}
}
if (class(blockLabels) == "try-error") {
if (verbose > 0) {
message(glue(" *** cutreeDynamic returned the following error:\n {blockLabels} {spaces} Stopping the module detection here."))
} else {
warning(glue("blockwiseModules: cutreeDynamic returned the following error:\n {blockLabels}---> Continuing with next block. "))
}
next
}
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(glue(" No modules detected in block{blockNo}"))
}
blockNo <- blockNo + 1
next
}
blockLabels[blockLabels > 0] <- blockLabels[blockLabels > 0] + maxUsedLabel
maxUsedLabel <- max(blockLabels)
if (verbose > 2) message(" ....calculating module eigengenes..")
MEs <- try(moduleEigengenes(selExpr[, blockLabels != 0], blockLabels[blockLabels != 0],
impute = impute,
# subHubs = TRUE, trapErrors = FALSE,
verbose = verbose - 3, indent = indent + 2
), silent = TRUE)
if (class(MEs) == "try-error") {
if (trapErrors) {
if (verbose > 0) {
message(" *** moduleEigengenes failed with the following message:")
message(glue(" {MEs}"))
message(" ---> Stopping module detection here.")
} else {
warning(glue("blockwiseModules: moduleEigengenes failed with the following message:\n {MEs}---> Continuing with next block. "))
}
next
} else {
stop(MEs)
}
}
propMEs <- MEs$eigengenes
blockLabelIndex <- as.numeric(substring(names(propMEs), 3))
deleteModules <- NULL
changedModules <- NULL
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff.
if (verbose > 2) message("....checking kME in modules..")
for (mod in 1:ncol(propMEs))
{
modGenes <- (blockLabels == blockLabelIndex[mod])
KME <- do.call(
corFnc,
c(
list(
selExpr[, modGenes],
propMEs[, mod]
),
if (corFncAcceptsWeights) {
list(
weights.x = if (haveWeights) {
weights[, modGenes]
}
else {
NULL
},
weights.y = NULL
)
} else {
NULL
},
corOptions
)
)
if (intNetworkType == 1) {
KME <- abs(KME)
}
if (sum(KME > minCoreKME) < minCoreKMESize) {
blockLabels[modGenes] <- 0
deleteModules <- c(deleteModules, mod)
if (verbose > 3) {
message(glue(" ..deleting module {mod}: of {sum(modGenes)} total genes in the module\n only {sum(KME > minCoreKME)} have the requisite high correlation with the eigengene."))
}
} else if (sum(KME < minKMEtoStay) > 0) {
# Remove genes whose KME is too low:
if (verbose > 2) {
message(glue(" ..removing {sum(KME < minKMEtoStay)} genes from module {mod} because their KME is too low."))
}
blockLabels[modGenes][KME < minKMEtoStay] <- 0
if (sum(blockLabels[modGenes] > 0) < minModuleSize) {
deleteModules <- c(deleteModules, mod)
blockLabels[modGenes] <- 0
if (verbose > 3) {
message(glue(" ..deleting module {blockLabelIndex[mod]}: not enough genes in the module after removal of low KME genes."))
}
} else {
changedModules <- union(changedModules, blockLabelIndex[mod])
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules)) {
propMEs <- propMEs[, -deleteModules, drop = FALSE]
modGenes <- is.finite(match(blockLabels, blockLabelIndex[deleteModules]))
blockLabels[modGenes] <- 0
modAllGenes <- is.finite(match(allLabels, blockLabelIndex[deleteModules]))
allLabels[modAllGenes] <- 0
blockLabelIndex <- blockLabelIndex[-deleteModules]
}
# Check if any modules are left
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(glue("No significant modules detected in block {blockNo}"))
}
blockNo <- blockNo + 1
next
}
# Update allMEs and allLabels
if (is.null(AllMEs)) {
AllMEs <- propMEs
} else {
AllMEs <- cbind(AllMEs, propMEs)
}
allLabelIndex <- c(allLabelIndex, blockLabelIndex)
assigned <- block[blockLabels != 0]
allLabels[gsg$goodGenes][assigned] <- blockLabels[blockLabels != 0]
rm(dissTom)
gc()
blockNo <- blockNo + 1
}
# Check whether any of the already assigned genes should be re-assigned
deleteModules <- NULL
goodLabels <- allLabels[gsg$goodGenes]
reassignIndex <- rep(FALSE, length(goodLabels))
if (sum(goodLabels != 0) > 0) {
propLabels <- goodLabels[goodLabels != 0]
assGenes <- (c(1:nGenes)[gsg$goodGenes])[goodLabels != 0]
KME <- do.call(
match.fun(corFnc),
c(
list(datExpr[, goodLabels != 0], AllMEs),
if (corFncAcceptsWeights) {
list(
weights.x = if (haveWeights) weights[, goodLabels != 0] else NULL,
weights.y = NULL
)
} else {
NULL
},
corOptions
)
)
if (intNetworkType == 1) KME <- abs(KME)
nMods <- ncol(AllMEs)
for (mod in 1:nMods)
{
modGenes <- c(1:length(propLabels))[propLabels == allLabelIndex[mod]]
KMEmodule <- KME[modGenes, mod]
KMEbest <- apply(KME[modGenes, , drop = FALSE], 1, max)
candidates <- (KMEmodule < KMEbest)
candidates[!is.finite(candidates)] <- FALSE
if (FALSE) {
modDiss <- dissTom[goodLabels == allLabelIndex[mod], goodLabels == allLabelIndex[mod]]
mod.k <- colSums(modDiss)
boxplot(mod.k ~ candidates)
}
if (sum(candidates) > 0) {
pModule <- corPvalueFisher(KMEmodule[candidates], nSamples)
whichBest <- apply(KME[modGenes[candidates], , drop = FALSE], 1, which.max)
pBest <- corPvalueFisher(KMEbest[candidates], nSamples)
reassign <- ifelse(is.finite(pBest / pModule), (pBest / pModule < reassignThreshold), FALSE)
if (sum(reassign) > 0) {
if (verbose > 2) {
message(glue(" ..reassigning {sum(reassign)} genes from module {mod} to modules with higher KME."))
}
allLabels[assGenes[modGenes[candidates][reassign]]] <- whichBest[reassign]
changedModules <- union(changedModules, whichBest[reassign])
if (length(modGenes) - sum(reassign) < minModuleSize) {
deleteModules <- c(deleteModules, mod)
} else {
changedModules <- union(changedModules, mod)
}
}
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules)) {
AllMEs <- AllMEs[, -deleteModules, drop = FALSE]
genes <- is.finite(match(allLabels, allLabelIndex[deleteModules]))
allLabels[genes] <- 0
allLabelIndex <- allLabelIndex[-deleteModules]
goodLabels <- allLabels[gsg$goodGenes]
}
if (verbose > 1) message("..merging modules that are too close..")
if (numericLabels) {
colors <- allLabels
} else {
colors <- labels2colors(allLabels)
}
mergedAllColors <- colors
MEsOK <- TRUE
mergedMods <- try(mergeCloseModules(datExpr, colors[gsg$goodGenes],
cutHeight = mergeCutHeight,
relabel = TRUE, # trapErrors = FALSE,
corFnc = corFnc, corOptions = corOptions,
impute = impute,
verbose = verbose - 2, indent = indent + 2
), silent = TRUE)
if (class(mergedMods) == "try-error") {
warning(glue("blockwiseModules: mergeCloseModules failed with the following error message:\n {mergedMods}\n--> returning unmerged colors.\n"))
MEs <- try(moduleEigengenes(datExpr, colors[gsg$goodGenes], # subHubs = TRUE, trapErrors = FALSE,
impute = impute,
verbose = verbose - 3, indent = indent + 3
), silent = TRUE)
if (class(MEs) == "try-error") {
if (!trapErrors) stop(MEs)
if (verbose > 0) {
message(" *** moduleEigengenes failed with the following error message:\n {MEs}\n *** returning no module eigengenes.\n")
} else {
warning(glue("blockwiseModules: moduleEigengenes failed with the following error message:\n {MEs}\n--> returning no module eigengenes.\n"))
}
allSampleMEs <- NULL
MEsOK <- FALSE
} else {
if (sum(!MEs$validMEs) > 0) {
colors[gsg$goodGenes] <- MEs$validColors
MEs <- MEs$eigengenes[, MEs$validMEs]
} else {
MEs <- MEs$eigengenes
}
allSampleMEs <- as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(MEs)))
allSampleMEs[gsg$goodSamples, ] <- MEs[, ]
names(allSampleMEs) <- names(MEs)
}
} else {
mergedAllColors[gsg$goodGenes] <- mergedMods$colors
allSampleMEs <- as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(mergedMods$newMEs)))
allSampleMEs[gsg$goodSamples, ] <- mergedMods$newMEs[, ]
names(allSampleMEs) <- names(mergedMods$newMEs)
}
if (seedSaved) .Random.seed <<- savedSeed
if (!saveTOMs) TOMFiles <- NULL
list(
colors = mergedAllColors,
unmergedColors = colors,
MEs = allSampleMEs,
goodSamples = gsg$goodSamples,
goodGenes = gsg$goodGenes,
dendrograms = dendros,
TOMFiles = TOMFiles,
blockGenes = blockGenes,
blocks = blocks,
MEsOK = MEsOK
)
}
# ==================================================================================
#
# Helper functions
#
# ==================================================================================
# order labels by size
.orderLabelsBySize <- function(labels, exclude = NULL) {
levels.0 <- sort(unique(labels))
levels <- levels.0[!levels.0 %in% exclude]
levels.excl <- levels.0 [levels.0 %in% exclude]
rearrange <- labels %in% levels
tab <- table(labels [rearrange])
rank <- rank(-tab, ties.method = "first")
oldOrder <- c(levels.excl, names(tab))
newOrder <- c(levels.excl, names(tab)[rank])
if (is.numeric(labels)) newOrder <- as.numeric(newOrder)
newOrder[match(labels, oldOrder)]
}
# ======================================================================================================
#
# Re-cut trees for blockwiseModules
#
# ======================================================================================================
recutBlockwiseTrees <- function(datExpr,
goodSamples, goodGenes,
blocks,
TOMFiles,
dendrograms,
corType = "pearson",
networkType = "unsigned",
deepSplit = 2,
detectCutHeight = 0.995, minModuleSize = min(20, ncol(datExpr) / 2),
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
pamStage = TRUE, pamRespectsDendro = TRUE,
# minKMEtoJoin =0.7,
minCoreKME = 0.5, minCoreKMESize = minModuleSize / 3,
minKMEtoStay = 0.3,
reassignThreshold = 1e-6,
mergeCutHeight = 0.15, impute = TRUE,
trapErrors = FALSE, numericLabels = FALSE,
verbose = 0, indent = 0, ...) {
spaces <- indentSpaces(indent)
# if (verbose>0)
# message(paste(spaces, "Calculating module eigengenes block-wise from all genes"));
cutreeLabels <- list()
intCorType <- pmatch(corType, .corTypes)
if (is.na(intCorType)) {
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
}
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType <- charmatch(networkType, .networkTypes)
if (is.na(intNetworkType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
dimEx <- dim(datExpr)
if (length(dimEx) != 2) stop("datExpr has incorrect dimensions.")
nGenes <- dimEx[2]
nSamples <- dimEx[1]
allLabels <- rep(0, nGenes)
AllMEs <- NULL
allLabelIndex <- NULL
if (length(blocks) != nGenes) {
stop("Input error: the length of 'geneRank' does not equal the number of genes in given 'datExpr'.")
}
# Check data for genes and samples that have too many missing values
nGGenes <- sum(goodGenes)
nGSamples <- sum(goodSamples)
gsg <- list(
goodSamples = goodSamples, goodGenes = goodGenes,
allOK = (sum(!goodSamples) + sum(!goodGenes) == 0)
)
if (!gsg$allOK) datExpr <- datExpr[goodSamples, goodGenes]
gBlocks <- blocks[gsg$goodGenes]
blockLevels <- as.numeric(levels(factor(gBlocks)))
blockSizes <- table(gBlocks)
nBlocks <- length(blockLevels)
datExpr.scaled.imputed <- t(impute.knn(t(scale(datExpr)))$data)
if (any(is.na(datExpr))) {
datExpr.scaled.imputed <- t(impute.knn(t(scale(datExpr)))$data)
}
corFnc <- .corFnc[intCorType]
corOptions <- list(use = "p")
signed <- networkType %in% c("signed", "signed hybrid")
# Set up advanced tree cut methods
otherArgs <- list(...)
if (useBranchEigennodeDissim) {
branchSplitFnc <- list("branchEigengeneDissim")
externalSplitOptions <- list(list(
corFnc = corFnc, corOptions = corOptions,
signed = signed
))
externalSplitFncNeedsDistance <- FALSE
nExternalBranchSplitFnc <- 1
minExternalSplit <- minBranchEigennodeDissim
} else {
branchSplitFnc <- list()
externalSplitOptions <- list(list())
externalSplitFncNeedsDistance <- logical(0)
nExternalBranchSplitFnc <- 0
minExternalSplit <- numeric(0)
}
if ("useBranchSplit" %in% names(otherArgs)) {
if (otherArgs$useBranchSplit) {
nExternalBranchSplitFnc <- nExternalBranchSplitFnc + 1
branchSplitFnc[[nExternalBranchSplitFnc]] <- "branchSplit"
externalSplitOptions[[nExternalBranchSplitFnc]] <- list(
discardProp = 0.08, minCentralProp = 0.75,
nConsideredPCs = 3, signed = signed, getDetails = FALSE
)
minExternalSplit[nExternalBranchSplitFnc] <- otherArgs$minBranchSplit
externalSplitFncNeedsDistance[nExternalBranchSplitFnc] <- FALSE
}
}
# Initialize various variables
blockNo <- 1
maxUsedLabel <- 0
while (blockNo <= nBlocks) {
if (verbose > 1) message(paste(spaces, "..Working on block", blockNo, "."))
block <- c(1:nGGenes)[gBlocks == blockLevels[blockNo]]
selExpr <- as.matrix(datExpr[, block])
nBlockGenes <- length(block)
TOM <- NULL # Will be loaded below; this gets rid of warnings form Rcheck.
xx <- try(load(TOMFiles[blockNo]), silent = TRUE)
if (class(xx) == "try-error") {
message(paste(
"************\n File name", TOMFiles[blockNo],
"appears invalid: the load function returned the following error:\n ",
xx
))
stop()
}
if (xx != "TOM") {
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate variable."))
}
if (class(TOM) != "dist") {
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate distance structure."))
}
dissTom <- as.matrix(1 - TOM)
if (verbose > 2) message(paste(spaces, "....detecting modules.."))
datExpr.scaled.imputed.block <- datExpr.scaled.imputed[, block]
if (nExternalBranchSplitFnc > 0) {
for (extBSFnc in 1:nExternalBranchSplitFnc) {
externalSplitOptions[[extBSFnc]]$expr <- datExpr.scaled.imputed.block
}
}
blockLabels <- try(cutreeDynamic(
dendro = dendrograms[[blockNo]],
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method = "hybrid",
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = branchSplitFnc,
minExternalSplit = minExternalSplit,
externalSplitOptions = externalSplitOptions,
externalSplitFncNeedsDistance = externalSplitFncNeedsDistance,
assumeSimpleExternalSpecification = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
distM = dissTom,
verbose = verbose - 3, indent = indent + 2
), silent = TRUE)
gc()
cutreeLabels[[blockNo]] <- blockLabels
if (class(blockLabels) == "try-error") {
if (verbose > 0) {
message(paste(
spaces, "*** cutreeDynamic returned the following error:\n",
spaces, blockLabels, spaces,
"Stopping the module detection here."
))
} else {
warning(paste(
"blockwiseModules: cutreeDynamic returned the following error:\n",
" ", blockLabels, "---> Continuing with next block. "
))
}
next
}
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(paste(spaces, "No modules detected in block", blockNo))
}
blockNo <- blockNo + 1
next
}
blockLabels[blockLabels > 0] <- blockLabels[blockLabels > 0] + maxUsedLabel
maxUsedLabel <- max(blockLabels)
if (verbose > 2) message(paste(spaces, "....calculating module eigengenes.."))
MEs <- try(moduleEigengenes(selExpr[, blockLabels != 0], blockLabels[blockLabels != 0],
impute = impute,
# subHubs = TRUE, trapErrors = FALSE,
verbose = verbose - 3, indent = indent + 2
), silent = TRUE)
if (class(MEs) == "try-error") {
if (trapErrors) {
if (verbose > 0) {
message(paste(spaces, "*** moduleEigengenes failed with the following message:"))
message(paste(spaces, " ", MEs))
message(paste(spaces, " ---> Stopping module detection here."))
} else {
warning(paste(
"blockwiseModules: moduleEigengenes failed with the following message:",
"\n ", MEs, "---> Continuing with next block. "
))
}
next
} else {
stop(MEs)
}
}
# propMEs = as.data.frame(MEs$eigengenes[, names(MEs$eigengenes)!="ME0"]);
propMEs <- MEs$eigengenes
blockLabelIndex <- as.numeric(substring(names(propMEs), 3))
deleteModules <- NULL
changedModules <- NULL
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff.
if (verbose > 2) message(paste(spaces, "....checking modules for statistical meaningfulness.."))
for (mod in 1:ncol(propMEs))
{
modGenes <- (blockLabels == blockLabelIndex[mod])
corEval <- parse(text = paste(
corFnc, "(selExpr[, modGenes], propMEs[, mod]",
prepComma(.corOptions[intCorType]), ")"
))
KME <- as.vector(eval(corEval))
if (intNetworkType == 1) KME <- abs(KME)
if (sum(KME > minCoreKME) < minCoreKMESize) {
blockLabels[modGenes] <- 0
deleteModules <- c(deleteModules, mod)
if (verbose > 3) {
message(paste(spaces, " ..deleting module ", mod, ": of ", sum(modGenes),
" total genes in the module\n only ", sum(KME > minCoreKME),
" have the requisite high correlation with the eigengene.",
sep = ""
))
}
} else if (sum(KME < minKMEtoStay) > 0) {
# Remove genes whose KME is too low:
if (verbose > 2) {
message(paste(
spaces, " ..removing", sum(KME < minKMEtoStay),
"genes from module", mod, "because their KME is too low."
))
}
blockLabels[modGenes][KME < minKMEtoStay] <- 0
if (sum(blockLabels[modGenes] > 0) < minModuleSize) {
deleteModules <- c(deleteModules, mod)
blockLabels[modGenes] <- 0
if (verbose > 3) {
message(paste(spaces, " ..deleting module ", blockLabelIndex[mod],
": not enough genes in the module after removal of low KME genes.",
sep = ""
))
}
} else {
changedModules <- union(changedModules, blockLabelIndex[mod])
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules)) {
propMEs <- propMEs[, -deleteModules, drop = FALSE]
modGenes <- is.finite(match(blockLabels, blockLabelIndex[deleteModules]))
blockLabels[modGenes] <- 0
modAllGenes <- is.finite(match(allLabels, blockLabelIndex[deleteModules]))
allLabels[modAllGenes] <- 0
blockLabelIndex <- blockLabelIndex[-deleteModules]
}
# Check if any modules are left
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(paste(spaces, "No significant modules detected in block", blockNo))
}
blockNo <- blockNo + 1
next
}
# Update allMEs and allLabels
if (is.null(AllMEs)) {
AllMEs <- propMEs
} else {
AllMEs <- cbind(AllMEs, propMEs)
}
allLabelIndex <- c(allLabelIndex, blockLabelIndex)
assigned <- block[blockLabels != 0]
allLabels[assigned] <- blockLabels[blockLabels != 0]
rm(dissTom)
gc()
blockNo <- blockNo + 1
}
# Check whether any of the already assigned genes should be re-assigned
deleteModules <- NULL
goodLabels <- allLabels[gsg$goodGenes]
if (sum(goodLabels != 0) > 0) {
propLabels <- goodLabels[goodLabels != 0]
assGenes <- (c(1:nGenes)[gsg$goodGenes])[goodLabels != 0]
corEval <- parse(text = paste(
corFnc, "(datExpr[, goodLabels!=0], AllMEs",
prepComma(.corOptions[intCorType]), ")"
))
KME <- eval(corEval)
if (intNetworkType == 1) KME <- abs(KME)
nMods <- ncol(AllMEs)
for (mod in 1:nMods)
{
modGenes <- c(1:length(propLabels))[propLabels == allLabelIndex[mod]]
KMEmodule <- KME[modGenes, mod]
KMEbest <- apply(KME[modGenes, , drop = FALSE], 1, max)
candidates <- (KMEmodule < KMEbest)
candidates[!is.finite(candidates)] <- FALSE
if (sum(candidates) > 0) {
pModule <- corPvalueFisher(KMEmodule[candidates], nSamples)
whichBest <- apply(KME[modGenes[candidates], , drop = FALSE], 1, which.max)
pBest <- corPvalueFisher(KMEbest[candidates], nSamples)
reassign <- ifelse(is.finite(pBest / pModule), (pBest / pModule < reassignThreshold), FALSE)
if (sum(reassign) > 0) {
if (verbose > 2) {
message(paste(
spaces, " ..reassigning", sum(reassign),
"genes from module", mod, "to modules with higher KME."
))
}
allLabels[assGenes[modGenes[candidates][reassign]]] <- whichBest[reassign]
changedModules <- union(changedModules, whichBest[reassign])
if (length(modGenes) - sum(reassign) < minModuleSize) {
deleteModules <- c(deleteModules, mod)
} else {
changedModules <- union(changedModules, mod)
}
}
}
}
}
# Remove modules that are to be removed
if (!is.null(deleteModules)) {
AllMEs <- AllMEs[, -deleteModules, drop = FALSE]
genes <- is.finite(match(allLabels, allLabelIndex[deleteModules]))
allLabels[genes] <- 0
allLabelIndex <- allLabelIndex[-deleteModules]
goodLabels <- allLabels[gsg$goodGenes]
}
if (verbose > 1) message(paste(spaces, "..merging modules that are too close.."))
if (numericLabels) {
colors <- allLabels
} else {
colors <- labels2colors(allLabels)
}
mergedAllColors <- colors
MEsOK <- TRUE
mergedMods <- try(mergeCloseModules(datExpr, colors[gsg$goodGenes],
cutHeight = mergeCutHeight,
relabel = TRUE, # trapErrors = FALSE,
impute = impute,
verbose = verbose - 2, indent = indent + 2
), silent = TRUE)
if (class(mergedMods) == "try-error") {
warning(paste(
"blockwiseModules: mergeCloseModules failed with the following error message:\n ",
mergedMods, "\n--> returning unmerged colors.\n"
))
MEs <- try(moduleEigengenes(datExpr, colors[gsg$goodGenes], # subHubs = TRUE, trapErrors = FALSE,
impute = impute,
verbose = verbose - 3, indent = indent + 3
), silent = TRUE)
if (class(MEs) == "try-error") {
if (!trapErrors) stop(MEs)
if (verbose > 0) {
message(paste(spaces, "*** moduleEigengenes failed with the following error message:"))
message(paste(spaces, " ", MEs))
message(paste(spaces, "*** returning no module eigengenes.\n"))
} else {
warning(paste(
"blockwiseModules: moduleEigengenes failed with the following error message:\n ",
MEs, "\n--> returning no module eigengenes.\n"
))
}
allSampleMEs <- NULL
MEsOK <- FALSE
} else {
if (sum(!MEs$validMEs) > 0) {
colors[gsg$goodGenes] <- MEs$validColors
MEs <- MEs$eigengenes[, MEs$validMEs]
} else {
MEs <- MEs$eigengenes
}
allSampleMEs <- as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(MEs)))
allSampleMEs[gsg$goodSamples, ] <- MEs[, ]
names(allSampleMEs) <- names(MEs)
}
} else {
mergedAllColors[gsg$goodGenes] <- mergedMods$colors
allSampleMEs <- as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(mergedMods$newMEs)))
allSampleMEs[gsg$goodSamples, ] <- mergedMods$newMEs[, ]
names(allSampleMEs) <- names(mergedMods$newMEs)
}
list(
colors = mergedAllColors,
unmergedColors = colors,
cutreeLabels = cutreeLabels,
MEs = allSampleMEs,
# goodSamples = gsg$goodSamples,
# goodGenes = gsg$goodGenes,
# dendrograms = dendrograms,
# TOMFiles = TOMFiles,
# blockGenes = blockGenes,
MEsOK = MEsOK
)
}
# ==========================================================================================================
#
# blockwiseIndividualTOMs
#
# ==========================================================================================================
# This function calculates and saves blockwise topological overlaps for a given multi expression data. The
# argument blocks can be given to specify blocks, or the blocks can be omitted and will be calculated if
# necessary.
# Note on naming of output files: %s will translate into set number, %N into set name (if given in
# multiExpr), %b into block number.
.substituteTags <- function(format, tags, replacements) {
nTags <- length(tags)
if (length(replacements) != nTags) stop("Length of tags and replacements must be the same.")
for (t in 1:nTags) {
format <- gsub(as.character(tags[t]), as.character(replacements[t]), format, fixed = TRUE)
}
format
}
.processFileName <- function(format, setNumber, setNames, blockNumber, analysisName = "") {
# The following is a workaround around empty (NULL) setNames. Replaces the name with the setNumber.
if (is.null(setNames)) setNames <- rep(setNumber, setNumber)
.substituteTags(
format, c("%s", "%N", "%b", "%a"),
c(setNumber, setNames[setNumber], blockNumber, analysisName)
)
}
blockwiseIndividualTOMs <- function(
multiExpr,
multiWeights = NULL,
# Data checking options
checkMissingData = TRUE,
# Blocking options
blocks = NULL,
maxBlockSize = 5000,
blockSizePenaltyPower = 5,
nPreclusteringCenters = NULL,
randomSeed = 12345,
# Network construction arguments: correlation options
corType = "pearson",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
# Adjacency function options
power = 6,
networkType = "unsigned",
checkPower = TRUE,
replaceMissingAdjacencies = FALSE,
suppressTOMForZeroAdjacencies = FALSE,
# Topological overlap options
TOMType = "unsigned",
TOMDenom = "min",
# Save individual TOMs? If not, they will be returned in the session.
saveTOMs = TRUE,
individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",
# General options
nThreads = 0,
useInternalMatrixAlgebra = FALSE,
verbose = 2, indent = 0) {
spaces <- indentSpaces(indent)
dataSize <- checkSets(multiExpr, checkStructure = TRUE)
if (dataSize$structureOK) {
nSets <- dataSize$nSets
nGenes <- dataSize$nGenes
multiFormat <- TRUE
} else {
multiExpr <- multiData(multiExpr)
multiWeights <- multiData(multiWeights)
nSets <- dataSize$nSets
nGenes <- dataSize$nGenes
multiFormat <- FALSE
}
.checkAndScaleMultiWeights(multiWeights, multiExpr, scaleByMax = FALSE)
if (length(power) != 1) {
if (length(power) != nSets) {
stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'.")
}
} else {
power <- rep(power, nSets)
}
seedSaved <- FALSE
if (!is.null(randomSeed)) {
if (exists(".Random.seed")) {
seedSaved <- TRUE
savedSeed <- .Random.seed
}
set.seed(randomSeed)
}
# if (maxBlockSize >= floor(sqrt(2^31)) )
# stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (!is.null(blocks) && (length(blocks) != nGenes)) {
stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'.")
}
if (verbose > 0) {
message(paste(spaces, "Calculating topological overlaps block-wise from all genes"))
}
intCorType <- pmatch(corType, .corTypes)
if (is.na(intCorType)) {
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
}
intTOMType <- pmatch(TOMType, .TOMTypes)
if (is.na(intTOMType)) {
stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", ")))
}
TOMDenomC <- pmatch(TOMDenom, .TOMDenoms) - 1
if (is.na(TOMDenomC)) {
stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", ")))
}
if (checkPower & ((sum(power < 1) > 0) | (sum(power > 50) > 0))) stop("power must be between 1 and 50.")
intNetworkType <- charmatch(networkType, .networkTypes)
if (is.na(intNetworkType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
if ((maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1.")
if (quickCor < 0) stop("quickCor must be positive.")
fallback <- pmatch(pearsonFallback, .pearsonFallbacks)
if (is.na(fallback)) {
stop(paste(
"Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n",
paste(.pearsonFallbacks, collapse = ", ")
))
}
if (nThreads < 0) stop("nThreads must be positive.")
if (is.null(nThreads) || (nThreads == 0)) nThreads <- .useNThreads()
nSamples <- dataSize$nSamples
# Check data for genes and samples that have too many missing values
if (checkMissingData) {
gsg <- goodSamplesGenesMS(multiExpr, verbose = verbose - 1, indent = indent + 1)
if (!gsg$allOK) {
multiExpr <- mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes)
if (!is.null(multiWeights)) multiWeights <- mtd.subset(multiWeights, gsg$goodSamples, gsg$goodGenes)
}
} else {
gsg <- list(
goodGenes = rep(TRUE, nGenes), goodSamples = lapply(nSamples, function(n) rep(TRUE, n)),
allOK = TRUE
)
}
nGGenes <- sum(gsg$goodGenes)
nGSamples <- rep(0, nSets)
for (set in 1:nSets) nGSamples[set] <- sum(gsg$goodSamples[[set]])
if (is.null(blocks)) {
if (nGGenes > maxBlockSize) {
if (verbose > 1) message(paste(spaces, "....pre-clustering genes to determine blocks.."))
clustering <- consensusProjectiveKMeans(multiExpr,
preferredSize = maxBlockSize,
sizePenaltyPower = blockSizePenaltyPower, checkData = FALSE,
nCenters = nPreclusteringCenters,
verbose = verbose - 2, indent = indent + 1
)
gBlocks <- .orderLabelsBySize(clustering$clusters)
} else {
gBlocks <- rep(1, nGGenes)
}
blocks <- rep(NA, nGenes)
blocks[gsg$goodGenes] <- gBlocks
} else {
gBlocks <- blocks[gsg$goodGenes]
}
blockLevels <- as.numeric(levels(factor(gBlocks)))
blockSizes <- table(gBlocks)
nBlocks <- length(blockLevels)
if (any(blockSizes > sqrt(2^31) - 1)) {
message(spaste(
spaces,
"Found block(s) with size(s) larger than limit of 'int' indexing.\n",
spaces, " Support for such large blocks is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."
))
}
# check file names for uniqueness
actualFileNames <- NULL
if (saveTOMs) {
actualFileNames <- matrix("", nSets, nBlocks)
for (set in 1:nSets) {
for (b in 1:nBlocks) {
actualFileNames[set, b] <- .processFileName(individualTOMFileNames, set, names(multiExpr), b)
}
}
rownames(actualFileNames) <- spaste("Set.", c(1:nSets))
colnames(actualFileNames) <- spaste("Block.", c(1:nBlocks))
if (length(unique(as.vector(actualFileNames))) < nSets * nBlocks) {
message("Error: File names for (some) set/block combinations are not unique:")
print(actualFileNames)
stop("File names must be unique.")
}
}
# Initialize various variables
blockGenes <- list()
blockNo <- 1
gc()
setTomDS <- list()
# Here's where the analysis starts
for (blockNo in 1:nBlocks)
{
if (verbose > 1 && nBlocks > 1) message(paste(spaces, "..Working on block", blockNo, "."))
# Select the block genes
block <- c(1:nGGenes)[gBlocks == blockLevels[blockNo]]
nBlockGenes <- length(block)
blockGenes[[blockNo]] <- c(1:nGenes)[gsg$goodGenes][gBlocks == blockLevels[blockNo]]
errorOccurred <- FALSE
# Set up file names or memory space to hold the set TOMs
if (!saveTOMs) {
setTomDS[[blockNo]] <- array(0, dim = c(nBlockGenes * (nBlockGenes - 1) / 2, nSets))
}
# For each set: calculate and save TOM
for (set in 1:nSets)
{
if (verbose > 2) message(paste(spaces, "....Working on set", set))
selExpr <- as.matrix(multiExpr[[set]]$data[, block])
if (!is.null(multiWeights)) {
selWeights <- as.matrix(multiWeights[[set]]$data[, block])
} else {
selWeights <- NULL
}
# Calculate TOM by calling a custom C function:
callVerb <- max(0, verbose - 1)
callInd <- indent + 2
CcorType <- intCorType - 1
CnetworkType <- intNetworkType - 1
CTOMType <- intTOMType - 1
# tempExpr = as.double(as.matrix(selExpr));
warn <- 0L
tom <- .Call("tomSimilarity_call", selExpr, selWeights,
as.integer(CcorType), as.integer(CnetworkType), as.double(power[set]), as.integer(CTOMType),
as.integer(TOMDenomC),
as.double(maxPOutliers),
as.double(quickCor),
as.integer(fallback),
as.integer(cosineCorrelation),
as.integer(replaceMissingAdjacencies),
as.integer(suppressTOMForZeroAdjacencies),
as.integer(useInternalMatrixAlgebra),
warn, as.integer(nThreads),
as.integer(callVerb), as.integer(callInd),
PACKAGE = "WGCNA"
)
# FIXME: warn if necessary
tomDS <- as.dist(tom)
# dim(tom) = c(nBlockGenes, nBlockGenes);
rm(tom)
# Save the calculated TOM either to disk in chunks or to memory.
if (saveTOMs) {
save(tomDS, file = actualFileNames[set, blockNo])
} else {
setTomDS[[blockNo]] [, set] <- tomDS[]
}
}
rm(tomDS)
gc()
}
if (!multiFormat) {
gsg$goodSamples <- gsg$goodSamples[[1]]
}
# Re-set random number generator if necessary
if (seedSaved) .Random.seed <<- savedSeed
list(
actualTOMFileNames = actualFileNames,
TOMSimilarities = if (!saveTOMs) setTomDS else NULL,
blocks = blocks,
blockGenes = blockGenes,
goodSamplesAndGenes = gsg,
nGGenes = nGGenes,
gBlocks = gBlocks,
nThreads = nThreads,
saveTOMs = saveTOMs,
intNetworkType = intNetworkType,
intCorType = intCorType,
nSets = nSets,
setNames = names(multiExpr)
)
}
# ==========================================================================================================
#
# lowerTri2matrix
#
# ==========================================================================================================
lowerTri2matrix <- function(x, diag = 1) {
if (class(x) == "dist") {
mat <- as.matrix(x)
} else {
n <- length(x)
n1 <- (1 + sqrt(1 + 8 * n)) / 2
if (floor(n1) != n1) stop("Input length does not translate into matrix")
mat <- matrix(0, n1, n1)
mat[lower.tri(mat)] <- x
mat <- mat + t(mat)
}
diag(mat) <- diag
mat
}
# ==========================================================================================================
#
# blockwiseConsensusModules
#
# ==========================================================================================================
.checkComponents <- function(object, names) {
objNames <- names(object)
inObj <- names %in% objNames
if (!all(inObj)) {
stop(
".checkComponents: object is missing the following components:\n",
paste(names[!inObj], collapse = ", ")
)
}
}
# Function to calculate consensus modules and eigengenes from all genes.
blockwiseConsensusModules <- function(
multiExpr,
# Data checking options
checkMissingData = TRUE,
# Blocking options
blocks = NULL,
maxBlockSize = 5000,
blockSizePenaltyPower = 5,
nPreclusteringCenters = NULL,
randomSeed = 12345,
# individual TOM information
individualTOMInfo = NULL,
useIndivTOMSubset = NULL,
# Network construction arguments: correlation options
corType = "pearson",
maxPOutliers = 1,
quickCor = 0,
pearsonFallback = "individual",
cosineCorrelation = FALSE,
# Adjacency function options
power = 6,
networkType = "unsigned",
checkPower = TRUE,
replaceMissingAdjacencies = FALSE,
# Topological overlap options
TOMType = "unsigned",
TOMDenom = "min",
# Save individual TOMs?
saveIndividualTOMs = TRUE,
individualTOMFileNames = "individualTOM-Set%s-Block%b.RData",
# Consensus calculation options: network calibration
networkCalibration = c("single quantile", "full quantile", "none"),
## Save scaled TOMs? <-- leave this option for users willing to run consensusTOM on its own
# saveScaledIndividualTOMs = FALSE,
# scaledIndividualTOMFilePattern = "scaledIndividualTOM-Set%s-Block%b.RData",
# Simple quantile calibration options
calibrationQuantile = 0.95,
sampleForCalibration = TRUE, sampleForCalibrationFactor = 1000,
getNetworkCalibrationSamples = FALSE,
# Consensus definition
consensusQuantile = 0,
useMean = FALSE,
setWeights = NULL,
# Saving the consensus TOM
saveConsensusTOMs = FALSE,
consensusTOMFilePattern = "consensusTOM-block.%b.RData",
# Internal handling of TOMs
useDiskCache = TRUE, chunkSize = NULL,
cacheBase = ".blockConsModsCache",
cacheDir = ".",
# Alternative consensus TOM input from a previous calculation
consensusTOMInfo = NULL,
# Basic tree cut options
deepSplit = 2,
detectCutHeight = 0.995, minModuleSize = 20,
checkMinModuleSize = TRUE,
# Advanced tree cut opyions
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
stabilityLabels = NULL,
minStabilityDissim = NULL,
pamStage = TRUE, pamRespectsDendro = TRUE,
# Gene joining and removal from a module, and module "significance" criteria
reassignThresholdPS = 1e-4,
trimmingConsensusQuantile = consensusQuantile,
# minKMEtoJoin =0.7,
minCoreKME = 0.5, minCoreKMESize = minModuleSize / 3,
minKMEtoStay = 0.2,
# Module eigengene calculation options
impute = TRUE,
trapErrors = FALSE,
# Module merging options
equalizeQuantilesForModuleMerging = FALSE,
quantileSummaryForModuleMerging = "mean",
mergeCutHeight = 0.15,
mergeConsensusQuantile = consensusQuantile,
# Output options
numericLabels = FALSE,
# General options
nThreads = 0,
verbose = 2, indent = 0,
...) {
spaces <- indentSpaces(indent)
dataSize <- checkSets(multiExpr)
nSets <- dataSize$nSets
nGenes <- dataSize$nGenes
if (length(power) != 1) {
if (length(power) != nSets) {
stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'.")
}
} else {
power <- rep(power, nSets)
}
seedSaved <- FALSE
if (!is.null(randomSeed)) {
if (exists(".Random.seed")) {
seedSaved <- TRUE
savedSeed <- .Random.seed
}
set.seed(randomSeed)
}
if ((consensusQuantile < 0) | (consensusQuantile > 1)) {
stop("'consensusQuantile' must be between 0 and 1.")
}
if (checkMinModuleSize & (minModuleSize > nGenes / 2)) {
minModuleSize <- nGenes / 2
warning(
"blockwiseConsensusModules: minModuleSize appeared too large and was lowered to",
minModuleSize,
". If you insist on keeping the original minModuleSize, set checkMinModuleSize = FALSE."
)
}
if (verbose > 0) {
message(paste(
spaces, "Calculating consensus modules and module eigengenes",
"block-wise from all genes"
))
}
# prepare scaled and imputed multiExpr.
multiExpr.scaled <- mtd.apply(multiExpr, scale)
hasMissing <- unlist(multiData2list(mtd.apply(multiExpr, function(x) {
any(is.na(x))
})))
# Impute those that have missing data
multiExpr.scaled.imputed <- mtd.mapply(
function(x, doImpute) {
if (doImpute) t(impute.knn(t(x))$data) else x
},
multiExpr.scaled, hasMissing
)
branchSplitFnc <- NULL
minBranchDissimilarities <- numeric(0)
externalSplitFncNeedsDistance <- logical(0)
if (useBranchEigennodeDissim) {
branchSplitFnc <- "mtd.branchEigengeneDissim"
minBranchDissimilarities <- minBranchEigennodeDissim
externalSplitFncNeedsDistance <- FALSE
}
if (!is.null(stabilityLabels)) {
branchSplitFnc <- c(branchSplitFnc, "branchSplitFromStabilityLabels")
minBranchDissimilarities <- c(minBranchDissimilarities, minStabilityDissim)
externalSplitFncNeedsDistance <- c(externalSplitFncNeedsDistance, FALSE)
}
# Basic checks on consensusTOMInfo
if (!is.null(consensusTOMInfo)) {
.checkComponents(consensusTOMInfo, c("saveConsensusTOMs", "individualTOMInfo", "goodSamplesAndGenes"))
if (length(consensusTOMInfo$individualTOMInfo$blocks) != nGenes) {
stop("Inconsistent number of genes in 'consensusTOMInfo$individualTOMInfo$blocks'.")
}
if (!is.null(consensusTOMInfo$consensusQuantile) &&
(consensusQuantile != consensusTOMInfo$consensusQuantile)) {
warning(
immediate. = TRUE,
"blockwiseConsensusModules: given (possibly default) 'consensusQuantile' is different\n",
"from the value recorded in 'consensusTOMInfo'. This is normally undesirable and may\n",
"indicate a mistake in the function call."
)
}
}
# Handle "other arguments"
args <- list(...)
if (is.null(args$reproduceBranchEigennodeQuantileError)) {
reproduceBranchEigennodeQuantileError <- FALSE
} else {
reproduceBranchEigennodeQuantileError <- args$reproduceBranchEigennodeQuantileError
}
# If topological overlaps weren't calculated yet, calculate them.
removeIndividualTOMsOnExit <- FALSE
nBlocks.0 <- length(unique(blocks))
if (is.null(individualTOMInfo)) {
if (is.null(consensusTOMInfo)) {
individualTOMInfo <- blockwiseIndividualTOMs(
multiExpr = multiExpr,
checkMissingData = checkMissingData,
blocks = blocks,
maxBlockSize = maxBlockSize,
blockSizePenaltyPower = blockSizePenaltyPower,
nPreclusteringCenters = nPreclusteringCenters,
randomSeed = NULL,
corType = corType,
maxPOutliers = maxPOutliers,
quickCor = quickCor,
pearsonFallback = pearsonFallback,
cosineCorrelation = cosineCorrelation,
power = power,
networkType = networkType,
replaceMissingAdjacencies = replaceMissingAdjacencies,
TOMType = TOMType,
TOMDenom = TOMDenom,
saveTOMs = useDiskCache | nBlocks.0 > 1,
individualTOMFileNames = individualTOMFileNames,
nThreads = nThreads,
verbose = verbose, indent = indent
)
removeIndividualTOMsOnExit <- TRUE
} else {
individualTOMInfo <- consensusTOMInfo$individualTOMInfo
}
}
if (is.null(useIndivTOMSubset)) {
if (individualTOMInfo$nSets != nSets) {
stop(paste(
"Number of sets in individualTOMInfo and in multiExpr do not agree.\n",
" To use a subset of individualTOMInfo, set useIndivTOMSubset appropriately."
))
}
useIndivTOMSubset <- c(1:nSets)
}
if (length(useIndivTOMSubset) != nSets) {
stop("Length of 'useIndivTOMSubset' must equal the number of sets in 'multiExpr'")
}
if (length(unique(useIndivTOMSubset)) != nSets) {
stop("Entries of 'useIndivTOMSubset' must be unique")
}
if (any(useIndivTOMSubset < 1) | any(useIndivTOMSubset > individualTOMInfo$nSets)) {
stop("All entries of 'useIndivTOMSubset' must be between 1 and the number of sets in individualTOMInfo")
}
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType <- individualTOMInfo$intNetworkType
intCorType <- individualTOMInfo$intCorType
corFnc <- match.fun(.corFnc[intCorType])
corOptions <- list(use = "p")
fallback <- pmatch(pearsonFallback, .pearsonFallbacks)
nSamples <- dataSize$nSamples
allLabels <- rep(0, nGenes)
allLabelIndex <- NULL
# Restrict data to goodSamples and goodGenes
gsg <- individualTOMInfo$goodSamplesAndGenes
# Restrict gsg to used sets
gsg$goodSamples <- gsg$goodSamples[useIndivTOMSubset]
if (!gsg$allOK) {
multiExpr <- mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes)
}
nGGenes <- sum(gsg$goodGenes)
nGSamples <- rep(0, nSets)
for (set in 1:nSets) nGSamples[set] <- sum(gsg$goodSamples[[set]])
blocks <- individualTOMInfo$blocks
gBlocks <- individualTOMInfo$gBlocks
blockLevels <- sort(unique(gBlocks))
blockSizes <- table(gBlocks)
nBlocks <- length(blockLevels)
if (is.null(chunkSize)) chunkSize <- as.integer(.largestBlockSize / nSets)
reassignThreshold <- reassignThresholdPS^nSets
consMEs <- vector(mode = "list", length = nSets)
dendros <- list()
maxUsedLabel <- 0
gc()
# Here's where the analysis starts
removeConsensusTOMOnExit <- FALSE
if (is.null(consensusTOMInfo) && (nBlocks == 1 || saveConsensusTOMs || getNetworkCalibrationSamples)) {
consensusTOMInfo <- consensusTOM(
individualTOMInfo = individualTOMInfo,
useIndivTOMSubset = useIndivTOMSubset,
networkCalibration = networkCalibration,
saveCalibratedIndividualTOMs = FALSE,
calibrationQuantile = calibrationQuantile,
sampleForCalibration = sampleForCalibration,
sampleForCalibrationFactor = sampleForCalibrationFactor,
getNetworkCalibrationSamples = getNetworkCalibrationSamples,
consensusQuantile = consensusQuantile,
useMean = useMean,
setWeights = setWeights,
# Return options
saveConsensusTOMs = saveConsensusTOMs,
consensusTOMFilePattern = consensusTOMFilePattern,
returnTOMs = nBlocks == 1,
# Internal handling of TOMs
useDiskCache = useDiskCache,
chunkSize = chunkSize,
cacheBase = cacheBase,
cacheDir = cacheDir,
verbose = verbose, indent = indent
)
removeConsensusTOMOnExit <- !saveConsensusTOMs
}
blockwiseConsensusCalculation <- is.null(consensusTOMInfo)
for (blockNo in 1:nBlocks)
{
if (verbose > 1) message(paste(spaces, "..Working on block", blockNo, "."))
# Select block genes
block <- c(1:nGGenes)[gBlocks == blockLevels[blockNo]]
nBlockGenes <- length(block)
selExpr <- mtd.subset(multiExpr, , block)
errorOccurred <- FALSE
if (blockwiseConsensusCalculation) {
# This code is only reached if input saveConsensusTOMs is FALSE and there are at least 2 blocks.
consensusTOMInfo <- consensusTOM(
individualTOMInfo = individualTOMInfo,
useIndivTOMSubset = useIndivTOMSubset,
useBlocks = blockNo,
networkCalibration = networkCalibration,
saveCalibratedIndividualTOMs = FALSE,
calibrationQuantile = calibrationQuantile,
sampleForCalibration = sampleForCalibration,
sampleForCalibrationFactor = sampleForCalibrationFactor,
getNetworkCalibrationSamples = FALSE,
consensusQuantile = consensusQuantile,
useMean = useMean,
setWeights = setWeights,
saveConsensusTOMs = FALSE,
returnTOMs = TRUE,
useDiskCache = useDiskCache,
chunkSize = chunkSize,
cacheBase = cacheBase,
cacheDir = cacheDir
)
consTomDS <- consensusTOMInfo$consensusTOM[[1]]
# Remove the consensus TOM from the structure.
consensusTOMInfo$consensusTOM[[1]] <- NULL
consensusTOMInfo$consensusTOM <- NULL
} else {
if (consensusTOMInfo$saveConsensusTOMs) {
consTomDS <- .loadObject(
file = consensusTOMInfo$TOMFiles[blockNo],
size = nBlockGenes * (nBlockGenes - 1) / 2
)
} else {
consTomDS <- consensusTOMInfo$consensusTOM[[blockNo]]
}
}
# Temporary "cast" so fastcluster::hclust doesn't complain about non-integer size.
attr(consTomDS, "Size") <- as.integer(attr(consTomDS, "Size"))
consTomDS <- 1 - consTomDS
gc()
if (verbose > 2) message(paste(spaces, "....clustering and detecting modules.."))
errorOccured <- FALSE
dendros[[blockNo]] <- fastcluster::hclust(consTomDS, method = "average")
if (verbose > 8) {
if (interactive()) {
plot(dendros[[blockNo]], labels = FALSE, main = paste("Block", blockNo))
}
}
externalSplitOptions <- list()
e.index <- 1
if (useBranchEigennodeDissim) {
externalSplitOptions[[e.index]] <- list(
multiExpr = mtd.subset(multiExpr.scaled.imputed, , block),
corFnc = corFnc, corOptions = corOptions,
consensusQuantile = consensusQuantile,
signed = networkType %in% c("signed", "signed hybrid"),
reproduceQuantileError = reproduceBranchEigennodeQuantileError
)
e.index <- e.index + 1
}
if (!is.null(stabilityLabels)) {
externalSplitOptions[[e.index]] <- list(stabilityLabels = stabilityLabels)
e.index <- e.index + 1
}
gc()
# blockLabels = try(cutreeDynamic(dendro = dendros[[blockNo]],
blockLabels <- cutreeDynamic(
dendro = dendros[[blockNo]],
distM = as.matrix(consTomDS),
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method = "hybrid",
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = branchSplitFnc,
minExternalSplit = minBranchDissimilarities,
externalSplitOptions = externalSplitOptions,
externalSplitFncNeedsDistance = externalSplitFncNeedsDistance,
assumeSimpleExternalSpecification = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
verbose = verbose - 3, indent = indent + 2
)
# verbose = verbose-3, indent = indent + 2), silent = TRUE);
if (verbose > 8) {
print(table(blockLabels))
if (interactive()) {
plotDendroAndColors(dendros[[blockNo]], labels2colors(blockLabels),
dendroLabels = FALSE,
main = paste("Block", blockNo)
)
}
}
if (class(blockLabels) == "try-error") {
(if (verbose > 0) message else warning)
(paste(
spaces, "blockwiseConsensusModules: cutreeDynamic failed:\n ", spaces,
blockLabels, "\n", spaces, " Error occured in block", blockNo, "\n",
spaces, " Continuing with next block. "
))
next
} else {
blockLabels[blockLabels > 0] <- blockLabels[blockLabels > 0] + maxUsedLabel
maxUsedLabel <- max(blockLabels)
}
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(paste(
spaces, "No modules detected in block", blockNo,
"--> continuing with next block."
))
}
next
}
# Calculate eigengenes for this batch
if (verbose > 2) message(paste(spaces, "....calculating eigengenes.."))
blockAssigned <- c(1:nBlockGenes)[blockLabels != 0]
blockLabelIndex <- as.numeric(levels(as.factor(blockLabels[blockAssigned])))
blockConsMEs <- try(multiSetMEs(selExpr,
universalColors = blockLabels,
excludeGrey = TRUE, grey = 0, impute = impute,
# trapErrors = TRUE, returnValidOnly = TRUE,
verbose = verbose - 4, indent = indent + 3
), silent = TRUE)
if (class(blockConsMEs) == "try-error") {
if (verbose > 0) {
message(paste(spaces, "*** multiSetMEs failed with the message:"))
message(paste(spaces, " ", blockConsMEs))
message(paste(spaces, "*** --> Ending module detection here"))
} else {
warning(paste(
"blocwiseConsensusModules: multiSetMEs failed with the message: \n",
" ", blockConsMEs, "\n--> continuing with next block."
))
}
next
}
deleteModules <- NULL
changedModules <- NULL
# find genes whose closest module eigengene has cor higher than minKMEtoJoin and assign them
# Removed - should not change blocks before clustering them
# unassGenes = c(c(1:nGGenes)[-block][allLabels[-block]==0], block[blockLabels==0]);
# if (length(unassGenes) > 0)
# {
# blockKME = array(0, dim = c(length(unassGenes), ncol(blockConsMEs[[1]]$data), nSets));
# corEval = parse(text = paste(.corFnc[intCorType],
# "( multiExpr[[set]]$data[, unassGenes], blockConsMEs[[set]]$data,",
# .corOptions[intCorType], ")"))
# for (set in 1:nSets) blockKME[, , set] = eval(corEval);
# if (intNetworkType==1) blockKME = abs(blockKME);
# consKME = as.matrix(apply(blockKME, c(1,2), min));
# consKMEmax = apply(consKME, 1, max);
# closestModule = blockLabelIndex[apply(consKME, 1, which.max)];
# assign = (consKMEmax >= minKMEtoJoin );
# if (sum(assign>0))
# {
# allLabels[unassGenes[assign]] = closestModule[assign];
# changedModules = union(changedModules, closestModule[assign]);
# }
# rm(blockKME, consKME, consKMEmax);
# }
gc()
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff, and that all member genes have
# the required minimum consensus KME
if (verbose > 2) {
message(paste(spaces, "....checking consensus modules for statistical meaningfulness.."))
}
for (mod in 1:ncol(blockConsMEs[[1]]$data))
{
modGenes <- (blockLabels == blockLabelIndex[mod])
KME <- matrix(0, nrow = sum(modGenes), ncol = nSets)
corEval <- parse(text = paste(
.corFnc[intCorType],
"( selExpr[[set]]$data[, modGenes], blockConsMEs[[set]]$data[, mod]",
prepComma(.corOptions[intCorType]), ")"
))
for (set in 1:nSets) KME[, set] <- as.vector(eval(corEval))
if (intNetworkType == 1) KME <- abs(KME)
consKME <- apply(KME, 1, quantile, probs = trimmingConsensusQuantile, names = FALSE, na.rm = TRUE)
if (sum(consKME > minCoreKME) < minCoreKMESize) {
blockLabels[modGenes] <- 0
deleteModules <- union(deleteModules, mod)
if (verbose > 3) {
message(paste(spaces, " ..deleting module ", blockLabelIndex[mod],
": of ", sum(modGenes),
" total genes in the module only ", sum(consKME > minCoreKME),
" have the requisite high correlation with the eigengene in all sets.",
sep = ""
))
}
} else if (sum(consKME < minKMEtoStay) > 0) {
if (verbose > 3) {
message(paste(
spaces, " ..removing", sum(consKME < minKMEtoStay),
"genes from module", blockLabelIndex[mod], "because their KME is too low."
))
}
blockLabels[modGenes][consKME < minKMEtoStay] <- 0
if (sum(blockLabels[modGenes] > 0) < minModuleSize) {
deleteModules <- union(deleteModules, mod)
blockLabels[modGenes] <- 0
if (verbose > 3) {
message(paste(spaces, " ..deleting module ", blockLabelIndex[mod],
": not enough genes in the module after removal of low KME genes.",
sep = ""
))
}
} else {
changedModules <- union(changedModules, blockLabelIndex[mod])
}
}
}
# Remove marked modules
if (!is.null(deleteModules)) {
for (set in 1:nSets) blockConsMEs[[set]]$data <- blockConsMEs[[set]]$data[, -deleteModules]
modGenes <- is.finite(match(blockLabels, blockLabelIndex[deleteModules]))
blockLabels[modGenes] <- 0
modAllGenes <- is.finite(match(allLabels, blockLabelIndex[deleteModules]))
allLabels[modAllGenes] <- 0
blockLabelIndex <- blockLabelIndex[-deleteModules]
}
# Check whether there's anything left
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(paste(spaces, " ..No significant modules detected in block", blockNo))
message(paste(spaces, " ..continuing with next block."))
}
next
}
# Update module eigengenes
for (set in 1:nSets) {
if (is.null(dim(blockConsMEs[[set]]$data))) {
dim(blockConsMEs[[set]]$data) <- c(length(blockConsMEs[[set]]$data), 1)
}
}
if (is.null(consMEs[[1]])) {
for (set in 1:nSets) consMEs[[set]] <- list(data = blockConsMEs[[set]]$data)
} else {
for (set in 1:nSets) {
consMEs[[set]]$data <- cbind(consMEs[[set]]$data, blockConsMEs[[set]]$data)
}
}
# Update allLabels
allLabelIndex <- c(allLabelIndex, blockLabelIndex)
allLabels[gsg$goodGenes][block[blockAssigned]] <- blockLabels[blockAssigned]
gc()
}
# Check whether any of the already assigned genes (in this or previous blocks) should be re-assigned
if (verbose > 2) {
message(paste(spaces, "....checking for genes that should be reassigned.."))
}
deleteModules <- NULL
goodLabels <- allLabels[gsg$goodGenes]
if (sum(goodLabels != 0) > 0) {
propLabels <- goodLabels[goodLabels != 0]
assGenes <- (c(1:nGenes)[gsg$goodGenes])[goodLabels != 0]
corEval <- parse(text = paste(
.corFnc[intCorType],
"(multiExpr[[set]]$data[, goodLabels!=0], consMEs[[set]]$data",
prepComma(.corOptions[intCorType]), ")"
))
nMods <- ncol(consMEs[[1]]$data)
lpValues <- array(0, dim = c(length(propLabels), nMods, nSets))
sumSign <- array(0, dim = c(length(propLabels), nMods))
if (verbose > 3) {
message(paste(spaces, "......module membership p-values.."))
}
for (set in 1:nSets)
{
KME <- eval(corEval)
if (intNetworkType == 1) KME <- abs(KME)
lpValues[, , set] <- -2 * log(corPvalueFisher(KME, nGSamples[set], twoSided = FALSE))
sumSign <- sumSign + sign(KME)
}
if (verbose > 3) {
message(paste(spaces, "......module membership scores.."))
}
scoreAll <- as.matrix(apply(lpValues, c(1, 2), sum)) * (nSets + sumSign) / (2 * nSets)
scoreAll[!is.finite(scoreAll)] <- 0.001 # This low should be enough
bestScore <- apply(scoreAll, 1, max)
if (intNetworkType == 1) sumSign <- abs(sumSign)
if (verbose > 3) {
cat(paste(spaces, "......individual modules.."))
pind <- initProgInd()
}
for (mod in 1:nMods)
{
modGenes <- c(1:length(propLabels))[propLabels == allLabelIndex[mod]]
scoreMod <- scoreAll[modGenes, mod]
candidates <- (bestScore[modGenes] > scoreMod)
candidates[!is.finite(candidates)] <- FALSE
if (sum(candidates) > 0) {
pModule <- pchisq(scoreMod[candidates], nSets, log.p = TRUE)
whichBest <- apply(scoreAll[modGenes[candidates], ], 1, which.max)
pBest <- pchisq(bestScore[modGenes[candidates]], nSets, log.p = TRUE)
reassign <- ifelse(is.finite(pBest - pModule),
((pBest - pModule) < log(reassignThreshold)),
FALSE
)
if (sum(reassign) > 0) {
allLabels[assGenes[modGenes[candidates][reassign]]] <- whichBest[reassign]
changedModules <- union(changedModules, whichBest[reassign])
if (length(modGenes) - sum(reassign) < minModuleSize) {
deleteModules <- union(deleteModules, mod)
} else {
changedModules <- union(changedModules, mod)
}
}
}
if (verbose > 3) pind <- updateProgInd(mod / nMods, pind)
}
rm(lpValues, sumSign, scoreAll)
if (verbose > 3) message("")
}
# Remove marked modules
if (!is.null(deleteModules)) {
# for (set in 1:nSets) consMEs[[set]]$data = consMEs[[set]]$data[, -deleteModules];
modGenes <- is.finite(match(allLabels, allLabelIndex[deleteModules]))
allLabels[modGenes] <- 0
# allLabelIndex = allLabelIndex[-deleteModules];
}
if (verbose > 1) message(paste(spaces, "..merging consensus modules that are too close.."))
# print(table(allLabels));
# print(is.numeric(allLabels))
if (numericLabels) {
colors <- allLabels
} else {
colors <- labels2colors(allLabels)
}
mergedColors <- colors
mergedMods <- try(mergeCloseModules(multiExpr, colors[gsg$goodGenes],
equalizeQuantiles = equalizeQuantilesForModuleMerging,
quantileSummary = quantileSummaryForModuleMerging,
consensusQuantile = mergeConsensusQuantile,
cutHeight = mergeCutHeight,
relabel = TRUE, impute = impute,
verbose = verbose - 2, indent = indent + 2
), silent = TRUE)
if (class(mergedMods) == "try-error") {
if (verbose > 0) {
message(paste(
spaces, "blockwiseConsensusModules: mergeCloseModule failed with this message:\n",
spaces, " ", mergedMods, spaces,
"---> returning unmerged consensus modules"
))
} else {
warning(paste(
"blockwiseConsensusModules: mergeCloseModule failed with this message:\n ",
mergedMods, "---> returning unmerged consensus modules"
))
}
MEs <- try(multiSetMEs(multiExpr,
universalColors = colors[gsg$goodGenes]
# trapErrors = TRUE, returnValidOnly = TRUE
), silent = TRUE)
if (class(MEs) == "try-error") {
warning(paste(
"blockwiseConsensusModules: ME calculation failed with this message:\n ",
MEs, "---> returning empty module eigengenes"
))
allSampleMEs <- NULL
} else {
if (!MEs[[1]]$allOK) mergedColors[gsg$goodGenes] <- MEs[[1]]$validColors
allSampleMEs <- vector(mode = "list", length = nSets)
for (set in 1:nSets)
{
allSampleMEs[[set]] <-
list(data = as.data.frame(matrix(NA, nrow = nGSamples[set], ncol = ncol(MEs[[set]]$data))))
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] <- MEs[[set]]$data[, ]
names(allSampleMEs[[set]]$data) <- names(MEs[[set]]$data)
}
}
} else {
mergedColors[gsg$goodGenes] <- mergedMods$colors
allSampleMEs <- vector(mode = "list", length = nSets)
names(allSampleMEs) <- names(multiExpr)
for (set in 1:nSets)
{
allSampleMEs[[set]] <-
list(data = as.data.frame(matrix(NA,
nrow = nGSamples[set],
ncol = ncol(mergedMods$newMEs[[1]]$data)
)))
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] <- mergedMods$newMEs[[set]]$data[, ]
names(allSampleMEs[[set]]$data) <- names(mergedMods$newMEs[[set]]$data)
}
}
if (seedSaved) .Random.seed <<- savedSeed
if (removeConsensusTOMOnExit) {
.checkAndDelete(consensusTOMInfo$TOMFiles)
consensusTOMInfo$TOMFiles <- NULL
}
if (removeIndividualTOMsOnExit) {
.checkAndDelete(individualTOMInfo$actualTOMFileNames)
individualTOMInfo$actualTOMFileNames <- NULL
}
# Under no circumstances return consensus TOM or individual TOM similarities within the returned list.
consensusTOMInfo$consensusTOM <- NULL
individualTOMInfo$TOMSimilarities <- NULL
list(
colors = mergedColors,
unmergedColors = colors,
multiMEs = allSampleMEs,
goodSamples = gsg$goodSamples,
goodGenes = gsg$goodGenes,
dendrograms = dendros,
TOMFiles = consensusTOMInfo$TOMFiles,
blockGenes = individualTOMInfo$blockGenes,
blocks = blocks,
originCount = consensusTOMInfo$originCount,
networkCalibrationSamples = consensusTOMInfo$networkCalibrationSamples,
individualTOMInfo = individualTOMInfo,
consensusTOMInfo = if (saveConsensusTOMs) consensusTOMInfo else NULL,
consensusQuantile = consensusQuantile
)
}
# ==========================================================================================================
#
# recutConsensusTrees
#
# ==========================================================================================================
recutConsensusTrees <- function(multiExpr,
goodSamples, goodGenes,
blocks,
TOMFiles,
dendrograms,
corType = "pearson",
networkType = "unsigned",
deepSplit = 2,
detectCutHeight = 0.995, minModuleSize = 20,
checkMinModuleSize = TRUE,
maxCoreScatter = NULL, minGap = NULL,
maxAbsCoreScatter = NULL, minAbsGap = NULL,
minSplitHeight = NULL, minAbsSplitHeight = NULL,
useBranchEigennodeDissim = FALSE,
minBranchEigennodeDissim = mergeCutHeight,
pamStage = TRUE, pamRespectsDendro = TRUE,
# minKMEtoJoin =0.7,
trimmingConsensusQuantile = 0,
minCoreKME = 0.5, minCoreKMESize = minModuleSize / 3,
minKMEtoStay = 0.2,
reassignThresholdPS = 1e-4,
mergeCutHeight = 0.15,
mergeConsensusQuantile = trimmingConsensusQuantile,
impute = TRUE,
trapErrors = FALSE,
numericLabels = FALSE,
verbose = 2, indent = 0) {
spaces <- indentSpaces(indent)
dataSize <- checkSets(multiExpr)
nSets <- dataSize$nSets
nGenes <- dataSize$nGenes
nSamples <- dataSize$nSamples
if (length(blocks) != nGenes) {
stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'.")
}
# if (verbose>0)
# message(paste(spaces, "Calculating consensus modules and module eigengenes",
# "block-wise from all genes"));
# If we're merging branches by correlation within cutreeHybrid, prepare scaled and imputed multiExpr.
if (useBranchEigennodeDissim) {
multiExpr.scaled <- mtd.apply(multiExpr, scale)
hasMissing <- unlist(multiData2list(mtd.apply(multiExpr, function(x) {
any(is.na(x))
})))
# Impute those that have missing data
multiExpr.scaled.imputed <- mtd.mapply(
function(x, doImpute) {
if (doImpute) t(impute.knn(t(x))$data) else x
},
multiExpr.scaled, hasMissing
)
branchSplitFnc <- "mtd.branchEigengeneDissim"
}
intCorType <- pmatch(corType, .corTypes)
if (is.na(intCorType)) {
stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", ")))
}
# if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1.");
intNetworkType <- charmatch(networkType, .networkTypes)
if (is.na(intNetworkType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
allLabels <- rep(0, nGenes)
allLabelIndex <- NULL
corFnc <- match.fun(.corFnc[intCorType])
corOptions <- list(use = "p")
# Get rid of bad genes and bad samples
gsg <- list(goodGenes = goodGenes, goodSamples = goodSamples, allOK = TRUE)
gsg$allOK <- (sum(!gsg$goodGenes) == 0)
nGGenes <- sum(gsg$goodGenes)
nGSamples <- rep(0, nSets)
for (set in 1:nSets)
{
nGSamples[set] <- sum(gsg$goodSamples[[set]])
gsg$allOK <- gsg$allOK && (sum(!gsg$goodSamples[[set]]) == 0)
}
if (!gsg$allOK) {
for (set in 1:nSets) {
multiExpr[[set]]$data <- multiExpr[[set]]$data[gsg$goodSamples[[set]], gsg$goodGenes]
}
}
gBlocks <- blocks[gsg$goodGenes]
blockLevels <- as.numeric(levels(factor(gBlocks)))
blockSizes <- table(gBlocks)
nBlocks <- length(blockLevels)
reassignThreshold <- reassignThresholdPS^nSets
# prepare scaled and imputed multiExpr.
multiExpr.scaled <- mtd.apply(multiExpr, scale)
hasMissing <- unlist(multiData2list(mtd.apply(multiExpr, function(x) {
any(is.na(x))
})))
# Impute those that have missing data
multiExpr.scaled.imputed <- mtd.mapply(
function(x, doImpute) {
if (doImpute) t(impute.knn(t(x))$data) else x
},
multiExpr.scaled, hasMissing
)
if (useBranchEigennodeDissim) {
branchSplitFnc <- "mtd.branchEigengeneDissim"
} else {
branchSplitFnc <- NULL
}
# Initialize various variables
consMEs <- vector(mode = "list", length = nSets)
blockNo <- 1
maxUsedLabel <- 0
gc()
# Here's where the analysis starts
while (blockNo <= nBlocks) {
if (verbose > 1) message(paste(spaces, "..Working on block", blockNo, "."))
# Select most connected genes
block <- c(1:nGGenes)[gBlocks == blockLevels[blockNo]]
nBlockGenes <- length(block)
selExpr <- vector(mode = "list", length = nSets)
for (set in 1:nSets) {
selExpr[[set]] <- list(data = multiExpr[[set]]$data[, block])
}
# This is how TOMs are saved:
# if (saveTOMs)
# {
# TOMFiles[blockNo] = paste(saveTOMFileBase, "-block.", blockNo, ".RData", sep="");
# save(consTomDS, file = TOMFiles[blockNo]);
# }
# consTomDS = 1-consTomDS;
# gc();
xx <- try(load(TOMFiles[blockNo]), silent = TRUE)
if (class(xx) == "try-error") {
message(paste(
"************\n File name", TOMFiles[blockNo],
"appears invalid: the load function returned the following error:\n ",
xx
))
stop()
}
if (xx != "consTomDS") {
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate variable."))
}
if (class(consTomDS) != "dist") {
stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate distance structure."))
}
consTomDS <- 1 - consTomDS
gc()
if (verbose > 2) message(paste(spaces, "....clustering and detecting modules.."))
errorOccured <- FALSE
blockLabels <- try(cutreeDynamic(
dendro = dendrograms[[blockNo]],
deepSplit = deepSplit,
cutHeight = detectCutHeight, minClusterSize = minModuleSize,
method = "hybrid",
maxCoreScatter = maxCoreScatter, minGap = minGap,
maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap,
minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight,
externalBranchSplitFnc = if (useBranchEigennodeDissim) {
branchSplitFnc
} else {
NULL
},
minExternalSplit = minBranchEigennodeDissim,
externalSplitOptions = list(
multiExpr = mtd.subset(multiExpr.scaled.imputed, , block),
corFnc = corFnc, corOptions = corOptions,
consensusQuantile = mergeConsensusQuantile,
signed = networkType %in% c("signed", "signed hybrid")
),
externalSplitFncNeedsDistance = FALSE,
pamStage = pamStage, pamRespectsDendro = pamRespectsDendro,
distM = as.matrix(consTomDS),
verbose = verbose - 3, indent = indent + 2
), silent = TRUE)
if (class(blockLabels) == "try-error") {
(if (verbose > 0) message else warning)
(paste(
spaces, "blockwiseConsensusModules: cutreeDynamic failed:\n ",
blockLabels, "\nError occured in block", blockNo, "\nContinuing with next block."
))
next
} else {
blockLabels[blockLabels > 0] <- blockLabels[blockLabels > 0] + maxUsedLabel
maxUsedLabel <- max(blockLabels)
}
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(paste(spaces, "No modules detected in block", blockNo))
message(paste(spaces, " Continuing with next block."))
}
next
}
# Calculate eigengenes for this batch
if (verbose > 2) message(paste(spaces, "....calculating eigengenes.."))
blockAssigned <- c(1:nBlockGenes)[blockLabels != 0]
blockLabelIndex <- as.numeric(levels(as.factor(blockLabels[blockAssigned])))
blockConsMEs <- try(multiSetMEs(selExpr,
universalColors = blockLabels,
excludeGrey = TRUE, grey = 0, impute = impute,
# trapErrors = TRUE, returnValidOnly = TRUE,
verbose = verbose - 4, indent = indent + 3
), silent = TRUE)
if (class(blockConsMEs) == "try-error") {
if (verbose > 0) {
message(paste(spaces, "*** multiSetMEs failed with the message:"))
message(paste(spaces, " ", blockConsMEs))
message(paste(spaces, "*** --> Ending module detection here"))
} else {
warning(paste(
"blocwiseConsensusModules: multiSetMEs failed with the message: \n",
" ", blockConsMEs, "\n--> Continuing with next block."
))
}
next
}
deleteModules <- NULL
changedModules <- NULL
# find genes whose closest module eigengene has cor higher than minKMEtoJoin and assign them
# Removed - should not change blocks before clustering them
# unassGenes = c(c(1:nGGenes)[-block][allLabels[-block]==0], block[blockLabels==0]);
# if (length(unassGenes) > 0)
# {
# blockKME = array(0, dim = c(length(unassGenes), ncol(blockConsMEs[[1]]$data), nSets));
# corEval = parse(text = paste(.corFnc[intCorType],
# "( multiExpr[[set]]$data[, unassGenes], blockConsMEs[[set]]$data,",
# .corOptions[intCorType], ")"))
# for (set in 1:nSets) blockKME[, , set] = eval(corEval);
# if (intNetworkType==1) blockKME = abs(blockKME);
# consKME = as.matrix(apply(blockKME, c(1,2), min));
# consKMEmax = apply(consKME, 1, max);
# closestModule = blockLabelIndex[apply(consKME, 1, which.max)];
# assign = (consKMEmax >= minKMEtoJoin );
# if (sum(assign>0))
# {
# allLabels[unassGenes[assign]] = closestModule[assign];
# changedModules = union(changedModules, closestModule[assign]);
# }
# rm(blockKME, consKME, consKMEmax);
# }
gc()
# Check modules: make sure that of the genes present in the module, at least a minimum number
# have a correlation with the eigengene higher than a given cutoff, and that all member genes have
# the required minimum consensus KME
if (verbose > 2) {
message(paste(spaces, "....checking consensus modules for statistical meaningfulness.."))
}
for (mod in 1:ncol(blockConsMEs[[1]]$data))
{
modGenes <- (blockLabels == blockLabelIndex[mod])
KME <- matrix(0, nrow = sum(modGenes), ncol = nSets)
corEval <- parse(text = paste(
.corFnc[intCorType],
"( selExpr[[set]]$data[, modGenes], blockConsMEs[[set]]$data[, mod]",
prepComma(.corOptions[intCorType]), ")"
))
for (set in 1:nSets) KME[, set] <- as.vector(eval(corEval))
if (intNetworkType == 1) KME <- abs(KME)
consKME <- apply(KME, 1, quantile, probs = trimmingConsensusQuantile, na.rm = TRUE)
if (sum(consKME > minCoreKME) < minCoreKMESize) {
blockLabels[modGenes] <- 0
deleteModules <- union(deleteModules, mod)
if (verbose > 3) {
message(paste(spaces, " ..deleting module ", blockLabelIndex[mod],
": of ", sum(modGenes),
" total genes in the module only ", sum(consKME > minCoreKME),
" have the requisite high correlation with the eigengene in all sets.",
sep = ""
))
}
} else if (sum(consKME < minKMEtoStay) > 0) {
if (verbose > 3) {
message(paste(
spaces, " ..removing", sum(consKME < minKMEtoStay),
"genes from module", blockLabelIndex[mod], "because their KME is too low."
))
}
blockLabels[modGenes][consKME < minKMEtoStay] <- 0
if (sum(blockLabels[modGenes] > 0) < minModuleSize) {
deleteModules <- union(deleteModules, mod)
blockLabels[modGenes] <- 0
if (verbose > 3) {
message(paste(spaces, " ..deleting module ", blockLabelIndex[mod],
": not enough genes in the module after removal of low KME genes.",
sep = ""
))
}
} else {
changedModules <- union(changedModules, blockLabelIndex[mod])
}
}
}
# Remove marked modules
if (!is.null(deleteModules)) {
for (set in 1:nSets) blockConsMEs[[set]]$data <- blockConsMEs[[set]]$data[, -deleteModules]
modGenes <- is.finite(match(blockLabels, blockLabelIndex[deleteModules]))
blockLabels[modGenes] <- 0
modAllGenes <- is.finite(match(allLabels, blockLabelIndex[deleteModules]))
allLabels[modAllGenes] <- 0
blockLabelIndex <- blockLabelIndex[-deleteModules]
}
# Check whether there's anything left
if (sum(blockLabels > 0) == 0) {
if (verbose > 1) {
message(paste(spaces, " No significant modules detected in block", blockNo))
message(paste(spaces, " Continuing with next block."))
}
next
}
# Update module eigengenes
for (set in 1:nSets) {
if (is.null(dim(blockConsMEs[[set]]$data))) {
dim(blockConsMEs[[set]]$data) <- c(length(blockConsMEs[[set]]$data), 1)
}
}
if (is.null(consMEs[[1]])) {
for (set in 1:nSets) consMEs[[set]] <- list(data = blockConsMEs[[set]]$data)
} else {
for (set in 1:nSets) {
consMEs[[set]]$data <- cbind(consMEs[[set]]$data, blockConsMEs[[set]]$data)
}
}
# Update allLabels
allLabelIndex <- c(allLabelIndex, blockLabelIndex)
allLabels[block[blockAssigned]] <- blockLabels[blockAssigned]
gc()
blockNo <- blockNo + 1
}
# Check whether any of the already assigned genes (in this or previous blocks) should be re-assigned
if (verbose > 2) {
message(paste(spaces, "....checking for genes that should be reassigned.."))
}
deleteModules <- NULL
goodLabels <- allLabels[gsg$goodGenes]
if (sum(goodLabels != 0) > 0) {
propLabels <- goodLabels[goodLabels != 0]
assGenes <- (c(1:nGenes)[gsg$goodGenes])[goodLabels != 0]
corEval <- parse(text = paste(
.corFnc[intCorType],
"(multiExpr[[set]]$data[, goodLabels!=0], consMEs[[set]]$data",
prepComma(.corOptions[intCorType]), ")"
))
nMods <- ncol(consMEs[[1]]$data)
lpValues <- array(0, dim = c(length(propLabels), nMods, nSets))
sumSign <- array(0, dim = c(length(propLabels), nMods))
if (verbose > 3) {
message(paste(spaces, "......module membership p-values.."))
}
for (set in 1:nSets)
{
KME <- eval(corEval)
if (intNetworkType == 1) KME <- abs(KME)
lpValues[, , set] <- -2 * log(corPvalueFisher(KME, nGSamples[set], twoSided = FALSE))
sumSign <- sumSign + sign(KME)
}
if (verbose > 3) {
message(paste(spaces, "......module membership scores.."))
}
scoreAll <- as.matrix(apply(lpValues, c(1, 2), sum)) * (nSets + sumSign) / (2 * nSets)
scoreAll[!is.finite(scoreAll)] <- 0.001 # This low should be enough
bestScore <- apply(scoreAll, 1, max)
if (intNetworkType == 1) sumSign <- abs(sumSign)
if (verbose > 3) {
cat(paste(spaces, "......individual modules.."))
pind <- initProgInd()
}
for (mod in 1:nMods)
{
modGenes <- c(1:length(propLabels))[propLabels == allLabelIndex[mod]]
scoreMod <- scoreAll[modGenes, mod]
candidates <- (bestScore[modGenes] > scoreMod)
candidates[!is.finite(candidates)] <- FALSE
if (sum(candidates) > 0) {
pModule <- pchisq(scoreMod[candidates], nSets, log.p = TRUE)
whichBest <- apply(scoreAll[modGenes[candidates], ], 1, which.max)
pBest <- pchisq(bestScore[modGenes[candidates]], nSets, log.p = TRUE)
reassign <- ifelse(is.finite(pBest - pModule),
((pBest - pModule) < log(reassignThreshold)),
FALSE
)
if (sum(reassign) > 0) {
allLabels[assGenes[modGenes[candidates][reassign]]] <- whichBest[reassign]
changedModules <- union(changedModules, whichBest[reassign])
if (length(modGenes) - sum(reassign) < minModuleSize) {
deleteModules <- union(deleteModules, mod)
} else {
changedModules <- union(changedModules, mod)
}
}
}
if (verbose > 3) pind <- updateProgInd(mod / nMods, pind)
}
rm(lpValues, sumSign, scoreAll)
if (verbose > 3) message("")
}
# Remove marked modules
if (!is.null(deleteModules)) {
# for (set in 1:nSets) consMEs[[set]]$data = consMEs[[set]]$data[, -deleteModules];
modGenes <- is.finite(match(allLabels, allLabelIndex[deleteModules]))
allLabels[modGenes] <- 0
# allLabelIndex = allLabelIndex[-deleteModules];
}
if (verbose > 1) message(paste(spaces, "..merging consensus modules that are too close.."))
# print(table(allLabels));
# print(is.numeric(allLabels))
if (numericLabels) {
colors <- allLabels
} else {
colors <- labels2colors(allLabels)
}
mergedColors <- colors
mergedMods <- try(mergeCloseModules(multiExpr, colors[gsg$goodGenes],
consensusQuantile = mergeConsensusQuantile,
cutHeight = mergeCutHeight,
relabel = TRUE, impute = impute,
verbose = verbose - 2, indent = indent + 2
), silent = TRUE)
if (class(mergedMods) == "try-error") {
if (verbose > 0) {
message(paste(
spaces, "blockwiseConsensusModules: mergeCloseModule failed with this message:\n",
spaces, " ", mergedMods, spaces,
"---> returning unmerged consensus modules"
))
} else {
warning(paste(
"blockwiseConsensusModules: mergeCloseModule failed with this message:\n ",
mergedMods, "---> returning unmerged consensus modules"
))
}
MEs <- try(multiSetMEs(multiExpr,
universalColors = colors[gsg$goodGenes]
# trapErrors = TRUE, returnValidOnly = TRUE
), silent = TRUE)
if (class(MEs) == "try-error") {
warning(paste(
"blockwiseConsensusModules: ME calculation failed with this message:\n ",
MEs, "---> returning empty module eigengenes"
))
allSampleMEs <- NULL
} else {
if (!MEs[[1]]$allOK) mergedColors[gsg$goodGenes] <- MEs[[1]]$validColors
allSampleMEs <- vector(mode = "list", length = nSets)
for (set in 1:nSets)
{
allSampleMEs[[set]] <-
list(data = as.data.frame(matrix(NA, nrow = nGSamples[set], ncol = ncol(MEs[[set]]$data))))
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] <- MEs[[set]]$data[, ]
names(allSampleMEs[[set]]$data) <- names(MEs[[set]]$data)
}
}
} else {
mergedColors[gsg$goodGenes] <- mergedMods$colors
allSampleMEs <- vector(mode = "list", length = nSets)
for (set in 1:nSets)
{
allSampleMEs[[set]] <-
list(data = as.data.frame(matrix(NA,
nrow = nGSamples[set],
ncol = ncol(mergedMods$newMEs[[1]]$data)
)))
allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] <- mergedMods$newMEs[[set]]$data[, ]
names(allSampleMEs[[set]]$data) <- names(mergedMods$newMEs[[set]]$data)
}
}
list(
colors = mergedColors,
unmergedColors = colors,
multiMEs = allSampleMEs
# goodSamples = gsg$goodSamples,
# goodGenes = gsg$goodGenes,
# dendrograms = dendros,
# blockGenes = blockGenes,
# originCount = originCount,
# TOMFiles = TOMFiles
)
}
# ======================================================================================================
#
# preliminary partitioning
#
# ======================================================================================================
projectiveKMeans <- function(
datExpr,
preferredSize = 5000,
nCenters = as.integer(min(ncol(datExpr) / 20, preferredSize^2 / ncol(datExpr))),
sizePenaltyPower = 4,
networkType = "unsigned",
randomSeed = 54321,
checkData = TRUE,
imputeMissing = TRUE,
maxIterations = 1000,
verbose = 0, indent = 0) {
centerGeneDist <- function(centers, oldDst = NULL, changed = c(1:nCenters),
blockSize = 50000, verbose = 0, spaces = "") {
if (is.null(oldDst)) {
oldDst <- array(0, c(nCenters, nGenes))
changed <- c(1:nCenters)
}
dstAll <- oldDst
nChanged <- length(changed)
nBlocks <- ceiling(ncol(datExpr) / blockSize)
blocks <- allocateJobs(ncol(datExpr), nBlocks)
if (verbose > 5) {
pind <- initProgInd(spaste(spaces, " ..centerGeneDist: "))
}
for (b in 1:nBlocks)
{
if (intNetworkType == 1) {
dst <- 1 - abs(cor(centers[, changed], datExpr[, blocks[[b]]]))
} else {
dst <- 1 - cor(centers[, changed], datExpr[, blocks[[b]]])
}
dstAll[changed, blocks[[b]]] <- dst
if (verbose > 5) pind <- updateProgInd(b / nBlocks, pind)
}
dstAll
}
memberCenterDist <- function(dst, membership, sizes = NULL, changed = c(1:nCenters), oldMCDist = NULL) {
if (is.null(oldMCDist)) {
changed <- c(1:nCenters)
oldMCDist <- rep(0, nGenes)
}
centerDist <- oldMCDist
if (!is.null(changed)) {
if (is.null(sizes)) sizes <- table(membership)
if (length(sizes) != nCenters) {
sizes2 <- rep(0, nCenters)
sizes2[as.numeric(names(sizes))] <- sizes
sizes <- sizes2
}
if (is.finite(sizePenaltyPower)) {
sizeCorrections <- (sizes / preferredSize)^sizePenaltyPower
sizeCorrections[sizeCorrections < 1] <- 1
} else {
sizeCorrections <- rep(1, length(sizes))
sizeCorrections[sizes > preferredSize] <- Inf
}
for (cen in changed) {
if (sizes[cen] != 0) {
if (is.finite(sizeCorrections[cen])) {
centerDist[membership == cen] <- dst[cen, membership == cen] * sizeCorrections[cen]
} else {
centerDist[membership == cen] <- 10 + dst[cen, membership == cen]
}
}
}
}
centerDist
}
spaces <- indentSpaces(indent)
if (verbose > 0) {
message(paste(spaces, "Projective K-means:"))
}
datExpr <- scale(as.matrix(datExpr))
if (any(is.na(datExpr))) {
if (imputeMissing) {
message(spaste(
spaces, "projectiveKMeans: imputing missing data in 'datExpr'.\n",
"To reproduce older results, use 'imputeMissing = FALSE'. "
))
datExpr <- t(impute.knn(t(datExpr))$data)
} else {
message(spaste(
spaces, "projectiveKMeans: there are missing data in 'datExpr'.\n",
"SVD will not work; will use a weighted mean approximation."
))
}
}
# if (preferredSize >= floor(sqrt(2^31)) )
# stop("'preferredSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (preferredSize >= sqrt(2^31) - 1) {
message(spaste(
spaces, " Support for blocks larger than sqrt(2^31) is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."
))
}
if (exists(".Random.seed")) {
seedSaved <- TRUE
savedSeed <- .Random.seed
} else {
seedSaved <- FALSE
}
set.seed(randomSeed)
nAllSamples <- nrow(datExpr)
nAllGenes <- ncol(datExpr)
intNetworkType <- charmatch(networkType, .networkTypes)
if (is.na(intNetworkType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
if (verbose > 1) {
message(paste(spaces, "..using", nCenters, "centers."))
}
# Check data for genes and samples that have too many missing values
if (checkData) {
if (verbose > 0) {
message(paste(spaces, "..checking data for excessive number of missing values.."))
}
gsg <- goodSamplesGenes(datExpr, verbose = verbose - 1, indent = indent + 1)
if (!gsg$allOK) datExpr <- datExpr[gsg$goodSamples, gsg$goodGenes]
}
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)
datExpr[is.na(datExpr)] <- 0
centers <- matrix(0, nSamples, nCenters)
randGeneIndex <- sample(nGenes, size = nGenes)
temp <- rep(c(1:nCenters), times = ceiling(nGenes / nCenters))
membership <- temp[randGeneIndex]
if (verbose > 0) {
message(paste(spaces, "..k-means clustering.."))
}
changed <- c(1:nCenters)
dst <- NULL
centerDist <- NULL
iteration <- 0
while (!is.null(changed) && (iteration < maxIterations)) {
iteration <- iteration + 1
if (verbose > 1) message(paste(spaces, " ..iteration", iteration))
clusterSizes <- table(membership)
if (verbose > 5) pind <- initProgInd(paste(spaces, " ....calculating centers: "))
for (cen in sort(changed))
{
centers[, cen] <- .alignedFirstPC(datExpr[, membership == cen],
verbose = verbose - 2,
indent = indent + 2
)
if (verbose > 5) pind <- updateProgInd(cen / nCenters, pind)
}
if (verbose > 5) {
pind <- updateProgInd(1, pind)
message("")
}
if (verbose > 5) message(paste(spaces, " ....calculating center to gene distances"))
dst <- centerGeneDist(centers, dst, changed, verbose = verbose, spaces = spaces)
centerDist <- memberCenterDist(dst, membership, clusterSizes, changed, centerDist)
nearestDist <- rep(0, nGenes)
nearest <- rep(0, nGenes)
if (verbose > 5) message(paste(spaces, " ....finding nearest center for each gene"))
minRes <- .Call("minWhich_call", dst, 0L, PACKAGE = "WGCNA")
nearestDist <- minRes$min
nearest <- minRes$which
if (sum(centerDist > nearestDist) > 0) {
proposedMemb <- nearest
accepted <- FALSE
while (!accepted && (sum(proposedMemb != membership) > 0)) {
if (verbose > 2) {
cat(paste(spaces, " ..proposing to move", sum(proposedMemb != membership), "genes"))
}
moved <- c(1:nGenes)[proposedMemb != membership]
newCentDist <- memberCenterDist(dst, proposedMemb)
gotWorse <- newCentDist[moved] > centerDist[moved]
if (sum(!is.finite(gotWorse)) > 0) {
warning("Have NAs in gotWorse.")
}
if (sum(gotWorse) == 0) {
accepted <- TRUE
if (verbose > 2) message(paste("..move accepted."))
} else {
if (verbose > 2) message(paste("..some genes got worse. Trying again."))
ord <- order(centerDist[moved[gotWorse]] - newCentDist[moved[gotWorse]])
n <- ceiling(length(ord) * 3 / 5)
proposedMemb[moved[gotWorse][ord[c(1:n)]]] <- membership[moved[gotWorse][ord[c(1:n)]]]
}
}
if (accepted) {
propSizes <- table(proposedMemb)
keep <- as.numeric(names(propSizes))
centers <- centers[, keep]
dst <- dst[keep, ]
changedAll <- union(membership[moved], proposedMemb[moved])
changedKeep <- changedAll[is.finite(match(changedAll, keep))]
changed <- rank(changedKeep) # This is a way to make say 1,3,4 into 1,2,3
membership <- as.numeric(as.factor(proposedMemb))
if ((verbose > 1) && (sum(keep) < nCenters)) {
message(paste(
spaces, " ..dropping", nCenters - sum(keep),
"centers because ther clusters are empty."
))
}
nCenters <- length(keep)
} else {
changed <- NULL
if (verbose > 2) message(paste("Could not find a suitable move to improve the clustering."))
}
} else {
changed <- NULL
if (verbose > 2) {
message(paste("Clustering is stable: all genes are closest to their assigned center."))
}
}
if (verbose > 5) {
message("Sizes of biggest preliminary clusters:")
order <- order(-as.numeric(clusterSizes))
print(as.numeric(clusterSizes)[order[1:min(100, length(order))]])
}
}
if (verbose > 2 & verbose < 6) {
message("Sizes of preliminary clusters:")
print(clusterSizes)
}
# merge nearby clusters if their sizes allow merging
if (verbose > 0) message(paste(spaces, "..merging smaller clusters..."))
small <- (clusterSizes < preferredSize)
done <- FALSE
while (!done & (sum(small) > 1)) {
smallIndex <- c(1:nCenters)[small]
nSmall <- sum(small)
if (intNetworkType == 1) {
clustDist <- 1 - abs(cor(centers[, small]))
} else {
clustDist <- 1 - cor(centers[, small])
}
diag(clustDist) <- 10
distOrder <- order(as.vector(clustDist))[seq(from = 2, to = nSmall * (nSmall - 1), by = 2)]
i <- 1
canMerge <- FALSE
while (i <= length(distOrder) && (!canMerge)) {
whichJ <- smallIndex[as.integer((distOrder[i] - 1) / nSmall + 1)]
whichI <- smallIndex[distOrder[i] - (whichJ - 1) * nSmall]
canMerge <- sum(clusterSizes[c(whichI, whichJ)]) < preferredSize
i <- i + 1
}
if (canMerge) {
membership[membership == whichJ] <- whichI
clusterSizes[whichI] <- clusterSizes[whichI] + clusterSizes[whichJ]
centers[, whichI] <- .alignedFirstPC(datExpr[, membership == whichI],
verbose = verbose - 2,
indent = indent + 2
)
nCenters <- nCenters - 1
if (verbose > 3) {
message(paste(
spaces, " ..merged clusters", whichI, "and", whichJ,
"whose combined size is", clusterSizes[whichI]
))
}
membership[membership > whichJ] <- membership[membership > whichJ] - 1
centers <- centers[, -whichJ]
clusterSizes <- clusterSizes[-whichJ]
small <- (clusterSizes < preferredSize)
} else {
done <- TRUE
}
}
if (checkData) {
membershipAll <- rep(NA, nAllGenes)
membershipAll[gsg$goodGenes] <- membership
} else {
membershipAll <- membership
}
if (seedSaved) .Random.seed <<- savedSeed
if (verbose > 2) {
message("Sorted sizes of final clusters:")
print(sort(as.numeric(table(membership))))
}
return(list(clusters = membershipAll, centers = centers))
}
# ======================================================================================================
#
# Consensus preliminary partitioning
#
# ======================================================================================================
consensusProjectiveKMeans <- function(
multiExpr,
preferredSize = 5000,
nCenters = NULL,
sizePenaltyPower = 4,
networkType = "unsigned",
randomSeed = 54321,
checkData = TRUE,
imputeMissing = TRUE,
useMean = (length(multiExpr) > 3),
maxIterations = 1000,
verbose = 0, indent = 0) {
centerGeneDist <- function(centers, oldDst = NULL, changed = c(1:nCenters)) {
if (is.null(oldDst)) {
oldDst <- array(0, c(nCenters, nGenes))
changed <- c(1:nCenters)
}
dstAll <- oldDst
nChanged <- length(changed)
if (nChanged != 0) {
dstX <- array(0, c(nSets, nChanged, nGenes))
for (set in 1:nSets) {
if (intNetworkType == 1) {
dstX[set, , ] <- 1 - abs(cor(centers[[set]]$data[, changed], multiExpr[[set]]$data))
} else {
dstX[set, , ] <- 1 - cor(centers[[set]]$data[, changed], multiExpr[[set]]$data)
}
}
dst <- array(0, c(nChanged, nGenes))
if (useMean) {
dstAll[changed, ] <- base::colMeans(dstX, dims = 1)
} else {
dstAll[changed, ] <- -minWhichMin(-dstX, byRow = FALSE, dims = 1)$min
}
}
dstAll
}
memberCenterDist <- function(dst, membership, sizes = NULL, changed = c(1:nCenters), oldMCDist = NULL) {
if (is.null(oldMCDist)) {
changed <- c(1:nCenters)
oldMCDist <- rep(0, nGenes)
}
centerDist <- oldMCDist
if (!is.null(changed)) {
if (is.null(sizes)) sizes <- table(membership)
if (length(sizes) != nCenters) {
sizes2 <- rep(0, nCenters)
sizes2[as.numeric(names(sizes))] <- sizes
sizes <- sizes2
}
if (is.finite(sizePenaltyPower)) {
sizeCorrections <- (sizes / preferredSize)^sizePenaltyPower
sizeCorrections[sizeCorrections < 1] <- 1
} else {
sizeCorrections <- rep(1, length(sizes))
sizeCorrections[sizes > preferredSize] <- Inf
}
for (cen in changed) {
if (sizes[cen] != 0) {
if (is.finite(sizeCorrections[cen])) {
centerDist[membership == cen] <- dst[cen, membership == cen] * sizeCorrections[cen]
} else {
centerDist[membership == cen] <- 10 + dst[cen, membership == cen]
}
}
}
}
centerDist
}
spaces <- indentSpaces(indent)
if (verbose > 0) {
message(paste(spaces, "Consensus projective K-means:"))
}
allSize <- checkSets(multiExpr)
nSamples <- allSize$nSamples
nGenes <- allSize$nGenes
nSets <- allSize$nSets
# if (preferredSize >= floor(sqrt(2^31)) )
# stop("'preferredSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.")
if (preferredSize >= sqrt(2^31) - 1) {
message(spaste(
spaces, " Support for blocks larger than sqrt(2^31) is experimental; please report\n",
spaces, " any issues to Peter.Langfelder@gmail.com."
))
}
if (exists(".Random.seed")) {
seedSaved <- TRUE
savedSeed <- .Random.seed
} else {
seedSaved <- FALSE
}
set.seed(randomSeed)
if (is.null(nCenters)) nCenters <- as.integer(min(nGenes / 20, 100 * nGenes / preferredSize))
if (verbose > 1) {
message(paste(spaces, "..using", nCenters, "centers."))
}
intNetworkType <- charmatch(networkType, .networkTypes)
if (is.na(intNetworkType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
for (set in 1:nSets) {
multiExpr[[set]]$data <- scale(as.matrix(multiExpr[[set]]$data))
}
# Check data for genes and samples that have too many missing values
if (checkData) {
if (verbose > 0) {
message(paste(spaces, "..checking data for excessive number of missing values.."))
}
gsg <- goodSamplesGenesMS(multiExpr, verbose = verbose - 1, indent = indent + 1)
for (set in 1:nSets)
{
if (!gsg$allOK) {
multiExpr[[set]]$data <- scale(multiExpr[[set]]$data[gsg$goodSamples[[set]], gsg$goodGenes])
}
}
}
anyNA <- mtd.apply(multiExpr, function(x) any(is.na(x)), mdaSimplify = TRUE)
if (imputeMissing && any(anyNA)) {
if (verbose > 0) {
message(paste(spaces, "..imputing missing data.."))
}
multiExpr[anyNA] <- mtd.apply(multiExpr[anyNA], function(x) t(impute.knn(t(x))$data), mdaVerbose = verbose > 1)
} else if (any(anyNA)) {
message(paste(
spaces, "Found missing data. These will be replaced by zeros;\n",
spaces, " for a better replacement, use 'imputeMissing=TRUE'."
))
for (set in 1:nSets) {
multiExpr[[set]]$data[is.na(multiExpr[[set]]$data)] <- 0
}
}
setSize <- checkSets(multiExpr)
nGenes <- setSize$nGenes
nSamples <- setSize$nSamples
centers <- vector(mode = "list", length = nSets)
for (set in 1:nSets) {
centers[[set]] <- list(data = matrix(0, nSamples[set], nCenters))
}
randGeneIndex <- sample(nGenes, size = nGenes)
temp <- rep(c(1:nCenters), times = ceiling(nGenes / nCenters))
membership <- temp[randGeneIndex]
if (verbose > 0) {
message(paste(spaces, "..consensus k-means clustering.."))
}
changed <- c(1:nCenters)
dst <- NULL
iteration <- 0
centerDist <- NULL
while (!is.null(changed) && (iteration < maxIterations)) {
iteration <- iteration + 1
if (verbose > 1) message(paste(spaces, " ..iteration", iteration))
clusterSizes <- table(membership)
for (set in 1:nSets) {
for (cen in changed) {
centers[[set]]$data[, cen] <- .alignedFirstPC(multiExpr[[set]]$data[, membership == cen],
verbose = verbose - 2, indent = indent + 2
)
}
}
dst <- centerGeneDist(centers, dst, changed)
centerDist <- memberCenterDist(dst, membership, clusterSizes, changed, centerDist)
changed <- NULL
minRes <- minWhichMin(dst)
nearestDist <- minRes$min
nearest <- minRes$which
if (sum(centerDist > nearestDist) > 0) {
proposedMemb <- nearest
accepted <- FALSE
while (!accepted && (sum(proposedMemb != membership) > 0)) {
if (verbose > 2) {
cat(paste(spaces, " ..proposing to move", sum(proposedMemb != membership), "genes"))
}
moved <- c(1:nGenes)[proposedMemb != membership]
newCentDist <- memberCenterDist(dst, proposedMemb)
gotWorse <- newCentDist[moved] > centerDist[moved]
if (sum(gotWorse) == 0) {
accepted <- TRUE
if (verbose > 2) message(paste("..move accepted."))
} else {
if (verbose > 2) message(paste("..some genes got worse. Trying again."))
ord <- order(centerDist[moved[gotWorse]] - newCentDist[moved[gotWorse]])
n <- ceiling(length(ord) * 3 / 5)
proposedMemb[moved[gotWorse][ord[c(1:n)]]] <- membership[moved[gotWorse][ord[c(1:n)]]]
}
}
if (accepted) {
propSizes <- table(proposedMemb)
keep <- as.numeric(names(propSizes))
for (set in 1:nSets) {
centers[[set]]$data <- centers[[set]]$data[, keep]
}
dst <- dst[keep, ]
changedAll <- union(membership[moved], proposedMemb[moved])
changedKeep <- changedAll[is.finite(match(changedAll, keep))]
changed <- rank(changedKeep) # This is a way to make say 1,3,4 into 1,2,3
membership <- as.numeric(as.factor(proposedMemb))
if ((verbose > 1) && (sum(keep) < nCenters)) {
message(paste(
spaces, " ..dropping", nCenters - sum(keep),
"centers because ther clusters are empty."
))
}
nCenters <- length(keep)
} else {
changed <- NULL
if (verbose > 2) message(paste("Could not find a suitable move to improve the clustering."))
}
} else {
changed <- NULL
if (verbose > 2) {
message(paste("Clustering is stable: all genes are closest to their assigned center."))
}
}
}
consCenterDist <- function(centers, select) {
if (is.logical(select)) {
nC <- sum(select)
} else {
nC <- length(select)
}
distX <- array(0, dim = c(nSets, nC, nC))
for (set in 1:nSets)
{
if (intNetworkType == 1) {
distX[set, , ] <- 1 - abs(cor(centers[[set]]$data[, select]))
} else {
distX[set, , ] <- 1 - cor(centers[[set]]$data[, select])
}
}
-minWhichMin(-distX, byRow = FALSE, dims = 1)$min
}
unmergedMembership <- membership
unmergedCenters <- centers
# merge nearby clusters if their sizes allow merging
if (verbose > 0) message(paste(spaces, "..merging smaller clusters..."))
clusterSizes <- as.vector(table(membership))
small <- (clusterSizes < preferredSize)
done <- FALSE
while (!done & (sum(small) > 1)) {
smallIndex <- c(1:nCenters)[small]
nSmall <- sum(small)
clustDist <- consCenterDist(centers, smallIndex)
diag(clustDist) <- 10
distOrder <- order(as.vector(clustDist))[seq(from = 2, to = nSmall * (nSmall - 1), by = 2)]
i <- 1
canMerge <- FALSE
while (i <= length(distOrder) && (!canMerge)) {
whichJ <- smallIndex[as.integer((distOrder[i] - 1) / nSmall + 1)]
whichI <- smallIndex[distOrder[i] - (whichJ - 1) * nSmall]
canMerge <- sum(clusterSizes[c(whichI, whichJ)]) < preferredSize
i <- i + 1
}
if (canMerge) {
membership[membership == whichJ] <- whichI
clusterSizes[whichI] <- sum(clusterSizes[c(whichI, whichJ)])
# if (verbose > 1)
# message(paste(spaces, " ..merged clusters", whichI, "and", whichJ,
# "whose combined size is", clusterSizes[whichI]));
for (set in 1:nSets) {
centers[[set]]$data[, whichI] <- .alignedFirstPC(multiExpr[[set]]$data[, membership == whichI],
verbose = verbose - 2, indent = indent + 2
)
}
for (set in 1:nSets) {
centers[[set]]$data <- centers[[set]]$data[, -whichJ]
}
membership[membership > whichJ] <- membership[membership > whichJ] - 1
nCenters <- nCenters - 1
clusterSizes <- as.vector(table(membership))
small <- (clusterSizes < preferredSize)
if (verbose > 3) {
message(paste(
spaces, " ..merged clusters", whichI, "and", whichJ,
"whose combined size is", clusterSizes[whichI]
))
}
} else {
done <- TRUE
}
}
if (checkData) {
membershipAll <- rep(NA, allSize$nGenes)
membershipAll[gsg$goodGenes] <- membership
} else {
membershipAll <- membership
}
if (seedSaved) .Random.seed <<- savedSeed
return(list(
clusters = membershipAll, centers = centers, unmergedClusters = unmergedMembership,
unmergedCenters = unmergedCenters
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.