# module preservation for networks specified by adjacency matrices, not expression data.
# this version can handle both expression and adjacency matrices.
# =====================================================================================================
#
# p-value functions
#
# =====================================================================================================
# Assumes Z in the form of a list, Z[[ref]][[test]] is a matrix of Z scores except the first column is
# assumed to be moduleSize.
# Returned value is log10 of the p-value
#' @title .pValueFromZ
#'
#' @param Z
#' @param bonf
#' @param summaryCols
#' @param summaryInd
#'
#' @return
#'
.pValueFromZ <- function(Z, bonf = FALSE, summaryCols = NULL, summaryInd = NULL) {
p <- Z # This carries over all necessary names and missing values when ref==test
nRef <- length(Z)
for (ref in 1:nRef)
{
nTest <- length(Z[[ref]])
for (test in 1:nTest)
{
zz <- Z[[ref]][[test]]
if (length(zz) > 1) {
names <- colnames(zz)
# Try to be a bit intelligent about whether to ignore the first column
if (names[1] == "moduleSize") ignoreFirst <- TRUE else ignoreFirst <- FALSE
ncol <- ncol(zz)
range <- c((1 + as.numeric(ignoreFirst)):ncol)
p[[ref]][[test]][, range] <- pnorm(as.matrix(zz[, range]), lower.tail = FALSE, log.p = TRUE) / log(10)
Znames <- names[range]
if (bonf) {
pnames <- sub("Z", "log.p.Bonf", Znames, fixed = TRUE)
p[[ref]][[test]][, range] <- p[[ref]][[test]][, range] + log10(nrow(p[[ref]][[test]]))
biggerThan1 <- p[[ref]][[test]][, range] > 0
p[[ref]][[test]][, range][biggerThan1] <- 0 # Remember that the p-values are stored in log form
} else {
pnames <- sub("Z", "log.p", Znames, fixed = TRUE)
}
if (!is.null(summaryCols)) {
for (c in 1:length(summaryCols))
{
medians <- apply(p[[ref]][[test]][, summaryInd[[c]], drop = FALSE], 1, median, na.rm = TRUE)
p[[ref]][[test]][, summaryCols[c]] <- medians
}
}
colnames(p[[ref]][[test]])[range] <- pnames
}
}
}
p
}
#' @title .qValueFromP
#'
#' @param p
#' @param summaryCols
#' @param summaryInd
#'
#' @return
#'
.qValueFromP <- function(p, summaryCols = NULL, summaryInd = NULL) {
q <- p # This carries over all necessary names and missing values when ref==test
nRef <- length(p)
for (ref in 1:nRef)
{
nTest <- length(p[[ref]])
for (test in 1:nTest)
{
pp <- p[[ref]][[test]]
if (length(pp) > 1) {
names <- colnames(pp)
# Try to be a bit intelligent about whether to ignore the first column
if (names[1] == "moduleSize") {
ignoreFirst <- TRUE
} else {
ignoreFirst <- FALSE
}
ncol <- ncol(pp)
nrow <- nrow(pp)
range <- c((1 + as.numeric(ignoreFirst)):ncol)
for (col in range)
{
xx <- try(qvalue(10^pp[, col]), silent = TRUE)
if (class(xx) == "try-error" || length(xx) == 1) {
q[[ref]][[test]][, col] <- rep(NA, nrow)
printFlush(paste(
"Warning in modulePreservation: qvalue calculation failed for",
"column", col, "for reference set", ref, "and test set", test
))
} else {
q[[ref]][[test]][, col] <- xx$qvalues
}
}
colnames(q[[ref]][[test]])[range] <- sub("log.p", "q", names[range], fixed = TRUE)
if (!is.null(summaryCols)) {
for (c in 1:length(summaryCols))
{
xx <- log10(as.matrix(q[[ref]][[test]][, summaryInd[[c]], drop = FALSE]))
# Restrict all -logs > 1000 to 1000:
xx[!is.na(xx) & !is.finite(xx)] <- -1000
xx[!is.na(xx) & (xx < -1000)] <- -1000
medians <- apply(xx, 1, median, na.rm = TRUE)
q[[ref]][[test]][, summaryCols[c]] <- 10^medians
}
}
}
}
}
q
}
#' @title modulePreservation
#'
#' @param multiData
#' @param multiColor
#' @param dataIsExpr
#' @param networkType
#' @param corFnc
#' @param corOptions
#' @param referenceNetworks
#' @param testNetworks
#' @param nPermutations
#' @param includekMEallInSummary
#' @param restrictSummaryForGeneralNetworks
#' @param calculateQvalue
#' @param randomSeed
#' @param maxGoldModuleSize
#' @param maxModuleSize
#' @param quickCor
#' @param ccTupletSize
#' @param calculateCor.kIMall
#' @param calculateClusterCoeff
#' @param useInterpolation
#' @param checkData
#' @param greyName
#' @param savePermutedStatistics
#' @param loadPermutedStatistics
#' @param permutedStatisticsFile
#' @param plotInterpolation
#' @param interpolationPlotFile
#' @param discardInvalidOutput
#' @param parallelCalculation
#' @param verbose
#' @param indent
#'
#' @importFrom foreach foreach %dopar%
#'
#' @return
#' @export
#'
#' @examples
modulePreservation <- function(
multiData,
multiColor,
dataIsExpr = TRUE,
networkType = "unsigned",
corFnc = "cor",
corOptions = "use = 'p'",
referenceNetworks = 1,
testNetworks = NULL,
nPermutations = 100,
includekMEallInSummary = FALSE,
restrictSummaryForGeneralNetworks = TRUE,
calculateQvalue = FALSE,
randomSeed = 12345,
maxGoldModuleSize = 1000, maxModuleSize = 1000,
quickCor = 1,
ccTupletSize = 2,
calculateCor.kIMall = FALSE,
calculateClusterCoeff = FALSE,
useInterpolation = FALSE,
checkData = TRUE,
greyName = NULL,
savePermutedStatistics = TRUE,
loadPermutedStatistics = FALSE,
permutedStatisticsFile =
if (useInterpolation) "permutedStats-intrModules.RData" else "permutedStats-actualModules.RData",
plotInterpolation = TRUE,
interpolationPlotFile = "modulePreservationInterpolationPlots.pdf",
discardInvalidOutput = TRUE,
parallelCalculation = FALSE,
verbose = 1, indent = 0) {
sAF <- options("stringsAsFactors")
options(stringsAsFactors = FALSE)
on.exit(options(stringsAsFactors = sAF[[1]]), TRUE)
if (!is.null(randomSeed)) {
if (exists(".Random.seed")) {
savedSeed <- .Random.seed
on.exit(
{
.Random.seed <<- savedSeed
},
TRUE
)
}
set.seed(randomSeed)
}
spaces <- indentSpaces(indent)
nType <- charmatch(networkType, .networkTypes)
if (is.na(nType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
# Check that the multiData/multiAdj has correct structure
nNets <- length(multiData)
nGenes <- sapply(multiData, sapply, ncol)
if (checkData) {
if (dataIsExpr) {
.checkExpr(multiData, verbose, indent)
} else {
.checkAdj(multiData, verbose, indent)
}
}
# Check for presence of dimnames, assign if none, and make them unique.
multiData <- mtd.apply(multiData, function(.data) {
if (is.null(colnames(.data))) colnames(.data) <- spaste("Column.", 1:ncol(.data))
colnames(.data) <- make.unique(colnames(.data))
.data
})
if (!dataIsExpr) {
multiData <- mtd.apply(multiData, function(.data) {
rownames(.data) <- colnames(.data)
.data
})
}
# Check for names; if there are none, create artificial labels.
setNames <- names(multiData)
if (is.null(setNames)) {
setNames <- paste("Set", c(1:nNets), sep = "")
}
# Check that referenceNetworks is valid
referenceNetworks <- as.numeric(referenceNetworks)
if (any(is.na(referenceNetworks))) {
stop("All elements of referenceNetworks must be numeric and present.")
}
if (any(referenceNetworks < 1) | any(referenceNetworks > nNets)) {
stop("referenceNetworks contains elements outside of the allowed range. ")
}
nRefNets <- length(referenceNetworks)
# Check testNetworks
if (is.null(testNetworks)) {
testNetworks <- list()
for (ref in 1:nRefNets) testNetworks[[ref]] <- c(1:nNets)[-referenceNetworks[ref]]
}
# Check that testNetworks was specified correctly
if (!is.list(testNetworks) && nRefNets > 1) {
stop(
"When there are more than 1 reference networks, 'testNetworks' must\n",
" be a list with one component per reference network."
)
}
if (!is.list(testNetworks)) testNetworks <- list(testNetworks)
if (length(testNetworks) != nRefNets) {
stop("Length of 'testNetworks' must be the same as length of 'referenceNetworks'.")
}
for (ref in 1:nRefNets)
{
if (any(testNetworks[[ref]] < 1 || testNetworks[[ref]] > nNets)) {
stop("Some entries of testNetworks[[", ref, "]] are out of range.")
}
}
# Check multiColor
if (class(multiColor) != "list") {
stop("multiColor does not appear to have the correct format.")
}
if (length(multiColor) != nNets) {
multiColor2 <- list()
if (length(names(multiColor)) != length(multiColor)) {
stop("Each entry of 'multiColor' must have a name.")
}
color2expr <- match(names(multiColor), setNames)
if (any(is.na(color2expr))) {
stop("Entries of 'multiColor' must name-match entries in 'multiData'.")
}
for (s in 1:nNets)
{
# multiData[[s]]$data = as.matrix(multiData[[s]]$data);
loc <- which(names(multiColor) %in% setNames[s])
if (length(loc) == 0) {
multiColor2[[s]] <- NA
} else {
multiColor2[[s]] <- multiColor[[loc]]
}
}
multiColor <- multiColor2
rm(multiColor2)
}
s <- 1
while ((s <= nNets) & !.cvPresent(multiColor[[s]])) s <- s + 1
if (s == nNets + 1) {
stop("No valid color lists found.")
}
if (is.null(greyName)) {
if (is.numeric(multiColor[[s]])) # Caution: need a valid s here.
{
greyName <- 0
goldName <- 0.1
} else {
greyName <- "grey"
goldName <- "gold"
}
} else {
if (is.numeric(greyName)) goldName <- 0.1 else goldName <- "gold"
}
if (verbose > 2) {
printFlush(paste(
" ..unassigned 'module' name:", greyName,
"\n ..all network sample 'module' name:", goldName
))
}
MEgrey <- paste("ME", greyName, sep = "")
MEgold <- paste("ME", goldName, sep = "")
for (s in 1:nNets)
{
if (.cvPresent(multiColor[[s]]) & nGenes[s] != length(multiColor[[s]])) {
stop(paste("Color vector for set", s, "does not have the correct number of entries."))
}
if (is.factor(multiColor[[s]])) multiColor[[s]] <- as.character(multiColor[[s]])
}
keepGenes <- list()
nNAs <- sapply(multiColor, .nNAColors)
if (any(nNAs > 0)) {
if (verbose > 0) printFlush(paste(spaces, " ..removing genes with missing color..."))
for (s in 1:nNets) {
if (.cvPresent(multiColor[[s]])) {
keepGenes[[s]] <- !is.na(multiColor[[s]])
if (dataIsExpr) {
multiData[[s]]$data <- multiData[[s]]$data[, keepGenes[[s]]]
} else {
multiData[[s]]$data <- multiData[[s]]$data[keepGenes[[s]], keepGenes[[s]]]
}
multiColor[[s]] <- multiColor[[s]][keepGenes[[s]]]
}
}
}
# Check for set names; if there are none, create artificial labels.
if (is.null(names(multiData))) {
setNames <- paste("Set_", c(1:nNets), sep = "")
} else {
setNames <- names(multiData)
}
# Check that multiData has valid colnames
for (s in 1:nNets) {
if (is.null(colnames(multiData[[s]]$data))) {
stop(paste(
"Matrix of data in set", names(multiData)[s],
"has no colnames. Colnames are needed to match variables."
))
}
}
# For now we use numeric labels for permutations.
permGoldName <- 0.1
permGreyName <- 0
collectGarbage()
if (verbose > 0) printFlush(paste(spaces, " ..calculating observed preservation values"))
observed <- .modulePreservationInternal(multiData, multiColor,
dataIsExpr = dataIsExpr,
calculatePermutation = FALSE, networkType = networkType,
referenceNetworks = referenceNetworks,
testNetworks = testNetworks,
densityOnly = useInterpolation,
maxGoldModuleSize = maxGoldModuleSize,
maxModuleSize = maxModuleSize,
corFnc = corFnc, corOptions = corOptions,
quickCor = quickCor,
# calculateQuality = calculateQuality,
ccTupletSize = ccTupletSize,
calculateCor.kIMall = calculateCor.kIMall,
calculateClusterCoeff = calculateClusterCoeff,
checkData = FALSE, greyName = greyName,
verbose = verbose - 3, indent = indent + 2
)
if (nPermutations == 0) {
return(list(observed = observed))
}
# Calculate preservation scores in permuted data.
psLoaded <- FALSE
if (loadPermutedStatistics) {
cat(paste(spaces, "..attempting to load permutation statistics.."))
x <- try(load(file = permutedStatisticsFile), silent = TRUE)
if (class(x) == "try-error") {
printFlush(paste("failed. Error message returned by system:\n", x))
} else {
expectVars <- c(
"regModuleSizes", "regStatNames", "regModuleNames", "fixStatNames", "fixModuleNames",
"permutationsPresent", "permOut", "interpolationUsed"
)
e2x <- match(expectVars, x)
if (any(is.na(e2x)) | length(x) != length(expectVars)) {
printFlush(paste("the file does not contain (all) expected variables."))
} else if (length(permOut) != length(referenceNetworks)) {
printFlush("the loaded permutation statistics have incorrect number of reference sets.")
} else if (length(permOut[[1]]) != nNets) {
printFlush("the loaded permutation statistics have incorrect number of test sets.")
} else {
psLoaded <- TRUE
printFlush("success.")
}
}
if (!psLoaded) {
printFlush(paste(
spaces, "\n ..will recalculate permutations.",
"Hit Ctrl-C (Esc in Windows) to stop the calculation."
))
}
}
nRegStats <- 20
nFixStats <- 3
if (!psLoaded) {
permOut <- list()
regModuleSizes <- list()
regModuleNames <- list()
permutationsPresent <- matrix(FALSE, nNets, nRefNets)
interpolationUsed <- matrix(FALSE, nNets, nRefNets)
if (verbose > 0) printFlush(paste(spaces, " ..calculating permutation Z scores"))
for (iref in 1:nRefNets) # Loop over reference networks
{
ref <- referenceNetworks[iref]
if (verbose > 0) printFlush(paste(spaces, "..Working with set", ref, "as reference set"))
permOut[[iref]] <- list()
regModuleSizes[[iref]] <- list()
regModuleNames[[iref]] <- list()
nRefMods <- length(unique(multiColor[[ref]]))
if (nRefMods == 1) {
printFlush(paste(
spaces, "*+*+*+*+* Reference set contains a single module.\n",
spaces,
"A permutation analysis is not meaningful for a single module; skipping."
))
next
}
for (tnet in 1:nNets) {
if (tnet %in% testNetworks[[iref]]) {
# Retain only genes that are shared between the reference and test networks
if (verbose > 1) printFlush(paste(spaces, "....working with set", tnet, "as test set"))
overlap <- intersect(colnames(multiData[[ref]]$data), colnames(multiData[[tnet]]$data))
loc1 <- match(overlap, colnames(multiData[[ref]]$data))
loc2 <- match(overlap, colnames(multiData[[tnet]]$data))
refName <- paste("ref_", setNames[ref], sep = "")
colorRef <- multiColor[[ref]][loc1]
if (dataIsExpr) {
datRef <- multiData[[ref]]$data[, loc1]
datTest <- multiData[[tnet]]$data[, loc2]
} else {
datRef <- multiData[[ref]]$data[loc1, loc1]
datTest <- multiData[[tnet]]$data[loc2, loc2]
}
testName <- setNames[tnet]
nRefGenes <- ncol(datRef)
# if(!is.na(multiColor[[tnet]][1])||length(multiColor[[tnet]])!=1)
if (.cvPresent(multiColor[[tnet]])) {
colorTest <- multiColor[[tnet]][loc2]
} else {
colorTest <- NA
}
name <- paste(refName, "vs", testName, sep = "")
obsModSizes <- list()
nObsMods <- rep(0, 2)
tab <- table(colorRef)
nObsMods[1] <- length(tab)
obsModSizes[[1]] <- tab[names(tab) != greyName]
if (!useInterpolation | (nObsMods[1] <= 5) | (sum(obsModSizes[[1]]) < 1000)) {
# Do not use interpolation: simply use original colors
permRefColors <- colorRef
interpolationUsed[tnet, iref] <- FALSE
nPermMods <- nObsMods[1]
if (useInterpolation && (verbose > 1)) {
printFlush(paste(spaces, " FYI: interpolation will not be used for this comparison."))
}
} else {
obsModSizes[[1]][obsModSizes[[1]] < 3] <- 3
if (length(colorTest) > 1) {
tab <- table(colorTest)
obsModSizes[[2]] <- tab[names(tab) != greyName]
obsModSizes[[2]][obsModSizes[[2]] < 3] <- 3
nObsMods[2] <- length(tab)
} else {
obsModSizes[[2]] <- NA
}
# Note we only need permColors for the reference set.
nPermMods <- 10
minNMods <- 5
OMS <- obsModSizes[[1]]
logmin <- log(min(OMS))
logmax <- log(min(maxModuleSize, max(OMS)))
ok <- FALSE
skip <- FALSE
while (!ok) {
if (logmin >= logmax) logmax <- logmin + nPermMods / 2
permModSizes <- as.integer(exp(seq(from = logmin, to = logmax, length = nPermMods)))
nNeededGenes <- sum(permModSizes)
# Check that the data has enough genes to fit the module sizes:
if (nNeededGenes < nRefGenes) {
ok <- TRUE
skip <- FALSE
} else if (nPermMods > minNMods) {
# Drop one module
nPermMods <- nPermMods - 1
logStep <- (logmax - logmin) / nPermMods
# If the modules are spaced far enough apart, also decrease the max module size
if (logStep > log(2)) {
logmax <- logmax - logStep
}
} else {
# It appears we don't have enough genes to form meaningful modules for interpolation.
# Decrease logmin as well, but only to a certain degree.
logminFloor <- 3
if (logmin > logminFloor) {
logmin <- min(logmin - 1, logminFloor)
} else {
# For now we give up, but in the future may have to add a non-interpolation approach here
# as well.
printFlush(paste(
spaces,
"*+*+*+*+*+ There are not enough genes and/or modules for",
"reference set", ref, "and test set", tnet, ".\n",
spaces, "Will skip this combination."
))
ok <- TRUE
skip <- TRUE
}
}
}
if (skip) {
# Instead of skipping, use the actual colors
permRefColors <- colorRef
interpolationUsed[tnet, iref] <- FALSE
nPermMods <- nObsMods[1]
} else {
# Create a base label sequence for the permuted reference data set
permRefColors <- rep(c(1:nPermMods), permModSizes)
nGrey <- nRefGenes - length(permRefColors)
permRefColors <- c(permRefColors, rep(greyName, nGrey))
interpolationUsed[tnet, iref] <- TRUE
}
}
permExpr <- list()
permExpr[[1]] <- list(data = datRef)
permExpr[[2]] <- list(data = datTest)
names(permExpr) <- setNames[c(ref, tnet)]
permOut[[iref]][[tnet]] <- list(
regStats = array(NA, dim = c(
nPermMods + 2 - (!interpolationUsed[tnet, iref]),
nRegStats, nPermutations
)),
fixStats = array(NA, dim = c(nObsMods[[1]], nFixStats, nPermutations))
)
# Perform actual permutations
# oldRNG = NULL;
permColors <- list()
permColors[[1]] <- permRefColors
permColors[[2]] <- NA
permColorsForAcc <- list()
permColorsForAcc[[1]] <- colorRef
permColorsForAcc[[2]] <- colorTest
# For reproducibility of previous results, write separate code for threaded and unthreaded
# calculations
if (parallelCalculation) {
combineCalculations <- function(...) {
list(...)
}
seed <- sample(1e8, 1)
if (verbose > 2) {
printFlush(paste(spaces, " ......parallel calculation of permuted statistics.."))
}
datout <- foreach(
perm = 1:nPermutations, .combine = combineCalculations,
.multicombine = TRUE, .maxcombine = nPermutations + 10
) %dopar% {
set.seed(seed + perm + perm^2)
collectGarbage()
.modulePreservationInternal(permExpr, permColors,
dataIsExpr = dataIsExpr,
calculatePermutation = TRUE,
multiColorForAccuracy = permColorsForAcc,
networkType = networkType,
corFnc = corFnc, corOptions = corOptions,
referenceNetworks = 1,
testNetworks = list(2),
densityOnly = useInterpolation,
maxGoldModuleSize = maxGoldModuleSize,
maxModuleSize = maxModuleSize, quickCor = quickCor,
ccTupletSize = ccTupletSize,
calculateCor.kIMall = calculateCor.kIMall,
calculateClusterCoeff = calculateClusterCoeff,
# calculateQuality = calculateQuality,
greyName = greyName,
checkData = FALSE,
verbose = verbose - 3, indent = indent + 3
)
}
for (perm in 1:nPermutations)
{
if (!datout[[perm]] [[1]]$netPresent[2]) {
stop(paste(
"Internal error: no data in permuted set preservation measures. \n",
"Please contact the package maintainers. Sorry!"
))
}
permOut[[iref]][[tnet]]$regStats[, , perm] <- as.matrix(
cbind(
datout[[perm]] [[1]]$quality[[2]][, -1],
datout[[perm]] [[1]]$intra[[2]],
datout[[perm]] [[1]]$inter[[2]]
)
)
permOut[[iref]][[tnet]]$fixStats[, , perm] <- as.matrix(datout[[perm]] [[1]]$accuracy[[2]])
}
datout <- datout[[1]] # For the name stting procedures that follow...
} else {
for (perm in 1:nPermutations)
{
if (verbose > 2) printFlush(paste(spaces, " ......working on permutation", perm))
# newRNG = .Random.seed;
# if (!is.null(oldRNG))
# if (isTRUE(all.equal(newRNG, oldRNG)))
# printFlush("WARNING: something's wrong with the RNG... old and new RNG equal.");
# oldRNG = .Random.seed
# set.seed(perm*2);
datout <- .modulePreservationInternal(permExpr, permColors,
dataIsExpr = dataIsExpr,
calculatePermutation = TRUE,
multiColorForAccuracy = permColorsForAcc,
networkType = networkType,
corFnc = corFnc, corOptions = corOptions,
referenceNetworks = 1,
testNetworks = list(2),
densityOnly = useInterpolation,
maxGoldModuleSize = maxGoldModuleSize,
maxModuleSize = maxModuleSize, quickCor = quickCor,
ccTupletSize = ccTupletSize,
calculateCor.kIMall = calculateCor.kIMall,
calculateClusterCoeff = calculateClusterCoeff,
# calculateQuality = calculateQuality,
greyName = greyName,
checkData = FALSE,
verbose = verbose - 3, indent = indent + 3
)
if (!datout[[1]]$netPresent[2]) {
stop(paste(
"Internal error: no data in permuted set preservation measures. \n",
"Please contact the package maintainers. Sorry!"
))
}
permOut[[iref]][[tnet]]$regStats[, , perm] <- as.matrix(cbind(
datout[[1]]$quality[[2]][, -1],
datout[[1]]$intra[[2]],
datout[[1]]$inter[[2]]
))
permOut[[iref]][[tnet]]$fixStats[, , perm] <- as.matrix(datout[[1]]$accuracy[[2]])
collectGarbage()
}
}
regStatNames <- c(
colnames(datout[[1]]$quality[[2]])[-1], colnames(datout[[1]]$intra[[2]]),
colnames(datout[[1]]$inter[[2]])
)
regModuleNames[[iref]][[tnet]] <- rownames(datout[[1]]$quality[[2]])
regModuleSizes[[iref]][[tnet]] <- datout[[1]]$quality[[2]][, 1]
fixStatNames <- colnames(datout[[1]]$accuracy[[2]])
fixModuleNames <- rownames(datout[[1]]$accuracy[[2]])
dimnames(permOut[[iref]][[tnet]]$regStats) <- list(
regModuleNames[[iref]][[tnet]], regStatNames,
spaste("Permutation.", c(1:nPermutations))
)
dimnames(permOut[[iref]][[tnet]]$fixStats) <- list(
fixModuleNames, fixStatNames,
spaste("Permutation.", c(1:nPermutations))
)
permutationsPresent[tnet, iref] <- TRUE
} else {
regModuleNames[[iref]][[tnet]] <- NA
regModuleSizes[[iref]][[tnet]] <- NA
permOut[[iref]][[tnet]] <- NA
}
}
}
if (savePermutedStatistics) {
save(regModuleSizes, regStatNames, regModuleNames, fixStatNames,
fixModuleNames, permutationsPresent, interpolationUsed, permOut,
file = permutedStatisticsFile
)
}
} # if (!psLoaded)
collectGarbage()
if (any(interpolationUsed, na.rm = TRUE)) {
if (verbose > 0) printFlush(paste(spaces, "..Calculating interpolation approximations.."))
if (plotInterpolation) pdf(file = interpolationPlotFile, width = 12, height = 6)
}
# Define a "class" indicating that no valid fit was obtained
invalidFit <- "invalidFit"
LogIndex <- c(rep(c(1, 0, 0, 0, 0, 0, 0), 2), 0, 0, 0, 0, 0, 0)
dfMean <- c(rep(c(2, 2, 2, 1, 1, 1, 1), 2), 3, 3, 3, 2, 2, 2)
dfSD <- c(rep(c(2, 2, 2, 2, 2, 2, 2), 2), 2, 2, 2, 2, 2, 2)
epsilon <- 0.00001
meanLM <- list()
seLM <- list()
OmitModule <- c(permGoldName, permGreyName)
for (iref in 1:nRefNets)
{
ref <- referenceNetworks[iref]
meanLM[[iref]] <- list()
seLM[[iref]] <- list()
for (tnet in 1:nNets) {
if (observed[[iref]]$netPresent[tnet] &
permutationsPresent[tnet, iref] & interpolationUsed[tnet, iref]) {
NotGold <- !is.element(regModuleNames[[iref]][[tnet]], OmitModule)
meanLM[[iref]][[tnet]] <- list()
seLM[[iref]][[tnet]] <- list()
logName <- matrix("", sum(NotGold), 1)
for (stat in 1:nRegStats)
{
means <- c(apply(permOut[[iref]][[tnet]]$regStats[NotGold, stat, , drop = FALSE], c(1:2),
mean,
na.rm = TRUE
))
SD <- try(c(apply(permOut[[iref]][[tnet]]$regStats[NotGold, stat, , drop = FALSE], c(1:2),
sd,
na.rm = TRUE
)),
silent = TRUE
)
if (class(SD) == "try-error") SD <- NA
if (any(is.na(c(means, SD)))) {
meanLM[[iref]][[tnet]][[stat]] <- NA
class(meanLM[[iref]][[tnet]][[stat]]) <- invalidFit
seLM[[iref]][[tnet]][[stat]] <- NA
class(seLM[[iref]][[tnet]][[stat]]) <- invalidFit
} else {
modSizes <- regModuleSizes[[iref]][[tnet]][NotGold]
xx <- log(modSizes)
if (LogIndex[stat] == 1) {
means[means < epsilon] <- epsilon
yy <- log(means)
logName[stat] <- "log"
} else {
yy <- means
logName[stat] <- ""
}
name1 <- regStatNames[stat]
name2 <- paste(setNames[ref], "vs", setNames[tnet])
meanLM[[iref]][[tnet]][[stat]] <- lm(yy ~ ns(xx, df = dfMean[stat]))
names(meanLM[[iref]][[tnet]])[stat] <- paste(name1, "_meanInterpolation", sep = "")
PredictedMedian <- as.numeric(predict(meanLM[[iref]][[tnet]][[stat]]))
if (all(!is.na(means)) && length(table(means)) > 1) {
par(mfrow = c(1, 2))
par(mar = c(5, 5, 4, 1))
SE <- SD / sqrt(nPermutations)
ymin <- min(yy - SE, na.rm = TRUE)
ymax <- max(yy + SE, na.rm = TRUE)
verboseScatterplot(xx, yy,
xlab = "Log module size",
ylab = paste(logName[stat], "Permutation median"),
main = paste("mean", name1, "\n", name2, "; "),
cex.axis = 1, cex.lab = 1, cex.main = 1.2,
ylim = c(ymin, ymax)
)
try(errbar(xx, yy, yy + SE, yy - SE, add = TRUE), silent = TRUE)
lines(xx[order(xx)], PredictedMedian[order(xx)], col = "red")
verboseScatterplot(yy, PredictedMedian,
xlab = "Observed permutation median",
ylab = "Predicted permutation median",
main = paste("mean", name1, "\n", name2, "\n"),
cex.axis = 1, cex.lab = 1, cex.main = 1.2
)
abline(0, 1, col = "green")
}
epsilon2 <- 0.0000000001
SD[SD < epsilon2] <- epsilon2
yy2 <- log(SD)
xx2 <- log(modSizes)
seLM[[iref]][[tnet]][[stat]] <- lm(yy2 ~ ns(xx2, df = dfSD[stat]))
names(seLM[[iref]][[tnet]])[stat] <- paste(name1, "_SDInterpolation", sep = "")
PredictedSD <- as.numeric(predict(seLM[[iref]][[tnet]][[stat]]))
if (all(!is.na(SD)) && length(table(SD)) > 1) {
par(mfrow = c(1, 2))
verboseScatterplot(xx2, yy2,
xlab = "Log Module Size", ylab = "Log observed permutation SD",
main = paste("SD", name1, "\n", name2, "\n"),
cex.axis = 1, cex.lab = 1, cex.main = 1.2
)
lines(xx2[order(xx2)], PredictedSD[order(xx2)], col = "red")
verboseScatterplot(yy2, PredictedSD,
xlab = "Log observed permutation SD",
ylab = "Log predicted permutation SD",
main = paste("SD", name1, "\n", name2, "\n"),
cex.axis = 1, cex.lab = 1, cex.main = 1.2
)
abline(0, 1, col = "green")
}
}
}
}
}
}
if (any(interpolationUsed)) {
if (plotInterpolation) dev.off()
}
observedQuality <- list()
observedPreservation <- list()
observedReferenceSeparability <- list()
observedTestSeparability <- list()
observedOverlapCounts <- list()
observedOverlapPvalues <- list()
observedAccuracy <- list()
Z.quality <- list()
Z.preservation <- list()
Z.referenceSeparability <- list()
Z.testSeparability <- list()
Z.accuracy <- list()
interpolationStat <- c(rep(TRUE, 14), FALSE, FALSE, FALSE, rep(TRUE, 6))
for (iref in 1:nRefNets)
{
observedQuality[[iref]] <- list()
observedPreservation[[iref]] <- list()
observedReferenceSeparability[[iref]] <- list()
observedTestSeparability[[iref]] <- list()
observedOverlapCounts[[iref]] <- list()
observedOverlapPvalues[[iref]] <- list()
observedAccuracy[[iref]] <- list()
Z.quality[[iref]] <- list()
Z.preservation[[iref]] <- list()
Z.referenceSeparability[[iref]] <- list()
Z.testSeparability[[iref]] <- list()
Z.accuracy[[iref]] <- list()
for (tnet in 1:nNets) {
if (observed[[iref]]$netPresent[tnet] & permutationsPresent[tnet, iref]) {
nModules <- nrow(observed[[iref]]$intra[[tnet]])
nQualiStats <- ncol(observed[[iref]]$quality[[tnet]]) - 1
nIntraStats <- ncol(observed[[iref]]$intra[[tnet]])
accuracy <- observed[[iref]]$accuracy[[tnet]]
inter <- observed[[iref]]$inter[[tnet]]
nInterStats <- ncol(inter) + ncol(accuracy)
a2i <- match(rownames(accuracy), rownames(inter))
accuracy2 <- matrix(NA, nrow(inter), ncol(accuracy))
accuracy2[a2i, ] <- accuracy
rownames(accuracy2) <- rownames(inter)
colnames(accuracy2) <- colnames(accuracy)
modSizes <- observed[[iref]]$quality[[tnet]][, 1]
allObsStats <- cbind(
observed[[iref]]$quality[[tnet]][, -1],
observed[[iref]]$intra[[tnet]],
accuracy2, inter
)
sepCol <- match("separability.qual", colnames(observed[[iref]]$quality[[tnet]]))
sepCol2 <- match("separability.pres", colnames(observed[[iref]]$intra[[tnet]]))
quality <- observed[[iref]]$quality[[tnet]][, -sepCol]
if (dataIsExpr | (!restrictSummaryForGeneralNetworks)) {
rankColsQuality <- c(2, 3, 4, 5)
rankColsDensity <- c(1, 2, 3, 4)
} else {
rankColsQuality <- c(5)
rankColsDensity <- 4
}
ranks <- apply(-quality[, rankColsQuality, drop = FALSE], 2, rank, na.last = "keep")
medRank <- apply(as.matrix(ranks), 1, median, na.rm = TRUE)
observedQuality[[iref]][[tnet]] <- cbind(
moduleSize = modSizes,
medianRank.qual = medRank,
quality[, -1]
)
preservation <- cbind(observed[[iref]]$intra[[tnet]][, -sepCol2], inter)
ranksDensity <- apply(-preservation[, rankColsDensity, drop = FALSE], 2, rank, na.last = "keep")
medRankDensity <- apply(as.matrix(ranksDensity), 1, median, na.rm = TRUE)
if (dataIsExpr | (!restrictSummaryForGeneralNetworks)) {
if (includekMEallInSummary) {
connSummaryInd <- c(7:10)
} else {
connSummaryInd <- c(7, 8, 10)
}
} else {
connSummaryInd <- c(7, 10) # in this case only cor.kIM and cor.Adj which sits in the cor.cor slot
}
ranksConnectivity <- apply(-preservation[, connSummaryInd, drop = FALSE], 2, rank, na.last = "keep")
medRankConnectivity <- apply(as.matrix(ranksConnectivity), 1, median, na.rm = TRUE)
medRank <- apply(cbind(ranksDensity, ranksConnectivity), 1, median, na.rm = TRUE)
observedPreservation[[iref]][[tnet]] <- cbind(
moduleSize = modSizes,
medianRank.pres = medRank,
medianRankDensity.pres = medRankDensity,
medianRankConnectivity.pres = medRankConnectivity,
preservation
)
observedAccuracy[[iref]][[tnet]] <- cbind(moduleSize = modSizes[a2i], accuracy)
observedReferenceSeparability[[iref]][[tnet]] <- cbind(
moduleSize = modSizes,
observed[[iref]]$quality[[tnet]][sepCol]
)
observedTestSeparability[[iref]][[tnet]] <- cbind(
moduleSize = modSizes,
observed[[iref]]$intra[[tnet]][sepCol2]
)
observedOverlapCounts[[iref]][[tnet]] <- observed[[iref]]$overlapTables[[tnet]]$countTable
observedOverlapPvalues[[iref]][[tnet]] <- observed[[iref]]$overlapTables[[tnet]]$pTable
nAllStats <- ncol(allObsStats)
zAll <- matrix(NA, nModules, nAllStats)
rownames(zAll) <- rownames(observed[[iref]]$intra[[tnet]])
colnames(zAll) <- paste("Z.", colnames(allObsStats), sep = "")
logModSizes <- log(modSizes)
goldRowPerm <- match(goldName, regModuleNames[[iref]][[tnet]])
goldRowObs <- match(goldName, rownames(inter))
fixInd <- 1
regInd <- 1
for (stat in 1:nAllStats) {
if (interpolationStat[stat]) {
if (interpolationUsed[tnet, iref]) {
if (class(meanLM[[iref]][[tnet]][[regInd]]) != invalidFit) {
# print(paste(regInd, stat))
prediction <- predict(meanLM[[iref]][[tnet]][[regInd]],
newdata = data.frame(xx = logModSizes), se.fit = TRUE
)
predictedMean <- as.numeric(prediction$fit)
if (LogIndex[regInd] == 1) {
predictedMean <- exp(predictedMean)
}
predictedSD <- exp(as.numeric(predict(seLM[[iref]][[tnet]][[regInd]],
newdata = data.frame(xx2 = logModSizes)
)))
zAll[, stat] <- (allObsStats[, stat] - predictedMean) / predictedSD
# For the gold module : take the direct observations.
goldMean <- mean(permOut[[iref]][[tnet]]$regStats[goldRowPerm, regInd, ], na.rm = TRUE)
goldSD <- apply(permOut[[iref]][[tnet]]$regStats[goldRowPerm, regInd, ], 2, sd, na.rm = TRUE)
zAll[goldRowObs, stat] <- (allObsStats[goldRowObs, stat] - goldMean) / goldSD
}
} else {
means <- c(apply(permOut[[iref]][[tnet]]$regStats[, regInd, , drop = FALSE],
c(1:2), mean,
na.rm = TRUE
))
SDs <- c(apply(permOut[[iref]][[tnet]]$regStats[, regInd, , drop = FALSE],
c(1:2), sd,
na.rm = TRUE
))
z <- (allObsStats[, stat] - means) / SDs
if (any(is.finite(z))) {
finite <- is.finite(z)
z[finite][SDs[finite] == 0] <- max(abs(z[finite]), na.rm = TRUE) *
sign(allObsStats[, stat] - means)[SDs[finite] == 0]
}
zAll[, stat] <- z
}
regInd <- regInd + 1
} else {
if (.cvPresent(multiColor[[tnet]])) {
means <- c(apply(permOut[[iref]][[tnet]]$fixStats[, fixInd, , drop = FALSE],
c(1:2), mean,
na.rm = TRUE
))
SDs <- c(apply(permOut[[iref]][[tnet]]$fixStats[, fixInd, , drop = FALSE],
c(1:2), sd,
na.rm = TRUE
))
z <- (allObsStats[a2i, stat] - means) / SDs
if (any(is.finite(z))) {
finite <- is.finite(z)
z[finite][SDs[finite] == 0] <- max(abs(z[finite]), na.rm = TRUE) *
sign(allObsStats[a2i, stat] - means)[SDs[finite] == 0]
}
zAll[a2i, stat] <- z
}
fixInd <- fixInd + 1
}
}
zAll <- as.data.frame(zAll)
sepCol <- match("Z.separability.qual", colnames(zAll)[1:nQualiStats])
zQual <- zAll[, c(1:nQualiStats)][, -sepCol]
summaryColsQuality <- rankColsQuality - 1 # quality also contains module sizes, Z does not
summZ <- apply(zQual[, summaryColsQuality, drop = FALSE], 1, median, na.rm = TRUE)
Z.quality[[iref]][[tnet]] <- data.frame(cbind(
moduleSize = modSizes, Zsummary.qual = summZ,
zQual
))
Z.referenceSeparability[[iref]][[tnet]] <-
data.frame(cbind(moduleSize = modSizes, zAll[sepCol]))
st <- nQualiStats + 1
en <- nQualiStats + nIntraStats + nInterStats
sepCol2 <- match("Z.separability.pres", colnames(zAll)[st:en])
accuracyCols <- match(c("Z.accuracy", "Z.minusLogFisherP", "Z.coClustering"), colnames(zAll)[st:en])
zPres <- zAll[, st:en][, -c(sepCol2, accuracyCols)]
summaryColsPreservation <- list(summaryColsQuality, connSummaryInd)
nGroups <- length(summaryColsPreservation)
summZMat <- matrix(0, nrow(zPres), nGroups)
for (g in 1:nGroups) {
summZMat[, g] <- apply(zPres[, summaryColsPreservation[[g]], drop = FALSE], 1, median, na.rm = TRUE)
}
colnames(summZMat) <- c("Zdensity.pres", "Zconnectivity.pres")
summZ <- apply(summZMat, 1, mean, na.rm = TRUE)
Z.preservation[[iref]][[tnet]] <- data.frame(cbind(
moduleSize = modSizes, Zsummary.pres = summZ,
summZMat, zPres
))
Z.testSeparability[[iref]][[tnet]] <-
data.frame(cbind(moduleSize = modSizes, zAll[nQualiStats + sepCol2]))
Z.accuracy[[iref]][[tnet]] <-
data.frame(cbind(moduleSize = modSizes, zAll[st:en][accuracyCols]))
} else {
observedQuality[[iref]][[tnet]] <- NA
observedPreservation[[iref]][[tnet]] <- NA
observedReferenceSeparability[[iref]][[tnet]] <- NA
observedTestSeparability[[iref]][[tnet]] <- NA
observedAccuracy[[iref]][[tnet]] <- NA
observedOverlapCounts[[iref]][[tnet]] <- NA
observedOverlapPvalues[[iref]][[tnet]] <- NA
Z.quality[[iref]][[tnet]] <- NA
Z.preservation[[iref]][[tnet]] <- NA
Z.referenceSeparability[[iref]][[tnet]] <- NA
Z.testSeparability[[iref]][[tnet]] <- NA
Z.accuracy[[iref]][[tnet]] <- NA
}
}
names(observedQuality[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(observedPreservation[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(observedReferenceSeparability[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(observedTestSeparability[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(observedAccuracy[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(observedOverlapCounts[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(observedOverlapPvalues[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(Z.quality[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(Z.preservation[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(Z.referenceSeparability[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(Z.testSeparability[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
names(Z.accuracy[[iref]]) <- paste("inColumnsAlsoPresentIn",
sep = ".",
setNames
)
}
names(observedQuality) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(observedPreservation) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(observedReferenceSeparability) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(observedTestSeparability) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(observedAccuracy) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(observedOverlapCounts) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(observedOverlapPvalues) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(Z.quality) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(Z.preservation) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(Z.referenceSeparability) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(Z.testSeparability) <- paste("ref", setNames[referenceNetworks], sep = ".")
names(Z.accuracy) <- paste("ref", setNames[referenceNetworks], sep = ".")
summaryIndQuality <- list(c(3:6))
summaryIndPreservation <- list(c(5:8), connSummaryInd + 4, c(3, 4)) # 4 = 1 module size + 3 summary indices
p.quality <- .pValueFromZ(Z.quality, summaryCols = 2, summaryInd = summaryIndQuality)
p.preservation <- .pValueFromZ(Z.preservation, summaryCols = c(3, 4, 2), summaryInd = summaryIndPreservation)
p.referenceSeparability <- .pValueFromZ(Z.referenceSeparability)
p.testSeparability <- .pValueFromZ(Z.testSeparability)
p.accuracy <- .pValueFromZ(Z.accuracy)
pBonf.quality <- .pValueFromZ(Z.quality, bonf = TRUE, summaryCols = 2, summaryInd = summaryIndQuality)
pBonf.preservation <- .pValueFromZ(Z.preservation,
bonf = TRUE, summaryCols = c(3, 4, 2),
summaryInd = summaryIndPreservation
)
pBonf.referenceSeparability <- .pValueFromZ(Z.referenceSeparability, bonf = TRUE)
pBonf.testSeparability <- .pValueFromZ(Z.testSeparability, bonf = TRUE)
pBonf.accuracy <- .pValueFromZ(Z.accuracy, bonf = TRUE)
if (calculateQvalue) {
q.quality <- .qValueFromP(p.quality, summaryCols = 2, summaryInd = summaryIndQuality)
q.preservation <- .qValueFromP(p.preservation,
summaryCols = c(3, 4, 2),
summaryInd = summaryIndPreservation
)
q.referenceSeparability <- .qValueFromP(p.referenceSeparability)
q.testSeparability <- .qValueFromP(p.testSeparability)
q.accuracy <- .qValueFromP(p.accuracy)
} else {
q.quality <- NULL
q.preservation <- NULL
q.referenceSeparability <- NULL
q.testSeparability <- NULL
q.accuracy <- NULL
}
output <- list(
quality = list(
observed = observedQuality, Z = Z.quality,
log.p = p.quality,
log.pBonf = pBonf.quality,
q = q.quality
),
preservation = list(
observed = observedPreservation, Z = Z.preservation,
log.p = p.preservation,
log.pBonf = pBonf.preservation,
q = q.preservation
),
accuracy = list(
observed = observedAccuracy, Z = Z.accuracy,
log.p = p.accuracy,
log.pBonf = pBonf.accuracy,
q = q.accuracy,
observedCounts = observedOverlapCounts,
observedFisherPvalues = observedOverlapPvalues
),
referenceSeparability = list(
observed = observedReferenceSeparability,
Z = Z.referenceSeparability,
log.p = p.referenceSeparability,
log.pBonf = pBonf.referenceSeparability,
q = q.referenceSeparability
),
testSeparability = list(
observed = observedTestSeparability,
Z = Z.testSeparability,
log.p = p.testSeparability,
log.pBonf = pBonf.testSeparability,
q = q.testSeparability
),
permutationDetails = list(
permutedStatistics = permOut,
interpolationModuleSizes = regModuleSizes,
interpolationStatNames = regStatNames,
permutationsPresent = permutationsPresent,
interpolationUsed = interpolationUsed
)
)
checkComps <- list(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2))
nCheckComps <- length(checkComps)
if (discardInvalidOutput) {
for (iref in 1:nRefNets)
{
for (tnet in 1:nNets) {
if (observed[[iref]]$netPresent[tnet] & permutationsPresent[tnet, iref]) {
for (oc in 1:nCheckComps)
{
for (ic in checkComps[[oc]])
{
data <- output[[oc]][[ic]][[iref]][[tnet]]
keep <- apply(!is.na(data), 2, sum) > 0
output[[oc]][[ic]][[iref]][[tnet]] <- data[, keep, drop = FALSE]
}
}
}
}
}
}
return(output)
}
# =====================================================================================================
#
# .modulePreservationInternal
#
# =====================================================================================================
# Calculate module preservation scores for a given multi-expression data set.
# color vector present?
#' @title .cvPresent
#'
#' @param cv
#'
#' @return
#'
.cvPresent <- function(cv) {
if (is.null(cv)) {
return(FALSE)
}
if (length(cv) == 1 && (is.na(cv[1]))) {
return(FALSE)
}
return(TRUE)
}
#' @title .nNAColors
#'
#' @param cv
#'
#' @return
#'
.nNAColors <- function(cv) {
if (.cvPresent(cv)) sum(is.na(cv)) else 0
}
#' @title .accuracyStatistics
#'
#' @param colorRef
#' @param colorTest
#' @param ccTupletSize
#' @param greyName
#' @param pEpsilon
#'
#' @return
#'
.accuracyStatistics <- function(colorRef, colorTest, ccTupletSize, greyName, pEpsilon) {
colorRefLevels <- sort(unique(colorRef))
nRefMods <- length(colorRefLevels)
refModSizes <- table(colorRef)
accuracy <- matrix(NA, nRefMods, 3)
colnames(accuracy) <- c("accuracy", "minusLogFisherP", "coClustering")
rownames(accuracy) <- colorRefLevels
nRefGenes <- length(colorRef) # also equals nTestGenes
if (.cvPresent(colorTest)) {
# if (verbose > 1) printFlush(paste(spaces, "....calculating color label accuracy..."));
overlap <- overlapTable(colorTest, colorRef)
greyRow <- rownames(overlap$countTable) == greyName
greyCol <- colnames(overlap$countTable) == greyName
# bestCount = apply(overlap$countTable, 2, max);
overlap$pTable[!is.finite(overlap$pTable)] <- pEpsilon
overlap$pTable[overlap$pTable < pEpsilon] <- pEpsilon
bestCount <- rep(0, ncol(overlap$countTable))
if (sum(!greyRow) > 0 & sum(!greyCol) > 0) {
bestCount[!greyCol] <- apply(overlap$countTable[!greyRow, !greyCol, drop = FALSE], 2, max)
accuracy[!greyCol, 2] <-
-log10(apply(overlap$pTable[!greyRow, !greyCol, drop = FALSE], 2, min))
ccNumer <- apply(
overlap$countTable[!greyRow, !greyCol, drop = FALSE], 2, choose,
ccTupletSize
)
dim(ccNumer) <- c(sum(!greyRow), sum(!greyCol))
ccDenom <- choose(refModSizes[!greyCol], ccTupletSize)
accuracy[!greyCol, 3] <- apply(ccNumer, 2, sum) / ccDenom
}
if (sum(greyRow) == 1 & sum(greyCol) == 1) {
bestCount[greyCol] <- overlap$countTable[greyRow, greyCol]
accuracy[greyCol, 2] <- -log10(overlap$pTable[greyRow, greyCol])
accuracy[greyCol, 3] <- choose(bestCount[greyCol], ccTupletSize) /
choose(refModSizes[greyCol], ccTupletSize)
}
accuracy[, 1] <- bestCount / refModSizes
} else {
overlap <- list(countTable = NA, pTable = NA)
}
list(accuracy = accuracy, overlapTable = overlap)
}
# =================================================================================================
#
# .modulePreservationInternal
#
# =================================================================================================
# multiData contains either expression data or adjacencies; which one is indicated in dataIsExpr
#' @title .modulePreservationInternal
#'
#' @param multiData
#' @param multiColor
#' @param dataIsExpr
#' @param calculatePermutation
#' @param multiColorForAccuracy
#' @param networkType
#' @param corFnc
#' @param corOptions
#' @param referenceNetworks
#' @param testNetworks
#' @param densityOnly
#' @param maxGoldModuleSize
#' @param maxModuleSize
#' @param quickCor
#' @param ccTupletSize
#' @param calculateCor.kIMall
#' @param calculateClusterCoeff
#' @param checkData
#' @param greyName
#' @param pEpsilon
#' @param verbose
#' @param indent
#'
#' @return
#'
#' @examples
.modulePreservationInternal <- function(multiData, multiColor, dataIsExpr,
calculatePermutation,
multiColorForAccuracy = NULL,
networkType = "signed",
corFnc = "cor",
corOptions = "use = 'p'",
referenceNetworks = 1,
testNetworks,
densityOnly = FALSE,
maxGoldModuleSize = 1000,
maxModuleSize = 1000, quickCor = 1,
ccTupletSize,
calculateCor.kIMall,
calculateClusterCoeff,
# calculateQuality = FALSE,
checkData = TRUE,
greyName,
pEpsilon = 1e-200,
verbose = 1, indent = 0) {
spaces <- indentSpaces(indent)
# size = checkSets(multiData);
nNets <- length(multiData)
nGenes <- sapply(multiData, sapply, ncol)
nType <- charmatch(networkType, .networkTypes)
if (is.na(networkType)) {
stop(paste(
"Unrecognized networkType argument.",
"Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
))
}
setNames <- names(multiData)
# Check multiColor.
if (length(multiColor) != nNets) {
multiColor2 <- list()
if (length(names(multiColor)) != length(multiColor)) {
stop("Each entry of 'multiColor' must have a name.")
}
color2expr <- match(names(multiColor), names(multiData))
if (any(is.na(color2expr))) {
stop("Entries of 'multiColor' must name-match entries in 'multiData'.")
}
for (s in 1:nNets)
{
multiData[[s]]$data <- as.matrix(multiData[[s]]$data)
loc <- which(names(multiColor) %in% names(multiData)[s])
if (length(loc) == 0) {
multiColor2[[s]] <- NA
} else {
multiColor2[[s]] <- multiColor[[loc]]
}
}
multiColor <- multiColor2
rm(multiColor2)
}
if (is.numeric(greyName)) goldName <- 0.1 else goldName <- "gold"
MEgrey <- paste("ME", greyName, sep = "")
MEgold <- paste("ME", goldName, sep = "")
collectGarbage()
for (s in 1:nNets)
{
if (.cvPresent(multiColor[[s]]) & nGenes[s] != length(multiColor[[s]])) {
stop(paste("Color vector for set", s, "does not have the correct number of entries."))
}
}
keepGenes <- list()
for (s in 1:nNets) {
keepGenes[[s]] <- rep(TRUE, nGenes[s])
}
nNAs <- sapply(multiColor, .nNAColors)
if (any(nNAs > 0)) {
if (verbose > 0) printFlush(paste(spaces, " ..removing genes with missing color..."))
for (s in 1:nNets) {
if (.cvPresent(multiColor[[s]])) {
keepGenes[[s]] <- !is.na(multiColor[[s]])
if (dataIsExpr) {
multiData[[s]]$data <- multiData[[s]]$data[, keepGenes[[s]]]
} else {
multiData[[s]]$data <- multiData[[s]]$data[keepGenes[[s]], keepGenes[[s]]]
}
multiColor[[s]] <- multiColor[[s]][keepGenes[[s]]]
}
}
}
# if (verbose > 0) printFlush(paste(spaces, " ..preservation tests based on different reference networks"))
datout <- list()
if (nNets == 1) # && length(multiColor)==1)
{
# For now stop; in the future we'll bring this up to speed as well.
stop("Calculation of quality in an idividual network is not supported at this time. Sorry!")
} else {
for (iref in 1:length(referenceNetworks))
{
ref <- referenceNetworks[iref]
if (!.cvPresent(multiColor[[ref]])) {
stop(paste(
"Network", ref,
"does not have a color vector and cannot be used as reference network."
))
}
if (verbose > 0) printFlush(paste(spaces, "..working on reference network", setNames[ref]))
accuracy <- list()
quality <- list()
interPres <- list()
intraPres <- list()
overlapTables <- list()
netPresent <- rep(FALSE, nNets)
for (tnet in testNetworks[[iref]])
{
if (verbose > 1) printFlush(paste(spaces, " ..working on test network", setNames[tnet]))
overlap <- intersect(colnames(multiData[[ref]]$data), colnames(multiData[[tnet]]$data))
if (length(overlap) == 0) {
printFlush(paste(
spaces, "WARNING: sets", ref, "and", tnet,
"have no overlapping genes with valid colors.\n",
spaces, "No preservation measures can be calculated."
))
next
}
loc1 <- match(overlap, colnames(multiData[[ref]]$data))
loc2 <- match(overlap, colnames(multiData[[tnet]]$data))
if (length(multiColor[[tnet]]) > 1) {
colorTest <- multiColor[[tnet]][loc2]
} else {
colorTest <- NA
}
if (dataIsExpr) {
datTest <- multiData[[tnet]]$data[, loc2]
datRef <- multiData[[ref]]$data[, loc1]
} else {
datTest <- multiData[[tnet]]$data[loc2, loc2]
datRef <- multiData[[ref]]$data[loc1, loc1]
}
colorRef <- multiColor[[ref]][loc1]
# if multiColorForAccuracy is present, use the colors in this list to calculate accuracy and
# Fisher p values.
if (!is.null(multiColorForAccuracy)) {
colorRefAcc <- multiColorForAccuracy[[ref]][loc1]
if (.cvPresent(multiColorForAccuracy[[tnet]])) {
colorTestAcc <- multiColorForAccuracy[[tnet]][loc2]
} else {
colorTestAcc <- NA
}
} else {
colorRefAcc <- colorRef
colorTestAcc <- colorTest # This is the only place where colorTest is used (?)
}
# Accuracy measures
if (calculatePermutation) {
colorRefAcc <- sample(colorRefAcc)
colorTestAcc <- sample(colorRefAcc)
}
x <- .accuracyStatistics(colorRefAcc, colorTestAcc,
ccTupletSize = ccTupletSize, greyName = greyName, pEpsilon = pEpsilon
)
accuracy[[tnet]] <- x$accuracy
overlapTables[[tnet]] <- x$overlapTable
# From now on we work with colorRef; colorTest is not needed anymore.
# Restrict each module to at most maxModuleSize genes..
colorRefLevels <- sort(unique(colorRef))
nRefMods <- length(colorRefLevels)
nRefGenes <- length(colorRef)
# Check that the gold module is not too big. In particular, the gold module must not contain all valid
# genes, since in such a case the random sampling makes no sense for density-based statistics.
goldModSize <- maxGoldModuleSize
if (goldModSize > nRefGenes / 2) goldModSize <- nRefGenes / 2
# ..step 1: gold module. Note that because of the above the gold module size is always smaller than
# nRefGenes.
goldModR <- sample(nRefGenes, goldModSize)
if (calculatePermutation) {
goldModT <- sample(nRefGenes, goldModSize)
} else {
goldModT <- goldModR
}
if (dataIsExpr) {
goldRef <- datRef[, goldModR]
goldRefP <- datRef[, goldModT]
goldTest <- datTest[, goldModT]
} else {
goldRef <- datRef[goldModR, goldModR]
goldRefP <- datRef[goldModT, goldModT]
goldTest <- datTest[goldModT, goldModT]
}
# ..step 2: proper modules and grey
keepGenes <- rep(TRUE, nRefGenes)
for (m in 1:nRefMods)
{
inModule <- colorRef == colorRefLevels[m]
nInMod <- sum(inModule)
if (nInMod > maxModuleSize) {
sam <- sample(nInMod, maxModuleSize)
keepGenes[inModule] <- FALSE
keepGenes[inModule][sam] <- TRUE
}
}
# Create the permuted data sets
if (sum(keepGenes) < nRefGenes) {
colorRef <- colorRef[keepGenes]
if (dataIsExpr) {
datRef <- datRef[, keepGenes]
} else {
datRef <- datRef[keepGenes, keepGenes]
}
nRefGenes <- length(colorRef)
if (calculatePermutation) {
keepPerm <- sample(nRefGenes, sum(keepGenes))
if (dataIsExpr) {
datTest <- datTest[, keepPerm]
datRefP <- datRef[, keepPerm]
} else {
datTest <- datTest[keepPerm, keepPerm]
datRefP <- datRef[keepPerm, keepPerm]
}
} else {
if (dataIsExpr) {
datTest <- datTest[, keepGenes]
datRefP <- datRef
} else {
datTest <- datTest[keepGenes, keepGenes]
datRefP <- datRef
}
}
} else {
if (calculatePermutation) {
perm <- sample(c(1:nRefGenes))
if (dataIsExpr) {
datRefP <- datRef[, perm]
datTest <- datTest[, perm]
} else {
datRefP <- datRef[perm, perm]
datTest <- datTest[perm, perm]
}
} else {
datRefP <- datRef
}
}
if (dataIsExpr) {
datRef <- cbind(datRef, goldRef)
datRefP <- cbind(datRefP, goldRefP)
datTest <- cbind(datTest, goldTest)
} else {
datRef <- .combineAdj(datRef, goldRef)
datRefP <- .combineAdj(datRefP, goldRefP)
datTest <- .combineAdj(datTest, goldTest)
rownames(datRef) <- make.unique(rownames(datRef))
rownames(datRefP) <- make.unique(rownames(datRefP))
rownames(datTest) <- make.unique(rownames(datTest))
collectGarbage()
}
colnames(datRef) <- make.unique(colnames(datRef))
colnames(datRefP) <- make.unique(colnames(datRefP))
colnames(datTest) <- make.unique(colnames(datTest))
gold <- rep(goldName, goldModSize)
colorRef_2 <- c(as.character(colorRef), gold)
colorLevels <- sort(unique(colorRef_2))
opt <- list(
corFnc = corFnc, corOptions = corOptions, quickCor = quickCor,
nType = nType,
MEgold = MEgold, MEgrey = MEgrey,
densityOnly = densityOnly, calculatePermutation = calculatePermutation,
calculateCor.kIMall = calculateCor.kIMall,
calculateClusterCoeff = calculateClusterCoeff
)
if (dataIsExpr) {
stats <- .coreCalcForExpr(datRef, datRefP, datTest, colorRef_2, opt)
interPresNames <- spaste(corFnc, c(
".kIM", ".kME", ".kMEall",
spaste(".", corFnc), ".clusterCoeff", ".MAR"
))
measureNames <- c(
"propVarExplained", "meanSignAwareKME", "separability",
"meanSignAwareCorDat", "meanAdj", "meanClusterCoeff", "meanMAR"
)
} else {
stats <- .coreCalcForAdj(datRef, datRefP, datTest, colorRef_2, opt)
interPresNames <- spaste(corFnc, c(".kIM", ".kME", ".kIMall", ".adj", ".clusterCoeff", ".MAR"))
measureNames <- c(
"propVarExplained", "meanKIM", "separability",
"meanSignAwareCorDat", "meanAdj", "meanClusterCoeff", "meanMAR"
)
}
name1 <- paste(setNames[[ref]], "_vs_", setNames[[tnet]], sep = "")
quality[[tnet]] <- cbind(
stats$modSizes,
stats$proVar[, 1],
if (dataIsExpr) stats$meanSignAwareKME[, 1] else stats$meankIM[, 1],
stats$Separability[, 1], stats$MeanSignAwareCorDat[, 1],
stats$MeanAdj[, 1], stats$meanClusterCoeff[, 1],
stats$meanMAR[, 1]
)
intraPres[[tnet]] <- cbind(
stats$proVar[, 2],
if (dataIsExpr) stats$meanSignAwareKME[, 2] else stats$meankIM[, 2],
stats$Separability[, 2], stats$MeanSignAwareCorDat[, 2],
stats$MeanAdj[, 2], stats$meanClusterCoeff[, 2],
stats$meanMAR[, 2]
)
# colnames(quality[[tnet]]) = paste(c("moduleSize", measureNames), setNames[ref], sep = "_");
# colnames(intraPres[[tnet]]) = paste(measureNames, name1, sep = "_");
colnames(quality[[tnet]]) <- c("moduleSize", paste(measureNames, "qual", sep = "."))
rownames(quality[[tnet]]) <- colorLevels
colnames(intraPres[[tnet]]) <- paste(measureNames, "pres", sep = ".")
rownames(intraPres[[tnet]]) <- colorLevels
names(intraPres)[tnet] <- paste(name1, sep = "")
quality[[tnet]] <- as.data.frame(quality[[tnet]])
intraPres[[tnet]] <- as.data.frame(intraPres[[tnet]])
interPres[[tnet]] <- as.data.frame(cbind(
stats$corkIM, stats$corkME, stats$corkMEall, stats$ICORdat,
stats$corCC, stats$corMAR
))
colnames(interPres[[tnet]]) <- interPresNames
rownames(interPres[[tnet]]) <- colorLevels
names(interPres)[[tnet]] <- paste(name1, sep = "")
netPresent[tnet] <- TRUE
} # of for (test in testNetworks[[iref]])
datout[[iref]] <- list(
netPresent = netPresent, quality = quality,
intra = intraPres, inter = interPres, accuracy = accuracy,
overlapTables = overlapTables
)
} # of for (iref in 1:length(referenceNetworkss))
names(datout) <- setNames[referenceNetworks]
} # of else for if (nNets==1)
return(datout)
}
#' @title .checkExpr
#'
#' @param multiExpr
#' @param verbose
#' @param indent
#'
#' @return
#'
.checkExpr <- function(multiExpr, verbose, indent) {
spaces <- indentSpaces(indent)
nNets <- length(multiExpr)
if (verbose > 0) {
printFlush(paste(spaces, " ..checking data for excessive amounts of missing data.."))
}
for (set in 1:nNets)
{
gsg <- goodSamplesGenes(multiExpr[[set]]$data, verbose = verbose - 2, indent = indent + 2)
if (!gsg$allOK) {
stop(paste(
"The submitted 'multiExpr' data contain genes or samples\n",
" with zero variance or excessive counts of missing entries.\n",
" Please use the function goodSamplesGenes on each set to identify the problematic\n",
" genes and samples, and remove them before running modulePreservation."
))
}
}
}
#' @title .checkAdj
#'
#' @param multiAdj
#' @param verbose
#' @param indent
#'
#' @return
#'
.checkAdj <- function(multiAdj, verbose, indent) {
spaces <- indentSpaces(indent)
nNets <- length(multiAdj)
if (verbose > 0) {
printFlush(paste(spaces, " ..checking adjacencies for excessive amounts of missing data"))
}
for (set in 1:nNets)
{
checkAdjMat(multiAdj[[set]]$data)
gsg <- goodSamplesGenes(multiAdj[[set]]$data, verbose = verbose - 2, indent = indent + 2)
if (!gsg$allOK) {
stop(paste(
"The submitted 'multiAdj' contains rows or columns\n",
" with zero variance or excessive counts of missing entries. Please remove\n",
" offending rows and columns before running modulePreservation."
))
}
}
}
#' @title .combineAdj
#'
#' @param block1
#' @param block2
#'
#' @return
#'
.combineAdj <- function(block1, block2) {
n1 <- ncol(block1)
n2 <- ncol(block2)
comb <- matrix(0, n1 + n2, n1 + n2)
comb[1:n1, 1:n1] <- block1
comb[(n1 + 1):(n1 + n2), (n1 + 1):(n1 + n2)] <- block2
try(
{
colnames(comb) <- c(colnames(block1), colnames(block2))
},
silent = TRUE
)
comb
}
# This function is basically copied from the file networkConcepts.R
#' @title .computeLinksInNeighbors
#'
#' @param x
#' @param imatrix
#'
#' @return
#'
.computeLinksInNeighbors <- function(x, imatrix) {
x %*% imatrix %*% x
}
#' @title .computeSqDiagSum
#'
#' @param x
#' @param vec
#'
#' @return
#'
.computeSqDiagSum <- function(x, vec) {
sum(x^2 * vec)
}
#' @title .clusterCoeff
#'
#' @param adjmat1
#'
#' @return
#'
.clusterCoeff <- function(adjmat1) {
# diag(adjmat1)=0
no.nodes <- dim(adjmat1)[[1]]
nolinksNeighbors <- c(rep(-666, no.nodes))
total.edge <- c(rep(-666, no.nodes))
maxh1 <- max(as.dist(adjmat1))
minh1 <- min(as.dist(adjmat1))
nolinksNeighbors <- apply(adjmat1, 1, .computeLinksInNeighbors, imatrix = adjmat1)
subTerm <- apply(adjmat1, 1, .computeSqDiagSum, vec = diag(adjmat1))
plainsum <- colSums(adjmat1)
squaresum <- colSums(adjmat1^2)
total.edge <- plainsum^2 - squaresum
# CChelp=rep(-666, no.nodes)
CChelp <- ifelse(total.edge == 0, 0, (nolinksNeighbors - subTerm) / total.edge)
CChelp
}
#' @title .clusterCoeff
#'
#' @param adjacency
#'
#' @return
#'
.clusterCoeff <- function(adjacency) {
#This function assumes that the diagonal of the adjacency matrix is 1
denom <- apply(adjacency, 2, sum) - 1
mar <- (apply(adjacency^2, 2, sum) - 1) / denom
mar[denom == 0] <- NA
mar
}
# =======================================================================================
#
# Core calculation for expression data
#
# =======================================================================================
#' @title .coreCalcForExpr
#'
#' @param datRef
#' @param datRefP
#' @param datTest
#' @param colors
#' @param opt
#'
#' @return
#'
.coreCalcForExpr <- function(datRef, datRefP, datTest, colors, opt) {
colorLevels <- levels(factor(colors))
nMods <- length(colorLevels)
nGenes <- length(colors)
# Flag modules whose size is 1
modSizes <- table(colors)
act <- (modSizes > 1)
kME <- list()
ME <- list()
if (!opt$densityOnly) {
ME[[1]] <- moduleEigengenes(datRef, colors)$eigengenes
kME[[1]] <- as.matrix(signedKME(datRef, ME[[1]], corFnc = opt$corFnc, corOptions = opt$corOptions))
}
if (opt$calculatePermutation | opt$densityOnly) {
# printFlush("Calculating ME[[2]]");
ME[[2]] <- moduleEigengenes(datRefP, colors)$eigengenes
kME[[2]] <- as.matrix(signedKME(datRefP, ME[[2]], corFnc = opt$corFnc, corOptions = opt$corOptions))
} else {
ME[[2]] <- ME[[1]]
kME[[2]] <- kME[[1]]
}
ME[[3]] <- moduleEigengenes(datTest, colors)$eigengenes
kME[[3]] <- as.matrix(signedKME(datTest, ME[[3]], corFnc = opt$corFnc, corOptions = opt$corOptions))
modGenes <- list()
for (m in 1:nMods) {
modGenes[[m]] <- c(1:nGenes)[colors == colorLevels[m]]
}
# kME correlation
# if (verbose > 1) printFlush(paste(spaces, "....calculating kME..."));
corkME <- rep(NA, nMods)
corkMEall <- rep(NA, nMods)
if (!opt$densityOnly) {
for (j in 1:nMods) {
if (act[j]) {
nModGenes <- length(modGenes[[j]])
names1 <- substring(colnames(kME[[1]]), 4)
j1 <- which(names1 == colorLevels[j])
# corkME[j]=cor(kME[[1]][loc,j1],kME[[3]][loc,j1], use = "p")
corExpr <- parse(text = paste(
opt$corFnc, "(kME[[1]][modGenes[[j]],j1],kME[[3]][modGenes[[j]],j1]",
prepComma(opt$corOptions), ")"
))
corkME[j] <- abs(eval(corExpr))
# corkMEall[j]=cor(kME[[1]][,j1],kME[[3]][,j1], use = "p")
corExpr <- parse(text = paste(opt$corFnc, "(kME[[1]][,j1],kME[[3]][,j1] ", prepComma(opt$corOptions), ")"))
corkMEall[j] <- abs(eval(corExpr))
# covkME[j]=cov(kME[[1]][loc,j1],kME[[3]][loc,j1], use = "p")
# meanProductkME[j] = scalarProduct(kME[[1]][loc,j1],kME[[3]][loc,j1])
}
}
}
# proportion of variance explained
# if (verbose > 1)
# printFlush(paste(spaces, "....calculating proprotion of variance explained..."));
proVar <- matrix(NA, nMods, 2)
meanSignAwareKME <- matrix(NA, nMods, 2)
names1 <- substring(colnames(kME[[2]]), 4)
for (j in 1:nMods) {
if (act[j]) {
j1 <- which(names1 == colorLevels[j])
proVar[j, 1] <- mean((kME[[2]][modGenes[[j]], j1])^2, na.rm = TRUE)
proVar[j, 2] <- mean((kME[[3]][modGenes[[j]], j1])^2, na.rm = TRUE)
if (opt$densityOnly) {
if (opt$nType == 1) {
meanSignAwareKME[j, 1] <- mean(abs(kME[[2]][modGenes[[j]], j1]), na.rm = TRUE)
meanSignAwareKME[j, 2] <- mean(abs(kME[[3]][modGenes[[j]], j1]), na.rm = TRUE)
} else {
meanSignAwareKME[j, 1] <- abs(mean(kME[[2]][modGenes[[j]], j1], na.rm = TRUE))
meanSignAwareKME[j, 2] <- abs(mean(kME[[3]][modGenes[[j]], j1], na.rm = TRUE))
}
} else {
meanSignAwareKME[j, 1] <- abs(mean(abs(kME[[2]][modGenes[[j]], j1]), na.rm = TRUE))
meanSignAwareKME[j, 2] <- abs(mean(sign(kME[[1]][modGenes[[j]], j1]) * kME[[3]][modGenes[[j]], j1],
na.rm =
TRUE
))
}
}
}
# if (verbose > 1) printFlush(paste(spaces, "....calculating separability..."));
Separability <- matrix(NA, nMods, 2)
# for(k in (2-calculateQuality):2)
for (k in 1:2)
{
Gold <- which(colnames(ME[[k + 1]]) %in% c(opt$MEgold, opt$MEgrey))
# corME=cor(ME[[k+1]],use="p")
corExpr <- parse(text = paste(opt$corFnc, "(ME[[k+1]]", prepComma(opt$corOptions), ")"))
corME <- eval(corExpr)
if (opt$nType == 0) corME <- abs(corME)
diag(corME) <- 0
Separability[, k] <- 1 - apply(corME[, -Gold, drop = FALSE], 1, max, na.rm = TRUE)
}
# mean signed correlation&inter array correlation
# if (verbose > 1) printFlush(paste(spaces, "....calculating MeanSignAwareCorDat..."));
MeanSignAwareCorDat <- matrix(NA, nMods, 2)
ICORdat <- rep(NA, nMods)
corkIM <- rep(NA, nMods)
corCC <- rep(NA, nMods)
corMAR <- rep(NA, nMods)
MeanAdj <- matrix(NA, nMods, 2)
meanCC <- matrix(NA, nMods, 2)
meanMAR <- matrix(NA, nMods, 2)
for (j in 1:nMods) {
if (act[j]) {
if (!opt$densityOnly) {
# ModuleCorData1=cor(datRef[,modGenes[[j]]],use="p", quick = as.numeric(opt$quickCor))
corExpr <- parse(text = paste(
opt$corFnc, "(datRef[,modGenes[[j]]]", prepComma(opt$corOptions),
", quick = as.numeric(opt$quickCor))"
))
ModuleCorData1 <- eval(corExpr)
}
if (opt$calculatePermutation | opt$densityOnly) {
# ModuleCorData2=cor(datRefP[,modGenes[[j]]],use="p", quick = as.numeric(opt$quickCor))
corExpr <- parse(text = paste(
opt$corFnc, "(datRefP[,modGenes[[j]]]", prepComma(opt$corOptions),
", quick = as.numeric(opt$quickCor))"
))
ModuleCorData2 <- eval(corExpr)
} else {
ModuleCorData2 <- ModuleCorData1
}
# ModuleCorData3=cor(datTest[,modGenes[[j]]],use="p", quick = as.numeric(opt$quickCor))
corExpr <- parse(text = paste(
opt$corFnc, "(datTest[,modGenes[[j]]]", prepComma(opt$corOptions),
", quick = as.numeric(opt$quickCor))"
))
# printFlush(j);
ModuleCorData3 <- try(eval(corExpr))
if (inherits(ModuleCorData3, "try-error")) browser()
if (opt$nType == 1) {
SignedModuleCorData2 <- abs(ModuleCorData2)
} else {
SignedModuleCorData2 <- ModuleCorData2
}
if (opt$densityOnly) {
if (opt$nType == 1) {
SignedModuleCorData3 <- abs(ModuleCorData3)
} else {
SignedModuleCorData3 <- ModuleCorData3
}
} else {
SignedModuleCorData3 <- sign(ModuleCorData1) * ModuleCorData3
}
MeanSignAwareCorDat[j, 1] <- mean(as.dist(SignedModuleCorData2), na.rm = TRUE)
MeanSignAwareCorDat[j, 2] <- mean(as.dist(SignedModuleCorData3), na.rm = TRUE)
if (!opt$densityOnly) {
# ICORdat[j]=cor(c(as.dist(ModuleCorData1)),c(as.dist(ModuleCorData3)),use="p")
corExpr <- parse(text = paste(
opt$corFnc,
"(c(as.dist(ModuleCorData1)),c(as.dist(ModuleCorData3))",
prepComma(opt$corOptions), ")"
))
ICORdat[j] <- eval(corExpr)
# ICOVdat[j]=cov(c(as.dist(ModuleCorData1)),c(as.dist(ModuleCorData3)),use="p")
# spdat[j]=scalarProduct(c(as.dist(ModuleCorData1)),c(as.dist(ModuleCorData3)))
}
if (opt$nType == 1) {
if (!opt$densityOnly) adjacency1 <- ModuleCorData1^6
if (opt$calculatePermutation | opt$densityOnly) {
adjacency2 <- ModuleCorData2^6
} else {
adjacency2 <- adjacency1
}
adjacency3 <- ModuleCorData3^6
} else if (opt$nType == 2) {
if (!opt$densityOnly) adjacency1 <- ((1 + ModuleCorData1) / 2)^12
if (opt$calculatePermutation | opt$densityOnly) {
adjacency2 <- ((1 + ModuleCorData2) / 2)^12
} else {
adjacency2 <- adjacency1
}
adjacency3 <- ((1 + ModuleCorData3) / 2)^12
} else {
if (!opt$densityOnly) adjacency1 <- ModuleCorData1^6
adjacency1[ModuleCorData1 < 0] <- 0
if (opt$calculatePermutation | opt$densityOnly) {
adjacency2 <- ModuleCorData2^6
adjacency2[ModuleCorData2 < 0] <- 0
} else {
adjacency2 <- adjacency1
}
adjacency3 <- ModuleCorData3^6
adjacency3[ModuleCorData3 < 0] <- 0
}
if (opt$calculateClusterCoeff) {
ccRef <- .clusterCoeff(adjacency1)
ccRefP <- .clusterCoeff(adjacency2)
ccTest <- .clusterCoeff(adjacency3)
meanCC[j, 1] <- mean(ccRefP)
meanCC[j, 2] <- mean(ccTest)
if (!opt$densityOnly) {
corExpr <- parse(text = paste(opt$corFnc, "(ccRef, ccTest ", prepComma(opt$corOptions), ")"))
corCC[j] <- eval(corExpr)
}
}
marRef <- .MAR(adjacency1)
marRefP <- .MAR(adjacency2)
marTest <- .MAR(adjacency3)
meanMAR[j, 1] <- mean(marRefP)
meanMAR[j, 2] <- mean(marTest)
if (!opt$densityOnly) {
kIMref <- apply(adjacency1, 2, sum, na.rm = TRUE)
kIMtest <- apply(adjacency3, 2, sum, na.rm = TRUE)
corExpr <- parse(text = paste(opt$corFnc, "(kIMref, kIMtest ", prepComma(opt$corOptions), ")"))
corkIM[j] <- eval(corExpr)
corExpr <- parse(text = paste(opt$corFnc, "(marRef, marTest ", prepComma(opt$corOptions), ")"))
corMAR[j] <- eval(corExpr)
}
MeanAdj[j, 1] <- mean(as.dist(adjacency2), na.rm = TRUE)
MeanAdj[j, 2] <- mean(as.dist(adjacency3), na.rm = TRUE)
}
}
list(
modSizes = modSizes,
corkIM = corkIM, corkME = corkME, corkMEall = corkMEall,
proVar = proVar, meanSignAwareKME = meanSignAwareKME,
Separability = Separability, MeanSignAwareCorDat = MeanSignAwareCorDat, ICORdat = ICORdat,
MeanAdj = MeanAdj, meanClusterCoeff = meanCC, meanMAR = meanMAR, corCC = corCC, corMAR = corMAR
)
}
# ===================================================================================================
#
# Core calculation for adjacency
#
# ===================================================================================================
# A few supporting functions first:
#' @title .getSVDs
#'
#' @param data
#' @param colors
#'
#' @return
#'
.getSVDs <- function(data, colors) {
colorLevels <- levels(factor(colors))
nMods <- length(colorLevels)
svds <- list()
for (m in 1:nMods)
{
modGenes <- (colors == colorLevels[m])
modAdj <- data[modGenes, modGenes]
if (sum(is.na(modAdj)) > 0) {
seed <- .Random.seed
modAdj <- impute.knn(modAdj)$data
.Random.seed <<- seed
}
svds[[m]] <- svd(modAdj, nu = 1, nv = 0)
svds[[m]]$u <- c(svds[[m]]$u)
if (sum(svds[[m]]$u, na.rm = TRUE) < 0) svds[[m]]$u <- -svds[[m]]$u
}
svds
}
#' @title .kIM
#'
#' @param adj
#' @param colors
#' @param calculateAll
#'
#' @return
#'
.kIM <- function(adj, colors, calculateAll = TRUE) {
colorLevels <- levels(factor(colors))
nMods <- length(colorLevels)
nGenes <- length(colors)
kIM <- matrix(NA, nGenes, nMods)
if (calculateAll) {
for (m in 1:nMods)
{
modGenes <- colors == colorLevels[m]
kIM[, m] <- apply(adj[, modGenes, drop = FALSE], 1, sum, na.rm = TRUE)
kIM[modGenes, m] <- kIM[modGenes, m] - 1
}
} else {
for (m in 1:nMods)
{
modGenes <- colors == colorLevels[m]
kIM[modGenes, m] <- apply(adj[modGenes, modGenes, drop = FALSE], 1, sum, na.rm = TRUE) - 1
}
}
kIM
}
# Here is the main function
# Summary:
# PVE: from svd$d
# kME: from svd$u (to make it different from kIM)
# kIM: as usual
# kMEall: from kIMall
# meanSignAwareKME: from svd$u
# Separability: as in the paper
# MeanSignAwareCorDat: ??
# meanAdj: mean adjacency
# cor.cor: replace by cor.adj
#' @title .coreCalcForAdj
#'
#' @param datRef
#' @param datRefP
#' @param datTest
#' @param colors
#' @param opt
#'
#' @return
#'
.coreCalcForAdj <- function(datRef, datRefP, datTest, colors, opt) {
# printFlush(".coreCalcForAdj:entering");
colorLevels <- levels(factor(colors))
nMods <- length(colorLevels)
nGenes <- length(colors)
gold <- substring(opt$MEgold, 3)
grey <- substring(opt$MEgrey, 3)
# Flag modules whose size is 1
modSizes <- table(colors)
act <- (modSizes > 1)
svds <- list()
kIM <- list()
# printFlush(".coreCalcForAdj:getting svds and kIM");
if (!opt$densityOnly) {
svds[[1]] <- .getSVDs(datRef, colors)
kIM[[1]] <- .kIM(datRef, colors, calculateAll = opt$calculateCor.kIMall)
}
if (opt$calculatePermutation | opt$densityOnly) {
svds[[2]] <- .getSVDs(datRefP, colors)
kIM[[2]] <- .kIM(datRefP, colors, calculateAll = opt$calculateCor.kIMall)
} else {
svds[[2]] <- svds[[1]]
kIM[[2]] <- kIM[[1]]
}
svds[[3]] <- .getSVDs(datTest, colors)
kIM[[3]] <- .kIM(datTest, colors, calculateAll = opt$calculateCor.kIMall)
proVar <- matrix(NA, nMods, 2)
modGenes <- list()
for (m in 1:nMods)
{
modGenes[[m]] <- c(1:nGenes)[colors == colorLevels[m]]
proVar[m, 1] <- svds[[2]][[m]]$d[1] / sum(svds[[2]][[m]]$d)
proVar[m, 2] <- svds[[3]][[m]]$d[1] / sum(svds[[3]][[m]]$d)
}
# printFlush(".coreCalcForAdj:getting corkME and ICOR");
corkME <- rep(NA, nMods)
corkMEall <- rep(NA, nMods)
corkIM <- rep(NA, nMods)
ICORdat <- rep(NA, nMods)
if (!opt$densityOnly) {
for (m in 1:nMods) {
if (act[m]) {
nModGenes <- modSizes[m]
corExpr <- parse(text = paste(
opt$corFnc, "(svds[[1]][[m]]$u,svds[[3]][[m]]$u",
prepComma(opt$corOptions), ")"
))
corkME[m] <- abs(eval(corExpr))
if (opt$calculateCor.kIMall) {
corExpr <- parse(text = paste(
opt$corFnc, "(kIM[[1]][,m],kIM[[3]][,m] ",
prepComma(opt$corOptions), ")"
))
corkMEall[m] <- eval(corExpr)
}
corExpr <- parse(text = paste(
opt$corFnc, "(kIM[[1]][modGenes[[m]],m],kIM[[3]][modGenes[[m]],m] ",
prepComma(opt$corOptions), ")"
))
corkIM[m] <- eval(corExpr)
adj1 <- datRef[modGenes[[m]], modGenes[[m]]]
adj2 <- datTest[modGenes[[m]], modGenes[[m]]]
corExpr <- parse(text = paste(
opt$corFnc, "(c(as.dist(adj1)), c(as.dist(adj2))",
prepComma(opt$corOptions), ")"
))
ICORdat[m] <- eval(corExpr)
}
}
}
meanSignAwareKME <- matrix(NA, nMods, 2)
meankIM <- matrix(NA, nMods, 2)
for (m in 1:nMods) {
if (act[m]) {
meankIM[m, 1] <- mean(kIM[[2]][modGenes[[m]], m], na.rm = TRUE)
meankIM[m, 2] <- mean(kIM[[3]][modGenes[[m]], m], na.rm = TRUE)
if (opt$densityOnly) {
if (opt$nType == 1) {
meanSignAwareKME[m, 1] <- mean(abs(svds[[2]][[m]]$u), na.rm = TRUE)
meanSignAwareKME[m, 2] <- mean(abs(svds[[3]][[m]]$u), na.rm = TRUE)
} else {
meanSignAwareKME[m, 1] <- abs(mean(svds[[2]][[m]]$u, na.rm = TRUE))
meanSignAwareKME[m, 2] <- abs(mean(svds[[3]][[m]]$u, na.rm = TRUE))
}
} else {
meanSignAwareKME[m, 1] <- mean(abs(svds[[2]][[m]]$u), na.rm = TRUE)
meanSignAwareKME[m, 2] <- abs(mean(sign(svds[[1]][[m]]$u) * svds[[3]][[m]]$u, na.rm = TRUE))
}
}
}
MeanAdj <- matrix(NA, nMods, 2)
sepMat <- array(NA, dim = c(nMods, nMods, 2))
corCC <- rep(NA, nMods)
corMAR <- rep(NA, nMods)
meanCC <- matrix(NA, nMods, 2)
meanMAR <- matrix(NA, nMods, 2)
for (m in 1:nMods) {
if (act[m]) {
modAdj <- datRefP[modGenes[[m]], modGenes[[m]]]
if (opt$calculateClusterCoeff) ccRefP <- .clusterCoeff(modAdj)
marRefP <- .MAR(modAdj)
meanMAR[m, 1] <- mean(marRefP)
MeanAdj[m, 1] <- mean(as.dist(modAdj), na.rm = TRUE)
modAdj <- datRef[modGenes[[m]], modGenes[[m]]]
if (opt$calculateClusterCoeff) ccRef <- .clusterCoeff(modAdj)
marRef <- .MAR(modAdj)
modAdj <- datTest[modGenes[[m]], modGenes[[m]]]
if (opt$calculateClusterCoeff) ccTest <- .clusterCoeff(modAdj)
marTest <- .MAR(modAdj)
MeanAdj[m, 2] <- mean(as.dist(modAdj), na.rm = TRUE)
if (opt$calculateClusterCoeff) {
meanCC[m, 1] <- mean(ccRefP)
meanCC[m, 2] <- mean(ccTest)
corExpr <- parse(text = paste(opt$corFnc, "(ccRef, ccTest ", prepComma(opt$corOptions), ")"))
corCC[m] <- eval(corExpr)
}
meanMAR[m, 2] <- mean(marTest)
corExpr <- parse(text = paste(opt$corFnc, "(marRef, marTest ", prepComma(opt$corOptions), ")"))
corMAR[m] <- eval(corExpr)
if ((m > 1) && (colorLevels[m] != gold)) {
for (m2 in 1:(m - 1)) {
if (colorLevels[m2] != gold) {
interAdj <- datRefP[modGenes[[m]], modGenes[[m2]]]
tmp <- mean(interAdj, na.rm = TRUE)
if (tmp != 0) {
sepMat[m, m2, 1] <- mean(interAdj, na.rm = TRUE) / sqrt(MeanAdj[m, 1] * MeanAdj[m2, 1])
} else {
sepMat[m, m2, 1] <- 0
}
sepMat[m2, m, 1] <- sepMat[m, m2, 1]
interAdj <- datTest[modGenes[[m]], modGenes[[m2]]]
tmp <- mean(interAdj, na.rm = TRUE)
if (tmp != 0) {
sepMat[m, m2, 2] <- mean(interAdj, na.rm = TRUE) / sqrt(MeanAdj[m, 2] * MeanAdj[m2, 2])
} else {
sepMat[m, m2, 2] <- 0
}
sepMat[m2, m, 2] <- sepMat[m, m2, 2]
}
}
}
}
}
Separability <- matrix(NA, nMods, 2)
notGold <- colorLevels != gold
for (k in 1:2) {
Separability[notGold, k] <- 1 - apply(sepMat[notGold, notGold, k, drop = FALSE], 1, max, na.rm = TRUE)
}
MeanSignAwareCorDat <- matrix(NA, nMods, 2)
list(
modSizes = modSizes,
corkIM = corkIM, corkME = corkME, corkMEall = corkMEall,
proVar = proVar,
meanSignAwareKME = meanSignAwareKME,
meankIM = meankIM,
Separability = Separability, MeanSignAwareCorDat = MeanSignAwareCorDat, ICORdat = ICORdat,
MeanAdj = MeanAdj, meanClusterCoeff = meanCC, meanMAR = meanMAR, corCC = corCC, corMAR = corMAR
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.