# set global chunk options library(knitr) opts_chunk$set(size='tiny')
Example of WGCNA analysis. The package is available on CRAN. The tutorial available at UCLA web-site. Here we use our own ovarian cancer CPTAC data set.
library(WGCNA) options(stringsAsFactors = FALSE) enableWGCNAThreads() disableWGCNAThreads() # on RStudio for some reason multithreading does not work library(vp.misc)
data("cptac_oca")
WGCNA seems to be tolerant to missing values, but we subset the data for the sake of faster computation.
oca.set <- oca.set[complete.cases(exprs(oca.set)),]
Scanning powers to find the ones that produce scale-free topology.
powers = 1:20 sft <- pickSoftThreshold(t(exprs(oca.set)), powerVector = powers, networkType = "signed", verbose = 0)
par(mfrow = c(1,2)) cex1 = 0.9 plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)", ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")) text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red") # this line corresponds to using an R^2 cut-off of h abline(h=0.90,col="red") # Mean connectivity as a function of the soft-thresholding power plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)", ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1, col="red")
The lowest power that results in crossing in 0.9 threshold is 10. Therefore we'll set it as a final value.
softPower = 10
The recommended type of correlation is signed. That way correlations +1
and -1 will have different meaning. In case of unsigned correlation the
sign obviosly does not matter and genes with +1 correlation as close to
each other as -1 correlation. In case of (recommended) signed correlation
the adjacency (or 1 - distance) is calculated as
adjacency = (0.5*(1+cor))^power
.
adjacency.mat <- adjacency(t(exprs(oca.set)), power = softPower, type="signed")
The gene-gene adjacencies converted to topological overlap. This helps to "smooth" correlation structure as it relies not only direct gene-gene correlations, but also the correlations with genes that are connected in common.
TOM <- TOMsimilarity(adjacency.mat, TOMType = "unsigned")
Finally adjacency or its analog, topological overlap, converted to distance.
dissTOM = 1-TOM
Calling base R hclust
function to perform hierarchical clustering.
geneTree <- hclust(as.dist(dissTOM), method = "average")
Plot the resulting clustering tree
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", labels = FALSE, hang = 0.04)
Cutting the tree to identify the modules.
minModuleSize <- 20 # 20 is default dynamicMods <- cutreeDynamic(dendro = geneTree, distM = dissTOM, # deepSplit = 2, # pamRespectsDendro = FALSE, # method= "tree", minClusterSize = minModuleSize)
As it is evident from the commented code, it is possible to define coloring in
multiple ways. It is practical to take advantage of WGCNA's labels2colors
because they provide human-readable names to the modules. Although
leveraging RColorBrewer
may provide better colors, however the names
will be hex color codes, which makes invonvenient naming.
# Convert numeric lables into colors library(RColorBrewer) # dynamicColors <- labels2colors(dynamicMods, colorSeq = brewer.pal(11,'Spectral')) # this `standardColors()` is a bit nicer as the colors come with names dynamicColors <- labels2colors(dynamicMods, colorSeq = standardColors()) # Plot the dendrogram and colors underneath plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors")
Modules can be merged if they are too similar. The similarity is quantified as correlation between the eigengenes. Threshold for module merged visualized on the dendrogram.
Calculating eigengenes.
MEList <- moduleEigengenes(t(exprs(oca.set)), colors = dynamicColors) MEs <- MEList$eigengenes
Calculate distances between module eigengenes. This is equivalent to signed correlation distance. That is +1 correlation is zero distance or very similar modules, while -1 are the furthest modules possible.
MEDiss <- 1 - cor(MEs)
Cluster module eigengenes to find out which are to be merged (assuming the distance is no larger then a predefined threshold).
METree <- hclust(as.dist(MEDiss), method = "average")
Plot the cut line into the dendrogram.
plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "") MEDissThres <- 0.5 # default value is 0.2. 0.5 here is for demo purpose abline(h=MEDissThres, col = "red")
Call an automatic merging function. The threshold is defined above
MEDissThres =
r MEDissThres
. These are going to be the final modules.
merge <- mergeCloseModules(t(exprs(oca.set)), dynamicColors, cutHeight = MEDissThres, verbose = 3) moduleColors <- merge$colors
Show both original and merged modules.
plotDendroAndColors(geneTree, cbind(dynamicColors, moduleColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, marAll = c(1, 7, 3, 1))
Recalculating eigengenes.
MEs0 <- moduleEigengenes(t(exprs(oca.set)), moduleColors)$eigengenes MEs <- orderMEs(MEs0)
Preparing traits data.
datTraits <- pData(oca.set)[,grep('binary', varLabels(oca.set))] colnames(datTraits) <- sub('.binary','',colnames(datTraits)) head(datTraits)
Calculating correlations and associated p-values.
moduleTraitCor = cor(MEs, datTraits, use = "p"); moduleTraitPvalue = corPvalueStudent(moduleTraitCor, ncol(oca.set));
Displaying correlations and their p-values as a heatmap.
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = ""); textMatrix <- matrix(textMatrix, ncol = ncol(moduleTraitCor)) par(mar = c(6, 8, 3, 2)) labeledHeatmap(Matrix = moduleTraitCor, xLabels = colnames(datTraits), yLabels = names(MEs), ySymbols = names(MEs), colorLabels = FALSE, # colors = greenWhiteRed(50), colors = blueWhiteRed(50), textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.75, zlim = c(-1,1), main = paste("Module-Trait Association (p-value)"))
# split by module color features <- sub('\\.\\d+','',featureNames(oca.set)) clustList <- tapply(features, moduleColors, c) library(org.Hs.eg.db) clustList <- lapply(clustList, intersect, mappedRkeys(org.Hs.egREFSEQ2EG)) clustEG <- lapply(clustList, function(x) sapply(as.list(org.Hs.egREFSEQ2EG[x]), `[`, 1)) library(clusterProfiler) library(ReactomePA) xx <- compareCluster(clustEG, fun="enrichPathway", organism="human", pvalueCutoff=0.05, qvalueCutoff=0.05, pAdjustMethod="none", universe=unlist(clustEG)) # plot(xx, colorBy="qvalue") dotplot(xx)
library(dplyr) head(summary(xx) %>% dplyr::select(-geneID)) %>% kable()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.