In songw01/MEGENA: Multiscale Clustering of Geometrical Network

knitr::opts_chunk$set(echo = TRUE)

MEGENA pipeline as of 02/20/2020

This is a routine MEGENA pipeline description encompassing from data correlation analysis to module plotting, and is based on version 1.3.6 https://CRAN.R-project.org/package=MEGENA.Please cite the paper below when MEGENA is applied as part of your analysis:

Song W.-M., Zhang B. (2015) Multiscale Embedded Gene Co-expression Network Analysis. PLoS Comput Biol 11(11): e1004574. doi: 10.1371/journal.pcbi.1004574.

For statistical mechanics aspects involved in MEGENA, please check:

Song W.-M., Di Matteo T.and Aste T., Building Complex Networks with Platonic Solids, Physical Reivew E, 2012 Apr;85(4 Pt 2):046115.

calculate correlation

rm(list = ls()) # rm R working space

library(MEGENA)

# input parameters
n.cores <- 2; # number of cores/threads to call for PCP
doPar <-TRUE; # do we want to parallelize?
method = "pearson" # method for correlation. either pearson or spearman. 
FDR.cutoff = 0.05 # FDR threshold to define significant correlations upon shuffling samples. 
module.pval = 0.05 # module significance p-value. Recommended is 0.05. 
hub.pval = 0.05 # connectivity significance p-value based random tetrahedral networks
cor.perm = 10; # number of permutations for calculating FDRs for all correlation pairs. 
hub.perm = 100; # number of permutations for calculating connectivity significance p-value. 

# annotation to be done on the downstream
annot.table=NULL
id.col = 1
symbol.col= 2
###########

data(Sample_Expression) # load toy example data

rho.out = calculate.rho.signed(datExpr,n.perm = 10,FDR.cutoff = FDR.cutoff,estimator = method,
                               use.obs = "na.or.complete",
                               direction = "absolute",
                               rho.thresh = NULL,sort.el = TRUE)

calculate PFN

In this step, Planar Filtered Network (PFN) is calculated by taking significant correlation pairs, ijw. In the case of utilizing a different similarity measure, one can independently format the results into 3-column data frame with column names c("row","col","weight"), and make sure the weight column ranges within 0 to 1. Using this as an input to calculate.PFN() will work just as fine.

#### register multiple cores if needed: note that set.parallel.backend() is deprecated. 
run.par = doPar & (getDoParWorkers() == 1) 
if (run.par)
{
  cl <- parallel::makeCluster(n.cores)
  registerDoParallel(cl)
  # check how many workers are there
  cat(paste("number of cores to use:",getDoParWorkers(),"\n",sep = ""))
}

##### calculate PFN
el <- calculate.PFN(rho.out$signif.ijw,doPar = doPar,num.cores = n.cores,keep.track = FALSE)
g <- graph.data.frame(el,directed = FALSE)

perform clustering

MCA clustering is performed to identify multiscale clustering analysis. "MEGENA.output"" is the core output to be used in the down-stream analyses for summarization and plotting.

##### perform MCA clustering.
MEGENA.output <- do.MEGENA(g,
 mod.pval = module.pval,hub.pval = hub.pval,remove.unsig = TRUE,
 min.size = 10,max.size = vcount(g)/2,
 doPar = doPar,num.cores = n.cores,n.perm = hub.perm,
 save.output = FALSE)

###### unregister cores as these are not needed anymore.
if (getDoParWorkers() > 1)
{
  env <- foreach:::.foreachGlobals
  rm(list=ls(name=env), pos=env)
}

summarize results

summary.output <- MEGENA.ModuleSummary(MEGENA.output,
    mod.pvalue = module.pval,hub.pvalue = hub.pval,
    min.size = 10,max.size = vcount(g)/2,
    annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
    output.sig = TRUE)

if (!is.null(annot.table))
{
  # update annotation to map to gene symbols
  V(g)$name <- paste(annot.table[[symbol.col]][match(V(g)$name,annot.table[[id.col]])],V(g)$name,sep = "|")
  summary.output <- output[c("mapped.modules","module.table")]
  names(summary.output)[1] <- "modules"
}

print(head(summary.output$modules,2))
print(summary.output$module.table)

Plot some modules

You can generate refined module network plots:

library(ggplot2)
library(ggraph)

pnet.obj <- plot_module(output.summary = summary.output,PFN = g,subset.module = "c1_3",
    layout = "kamada.kawai",label.hubs.only = TRUE,
    gene.set = NULL,color.code =  "grey",
    output.plot = FALSE,out.dir = "modulePlot",col.names = c("magenta","green","cyan"),label.scaleFactor = 20,
    hubLabel.col = "black",hubLabel.sizeProp = 1,show.topn.hubs = Inf,show.legend = TRUE)

#X11();
print(pnet.obj[[1]])

plot module hierarchy

You can generate module hierarchy plot in many ways to accommodate for various needs.

module.table <- summary.output$module.table
htbl = module.table[,c("module.parent","module.id")]

### hierarchy plot with labels
hplot = get_module_hierarchy_graph(htbl,max.depth = 5,anchor.mid = NULL,h.scale = NULL,is.circular = FALSE,layout = "dendrogram",add_names = TRUE)
print(hplot$pobj)

### hierarchy plot with piechart to represent multiple attributes of module
hplot = get_module_hierarchy_graph(htbl,max.depth = 5,anchor.mid = NULL,h.scale = NULL,is.circular = FALSE,layout = "dendrogram",add_names = FALSE)

# get a mock piechart matrix
pie.data = matrix(runif(nrow(hplot$pobj$data)*3,0,1),nrow = nrow(hplot$pobj$data))
colnames(pie.data) = LETTERS[1:ncol(pie.data)]

# update with pie charts
library(scatterpie)
pie.dat = data.frame(x = hplot$pobj$data$x,y = hplot$pobj$data$y,as.data.frame(pie.data))

pobj.pie = hplot$pobj + geom_scatterpie(data=pie.dat,aes(x=x, y=y,r = 0.1), 
                                cols=colnames(pie.data),alpha = 0.5) + coord_equal() + 
    theme_bw() + theme(axis.line = element_blank(),axis.ticks = element_blank(),
                       axis.title = element_blank(),axis.text = element_blank(),
                       panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
                       panel.border = element_blank())

# update module names
library(ggrepel)
pobj.pie = pobj.pie + geom_text_repel(data = pobj.pie$data,aes(x = x,y = y,label = name),vjust = -2.5)
print(pobj.pie)

Plot module characteristics with sunburst plot

Another alternative is to plot sunbursts reflecting MEGENA hierarchy.

# no coloring 
sbobj1 = draw_sunburst_wt_fill(module.df = summary.output$module.table,
  feat.col = NULL,id.col = "module.id",parent.col = "module.parent")
print(sbobj1)

# get some coloring (with log transform option)
mdf= summary.output$module.table
mdf$heat.pvalue = runif(nrow(mdf),0,0.1)

sbobj2 = draw_sunburst_wt_fill(module.df = mdf,feat.col = "heat.pvalue",log.transform = TRUE,
  fill.type = "continuous",
  fill.scale = scale_fill_gradient2(low = "white",mid = "white",high = "red",
  midpoint = -log10(0.05),na.value = "white"), 
  id.col = "module.id",parent.col = "module.parent")
print(sbobj2)

# get discrete coloring done
mdf$category = factor(sample(x = c("A","B"),size = nrow(mdf),replace = TRUE))
sbobj3 = draw_sunburst_wt_fill(module.df = mdf,feat.col = "category",
  fill.type = "discrete",
  fill.scale = scale_fill_manual(values = c("A" = "red","B" = "blue")), 
  id.col = "module.id",parent.col = "module.parent")
print(sbobj3)

## Now, use viewport function to organize sunbursts in an array.
library(grid)
# organize plots into a list
plotlist = list(sbobj1,sbobj2,sbobj3)

# create grid of subplots
grid.newpage()
pushViewport(viewport(layout = grid.layout(3,1)))

for (i in 1:length(plotlist))
{
  print(plotlist[[i]], vp = viewport(layout.pos.col = 1,layout.pos.row = i))
}