#Ensemble of Gene Set Enrichment Analyses
#
# Author: Monther Alhamdoosh, E:m.hamdoosh@gmail.com
###############################################################################
# Plot GO graphs using topGO package
plotGOGraphs <- function(egsea.results, gs.annot, report.dir, sort.by,
verbose){
message(" GO graphs are being generated for top-ranked GO terms\n",
"based on ", sort.by, " ... ")
tryCatch({
go.dir = paste0(report.dir, "/go-graphs/")
if (!dir.exists(go.dir))
dir.create(file.path(go.dir), showWarnings = FALSE)
contrast.names = names(egsea.results)
file.name = paste0(go.dir, sub(" - ", "-", contrast.names), "-",
gs.annot@label, "-top-", sort.by, "-")
if (file.exists(paste0(file.name[length(file.name)], "CC.png")))
return()
topGOdata = loadTopGOdata(gs.annot, go.dir)
noSig = 5
for (i in 1:length(contrast.names)){
if (verbose)
message(contrast.names[i])
generateGOGraphs(egsea.results[[i]], gs.annot, sort.by,
file.name[i], topGOdata, noSig)
}
},
error = function(err){
warning("GO graphs were not generated." ,
"They might not be supported under your current OS.")
})
}
plotGOGraphs.comparison <- function(egsea.results, gs.annot, report.dir, sort.by){
message(" GO graphs are being generated for top-ranked COMPARISON\n",
"GO terms based on ", sort.by, " ... ")
tryCatch({
go.dir = paste0(report.dir, "/go-graphs/")
if (!dir.exists(go.dir))
dir.create(file.path(go.dir), showWarnings = FALSE)
file.name = paste0(go.dir, "comparison-",
gs.annot@label, "-top-", sort.by, "-")
if (file.exists(paste0(file.name, "CC.png")))
return()
topGOdata = loadTopGOdata(gs.annot, go.dir)
noSig = 5
generateGOGraphs(egsea.results, gs.annot, sort.by,
file.name, topGOdata, noSig)
},
error = function(err){
warning("GO graphs were not generated." ,
"They might not be supported under your current OS.")
})
}
loadTopGOdata <- function(gs.annot, go.dir=NULL){
if (!is.null(go.dir) && file.exists(paste0(go.dir, "topGOdata.rda"))){
load(paste0(go.dir, "topGOdata.rda"))
return(topGOdata)
}else{
if (tolower(gs.annot@species) %in% c("human", "homo sapiens")){
mappingDB = "org.Hs.eg.db"
}else if (tolower(gs.annot@species) %in% c("mouse", "mus musculus")){
mappingDB = "org.Mm.eg.db"
}else if (tolower(gs.annot@species) %in% c("rat", "rattus norvegicus")){
mappingDB = "org.Rn.eg.db"
}
gl = rep(0, length(gs.annot@featureIDs))
names(gl) = gs.annot@featureIDs
capture.output(topGOdataBP <- new("topGOdata",ontology = "BP", allGenes = gl,
geneSel = topDiffGenes, nodeSize = 10, annot =
annFUN.org, mapping=mappingDB))
capture.output(topGOdataMF <- new("topGOdata",ontology = "MF", allGenes = gl,
geneSel = topDiffGenes, nodeSize = 10, annot =
annFUN.org, mapping=mappingDB))
capture.output(topGOdataCC <- new("topGOdata",ontology = "CC", allGenes = gl,
geneSel = topDiffGenes, nodeSize = 10, annot =
annFUN.org, mapping=mappingDB))
topGOdata = list(BP=topGOdataBP, MF=topGOdataMF, CC=topGOdataCC)
if (!is.null(go.dir))
save(topGOdata, file=paste0(go.dir, "topGOdata.rda"))
return(topGOdata)
}
}
#file.name : prefix
generateGOGraphs <- function(egsea.results, gs.annot, sort.by,
file.name, topGOdata = NULL, noSig = 5, format = NULL){
if (is.null(topGOdata)){
topGOdata = loadTopGOdata(gs.annot)
}
go.subsets = list()
go.ids = as.character(gs.annot@anno[
match(rownames(egsea.results),
gs.annot@anno[, "GeneSet"]), "GOID"])
go.subsets[["BP"]] = go.ids[go.ids %in% topGOdata[["BP"]]@graph@nodes]
go.subsets[["MF"]] = go.ids[go.ids %in% topGOdata[["MF"]]@graph@nodes]
go.subsets[["CC"]] = go.ids[go.ids %in% topGOdata[["CC"]]@graph@nodes]
scores = egsea.results[, sort.by]
max.score = 1.001
if (max(scores) > 1)
scores = (scores - min(scores)) / (max(scores) -
min(scores))
scores = scores + 0.001
names(scores) = as.character(
gs.annot@anno[match(rownames(
egsea.results),
gs.annot@anno[, "GeneSet"]), "GOID"])
# Generate the BP graph
if (length(go.subsets[["BP"]]) > 0)
tryCatch({
scores.sub = rep(max.score,
length(topGOdata[["BP"]]@graph@nodes))
names(scores.sub) = topGOdata[["BP"]]@graph@nodes
scores.sub[go.subsets[["BP"]]] =
scores[go.subsets[["BP"]]]
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, "BP.pdf"))
showSigOfNodes(topGOdata[["BP"]], scores.sub,
firstSigNodes=noSig,
useInfo='def', sigForAll=FALSE) # or
# use printGraph to write out plot to file
dev.off()
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, "BP.png"), width=800,
height=800)
showSigOfNodes(topGOdata[["BP"]], scores.sub,
firstSigNodes=noSig,
useInfo='def', sigForAll=FALSE) # or
# use printGraph to write out plot to file
dev.off()
}
}, error=function(err){
dev.off()
file.remove(paste0(file.name, "BP.pdf"))
file.remove(paste0(file.name, "BP.png"))
warning("EGSEA_ERROR: ",err)
}
)
# stop("here")
# write the MF graph
if (length(go.subsets[["MF"]]) > 0)
tryCatch({
scores.sub = rep(max.score,
length(topGOdata[["MF"]]@graph@nodes))
names(scores.sub) = topGOdata[["MF"]]@graph@nodes
scores.sub[go.subsets[["MF"]]] =
scores[go.subsets[["MF"]]]
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, "MF.pdf"))
showSigOfNodes(topGOdata[["MF"]], scores.sub,
firstSigNodes=noSig,
sigForAll=FALSE, useInfo='def') # or
# use printGraph to write out plot to file
dev.off()
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, "MF.png"), width=800,
height=800)
showSigOfNodes(topGOdata[["MF"]], scores.sub,
firstSigNodes=noSig,
sigForAll=FALSE, useInfo='def') # or
# use printGraph to write out plot to file
dev.off()
}
}, error=function(err){
dev.off()
file.remove(paste0(file.name, "MF.pdf"))
file.remove(paste0(file.name, "MF.png"))
warning("EGSEA_ERROR: ",err)
}
)
# write the CC graph
if (length(go.subsets[["CC"]]) > 0)
tryCatch({
scores.sub = rep(max.score,
length(topGOdata[["CC"]]@graph@nodes))
names(scores.sub) = topGOdata[["CC"]]@graph@nodes
scores.sub[go.subsets[["CC"]]] =
scores[go.subsets[["CC"]]]
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, "CC.pdf"))
showSigOfNodes(topGOdata[["CC"]], scores.sub,
firstSigNodes=noSig,
sigForAll=FALSE, useInfo='def') # or
# use printGraph to write out plot to file
dev.off()
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, "CC.png"), width=800,
height=800)
showSigOfNodes(topGOdata[["CC"]], scores.sub,
firstSigNodes=noSig,
sigForAll=FALSE, useInfo='def') # or
# use printGraph to write out plot to file
dev.off()
}
}, error=function(err){
dev.off()
file.remove(paste0(file.name, "CC.pdf"))
file.remove(paste0(file.name, "CC.png"))
warning("EGSEA_ERROR: ",err)
}
)
}
topDiffGenes <- function (allScore) {
return(allScore < 0.01)
}
# Plot summary plots for each contrast and comparisons
generateSumPlots <- function(egsea.results, baseGSEAs, gs.annot, report.dir,
sum.plot.cutoff=1, sum.plot.axis="p.value",interactive=FALSE){
message(" Summary plots are being generated ... ")
summary.dir = paste0(report.dir, "/summary/")
contrast.names = names(egsea.results)
for(i in 1:length(egsea.results)){
file.name = paste0(summary.dir, sub(" - ", "-",
contrast.names[i]), "-", gs.annot@label,
"-summary-", sum.plot.axis)
if (!file.exists(paste0(file.name, "-dir.png"))){
plot.data = generatePlotData(egsea.results[[i]],
gs.annot, sum.plot.cutoff, sum.plot.axis)
if (sum.plot.axis %in% c("p.value", "p.adj"))
generateSummaryPlots(plot.data, file.name, summary.dir,
interactive=interactive)
else
generateSummaryPlots(plot.data, file.name, summary.dir,
Xlab = paste0("-", sum.plot.axis),interactive=interactive)
}
file.name = paste0(summary.dir, sub(" - ", "-",
contrast.names[i]), "-", gs.annot@label, "-methods")
if (length(baseGSEAs) > 1 && !file.exists(paste0(file.name,
".png")))
generateMDSMethodsPlot(egsea.results[[i]], baseGSEAs,
file.name)
}
}
generateSumPlots.comparison <- function(egsea.results, egsea.comparison, gs.annot,
report.dir,
sum.plot.cutoff=1, sum.plot.axis="p.value",interactive=FALSE){
message(" Comparison summary plots are being generated ...")
summary.dir = paste0(report.dir, "/summary/")
dir.create(file.path(summary.dir), showWarnings = FALSE)
contrast.names = names(egsea.results)
if (length(contrast.names) == 2){
generateSummaryPlots.comparison(egsea.results,
egsea.comparison, gs.annot,
sum.plot.axis, sum.plot.cutoff,
file.prefix="", summary.dir,interactive=interactive)
}
else if (length(contrast.names) > 2){
message(" There are more than 2 contrasts!")
for (i in 1:(length(contrast.names)-1)){
for (j in (i+1):length(contrast.names)){
egsea.results.sub = egsea.results[c(i,j)]
generateSummaryPlots.comparison(egsea.results.sub,
egsea.comparison, gs.annot,
sum.plot.axis, sum.plot.cutoff, file.prefix =
paste0('-', i,j), summary.dir,interactive=interactive)
}
}
}
}
generateSummaryPlots.comparison <- function(egsea.results, egsea.comparison,
gs.annot, sum.plot.axis, sum.plot.cutoff,
file.prefix, summary.dir,interactive=FALSE){
file.name = paste0(summary.dir, gs.annot@label, file.prefix,
"-summary-", sum.plot.axis)
if (!file.exists(paste0(file.name, "-dir.png"))){
contrast.names = names(egsea.results)
plot.data = generatePlotData.comparison(egsea.results,
egsea.comparison, gs.annot,
sum.plot.axis, sum.plot.cutoff)
if (sum.plot.axis %in% c("p.value", "p.adj")){
generateSummaryPlots(plot.data, file.name, summary.dir,
paste0("-log10(p-value) for ",
contrast.names[1]),
paste0("-log10(p-value) for ",
contrast.names[2]),interactive=interactive)
}else{
generateSummaryPlots(plot.data, file.name, summary.dir,
paste0("-", sum.plot.axis, " for ",
contrast.names[1]),
paste0("-", sum.plot.axis, " for ",
contrast.names[2]),interactive=interactive)
}
}
}
generatePlotData.comparison <- function(egsea.results.two, egsea.comparison,
gs.annot, sum.plot.axis, sum.plot.cutoff,
use.names = FALSE){
if (is.null(sum.plot.cutoff)){
if (sum.plot.axis %in% c("p.value", "p.adj"))
sum.plot.cutoff = 1
else
sum.plot.cutoff = 10000
}
tmp = egsea.results.two[[1]]
gsets1 = as.character(rownames(tmp)) [tmp[, sum.plot.axis] <= sum.plot.cutoff]
tmp = egsea.results.two[[2]]
gsets2 = as.character(rownames(tmp)) [tmp[, sum.plot.axis] <= sum.plot.cutoff]
gsets = intersect(gsets1, gsets2)
# r1 = match(gsets, gsets1)
# r2 = match(gsets, gsets2)
# print(length(egsea.results.all))
# print(length(gsets))
# print(head(fc))
n = length(egsea.results.two)
pvalues.all = matrix(0, length(gsets), n)
gs.dirs.all = matrix(0, length(gsets), n)
sig.combined = 0
for (i in 1:n){
egsea.results = egsea.results.two[[i]]
if ( sum.plot.axis %in% c("p.value", "p.adj")){
pvalues = egsea.results[gsets, sum.plot.axis]
#pvalues[pvalues == 0] = 1*10^-22
pvalues = -1 * log10(pvalues)
pvalues[pvalues == Inf] = max(pvalues[pvalues != Inf]) + 50
pvalues[is.na(pvalues)] = 0
#pvalues[is.na(pvalues)] = max(pvalues, na.rm=TRUE) + 1
}else{
pvalues = - egsea.results[gsets, sum.plot.axis]
}
sig.combined = sig.combined +
egsea.results[gsets, "significance"]
pvalues.all[, i] = pvalues
gs.dirs.all[, i] = egsea.results[gsets, "direction"]
}
gs.dirs = rowMeans(gs.dirs.all, na.rm=TRUE)
gs.sizes = as.numeric(sapply(
as.character(gs.annot@anno[gsets, "NumGenes"]),
function(x) strsplit(x, split="/", fixed=TRUE)[[1]][2]))
sig.combined = sig.combined / n
rank = match(gsets, rownames(egsea.comparison))
if (!use.names)
labels = gs.annot@anno[gsets, "ID"]
else
labels = gs.annot@anno[gsets, "GeneSet"]
plot.data = data.frame(id=labels,
x.data=pvalues.all[,1], y.data=pvalues.all[,2],
gsSize=gs.sizes, sig=sig.combined, dir=gs.dirs, rank =
rank)
plot.data = plot.data[order(plot.data[, "rank"]), ]
return(plot.data)
}
generatePlotData <- function(egsea.results, gs.annot,
sum.plot.cutoff, sum.plot.axis, use.names = FALSE){
if (is.null(sum.plot.cutoff)){
if (sum.plot.axis %in% c("p.value", "p.adj"))
sum.plot.cutoff = 1
else
sum.plot.cutoff = 10000
}
x.data = egsea.results[, sum.plot.axis]
gsets = as.character(rownames(egsea.results))[x.data <= sum.plot.cutoff]
x.data = x.data[x.data <= sum.plot.cutoff]
if (sum.plot.axis %in% c("p.value", "p.adj")){
#x.data[x.data == 0] = 1*10^-22
x.data = -1 * log10(x.data)
x.data[x.data == Inf] = max(x.data[x.data != Inf]) + 50
x.data[is.na(x.data)] = 0
#x.data[is.na(x.data)] = max(x.data, na.rm=TRUE) + 1
}
else{
x.data = - x.data
}
rank = seq(1, length(gsets))
gs.sizes = as.numeric(sapply(as.character(gs.annot@anno[gsets,
"NumGenes"]),
function(x) strsplit(x, split="/",
fixed=TRUE)[[1]][2]))
if (!use.names)
labels = gs.annot@anno[gsets, "ID"]
else
labels = gs.annot@anno[gsets, "GeneSet"]
plot.data = data.frame(id=labels ,
x.data=x.data, y.data=egsea.results[gsets, "avg.logfc"],
gsSize=gs.sizes, sig=egsea.results[gsets, "significance"],
dir=egsea.results[gsets, "direction"], rank = rank)
# print(head(plot.data))
return(plot.data)
}
# Generate MDS plot for the rankings of different methods
generateMDSMethodsPlot <- function(egsea.results, baseGSEAs, file.name, format=NULL){
ranks = egsea.results[, baseGSEAs]
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, ".pdf"), width=6, height=6)
limma::plotMDS(x=ranks, labels=baseGSEAs, col = "#4d4d4d",
xlab="Leading rank dim 1",
ylab="Leading rank dim 2", cex=1)
dev.off()
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, ".png"), width=800, height=800)
limma::plotMDS(x=ranks, labels=baseGSEAs, col = "blue",
xlab="Leading rank dim 1",
ylab="Leading rank dim 2")
dev.off()
}
}
# Generate summary plots based on regulation direction and gene set ranking.
generateSummaryPlots <- function(plot.data, file.name, file.dir, Xlab="-log10(p-value)",
Ylab="Average Absolute logFC", format = NULL, interactive=FALSE){
tryCatch({
plot.data.sig = plot.data[plot.data[, "rank"] <= 10, ]
sig.cols = rep("black", nrow(plot.data.sig))
if (min(plot.data[, "x.data"], na.rm=TRUE) > 0){
xlimits = c(0.8 * min(plot.data[, "x.data"], na.rm=TRUE),
max(plot.data[, "x.data"], na.rm=TRUE)*1.05)
}else{
xlimits = c(1.05 * min(plot.data[, "x.data"], na.rm=TRUE),
max(plot.data[, "x.data"], na.rm=TRUE)*0.8)
}
if (max(plot.data[, "y.data"], na.rm=TRUE) > 0){
ylimits = c(min(plot.data[, "y.data"], na.rm=TRUE),
max(plot.data[, "y.data"], na.rm=TRUE) * 1.05)
}else{
ylimits = c(min(plot.data[, "y.data"], na.rm=TRUE),
max(plot.data[, "y.data"], na.rm=TRUE) * 0.9)
}
# print(plot.data.sig)
# print(dim(plot.data))
# plot rank-based coloured bubbles
p = qplot(x.data, y.data, data=plot.data, size=gsSize,asp=1,
colour=rank,
xlab = Xlab, ylab = Ylab,
xlim=xlimits,
ylim=ylimits)
# customize bubbles colour
p = p + scale_colour_gradient(guide="colourbar", low="#56B1F7",
high="#000000",
limits=c(1,100), na.value="black", name="Rank")
# customize bubble size
p = p + scale_size("Cardinality", range=c(2,20))
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, "-rank.pdf"), width = 10, height = 7,
useDingbats = FALSE)
# label the bubbles of the top 10 gene sets
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
dev.off()
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, "-rank.png"), width = 800, height = 700)
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
dev.off()
}
# print("here")
if(interactive){
saveWidget(widget=ggplotly(p), selfcontained=FALSE,
libdir=file.path(file.dir,"lib"),
file=paste0(file.name, "-rank.html"),
list(fig.width = 800, fig.height = 800)) # htmlwidgets::saveWidget
}
# print("here1")
# plot direction-based coloured bubbles
top.10.ids = as.character(plot.data[plot.data[, "rank"] <= 10,
"id"])
sig.ids = setdiff(plot.data[rank(-plot.data[,"sig"], na.last =
TRUE) <= 5, "id"], top.10.ids)
sig.cols = c(rep("black", length(top.10.ids)), rep("blue",
length(sig.ids)))
plot.data.sig = plot.data[match(c(top.10.ids, sig.ids),
plot.data[, "id"]), ]
p = qplot(x.data, y.data, data=plot.data, size=sig,asp=1,
colour=dir,
xlab = Xlab, ylab = Ylab,
xlim=xlimits,
ylim=ylimits)
p = p + scale_colour_gradient(guide="colourbar", low="#56B1F7",
high="#E35F5F",
limits=c(-1,1), na.value="black",
name="Regulation Direction") # low="#5FE377"
p = p + scale_size("significance", range=c(2,20))
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, "-dir.pdf"), width = 10, height = 7,
useDingbats = FALSE)
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
dev.off()
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, "-dir.png"), width = 800, height = 700)
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
dev.off()
}
if(interactive){
saveWidget(widget=ggplotly(p), selfcontained=FALSE,
libdir=file.path(file.dir,"lib"),
file=paste0(file.name, "-dir.html")) # htmlwidgets::saveWidget
}
},
error = function(e){
warning("Summary plots were not generated for ",
file.name)
})
}
# Plot top KEGG pathways using the pathview package
#
# Plot top KEGG pathways using the pathview package and overlay fold change
# data on them.
plotPathways = function(gene.sets, fc, gs.annot, report.dir, kegg.dir,
verbose=FALSE){
#TODO: adjust the limit of the expression values
message(" Pathway maps are being generated for top-ranked \n pathways based ",
"on logFC ... ")
current = getwd()
contrast.names = colnames(fc)
if (is.null(kegg.dir))
kegg.dir = paste0(report.dir, "/kegg-dir/")
dir.create(file.path(kegg.dir), showWarnings = FALSE)
kegg.dir = normalizePath(kegg.dir)
for(i in 1:ncol(fc)){
if (verbose)
message(" ", contrast.names[i])
pv.dir = paste0(report.dir, "/pv-top-gs-", gs.annot@label, "/",
sub(" - ", "-", contrast.names[i]))
dir.create(file.path(pv.dir), showWarnings = FALSE, recursive =
TRUE)
setwd(pv.dir)
for (gene.set in gene.sets[[i]]){
id = as.character(gs.annot@anno[match(gene.set,
gs.annot@anno[, "GeneSet"]), "ID"])
if (file.exists(paste0(id,".pathview.png"))){
next
}
generatePathway(gene.set, gs.annot, fc[,i], kegg.dir, verbose)
}
}
setwd(current)
}
generatePathway <- function(gene.set, gs.annot, fc, kegg.dir="./",
verbose=FALSE, file.name = NULL){
id = as.character(gs.annot@anno[match(gene.set,
gs.annot@anno[, "GeneSet"]), "ID"])
if (verbose)
pathview(gene.data=fc, pathway.id = id,
species =
species.fullToShort[[tolower(gs.annot@species)]],
kegg.dir=kegg.dir, kegg.native=TRUE,
res=600,
limit=list(gene=c(-2,2), cpd=1),
bins=list(gene=20, cpd=10),
low = list(gene = "blue", cpd =
"green"))
else
suppressMessages(
pathview(gene.data=fc, pathway.id = id,
species =
species.fullToShort[[tolower(gs.annot@species)]],
kegg.dir=kegg.dir,
kegg.native=TRUE, res=600,
limit=list(gene=c(-2,2),
cpd=1), bins=list(gene=20, cpd=10),
low = list(gene = "blue", cpd =
"green")))
if (!is.null(file.name)){
pathway.file = paste0("./", id, ".pathview.png")
if (file.exists(pathway.file)){
file.rename(pathway.file, paste0(file.name, ".png"))
file.remove(paste0(kegg.dir, id, ".png"))
file.remove(paste0(kegg.dir, id, ".xml"))
}else{
warning("EGSEA could not generate the pathway map image.")
}
}
}
plotPathways.comparison <- function(gene.sets, fc, gs.annot, report.dir, kegg.dir,
verbose=FALSE){
message(" Pathway maps are being generated for top-ranked comparative\n",
"pathways based on logFC ... ")
current = getwd()
if (is.null(kegg.dir))
kegg.dir = paste0(report.dir, "/kegg-dir/")
dir.create(file.path(kegg.dir), showWarnings = FALSE)
kegg.dir = normalizePath(kegg.dir)
pv.dir = paste0(report.dir, "/pv-top-gs-", gs.annot@label, "/")
setwd(pv.dir)
for (gene.set in gene.sets){
id = gs.annot@anno[match(gene.set, gs.annot@anno[, "GeneSet"]), "ID"]
if (file.exists(paste0(id, ".pathview.multi.png"))){
next
}
generateComparisonPathway(gene.set, gs.annot, fc, kegg.dir, verbose)
}
setwd(current)
}
generateComparisonPathway <- function(gene.set, gs.annot, fc, kegg.dir="./",
verbose=FALSE, file.name = NULL){
id = as.character(gs.annot@anno[match(gene.set,
gs.annot@anno[, "GeneSet"]), "ID"])
if (!verbose)
suppressMessages(pathview(gene.data=fc, pathway.id = id,
species = gs.annot@species, kegg.dir=kegg.dir,
kegg.native=TRUE, res=600,
limit=list(gene=c(-2,2), cpd=1), bins=list(gene=20,
cpd=10),
key.align="y", key.pos="topright",
multi.state=TRUE,
low = list(gene = "blue", cpd = "green")))
else
pathview(gene.data=fc, pathway.id = id,
species = gs.annot@species, kegg.dir=kegg.dir,
kegg.native=TRUE, res=600,
limit=list(gene=c(-2,2), cpd=1), bins=list(gene=20,
cpd=10),
key.align="y", key.pos="topright", multi.state=TRUE,
low = list(gene = "blue", cpd = "green"))
if (!is.null(file.name)){
pathway.file = paste0("./", id, ".pathview.multi.png")
if (file.exists(pathway.file)){
file.rename(pathway.file, paste0(file.name, ".png"))
file.remove(paste0(kegg.dir, id, ".png"))
file.remove(paste0(kegg.dir, id, ".xml"))
}else{
warning("EGSEA could not generate the pathway map image.")
}
}
}
plotHeatMapsLogFC.comparison <- function(gene.sets, fc, limma.tops, gs.annot, symbolsMap,
report.dir){
hm.dir = paste0(report.dir, "/hm-top-gs-", gs.annot@label, "/")
for (gene.set in gene.sets){
id = gs.annot@anno[match(gene.set, gs.annot@anno[, "GeneSet"]),
"ID"]
file.name = paste0(hm.dir, "/", id, ".heatmap.multi.png")
#print(id)
if (file.exists(file.name)){
#print(" Heat map has been already generated.")
next
}
#print(head(fc))
generateHeatMap(gene.set, gs.annot, fc, limma.tops, symbolsMap, sub(".png",
"", file.name))
}
}
#Plot heat maps for each gene set in a gene sets annotation object
plotHeatMapsLogFC = function(gene.sets, fc, limma.tops, gs.annot, symbolsMap,
report.dir){
# create output directory for heatmaps
message(" Heat maps are being generated for top-ranked gene sets \n",
"based on logFC ... ")
if (!identical(rownames(fc), gs.annot@featureIDs)){
fc = fc[match(gs.annot@featureIDs, rownames(fc)) , ]
if (!identical(rownames(fc), gs.annot@featureIDs)){
stop("The row names of the fold change matrix should ",
"match the featureIDs vector in the gs.annot list")
}
}
if (nrow(symbolsMap) > 0 && !identical(as.character(symbolsMap[,1]),
gs.annot@featureIDs)){
symbolsMap = symbolsMap[match(as.character(symbolsMap[,1]),
gs.annot@featureIDs) , ]
if (!identical(as.character(symbolsMap[,1]),
gs.annot@featureIDs))
stop("All featureIDs in the gs.annot list should map to
a valid gene symbol")
}
contrast.names = colnames(fc)
for(i in 1:ncol(fc)){
hm.dir = paste0(report.dir, "/hm-top-gs-", gs.annot@label, "/",
sub(" - ", "-", contrast.names[i]))
dir.create(file.path(hm.dir), showWarnings = FALSE,
recursive = TRUE)
for (gene.set in gene.sets[[i]]){
id = gs.annot@anno[match(gene.set,
gs.annot@anno[, "GeneSet"]), "ID"]
file.name = paste0(hm.dir, "/", id, ".heatmap.png")
if (file.exists(file.name)){
next
}
##### generate heat map here
if (length(limma.tops) > 0)
generateHeatMap(gene.set, gs.annot, fc[, i],
limma.tops[contrast.names[i]],
symbolsMap, sub(".png", "", file.name))
else
generateHeatMap(gene.set, gs.annot, fc[, i],
limma.tops,
symbolsMap, sub(".png", "", file.name))
}
}
}
generateHeatMap <- function(gene.set, gs.annot, fc, limma.tops, symbolsMap,
file.name, format=NULL, print.csv=TRUE,
fc.colors= c("#67A9CF", "#F7F7F7", "#EF8A62")){
stopifnot(length(fc.colors) == 3)
if (length(gs.annot@idx[[gene.set]]) < 2){
warning(paste0("heatmap for ", gene.set, " is skipped. It has
only one gene."))
return()
}
idx = gs.annot@idx
sel = which(names(idx) == gene.set)
stopifnot(length(sel) == 1)
sel.genes = idx[[sel]]
sig.genes = rep(FALSE, length(sel.genes))
if (!is.matrix(fc) || ncol(fc) == 1){
tmpfc = fc[sel.genes]
hm = as.matrix(tmpfc, nrow=length(tmpfc))
hm = cbind(hm, hm)
colnames(hm) = c(" ", " ")
if (length(limma.tops) > 0){
stopifnot(identical(names(fc), rownames(limma.tops[[1]])) ||
identical(rownames(fc), rownames(limma.tops[[1]])))
csv.out = limma.tops[[1]][sel.genes, ]
sig.genes[csv.out[, "adj.P.Val"] <= 0.05] = TRUE
}else{
csv.out = hm[,1]
}
leglabels = c("FDR <= 0.05",
"FDR > 0.05")
}else{
hm = fc[sel.genes, ]
# colnames(hm) = gsub("X", "", colnames(fc))
colnames(hm) = colnames(fc)
if (length(limma.tops) > 0){
csv.out = c()
for (contr in colnames(fc)){
stopifnot(identical(rownames(fc), rownames(limma.tops[[contr]])))
t = limma.tops[[contr]][sel.genes, ]
sig.genes[t[, "adj.P.Val"] <= 0.05] = TRUE
colnames(t) = gsub(" ", "", paste0(contr, ".", colnames(t)))
if (length(csv.out) == 0)
csv.out = t
else
csv.out = cbind(csv.out, t)
}
}else{
csv.out = hm
}
leglabels = c("FDR <= 0.05 for at least one",
"FDR > 0.05 for all contrasts")
}
if (nrow(symbolsMap) == 0)
rownames(hm) = gs.annot@featureIDs[sel.genes] # genes in the
# gene set
else
rownames(hm) = symbolsMap[match(gs.annot@featureIDs[sel.genes],
symbolsMap[,1]), 2]
if (length(limma.tops) == 0)
csv.out = cbind(rownames(hm), csv.out)
# initialize and create the heat map plot
colrange = colorpanel(99, fc.colors[1], fc.colors[2],fc.colors[3])
upperBound = max(round(quantile(abs(hm))[4]), 1)
br=c(seq(-1*upperBound,-1*upperBound*0.5,length=25),
seq(-1*upperBound*0.5+0.001, upperBound*0.5,length=50),
seq(upperBound*0.5 + 0.001, upperBound,length=25))
# br=c(seq(-2,-0.5,length=25),seq(-0.499,0.5,length=25),
# seq(0.51,2,length=25))
# adjust font size depending on the number of rows
sel.genes.size = length(sel.genes)
if(sel.genes.size < 20){
cr = 0.85
}else if(sel.genes.size < 40){
cr = 0.65
}else if(sel.genes.size < 70){
cr = 0.35
}else if(sel.genes.size < 100){
cr = 0.35
}else
cr = 0.15
# gene coloured based on significance
sig.color = rep("#6C6C6C", length(sel.genes))
sig.color[sig.genes] = "#529F4E"
# Export PDF file
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, ".pdf"))
if (nchar(gene.set) > 35)
par(cex.main = 0.7)
else
par(cex.main = 0.8)
heatmap.2(hm, col = colrange, breaks=br,
margins=c(10,10), cexRow=cr,
cexCol=0.85, trace = "none",
Colv = ifelse(is.matrix(fc) && ncol(fc) > 2,
TRUE, FALSE),
dendrogram="row",
density.info = "histogram",
denscol = "black",
#key.title = "",
key.xlab = "logFC",
key.ylab = "Count",
colRow = sig.color,
main = paste0(gene.set, "(logFC)")
)
if (length(limma.tops) > 0){
legend("topright",
legend = leglabels,
col = c("#529F4E", "#6C6C6C"),
title = "Significance of DE",
lty= 1,
lwd = 5,
cex=.7
)
}
dev.off()
}
# Export PNG file
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, ".png")) # , width=800, height=800
heatmap.2(hm, col = colrange, breaks=br, margins=c(10,10),
cexRow=cr, cexCol=0.85, trace = "none",
Colv = ifelse(is.matrix(fc) && ncol(fc) > 2,
TRUE, FALSE),
Rowv=TRUE, dendrogram="none",
colRow = sig.color,
key=FALSE#, lmat=lmat, lwid = lwid, lhei = lhei
) #,main = paste0(gene.set, " (logFC)"), colsep=c(3,6,9))
dev.off()
}
# Export text file
if (print.csv){
write.csv(csv.out, file=paste0(file.name, ".csv"),
row.names=FALSE)
}
}
generateSummaryHeatmaps <- function(hm, hm.vals, contrasts, title,
xlab, file.name, cellvals = NULL, format = NULL){
t = rownames(hm)
t1 = t
for (i in 1:length(t1)){
if (nchar(t1[i]) > 18)
t1[i] = paste0(substr(t1[i], 1, 18), " ...")
}
rownames(hm) = t1
if (length(contrasts) > 1){
# colrow = rev(colorpanel(length(t), "#7FCC77", "#53AC49", "#186F0F")) # GREEN
colrow = rev(colorpanel(length(t), "#BDBDBD", "#737373", "#000000")) # BLUE
}else{
colrow = "black"
}
if (hm.vals %in% c("significance", "p.value", "p.adj"))
colrange = colorRampPalette(brewer.pal(9, "Greens"))(99)
else if (hm.vals %in% c("direction", "avg.logfc.dir"))
if (max(abs(hm)) <= 1)
colrange = colorRampPalette(rev(brewer.pal(9, "RdYlBu")))(99)
else
colrange = colorRampPalette(rev(brewer.pal(11, "RdYlBu")))(99)
else
colrange = colorRampPalette(rev(brewer.pal(9, "Greens")))(99)
# colrange = colorpanel(99, hm.valss[1], hm.valss[2], hm.valss[3])
if (hm.vals %in% c("p.value", "p.adj")){
qs = quantile(hm)
br = seq(qs[2], qs[4], length=100)
}else if (hm.vals %in% c("direction", "avg.logfc.dir")){
upperBound = max(round(quantile(abs(hm))[4]), 1)
br=c(seq(-1*upperBound,-1*upperBound*0.5,length=25),
seq(-1*upperBound*0.5+0.001, upperBound*0.5,length=50),
seq(upperBound*0.5 + 0.001, upperBound,length=25))
}
else{
qs = quantile(hm)
br = c(seq(qs[1], qs[2], length=25),
seq(qs[2]+ 1, qs[3], length=25),
seq(qs[3] + 1, qs[4], length=25),
seq(qs[4] + 1, qs[5],length=25))
}
sel.genes.sets = length(t)
if(sel.genes.sets <= 20){
cr = 0.8
}else if(sel.genes.sets < 40){
cr = 0.65
}else if(sel.genes.sets < 70){
cr = 0.35
}else if(sel.genes.sets < 100){
cr = 0.35
}else
cr = 0.15
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, ".pdf"))
createSummaryHeatmap(hm, contrasts, colrange,
br, cr, colrow,
xlab, title, cellvals,
showLegend = length(contrasts) > 1)
dev.off()
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, ".png"), width=800, height=800)
createSummaryHeatmap(hm, contrasts, colrange,
br, cr, colrow,
xlab, title, cellvals,
showLegend = FALSE)
dev.off()
}
rownames(hm) = t
#colnames(hm) = paste0(colnames(hm), ".", sort.by)
if (!is.null(cellvals)){
hm = cbind(hm, cellvals)
}
write.csv(hm, file=paste0(file.name, ".csv"),
row.names=TRUE)
}
createSummaryHeatmap <- function(hm, contrasts, colrange,
br, cr, colrow,
xlab, title, cellvals, showLegend=TRUE){
# creates 3x3 table with location of heatmap elements defined
mylmat = rbind(c(0,3,0),c(0,1,2),c(0,4,0))
mylwid = c(0.5,4,0.5)
mylhei = c(1,4,1)
par(cex.main = 0.8)
if (!is.null(cellvals)){
if (min(cellvals) < 1)
cellvals1 = round(cellvals, 4)
else
cellvals1 = round(cellvals, 1)
heatmap.2(hm, lmat=mylmat, lwid=mylwid, lhei=mylhei,
breaks=br, col=colrange, margins=c(10,10),
cexRow=cr, cexCol=0.85, trace = "none",
Colv = ifelse(length(contrasts) > 1, TRUE, FALSE),
Rowv = ifelse(length(contrasts) > 1, TRUE, FALSE),
dendrogram = "none",
key.xlab=xlab,
keysize=1, key.title="", density.info="none",
colRow = colrow,
main = title,
cellnote = cellvals1, notecol = "#6C6C6C")
#key.title="Contrast Rank"
}else
heatmap.2(hm, lmat=mylmat, lwid=mylwid, lhei=mylhei,
breaks=br, col=colrange, margins=c(10,10),
cexRow=cr, cexCol=0.85, trace = "none",
Colv = ifelse(length(contrasts) > 1, TRUE, FALSE), ,
Rowv = ifelse(length(contrasts) > 1, TRUE, FALSE),
dendrogram = "none",
key.xlab=xlab,
keysize=1, key.title="", density.info="none",
colRow = colrow,
main = title)
if (showLegend){
legend(x=0.73, y=1.1, xpd=TRUE,
legend = c("High", "Medium",
"Low"),
border = "#FFFFFF",
fill = "#FFFFFF",
#col = c("#186F0F", "#53AC49", "#7FCC77"), # GREEN
col = c("#000000", "#737373", "#BDBDBD"), # BLUE
title = "Comparison Rank",
lty= 1,
lwd = 5,
cex=.7
)
}
}
### Later the D3 Javascript library will be used to generated clickable plots
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.