# param edgerfiles - vector of edger filepaths
# param pathways - if not 'all', character vector of pathway ids from http://www.genome.jp/kegg/pathway.html
# param sigonly - if TRUE, only color significantly differentially expressed items, else color all
edger2kegg <- function( edgerfiles , organism="hsa" , pathways="all" , limits=c(-1,1) , entrezOrganism="Hs", sigonly=FALSE, pval=FALSE, gradient=TRUE, threads=getOption("threads",1L) ){
library(pathview)
library(KEGGREST)
library(gage)
library(gageData)
data(go.sets.hs)
data(go.subs.hs)
data(kegg.gs)
de=tsvRead(edgerfiles,col_names=T)
if(length(edgerfiles) == 1)
de=list(de)
# each edger file is a separate comparision, loop through each file
#dump <- mclapply(1:length(edgerfile),function(i) {
for(i in 1:length(edgerfiles)) {
# convert gene symbols to entrez IDs
de[[i]]$entrez=pathview::id2eg(de[[i]]$gene, category ="symbol",org=entrezOrganism)[,2]
# ???
de[[i]][which(de[[i]]$logFC>10),10]<- (10) # set the logFC column to 10 for each row where logFC is greater than 10
de[[i]][which(de[[i]]$logFC<(-10)),10]<- (-10) # set the logFC column to -10 for each row where logFC is less than -10
# making binary calls for up/down regulated
de[[i]]$call=0
if(!pval) {
de[[i]]$call[which(de[[i]]$QValue<=0.05 & de[[i]]$logFC<0)] <- (-1) # if row is signgicant and log2(fold_change)<0, set call to -1
de[[i]]$call[which(de[[i]]$QValue<=0.05 & de[[i]]$logFC>0)] <- 1 # if row is signgicant and log2(fold_change)>0, set call to 1
} else {
de[[i]]$call[which(de[[i]]$PValue<=0.05 & de[[i]]$logFC<0)] <- (-1) # if row is signgicant and log2(fold_change)<0, set call to -1
de[[i]]$call[which(de[[i]]$PValue<=0.05 & de[[i]]$logFC>0)] <- 1 # if row is signgicant and log2(fold_change)>0, set call to 1
}
unq<-as.numeric(rownames(unique(data.frame(de[[i]]$entrez)))) # get indices for rows with unique entrez ids
de[[i]]<-de[[i]][unq,] # remove duplicate entrez rows from de
#### remove rows (just one i think) where entrez id is NA
de[[i]]=de[[i]][-(which(is.na(de[[i]]$entrez))),]
####
row.names(de[[i]])<-de[[i]]$entrez
# if pathways is "all"
if(length(pathways)==1 & pathways[1]=="all"){
# get a list of all pathways for a species
dbnames=keggList("pathway",organism)
# get ID for each pathway
dbid=unlist(lapply(strsplit(names(dbnames),organism),"[",2))
} else{
# if your pathway ID is species specific
if(any(grepl(organism,pathways))){
# dbnames=unlist(lapply(keggGet(as.character(pathways)),"[",2))
dbnames=pathways
# prefix pathway IDs with species code
dbid=gsub(organism,"",pathways)
} else{
#dbnames=unlist(lapply(keggGet(paste0(organism,pathways)),"[",2))
dbnames=pathways
dbid=pathways
}
}
# count number of pathways to draw
numdbs=length(dbnames)
# format pathway names for output files
dbshortnames=unlist(lapply(strsplit(dbnames," - "),"[",1))
dbshortnames=gsub(" ","", dbshortnames)
dbshortnames=gsub(")","", dbshortnames)
dbshortnames=gsub("\\(","",dbshortnames)
dbshortnames=gsub("/","", dbshortnames)
dbshortnames=gsub("'","", dbshortnames)
# select columns of expression values
vals=data.matrix(de[[i]][,c("logFC","logCPM","PValue","QValue","call")])
row.names(vals)<-de[[i]]$entrez
# if only want to color significant rows
if(pval) {
suff="_pval"
} else {
suff="_qval"
}
if(sigonly) {
suff=paste0(suff,"Sigonly")
if(!pval){
vals[which(vals[,"QValue"]>0.05),1] <- 0 # set logFC to 0 for all insignificant items (based on QValue)
} else {
vals[which(vals[,"PValue"]>0.05),1] <- 0 # set logFC to 0 for all insignificant items (based on PValue)
}
}
if(gradient) {
suff=paste0(suff,"Gradient")
if(!pval) {
vals=cbind(vals[,],(sign(vals[,"logFC"])*(-log10(vals[,"QValue"]))))
}
else {
vals=cbind(vals[,],(sign(vals[,"logFC"])*(-log10(vals[,"PValue"]))))
}
colnames(vals)[6]="gradientScore"
}
#dump <- mclapply(1:numdbs, function(j) {
for(j in 1:numdbs){
if(gradient){
gd=vals[,6]
} else {
gd=vals[,1]
}
bn=basename(removeext(edgerfiles[i]))
cat(bn," : ",dbshortnames[j],"\n")
res1 = tryCatch({
pathview( gene.data=gd, pathway.id=as.character(dbid[j]),species=organism,out.suffix=paste0(dbshortnames[j],"_",bn,suff, "_log2ratio_pathview"), sign.pos="bottomleft", kegg.native=FALSE, limit=list(cpd=limits,gene=limits),low =list(gene = "red", cpd = "yellow") , mid = list(gene = "gray", cpd= "gray"), high =list(gene = "green", cpd = "blue") )
},warning = function(w) {
cat("\tWarning generated for pathview() call #1 in ", bn,": ",dbnames[j],"!\n")
}, error = function(e) {
cat("\tError generated for pathview() call #1 in ", bn,": ",dbnames[j],"!\n")
}, finally = {
#cat("\tpathview() call #1 done.\n")
})
res2 = tryCatch({
pathview( gene.data=gd, pathway.id=as.character(dbid[j]),species=organism,out.suffix=paste0(dbshortnames[j],"_",bn,suff, "_log2ratio_keggNative"), sign.pos="bottomleft", kegg.native=TRUE, limit=list(cpd=limits,gene=limits),low =list(gene = "red", cpd = "yellow") , mid = list(gene = "gray", cpd= "gray"), high =list(gene = "green", cpd = "blue") )
},warning = function(w) {
cat("\tWarning generated for pathview() call #2 in ", bn,": ",dbnames[j],"!\n")
}, error = function(e) {
cat("\tError generated for pathview() call #2 in ", bn,": ",dbnames[j],"!\n")
}, finally = {
#cat("\tpathview() call #2 done.\n")
})
} # \for each db
# }, mc.cores=threads,mc.preschedule=F) # \for each db
} # \for each file in edgerfiles # }, mc.cores=threads,mc.preschedule=F))
} # \edger2kegg()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.