#'Perform enrichment analysis on the input entities
#'@description perform enrichment analysis from p-values of the input entities. The function wraps around the main functions of \pkg{\link{piano}}.
#'@usage computeNodeEnrichment(nodedata, pcol, nodetype, annotation, internalid, method, size)
#'@param nodedata a two-column data frame of entities and the statistical values. The 1st column is entities and the second is statistical values e.g. p-values.
#'@param pcol a string specifying columnname containing p-values. This parameter is only for accepting the input from GUI.
#'@param nodetype a string specifying a node type. It can be one of compound (default), protein, gene, pathway, rna, dna.
#'@param annotation a string specifying the annotation type e.g. pathway (default) and mesh. Pathway annotation requires the database.
#'Mesh annotation doesn't require the database but it is available for PubChem compounds only.
#'@param internalid a logical value indicating whether the input entities are the neo4j ids, if TRUE (default). If not, the entities are expected to be the grinn id or PubChem CID.
#'See details and see \code{\link{convertId}} for how to convert ids. It has no effect on Mesh annotation.
#'@param method a string specifying the enrichment analysis method. It can be one of reporter (default), fisher, median, mean, stouffer. See \code{\link{runGSA}}
#'@param size a numeric vector specifying the minimum and maximum number of members in each annotation term to be used in the analysis. Default is c(3,500).
#'@param returnas a string specifying output type. It can be one of dataframe, list, json. Default is dataframe.
#'@details The database uses two id systems. The neo4j id is a numeric, internal id automatically generated by the database system.
#'The grinn id (gid) is an id system of Grinn database that uses main ids of standard resources
#'i.e. ENSEMBL for genes (e.g.ENSG00000139618), UniProt for proteins (e.g.P0C9J6), PubChem CID for compounds (e.g.5793), KEGG for pathways (e.g.hsa00010).
#'@return list of data frame of nodes, edges and enrichment. The nodes data frame contains input attributes. The edges data frame contains annotation pairs.
#'The data frame of enrichment contains the following components:
#'@param method a string specifying the enrichment analysis method. It can be one of reporter (default), fisher, median, mean, stouffer. See \code{\link{runGSA}}
#'@param size a numeric vector specifying the minimum and maximum number of members in each annotation term to be used in the analysis. Default is c(3,500).
#'@details The database uses two id systems. The neo4j id is a numeric, internal id automatically generated by the database system.
#'The grinn id (gid) is an id system of Grinn database that uses main ids of standard resources
#'i.e. ENSEMBL for genes (e.g.ENSG00000139618), UniProt for proteins (e.g.P0C9J6), PubChem CID for compounds (e.g.5793), KEGG for pathways (e.g.hsa00010).
#'@return list of data frame of nodes, edges, enrichment and pairs. The pairs data frame contains annotation pairs. The data frame of enrichment contains the following components:
#'
#'\code{rank} = rank sort by p
#'
#'\code{id} = annotation id or annotation neo4j id
#'
#'\code{gid} = annotation id or annotation grinn id
#'
#'\code{nodename} = annotation name
#'
#'\code{nodelabel} = annotation type
#'
#'\code{nodexref} = cross references
#'
#'\code{p} = raw p-values
#'
#'\code{p_adj} = adjusted p-values
#'
#'\code{no_of_entities} = number of input entities in each annotation term
#'
#'\code{annotation_size} = total number of entities in each annotation term from the database
#'
#'\code{member} = list of entity members of the annotation term
#'
#'Return list of empty data frame if error or found nothing.
#'@author Kwanjeera W \email{kwanich@@ucdavis.edu}
#'@references Fisher R. (1932) Statistical methods for research workers. Oliver and Boyd, Edinburgh.
#'@references Stouffer S., Suchman E., Devinney L., Star S., and Williams R. (1949) The American soldier: adjustment during army life. Princeton University Press, Oxford, England.
#'@references Patil K. and Nielsen J. (2005) Uncovering transcriptional regulation of metabolism by using metabolic network topology. Proceedings of the National Academy of Sciences of the United States of America 102(8), 2685.
#'@references Oliveira A., Patil K., and Nielsen J. (2008) Architecture of transcriptional regulatory circuits is knitted over the topology of bio-molecular interaction networks. BMC Systems Biology 2(1), 17.
#'@references Väremo L., Nielsen J., and Nookaew I. (2013) Enriching the gene set analysis of genome-wide data by incorporating directionality of gene expression and combining statistical hypotheses and methods. Nucleic Acids Research, 41(8), pp. 4378-4391.
#'@seealso \code{\link{loadGSC}}, \code{\link{runGSA}}, \code{\link{GSAsummaryTable}}
#'@examples
#'#dt <- data.frame(pubchem=c(1110,10413,196,51,311,43,764,790), stat=runif(8, 0, 0.06)) #statistical values of pubchem compounds
#'#result <- computeNodeEnrichment(nodedata=dt, nodetype="compound", annotation="mesh", internalid=FALSE)
#'@export
computeNodeEnrichment <- function(nodedata, pcol=NULL, nodetype="compound", annotation="pathway", internalid = TRUE, method="reporter", size=c(3,500)) UseMethod("computeNodeEnrichment")
#'@export
computeNodeEnrichment.default <- function (nodedata, pcol=NULL, nodetype="compound", annotation="pathway", internalid = TRUE, method="reporter", size=c(3,500)){
out <- tryCatch(
{
tmparg <- try(nodetype <- match.arg(tolower(nodetype), c("compound","protein","gene","rna","dna"), several.ok = FALSE), silent = TRUE)
if (class(tmparg) == "try-error") {
stop("argument 'nodetype' is not valid, choose one from the list: compound,protein,gene,rna,dna")
}
tmparg <- try(annotation <- match.arg(tolower(annotation), c("pathway","mesh"), several.ok = FALSE), silent = TRUE)
if (class(tmparg) == "try-error") {
stop("argument 'annotation' is not valid, choose one from the list: pathway,mesh")
}
tmparg <- try(method <- match.arg(tolower(method), c("reporter","fisher","median","mean","stouffer"), several.ok = FALSE), silent = TRUE)
if (class(tmparg) == "try-error") {
stop("argument 'method' is not valid, choose one from the list: reporter,fisher,median,mean,stouffer")
}
if (class(nodedata) != "data.frame"){
stop("argument 'nodedata' is not valid, data frame is required.")
}
flagdf = FALSE
if(!is.null(pcol)){#get result from statistical analysis
datinput = nodedata #keep input data
flagdf = TRUE
if(!is.null(nodedata$grinn)){
nodedata = nodedata[,c('grinn',pcol)]
}else if(!is.null(nodedata$PubChem)){
nodedata = nodedata[,c('PubChem',pcol)]
colnames(datinput) = gsub("PubChem","grinn",colnames(datinput))
}else if(!is.null(nodedata$pubchem)){
nodedata = nodedata[,c('pubchem',pcol)]
colnames(datinput) = gsub("pubchem","grinn",colnames(datinput))
}else if(!is.null(nodedata$uniprot)){
nodedata = nodedata[,c('uniprot',pcol)]
colnames(datinput) = gsub("uniprot","grinn",colnames(datinput))
}else if(!is.null(nodedata$ensembl)){
nodedata = nodedata[,c('ensembl',pcol)]
colnames(datinput) = gsub("ensembl","grinn",colnames(datinput))
}
#nodedata = na.omit(nodedata)
}
require('dplyr')#load dplyr for opencpu
nodetype = Hmisc::capitalize(nodetype)
if(tolower(annotation) == 'pathway' && foundDb()){#pathway enrichment
cat("Querying database ...\n")
if(internalid){
annols = apply(nodedata, 1, function(x) fetchNetwork(to=x[1], fromtype="pathway", totype = nodetype, reltype = "ANNOTATION")) #query annotation pairs
}else{
annols = apply(nodedata, 1, function(x) fetchNetworkByGID(to=x[1], fromtype="pathway", totype = nodetype, reltype = "ANNOTATION")) #query annotation pairs
}
if(!is.null(unlist(annols))){#found annotation
annonws = combineNetworks(annols) #combine annotation pairs
if(internalid){
#format pval: 1st id, 2nd pval, ... ,use only 1st and 2nd column
pv = nodedata[,2]
pv = as.numeric(pv)
names(pv) = nodedata[,1]
pval = pv
}else{
#format pval: 1st gid, 2nd pval, ... ,use only 1st and 2nd column, change gid to id
merged = merge(annonws$nodes, nodedata, by.x = colnames(annonws$nodes)[2], by.y = colnames(nodedata)[1], all.y = TRUE)
pv = merged[,ncol(annonws$nodes)+1]
pv = as.numeric(pv)
names(pv) = merged$id
pval = pv
}
era = computeEnrichment(edgelist = annonws$edges[,2:1], pval = pval, fc = NULL, method = method, size=size, returnas="dataframe") #compute enrichment
era = era[order(era$p),]
era = era[ , !(colnames(era) == 'no_of_entities')] #hide sum columns
era$rank = seq(1:nrow(era))
era = merge(annonws$nodes, era, by='id') #merge annotation attributes and enrichemt results
era = era[,c(ncol(era),1:(ncol(era)-1))] #rearrange columns
#count subtotal entities
tatt = dplyr::left_join(annonws$edges[,1:2],annonws$nodes,by=c('target'='id')) #get target attributes
subtotls = tatt %>% dplyr::group_by(source) %>% dplyr::tally() #count entities
colnames(subtotls) = c('id','no_of_entities')
era = dplyr::left_join(era, subtotls, by=c('id'='id'))
#count total entities
cat("Querying pathway statistics ...\n")
ptwls = unique(era$id) #list of pathways
ptwstat = lapply(ptwls, function(x){#get total number of annotated nodes of a pathway
qstring = paste0('MATCH (from:Pathway)-[r:ANNOTATION]->(to:',nodetype,') where ID(from) = ',x,' RETURN toString(ID(from)), labels(to), count(to)')
annosize = as.data.frame(curlRequest.TRANSACTION.row(qstring)[[1]]$row, stringsAsFactors = FALSE) #get annotation info from db
colnames(annosize) = c('id','nodelabel','annotation_size')
annosize
})
ptwstat = plyr::ldply(ptwstat, data.frame)
era = dplyr::left_join(era,ptwstat[,-2],by=c('id'='id')) #merge with annotation info
era = era[c(1:(ncol(era)-3),(ncol(era)-1),ncol(era),(ncol(era)-2))] #rearrange columns
networknode = annonws$nodes[annonws$nodes$nodelabel != "Pathway", ] #not return pathway nodes
if(flagdf){#keep input data
networknode = merge(networknode,datinput,by.x='gid',by.y='grinn',all.x=TRUE)
networknode = networknode[,c(2,1,3:ncol(networknode))]
networknode[is.na(networknode)] = ""
}
meminfo = merge(annonws$edges, networknode, by.x='target', by.y='id')
memname = plyr::ddply(meminfo,c('source'),plyr::summarise,membername=list(nodename))
era = dplyr::left_join(era, memname, by=c('id'='source'))
list(nodes=networknode, edges=annonws$edges, enrichment=era) #output
}
else{#no annotation found
list(nodes=data.frame(), edges=data.frame(), enrichment=data.frame()) #output
}
}else if(tolower(annotation) == 'mesh'){#mesh enrichment
if(internalid || tolower(nodetype) != "compound"){
cat("Error: Accept only PubChem compounds, returning no data ...\n")
annols = NULL
}else{
cat("Connecting PubChem ...\n")
annols = apply(nodedata, 1, function(x) callMesh(pcid=x[1])) #query pubchem annotation pairs
}
if(!is.null(unlist(annols))){#found annotation
annonws = combineNetworks(annols) #combine annotation pairs
if(foundDb()){#have db
nodels = lapply(nodedata[,1], formatNode.LIST, y="compound", z="grinnid") #query nodes by gid
networknode = plyr::ldply(nodels, data.frame)
networknode$id = networknode$gid
}else{#no db
cat("No database installed, returning original input ...\n")
networknode = data.frame(id=nodedata[,1], gid=nodedata[,1], nodename=nodedata[,1], nodelabel="Compound", nodexref='', stringsAsFactors = FALSE)
}
#format pval: 1st gid, 2nd pval, ... ,use only 1st and 2nd column
pv = nodedata[,2]
pv = as.numeric(pv)
names(pv) = nodedata[,1]
pval = pv
era = computeEnrichment(edgelist = annonws$edges[,2:1], pval = pval, fc = NULL, method = method, size=size, returnas="dataframe") #compute enrichment
era = era[order(era$p),]
era$rank = seq(1:nrow(era))
era = merge(annonws$nodes[,1:6], era, by='id') #merge annotation attributes and enrichemt results
era = era[,c(ncol(era),1:(ncol(era)-6),(ncol(era)-3),(ncol(era)-2),(ncol(era)-4),(ncol(era)-5),(ncol(era)-1))] #rearrange columns
if(flagdf){#keep input data
networknode = merge(networknode,datinput,by.x='gid',by.y='grinn',all.x=TRUE)
networknode = networknode[,c(2,1,3:ncol(networknode))]
networknode[is.na(networknode)] = ""
}
meminfo = merge(annonws$edges, networknode, by.x='target', by.y='id')
memname = plyr::ddply(meminfo,c('source'),plyr::summarise,membername=list(nodename))
era = dplyr::left_join(era, memname, by=c('id'='source'))
list(nodes=networknode, edges=annonws$edges, enrichment=era) #output
}
else{#no annotation found
list(nodes=data.frame(), edges=data.frame(), enrichment=data.frame()) #output
}
}else{
cat('Error: No database installed, returning no data ..\n')
list(nodes=data.frame(), edges=data.frame(), enrichment=data.frame()) #output
}
},error=function(e) {
message(e)
cat("\nError: RETURN no data ..\n")
list(nodes=data.frame(), edges=data.frame(), enrichment=data.frame()) #output
})
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.