#'Query networks of a specific relationship from the database
#'@description query networks containing a specific relationship type between the given \code{from} and/or \code{to} nodes using grinn id (gid), see details.
#'@usage fetchNetworkByGID(from, to, fromtype, totype, reltype, returnas)
#'@param from a character vector of start nodes e.g. from = c('id1', 'id2'). Given \code{from} = NULL, will result in all possible start nodes.
#'Otherwise the value must be a grinn id, see details and see \code{\link{convertId}} for how to convert ids.
#'@param to a character vector of end nodes e.g. to = c('id1', 'id2'). Given \code{to} = NULL, will result in all possible end nodes, see \code{from} for details.
#'@param fromtype a string specifying the type of the start node. It can be one of compound, protein, gene, pathway, rna, dna, phenotype. The node types correspond to node labels of the database.
#'@param totype a string specifying the type of end nodes, see \code{fromtype} for details.
#'@param reltype a string specifying a relationship type.
#'It can be one of annotation, biochemical_reaction, catalysis, control, conversion, genetic_association, molecular_binding.
#'@param returnas a string specifying output type. It can be one of dataframe, list, json. Default is dataframe.
#'@details The function is specifically used to query one type of relationship. Use \code{\link{fetchHetNetworkByGID}} to query networks containing one or more relationship types (heterogeneous network).
#'
#'The database uses two id systems. The neo4j id is a numeric, internal id automatically generated by the database system.
#'The grinn id (gid) is an id system of Grinn database that uses main ids of standard resources
#'i.e. ENSEMBL for genes (e.g.ENSG00000139618), UniProt for proteins (e.g.P0C9J6), PubChem CID for compounds (e.g.5793), KEGG for pathways (e.g.hsa00010).
#'@return list of network information with the following components:
#'
#'nodes:
#'
#'\code{id} = node neo4j id
#'
#'\code{gid} = node grinn id
#'
#'\code{nodename} = node name
#'
#'\code{nodelabel} = node type
#'
#'\code{nodexref} = node cross references
#'
#'edges:
#'
#'\code{source, target} = node neo4j id
#'
#'\code{type} = relationship type
#'
#'\code{datasource} = relationship resource
#'
#'\code{properties} = relationship properties
#'
#'Return empty list if error or found nothing.
#'@author Kwanjeera W \email{kwanich@@ucdavis.edu}
#'@seealso \code{\link{convertId}}, \code{\link{fetchHetNetworkByGID}}
#'
#'For database structure see \url{http://grinnhomepage}
#'@examples
#'# Query the network of (from:Compound)-BIOCHEMICAL_REACTION->(to:Compound)
#'#from = list('1060','284','760') #By default, PubChem ids can be used as grinn ids for compounds
#'#to = list('222656','107689','5950','71080') #By default, PubChem ids can be used as grinn ids for compounds
#'#result = fetchNetworkByGID(from=from, to=to, fromtype="compound", totype="compound", reltype="biochemical_reaction")
#'# Query the network of (:Protein)-CATALYSIS->(to:Compound)
#'#to = c('222656','107689','5950','71080') #By default, PubChem ids can be used as grinn ids for compounds
#'#result = fetchNetworkByGID(from=NULL, to=to, fromtype="protein", totype="compound", reltype="catalysis")
#'@export
fetchNetworkByGID <- function(from=NULL, to=NULL, fromtype, totype, reltype, returnas="dataframe") UseMethod("fetchNetworkByGID")
#'@export
fetchNetworkByGID.default <- function(from=NULL, to=NULL, fromtype, totype, reltype, returnas="dataframe"){
out <- tryCatch(
{
tmparg <- try(fromtype <- match.arg(tolower(fromtype), c("compound","protein","gene","pathway","rna","dna","phenotype"), several.ok = FALSE), silent = TRUE)
if (class(tmparg) == "try-error") {
stop("argument 'fromtype' is not valid, choose one from the list: compound,protein,gene,pathway,rna,dna,phenotype")
}
tmparg <- try(totype <- match.arg(tolower(totype), c("compound","protein","gene","pathway","rna","dna","phenotype"), several.ok = FALSE), silent = TRUE)
if (class(tmparg) == "try-error") {
stop("argument 'totype' is not valid, choose one from the list: compound,protein,gene,pathway,rna,dna,phenotype")
}
tmparg <- try(reltype <- match.arg(tolower(reltype), c("annotation","biochemical_reaction","catalysis","control","conversion","genetic_association","molecular_binding"), several.ok = FALSE), silent = TRUE)
if (class(tmparg) == "try-error") {
stop("argument 'reltype' is not valid, choose one from the list: annotation,biochemical_reaction,catalysis,control,conversion,genetic_association,molecular_binding")
}
#construct query
maxkw = 500 #maximum keywords
fromtype = Hmisc::capitalize(fromtype)
totype = Hmisc::capitalize(totype)
doPar = FALSE
if (!is.null(from) && !is.null(to)) {#from and to
querystring = pathList_GID["fromto"]
doPar = TRUE #do loop
from = unique(stringr::str_trim(unlist(from))) #remove whiteline, duplicate
to = unique(stringr::str_trim(unlist(to))) #remove whiteline, duplicate
}else if (!is.null(from) && is.null(to)) {#from
querystring = pathList_GID["from"]
txtinput = unique(stringr::str_trim(unlist(from))) #remove whiteline, duplicate
len = length(txtinput)
}else if (is.null(from) && !is.null(to)) {#to
querystring = pathList_GID["to"]
txtinput = unique(stringr::str_trim(unlist(to))) #remove whiteline, duplicate
len = length(txtinput)
}else{
stop("Error: No query provided")
}
querystring = gsub("fromtype", fromtype, querystring)
querystring = gsub("totype", totype, querystring)
querystring = gsub("reltype", toupper(reltype), querystring)
cat("Querying network ...\n")
if(!doPar){
if(len <= maxkw){
qstring = gsub("keyword", paste0("['",paste0(txtinput, collapse = "','"),"']"), querystring)
cat(qstring,"\n")
paths = curlRequest.TRANSACTION(cypher=qstring)
}else{
cat("Split queries for more than 500 nodes ...\nWarning: querying a large number of nodes will take long time. \n")
# subinp = split(txtinput, ceiling(seq_along(txtinput)/maxkw)) #split keywords
# paths = foreach(i=1:length(subinp), .combine=c) %dopar% {
# qstring = gsub("keyword", paste0("['",paste0(unlist(subinp[i]), collapse = "','"),"']"), querystring)
# cat(qstring,"\n")
# curlRequest.TRANSACTION(cypher=qstring)
# }
paths = lapply(txtinput, function (x) curlRequest.TRANSACTION(cypher=gsub("keyword", paste0("['",paste0(x, collapse = "','"),"']"), querystring)))
paths = unlist(paths, recursive = FALSE)
}
}else{
cat("Querying from combination of nodes ...\nWarning: querying a large number of nodes will take long time. \n")
# path = foreach(i=1:length(from), .combine=rbind) %dopar% {
# foreach(j=1:length(to)) %dopar% {
# qstring = gsub("keyfrom", from[i], querystring)
# qstring = gsub("keyto", to[j], qstring)
# cat(qstring,"\n")
# curlRequest.TRANSACTION(cypher=qstring)
# }
# }
# paths = unlist(path, recursive = FALSE)
ft = expand.grid(from,to)
path = apply(ft, 1, function (x) curlRequest.TRANSACTION(cypher=gsub("keyfrom", x[1], gsub("keyto", x[2], querystring))))
paths = unlist(path, recursive = FALSE)
}
network = formatNetworkOutput(paths)
networknode = network$nodes
out = switch(returnas,
dataframe = list(nodes=networknode, edges=network$edges),
list = list(nodes = split(networknode, seq(nrow(networknode))), edges = split(network$edges, seq(nrow(network$edges)))),
json = list(nodes=jsonlite::toJSON(networknode), edges=jsonlite::toJSON(network$edges)),
stop("incorrect return type"))
},error=function(e) {
message(e)
cat("\nError: RETURN no network ..\n")
out = switch(returnas,
dataframe = list(nodes = data.frame(), edges = data.frame()),
list = list(nodes = list(), edges = list()),
json = list(nodes = "", edges = ""))
})
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.