R/fetchNetworkByGID.R

#'Query networks of a specific relationship from the database
#'@description query networks containing a specific relationship type between the given \code{from} and/or \code{to} nodes using grinn id (gid), see details.
#'@usage fetchNetworkByGID(from, to, fromtype, totype, reltype, returnas)
#'@param from a character vector of start nodes e.g. from = c('id1', 'id2'). Given \code{from} = NULL, will result in all possible start nodes.
#'Otherwise the value must be a grinn id, see details and see \code{\link{convertId}} for how to convert ids.
#'@param to a character vector of end nodes e.g. to = c('id1', 'id2'). Given \code{to} = NULL, will result in all possible end nodes, see \code{from} for details.
#'@param fromtype a string specifying the type of the start node. It can be one of compound, protein, gene, pathway, rna, dna, phenotype. The node types correspond to node labels of the database.
#'@param totype a string specifying the type of end nodes, see \code{fromtype} for details.
#'@param reltype a string specifying a relationship type.
#'It can be one of annotation, biochemical_reaction, catalysis, control, conversion, genetic_association, molecular_binding.
#'@param returnas a string specifying output type. It can be one of dataframe, list, json. Default is dataframe.
#'@details The function is specifically used to query one type of relationship. Use \code{\link{fetchHetNetworkByGID}} to query networks containing one or more relationship types (heterogeneous network).
#'
#'The database uses two id systems. The neo4j id is a numeric, internal id automatically generated by the database system.
#'The grinn id (gid) is an id system of Grinn database that uses main ids of standard resources
#'i.e. ENSEMBL for genes (e.g.ENSG00000139618), UniProt for proteins (e.g.P0C9J6), PubChem CID for compounds (e.g.5793), KEGG for pathways (e.g.hsa00010).
#'@return list of network information with the following components:
#'
#'nodes:
#'
#'\code{id} = node neo4j id
#'
#'\code{gid} = node grinn id
#'
#'\code{nodename} = node name
#'
#'\code{nodelabel} = node type
#'
#'\code{nodexref} = node cross references
#'
#'edges:
#'
#'\code{source, target} = node neo4j id
#'
#'\code{type} = relationship type
#'
#'\code{datasource} = relationship resource
#'
#'\code{properties} = relationship properties
#'
#'Return empty list if error or found nothing.
#'@author Kwanjeera W \email{kwanich@@ucdavis.edu}
#'@seealso \code{\link{convertId}}, \code{\link{fetchHetNetworkByGID}}
#'
#'For database structure see \url{http://grinnhomepage}
#'@examples
#'# Query the network of (from:Compound)-BIOCHEMICAL_REACTION->(to:Compound)
#'#from = list('1060','284','760') #By default, PubChem ids can be used as grinn ids for compounds
#'#to = list('222656','107689','5950','71080') #By default, PubChem ids can be used as grinn ids for compounds
#'#result = fetchNetworkByGID(from=from, to=to, fromtype="compound", totype="compound", reltype="biochemical_reaction")
#'# Query the network of (:Protein)-CATALYSIS->(to:Compound)
#'#to = c('222656','107689','5950','71080') #By default, PubChem ids can be used as grinn ids for compounds
#'#result = fetchNetworkByGID(from=NULL, to=to, fromtype="protein", totype="compound", reltype="catalysis")
#'@export
fetchNetworkByGID <- function(from=NULL, to=NULL, fromtype, totype, reltype, returnas="dataframe") UseMethod("fetchNetworkByGID")
#'@export
fetchNetworkByGID.default <- function(from=NULL, to=NULL, fromtype, totype, reltype, returnas="dataframe"){
  out <- tryCatch(
    {
      tmparg <- try(fromtype <- match.arg(tolower(fromtype), c("compound","protein","gene","pathway","rna","dna","phenotype"), several.ok = FALSE), silent = TRUE)
      if (class(tmparg) == "try-error") {
        stop("argument 'fromtype' is not valid, choose one from the list: compound,protein,gene,pathway,rna,dna,phenotype")
      }
      tmparg <- try(totype <- match.arg(tolower(totype), c("compound","protein","gene","pathway","rna","dna","phenotype"), several.ok = FALSE), silent = TRUE)
      if (class(tmparg) == "try-error") {
        stop("argument 'totype' is not valid, choose one from the list: compound,protein,gene,pathway,rna,dna,phenotype")
      }
      tmparg <- try(reltype <- match.arg(tolower(reltype), c("annotation","biochemical_reaction","catalysis","control","conversion","genetic_association","molecular_binding"), several.ok = FALSE), silent = TRUE)
      if (class(tmparg) == "try-error") {
        stop("argument 'reltype' is not valid, choose one from the list: annotation,biochemical_reaction,catalysis,control,conversion,genetic_association,molecular_binding")
      }
      #construct query
      maxkw = 500 #maximum keywords
      fromtype = Hmisc::capitalize(fromtype)
      totype = Hmisc::capitalize(totype)
      doPar = FALSE
      if (!is.null(from) && !is.null(to)) {#from and to
        querystring = pathList_GID["fromto"]
        doPar = TRUE #do loop
        from = unique(stringr::str_trim(unlist(from))) #remove whiteline, duplicate
        to = unique(stringr::str_trim(unlist(to))) #remove whiteline, duplicate
      }else if (!is.null(from) && is.null(to)) {#from
        querystring = pathList_GID["from"]
        txtinput = unique(stringr::str_trim(unlist(from))) #remove whiteline, duplicate
        len = length(txtinput)
      }else if (is.null(from) && !is.null(to)) {#to
        querystring = pathList_GID["to"]
        txtinput = unique(stringr::str_trim(unlist(to))) #remove whiteline, duplicate
        len = length(txtinput)
      }else{
        stop("Error: No query provided")
      }
      querystring = gsub("fromtype", fromtype, querystring)
      querystring = gsub("totype", totype, querystring)
      querystring = gsub("reltype", toupper(reltype), querystring)
      cat("Querying network ...\n")
      if(!doPar){
        if(len <= maxkw){
          qstring = gsub("keyword", paste0("['",paste0(txtinput, collapse = "','"),"']"), querystring)
cat(qstring,"\n")
          paths = curlRequest.TRANSACTION(cypher=qstring)
        }else{
          cat("Split queries for more than 500 nodes ...\nWarning: querying a large number of nodes will take long time. \n")
#           subinp = split(txtinput, ceiling(seq_along(txtinput)/maxkw)) #split keywords
#           paths = foreach(i=1:length(subinp), .combine=c) %dopar% {
#             qstring = gsub("keyword", paste0("['",paste0(unlist(subinp[i]), collapse = "','"),"']"), querystring)
# cat(qstring,"\n")
#             curlRequest.TRANSACTION(cypher=qstring)
#           }
          paths = lapply(txtinput, function (x) curlRequest.TRANSACTION(cypher=gsub("keyword", paste0("['",paste0(x, collapse = "','"),"']"), querystring)))
          paths = unlist(paths, recursive = FALSE)
        }
      }else{
        cat("Querying from combination of nodes ...\nWarning: querying a large number of nodes will take long time. \n")
#         path = foreach(i=1:length(from), .combine=rbind) %dopar% {
#           foreach(j=1:length(to)) %dopar% {
#             qstring = gsub("keyfrom", from[i], querystring)
#             qstring = gsub("keyto", to[j], qstring)
#             cat(qstring,"\n")
#             curlRequest.TRANSACTION(cypher=qstring)
#           }
#         }
#         paths = unlist(path, recursive = FALSE)
        ft = expand.grid(from,to)
        path = apply(ft, 1, function (x) curlRequest.TRANSACTION(cypher=gsub("keyfrom", x[1], gsub("keyto", x[2], querystring))))
        paths = unlist(path, recursive = FALSE)
      }
      network = formatNetworkOutput(paths)
      networknode = network$nodes
      out = switch(returnas,
                     dataframe = list(nodes=networknode, edges=network$edges),
                     list = list(nodes = split(networknode, seq(nrow(networknode))), edges = split(network$edges, seq(nrow(network$edges)))),
                     json = list(nodes=jsonlite::toJSON(networknode), edges=jsonlite::toJSON(network$edges)),
                     stop("incorrect return type"))
    },error=function(e) {
      message(e)
      cat("\nError: RETURN no network ..\n")
      out = switch(returnas,
                   dataframe = list(nodes = data.frame(), edges = data.frame()),
                   list = list(nodes = list(), edges = list()),
                   json = list(nodes = "", edges = ""))
    })
  return(out)
}
kwanjeeraw/metabox documentation built on May 20, 2019, 7:07 p.m.