R/fetchNetwork.R

#'Query networks of a specific relationship from the database
#'@description query networks containing a specific relationship type between the given \code{from} and/or \code{to} nodes using neo4j id, see details.
#'@usage fetchNetwork(from, to, fromtype, totype, reltype, returnas)
#'@param from a character vector of start nodes e.g. from = c('id1', 'id2'). Given \code{from} = NULL, will result in all possible start nodes.
#'Otherwise the value must be a neo4j id, see details and see \code{\link{convertId}} for how to convert ids.
#'@param to a character vector of end nodes e.g. to = c('id1', 'id2'). Given \code{to} = NULL, will result in all possible end nodes, see \code{from} for details.
#'@param fromtype a string specifying the type of the start node. It can be one of compound, protein, gene, pathway, rna, dna, phenotype. The node types correspond to node labels of the database.
#'@param totype a string specifying the type of end nodes, see \code{fromtype} for details.
#'@param reltype a string specifying a relationship type.
#'It can be one of annotation, biochemical_reaction, catalysis, control, conversion, genetic_association, molecular_binding.
#'@param returnas a string specifying output type. It can be one of dataframe, list, json. Default is dataframe.
#'@details The function is specifically used to query one type of relationship. Use \code{\link{fetchHetNetwork}} to query networks containing one or more relationship types (heterogeneous network).
#'
#'The database uses two id systems. The neo4j id is a numeric, internal id automatically generated by the database system.
#'The grinn id (gid) is an id system of Grinn database that uses main ids of standard resources
#'i.e. ENSEMBL for genes (e.g.ENSG00000139618), UniProt for proteins (e.g.P0C9J6), PubChem CID for compounds (e.g.5793), KEGG for pathways (e.g.hsa00010).
#'@return list of network information with the following components:
#'
#'nodes:
#'
#'\code{id} = node neo4j id
#'
#'\code{gid} = node grinn id
#'
#'\code{nodename} = node name
#'
#'\code{nodelabel} = node type
#'
#'\code{nodexref} = node cross references
#'
#'edges:
#'
#'\code{source, target} = node neo4j id
#'
#'\code{type} = relationship type
#'
#'\code{datasource} = relationship resource
#'
#'\code{properties} = relationship properties
#'
#'Return empty list if error or found nothing.
#'@author Kwanjeera W \email{kwanich@@ucdavis.edu}
#'@seealso \code{\link{convertId}}, \code{\link{fetchHetNetwork}}
#'
#'For database structure see \url{http://grinnhomepage}
#'@examples
#'# Query the network of (from:Compound)-BIOCHEMICAL_REACTION->(to:Compound)
#'#from = list(7097,41074,39189) #list of neo4j ids
#'#to = list(113,2440,554,102) #list of neo4j ids
#'#result = fetchNetwork(from=from, to=to, fromtype="compound", totype="compound", reltype="biochemical_reaction")
#'# Query the network of (:Protein)-CATALYSIS->(to:Compound)
#'#to = c(113,2440,554,102) #list of neo4j ids
#'#result = fetchNetwork(from=NULL, to=to, fromtype="protein", totype="compound", reltype="catalysis")
#'@export
fetchNetwork <- function(from=NULL, to=NULL, fromtype, totype, reltype, returnas="dataframe") UseMethod("fetchNetwork")
#'@export
fetchNetwork.default <- function(from=NULL, to=NULL, fromtype, totype, reltype, returnas="dataframe"){
  out <- tryCatch(
  {
    tmparg <- try(fromtype <- match.arg(tolower(fromtype), c("compound","protein","gene","pathway","rna","dna","phenotype"), several.ok = FALSE), silent = TRUE)
    if (class(tmparg) == "try-error") {
      stop("argument 'fromtype' is not valid, choose one from the list: compound,protein,gene,pathway,rna,dna,phenotype")
    }
    tmparg <- try(totype <- match.arg(tolower(totype), c("compound","protein","gene","pathway","rna","dna","phenotype"), several.ok = FALSE), silent = TRUE)
    if (class(tmparg) == "try-error") {
      stop("argument 'totype' is not valid, choose one from the list: compound,protein,gene,pathway,rna,dna,phenotype")
    }
    tmparg <- try(reltype <- match.arg(tolower(reltype), c("annotation","biochemical_reaction","catalysis","control","conversion","genetic_association","molecular_binding"), several.ok = FALSE), silent = TRUE)
    if (class(tmparg) == "try-error") {
      stop("argument 'reltype' is not valid, choose one from the list: annotation,biochemical_reaction,catalysis,control,conversion,genetic_association,molecular_binding")
    }
    #construct query
    maxkw = 500 #maximum keywords
    fromtype = Hmisc::capitalize(fromtype)
    totype = Hmisc::capitalize(totype)
    doPar = FALSE
    if (!is.null(from) && !is.null(to)) {#from and to
      querystring = pathList["fromto"]
      doPar = TRUE #do loop
      from = unique(stringr::str_trim(unlist(from))) #remove whiteline, duplicate
      from = from[!is.na(suppressWarnings(as.numeric(from)))] #remove string, ID accepts integer only
      to = unique(stringr::str_trim(unlist(to))) #remove whiteline, duplicate
      to = to[!is.na(suppressWarnings(as.numeric(to)))] #remove string, ID accepts integer only
    }else if (!is.null(from) && is.null(to)) {#from
      querystring = pathList["from"]
      txtinput = unique(stringr::str_trim(unlist(from))) #remove whiteline, duplicate
      txtinput = txtinput[!is.na(suppressWarnings(as.numeric(txtinput)))] #remove string, ID accepts integer only
      len = length(txtinput)
    }else if (is.null(from) && !is.null(to)) {#to
      querystring = pathList["to"]
      txtinput = unique(stringr::str_trim(unlist(to))) #remove whiteline, duplicate
      txtinput = txtinput[!is.na(suppressWarnings(as.numeric(txtinput)))] #remove string, ID accepts integer only
      len = length(txtinput)
    }else{
      stop("Error: No query provided")
    }
    querystring = gsub("fromtype", fromtype, querystring)
    querystring = gsub("totype", totype, querystring)
    querystring = gsub("reltype", toupper(reltype), querystring)
    cat("Querying network ...\n")
    if(!doPar){
      if(len <= maxkw){
        qstring = gsub("keyword", paste0("['",paste0(txtinput, collapse = "','"),"']"), querystring)
cat(qstring,"\n")
        paths = curlRequest.TRANSACTION(cypher=qstring)
      }else{
        cat("Split queries for more than 500 nodes ...\nWarning: querying a large number of nodes will take long time. \n")
#         subinp = split(txtinput, ceiling(seq_along(txtinput)/maxkw)) #split keywords
#         paths = foreach(i=1:length(subinp), .combine=c) %dopar% {
#           qstring = gsub("keyword", paste0("['",paste0(unlist(subinp[i]), collapse = "','"),"']"), querystring)
# cat(qstring,"\n")
#           curlRequest.TRANSACTION(cypher=qstring)
#         }
        paths = lapply(txtinput, function (x) curlRequest.TRANSACTION(cypher=gsub("keyword", paste0("['",paste0(x, collapse = "','"),"']"), querystring)))
        paths = unlist(paths, recursive = FALSE)
      }
    }else{
      cat("Querying from combination of nodes ...\nWarning: querying a large number of nodes will take long time. \n")
#       path = foreach(i=1:length(from), .combine=rbind) %dopar% {
#         foreach(j=1:length(to)) %dopar% {
#           qstring = gsub("keyfrom", from[i], querystring)
#           qstring = gsub("keyto", to[j], qstring)
# cat(qstring,"\n")
#           curlRequest.TRANSACTION(cypher=qstring)
#         }
#       }
#       paths = unlist(path, recursive = FALSE)
      ft = expand.grid(from,to)
      path = apply(ft, 1, function (x) curlRequest.TRANSACTION(cypher=gsub("keyfrom", x[1], gsub("keyto", x[2], querystring))))
      paths = unlist(path, recursive = FALSE)
    }
    network = formatNetworkOutput(paths)
    networknode = network$nodes
    out = switch(returnas,
                 dataframe = list(nodes=networknode, edges=network$edges),
                 list = list(nodes = split(networknode, seq(nrow(networknode))), edges = split(network$edges, seq(nrow(network$edges)))),
                 json = list(nodes=jsonlite::toJSON(networknode), edges=jsonlite::toJSON(network$edges)),
                 stop("incorrect return type"))
  },error=function(e) {
    message(e)
    cat("\nError: RETURN no network ..\n")
    out = switch(returnas,
                 dataframe = list(nodes = data.frame(), edges = data.frame()),
                 list = list(nodes = list(), edges = list()),
                 json = list(nodes = "", edges = ""))
  })
  return(out)
}
kwanjeeraw/metabox documentation built on May 20, 2019, 7:07 p.m.