#' Generic query against VFB OWL API
#'
#' @description This function expects an R list or vector describing a query and
#'   constructs an appropriate query url embedding a JSON query, GETs the server
#'   response and (by default) parses the JSON result.
#'
#' @details Note that the VFB OWL query endpoint by default wraps all results
#'   inside a JSON result object called \code{results}. When
#'   \code{parse.json=TRUE}, the returned results will be unwrapped to remove
#'   this outer layer.
#'
#' @param query A key-value list specifying the query
#' @param path The path on the server containing the query page
#' @param server The base url of the server
#' @param parse.json Whether or not to parse the response (default: TRUE)
#' @param ... additional arguments passed to
#'   \code{jsonlite::\link[jsonlite]{fromJSON}}
#' @return When \code{parse.json=TRUE}, the parsed JSON (with the outer
#'   \code{results} wrapper removed when it is the only element). When
#'   \code{parse.json=FALSE}, the response object returned by \code{GET}.
#' @export
#' @family query
#' @seealso \code{\link[jsonlite]{fromJSON}}
#' @examples
#' \dontrun{
#' # query for descendant classes of Fan-Shaped Body
#' vfb_owl_query(list(query_type="descendant_class", query="FBbt:00003679"))
#'
#' # query for individual neurons overlapping with Fan-Shaped Body
#' neurondf=vfb_owl_query(list(query_type="individuals", query="FBbt:00003679"))
#' # show the first few rows of the returned data.frame
#' head(neurondf)
#' }
vfb_owl_query <- function(query, path = "do/jsonQuery.html?json=",
                          server = getOption("vfbr.server"),
                          parse.json = TRUE, ...) {
  # Fail before building anything if no server has been configured
  if (is.null(server)) stop("You must specify a server!")

  # Serialise the query as compact JSON and embed it in the URL
  queryj <- minify(toJSON(query, auto_unbox = TRUE))
  queryj <- utils::URLencode(queryj)
  url <- paste0(server, "/", path, queryj)

  res <- GET(url)
  if (!parse.json) {
    return(res)
  }

  parsed <- vfb_parse_json(res, ...)
  # identical() copes with unnamed lists, for which names() is NULL and a
  # plain `==` comparison would give a zero-length condition
  if (is.list(parsed) && identical(names(parsed), "results")) {
    parsed[["results"]]
  } else {
    parsed
  }
}
# Extract the body of a response as text and parse it as JSON.
#
# req            response object handed to content()
# simplifyVector passed on to jsonlite::fromJSON()
# ...            further arguments for jsonlite::fromJSON()
#
# Stops with "No output to parse" when there is no usable body text.
vfb_parse_json <- function(req, simplifyVector = TRUE, ...) {
  text <- content(req, as = "text")
  # An empty body may not arrive as exactly "": also treat NULL, zero-length
  # and NA results as "nothing to parse" so the caller gets one clear error
  no_output <- length(text) == 0L || anyNA(text) || !any(nzchar(text))
  if (no_output) {
    stop("No output to parse", call. = FALSE)
  }
  jsonlite::fromJSON(text, simplifyVector = simplifyVector, ...)
}
#' Query VFB via solr indexing system
#'
#' @description Solr provides an extremely fast way to query all key content on
#'   VFB and is the backend used for most queries run on the website. It is
#'   pre-populated from the OWL ontology documents describing all the
#'   information on the site and their relationships. More details on Solr and
#'   its query syntax can be found at \url{http://lucene.apache.org/solr/}.
#' @details The \code{query} argument maps onto the general solr \code{q=}
#'   query while \code{filterquery} maps onto one or more \code{fq=} terms.
#'   The
#'   \href{https://cwiki.apache.org/confluence/display/solr/Common+Query+Parameters}{solr
#'    wiki} says this about the difference:
#'
#'   The fq parameter defines a query that can be used to restrict the superset
#'   of documents that can be returned, without influencing score. It can be
#'   very useful for speeding up complex queries, since the queries specified
#'   with fq are cached independently of the main query. When a later query uses
#'   the same filter, there's a cache hit, and filter results are returned
#'   quickly from the cache.
#' @inheritParams vfb_owl_query
#' @param query A character string giving the main solr query (sent as
#'   \code{q=}). The default \code{"*:*"} is passed through unchanged.
#' @param filterquery A character vector (of length one or more) describing
#'   filter queries for solr (see Details for regular vs filter queries)
#' @param sort Character vector naming one or more fields (+ delimited) to use
#'   for sorting the results.
#' @param defaultfield Character vector naming default field used for filter
#'   queries (defaults to \code{short_form})
#' @param rows Maximum number of rows to return. The special value of Inf
#'   implies all matching rows.
#' @param fields Which fields to return (+delimited). A value of \code{""}
#'   implies all fields.
#' @param ... additional solr query arguments
#' @return When \code{parse.json=TRUE}, a data.frame containing the parsed
#'   response (originally the \code{response$docs} field in the parsed JSON)
#'   along with additional attributes including
#'
#'   \itemize{
#'
#'   \item numFound
#'
#'   \item start
#'
#'   \item responseHeader
#'
#'   }
#'
#'   When \code{parse.json=FALSE} an \code{httr::response} object
#' @export
#' @family query
#' @examples
#' # Find VFB ids matching a given GMR line
#' # note the field synonym_autosuggest will in future be the only one
#' # matching GMR* ids
#' vfb_solr_query(filterquery="VFB_*",query="synonym_autosuggest:GMR_10A07*")
#'
#' # Find VFB ids matching a given VT Gal4 line
#' vfb_solr_query(filterquery="VFB_*",query="label:VT017929*")
#'
#' # how many GMR lines can we find
#' # note use of rows = 0 so we do not fetch results (but still get totals)
#' r=vfb_solr_query(filterquery="VFB_*",query="label:GMR_*", rows=0)
#' attr(r,'numFound')
#' \donttest{
#' # VFB id for all GMR lines
#' all_gmr=vfb_solr_query(filterquery="VFB_*",query="label:GMR_*", rows=4000)
#' head(all_gmr)
#'
#' # VFB id for all FlyCircuit neurons
#' # note use of rows=Inf to fetch all rows
#' all_fc=vfb_solr_query(filterquery="VFB_*",
#'   query="source_data_link_annotation:*flycircuit*", rows=Inf)
#' head(all_fc)
#' }
#' @seealso \code{\link[httr]{response}}
vfb_solr_query <- function(query = "*:*", filterquery = NULL,
                           fields = "label+short_form", sort = "score+desc",
                           defaultfield = "short_form", rows = 30L,
                           path = "solr/ontology/select?wt=json",
                           server = getOption("vfbr.server.solr"),
                           parse.json = TRUE, ...) {
  # Fail before building anything if no server has been configured
  if (is.null(server)) stop("You must specify a server!")

  if (!is.finite(rows)) {
    # Ask for zero rows first: the reply still carries the total match count,
    # which then becomes the row limit of the real request. Extra solr
    # arguments are forwarded so that the count refers to the same query.
    rowr <- vfb_solr_query(query = query, filterquery = filterquery,
                           fields = fields, sort = sort,
                           defaultfield = defaultfield, rows = 0L,
                           path = path, server = server,
                           parse.json = TRUE, ...)
    rows <- attr(rowr, "numFound")
  }

  # Format rows explicitly: implicit coercion would turn a double such as
  # 100000 into "1e+05" in the query string
  rows_chr <- format(rows, scientific = FALSE, trim = TRUE)
  params <- c(fl = fields, sort = sort, rows = rows_chr, df = defaultfield,
              q = query)

  # Give every element of x the same parameter name so that a vector of
  # values becomes a repeated key in the query string
  name_all <- function(x, name) {
    if (length(x) > 0L) names(x) <- rep(name, length(x))
    x
  }
  params <- c(params, name_all(filterquery, "fq"))

  # Named arguments in ... are treated as extra solr parameters. They are
  # visited by position so that repeated names each keep their own value;
  # unnamed arguments are ignored.
  extras <- list(...)
  for (i in seq_along(extras)) {
    nm <- names(extras)[i]
    if (!is.null(nm) && !is.na(nm) && nzchar(nm)) {
      params <- c(params, name_all(extras[[i]], nm))
    }
  }

  fullquery <- paste(names(params), params, sep = "=", collapse = "&")
  url <- utils::URLencode(paste0(server, "/", path, "&", fullquery))

  res <- GET(url)
  if (!parse.json) {
    return(res)
  }

  rawres <- vfb_parse_json(res)
  # The main payload is the docs element of the response
  response <- rawres[["response"]]
  docs <- response[["docs"]]
  # Return an empty data.frame when nothing matched
  if (!length(docs)) docs <- data.frame()
  # Keep the remaining response fields as attributes
  for (n in setdiff(names(response), "docs")) {
    attr(docs, n) <- response[[n]]
  }
  # Attach the response header as an attribute as well
  attr(docs, "responseHeader") <- rawres[["responseHeader"]]
  docs
}
#' Query VFB's Neo4J graph database
#'
#' @details Under the hood, this uses the \code{RNeo4j::cypher} function to call
#'   a Neo4J service running on the specified VFB server.
#' @param x A character query in Neo4J's cypher language
#' @param ... Additional query arguments of the form \code{key=value}, passed
#'   on to \code{RNeo4j::cypher}
#' @param path The relative path on the server for the Neo4J endpoint
#' @param server The server's root URL
#'
#' @return A data.frame of query results
#' @export
#' @seealso \code{RNeo4j::RNeo4j}, \code{RNeo4j::cypher} and
#'   \url{https://neo4j.com/docs/rest-docs/3.3/}
#' @family query
#' @examples
#' \donttest{
#' # ask for all neuronal classes
#' nclasses=vfb_neo4j_query("MATCH (n:Neuron:Class) RETURN n.label")
#' nrow(nclasses)
#' head(nclasses)
#'
#' # Find all images with an associated neuronal class
#' q=paste0("MATCH (n:Class:VFB { label : 'neuron' })",
#'   "<-[:SUBCLASSOF*]-(p)<-[:INSTANCEOF]-(i:Individual)",
#'   "RETURN distinct i.label, p.label;")
#' nclasses_image=vfb_neo4j_query(q)
#' nrow(nclasses_image)
#' head(nclasses_image)
#'
#' # how many neuronal classes have images?
#' length(unique(nclasses_image$p.label))
#'
#' # look at number of images for each neuronal class
#' table_by_nclass <- table(nclasses_image$p.label)
#' subset(as.data.frame(table_by_nclass), Freq>200)
#' }
#' @references \url{https://neo4j.com/developer/cypher-query-language/}
vfb_neo4j_query <- function(x, ..., path = "db/data",
                            server = getOption("vfbr.server.neo4j")) {
  # Same guard as the other query functions: without it a NULL server only
  # surfaces later as a connection failure
  if (is.null(server)) stop("You must specify a server!")

  # RNeo4j is only a suggested package, so check for it at call time
  if (!requireNamespace("RNeo4j", quietly = TRUE)) {
    stop('You must install the suggested package RNeo4j to use vfb_neo4j_query!\n',
         ' remotes::install_github("nicolewhite/RNeo4j")')
  }

  url <- file.path(server, path)
  g <- try(RNeo4j::startGraph(url), silent = TRUE)
  if (inherits(g, "try-error")) {
    stop("Unable to connect to VFB neo4j server: ", server)
  }

  # Forward the key=value arguments so they reach the query as parameters
  RNeo4j::cypher(g, x, ...)
}
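# NOTE(review): the two lines below look like web-page boilerplate captured
# with this file rather than R code; kept as comments so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.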