library(knitr)
library(ReDaMoR)

## Format a markdown reference to a CRAN package.
##
## x: name of a package (single character string).
##
## Returns a markdown string made of a link to the CRAN page of the package
## followed by the Title field of its DESCRIPTION file.
cranRef <- function(x) {
  ## Requesting the field directly returns NA (with a warning) when the
  ## package is not installed, instead of failing on `NA$Title`.
  title <- utils::packageDescription(x, fields = "Title")
  sprintf(
    "[%s](https://CRAN.R-project.org/package=%s): %s",
    x, x, title
  )
}
The aim of neo2R is to provide simple and low-level connectors for querying Neo4j graph databases. The objects returned by the query functions are either lists or data.frames with very little post-processing. It allows fast processing of queries returning many records, and it lets users handle post-processing according to the data model and their needs. It has been developed to support the BED package (https://github.com/patzaw/BED, https://f1000research.com/articles/7-195/v3 ). Other packages such as RNeo4j (https://github.com/nicolewhite/RNeo4j) or neo4r (https://github.com/neo4j-rstats/neo4r) provide connectors to Neo4j databases with additional features.
# Install neo2R from the configured package repositories
install.packages("neo2R")
The following R packages available on CRAN are required:
## Print the required packages as a markdown bullet list.
deps <- desc::desc_get_deps()
## Base subsetting is used so that the chunk does not rely on dplyr being
## attached: an unqualified filter() otherwise resolves to stats::filter().
required <- deps$type %in% c("Depends", "Imports") & deps$package != "R"
sdeps <- deps[which(required), , drop = FALSE]
for (p in sdeps$package) {
  cat(paste("-", cranRef(p)), sep = "\n")
}
# Install neo2R from the patzaw/neo2R GitHub repository
devtools::install_github("patzaw/neo2R")
You can download and install Neo4j according to the documentation. You can also run it in a docker container. It takes a few seconds to start.
The following chunks show how to instantiate a docker container running either version 3 or version 4 (only those 2 are supported by neo2R) of Neo4j. The main differences between the 2 calls are related to the apoc library and SSL configuration.
```{sh, eval=FALSE}
#!/bin/sh

#################################
## CONFIG according to your needs
#################################

export CONTAINER=neo4j_cont

export NJ_VERSION=3.5.30

## Ports
export NJ_HTTP_PORT=7474
export NJ_HTTPS_PORT=7473
export NJ_BOLT_PORT=7687

## Change the location of the Neo4j directory
export NJ_HOME=~/neo4j_home

## Authorization
NJ_AUTH=neo4j/donttrustusers # set to 'none' if you want to disable authorization

## APOC download
export NJ_APOC=https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/3.5.0.15/apoc-3.5.0.15-all.jar

#################################
## RUN
#################################

mkdir -p "$NJ_HOME"

## Import and data directory
export NJ_IMPORT=$NJ_HOME/neo4jImport
mkdir -p "$NJ_IMPORT"
export NJ_DATA=$NJ_HOME/neo4jData
if test -e "$NJ_DATA"; then
   echo "$NJ_DATA directory exists ==> abort - Remove it before proceeding" >&2
   ## Non-zero status so that the abort is reported as a failure
   exit 1
fi
mkdir -p "$NJ_DATA"

## SSL: self-signed certificate for the HTTPS connector
export NJ_SSL=${NJ_HOME}/ssl
mkdir -p "${NJ_SSL}/client_policy"
mkdir -p "${NJ_SSL}/client_policy/revoked"
mkdir -p "${NJ_SSL}/client_policy/trusted"
openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 \
   -keyout "${NJ_SSL}/client_policy/private.key" \
   -out "${NJ_SSL}/client_policy/public.crt"
chmod -R a+rwx "${NJ_SSL}"

## Neo4j plugins: APOC
export NJ_PLUGINS=$NJ_HOME/neo4jPlugins
mkdir -p "$NJ_PLUGINS"
## Stop if the directory cannot be entered: the jar must not land elsewhere
cd "$NJ_PLUGINS" || exit 1
## NOTE(review): --no-check-certificate disables TLS verification of the download
wget --no-check-certificate "$NJ_APOC"
cd -

## Underscores inside Neo4j setting names are doubled in docker environment
## variables (e.g. initial_size ==> initial__size)
docker run -d \
   --name "$CONTAINER" \
   --publish="$NJ_HTTP_PORT":7474 \
   --publish="$NJ_HTTPS_PORT":7473 \
   --publish="$NJ_BOLT_PORT":7687 \
   --env=NEO4J_dbms_memory_heap_initial__size=2G \
   --env=NEO4J_dbms_memory_heap_max__size=2G \
   --env=NEO4J_dbms_memory_pagecache_size=1G \
   --env=NEO4J_dbms_query__cache__size=0 \
   --env=NEO4J_cypher_min__replan__interval=100000000ms \
   --env=NEO4J_cypher_statistics__divergence__threshold=1 \
   --env=NEO4J_dbms_security_procedures_unrestricted=apoc.\\\* \
   --env=NEO4J_dbms_directories_import=import \
   --env "NEO4J_AUTH=$NJ_AUTH" \
   --volume "$NJ_IMPORT":/var/lib/neo4j/import \
   --volume "$NJ_DATA/data":/data \
   --volume "$NJ_PLUGINS":/plugins \
   --volume="$NJ_SSL":/ssl \
   --env=NEO4J_https_ssl__policy=client_policy \
   --env=NEO4J_dbms_ssl_policy_client__policy_base__directory=/ssl/client_policy \
   --env=NEO4J_dbms_ssl_policy_client__policy_client__auth=NONE \
   --env=NEO4J_dbms_ssl_policy_client__policy_trust__all=true \
   neo4j:"$NJ_VERSION"

## Give the container a few seconds to start before opening the import directory
sleep 15
sudo chmod a+rwx "$NJ_IMPORT"
```
### Neo4j 4.x

```{sh, eval=FALSE}
#!/bin/sh

#################################
## CONFIG according to your needs
#################################

export CONTAINER=neo4j_cont

export NJ_VERSION=4.4.26

## Ports
export NJ_HTTP_PORT=7474
export NJ_HTTPS_PORT=7473
export NJ_BOLT_PORT=7687

## Change the location of the Neo4j directory
export NJ_HOME=~/neo4j_home

## Authorization
NJ_AUTH=neo4j/donttrustusers # set to 'none' if you want to disable authorization

## APOC download
export NJ_APOC=https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/4.4.0.3/apoc-4.4.0.3-all.jar

#################################
## RUN
#################################

mkdir -p "$NJ_HOME"

## Import and data directory
export NJ_IMPORT=$NJ_HOME/neo4jImport
mkdir -p "$NJ_IMPORT"
export NJ_DATA=$NJ_HOME/neo4jData
if test -e "$NJ_DATA"; then
   echo "$NJ_DATA directory exists ==> abort - Remove it before proceeding" >&2
   ## Non-zero status so that the abort is reported as a failure
   exit 1
fi
mkdir -p "$NJ_DATA"

## SSL
export NJ_SSL=${NJ_HOME}/ssl
mkdir -p "${NJ_SSL}/https"
mkdir -p "${NJ_SSL}/https/revoked"
mkdir -p "${NJ_SSL}/https/trusted"
openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 \
   -keyout "${NJ_SSL}/https/private.key" \
   -out "${NJ_SSL}/https/public.crt"
chmod -R a+rwx "${NJ_SSL}"

## Neo4j plugins: APOC
export NJ_PLUGINS=$NJ_HOME/neo4jPlugins
mkdir -p "$NJ_PLUGINS"
## Stop if the directory cannot be entered: the jar must not land elsewhere
cd "$NJ_PLUGINS" || exit 1
## NOTE(review): --no-check-certificate disables TLS verification of the download
wget --no-check-certificate "$NJ_APOC"
cd -

docker run -d \
   --name "$CONTAINER" \
   --publish="$NJ_HTTP_PORT":7474 \
   --publish="$NJ_HTTPS_PORT":7473 \
   --publish="$NJ_BOLT_PORT":7687 \
   --env=NEO4J_dbms_memory_heap_initial__size=2G \
   --env=NEO4J_dbms_memory_heap_max__size=2G \
   --env=NEO4J_dbms_memory_pagecache_size=1G \
   --env=NEO4J_dbms_query__cache__size=0 \
   --env=NEO4J_cypher_min__replan__interval=100000000ms \
   --env=NEO4J_cypher_statistics__divergence__threshold=1 \
   --env=NEO4J_dbms_security_procedures_unrestricted=apoc.\\\* \
   --env=NEO4J_dbms_directories_import=import \
   --env "NEO4J_AUTH=$NJ_AUTH" \
   --volume "$NJ_IMPORT":/var/lib/neo4j/import \
   --volume "$NJ_DATA/data":/data \
   --volume "$NJ_PLUGINS":/plugins \
   --volume="$NJ_SSL":/ssl \
   --env=NEO4J_dbms_connector_https_enabled=true \
   --env=NEO4J_dbms_ssl_policy_https_enabled=true \
   --env=NEO4J_dbms_ssl_policy_https_base__directory=/ssl/https \
   --env=NEO4J_dbms_ssl_policy_https_private__key=private.key \
   --env=NEO4J_dbms_ssl_policy_https_public__certificate=public.crt \
   --env=NEO4J_dbms_ssl_policy_https_client__auth=NONE \
   --env=NEO4J_dbms_ssl_policy_https_trust__all=true \
   neo4j:"$NJ_VERSION"

## Give the container a few seconds to start before opening the import directory
sleep 15
sudo chmod a+rwx "$NJ_IMPORT"
```
In recent versions of Neo4j, it is not possible to instantiate the docker image
with a specific user name and password. The password needs to be changed
after Neo4j starts. Nevertheless, it's still possible to instantiate the image
without any credentials with the following docker run
parameter: `--env NEO4J_AUTH=none`
```{sh, eval=FALSE}
#!/bin/sh

#################################
## CONFIG according to your needs
#################################

export CONTAINER=neo4j_cont

export NJ_VERSION=5.12.0

## Ports
export NJ_HTTP_PORT=7474
export NJ_HTTPS_PORT=7473
export NJ_BOLT_PORT=7687

## Change the location of the Neo4j directory
export NJ_HOME=~/neo4j_home

## APOC download
## NOTE(review): confirm that this APOC release is compatible with NJ_VERSION
export NJ_APOC=https://github.com/neo4j/apoc/releases/download/5.1.0/apoc-5.1.0-core.jar

#################################
## RUN
#################################

mkdir -p "$NJ_HOME"

## Import, data and log directories
export NJ_IMPORT=$NJ_HOME/neo4jImport
mkdir -p "$NJ_IMPORT"
export NJ_DATA=$NJ_HOME/neo4jData
if test -e "$NJ_DATA"; then
   echo "$NJ_DATA directory exists ==> abort - Remove it before proceeding" >&2
   ## Non-zero status so that the abort is reported as a failure
   exit 1
fi
mkdir -p "$NJ_DATA"
## The docker run command below mounts NJ_LOGS: it has to be defined,
## otherwise the volume source is empty.
## NOTE(review): directory name chosen by analogy with the other ones
export NJ_LOGS=$NJ_HOME/neo4jLogs
mkdir -p "$NJ_LOGS"

## SSL: the self-signed certificate is also copied in the trusted directory
export NJ_SSL=${NJ_HOME}/ssl
mkdir -p "${NJ_SSL}/https"
mkdir -p "${NJ_SSL}/https/revoked"
mkdir -p "${NJ_SSL}/https/trusted"
openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 \
   -keyout "${NJ_SSL}/https/private.key" \
   -out "${NJ_SSL}/https/public.crt"
cp "${NJ_SSL}/https/public.crt" "${NJ_SSL}/https/trusted"
chmod -R a+rwx "${NJ_SSL}"

## Neo4j plugins: APOC
export NJ_PLUGINS=$NJ_HOME/neo4jPlugins
mkdir -p "$NJ_PLUGINS"
## Stop if the directory cannot be entered: the jar must not land elsewhere
cd "$NJ_PLUGINS" || exit 1
## NOTE(review): --no-check-certificate disables TLS verification of the download
wget --no-check-certificate "$NJ_APOC"
cd -

## Add --env NEO4J_AUTH=none to the command below
## if you don't want to setup credentials.
## Underscores inside Neo4j setting names are doubled in docker environment
## variables (e.g. initial_size ==> initial__size)
docker run -d \
   --name "$CONTAINER" \
   --publish="$NJ_HTTP_PORT":7474 \
   --publish="$NJ_HTTPS_PORT":7473 \
   --publish="$NJ_BOLT_PORT":7687 \
   --env=NEO4J_server_memory_heap_initial__size=2G \
   --env=NEO4J_server_memory_heap_max__size=2G \
   --env=NEO4J_server_memory_pagecache_size=1G \
   --env=NEO4J_server_db_query__cache__size=0 \
   --env=NEO4J_dbms_cypher_min__replan__interval=100000000ms \
   --env=NEO4J_dbms_cypher_statistics__divergence__threshold=1 \
   --env=NEO4J_dbms_security_procedures_unrestricted=apoc.\\\* \
   --env=NEO4J_server_directories_import=import \
   --volume="$NJ_IMPORT":/var/lib/neo4j/import \
   --volume="$NJ_DATA/data":/data \
   --volume="$NJ_LOGS":/var/lib/neo4j/logs \
   --volume="$NJ_PLUGINS":/plugins \
   --volume="$NJ_SSL":/ssl \
   --env=NEO4J_server_https_enabled=true \
   --env=NEO4J_dbms_ssl_policy_https_enabled=true \
   --env=NEO4J_dbms_ssl_policy_https_base__directory=/ssl/https \
   --env=NEO4J_dbms_ssl_policy_https_private__key=private.key \
   --env=NEO4J_dbms_ssl_policy_https_public__certificate=public.crt \
   --env=NEO4J_dbms_ssl_policy_https_client__auth=NONE \
   --env=NEO4J_dbms_ssl_policy_https_trust__all=true \
   neo4j:"$NJ_VERSION"

## Give the container a few seconds to start before opening the import directory
sleep 15
sudo chmod a+rwx "$NJ_IMPORT"
```
If you did not disable credentials (with `--env NEO4J_AUTH=none`), you'll need to first change the user password before being able to connect to the graph database. The following chunk shows how to do it with neo2R.

```r
library(neo2R)
## Connection to the "system" database with the default credentials.
## try() keeps going when the connection cannot be set up.
system <- try(startGraph(
  "https://localhost:7473",
  database = "system",
  check = FALSE,
  username = "neo4j", password = "neo4j",
  importPath = "~/neo4j_home/neo4jImport",
  .opts = list(ssl_verifypeer = 0)
), silent = TRUE)
## A failed try() returns a "try-error" character object on which `$version`
## raises an error: only change the password when the connection exists.
if (!inherits(system, "try-error") && system$version[[1]] == "5") {
  try(cypher(
    system,
    "ALTER CURRENT USER SET PASSWORD FROM 'neo4j' TO 'donttrustusers'"
  ), silent = TRUE)
}
```
library(neo2R)

## Connect to the "system" database with the default Neo4j credentials
system <- startGraph(
  "https://localhost:7473",
  database = "system",
  check = FALSE,
  username = "neo4j",
  password = "neo4j",
  importPath = "~/neo4j_home/neo4jImport",
  .opts = list(ssl_verifypeer = 0)
)

## Replace the default password of the current user
cypher(
  system,
  "ALTER CURRENT USER SET PASSWORD FROM 'neo4j' TO 'donttrustusers'"
)
After installing Neo4j, `startGraph` is used to initialize the connection from R.
If authentication has been disabled in Neo4j by setting `NEO4J_AUTH=none`,
neither username nor password is required.
If you're connecting to a local instance of Neo4j and an import directory has
been defined in the configuration, you can specify it in order to allow
import from data.frames.
library(neo2R)

## Connection used by the import and query examples.
## importPath refers to the import directory on the local file system.
graph <- startGraph(
  "https://localhost:7473",
  username = "neo4j",
  password = "donttrustusers",
  importPath = "~/neo4j_home/neo4jImport",
  .opts = list(ssl_verifypeer = 0)
)
If you're connected to a local instance of Neo4j and import directory has been defined (see above), you can import data from data.frames. Use the 'row' prefix to refer to the data.frame column.
#########################################
## Nodes

## Index TestNode names to speed-up MERGE. The CREATE INDEX syntax depends
## on the Neo4j major version; a failure of the statement is silenced.
index_cql <- if (graph$version[[1]] == "5") {
  'CREATE INDEX FOR (n:TestNode) ON (n.name)'
} else {
  'CREATE INDEX ON :TestNode(name)'
}
try(cypher(graph, index_cql), silent = TRUE)

## Node properties: one row per node, a name and a numeric value
set.seed(1)
nn <- 100000
node_letters <- sample(LETTERS, nn, replace = TRUE)
node_numbers <- sample.int(nn, nn, replace = FALSE)
nodes <- data.frame(
  "name" = paste(node_letters, node_numbers),
  "value" = rnorm(nn, 10, 3),
  stringsAsFactors = FALSE
)

## Each row of the data.frame is available as `row` in the query
import_from_df(
  graph = graph,
  cql = 'MERGE (n:TestNode {name:row.name, value:toFloat(row.value)})',
  toImport = nodes
)

#########################################
## Edges

## Edge properties: names of the two connected nodes and an integer-like value
ne <- 100000
edge_from <- sample(nodes$name, ne, replace = TRUE)
edge_to <- sample(nodes$name, ne, replace = TRUE)
edge_property <- round(runif(ne) * 10)
edges <- data.frame(
  "from" = edge_from,
  "to" = edge_to,
  "property" = edge_property,
  stringsAsFactors = FALSE
)

## Match both ends of each edge before merging the relationship
edge_cql <- prepCql(
  'MATCH (f:TestNode {name:row.from})',
  'MATCH (t:TestNode {name:row.to})',
  'MERGE (f)-[r:TestEdge {property:toInteger(row.property)}]->(t)'
)
import_from_df(
  graph = graph,
  cql = edge_cql,
  toImport = edges
)
You can query the Neo4j graph database using the `cypher()` function.
Depending on the query, the function can return data in a data.frame
(by setting `result="row"`) or in a list with nodes, relationships and paths
returned by the query (by setting `result="graph"`; the "graph" method is
quite slow with version 5 of Neo4j for an unknown reason).
## TestNode records with a value of at most 4.
## According to the normal distribution, about 2.5% of the nodes are
## expected ==> ~2500 nodes
small_value_cql <- prepCql(
  'MATCH (n:TestNode) WHERE n.value <= 4',
  'RETURN n.name as name, n.value as value'
)
df <- cypher(graph, small_value_cql)
print(dim(df))
print(head(df))

## Several queries sent at once: one per threshold from 2 to 4
threshold_template <- paste(
  'MATCH (n:TestNode) WHERE n.value <= %s',
  'RETURN n.name as name, n.value as value'
)
dfl <- multicypher(graph, sprintf(threshold_template, 2:4))
print(lapply(dfl, dim))

## All the paths of length 5 starting from a subset of nodes,
## returned as a list (result="graph")
path_cql <- prepCql(
  'MATCH p=(f:TestNode)-[:TestEdge*5..5]->(t:TestNode) WHERE f.value < 3',
  'RETURN p'
)
net <- cypher(graph, path_cql, result = "graph")
print(lapply(net, head, 3))
path_lengths <- unlist(lapply(net$paths, length))
print(table(path_lengths, dnn = NULL))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.