Document management
In solrium: General Purpose R Interface to 'Solr'

Installation

Stable version from CRAN

install.packages("solrium")

Or the development version from GitHub

remotes::install_github("ropensci/solrium")

Load

library("solrium")

Initialize connection. By default, you connect to http://localhost:8983

(conn <- SolrClient$new())

#> <Solr Client>
#>   host: 127.0.0.1
#>   path: 
#>   port: 8983
#>   scheme: http
#>   errors: simple
#>   proxy:

Create documents from R objects

For now, only lists and data.frames are supported.

data.frame

if (!collection_exists(conn, "books")) {
  collection_create(conn, name = "books", numShards = 1)
}

#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 6513
#> 
#> 
#> $success
#> $success$`172.20.0.5:8983_solr`
#> $success$`172.20.0.5:8983_solr`$responseHeader
#> $success$`172.20.0.5:8983_solr`$responseHeader$status
#> [1] 0
#> 
#> $success$`172.20.0.5:8983_solr`$responseHeader$QTime
#> [1] 5024
#> 
#> 
#> $success$`172.20.0.5:8983_solr`$core
#> [1] "books_shard1_replica_n1"
#> 
#> 
#> 
#> $warning
#> [1] "Using _default configset. Data driven schema functionality is enabled by default, which is NOT RECOMMENDED for production use. To turn it off: curl http://{host:port}/solr/books/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'"

df <- data.frame(id = c(67, 68), price = c(1000, 500000000))
conn$add(df, "books")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 987

list

ss <- list(list(id = 1, price = 100), list(id = 2, price = 500))
conn$add(ss, "books")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 61

Delete documents

By id

Create collection if it doesn't exist yet

if (!collection_exists(conn, "gettingstarted")) {
  collection_create(conn, name = "gettingstarted", numShards = 1)
}

#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 6446
#> 
#> 
#> $success
#> $success$`172.20.0.7:8983_solr`
#> $success$`172.20.0.7:8983_solr`$responseHeader
#> $success$`172.20.0.7:8983_solr`$responseHeader$status
#> [1] 0
#> 
#> $success$`172.20.0.7:8983_solr`$responseHeader$QTime
#> [1] 5112
#> 
#> 
#> $success$`172.20.0.7:8983_solr`$core
#> [1] "gettingstarted_shard1_replica_n1"
#> 
#> 
#> 
#> $warning
#> [1] "Using _default configset. Data driven schema functionality is enabled by default, which is NOT RECOMMENDED for production use. To turn it off: curl http://{host:port}/solr/gettingstarted/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'"

Add some documents first

docs <- list(list(id = 1, price = 100, name = "brown"),
             list(id = 2, price = 500, name = "blue"),
             list(id = 3, price = 2000L, name = "pink"))
conn$add(docs, "gettingstarted")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 1108

And the documents are now in your Solr database

conn$search(name = "gettingstarted", params = list(q = "*:*", rows = 3))

#> # A tibble: 3 x 4
#>   id    price name  `_version_`
#>   <chr> <int> <chr>       <dbl>
#> 1 1       100 brown     1.66e18
#> 2 2       500 blue      1.66e18
#> 3 3      2000 pink      1.66e18

Now delete those documents just added

conn$delete_by_id(ids = c(1, 2, 3), "gettingstarted")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 48

And now they are gone

conn$search("gettingstarted", params = list(q = "*:*", rows = 4))

#> # A tibble: 0 x 0

By query

Add some documents first

conn$add(docs, "gettingstarted")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 72

And the documents are now in your Solr database

conn$search("gettingstarted", params = list(q = "*:*", rows = 5))

#> # A tibble: 3 x 4
#>   id    price name  `_version_`
#>   <chr> <int> <chr>       <dbl>
#> 1 1       100 brown     1.66e18
#> 2 2       500 blue      1.66e18
#> 3 3      2000 pink      1.66e18

Now delete some of those documents by query (here, the ones named blue or pink)

conn$delete_by_query(query = "(name:blue OR name:pink)", "gettingstarted")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 122

And now the matching documents are gone; only the document named brown remains

conn$search("gettingstarted", params = list(q = "*:*", rows = 5))

#> # A tibble: 1 x 4
#>   id    price name  `_version_`
#>   <chr> <int> <chr>       <dbl>
#> 1 1       100 brown     1.66e18

Update documents from files

This approach is best if you have many different things you want to do at once, e.g., delete and add files and set any additional options. The functions are:

update_xml()
update_json()
update_csv()

There is a separate function for each input format because each takes slightly different parameters, and because it makes clear that XML, JSON, and CSV are the three supported input formats.

JSON

file <- system.file("examples", "books.json", package = "solrium")
conn$update_json(file, "books")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 782

Add and delete in the same file

Add a document first, that we can later delete

ss <- list(list(id = 456, name = "cat"))
conn$add(ss, "books")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 100

Now add a new document, and delete the one we just made

file <- system.file("examples", "add_delete.xml", package = "solrium")
cat(readLines(file), sep = "\n")

#> <update>
#>  <add>
#>    <doc>
#>      <field name="id">978-0641723445</field>
#>      <field name="cat">book,hardcover</field>
#>      <field name="name">The Lightning Thief</field>
#>      <field name="author">Rick Riordan</field>
#>      <field name="series_t">Percy Jackson and the Olympians</field>
#>      <field name="sequence_i">1</field>
#>      <field name="genre_s">fantasy</field>
#>      <field name="inStock">TRUE</field>
#>      <field name="pages_i">384</field>
#>    </doc>
#>  </add>
#>  <delete>
#>      <id>456</id>
#>  </delete>
#> </update>

conn$update_xml(file, "books")

#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#> 
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 279

Notes

Note that update_xml() and update_json() have exactly the same parameters, but simply use different data input formats. update_csv() is different in that you can't provide document- or field-level boosts or other modifications. In addition, update_csv() accepts not just comma-separated files, but also tab-separated files and files that use other separators.

Any scripts or data that you put into this service are public.

solrium documentation built on May 19, 2021, 9:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

solrium
General Purpose R Interface to 'Solr'

Document management
In solrium: General Purpose R Interface to 'Solr'

Installation

Create documents from R objects

data.frame

list

Delete documents

By id

By query

Update documents from files

JSON

Add and delete in the same file

Notes

Try the solrium package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

solrium General Purpose R Interface to 'Solr'

Document management In solrium: General Purpose R Interface to 'Solr'

Installation

Create documents from R objects

data.frame

list

Delete documents

By id

By query

Update documents from files

JSON

Add and delete in the same file

Notes

Try the solrium package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

solrium
General Purpose R Interface to 'Solr'

Document management
In solrium: General Purpose R Interface to 'Solr'