# test-integration.R
# In uptasticsearch: Get Data Frame Representations of 'Elasticsearch' Results

context("Elasticsearch integration tests")

# tests in this file are run automatically on Travis and require
# an actual Elasticsearch cluster to be up and running. For details,
# see: https://github.com/uptake/uptasticsearch/blob/master/.travis.yml

# Sample data from:
# - https://www.elastic.co/guide/en/kibana/current/tutorial-load-dataset.html

# Configure logger (suppress all logs in testing)
# Remember the log threshold that is currently configured (falling back to
# INFO when no logger options are set) so it can be restored during tear down
loggerOptions <- futile.logger::logger.options()
origLogThreshold <- if (identical(loggerOptions, list())) {
    futile.logger::INFO
} else {
    loggerOptions[[1]][["threshold"]]
}

# Threshold 0 is used to silence logging while the tests run
futile.logger::flog.threshold(0)

#--- es_search

    # search request
    test_that("es_search works as expected for a simple search request", {
        testthat::skip_on_cran()

        # request 100 hits with a page size equal to max_hits
        searchDT <- es_search(
            es_host = "http://127.0.0.1:9200"
            , es_index = "shakespeare"
            , size = 100
            , max_hits = 100
        )

        # the result should come back as a data.table
        expect_true(data.table::is.data.table(searchDT))
    })

    test_that("es_search works when you have to scroll", {
        testthat::skip_on_cran()

        # the page size (2) is much smaller than the number of hits requested
        hits_wanted <- 30
        scrolledDT <- es_search(
            es_host = "http://127.0.0.1:9200"
            , es_index = "shakespeare"
            , size = 2
            , max_hits = hits_wanted
        )

        # a data.table with exactly the requested number of rows is expected
        expect_true(data.table::is.data.table(scrolledDT))
        expect_true(nrow(scrolledDT) == hits_wanted)
    })

    test_that("es_search works in single-threaded mode", {
        testthat::skip_on_cran()

        # same request as the scrolling test, but restricted to one core
        hits_wanted <- 30
        serialDT <- es_search(
            es_host = "http://127.0.0.1:9200"
            , es_index = "shakespeare"
            , n_cores = 1
            , size = 2
            , max_hits = hits_wanted
        )

        # a data.table with exactly the requested number of rows is expected
        expect_true(data.table::is.data.table(serialDT))
        expect_true(nrow(serialDT) == hits_wanted)
    })

    test_that("es_search rejects scrolls longer than 1 hour", {
        testthat::skip_on_cran()

        # pattern the error message must match when scroll = "2h" is requested
        scroll_error <- "By default, this function does not permit scroll requests which keep the scroll"

        expect_error(
            {
                outDT <- es_search(
                    es_host = "http://127.0.0.1:9200"
                    , es_index = "shakespeare"
                    , scroll = "2h"
                    , size = 100
                    , max_hits = 100
                )
            }
            , regexp = scroll_error
        )
    })

    test_that("es_search warns and readjusts size if max_hits less than 10000", {
        testthat::skip_on_cran()

        # no size is passed here; the expected warning mentions a page size of 10000
        size_warning <- "You requested a maximum of 9999 hits and a page size of 10000"

        expect_warning(
            {
                resizedDT <- es_search(
                    es_host = "http://127.0.0.1:9200"
                    , es_index = "shakespeare"
                    , max_hits = 9999
                )
            }
            , regexp = size_warning
        )

        # a data.table should still be returned alongside the warning
        expect_true(data.table::is.data.table(resizedDT))
    })

    test_that("es_search warns when max hits is not a clean multiple of size", {
        testthat::skip_on_cran()

        # 12 is not divisible by 7, which should trigger this warning
        multiple_warning <- "When max_hits is not an exact multiple of size, it is possible to get a few more than max_hits results back"

        expect_warning(
            {
                unevenDT <- es_search(
                    es_host = "http://127.0.0.1:9200"
                    , es_index = "shakespeare"
                    , size = 7
                    , max_hits = 12
                )
            }
            , regexp = multiple_warning
        )

        # a data.table should still be returned alongside the warning
        expect_true(data.table::is.data.table(unevenDT))
    })

    test_that("es_search works as expected for search requests that return nothing", {
        testthat::skip_on_cran()

        # NOTE: Searching an index that was deliberately left empty is the safest
        #       way to exercise this code path. The alternative is writing a query
        #       that matches nothing, which would tie this test to the query DSL
        no_match_warning <- "Query is syntactically valid but 0 documents were matched"

        expect_warning(
            {
                emptyDT <- es_search(
                    es_host = "http://127.0.0.1:9200"
                    , es_index = "empty_index"
                )
            }
            , regexp = no_match_warning
        )

        # nothing matched, so NULL is expected
        expect_null(emptyDT)
    })

    # aggregation request
    test_that("es_search works as expected for a simple aggregation request", {
        testthat::skip_on_cran()

        es_url <- "http://127.0.0.1:9200"

        # terms aggregation named "thing" over the "speaker" field
        aggDT <- es_search(
            es_host = es_url
            , es_index = "shakespeare"
            , max_hits = 100
            , query = '{"aggs": {"thing": {"terms": {"field": "speaker", "size": 12}}}}'
        )

        expect_true(data.table::is.data.table(aggDT))

        # the expected number of rows depends on the major version of the
        # cluster: 3 for version 7 and above, 4 otherwise
        es_major <- as.integer(.major_version(.get_es_version(es_url)))
        num_expected_levels <- if (es_major >= 7) 3 else 4
        expect_true(nrow(aggDT) == num_expected_levels)

        # one column named after the aggregation plus a count column
        expect_named(
            aggDT
            , c("thing", "doc_count")
            , ignore.order = TRUE
            , ignore.case = FALSE
        )
        expect_true(is.numeric(aggDT[, doc_count]))
        expect_true(is.character(aggDT[, thing]))
        expect_true(all(aggDT[, doc_count > 0]))
    })

    test_that("es_search respects the names you assign to aggregation results", {
        testthat::skip_on_cran()

        # same terms aggregation as above, but with a custom aggregation name
        namedDT <- es_search(
            es_host = "http://127.0.0.1:9200"
            , es_index = "shakespeare"
            , max_hits = 100
            , query = '{"aggs": {"name_i_picked": {"terms": {"field": "speaker", "size": 12}}}}'
        )

        # main test: the aggregation name should show up as a column name
        expect_named(
            namedDT
            , c("name_i_picked", "doc_count")
            , ignore.order = TRUE
            , ignore.case = FALSE
        )

        # other stuff we might as well test
        expect_true(data.table::is.data.table(namedDT))
        expect_true(is.numeric(namedDT[, doc_count]))
        expect_true(is.character(namedDT[, name_i_picked]))
        expect_true(all(namedDT[, doc_count > 0]))
    })

    # We have tests on static empty results, but this test will catch
    # changes across versions in the way ES actually responds to aggs results that
    # return nothing
    test_that("es_search correctly handles empty bucketed aggregation result", {
        testthat::skip_on_cran()

        # aggregate on a field name that is intended to produce no buckets
        emptyAggDT <- es_search(
            es_host = "http://127.0.0.1:9200"
            , es_index = "shakespeare"
            , max_hits = 100
            , query = '{"aggs": {"blegh": {"terms": {"field": "nonsense_field"}}}}'
        )

        # an empty aggregation result should be returned as NULL
        expect_null(emptyAggDT)
    })

#--- .get_es_version

    test_that(".get_es_version works", {
        testthat::skip_on_cran()

        es_version <- uptasticsearch:::.get_es_version(es_host = "http://127.0.0.1:9200")

        # shown alongside any failure below to make debugging easier
        version_note <- paste0("returned version: ", es_version)

        # is a string
        expect_true(assertthat::is.string(es_version), info = version_note)

        # Only check that the value can be coerced to an integer. Hard-coding
        # the known ES versions would require attention (or break builds)
        # whenever the next major version of Elasticsearch comes out
        expect_true(!is.na(as.integer(es_version)), info = version_note)
    })

#--- get_fields and .get_aliases

    # set up helper function for manipulating aliases. Valid actions below are
    # "add" and "remove"
    .alias_action <- function(action, alias_name){
        # Build the JSON payload: one action applied to the "shakespeare" index
        # for the given alias name
        request_body <- sprintf(
            '{"actions": [{"%s": {"index": "shakespeare", "alias": "%s"}}]}'
            , action
            , alias_name
        )

        # Send the request to the _aliases endpoint of the local test cluster
        response <- httr::POST(
            url = "http://127.0.0.1:9200/_aliases"
            , httr::add_headers(c('Content-Type' = 'application/json'))
            , body = request_body
        )

        # Turn HTTP error statuses into R errors
        httr::stop_for_status(response)

        # Called only for its side effect, so return nothing visible
        invisible(NULL)
    }

    test_that(".get_aliases returns NULL when no aliases have been created in the cluster", {
        testthat::skip_on_cran()

        # no aliases exist at this point, so NULL is expected
        aliasDT <- .get_aliases(es_host = "http://127.0.0.1:9200")
        expect_null(aliasDT)
    })

    test_that("get_fields works on an actual running ES cluster with no aliases", {
        testthat::skip_on_cran()

        expected_cols <- c("index", "type", "field", "data_type")

        # ask for the fields of every index in the cluster
        mappingDT <- get_fields(
            es_host = "http://127.0.0.1:9200"
            , es_indices = "_all"
        )

        # shape of the result
        expect_true(data.table::is.data.table(mappingDT))
        expect_true(nrow(mappingDT) > 0)
        expect_named(
            mappingDT
            , expected_cols
            , ignore.case = FALSE
            , ignore.order = TRUE
        )

        # the sample index should be present
        expect_true("shakespeare" %in% mappingDT[, unique(index)])

        # every column should be character
        expect_true(is.character(mappingDT$index))
        expect_true(is.character(mappingDT$type))
        expect_true(is.character(mappingDT$field))
        expect_true(is.character(mappingDT$data_type))

        # no missing values in index, field, or data_type
        expect_true(sum(is.na(mappingDT[, .(index, field, data_type)])) == 0)
    })

    test_that(".get_aliases and get_fields work as expected when exactly one alias exists for one index in the cluster", {

        testthat::skip_on_cran()

        # create an alias
        .alias_action("add", "the_test_alias")

        # Run the checks inside tryCatch() so the alias is removed even when one
        # of the calls below throws an error. A leftover alias would break the
        # other tests in this file that expect a cluster without aliases.
        tryCatch({

            # get_aliases should work
            resultDT <- .get_aliases("http://127.0.0.1:9200")
            expect_true(data.table::is.data.table(resultDT))
            expect_true(nrow(resultDT) == 1)
            expect_named(
                resultDT
                , c("alias", "index")
                , ignore.case = FALSE
                , ignore.order = TRUE
            )
            expect_identical(resultDT[, index], "shakespeare")
            expect_identical(resultDT[, alias], "the_test_alias")

            # get_fields should work
            fieldDT <- get_fields(
                es_host = "http://127.0.0.1:9200"
                , es_indices = "_all"
            )

            expect_true(data.table::is.data.table(fieldDT))
            expect_true(nrow(fieldDT) > 0)
            expect_named(
                fieldDT
                , c("index", "type", "field", "data_type")
                , ignore.order = TRUE
                , ignore.case = FALSE
            )
            expect_true(is.character(fieldDT$index))
            expect_true(is.character(fieldDT$type))
            expect_true(is.character(fieldDT$field))
            expect_true(is.character(fieldDT$data_type))
            expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

            # get_fields should replace index names with their aliases
            expect_true(fieldDT[, sum(index == "the_test_alias")] > 0)
            expect_true(
                fieldDT[, sum(index == "shakespeare")] == 0
                , info = "get_fields didn't replace index names with their aliases"
            )

        }, finally = {
            # delete the alias we created (to keep tests self-contained)
            .alias_action("remove", "the_test_alias")
        })

        # confirm that it's gone
        resultDT <- .get_aliases("http://127.0.0.1:9200")
        expect_null(resultDT)
    })

    test_that(".get_aliases and get_fields work as expected when more than one alias exists for one index in the cluster", {

        testthat::skip_on_cran()

        # create three aliases for the same index
        .alias_action('add', 'the_test_alias')
        .alias_action('add', 'the_best_alias')
        .alias_action('add', 'the_nest_alias')

        # Run the checks inside tryCatch() so the aliases are removed even when
        # one of the calls below throws an error. Leftover aliases would break
        # the other tests in this file that expect a cluster without aliases.
        tryCatch({

            # get_aliases should work
            resultDT <- .get_aliases("http://127.0.0.1:9200")
            expect_true(data.table::is.data.table(resultDT))
            expect_true(nrow(resultDT) == 3)
            expect_named(
                resultDT
                , c("alias", "index")
                , ignore.case = FALSE
                , ignore.order = TRUE
            )
            expect_identical(resultDT[, index], rep("shakespeare", 3))
            expect_true(resultDT[, all(c("the_best_alias", "the_nest_alias", "the_test_alias") %in% alias)])

            # get_fields should work for "_all" indices
            # NOTE: this was deprecated in Elasticsearch 6 and removed in
            #       ES7, but we use it here so that old uptasticsearch code
            #       continues to work
            fieldDT <- get_fields(
                es_host = "http://127.0.0.1:9200"
                , es_indices = "_all"
            )

            expect_true(data.table::is.data.table(fieldDT))
            expect_true(nrow(fieldDT) > 0)
            expect_named(
                fieldDT
                , c("index", "type", "field", "data_type")
                , ignore.order = TRUE
                , ignore.case = FALSE
            )
            expect_true(is.character(fieldDT$index))
            expect_true(is.character(fieldDT$type))
            expect_true(is.character(fieldDT$field))
            expect_true(is.character(fieldDT$data_type))
            expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

            # get_fields should replace index names with their aliases
            expect_true(fieldDT[, all(c("the_best_alias", "the_nest_alias", "the_test_alias") %in% index)])
            expect_true(
                fieldDT[, sum(index == "shakespeare")] == 0
                , info = "get_fields didn't replace index names with their aliases"
            )

            # since we aliased the same index three times, the subsections should all be identical
            expect_true(identical(
                fieldDT[index == 'the_best_alias', .(type, field, data_type)]
                , fieldDT[index == 'the_nest_alias', .(type, field, data_type)]
            ))
            expect_true(identical(
                fieldDT[index == 'the_best_alias', .(type, field, data_type)]
                , fieldDT[index == 'the_test_alias', .(type, field, data_type)]
            ))

            # get_fields should work targeting a specific index with aliases
            fieldDT <- get_fields(
                es_host = "http://127.0.0.1:9200"
                , es_indices = "shakespeare"
            )

            expect_true(data.table::is.data.table(fieldDT))
            expect_true(nrow(fieldDT) > 0)
            expect_named(
                fieldDT
                , c("index", "type", "field", "data_type")
                , ignore.order = TRUE
                , ignore.case = FALSE
            )
            expect_true(is.character(fieldDT$index))
            expect_true(is.character(fieldDT$type))
            expect_true(is.character(fieldDT$field))
            expect_true(is.character(fieldDT$data_type))
            expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

            # get_fields should replace index names with their aliases
            expect_true(fieldDT[, all(c("the_best_alias", "the_nest_alias", "the_test_alias") %in% index)])
            expect_true(
                fieldDT[, sum(index == "shakespeare")] == 0
                , info = "get_fields didn't replace index names with their aliases"
            )

            # since we aliased the same index three times, the subsections should all be identical
            expect_true(identical(
                fieldDT[index == 'the_best_alias', .(type, field, data_type)]
                , fieldDT[index == 'the_nest_alias', .(type, field, data_type)]
            ))
            expect_true(identical(
                fieldDT[index == 'the_best_alias', .(type, field, data_type)]
                , fieldDT[index == 'the_test_alias', .(type, field, data_type)]
            ))

        }, finally = {
            # delete the aliases we created (to keep tests self-contained)
            .alias_action('remove', 'the_test_alias')
            .alias_action('remove', 'the_best_alias')
            .alias_action('remove', 'the_nest_alias')
        })

        # confirm that they're gone
        resultDT <- .get_aliases("http://127.0.0.1:9200")
        expect_null(resultDT)
    })

    test_that("get_fields works when you target a single index with no aliases", {

        testthat::skip_on_cran()

        target_index <- "empty_index"
        expected_cols <- c("index", "type", "field", "data_type")

        # request the fields of one specific index
        mappingDT <- get_fields(
            es_host = "http://127.0.0.1:9200"
            , es_indices = target_index
        )

        # shape of the result
        expect_true(data.table::is.data.table(mappingDT))
        expect_true(nrow(mappingDT) > 0)
        expect_named(
            mappingDT
            , expected_cols
            , ignore.case = FALSE
            , ignore.order = TRUE
        )

        # every column should be character
        expect_true(is.character(mappingDT$index))
        expect_true(is.character(mappingDT$type))
        expect_true(is.character(mappingDT$field))
        expect_true(is.character(mappingDT$data_type))

        # no missing values in index, field, or data_type
        expect_true(sum(is.na(mappingDT[, .(index, field, data_type)])) == 0)

        # should only give us back records on the one index we requested
        expect_true(mappingDT[, all(index == target_index)])
    })


    test_that("get_fields works when you pass a vector of index names", {

        testthat::skip_on_cran()

        expected_cols <- c("index", "type", "field", "data_type")

        # request the fields of two indices at once
        mappingDT <- get_fields(
            es_host = "http://127.0.0.1:9200"
            , es_indices = c("empty_index", "shakespeare")
        )

        # shape of the result
        expect_true(data.table::is.data.table(mappingDT))
        expect_true(nrow(mappingDT) > 0)
        expect_named(
            mappingDT
            , expected_cols
            , ignore.case = FALSE
            , ignore.order = TRUE
        )

        # every column should be character
        expect_true(is.character(mappingDT$index))
        expect_true(is.character(mappingDT$type))
        expect_true(is.character(mappingDT$field))
        expect_true(is.character(mappingDT$data_type))

        # no missing values in index, field, or data_type
        expect_true(sum(is.na(mappingDT[, .(index, field, data_type)])) == 0)

        # "empty_index" must be present and at least two distinct index
        # values must come back
        expect_true(mappingDT[, any(index == "empty_index")])
        expect_true(mappingDT[, length(unique(index))] >= 2)
    })

##### TEST TEAR DOWN #####
# Restore the log threshold that was captured at the top of this file
futile.logger::flog.threshold(origLogThreshold)

# Remove only the objects this file created at the top level. A blanket
# rm(list = ls()) deletes every visible object in the calling environment,
# which would wipe a user's workspace if this file were sourced interactively.
rm(list = c("loggerOptions", "origLogThreshold"))
# Any scripts or data that you put into this service are public.
# uptasticsearch documentation built on Sept. 12, 2019, 1:04 a.m.
# rdrr.io home R language documentation Run R code online
# CRAN packages Bioconductor packages R-Forge packages GitHub packages
# Note that we can't provide technical support on individual packages. You should contact the package authors for that.
# uptasticsearch
# Get Data Frame Representations of 'Elasticsearch' Results

# tests/testthat/test-integration.R
# In uptasticsearch: Get Data Frame Representations of 'Elasticsearch' Results

# Try the uptasticsearch package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# uptasticsearch Get Data Frame Representations of 'Elasticsearch' Results

# tests/testthat/test-integration.R In uptasticsearch: Get Data Frame Representations of 'Elasticsearch' Results

# Try the uptasticsearch package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# uptasticsearch
# Get Data Frame Representations of 'Elasticsearch' Results

# tests/testthat/test-integration.R
# In uptasticsearch: Get Data Frame Representations of 'Elasticsearch' Results