# tests/testthat/test-utils-stopper.R

test_that("dtm_stopper works with stop_list", {

    # Only the number of columns (terms) is expected to change; the
    # 10 rows (documents) stay in place.

    # No ignore_case given: the expected count equals the case-sensitive
    # call below that lists both "We" and "we" explicitly.
    default_case <- dtm_stopper(dtm.dgc, stop_list = c("we", "moon"))
    expect_identical(dim(default_case), c(10L, 41L))

    # Case-sensitive, both spellings listed.
    both_spellings <- dtm_stopper(dtm.dgc,
                                  stop_list = c("We", "we", "moon"),
                                  ignore_case = FALSE)
    expect_identical(dim(both_spellings), c(10L, 41L))

    # Case-sensitive, lower-case spelling only: one more column remains.
    lower_only <- dtm_stopper(dtm.dgc,
                              stop_list = c("we", "moon"),
                              ignore_case = FALSE)
    expect_identical(dim(lower_only), c(10L, 42L))

})

test_that("dtm_stopper works with dense", {

    # Request a dense result while stopping two terms case-sensitively.
    out1 <- dtm_stopper(dtm.dgc,
                        stop_list = c("we", "moon"),
                        ignore_case = FALSE,
                        dense = TRUE)

    expect_identical(dim(out1), c(10L, 42L))

    # Check for a base matrix with is.matrix() rather than comparing
    # class() to c("matrix", "array"): the implicit "array" class only
    # exists on R >= 4.0, so the exact comparison fails on older R.
    expect_true(is.matrix(out1))

})

test_that("dtm_stopper works with stop_termfreq", {

    # Dimensions of the DTM after applying a term-frequency range.
    dim_after <- function(freq_range) {
        dim(dtm_stopper(dtm.dgc, stop_termfreq = freq_range))
    }

    expect_identical(dim_after(c(2L, 5L)), c(10L, 12L))
    # Integer and double thresholds are expected to give the same result.
    expect_identical(dim_after(c(1L, 2L)), c(10L, 35L))
    expect_identical(dim_after(c(1, 2)), c(10L, 35L))
    # Inf as the upper threshold.
    expect_identical(dim_after(c(1, Inf)), c(10L, 44L))
})

test_that("dtm_stopper works with stop_termprop", {

    # Each case pairs a term-proportion range with the number of
    # columns expected to remain (rows stay at 10).
    cases <- list(
        list(bounds = c(0.04, 0.99), n_cols = 7L),
        list(bounds = c(0.01, 0.1),  n_cols = 43L),
        list(bounds = c(0.01, 0.06), n_cols = 41L),
        list(bounds = c(Inf, 0.1),   n_cols = 44L),
        list(bounds = c(Inf, Inf),   n_cols = 44L)
    )

    for (case in cases) {
        stopped <- dtm_stopper(dtm.dgc, stop_termprop = case[["bounds"]])
        expect_identical(dim(stopped), c(10L, case[["n_cols"]]))
    }
})

test_that("dtm_stopper works with stop_docfreq", {

    # Finite integer ranges.
    low_range <- dtm_stopper(dtm.dgc, stop_docfreq = c(1L, 3L))
    expect_identical(dim(low_range), c(10L, 39L))

    mid_range <- dtm_stopper(dtm.dgc, stop_docfreq = c(2L, 4L))
    expect_identical(dim(mid_range), c(10L, 12L))

    # Inf as the upper threshold only, then as both thresholds.
    open_upper <- dtm_stopper(dtm.dgc, stop_docfreq = c(2L, Inf))
    expect_identical(dim(open_upper), c(10L, 13L))

    open_both <- dtm_stopper(dtm.dgc, stop_docfreq = c(Inf, Inf))
    expect_identical(dim(open_both), c(10L, 44L))
})

test_that("dtm_stopper works with stop_docprop", {

    # Two document-proportion ranges and the dimensions expected after each.
    wide_range <- dtm_stopper(dtm.dgc, stop_docprop = c(0.2, 0.98))
    expect_identical(dim(wide_range), c(10L, 13L))

    narrow_range <- dtm_stopper(dtm.dgc, stop_docprop = c(0.1, 0.4))
    expect_identical(dim(narrow_range), c(10L, 43L))
})

test_that("dtm_stopper works with Inf", {

    # Every case below passes Inf for one of the two thresholds.

    # docprop
    expect_identical(
        dim(dtm_stopper(dtm.dgc, stop_docprop = c(0.2, Inf))),
        c(10L, 13L))
    expect_identical(
        dim(dtm_stopper(dtm.dgc, stop_docprop = c(Inf, 1.0))),
        c(10L, 44L))
    # docfreq
    # This case previously used c(1L, 3L), which contains no Inf and so
    # did not exercise what this test is about. Inf replaces the lower
    # threshold here.
    # NOTE(review): the expected 39 columns assumes an Inf lower threshold
    # behaves like the minimum, as in the termfreq case below where
    # c(Inf, 2) and c(1, 2) both expect 35 -- confirm by running.
    expect_identical(
        dim(dtm_stopper(dtm.dgc, stop_docfreq = c(Inf, 3L))),
        c(10L, 39L))
    # termfreq
    expect_identical(
        dim(dtm_stopper(dtm.dgc, stop_termfreq = c(Inf, 2))),
        c(10L, 35L))
    # termprop
    expect_identical(
        dim(dtm_stopper(dtm.dgc, stop_termprop = c(0.04, Inf))),
        c(10L, 7L))
})

test_that("dtm_stopper works with hapax and null", {

    # Append an all-zero column named "empty" so the DTM has one term
    # that never occurs.
    empty_col <- matrix(0,
                        nrow = nrow(dtm.dgc),
                        ncol = 1L,
                        dimnames = list(NULL, "empty"))
    dtm.a <- cbind(dtm.dgc, empty_col)

    # stop_hapax
    expect_identical(
        dim(dtm_stopper(dtm.a, stop_hapax = TRUE)),
        c(10L, 13L))

    # stop_null should remove the one null column added above
    expect_identical(
        dim(dtm_stopper(dtm.a, stop_null = TRUE)),
        c(10L, 44L))

})

test_that("dtm_stopper works with stoprank", {

    # Parallel vectors: the i-th rank threshold is expected to leave
    # the i-th number of columns (rows stay at 10).
    ranks <- c(2L, 5L, 20L)
    cols_left <- c(42L, 39L, 24L)

    for (i in seq_along(ranks)) {
        stopped <- dtm_stopper(dtm.dgc, stop_termrank = ranks[i])
        expect_identical(dim(stopped), c(10L, cols_left[i]))
    }

})


test_that("dtm_stopper errors work", {

    # Each call is expected to raise an error. The error is asserted
    # directly: nesting expect_message() inside expect_error() adds
    # nothing when the call errors, and can hide a call that does not.

    # non-numeric thresholds / rank
    expect_error(dtm_stopper(dtm.dgc, stop_termfreq = "picklespit"))
    expect_error(dtm_stopper(dtm.dgc, stop_docfreq = "picklespit"))
    expect_error(dtm_stopper(dtm.dgc, stop_termrank = "picklespit"))

    # no stopping rule supplied
    expect_error(dtm_stopper(dtm.dgc))

    # base matrix input, also without any stopping rule
    expect_error(dtm_stopper(as.matrix(dtm.dgc)))
})


test_that("dtm_stopper omit_empty works", {

    # Same stop list with and without omit_empty.
    kept <- dtm_stopper(dtm.dgc, stop_list = "too", omit_empty = FALSE)
    expect_identical(dim(kept), c(10L, 43L))

    # With omit_empty = TRUE one fewer row is expected; any message
    # from the call is silenced.
    dropped <- suppressMessages(
        dtm_stopper(dtm.dgc, stop_list = "too", omit_empty = TRUE)
    )
    expect_identical(dim(dropped), c(9L, 43L))

})

# Try the text2map package in your browser
#
# Any scripts or data that you put into this service are public.
#
# text2map documentation built on July 9, 2023, 6:35 p.m.