# Nothing
# Test setup: attach polmineR, request the REUTERS corpus from the RcppCWB
# package via use(), and label this file's tests as the "hits" context.
library("polmineR")
use(corpus = "REUTERS", pkg = "RcppCWB")
testthat::context("hits")
test_that("hits method", {
  # Corpus given by name: hit counts must agree with count() for the query.
  corpus_hits <- hits("REUTERS", query = "oil")
  expect_equal(
    as.data.table(corpus_hits)[["count"]],
    count("REUTERS", query = "oil")[["count"]]
  )

  # Hits broken down by the s-attribute "places" (freq = TRUE): the row for
  # "argentina" is expected to report exactly one match.
  hits_by_place <- hits(
    "REUTERS",
    query = "oil", s_attribute = "places", freq = TRUE
  )
  expect_equal(hits_by_place@stat[places == "argentina"][["count"]], 1L)

  # Same comparison against count(), this time on a partition.
  saudi <- partition("REUTERS", places = "saudi-arabia", regex = TRUE)
  partition_hits <- hits(saudi, query = "oil")
  expect_equal(
    as.data.table(partition_hits)[["count"]],
    count(saudi, query = "oil")[["count"]]
  )

  # Hits within the partition split by "id": the row for id "242" is compared
  # with count() on a partition built for that id alone.
  hits_by_id <- hits(saudi, query = "oil", s_attribute = "id")
  expect_equal(
    hits_by_id@stat[id == "242"][["count"]],
    count(partition("REUTERS", id = "242"), query = "oil")[["count"]]
  )
})
test_that("hits for nested scenario", {
  skip_if_not(use("GermaParl2"))
  # The queries below come from tm::stopwords(); skip rather than error when
  # the tm package is not installed.
  skip_if_not_installed("tm")

  # One shared query vector (first ten German stopwords) for all scenarios.
  queries <- tm::stopwords("de")[1:10]

  # we want to ensure that the order is independent from the order of
  # subsetting operations

  # Scenario A: filter on p_type first, then on party, then split by party.
  hits1 <- corpus("GERMAPARL2MINI") |>
    subset(p_type == "speech") |>
    subset(speaker_party %in% c("CDU", "CSU", "SPD")) |>
    split(s_attribute = "speaker_party", verbose = FALSE) |>
    hits(query = queries, cqp = FALSE, verbose = FALSE) |>
    as.data.table()

  # Scenario B: filter on party, split by party, then filter on p_type.
  hits2 <- corpus("GERMAPARL2MINI") |>
    subset(speaker_party %in% c("CDU", "CSU", "SPD")) |>
    split(s_attribute = "speaker_party") |>
    subset(p_type == "speech", verbose = FALSE) |>
    hits(query = queries, cqp = FALSE, verbose = FALSE) |>
    as.data.table()

  expect_identical(hits1, hits2)

  # Scenarios C and D: single party, the two subset() calls in either order.
  hits3 <- corpus("GERMAPARL2MINI") |>
    subset(speaker_party == "CDU") |>
    subset(p_type == "speech", verbose = FALSE) |>
    hits(query = queries, cqp = FALSE, verbose = FALSE) |>
    as.data.table()

  hits4 <- corpus("GERMAPARL2MINI") |>
    subset(p_type == "speech") |>
    subset(speaker_party == "CDU") |>
    hits(query = queries, cqp = FALSE, verbose = FALSE) |>
    as.data.table()

  expect_identical(hits3, hits4)

  # The CDU rows of the split result must match the non-zero rows obtained
  # for the CDU-only subcorpus.
  expect_identical(
    hits2[partition == "CDU"][, c("query", "count")],
    hits3[count > 0][, c("query", "count")]
  )
})
test_that("test arg decode of hits()", {
  s_attrs <- c("id", "places", "language")

  # Run hits() for "oil" on REUTERS with the given `decode` setting and return
  # typeof() of each s-attribute column as an unnamed character vector, in the
  # order of `s_attrs`.
  column_types <- function(decode) {
    oil_hits <- hits(
      corpus("REUTERS"),
      query = "oil", s_attribute = s_attrs, decode = decode
    )
    tab <- as.data.table(oil_hits)
    # vapply() guarantees exactly one string per column, unlike sapply().
    unname(vapply(tab[, s_attrs, with = FALSE], typeof, character(1L)))
  }

  # decode = TRUE: every s-attribute column is character.
  expect_identical(unique(column_types(TRUE)), "character")

  # decode = FALSE: every s-attribute column is integer.
  expect_identical(unique(column_types(FALSE)), "integer")

  # A logical vector sets decoding per s-attribute, in the order of `s_attrs`.
  expect_identical(
    column_types(c(FALSE, TRUE, TRUE)),
    c("integer", "character", "character")
  )
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.