Nothing
# Attach polmineR so that its functions are available to the tests below.
library(polmineR)
# Label the tests in this file as belonging to the "split" context.
testthat::context("split")
# NOTE(review): presumably makes the corpora shipped with polmineR (such as
# GERMAPARLMINI, used below) available in this session -- confirm.
use("polmineR")
test_that(
  "split up corpus",
  {
    # Split the partition for one speaker on one day into a bundle.
    x <- partition(
      "GERMAPARLMINI",
      date = "2009-11-11", speaker = "Gerda Hasselfeldt"
    )
    y <- split(x, gap = 500)
    expect_identical(is(y)[1], "partition_bundle")
    expect_identical(length(y), 15L)

    # Merging the pieces again has to reproduce the original partition.
    # The merged object is computed once and reused for all three checks.
    y2 <- merge(y)
    expect_identical(size(y2), size(x))
    expect_identical(as.integer(y2@cpos), as.integer(x@cpos))
    expect_identical(as.vector(y2@cpos), as.vector(x@cpos))

    # check that argument values works as intended
    speakers <- c("Volker Kauder", "Norbert Lammert", "Wolfgang Thierse")
    sb_speakers <- corpus("GERMAPARLMINI") %>%
      split(s_attribute = "speaker", values = speakers)
    expect_true(all(speakers %in% names(sb_speakers)))

    # the following tests require that GERMAPARL2MINI is available
    # It is wrapped into the GermaParl2 package, which can be installed as
    # follows:
    # install.packages(
    #   pkgs = "GermaParl2",
    #   contriburl = "https://polmine.github.io/drat/src/contrib",
    #   type = "source"
    # )
    skip_if_not(use("GermaParl2"))

    gparl2 <- corpus("GERMAPARL2MINI")

    # Splitting on s-attribute "p" with values = FALSE is expected to yield
    # one object per region of that attribute, so the length of the result
    # is compared with the attribute size reported by RcppCWB.
    n_regions <- gparl2 %>%
      split(s_attribute = "p", values = FALSE, verbose = FALSE) %>%
      length()

    attr_size <- RcppCWB::cl_attribute_size(
      corpus = "GERMAPARL2MINI",
      attribute = "p",
      attribute_type = "s",
      registry = gparl2@registry_dir
    )

    expect_identical(n_regions, attr_size)
  }
)
test_that(
  "different order, same result",
  {
    merkel <- "Angela Dorothea Merkel"

    # Three routes to the same regions: subset by protocol_date then split,
    # subset by date then split, and split first then subset by date.
    pp1 <- corpus("GERMAPARLMINI") %>%
      subset(protocol_date == "2009-11-10") %>%
      split(s_attribute = "speaker")

    pp2 <- corpus("GERMAPARLMINI") %>%
      subset(date == "2009-11-10") %>%
      split(s_attribute = "speaker")

    pp3 <- corpus("GERMAPARLMINI") %>%
      split(s_attribute = "speaker") %>%
      .[[merkel]] %>%
      subset(date == "2009-11-10")

    # Drop dimnames so that expect_identical() compares the region matrix
    # values only.
    # NOTE(review): presumably only the split-then-subset route leaves
    # dimnames on @cpos -- confirm.
    dimnames(pp3@cpos) <- NULL

    expect_identical(pp1[[merkel]]@cpos, pp2[[merkel]]@cpos)
    expect_identical(pp1[[merkel]]@cpos, pp3@cpos)

    skip_if_not(use("GermaParl2"))

    gparl2 <- corpus("GERMAPARL2MINI")
    renner <- gparl2 %>%
      subset(speaker_who == "Renner")

    # Number of objects obtained by splitting the subcorpus on "s" ...
    n_subcorpora <- renner %>%
      split(s_attribute = "s", verbose = FALSE) %>%
      length()

    # ... has to equal the number of distinct "s" structures that the
    # corpus positions of the subcorpus map to. magrittr's pipe is used
    # throughout because the `.` placeholder is needed for `cpos`.
    n_sentences <- renner %>%
      slot("cpos") %>%
      RcppCWB::ranges_to_cpos() %>%
      RcppCWB::cl_cpos2struc(
        corpus = "GERMAPARL2MINI",
        s_attribute = "s",
        cpos = .,
        registry = gparl2@registry_dir
      ) %>%
      unique() %>%
      length()

    expect_identical(n_subcorpora, n_sentences)
  }
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.