Nothing
# Attach polmineR, whose functions are exercised by the tests in this file.
library(polmineR)
# NOTE(review): presumably makes the demo corpora shipped with polmineR
# (such as GERMAPARLMINI) available to the session - confirm against the
# polmineR documentation.
use("polmineR")
# Label the tests in this file for testthat reporting.
testthat::context("as.speeches")
# as.speeches() should split GERMAPARLMINI into the same speeches whether it
# is called on a partition or on a corpus object.
test_that(
  "as.speeches",
  {
    # Partition-based bundle. The regex ".*" is meant to match every date, so
    # the partition should span the whole corpus.
    p <- partition("GERMAPARLMINI", date = ".*", regex = TRUE)
    pb <- as.speeches(
      p,
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )
    expect_equal(length(pb), 276L)

    # Corpus-based bundle, to be compared against the partition-based one.
    scb <- as.speeches(
      corpus("GERMAPARLMINI"),
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )
    expect_equal(length(scb), length(pb))

    # The speeches taken together have to add up to the size of the corpus.
    expect_equal(
      sum(unname(unlist(lapply(scb@objects, size)))),
      size("GERMAPARLMINI")
    )

    # Bring both bundles into the same order before comparing them
    # element by element.
    expect_true(all(names(scb) %in% names(pb)))
    pb <- pb[names(scb)]
    expect_identical(names(scb), names(pb))

    # Corpus positions of all speeches, stacked into one matrix per bundle.
    expect_identical(
      do.call(rbind, lapply(scb@objects, function(x) x@cpos)),
      do.call(rbind, lapply(pb@objects, function(x) x@cpos))
    )
  }
)
# Speeches picked by name from the full bundle should match the speeches
# obtained by subsetting the corpus to the same two dates first.
test_that(
  "as.speeches() same result for partition and corpus-method",
  {
    # Variant 1: split everything, then keep the speeches whose name carries
    # one of the two dates.
    all_speeches <- as.speeches(
      "GERMAPARLMINI",
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )
    wanted <- grep(
      "(2009-10-28|2009-11-10)",
      names(all_speeches),
      value = TRUE
    )
    picked_by_name <- all_speeches[wanted]

    # Variant 2: restrict the corpus to the two dates, then split.
    two_days <- subset(
      corpus("GERMAPARLMINI"),
      date %in% c("2009-10-28", "2009-11-10")
    )
    picked_by_subset <- as.speeches(
      two_days,
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )

    expect_identical(length(picked_by_name), length(picked_by_subset))
    expect_identical(
      sum(summary(picked_by_name)$size),
      sum(summary(picked_by_subset)$size)
    )
    expect_true(all(names(picked_by_name) %in% names(picked_by_subset)))
  }
)
# Term-document matrices derived from partition-based and corpus-based
# speeches are expected to be identical. The test is skipped unconditionally.
test_that(
  "tdm for as.speeches",
  {
    skip("knowingly not working")

    # Term-document matrix from speeches of a partition.
    whole_partition <- partition("GERMAPARLMINI", date = ".*", regex = TRUE)
    partition_speeches <- as.speeches(
      whole_partition,
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )
    partition_counts <- count(partition_speeches, p_attribute = "word")
    tdm_partition <- as.TermDocumentMatrix(partition_counts, col = "count")

    # Term-document matrix from speeches of the corpus object, with the
    # speeches taken in the order of the partition-based bundle.
    whole_corpus <- corpus("GERMAPARLMINI")
    corpus_speeches <- as.speeches(
      whole_corpus,
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )
    reordered <- corpus_speeches[[names(partition_speeches)]]
    corpus_speeches@objects <- reordered@objects
    corpus_counts <- count(corpus_speeches, p_attribute = "word")
    tdm_corpus <- as.TermDocumentMatrix(corpus_counts, col = "count")

    expect_identical(tdm_partition, tdm_corpus)
  }
)
# For a single day of debate, the term-document matrix built from speeches of
# a partition should equal the one built from speeches of a subcorpus.
test_that(
  "tdm for as.speeches, but partition/subcorpus",
  {
    # Term-document matrix via the partition for 2009-11-11.
    p <- partition("GERMAPARLMINI", date = "2009-11-11", regex = TRUE)
    pb <- as.speeches(
      p,
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )
    cnt <- count(pb, p_attribute = "word")
    tdm <- as.TermDocumentMatrix(cnt, col = "count")

    # Term-document matrix via the subcorpus for the same day. The speeches
    # are derived from the subcorpus `s` (not from the full corpus), so that
    # the subcorpus method is what gets compared against the partition.
    co <- corpus("GERMAPARLMINI")
    s <- subset(co, date == "2009-11-11")
    sp <- as.speeches(
      s,
      s_attribute_name = "speaker",
      s_attribute_date = "date"
    )

    # Put the speeches into the order of the partition-based bundle before
    # counting, so that both matrices list documents in the same order.
    tmp <- sp[names(pb)]
    sp@objects <- tmp@objects
    cnt2 <- count(sp, p_attribute = "word")
    tdm2 <- as.TermDocumentMatrix(cnt2, col = "count")

    expect_identical(tdm, tdm2)
  }
)
# as.speeches() on GERMAPARL2MINI, restricted to regions where p_type is
# "speech". Skipped unless use("GermaParl2") succeeds.
test_that(
  "as.speeches() for nested corpus",
  {
    skip_if_not(use("GermaParl2"))

    # Split the speech-only part of the corpus into speeches.
    speech_regions <- subset(corpus("GERMAPARL2MINI"), p_type == "speech")
    speeches <- as.speeches(
      speech_regions,
      s_attribute_date = "protocol_date",
      s_attribute_name = "speaker_name",
      verbose = FALSE,
      progress = FALSE
    )
    expect_identical(length(speeches), 14L)

    # Reference: everything said by Carlo Schmid, again limited to regions
    # where p_type is "speech".
    schmid_all <- subset(
      corpus("GERMAPARL2MINI"),
      speaker_name == "Carlo Schmid"
    )
    schmid_speech <- subset(schmid_all, p_type == "speech")

    # The speech object named after Carlo Schmid has to be as large as the
    # reference subcorpus.
    schmid_name <- grep("Carlo Schmid", names(speeches), value = TRUE)
    expect_identical(
      size(speeches[[schmid_name]]),
      size(schmid_speech)
    )
  }
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.