Nothing
library(quanteda)
context("double reinert classification")
mini_corpus <- head(data_corpus_inaugural, n = 2)
mini_corpus <- split_segments(mini_corpus, 5)
dtm <- dfm(tokens(mini_corpus, remove_punct = TRUE), tolower = TRUE)
dtm <- dfm_remove(dtm, stopwords("en"))
dtm <- dfm_wordstem(dtm, language = "english")
dtm <- dfm_trim(dtm, min_termfreq = 3)
res1 <- rainette(dtm, k = 5, min_segment_size = 2, min_split_members = 3)
res2 <- rainette(dtm, k = 5, min_segment_size = 3, min_split_members = 3)
res12 <- rainette2(dtm, max_k = 5, min_segment_size1 = 2, min_segment_size2 = 3, min_members = 3)
res <- rainette2(res1, res2, min_members = 2)
res_notfull <- rainette2(res1, res2, min_members = 2, full = FALSE)
test_that("compute_chi2", {
tab <- matrix(c(45, 121 - 45, 257 - 45, 1213 - 121 - 257 + 45), nrow = 2)
expect_equal(rainette:::compute_chi2(45, 121, 257, 1213),
chisq.test(tab)$statistic)
tab <- matrix(c(0, 12, 25, 121 - 12 - 25), nrow = 2)
expect_equal(rainette:::compute_chi2(0, 12, 25, 121),
suppressWarnings(-chisq.test(tab)$statistic))
})
test_that("get_groups", {
expect_equal(dim(rainette:::get_groups(res1)), c(316, 4))
expect_equal(substr(rainette:::get_groups(res1)[[2]],3,3), as.character(res1$uce_groups[[2]]))
expect_equal(substr(rainette:::get_groups(res1)[[2]],1,2), rep("2.", length(res1$group)))
})
test_that("groups_crosstab", {
g1 <- tibble(c(1,1,2,2), c(1,1,2,3))
g2 <- tibble(c(1,2,1,2), c(3,1,2,1))
colnames(g1) <- 1:2
colnames(g2) <- 1:2
n_tot <- 4
tab <- rainette:::groups_crosstab(g1, g2, min_members = -Inf, min_chi2 = -Inf)
expect_equal(nrow(tab), 25)
expect_equal(
colnames(tab),
c("g1", "g2", "n_both", "level1", "level2", "n1", "n2", "chi2", "interclass")
)
tmp <- tab %>% dplyr::filter(level1 == 2, level2 == 2, g1 == 2, g2 == 2)
expect_equal(tmp$n_both, 1)
expect_equal(tmp$n1, 1)
expect_equal(tmp$n2, 1)
expect_equal(tmp$chi2, unname(rainette:::compute_chi2(tmp$n_both, tmp$n1, tmp$n2, n_tot)))
expect_equal(tmp$interclass, "2x2")
})
test_that("crosstab_add_members and filtering", {
g1 <- tibble(c(1,1,1,1,2), c(1,1,2,3,3))
g2 <- tibble(c(1,1,1,1,2), c(3,1,3,1,2))
colnames(g1) <- 1:2
colnames(g2) <- 1:2
n_tot <- 5
tab <- rainette:::groups_crosstab(g1, g2, min_members = 2, min_chi2 = 0.5)
tab <- rainette:::crosstab_add_members(tab, g1, g2)
expect_equal(nrow(tab), 1)
expect_equal(tab$g1, 1)
expect_equal(tab$g2, 1)
expect_equal(tab$n_both, 4)
expect_equal(tab$interclass, "1x1")
expect_equal(tab$chi2,
suppressWarnings(unname(chisq.test(matrix(c(4,0,0,1), nrow=2))$statistic)))
expect_equal(tab$members, list(1:4))
})
test_that("crosstab_keep_max", {
df <- tribble(
~ g1, ~ g2, ~chi2,
11, 21, 10,
11, 22, 5,
11, 23, 8,
12, 21, 9,
12, 22, 8,
12, 23, 10,
13, 21, 8,
13, 22, 2,
13, 23, 2
)
res <- crosstab_keep_max(df)
expect_equal(res$g1, c(11, 12))
expect_equal(res$g2, c(21, 23))
})
test_that("cross_sizes", {
tab <- tibble(interclass = c("1.1x1.1", "1.2x1.1", "2.1x2.2", "2.1x1.1"),
members = list(c(1,2,3), c(4,5,6), c(1,2), 5), id = 1:4)
sizes <- rainette:::cross_sizes(tab)
expect_equal(sizes, structure(c(1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 1, 0, 1, 0, 1), .Dim = c(4L,
4L)))
})
test_that("next_partitions", {
tab <- tibble(interclass = c("1.1x1.1", "1.2x1.1", "2.1x2.2", "2.1x1.1"),
members = list(c(1,2,3), c(4,5,6), c(1,2), 7), id = 1:4)
sizes <- rainette:::cross_sizes(tab)
partitions <- list(tab$id)
partitions[[2]] <- which(sizes == 0, arr.ind = TRUE, useNames = FALSE) %>%
asplit(1)
partitions[[3]] <- rainette:::next_partitions_for(partitions[[2]], sizes)
expect_equal(partitions[[3]], list(c(1, 2, 4), c(2, 3, 4)))
partitions[[3]] <- rainette:::next_partitions_parallel(partitions[[2]], sizes)
expect_equal(partitions[[3]], list(c(1, 2, 4), c(2, 3, 4)))
expect_equal(rainette:::next_partitions_for(partitions[[3]], sizes), NULL)
expect_equal(rainette:::next_partitions_parallel(partitions[[3]], sizes), NULL)
})
test_that("get_optimal_partitions", {
valid <- tibble(interclass = c("1x1", "2x2", "3x3", "4x4"),
n_both = c(3, 3, 2, 1),
chi2 = c(4, 5, 10, 4),
members = list(c(1,2,3), c(4,5,6), c(1,2), 7),
id = 1:4)
partitions <- list(
list(c(1, 2), c(2, 3), c(1, 4)),
list(c(1, 2, 4)))
n_tot <- 7
# With full = TRUE
res <- rainette:::get_optimal_partitions(partitions, valid, n_tot, full = TRUE)
tmp <- res[res$k == 2,]
expect_equal(tmp$clusters, list(c("1x1", "2x2"), c("2x2", "3x3")))
expect_equal(tmp$chi2, c(9, 15))
expect_equal(tmp$n, c(6, 5))
expect_equal(tmp$groups, list(c(1,1,1,2,2,2,NA),c(2,2,NA,1,1,1,NA)))
tmp <- res[res$k == 3,]
expect_equal(tmp$clusters, list(c("1x1", "2x2", "4x4")))
expect_equal(tmp$chi2, 13)
expect_equal(tmp$n, 7)
expect_equal(tmp$groups, list(c(1,1,1,2,2,2,3)))
# With full = FALSE
res <- rainette:::get_optimal_partitions(partitions, valid, n_tot, full = FALSE)
tmp <- res[res$k == 2,]
expect_equal(tmp$clusters, list(c("2x2", "3x3")))
expect_equal(tmp$chi2, 15)
expect_equal(tmp$n, 5)
expect_equal(tmp$groups, list(c(2,2,NA,1,1,1,NA)))
tmp <- res[res$k == 3,]
expect_equal(tmp$clusters, list(c("1x1", "2x2", "4x4")))
expect_equal(tmp$chi2, 13)
expect_equal(tmp$n, 7)
expect_equal(tmp$groups, list(c(1,1,1,2,2,2,3)))
})
test_that("rainette2 gives the same result on dtm and on two clustering results", {
expect_equal(res[,c("k", "chi2", "n")], res12[,c("k", "chi2", "n")])
expect_equal(res$groups, res12$groups)
expect_equal(res$clusters, res12$clusters)
})
test_that("rainette2 display message when stopping before max_k", {
res1 <- rainette(dtm, k = 4, min_segment_size = 2, min_split_members = 30)
res2 <- rainette(dtm, k = 4, min_segment_size = 3, min_split_members = 30)
expect_message(res <- rainette::rainette2(res1, res2, max_k = 4, min_members = 50),
"^! No more partitions found, stopping at k=2")
expect_equal(max(res$k), 2)
})
test_that("rainette2_plot error if full=FALSE and criterion=n", {
expect_error(
rainette2_plot(res_notfull, dtm, k = 2, criterion = "n"),
"if rainette2 has been computed with full=FALSE, only 'chi2' criterion is available"
)
expect_error(
rainette2_plot(res, dtm, k = 2, criterion = "n"),
NA
)
})
test_that("same results with different values of full and parallel", {
res_f <- rainette2(res1, res2, max_k = 5, min_members = 2, full = TRUE)
res_fp <- rainette2(res1, res2, max_k = 5, min_members = 2, full = TRUE, parallel = TRUE)
expect_equal(res_f, res_fp)
expect_equal(attr(res_f, "full"), TRUE)
expect_equal(attr(res_fp, "full"), TRUE)
res_c <- rainette2(res1, res2, max_k = 5, min_members = 2, full = FALSE)
res_cp <- rainette2(res1, res2, max_k = 5, min_members = 2, full = FALSE, parallel = TRUE)
expect_equal(res_c, res_cp)
expect_equal(attr(res_c, "full"), FALSE)
expect_equal(attr(res_cp, "full"), FALSE)
})
test_that("plot functions class checking", {
expect_error(rainette2_plot(res1, k = 5), "res must be a rainette2 result object")
expect_error(rainette_plot(res12, k = 5), "res must be a rainette result object")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.