tests/testthat/test_rainette2.R

library(quanteda)
context("double reinert classification")

mini_corpus <- head(data_corpus_inaugural, n = 2)
mini_corpus <- split_segments(mini_corpus, 5)
dtm <- dfm(tokens(mini_corpus, remove_punct = TRUE), tolower = TRUE)
dtm <- dfm_remove(dtm, stopwords("en"))
dtm <- dfm_wordstem(dtm, language = "english")
dtm <- dfm_trim(dtm, min_termfreq = 3)

res1 <- rainette(dtm, k = 5, min_segment_size = 2, min_split_members = 3)
res2 <- rainette(dtm, k = 5, min_segment_size = 3, min_split_members = 3)
res12 <- rainette2(dtm, max_k = 5, min_segment_size1 = 2, min_segment_size2 = 3, min_members = 3)

res <- rainette2(res1, res2, min_members = 2)
res_notfull <- rainette2(res1, res2, min_members = 2, full = FALSE)

test_that("compute_chi2", {
  tab <- matrix(c(45, 121 - 45, 257 - 45, 1213 - 121 - 257 + 45), nrow = 2)
  expect_equal(rainette:::compute_chi2(45, 121, 257, 1213),
    chisq.test(tab)$statistic)
  tab <- matrix(c(0, 12, 25, 121 - 12 - 25), nrow = 2)
  expect_equal(rainette:::compute_chi2(0, 12, 25, 121),
    suppressWarnings(-chisq.test(tab)$statistic))
})

test_that("get_groups", {
  expect_equal(dim(rainette:::get_groups(res1)), c(316, 4))
  expect_equal(substr(rainette:::get_groups(res1)[[2]],3,3), as.character(res1$uce_groups[[2]]))
  expect_equal(substr(rainette:::get_groups(res1)[[2]],1,2), rep("2.", length(res1$group)))
})

test_that("groups_crosstab", {
  g1 <- tibble(c(1,1,2,2), c(1,1,2,3))
  g2 <- tibble(c(1,2,1,2), c(3,1,2,1))
  colnames(g1) <- 1:2
  colnames(g2) <- 1:2
  n_tot <- 4
  tab <- rainette:::groups_crosstab(g1, g2, min_members = -Inf, min_chi2 = -Inf)

  expect_equal(nrow(tab), 25)
  expect_equal(
    colnames(tab),
    c("g1", "g2", "n_both", "level1", "level2", "n1", "n2", "chi2", "interclass")
  )
  tmp <- tab %>% dplyr::filter(level1 == 2, level2 == 2, g1 == 2, g2 == 2)
  expect_equal(tmp$n_both, 1)
  expect_equal(tmp$n1, 1)
  expect_equal(tmp$n2, 1)
  expect_equal(tmp$chi2, unname(rainette:::compute_chi2(tmp$n_both, tmp$n1, tmp$n2, n_tot)))
  expect_equal(tmp$interclass, "2x2")
})

test_that("crosstab_add_members and filtering", {
  g1 <- tibble(c(1,1,1,1,2), c(1,1,2,3,3))
  g2 <- tibble(c(1,1,1,1,2), c(3,1,3,1,2))
  colnames(g1) <- 1:2
  colnames(g2) <- 1:2
  n_tot <- 5
  tab <- rainette:::groups_crosstab(g1, g2, min_members = 2, min_chi2 = 0.5)
  tab <- rainette:::crosstab_add_members(tab, g1, g2)

  expect_equal(nrow(tab), 1)
  expect_equal(tab$g1, 1)
  expect_equal(tab$g2, 1)
  expect_equal(tab$n_both, 4)
  expect_equal(tab$interclass, "1x1")
  expect_equal(tab$chi2,
    suppressWarnings(unname(chisq.test(matrix(c(4,0,0,1), nrow=2))$statistic)))
  expect_equal(tab$members, list(1:4))
})

test_that("crosstab_keep_max", {
  df <- tribble(
    ~ g1, ~ g2, ~chi2,
      11,   21,   10,
      11,   22,   5,
      11,   23,   8,
      12,   21,   9, 
      12,   22,   8,
      12,   23,   10,
      13,   21,   8,
      13,   22,   2,
      13,   23,   2
  )
  res <- crosstab_keep_max(df)
  expect_equal(res$g1, c(11, 12))
  expect_equal(res$g2, c(21, 23))
})

test_that("cross_sizes", {
  tab <- tibble(interclass = c("1.1x1.1", "1.2x1.1", "2.1x2.2", "2.1x1.1"),
    members = list(c(1,2,3), c(4,5,6), c(1,2), 5), id = 1:4)
  sizes <- rainette:::cross_sizes(tab)
  expect_equal(sizes, structure(c(1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 1, 0, 1, 0, 1), .Dim = c(4L,
    4L)))
})

test_that("next_partitions", {
  tab <- tibble(interclass = c("1.1x1.1", "1.2x1.1", "2.1x2.2", "2.1x1.1"),
    members = list(c(1,2,3), c(4,5,6), c(1,2), 7), id = 1:4)
  sizes <- rainette:::cross_sizes(tab)
  partitions <- list(tab$id)
  partitions[[2]] <- which(sizes == 0, arr.ind = TRUE, useNames = FALSE) %>%
      asplit(1)

  partitions[[3]] <- rainette:::next_partitions_for(partitions[[2]], sizes)
  expect_equal(partitions[[3]], list(c(1, 2, 4), c(2, 3, 4)))
  partitions[[3]] <- rainette:::next_partitions_parallel(partitions[[2]], sizes)
  expect_equal(partitions[[3]], list(c(1, 2, 4), c(2, 3, 4)))
  expect_equal(rainette:::next_partitions_for(partitions[[3]], sizes), NULL)
  expect_equal(rainette:::next_partitions_parallel(partitions[[3]], sizes), NULL)
})


test_that("get_optimal_partitions", {
  valid <- tibble(interclass = c("1x1", "2x2", "3x3", "4x4"),
                  n_both = c(3, 3, 2, 1),
                  chi2 = c(4, 5, 10, 4),
                  members = list(c(1,2,3), c(4,5,6), c(1,2), 7),
                  id = 1:4)
  partitions <- list(
    list(c(1, 2), c(2, 3), c(1, 4)),
    list(c(1, 2, 4)))
  n_tot <- 7

  # With full = TRUE
  res <- rainette:::get_optimal_partitions(partitions, valid, n_tot, full = TRUE)
  tmp <- res[res$k == 2,]
  expect_equal(tmp$clusters, list(c("1x1", "2x2"), c("2x2", "3x3")))
  expect_equal(tmp$chi2, c(9, 15))
  expect_equal(tmp$n, c(6, 5))
  expect_equal(tmp$groups, list(c(1,1,1,2,2,2,NA),c(2,2,NA,1,1,1,NA)))
  tmp <- res[res$k == 3,]
  expect_equal(tmp$clusters, list(c("1x1", "2x2", "4x4")))
  expect_equal(tmp$chi2, 13)
  expect_equal(tmp$n, 7)
  expect_equal(tmp$groups, list(c(1,1,1,2,2,2,3)))

  # With full = FALSE
  res <- rainette:::get_optimal_partitions(partitions, valid, n_tot, full = FALSE)
  tmp <- res[res$k == 2,]
  expect_equal(tmp$clusters, list(c("2x2", "3x3")))
  expect_equal(tmp$chi2, 15)
  expect_equal(tmp$n, 5)
  expect_equal(tmp$groups, list(c(2,2,NA,1,1,1,NA)))
  tmp <- res[res$k == 3,]
  expect_equal(tmp$clusters, list(c("1x1", "2x2", "4x4")))
  expect_equal(tmp$chi2, 13)
  expect_equal(tmp$n, 7)
  expect_equal(tmp$groups, list(c(1,1,1,2,2,2,3)))
})

test_that("rainette2 gives the same result on dtm and on two clustering results", {
  expect_equal(res[,c("k", "chi2", "n")], res12[,c("k", "chi2", "n")])
  expect_equal(res$groups, res12$groups)
  expect_equal(res$clusters, res12$clusters)
})

test_that("rainette2 display message when stopping before max_k", {
  res1 <- rainette(dtm, k = 4, min_segment_size = 2, min_split_members = 30)
  res2 <- rainette(dtm, k = 4, min_segment_size = 3, min_split_members = 30)
  expect_message(res <- rainette::rainette2(res1, res2, max_k = 4, min_members = 50),
    "^! No more partitions found, stopping at k=2")
  expect_equal(max(res$k), 2)
})

test_that("rainette2_plot error if full=FALSE and criterion=n", {
  expect_error(
    rainette2_plot(res_notfull, dtm, k = 2, criterion = "n"),
    "if rainette2 has been computed with full=FALSE, only 'chi2' criterion is available"
  )
  expect_error(
    rainette2_plot(res, dtm, k = 2, criterion = "n"),
    NA
  )
})

test_that("same results with different values of full and parallel", {
  res_f <- rainette2(res1, res2, max_k = 5, min_members = 2, full = TRUE)
  res_fp <- rainette2(res1, res2, max_k = 5, min_members = 2, full = TRUE, parallel = TRUE)
  expect_equal(res_f, res_fp)
  expect_equal(attr(res_f, "full"), TRUE)
  expect_equal(attr(res_fp, "full"), TRUE)

  res_c <- rainette2(res1, res2, max_k = 5, min_members = 2, full = FALSE)
  res_cp <- rainette2(res1, res2, max_k = 5, min_members = 2, full = FALSE, parallel = TRUE)
  expect_equal(res_c, res_cp)
  expect_equal(attr(res_c, "full"), FALSE)
  expect_equal(attr(res_cp, "full"), FALSE)
})

test_that("plot functions class checking", {
  expect_error(rainette2_plot(res1, k = 5), "res must be a rainette2 result object")
  expect_error(rainette_plot(res12, k = 5), "res must be a rainette result object")
})

Try the rainette package in your browser

Any scripts or data that you put into this service are public.

rainette documentation built on March 31, 2023, 6:43 p.m.