# tests/testthat/test_chisquare.R

# Attach the package under test.
library(polmineR)
# Label the expectations in this file with the "chisquare" context.
testthat::context("chisquare")
# NOTE(review): presumably makes the corpora shipped with polmineR (such as
# GERMAPARLMINI, used below) available for this session -- confirm against the
# use() documentation.
use("polmineR")

# Cross-check the chi-square statistic reported by features() against a manual
# computation from a 2x2 contingency table of observed vs. expected counts.
test_that(
  "check chisquare computation",
  {
    # Partition of GERMAPARLMINI selected by speaker and interjection, with
    # counts on the "word" p-attribute: the corpus of interest (coi).
    m <- partition(
      "GERMAPARLMINI", speaker = "Merkel", interjection = "speech",
      regex = TRUE, p_attribute = "word"
    )
    # NOTE(review): included = TRUE presumably tells features() that the coi is
    # part of the reference corpus; the manual table below subtracts the coi
    # count from the corpus-wide count accordingly -- confirm against docs.
    f <- features(m, "GERMAPARLMINI", included = TRUE)
    f_min <- subset(f, count_coi >= 5)
    f_min_sorted <- sort(f_min, by = "chisquare")

    # Take at most the first ten tokens; plain `[1:10]` would inject NA tokens
    # if fewer than ten features pass the count filter.
    tokens <- f_min_sorted@stat[["word"]]
    tokens <- tokens[seq_len(min(10L, length(tokens)))]
    # Guard against a silently empty test.
    expect_gt(length(tokens), 0L)

    # Loop-invariant lookups, computed once rather than per token.
    sizes <- size(f)
    features_dt <- as(f, "data.table")

    for (token in tokens) {
      count_coi <- m@stat[word == token][["count"]]
      count_ref <- count("GERMAPARLMINI", query = token)[["count"]] - count_coi

      # Observed 2x2 table (filled column-wise):
      #   row 1 = occurrences of the token, row 2 = all other tokens
      #   col 1 = corpus of interest,       col 2 = reference corpus
      observed <- matrix(
        c(
          count_coi, sizes[["coi"]] - count_coi,
          count_ref, sizes[["ref"]] - count_ref
        ),
        ncol = 2
      )

      row_totals <- rowSums(observed)
      col_totals <- colSums(observed)
      n_total <- sum(observed)

      # Expected frequencies under independence:
      # e[i, j] = row_totals[i] * (col_totals[j] / n_total)
      expected <- outer(row_totals, col_totals / n_total)

      chisquare_value_selfmade <- sum((observed - expected)^2 / expected)

      chisquare_value_fn <- features_dt[word == token][["chisquare"]]
      expect_equal(chisquare_value_selfmade, chisquare_value_fn)
    }
  }
)
  
# PolMine/polmineR documentation built on Nov. 9, 2023, 8:07 a.m.