tests/testthat/test_aggregation.R

context("Aggregation")

library("data.table")
library("quanteda")

set.seed(123)

# corpus, lexicon and aggregation control creation
data("usnews")
corpus <- quanteda::corpus_sample(sento_corpus(corpusdf = usnews), size = 1000)

data("list_lexicons")
lex <- sento_lexicons(list_lexicons[c("GI_en", "LM_en")])
lexClust <- sento_lexicons(list_lexicons[c("GI_en", "LM_en", "HENRY_en")],
                           list_valence_shifters[["en"]][, c("x", "t")])

### tests from here ###

ctr1 <- ctr_agg(howWithin = "proportionalPol", howDocs = "equal_weight", howTime = "almon", by = "month",
                lag = 5, ordersAlm = 1:3, do.inverseAlm = TRUE)
sentMeas1 <- sento_measures(corpus, lex, ctr1)

ctr2 <- ctr_agg(howWithin = "counts", howDocs = "proportional", howTime = c("equal_weight", "linear", "own"),
                by = "year", lag = 2, weights = data.frame(q1 = c(0.25, 0.75), q3 = c(0.75, 0.25)),
                do.ignoreZeros = FALSE, do.sentence = TRUE)
sentMeas2 <- sento_measures(corpus, lex, ctr2)

ctr3 <- ctr_agg(howWithin = "counts", howDocs = "inverseProportional", howTime = c("equal_weight", "own"),
                by = "year", lag = 3, weights = data.frame(GI_en = c(0.3, 0.6, 0.1)))

ctr4 <- ctr_agg(howWithin = "UShaped", howDocs = "inverseProportional", howTime = "exponential",
                do.inverseExp = TRUE, alphas = c(0.1, 0.2, 0.3), by = "day", lag = 180)

ctr5 <- ctr_agg(howWithin = "counts", howDocs = "exponential", alphaExpDocs = 0.2,
                howTime = "linear", by = "year", lag = 3)

ctr6 <- ctr_agg(howWithin = "TFIDF", howDocs = "inverseExponential", alphaExpDocs = 0.1,
                howTime = "equal_weight", by = "week", lag = 7)

# sento_measures
test_that("Number of columns coincide with provided dimensions", {
  expect_equal(nmeasures(sentMeas1), length(sentMeas1$features) * length(sentMeas1$lexicons) * length(sentMeas1$time))
  expect_equal(nmeasures(sentMeas2), length(sentMeas2$features) * length(sentMeas2$lexicons) * length(sentMeas2$time))
})

# ctr_agg
test_that("Aggregation control function breaks when wrong inputs supplied", {
  expect_error(ctr_agg(howWithin = c("oops", "again"), howDocs = c("mistake", "forYou"), howTime = "bad",
                       lag = 42, by = "infinitely", fill = "theMartiniPolice", nCore = c("yes", "man")))
  expect_error(ctr_agg(howTime = c("almon", "beta", "exponential"), lag = 0,
                       ordersAlm = -1:2, aBeta = -2, bBeta = -3, alphasExp = c(-1, -3)))
  expect_message(ctr_agg(howTime = "linear", lag = 4, weights = data.frame(a = c(1/2, 1/2))))
  expect_error(ctr_agg(howTime = "own", lag = 12, weights = data.frame("dot--hacker" = rep(1/12, 12), check.names = FALSE)))
  expect_message(ctr_agg(howTime = c("linear", "beta"), lag = 1))
})

# aggregate.sentiment
s1 <- compute_sentiment(corpus, lex, how = "proportional")
s2 <- compute_sentiment(as.character(corpus), lex, how = "counts")
s3 <- compute_sentiment(corpus, lexClust, how = "proportionalSquareRoot", do.sentence = TRUE)
sentimentAgg <- aggregate(s3, ctr_agg(lag = 7), do.full = FALSE)
test_that("Test input and output of sentiment aggregation functionality", {
  expect_true(inherits(s1, "sentiment"))
  expect_true(inherits(s2, "data.table"))
  expect_true(inherits(s3, "sentiment"))
  expect_true(inherits(aggregate(s1, ctr1), "sento_measures"))
  expect_true(inherits(aggregate(s3, ctr1), "sento_measures")) # sentence-level with dates (full)
  expect_true(inherits(aggregate(s3, ctr1, do.full = FALSE), "sentiment"))
  expect_error(aggregate(s2, ctr2)) # doc-level but no dates
  expect_error(sento_measures(corpus, lex, ctr3)) # because overlapping names specified
  expect_true(inherits(sento_measures(corpus, lex, ctr4), "sento_measures"))
  expect_true(inherits(sento_measures(corpus, lex, ctr5), "sento_measures"))
  expect_true(inherits(sento_measures(corpus, lex, ctr6), "sento_measures"))
  # expect_true(all.equal(sentimentAgg[["word_count"]], s1[["word_count"]]))
})

# peakdocs
test_that("Output for peak documents extraction in line with input", {
  expect_length(peakdocs(s1, n = 7, type = "both"), 7)
  expect_length(peakdocs(s1, n = 11, type = "pos"), 11)
  expect_length(peakdocs(s1, n = 1, type = "neg"), 1)
  expect_length(peakdocs(s1, n = 25, type = "both", do.average = TRUE), 25)
})

# peakdates
test_that("Output for peak dates extraction in line with input", {
  expect_length(peakdates(sentMeas1, n = 15, type = "both"), 15)
  expect_length(peakdates(sentMeas1, n = 21, type = "pos"), 21)
  expect_length(peakdates(sentMeas1, n = 4, type = "neg"), 4)
  expect_length(peakdates(sentMeas1, n = 10, type = "both", do.average = TRUE), 10)
})
sborms/sentometrics documentation built on Aug. 21, 2021, 6:40 a.m.