context("Aggregation")
library("data.table")
library("quanteda")
set.seed(123)
# corpus, lexicon and aggregation control creation
data("usnews")
corpus <- quanteda::corpus_sample(sento_corpus(corpusdf = usnews), size = 1000)
data("list_lexicons")
lex <- sento_lexicons(list_lexicons[c("GI_en", "LM_en")])
lexClust <- sento_lexicons(list_lexicons[c("GI_en", "LM_en", "HENRY_en")],
list_valence_shifters[["en"]][, c("x", "t")])
### tests from here ###
ctr1 <- ctr_agg(howWithin = "proportionalPol", howDocs = "equal_weight", howTime = "almon", by = "month",
lag = 5, ordersAlm = 1:3, do.inverseAlm = TRUE)
sentMeas1 <- sento_measures(corpus, lex, ctr1)
ctr2 <- ctr_agg(howWithin = "counts", howDocs = "proportional", howTime = c("equal_weight", "linear", "own"),
by = "year", lag = 2, weights = data.frame(q1 = c(0.25, 0.75), q3 = c(0.75, 0.25)),
do.ignoreZeros = FALSE, do.sentence = TRUE)
sentMeas2 <- sento_measures(corpus, lex, ctr2)
ctr3 <- ctr_agg(howWithin = "counts", howDocs = "inverseProportional", howTime = c("equal_weight", "own"),
by = "year", lag = 3, weights = data.frame(GI_en = c(0.3, 0.6, 0.1)))
ctr4 <- ctr_agg(howWithin = "UShaped", howDocs = "inverseProportional", howTime = "exponential",
do.inverseExp = TRUE, alphas = c(0.1, 0.2, 0.3), by = "day", lag = 180)
ctr5 <- ctr_agg(howWithin = "counts", howDocs = "exponential", alphaExpDocs = 0.2,
howTime = "linear", by = "year", lag = 3)
ctr6 <- ctr_agg(howWithin = "TFIDF", howDocs = "inverseExponential", alphaExpDocs = 0.1,
howTime = "equal_weight", by = "week", lag = 7)
# sento_measures
test_that("Number of columns coincide with provided dimensions", {
expect_equal(nmeasures(sentMeas1), length(sentMeas1$features) * length(sentMeas1$lexicons) * length(sentMeas1$time))
expect_equal(nmeasures(sentMeas2), length(sentMeas2$features) * length(sentMeas2$lexicons) * length(sentMeas2$time))
})
# ctr_agg
test_that("Aggregation control function breaks when wrong inputs supplied", {
expect_error(ctr_agg(howWithin = c("oops", "again"), howDocs = c("mistake", "forYou"), howTime = "bad",
lag = 42, by = "infinitely", fill = "theMartiniPolice", nCore = c("yes", "man")))
expect_error(ctr_agg(howTime = c("almon", "beta", "exponential"), lag = 0,
ordersAlm = -1:2, aBeta = -2, bBeta = -3, alphasExp = c(-1, -3)))
expect_message(ctr_agg(howTime = "linear", lag = 4, weights = data.frame(a = c(1/2, 1/2))))
expect_error(ctr_agg(howTime = "own", lag = 12, weights = data.frame("dot--hacker" = rep(1/12, 12), check.names = FALSE)))
expect_message(ctr_agg(howTime = c("linear", "beta"), lag = 1))
})
# aggregate.sentiment
s1 <- compute_sentiment(corpus, lex, how = "proportional")
s2 <- compute_sentiment(as.character(corpus), lex, how = "counts")
s3 <- compute_sentiment(corpus, lexClust, how = "proportionalSquareRoot", do.sentence = TRUE)
sentimentAgg <- aggregate(s3, ctr_agg(lag = 7), do.full = FALSE)
test_that("Test input and output of sentiment aggregation functionality", {
expect_true(inherits(s1, "sentiment"))
expect_true(inherits(s2, "data.table"))
expect_true(inherits(s3, "sentiment"))
expect_true(inherits(aggregate(s1, ctr1), "sento_measures"))
expect_true(inherits(aggregate(s3, ctr1), "sento_measures")) # sentence-level with dates (full)
expect_true(inherits(aggregate(s3, ctr1, do.full = FALSE), "sentiment"))
expect_error(aggregate(s2, ctr2)) # doc-level but no dates
expect_error(sento_measures(corpus, lex, ctr3)) # because overlapping names specified
expect_true(inherits(sento_measures(corpus, lex, ctr4), "sento_measures"))
expect_true(inherits(sento_measures(corpus, lex, ctr5), "sento_measures"))
expect_true(inherits(sento_measures(corpus, lex, ctr6), "sento_measures"))
# expect_true(all.equal(sentimentAgg[["word_count"]], s1[["word_count"]]))
})
# peakdocs
test_that("Output for peak documents extraction in line with input", {
expect_length(peakdocs(s1, n = 7, type = "both"), 7)
expect_length(peakdocs(s1, n = 11, type = "pos"), 11)
expect_length(peakdocs(s1, n = 1, type = "neg"), 1)
expect_length(peakdocs(s1, n = 25, type = "both", do.average = TRUE), 25)
})
# peakdates
test_that("Output for peak dates extraction in line with input", {
expect_length(peakdates(sentMeas1, n = 15, type = "both"), 15)
expect_length(peakdates(sentMeas1, n = 21, type = "pos"), 21)
expect_length(peakdates(sentMeas1, n = 4, type = "neg"), 4)
expect_length(peakdates(sentMeas1, n = 10, type = "both", do.average = TRUE), 10)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.