Nothing
# Shared fixtures for the read_keywords() tests below.
# library() stops immediately when quanteda is unavailable, whereas require()
# merely returns FALSE and lets later calls fail with unrelated errors.
library(quanteda)

# Load the example data set; its `doc_dfm` and `keywords` elements feed the
# tests that follow.
data(keyATM_data_bills)
bills_dfm <- keyATM_data_bills$doc_dfm
keyATM_docs <- keyATM_read(bills_dfm)
bills_keywords <- keyATM_data_bills$keywords
test_that("normal usage", {
  # Turn the shared keyword list into a quanteda dictionary and read it back.
  dict_from_bills <- quanteda::dictionary(bills_keywords)
  keywords_out <- read_keywords(
    dictionary = dict_from_bills,
    docs = keyATM_docs
  )
  expect_equal(keywords_out$Drug, "drug")

  # Defensive programming: a call without any arguments must raise an error,
  # and a call that omits `docs` must raise a warning.
  expect_error(read_keywords())
  expect_warning(read_keywords(dictionary = dict_from_bills))
})
test_that("resolving and split", {
  # A simplified example from koheiw/quanteda.seededlda
  sample_texts <- c(
    "air force 1",
    "soldier soldiers navy army politician",
    "party members",
    "parliament is politicians' party politics leaders leadership"
  )
  word_tokens <- tokens(corpus(sample_texts), remove_punct = TRUE)

  # Keep purely alphabetic features, then drop English stopwords.
  alpha_dfm <- dfm_select(dfm(word_tokens), "^[A-Za-z]+$", valuetype = "regex")
  clean_dfm <- dfm_remove(alpha_dfm, stopwords("en"))
  docs <- keyATM_read(texts = clean_dfm)

  topic_patterns <- list(
    politics = c(
      "parliament*", "congress*", "party leader*", "party member*",
      "voter*", "lawmaker*", "politician*"
    ),
    military = c("military", "soldier*", "air force", "marine", "navy", "army")
  )
  smalldictionary <- dictionary(topic_patterns)

  # Default call (split = TRUE): the wildcard entry picks up "soldiers" and
  # the multi-word entry "air force" contributes "air".
  with_split <- read_keywords(dictionary = smalldictionary, docs = docs)
  expect_true("soldiers" %in% with_split$military)
  expect_true("air" %in% with_split$military)

  # With split = FALSE, "air" must not appear among the military keywords.
  without_split <- read_keywords(
    dictionary = smalldictionary,
    docs = docs,
    split = FALSE
  )
  expect_true(!("air" %in% without_split$military))
})
test_that("file I/O", {
  # Mirrors the example of the function; the dictionary is fetched over the
  # network, so the test is skipped on CRAN and on Linux.
  skip_on_cran()
  skip_on_os("linux")

  mfd_url <- "https://moralfoundations.org/wp-content/uploads/files/downloads/moral%20foundations%20dictionary.dic"
  mfd_path <- tempfile()
  download.file(mfd_url, mfd_path, quiet = TRUE)

  # The file path is supplied positionally as the first argument.
  mfd_keywords <- read_keywords(mfd_path, docs = keyATM_docs, format = "LIWC")
  expect_true("terrorism" %in% mfd_keywords$IngroupVice)
})
test_that("Integration", {
  # End-to-end check: download a corpus and a YAML dictionary, read the
  # keywords, run keyATM, and inspect the top words of the military topic.
  # Both inputs are fetched over the network, so the test is skipped on CRAN
  # and on Linux.
  skip_on_cran()
  skip_on_os("linux")

  ## exact example: koheiw/quanteda.seededlda
  rdsfile <- tempfile()
  # RDS is a binary format: mode = "wb" forces a binary transfer so the file
  # is not altered by text-mode line-ending translation on Windows.
  download.file(
    "https://github.com/koheiw/quanteda.seededlda/raw/master/tests/data/data_corpus_sputnik.RDS",
    rdsfile,
    quiet = TRUE,
    mode = "wb"
  )
  corp <- readRDS(rdsfile)
  toks <- tokens(corp, remove_punct = TRUE)
  dfmt <- dfm(toks) %>%
    dfm_select("^[A-Za-z]+$", valuetype = "regex") %>%
    dfm_remove(stopwords("en")) %>%
    dfm_trim(
      min_termfreq = 0.90, termfreq_type = "quantile",
      max_docfreq = 0.1, docfreq_type = "prop"
    )
  docs <- keyATM_read(texts = dfmt)

  dictfile <- tempfile(fileext = ".yml")
  download.file(
    "https://raw.githubusercontent.com/koheiw/quanteda.seededlda/master/tests/data/topics.yml",
    dictfile,
    quiet = TRUE
  )
  kw <- read_keywords(dictfile, docs = docs)

  res <- keyATM(
    docs,
    no_keyword_topics = 0,
    keywords = kw,
    model = "base",
    options = list(seed = 1234, iterations = 20)
  )

  # Extract the military column once and reuse it for both checks.
  military_top <- top_words(res, n = 20, show_keyword = FALSE)[, "5_military"]
  ## syria is related to military
  expect_true("syria" %in% military_top)
  ## German is not related to military
  expect_false("german" %in% military_top)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.