context("WarpLDA")
#------------------------------------------------------------------------------
# Shared fixture: a document-term matrix built from the first 200 entries of
# `movie_review`, reused by the tests below.
train_ind = seq_len(200L)
ids = movie_review[["id"]][train_ind]
# Lower-case the review text and label every document with its id.
txt = setNames(tolower(movie_review[["review"]][train_ind]), ids)
tokens = word_tokenizer(txt)
it = itoken(tokens, ids = ids, progressbar = FALSE)
# Prune the vocabulary with a minimum term count of 5 and a minimum document
# proportion of 0.02 before building the vectorizer.
vocab = prune_vocabulary(
  create_vocabulary(it),
  term_count_min = 5,
  doc_proportion_min = 0.02
)
vctrz = vocab_vectorizer(vocab)
dtm = create_dtm(it, vectorizer = vctrz)
#------------------------------------------------------------------------------
# Number of topics requested from the model in the tests below.
n_topic = 10
# Fits an LDA model on the shared `dtm` twice (10 and 20 iterations) and
# checks: the state of an unfitted model, the shape and row sums of the
# returned distributions, the length of the likelihood trace, that perplexity
# decreases after the second fit, that perplexity() rejects malformed input,
# and that transform() returns a proper doc-topic matrix.
test_that("LDA, perplexity", {
  model = LDA$new(n_topic, doc_topic_prior = 0.1, topic_word_prior = 0.01)

  # Nothing has been fitted yet: accessing the fields warns / errors / is NULL.
  expect_warning(model$components)
  expect_error(suppressWarnings(model$topic_word_distribution))
  expect_null(model$doc_topic_distribution)

  # --- first fit: 10 iterations, likelihood recorded at every iteration -----
  set.seed(1)
  n_iter = 10
  # NOTE(review): convergence_tol = -1 presumably keeps the convergence check
  # from stopping the run early, so all `n_iter` iterations are executed.
  doc_topic_distr_10 = model$fit_transform(
    dtm,
    n_iter = n_iter,
    n_check_convergence = 1,
    convergence_tol = -1,
    progressbar = FALSE
  )
  expect_equal(nrow(attr(doc_topic_distr_10, "likelihood")), n_iter)
  topic_word_distr_10 = model$topic_word_distribution
  perpl_10 = perplexity(dtm, topic_word_distr_10, doc_topic_distr_10)
  expect_equal(dim(doc_topic_distr_10), c(nrow(dtm), n_topic))
  expect_equivalent(rowSums(doc_topic_distr_10), rep(1, nrow(dtm)))
  expect_equal(dim(model$components), c(n_topic, ncol(dtm)))
  expect_equivalent(rowSums(topic_word_distr_10), rep(1, n_topic))

  # --- second fit: 20 iterations, likelihood recorded every 4th iteration ---
  set.seed(1)
  n_iter = 20
  doc_topic_distr_20 = model$fit_transform(
    dtm,
    n_iter = n_iter,
    n_check_convergence = 4,
    convergence_tol = -1,
    progressbar = FALSE
  )
  topic_word_distr_20 = model$topic_word_distribution
  expect_equal(nrow(attr(doc_topic_distr_20, "likelihood")), n_iter / 4)
  perpl_20 = perplexity(dtm, topic_word_distr_20, doc_topic_distr_20)
  expect_gt(perpl_10, perpl_20)

  # check that perplexity() rejects the two distributions in swapped order
  expect_error(perplexity(dtm, doc_topic_distr_20, topic_word_distr_20))

  # check that perplexity() checks input dimensions
  # The distributions are passed in the correct order
  # (X, topic_word_distribution, doc_topic_distribution) so that the only
  # problem in each call is the single dimension mismatch being tested;
  # with swapped arguments every call would fail regardless of dimensions.
  expect_error(perplexity(dtm[1:50, ], topic_word_distr_20, doc_topic_distr_20))
  expect_error(perplexity(dtm, topic_word_distr_20, doc_topic_distr_20[1:5, ]))
  expect_error(perplexity(dtm, topic_word_distr_20, doc_topic_distr_20[, 1:5]))
  expect_error(perplexity(dtm, topic_word_distr_20[, 1:5], doc_topic_distr_20))
  expect_error(perplexity(dtm, topic_word_distr_20[1:5, ], doc_topic_distr_20))

  # check that perplexity() checks that inputs are actual distributions
  # (adding 1 keeps the dimensions valid but breaks the rows-sum-to-1 property)
  expect_error(perplexity(dtm, topic_word_distr_20, doc_topic_distr_20 + 1))
  expect_error(perplexity(dtm, topic_word_distr_20 + 1, doc_topic_distr_20))

  # check model transform new data correctly
  dtm_test = dtm
  dtm_predict = model$transform(dtm_test, n_iter = 1)
  expect_equal(dim(dtm_predict), c(nrow(dtm_test), n_topic))
  expect_equivalent(rowSums(dtm_predict), rep(1, nrow(dtm_test)))
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.