Nothing
# Attach the packages used below. library() stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script carry on
# until the first unresolved function call.
library(seededlda)
library(quanteda)

# Load the Guardian corpus. Exactly one readRDS() call is active: previously the
# "guardian2016.rds" file was read first and its result was immediately
# overwritten by the "-10k" file, so that first read was wasted work and an
# extra point of failure. The paths are machine-specific; enable the one that
# applies to the current machine.
#data_corpus_guardian <- readRDS('../Dropbox/Public/data_corpus_guardian2016.rds')
data_corpus_guardian <- readRDS('/home/kohei/Dropbox/Public/data_corpus_guardian2016-10k.rds')
#data_corpus_guardian <- readRDS('C:/Users/watan/Dropbox/Public/data_corpus_guardian2016-10k.rds')
# Tokenise the corpus, dropping punctuation, symbols and numbers.
# The argument is spelled `remove_numbers` in full; the previous
# `remove_number` only worked through partial argument matching.
toks <- tokens(
  data_corpus_guardian,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_numbers = TRUE
)

# Build the document-feature matrix step by step. Plain function calls are used
# instead of `%>%` so the script does not depend on a pipe operator being
# re-exported by one of the attached packages.
dfmt <- dfm(toks)
# Remove stopwords; `min_nchar = 2` is forwarded as an additional length filter.
dfmt <- dfm_remove(dfmt, stopwords(), min_nchar = 2)
# Trim features by document frequency, expressed as a proportion of documents
# (upper bound 0.1).
dfmt <- dfm_trim(dfmt, max_docfreq = 0.1, docfreq_type = "prop")
# Fit the same 20-topic model (200 iterations, verbose output) five times,
# changing only `batch_size`: 1, 0.5, 0.2, 0.1 and 0.05.
# NOTE(review): `batch_size` is presumably a proportion of the corpus per
# batch -- confirm against the seededlda documentation.
lda0 <- textmodel_lda(
  dfmt,
  k = 20, verbose = TRUE, max_iter = 200,
  batch_size = 1
)
lda1 <- textmodel_lda(
  dfmt,
  k = 20, verbose = TRUE, max_iter = 200,
  batch_size = 0.5
)
lda2 <- textmodel_lda(
  dfmt,
  k = 20, verbose = TRUE, max_iter = 200,
  batch_size = 0.2
)
lda3 <- textmodel_lda(
  dfmt,
  k = 20, verbose = TRUE, max_iter = 200,
  batch_size = 0.1
)
lda4 <- textmodel_lda(
  dfmt,
  k = 20, verbose = TRUE, max_iter = 200,
  batch_size = 0.05
)
# Time three configurations of the 20-topic model, one run each (`times = 1`):
#   para - batch_size = 0.01
#   auto - batch_size = 0.01 with auto_iter = TRUE
#   seri - default settings (no batch_size given)
# NOTE(review): the labels presumably mean parallel / auto-iteration / serial.
microbenchmark::microbenchmark(
  para = textmodel_lda(
    dfmt,
    k = 20, verbose = TRUE, batch_size = 0.01
  ),
  auto = textmodel_lda(
    dfmt,
    k = 20, verbose = TRUE, batch_size = 0.01, auto_iter = TRUE
  ),
  seri = textmodel_lda(
    dfmt,
    k = 20, verbose = TRUE
  ),
  times = 1
)
# Repeat the same three configurations with base R's system.time(), one call
# per configuration, in the same order as the microbenchmark comparison.

# batch_size = 0.01
system.time(
  textmodel_lda(dfmt, k = 20, verbose = TRUE, batch_size = 0.01)
)

# batch_size = 0.01 with auto_iter = TRUE
system.time(
  textmodel_lda(
    dfmt,
    k = 20, verbose = TRUE, batch_size = 0.01, auto_iter = TRUE
  )
)

# default settings (no batch_size given)
system.time(
  textmodel_lda(dfmt, k = 20, verbose = TRUE)
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.