Nothing
context("tests of refit method for tidylda")
dtm <- nih_sample_dtm
d1 <- dtm[1:50, ]
d2 <- dtm[51:100, ]
# make sure we have different vocabulary for each data set
d1 <- d1[, Matrix::colSums(d1) > 0]
d2 <- d2[, Matrix::colSums(d2) > 0]
lda <- tidylda(
data = d1,
k = 4,
iterations = 20, burnin = 10,
alpha = 0.1, eta = 0.05,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
### Tests for refit.tidylda ----
test_that("can update models", {
# continuation of the old model for another 20 iterations
# matters because dtm lines up exactly with existing vocabulary etc.
lda2 <- refit(
object = lda,
new_data = d1,
iterations = 20,
burnin = 10,
prior_weight = NA,
additional_k = 0,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
expect_named(lda2, names(lda))
expect_equal(nrow(lda2$theta), nrow(d1))
expect_equal(ncol(lda2$theta), ncol(lda$theta))
expect_equal(sum(dim(lda2$beta) == dim(lda$beta)), 2)
expect_equal(sum(dim(lda2$lambda) == dim(lda$lambda)), 2)
# new data adding no extra topics no beta as prior
lda2 <- refit(
object = lda,
new_data = d2,
additional_k = 0,
prior_weight = NA,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
expect_named(lda2, names(lda))
expect_equal(nrow(lda2$theta), nrow(d2))
expect_equal(ncol(lda2$theta), ncol(lda$theta))
expect_equal(ncol(lda2$beta), length(union(colnames(d1), colnames(d2))))
# 1 additonal topic and no beta as prior
lda2 <- refit(
object = lda,
new_data = d2,
additional_k = 1,
prior_weight = NA,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
expect_named(lda2, names(lda))
expect_equal(nrow(lda2$theta), nrow(d2))
expect_equal(ncol(lda2$theta), ncol(lda$theta) + 1)
expect_equal(ncol(lda2$beta), length(union(colnames(d1), colnames(d2))))
# 3 additional topics and no beta as prior
lda2 <- refit(
object = lda,
new_data = d2,
additional_k = 3,
prior_weight = NA,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
expect_named(lda2, names(lda))
expect_equal(nrow(lda2$theta), nrow(d2))
expect_equal(ncol(lda2$theta), ncol(lda$theta) + 3)
expect_equal(ncol(lda2$beta), length(union(colnames(d1), colnames(d2))))
# no additional topics and beta as prior
lda2 <- refit(
object = lda,
new_data = d2,
additional_k = 0,
prior_weight = 1,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
expect_named(lda2, names(lda))
expect_equal(nrow(lda2$theta), nrow(d2))
expect_equal(ncol(lda2$theta), ncol(lda$theta))
expect_equal(ncol(lda2$beta), length(union(colnames(d1), colnames(d2))))
expect_true(inherits(lda2$eta, "matrix"))
# 3 additonal topics and beta as prior
lda2 <- refit(
object = lda,
new_data = d2,
additional_k = 3,
prior_weight = 1,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
expect_named(lda2, names(lda))
expect_equal(nrow(lda2$theta), nrow(d2))
expect_equal(ncol(lda2$theta), ncol(lda$theta) + 3)
expect_equal(ncol(lda2$beta), length(union(colnames(d1), colnames(d2))))
# update models with scalar eta
# update models with matrix eta
l1 <- tidylda(
data = d1,
k = 4,
iterations = 20, burnin = 10,
alpha = 0.1,
eta = matrix(0.05, nrow = 4, ncol = ncol(d1)),
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE,
verbose = FALSE
)
l2 <- refit(l1, d2, iterations = 20, verbose = FALSE)
expect_equal(ncol(l2$eta), length(union(colnames(d1), colnames(d2))))
})
test_that("errors are thrown for malformed inputs to refit.tidylda", {
# no vocabulary overlap between models
nd <- rbind(numeric(10), numeric(10), numeric(10))
colnames(nd) <- 1:10 # numbers means no vocab overlap
lda2 <- refit(
object = lda,
new_data = nd,
iterations = 10,
verbose = FALSE
)
expect_s3_class(lda2, "tidylda")
# data doesn't have column names
d3 <- d2
colnames(d3) <- NULL
expect_error(
refit(
object = lda,
new_data = d3,
iterations = 20,
verbose = FALSE
)
)
# iterations not specified
expect_error(
refit(
object = lda,
new_data = d2
)
)
# burnin >= iterations
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = 3,
beta_as_prior = TRUE,
iterations = 20,
burnin = 30,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE
)
)
# additional_k is not numeric
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = "3",
beta_as_prior = TRUE,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE
)
)
# additional_k is less than zero
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = -3,
beta_as_prior = TRUE,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE
)
)
# iterations not specified
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = 3,
beta_as_prior = TRUE
)
)
# malformed prior weight
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = 3,
prior_weight = TRUE,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE
)
)
# logical things aren't logical
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = 3,
prior_weight = 1,
iterations = 20,
burnin = 10,
optimize_alpha = "TRUE",
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = FALSE
)
)
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = 3,
beta_as_prior = TRUE,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = "TRUE",
calc_r2 = TRUE,
return_data = FALSE
)
)
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = 3,
beta_as_prior = TRUE,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = "TRUE",
return_data = FALSE
)
)
expect_error(
refit(
object = lda,
new_data = d2,
additional_k = 3,
beta_as_prior = TRUE,
iterations = 20,
burnin = 10,
optimize_alpha = TRUE,
calc_likelihood = TRUE,
calc_r2 = TRUE,
return_data = "FALSE"
)
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.