tests/testthat/test-tidylda-fit-methods.R

context("tidylda core tests")

### Define some common objects ----

dtm <- nih_sample_dtm

d1 <- dtm[1:50, ]

### Tests for initial fitting of topic models ----

test_that("can fit lda models without error", {

  # if any of the below throw an error, you've got a problem...

  # scalar priors without optimizing alpha
  lda <- tidylda(
    data = d1,
    k = 4,
    iterations = 20, 
    burnin = 10,
    alpha = 0.1, 
    eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = TRUE,
    calc_r2 = TRUE,
    return_data = FALSE,
    verbose = FALSE
  )
  
  # make sure r2 is numeric since calc_r2 = TRUE
  expect_type(lda$r2, "double")
  
  # make sure r2 doesn't have a names element
  expect_null(names(lda$r2))

  # make sure that likelihood is correct since calc_likelihood = TRUE
  expect_s3_class(lda$log_likelihood, "tbl_df")
  
  expect_equal(ncol(lda$log_likelihood), 2)
  
  expect_equal(nrow(lda$log_likelihood), tail(lda$log_likelihood$iteration, 1) + 1)
  
  # while we're here... check dimensions and names of objects
  expect_s3_class(lda, "tidylda")

  expect_length(lda$alpha, 1)
  
  expect_length(lda$eta, 1)
  
  expect_equal(sum(dim(lda$beta) == c(4, ncol(d1))), 2)

  expect_equal(sum(dim(lda$beta) == dim(lda$lambda)), 2)

  expect_equal(sum(dim(lda$theta) == c(nrow(d1), nrow(lda$beta))), 2)

  expect_setequal(colnames(lda$beta), colnames(d1))

  expect_setequal(rownames(lda$beta), colnames(lda$theta))

  expect_setequal(rownames(lda$theta), rownames(d1))

  # scalar priors optimizing alpha
  lda <- tidylda(
    data = d1,
    k = 4,
    iterations = 20, 
    burnin = 10,
    alpha = 0.1, 
    eta = 0.05,
    optimize_alpha = TRUE,
    calc_likelihood = TRUE,
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  )

  expect_length(lda$alpha, 4)

  # vector priors
  lda <- tidylda(
    data = d1,
    k = 4,
    iterations = 20, 
    burnin = 10,
    alpha = rep(0.1, 4), 
    eta = rep(0.05, ncol(d1)),
    optimize_alpha = TRUE,
    calc_likelihood = TRUE,
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  )

  expect_length(lda$alpha, 4)

  expect_length(lda$eta, ncol(d1))

  # eta as matrix prior
  lda <- tidylda(
    data = d1,
    k = 4,
    iterations = 20, 
    burnin = 10,
    alpha = 0.1, 
    eta = matrix(0.05, nrow = 4, ncol = ncol(d1)),
    optimize_alpha = FALSE,
    calc_likelihood = FALSE,
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  )

  expect_true(inherits(lda$eta, "matrix"))
})

test_that("sparse priors for eta don't cause underflow failures", {
  m <- tidylda(
    data = nih_sample_dtm,
    k = 10,
    iterations = 20,
    burnin = 15,
    alpha = 0.05,
    eta = 0.01,
    optimize_alpha = FALSE,
    calc_likelihood = TRUE,
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  )
  
  expect_s3_class(m, "tidylda")
})

test_that("errors hit for malformed parameters", {

  # k = 1 is bad
  expect_error(
    tidylda(
      data = d1,
      k = 1,
      iterations = 20, burnin = 10,
      alpha = 0.1, eta = 0.05,
      optimize_alpha = TRUE,
      calc_likelihood = FALSE,
      calc_r2 = FALSE,
      return_data = FALSE,
      verbose = FALSE
    ),
    regexp = "k must be 2 or greater"
  )

  # iterations not specified
  expect_error(
    tidylda(
      data = d1,
      k = 10,
      alpha = 0.1, eta = 0.05,
      optimize_alpha = TRUE,
      calc_likelihood = FALSE,
      calc_r2 = FALSE,
      return_data = FALSE,
      verbose = FALSE
    ),
    label = "iterations not specified"
  )
  

  # burnin >= iterations
  expect_error(tidylda(
    data = d1,
    k = 4,
    iterations = 20, burnin = 21,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = FALSE,
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  ))

  # non-numeric k
  expect_error(tidylda(
    data = d1,
    k = "4",
    iterations = 20, burnin = 10,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = FALSE,
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  ))

  # iterations not specified
  expect_error(tidylda(
    data = d1,
    k = 4
  ))

  # non-logical logicals
  expect_error(tidylda(
    data = d1,
    k = 4,
    iterations = 20, burnin = 10,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = "FALSE",
    calc_likelihood = FALSE,
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  ))

  expect_error(tidylda(
    data = d1,
    k = 4,
    iterations = 20, burnin = 10,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = "FALSE",
    calc_r2 = FALSE,
    return_data = FALSE,
    verbose = FALSE
  ))

  expect_error(tidylda(
    data = d1,
    k = 4,
    iterations = 20, burnin = 10,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = FALSE,
    calc_r2 = "FALSE",
    return_data = FALSE
  ))

  expect_error(tidylda(
    data = d1,
    k = 4,
    iterations = 20, burnin = 10,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = FALSE,
    calc_r2 = FALSE,
    return_data = "FALSE"
  ))
  
  expect_error(tidylda(
    data = d1,
    k = 4,
    iterations = 20, burnin = 10,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = FALSE,
    calc_r2 = FALSE,
    return_data = FALSE,
    threads = nrow(d1) + 1
  ), label = "threads > nrow(dtm)")
  
  expect_warning(tidylda(
    data = d1,
    k = 4,
    iterations = 20, burnin = 10,
    alpha = 0.1, eta = 0.05,
    optimize_alpha = FALSE,
    calc_likelihood = FALSE,
    calc_r2 = FALSE,
    return_data = FALSE,
    threads = 2,
    verbose = FALSE
  ), label = "nrow(dtm) / threads < 100")
  
  # data doesn't have column names
  d3 <- d1
  colnames(d3) <- NULL
  expect_error(
    tidylda(
      data = d3,
      k = 4,
      iterations = 20
    )
  )
  
})

# note as of this writing, not parallel, 
# but use of threads argument should not throw errors
test_that("parallelism works as expected", {
  suppressWarnings(
    lda <- tidylda(
      data = d1,
      k = 4,
      iterations = 20, burnin = 10,
      alpha = 0.1, eta = 0.05,
      optimize_alpha = FALSE,
      calc_likelihood = TRUE,
      calc_r2 = TRUE,
      return_data = FALSE,
      threads = 2,
      verbose = FALSE
    )
  )

  
  expect_s3_class(lda, "tidylda")
  
})

Try the tidylda package in your browser

Any scripts or data that you put into this service are public.

tidylda documentation built on July 26, 2023, 5:34 p.m.