# test-unsupervised.R
# In fastrtext: 'fastText' Wrapper for Text Classification and Word Representation

context("Unsupervised training")

# Path to the model file "model_unsupervised_test.bin" looked up in the
# "extdata" folder of the fastrtext package; the tests below load it as their
# fixture.
model_test_path <- system.file(
  "extdata", "model_unsupervised_test.bin",
  package = "fastrtext"
)

test_that("Training", {
  # Trains a small skipgram model twice on the lower-cased training sentences:
  # once through the raw execute() command interface and once through the
  # build_vectors() wrapper, checking that each run produces a model file.
  data("train_sentences")
  # test_sentences is not referenced in this test.
  # NOTE(review): confirm nothing else relies on it before dropping this load.
  data("test_sentences")
  texts <- tolower(train_sentences[, "text"])
  tmp_file_txt <- tempfile()
  tmp_file_model <- tempfile()
  # Separate output path for build_vectors(), so that its result is not
  # confused with the files already written by execute().
  tmp_file_model_wrapper <- tempfile()
  writeLines(text = texts, con = tmp_file_txt)
  execute(commands = c("skipgram",
                       "-input", tmp_file_txt,
                       "-output", tmp_file_model,
                       "-verbose", 0,
                       "-dim", 10,
                       "-bucket", 1e3,
                       "-loss", "ns",
                       "-epoch", 3))

  # Check learned file exists
  expect_true(file.exists(paste0(tmp_file_model, ".bin")))
  expect_true(file.exists(paste0(tmp_file_model, ".vec")))

  model <- load_model(tmp_file_model)
  parameters <- get_parameters(model)
  expect_equal(parameters$model_name, "sg")

  build_vectors(documents = texts,
                model_path = tmp_file_model_wrapper,
                modeltype = "skipgram",
                bucket = 1e3,
                dim = 10,
                epoch = 3,
                loss = "softmax",
                verbose = 0)

  # The wrapper must have written its own model file; before this check the
  # call could silently do nothing and the test would still pass.
  expect_true(file.exists(paste0(tmp_file_model_wrapper, ".bin")))

  # Remove the temporary corpus and model files created above. unlink() does
  # not raise an error for paths that do not exist.
  unlink(c(tmp_file_txt,
           paste0(tmp_file_model, c(".bin", ".vec")),
           paste0(tmp_file_model_wrapper, c(".bin", ".vec"))))
})

test_that("Test parameter extraction", {
  # The fixture model must report a dimension of 70 and the "sg" model name.
  fixture_params <- get_parameters(load_model(model_test_path))
  expect_equal(fixture_params$dim, 70)
  expect_equal(fixture_params$model_name, "sg")
})

test_that("Test word extraction and word IDs", {
  fixture <- load_model(model_test_path)
  vocabulary <- get_dictionary(fixture)

  # The dictionary has the expected size and contains each sample word.
  expect_length(vocabulary, 2061)
  for (word in c("time", "timing", "experience", "section")) {
    expect_true(word %in% vocabulary)
  }

  # Indexing the dictionary with the returned word IDs must give back the
  # words that were looked up, in the same order.
  query_words <- c("this", "is", "a", "test")
  word_ids <- get_word_ids(fixture, query_words)
  expect_equal(get_dictionary(fixture)[word_ids], query_words)
})

test_that("Tokenization separate words in a text document", {
  fixture <- load_model(model_test_path)

  # A single document is expected to come back as a one-element list holding
  # its words.
  expected_tokens <- list(c("this", "is", "a", "test"))
  expect_equal(get_tokenized_text(fixture, "this is a test"), expected_tokens)
})

test_that("Test word embeddings", {
  fixture <- load_model(model_test_path)

  # Vector length: the first row returned for a word has as many entries as
  # the dimension reported by the model parameters.
  expected_dim <- get_parameters(fixture)$dim
  time_vector <- get_word_vectors(fixture, "time")[1, ]
  expect_length(time_vector, expected_dim)

  # Word distance: the pairs below are expected to be ordered
  # (our, we) < (introduction, conclusions) < (experience, section).
  word_distance <- function(first, second) {
    get_word_distance(fixture, first, second)
  }
  expect_lt(word_distance("introduction", "conclusions"),
            word_distance("experience", "section"))
  expect_lt(word_distance("our", "we"),
            word_distance("introduction", "conclusions"))
})

test_that("Nearest neighbours", {
  fixture <- load_model(model_test_path)

  # "times" must appear among the names of the result requested for "time"
  # with the value 10.
  neighbour_names <- names(get_nn(fixture, "time", 10))
  expect_true("times" %in% neighbour_names)
})

test_that("Test sentence representation", {
  fixture <- load_model(model_test_path)

  # One sentence: a single row holding 70 values in total.
  single <- get_sentence_representation(fixture, "this is a test")
  expect_length(single, 70)
  expect_equal(nrow(single), 1)

  # Two sentences: one row per sentence and no missing values.
  sentences <- c("this is a test", "and here is another")
  pair <- get_sentence_representation(fixture, sentences)
  expect_equal(nrow(pair), 2)
  expect_false(any(is.na(pair)))
})

# Trigger a garbage collection after the tests above have been declared.
# NOTE(review): presumably meant to release the models loaded by the tests;
# confirm the intent.
gc()

Any scripts or data that you put into this service are public.

fastrtext documentation built on Oct. 30, 2019, 11:32 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

fastrtext
'fastText' Wrapper for Text Classification and Word Representation

tests/testthat/test-unsupervised.R
In fastrtext: 'fastText' Wrapper for Text Classification and Word Representation

Try the fastrtext package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastrtext 'fastText' Wrapper for Text Classification and Word Representation

tests/testthat/test-unsupervised.R In fastrtext: 'fastText' Wrapper for Text Classification and Word Representation

Try the fastrtext package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastrtext
'fastText' Wrapper for Text Classification and Word Representation

tests/testthat/test-unsupervised.R
In fastrtext: 'fastText' Wrapper for Text Classification and Word Representation