text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

library(tibble)
library(dplyr)
library(text)
library(testthat)
library(ggplot2)
#install.packages("topics")
#
# .rs.restartR()
#help(textrpp_initialize)
#textrpp_install()
#text::textrpp_initialize()
#text::textrpp_initialize(
#  save_profile = TRUE,
#  condaenv = "textrpp_condaenv",
#  refresh_settings = TRUE)
#textrpp_initialize(
#  condaenv = "berttopic2",
#  refresh_settings = TRUE
#)
#textEmbed("hello")

test_that("Bertopic", {
  skip_on_cran()

  save_dir_temp <- tempdir()
  #Langauge_based_assessment_data_8 <- load(Language_based_assessment_data_8.rda")

  # Load and prepare data
  data1 <- Language_based_assessment_data_8[c("satisfactiontexts", "swlstotal")]
  colnames(data1) <- c("text", "score")

  data2 <- Language_based_assessment_data_8[c("harmonytexts", "hilstotal")]
  colnames(data2) <- c("text", "score")

  data3 <- Language_based_assessment_data_3_100[1:2]
  colnames(data3) <- c("text", "score")

  data <- dplyr::bind_rows(data1, data2, data3)

  if (Sys.info()["sysname"] == "Darwin" | Sys.info()["sysname"] == "Windows") {

  # Create BERTopic model trained on data["text"] help(textTopics)
  bert_model <- text::textTopics(data = data,
                           variable_name = "text",
                           embedding_model = "distilroberta",
                           min_df = 2,
                           set_seed = 42,
                           save_dir = save_dir_temp)

  testthat::expect_equal(bert_model$preds$t_1[2],
                         .1115696,
                         tolerance = 0.0001)


#  textTopicsReduce(
#    data = data,
#    data_var = "text",
#    n_topics = 10L,
#    load_path = "./results/seed_8/my_model/", # From textTopics saved output
#    save_dir = "./results_reduced",
#    embedding_model = "distilroberta"
#  )
#

#  Testing  how individual topics are associated with "score"
  test2 <- text::textTopicsTest(
    model = bert_model,
    x_variable = "score",
    test_method = "linear_regression"
    )

#  testthat::expect_equal(test2$test$x.score.estimate[1],
#                         .1056764,
#                         tolerance = 0.0001)


  plots <- text::textTopicsWordcloud(
    model = bert_model,
    save_dir = save_dir_temp,
    figure_format = "png",
    seed = 42,
  )

#  plots <- text::textTopicsWordcloud(
#    model = bert_model,
#    test = test2,
#   # p_alpha = 0.05,
#    figure_format = "png",
#   seed = 42,
#    save_dir = save_dir_temp
#  )



  }
})

OscarKjell/text documentation built on April 3, 2025, 3:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

OscarKjell/text
Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

tests/testthat/test_7_textTopics.R
In OscarKjell/text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

R Package Documentation

Browse R Packages

We want your feedback!

OscarKjell/text Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

tests/testthat/test_7_textTopics.R In OscarKjell/text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

R Package Documentation

Browse R Packages

We want your feedback!

OscarKjell/text
Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

tests/testthat/test_7_textTopics.R
In OscarKjell/text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning