# text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

library(tibble)
library(dplyr)
library(text)
library(testthat)

# Labels the group of tests that follow in this file.
# NOTE(review): context() is deprecated in the testthat 3rd edition — confirm
# which edition the package uses before relying on or removing it.
context("textPlot Functions")

# Runs textProjection() on the first 10 rows, once with split = "mean" and once
# with split = "no", both with pca = 2. For each run, the second element of the
# result must be a tibble with a character `words` column and a numeric `n`
# column, and the first `dot.x` value is pinned as a regression guard.
test_that("textProjection MEAN and PCA produces a tibble with character variable and numeric variable.", {
  skip_on_cran()

  # Shared inputs for both projections (10-row subset keeps the test fast).
  words_subset <- Language_based_assessment_data_8$harmonywords[1:10]
  embeddings_subset <- word_embeddings_4$texts$harmonywords[1:10, ]
  hils_subset <- Language_based_assessment_data_8$hilstotal[1:10]

  # Pre-processing data for plotting: mean split
  projection_mean <- text::textProjection(
    words = words_subset,
    word_embeddings = embeddings_subset,
    word_types_embeddings = word_embeddings_4$word_types,
    x = hils_subset,
    split = "mean",
    Npermutations = 2,
    n_per_split = 1,
    pca = 2
  )
  word_data_mean <- projection_mean[[2]]

  expect_true(tibble::is_tibble(word_data_mean))
  expect_type(word_data_mean$words[1], "character")
  expect_type(word_data_mean$n[1], "double")
  # expect_equal(word_data_mean$dot.x[1], -3.847934, tolerance = 0.001) # Not anchoring the G1 and G2 embeddings
  expect_equal(word_data_mean$dot.x[1], -4.215651, tolerance = 0.001)

  # Pre-processing data for plotting: no split.
  # All arguments are named so the call does not depend on parameter order.
  projection_no_split <- text::textProjection(
    words = words_subset,
    word_embeddings = embeddings_subset,
    word_types_embeddings = word_embeddings_4$word_types,
    x = hils_subset,
    split = "no",
    Npermutations = 2,
    n_per_split = 1,
    pca = 2
  )
  word_data_no_split <- projection_no_split[[2]]

  expect_true(tibble::is_tibble(word_data_no_split))
  expect_type(word_data_no_split$words[1], "character")
  expect_type(word_data_no_split$n[1], "double")
  # expect_equal(word_data_no_split$dot.x[1], -1.537714, tolerance = 0.001) # Not anchoring the G1 and G2 embeddings
  expect_equal(word_data_no_split$dot.x[1], -1.039681, tolerance = 0.001)
})

# Runs textProjection() on the first 12 rows with two numeric variables
# (hilstotal and swlstotal), a quartile split and pca = 0.9. The second element
# of the result must be a tibble with a character `words` column and a numeric
# `n` column; the third `dot.x` value is pinned as a regression guard.
# The description is a single-line string so the reported test name contains no
# embedded newlines or indentation.
test_that("textProjection with QUARTILE, 0.9 PCA and 2-dimensions produces a tibble with character variable and numeric variable.", {
  skip_on_cran()

  # Pre-processing data for plotting.
  # x and y are named explicitly instead of relying on positional matching
  # after the named arguments.
  projection_quartile <- text::textProjection(
    words = Language_based_assessment_data_8$harmonywords[1:12],
    word_embeddings = word_embeddings_4$texts$harmonywords[1:12, ],
    word_types_embeddings = word_embeddings_4$word_types,
    x = Language_based_assessment_data_8$hilstotal[1:12],
    y = Language_based_assessment_data_8$swlstotal[1:12],
    split = "quartile",
    Npermutations = 2,
    n_per_split = 3,
    pca = 0.9
  )
  word_data <- projection_quartile[[2]]

  expect_true(tibble::is_tibble(word_data))
  expect_type(word_data$words[1], "character")
  expect_type(word_data$n[1], "double")
  expect_equal(word_data$dot.x[3], 4.948596, tolerance = 0.001)
})

# Builds a projection plot from the DP_projections_HILS_SWLS_100 data with
# y_axes = FALSE and all embedding overlays switched off. Checks that the
# result holds a ggplot object and pins the first `dot.y` value of the
# processed word data as a regression guard.
test_that("textProjectionPlot 1-DIMENSIONS produces a plot", {
  skip_on_cran()

  # remotes::install_github("tidyverse/ggplot2", ref = remotes::github_pull("5592"))
  # Dot Product Projection Plot help(textProjectionPlot)
  p1 <- text::textProjectionPlot(
    word_data = DP_projections_HILS_SWLS_100,
    k_n_words_to_test = TRUE,
    min_freq_words_test = 1,
    plot_n_words_square = 3,
    plot_n_words_p = 3,
    plot_n_word_extreme = 1,
    plot_n_word_frequency = 1,
    plot_n_words_middle = 1,
    # x_axes = TRUE,
    y_axes = FALSE,
    p_alpha = 0.05,
    title_top = " Dot Product Projection (DPP)",
    x_axes_label = "Low vs. High HILS score",
    y_axes_label = "Low vs. High SWLS score",
    p_adjust_method = "bonferroni",
    scale_y_axes_lim = NULL,
    # Spelled-out logicals: T/F are ordinary variables and can be reassigned.
    group_embeddings1 = FALSE,
    group_embeddings2 = FALSE,
    projection_embedding = FALSE
  )

  expect_true(ggplot2::is.ggplot(p1$final_plot))
  expect_equal(p1$processed_word_data$dot.y[1], 2.988819, tolerance = 0.00001)
})


# Builds a projection plot from the DP_projections_HILS_SWLS_100 data with
# y_axes = TRUE and the group and projection embedding overlays switched on.
# Checks that the result holds a ggplot object and pins the first `x_plotted`
# value of the processed word data as a regression guard.
# The description is distinct from the preceding y_axes = FALSE test so that
# failures can be told apart in the test report.
test_that("textProjectionPlot with y-axes and group/projection embeddings produces a plot", {
  skip_on_cran()

  # Dot Product Projection Plot
  p2 <- text::textProjectionPlot(
    word_data = DP_projections_HILS_SWLS_100,
    k_n_words_to_test = TRUE,
    min_freq_words_test = 1,
    plot_n_words_square = 3,
    plot_n_words_p = 3,
    plot_n_word_extreme = 1,
    plot_n_word_frequency = 1,
    plot_n_words_middle = 1,
    # x_axes = FALSE,
    y_axes = TRUE,
    p_alpha = 0.05,
    title_top = " Dot Product Projection (DPP)",
    x_axes_label = "Low vs. High HILS score",
    y_axes_label = "Low vs. High SWLS score",
    p_adjust_method = "bonferroni",
    scale_y_axes_lim = NULL,
    # Spelled-out logicals: T/F are ordinary variables and can be reassigned.
    group_embeddings1 = TRUE,
    group_embeddings2 = TRUE,
    # No trailing comma after the last argument; it would pass an extra empty
    # argument to the function.
    projection_embedding = TRUE
  )

  expect_true(ggplot2::is.ggplot(p2$final_plot))
  expect_equal(p2$processed_word_data$x_plotted[1], 1.415753, tolerance = 0.0001)
})


# Covers three plotting paths with y_axes enabled or Cohen's d as the metric:
#   1. textProjectionPlot() on DP_projections_HILS_SWLS_100 with "fdr"
#      adjustment;
#   2. textPlot() on the same data, selecting words only through
#      plot_n_word_extreme_xy;
#   3. a full textProjection() -> textPlot() pipeline using
#      projection_metric = "cohens_d".
# Each path must yield a ggplot object; the first two also pin the second
# `x_plotted` value of the processed word data as a regression guard.
test_that("textProjectionPlot 2-DIMENSIONS produces a plot", {
  skip_on_cran()

  # Dot Product Projection Plot
  p3 <- text::textProjectionPlot(
    word_data = DP_projections_HILS_SWLS_100,
    k_n_words_to_test = FALSE,
    min_freq_words_test = 1,
    plot_n_words_square = 3,
    plot_n_words_p = 3,
    plot_n_word_extreme = 1,
    plot_n_word_frequency = 1,
    plot_n_words_middle = 1,
    # x_axes = TRUE,
    y_axes = TRUE,
    p_alpha = 0.05,
    title_top = " Dot Product Projection (DPP)",
    x_axes_label = "Low vs. High HILS score",
    y_axes_label = "Low vs. High SWLS score",
    p_adjust_method = "fdr",
    scale_y_axes_lim = NULL
  )

  expect_true(ggplot2::is.ggplot(p3$final_plot))
  expect_equal(p3$processed_word_data$x_plotted[2], 0.7323493, tolerance = 0.0001)

  # Dot Product Projection Plot via textPlot(), with every word-selection
  # count set to 0 except plot_n_word_extreme_xy. Stored under its own name so
  # it cannot be confused with the plot above.
  p3_extreme_xy <- text::textPlot(
    word_data = DP_projections_HILS_SWLS_100,
    k_n_words_to_test = FALSE,
    min_freq_words_test = 1,
    plot_n_words_square = 0,
    plot_n_words_p = 0,
    plot_n_word_extreme = 0,
    plot_n_word_extreme_xy = 5,
    plot_n_word_frequency = 0,
    plot_n_words_middle = 0,
    plot_n_word_random = 0,
    # x_axes = TRUE,
    y_axes = TRUE,
    p_alpha = 0.05,
    title_top = " Dot Product Projection (DPP)",
    x_axes_label = "Low vs. High HILS score",
    y_axes_label = "Low vs. High SWLS score",
    p_adjust_method = "fdr",
    scale_y_axes_lim = NULL
  )

  expect_true(ggplot2::is.ggplot(p3_extreme_xy$final_plot))
  expect_equal(
    p3_extreme_xy$processed_word_data$x_plotted[2],
    0.7323493,
    tolerance = 0.0001
  )

  # Cohens_d pipeline
  # Pre-processing data for plotting. x and y are named explicitly instead of
  # relying on positional matching after the named arguments.
  df_cohensD <- text::textProjection(
    words = Language_based_assessment_data_8$harmonywords,
    word_embeddings = word_embeddings_4$texts$harmonywords,
    word_types_embeddings = word_embeddings_4$word_types,
    x = Language_based_assessment_data_8$hilstotal,
    y = Language_based_assessment_data_8$swlstotal,
    split = "quartile",
    Npermutations = 1000,
    n_per_split = 3
  )

  p_cohensD <- text::textPlot(
    word_data = df_cohensD,
    min_freq_words_test = 1,
    plot_n_words_square = 3,
    plot_n_words_p = 3,
    plot_n_word_extreme = 1,
    plot_n_word_frequency = 1,
    plot_n_words_middle = 1,
    projection_metric = "cohens_d",
    y_axes = FALSE,
    p_alpha = 0.0005,
    title_top = "Cohen's D Dot Product Projection (DPP)",
    x_axes_label = "Low vs. High HILS score",
    y_axes_label = "Low vs. High SWLS score",
    # p_adjust_method = "fdr",
    scale_y_axes_lim = NULL
  )

  expect_true(ggplot2::is.ggplot(p_cohensD$final_plot))
})


# Runs textCentrality() with method = "euclidean" on the first 20 rows and
# checks that the result is a tibble with a character `words` column and an
# integer `n` column whose first value is 2. The result is then passed to
# textCentralityPlot(), which must yield a ggplot object.
test_that("textCentrality produces a tibble with character variable and numeric variable.", {
  skip_on_cran()

  df_for_plotting <- text::textCentrality(
    Language_based_assessment_data_8$harmonywords[1:20],
    word_embeddings_4$texts$harmonywords[1:20, ],
    word_embeddings_4$word_types,
    method = "euclidean"
  )

  expect_true(tibble::is_tibble(df_for_plotting))
  expect_type(df_for_plotting$words[1], "character")
  expect_type(df_for_plotting$n[1], "integer")
  expect_equal(df_for_plotting$n[1], 2)

  plot_c <- text::textCentralityPlot(
    word_data = df_for_plotting,
    x_axes = "central_semantic_similarity"
  )

  expect_true(ggplot2::is.ggplot(plot_c$final_plot))
})

# Draws a centrality plot from the centrality_data_harmony data with explicit
# appearance settings. The result must hold a ggplot object, and the second
# row of the processed word data is pinned (n and central_semantic_similarity)
# as a regression guard.
test_that("textCentralityPlot produces a plot.", {
  skip_on_cran()

  # Plot help(textCentralityPlot)
  plot_result <- text::textCentralityPlot(
    # Data and word-selection settings
    word_data = centrality_data_harmony,
    min_freq_words_test = 10,
    plot_n_word_extreme = 10,
    plot_n_word_frequency = 10,
    plot_n_words_middle = 10,
    # x_axes = "central_cosine",

    # Titles and labels
    title_top = "Semantic Centrality Plot",
    x_axes_label = "Semantic Centrality",
    titles_color = "#61605e",

    # Word and point appearance
    word_font = NULL,
    word_size_range = c(3, 8),
    centrality_color_codes = c("#EAEAEA", "#85DB8E", "#398CF9", "#9e9d9d"),
    point_size = 0.5,
    points_without_words_size = 0.5,
    points_without_words_alpha = 0.5,
    arrow_transparency = 0.1
  )

  processed <- plot_result$processed_word_data

  expect_true(ggplot2::is.ggplot(plot_result$final_plot))
  expect_equal(processed$n[2], 21)
  expect_equal(
    processed$central_semantic_similarity[2],
    0.5079464,
    tolerance = 0.00001
  )
})



# Runs textPCA() on the harmony words and their word-type embeddings and
# checks that the result is a tibble with a character `words` column and an
# integer `n` column whose first value is 2.
test_that("textPCA produces a tibble with character variable and numeric variable.", {
  skip_on_cran()

  # Namespace-qualified like the other calls in this file.
  df_for_plotting2d <- text::textPCA(
    words = Language_based_assessment_data_8$harmonywords,
    word_types_embeddings = word_embeddings_4$word_types
  )

  expect_true(tibble::is_tibble(df_for_plotting2d))
  expect_type(df_for_plotting2d$words[1], "character")
  expect_type(df_for_plotting2d$n[1], "integer")
  expect_equal(df_for_plotting2d$n[1], 2)
})

# Draws a PCA plot from the PC_projections_satisfactionwords_40 data using the
# function's default settings. The result must hold a ggplot object, and the
# second `n` value of the processed word data is pinned as a regression guard.
test_that("textPCAPlot produces a plot.", {
  skip_on_cran()

  # Plot
  pca_plot <- textPCAPlot(PC_projections_satisfactionwords_40)
  # pca_plot

  plotted_words <- pca_plot$processed_word_data

  expect_true(ggplot2::is.ggplot(pca_plot$final_plot))
  expect_equal(plotted_words$n[2], 2)
})

# OscarKjell/text documentation built on April 3, 2025, 3:07 p.m.

# rdrr.io home R language documentation Run R code online

# CRAN packages Bioconductor packages R-Forge packages GitHub packages

# Note that we can't provide technical support on individual packages. You should contact the package authors for that.

# OscarKjell/text
# Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

# tests/testthat/test_4_textPlot.R
# In OscarKjell/text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

# R Package Documentation

# Browse R Packages

# We want your feedback!

# OscarKjell/text Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

# tests/testthat/test_4_textPlot.R In OscarKjell/text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

# R Package Documentation

# Browse R Packages

# We want your feedback!

# OscarKjell/text
# Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

# tests/testthat/test_4_textPlot.R
# In OscarKjell/text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning