inst/doc/huggingface_in_r.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
evaluate <- FALSE

## ----HuggingFace_table_long, echo=FALSE, results='asis'-----------------------
library(magrittr)
Models <- c("'bert-base-uncased'",
            "'roberta-base'",
            "'distilbert-base-cased'",
            "'bert-base-multilingual-cased'",
            "'xlm-roberta-large'"
            )

References <- c("[Devlin et al., 2019](https://aclanthology.org/N19-1423/)",
                "[Liu et al., 2019](https://arxiv.org/abs/1907.11692)",
                "[Sanh et al., 2019](https://arxiv.org/abs/1910.01108)",
                "[Devlin et al., 2019](https://aclanthology.org/N19-1423/)",
                "[Liu et al., 2019](https://arxiv.org/pdf/1907.11692.pdf)"
                )

Layers <- c("12",
            "12", 
            "6?",
            "12",
            "24")

Language <- c("English",
              "English", 
              "English",
              "[104 top languages at Wikipedia](https://meta.wikimedia.org/wiki/List_of_Wikipedias)",
              "[100 language](https://huggingface.co/bert-base-multilingual-cased)")

Dimensions <- c("768", 
                "768", 
                "768?", 
                "768", 
                "1024")

Tables_short <- tibble::tibble(Models, References, Layers, Dimensions, Language)

knitr::kable(Tables_short, caption = "") %>%
  kableExtra::kable_styling(bootstrap_options = "hover", full_width = TRUE)

## ----word_embedding_tutorial_1, eval = evaluate, warning=FALSE, message=FALSE----
#  library(text)
#  # Example text
#  texts <- c("I feel great")
#  
#  # Transform the text to BERT word embeddings
#  wordembeddings <- textEmbed(texts = texts,
#                              model = 'bert-base-uncased',
#                              layers = 11:12,
#                              aggregation_from_layers_to_tokens = "concatenate",
#                              aggregation_from_tokens_to_texts = "mean"
#                              )
#  
#  wordembeddings
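
## ----word_embedding_multilingual_sketch, eval = FALSE, warning=FALSE, message=FALSE----
#  # A minimal sketch of using a multilingual model from the table above: any of
#  # the listed model names can be passed to the model argument of textEmbed().
#  # The model choice 'bert-base-multilingual-cased' and the Swedish example text
#  # are illustrative assumptions.
#  library(text)
#  
#  # Example text in Swedish ("I feel good")
#  texts_multilingual <- c("Jag mår bra")
#  
#  # Transform the text to multilingual BERT word embeddings
#  wordembeddings_multilingual <- textEmbed(texts = texts_multilingual,
#                                           model = 'bert-base-multilingual-cased',
#                                           layers = 11:12,
#                                           aggregation_from_layers_to_tokens = "concatenate",
#                                           aggregation_from_tokens_to_texts = "mean"
#                                           )
#  
#  wordembeddings_multilingual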

## ----word_embedding_tutorial_2, eval = FALSE, warning=FALSE, message=FALSE----
#  library(text)
#  
#  # Transform the text data to BERT word embeddings
#  wordembeddings <- textEmbed(texts = Language_based_assessment_data_8[1:2],
#                              aggregation_from_tokens_to_word_types = "mean",
#                              keep_token_embeddings = FALSE)
#  
#  # See how word embeddings are structured
#  wordembeddings
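
## ----word_embedding_structure_sketch, eval = FALSE, warning=FALSE, message=FALSE----
#  # A minimal sketch for inspecting the returned object, assuming textEmbed()
#  # returns a named list (e.g., text-level embeddings and aggregated word-type
#  # embeddings): names() shows which elements are available, and each text
#  # variable in the input gets its own embedding tibble.
#  names(wordembeddings)
#  names(wordembeddings$texts)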

## ----word_embedding_tutorial_3, eval = evaluate, warning=FALSE, message=FALSE----
#  library(text)
#  
#  # Example text
#  texts <- c("I feel great")
#  
#  # Retrieve token-level word embeddings from layers 10 to 12
#  wordembeddings_tokens_layers <- textEmbedRawLayers(
#    texts = texts,
#    layers = 10:12)
#  wordembeddings_tokens_layers

## ----word_embedding_tutorial_4, eval = evaluate, warning=FALSE, message=FALSE----
#  library(text)
#  
#  # Concatenate layers 11 and 12, then take the mean of each dimension across tokens.
#  we_11_12_mean <- textEmbedLayerAggregation(
#    word_embeddings_layers = wordembeddings_tokens_layers$context_tokens$texts,
#    layers = 11:12,
#    aggregation_from_layers_to_tokens = "concatenate",
#    aggregation_from_tokens_to_texts = "mean")
#  we_11_12_mean
#  # Concatenate layers 10 and 11, then take the minimum of each dimension across tokens.
#  we_10_11_min <- textEmbedLayerAggregation(
#    word_embeddings_layers = wordembeddings_tokens_layers$context_tokens$texts,
#    layers = 10:11,
#    aggregation_from_layers_to_tokens = "concatenate",
#    aggregation_from_tokens_to_texts = "min")
#  we_10_11_min
#  # Select layer 11 only, and take the max of each dimension across tokens.
#  we_11_max <- textEmbedLayerAggregation(
#    word_embeddings_layers = wordembeddings_tokens_layers$context_tokens$texts,
#    layers = 11,
#    aggregation_from_tokens_to_texts = "max")
#  we_11_max
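
## ----layer_aggregation_dimension_sketch, eval = FALSE, warning=FALSE, message=FALSE----
#  # A minimal sketch checking the dimensionality of the aggregated embeddings,
#  # assuming each element of the returned list is a tibble whose columns are the
#  # embedding dimensions: concatenating two 768-dimensional layers gives
#  # 2 * 768 = 1536 dimensions, while selecting a single layer keeps 768.
#  ncol(we_11_12_mean[[1]])  # expected 1536 (layers 11 and 12 concatenated)
#  ncol(we_11_max[[1]])      # expected 768 (layer 11 only)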
