## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
evaluate <- FALSE
## ----HuggingFace_table_long, echo=FALSE, results='asis'-----------------------
library(magrittr)
Models <- c("'bert-base-uncased'",
            "'roberta-base'",
            "'distilbert-base-cased'",
            "'bert-base-multilingual-cased'",
            "'xlm-roberta-large'")
References <- c("[Devlin et al. 2019](https://aclanthology.org/N19-1423/)",
                "[Liu et al. 2019](https://arxiv.org/abs/1907.11692)",
                "[Sanh et al. 2019](https://arxiv.org/abs/1910.01108)",
                "[Devlin et al. 2019](https://aclanthology.org/N19-1423/)",
                "[Liu et al. 2019](https://arxiv.org/abs/1907.11692)")
Layers <- c("12",
            "12",
            "6",
            "12",
            "24")
Language <- c("English",
              "English",
              "English",
              "[104 top languages at Wikipedia](https://meta.wikimedia.org/wiki/List_of_Wikipedias)",
              "[100 languages](https://huggingface.co/xlm-roberta-large)")
Dimensions <- c("768",
                "768",
                "768",
                "768",
                "1024")
Tables_short <- tibble::tibble(Models, References, Layers, Dimensions, Language)
knitr::kable(Tables_short, caption = "") %>%
  kableExtra::kable_styling(bootstrap_options = c("hover"), full_width = TRUE)
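## ----multilingual_embedding_sketch, eval = evaluate, warning=FALSE, message=FALSE----
# # A minimal sketch (not part of the original tutorial) of how a multilingual
# # model from the table above could be passed to textEmbed(); the Swedish
# # example sentence is an illustrative assumption.
# library(text)
# 
# multilingual_embeddings <- textEmbed(
#   texts = c("Jag mår bra"),  # "I feel great" in Swedish
#   model = "bert-base-multilingual-cased"
# )
# multilingual_embeddings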
## ----word_embedding_tutorial_1, eval = evaluate, warning=FALSE, message=FALSE----
# library(text)
# 
# # Example text
# texts <- c("I feel great")
# 
# # Transform the text to BERT word embeddings
# wordembeddings <- textEmbed(
#   texts = texts,
#   model = "bert-base-uncased",
#   layers = 11:12,
#   aggregation_from_layers_to_tokens = "concatenate",
#   aggregation_from_tokens_to_texts = "mean"
# )
# 
# wordembeddings
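## ----word_embedding_tutorial_1b_sketch, eval = evaluate, warning=FALSE, message=FALSE----
# # A hedged sketch (added for illustration) of inspecting the result. With
# # "concatenate" over layers 11:12, the text-level embedding should have
# # 2 x 768 = 1536 dimensions; the $texts[[1]] element name is an assumption
# # about how the returned list is organized.
# str(wordembeddings, max.level = 2)
# dim(wordembeddings$texts[[1]])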
## ----word_embedding_tutorial_2, eval = FALSE, warning=FALSE, message=FALSE----
# library(text)
# 
# # Transform the text data to BERT word embeddings
# wordembeddings <- textEmbed(
#   texts = Language_based_assessment_data_8[1:2],
#   aggregation_from_tokens_to_word_types = "mean",
#   keep_token_embeddings = FALSE
# )
# 
# # See how the word embeddings are structured
# wordembeddings
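## ----word_embedding_tutorial_2b_sketch, eval = evaluate, warning=FALSE, message=FALSE----
# # A minimal sketch (not in the original tutorial) of a common next step:
# # predicting a numeric rating from the embeddings with textTrain().
# # Assumptions: wordembeddings$texts[[1]] holds the text-level embeddings of
# # the first text column, and hilstotal is a rating column in the example data.
# trained_model <- textTrain(
#   x = wordembeddings$texts[[1]],
#   y = Language_based_assessment_data_8$hilstotal
# )
# trained_model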
## ----word_embedding_tutorial_3, eval = evaluate, warning=FALSE, message=FALSE----
# library(text)
# 
# # Transform the text data to BERT word embeddings
# 
# # Example text
# texts <- c("I feel great")
# 
# # Retrieve the raw word embeddings from layers 10 to 12
# wordembeddings_tokens_layers <- textEmbedRawLayers(
#   texts = texts,
#   layers = 10:12
# )
# wordembeddings_tokens_layers
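## ----word_embedding_tutorial_3b_sketch, eval = evaluate, warning=FALSE, message=FALSE----
# # An illustrative sketch: inspect how the raw output is organized before
# # aggregating it. The element name $context_tokens$texts follows its use in
# # the next chunk; str() only summarizes the nested structure.
# str(wordembeddings_tokens_layers, max.level = 3)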
## ----word_embedding_tutorial_4, eval = evaluate, warning=FALSE, message=FALSE----
# library(text)
# 
# # Aggregating layers 11 and 12 by taking the mean of each dimension.
# we_11_12_mean <- textEmbedLayerAggregation(
#   word_embeddings_layers = wordembeddings_tokens_layers$context_tokens$texts,
#   layers = 11:12,
#   aggregation_from_layers_to_tokens = "concatenate",
#   aggregation_from_tokens_to_texts = "mean"
# )
# we_11_12_mean
# 
# # Aggregating layers 10 and 11 by taking the minimum of each dimension.
# we_10_11_min <- textEmbedLayerAggregation(
#   word_embeddings_layers = wordembeddings_tokens_layers$context_tokens$texts,
#   layers = 10:11,
#   aggregation_from_layers_to_tokens = "concatenate",
#   aggregation_from_tokens_to_texts = "min"
# )
# we_10_11_min
# 
# # Selecting layer 11 only and taking the max value of each dimension across tokens.
# we_11_max <- textEmbedLayerAggregation(
#   word_embeddings_layers = wordembeddings_tokens_layers$context_tokens$texts,
#   layers = 11,
#   aggregation_from_tokens_to_texts = "max"
# )
# we_11_max
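## ----word_embedding_tutorial_4b_sketch, eval = evaluate, warning=FALSE, message=FALSE----
# # A sketch (added for illustration) verifying that concatenating two layers
# # doubles the dimensionality relative to a single layer; the [[1]] indexing
# # into the returned list is an assumption.
# ncol(we_11_12_mean[[1]])  # expected 1536: 768 dimensions x 2 layers
# ncol(we_11_max[[1]])      # expected 768: one layer only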