Psychological Methods: the Text Tutorial
In text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
evaluate = FALSE

# For text-version => 0.9.99
# Install text from CRAN
install.packages("text")
library(text)

# Set-up en environment with text-required python packages
textrpp_install()

# Initialize the environment – and save the settings for next time
textrpp_initialize(save_profile = TRUE)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Example text
texts <- c("I am feeling relatedness with others", "That's great!")

# Defaults
embeddings <- textEmbed(texts)

# Output
embeddings$tokens

# Output
embeddings$texts
# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Look at example data included in the text- package comprising both text and numerical variables (note that there are only 40 participants in this example).
Language_based_assessment_data_8

# Transform the text/word data to word embeddings (see help(textEmbed) to see the default settings).
word_embeddings <- textEmbed(
  Language_based_assessment_data_8,
  model = "bert-base-uncased",
  aggregation_from_layers_to_tokens = "concatenate",
  aggregation_from_tokens_to_texts = "mean",
  keep_token_embeddings = FALSE
)

# See how the word embeddings are structured
word_embeddings

# Save the word embeddings to avoid having to embed the text again. It is good practice to save output from analyses that take a lot of time to compute, which is often the case when analyzing text data.
saveRDS(word_embeddings, "word_embeddings.rds")

# Get the saved word embeddings (again)
word_embeddings <- readRDS("word_embeddings.rds")

# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Get hidden states for "I am fine"
imf_embeddings_11_12 <- textEmbedRawLayers(
  "I am fine",
  layers = 11:12
)
imf_embeddings_11_12

#OUTPUT


# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# 1. Concatenate layers(results in 1,536 dimensions).
textEmbedLayerAggregation(
  imf_embeddings_11_12$context_tokens,
  layers = 11:12,
  aggregation_from_layers_to_tokens = "concatenate",
  aggregation_from_tokens_to_texts = "mean"
)
# OUTPUT

# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# 2. Aggregate layers using mean (results in 768).
textEmbedLayerAggregation(
  imf_embeddings_11_12$context_tokens,
  layers = 11,
  aggregation_from_tokens_to_texts = "mean"
)

# OUTPUT

# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# Examine the relationship between satisfactiontext and the corresponding rating scale
model_satisfactiontext_swls <- textTrain(
  x = word_embeddings$texts$satisfactiontexts, # the predictor variables (i.e., the word embeddings)
  y = Language_based_assessment_data_8$swlstotal, # the criterion variable (i.e.,the rating scale score.
  model_description = "author(s): Kjell, Giorgi, & Schwartz; data: N=40, population =  Online, Mechanical Turk; publication: title = Example for demo; description: swls = the satisfaction with life scale"
)

# Examine the correlation between predicted and observed Harmony in life scale scores
model_satisfactiontext_swls$results

# OUTPUT:

# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Save the mode
saveRDS(
  model_satisfactiontext_swls,
  "model_satisfactiontext_swls.rds"
)
# Read the model
model_satisfactiontext_swls <- readRDS(
  "model_satisfactiontext_swls.rds"
)

# Examine the names in the object returned from training
names(model_satisfactiontext_swls)

#OUTPUT:

# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# Predicting several outcomes from several word embeddings
models_words_ratings <- textTrainLists(
  word_embeddings$texts[1:2],
  Language_based_assessment_data_8[5:6]
)

# See results
models_words_ratings$results

# OUTPUT


# Save model
saveRDS(models_words_ratings, "models_words_ratings.rds")
# Read model
models_words_ratings <- readRDS(
  "models_words_ratings.rds"
)


# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Read a valence trained prediction model (download it from https://osf.io/dgczt/)
valence_Warriner_L11 <- readRDS(
  "valence_Warriner_L11.rds"
)

# Examine the model
valence_Warriner_L11

# PART OF THE OUTPUT

# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Apply the model to the satisfaction text
satisfaction_text_valence <- textPredict(
  valence_Warriner_L11,
  word_embeddings$texts$satisfactiontexts,
  dim_names = FALSE
)

# Examine the correlation between the predicted valence and the Satisfaction with life scale score
psych::corr.test(
  satisfaction_text_valence$word_embeddings__ypred,
  Language_based_assessment_data_8$swlstotal
)


# OUTPUT

# # # # # # # # # # # # # # # # # # # # # # # # # # # #



# Compute semantic similarity scores between two text columns, using the previously created word_embeddings.
semantic_similarity_scores <- textSimilarity(
  word_embeddings$texts$harmonytexts,
  word_embeddings$texts$satisfactiontexts
)
# Look at the first scores
head(semantic_similarity_scores)

# OUTPUT
# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# Read word norms text (later we will use these for the semantic centrality plot)
word_norms <- read.csv(
  "Word_Norms_Mental_Health_Kjell2018_text.csv"
)

# Read the word embeddings for the word norms
word_norms_embeddings <- readRDS(
  "Word_Norms_Mental_Health_Kjell2018_text_embedding_L11.rds"
)

# Examine which word norms there are.
names(word_norms_embeddings$texts)

# OUTPUT
# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# Compute semantic similarity score between the harmony answers and the harmony norm
# Note that the descriptive word answers are used instead of text answers to correspond with how the word norm was created.
norm_similarity_scores_harmony <- textSimilarityNorm(
  word_embeddings$texts$harmonywords,
  word_norms_embeddings$texts$harmonynorm
)

# Correlating the semantic measure with the corresponding rating scale
psych::corr.test(
  norm_similarity_scores_harmony,
  Language_based_assessment_data_8$hilstotal
)

# OUTPUT
# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# Extract word type embeddings and text embeddings for harmony words
harmony_words_embeddings <- textEmbed(
  texts = Language_based_assessment_data_8["harmonywords"],
  aggregation_from_layers_to_tokens = "concatenate",
  aggregation_from_tokens_to_texts = "mean",
  aggregation_from_tokens_to_word_types = "mean",
  keep_token_embeddings = FALSE
)

# Pre-processing data for plotting
projection_results <- textProjection(
  words = Language_based_assessment_data_8$harmonywords,
  word_embeddings = harmony_words_embeddings$texts,
  word_types_embeddings = harmony_words_embeddings$word_types,
  x = Language_based_assessment_data_8$hilstotal,
  y = Language_based_assessment_data_8$age
)

projection_results$word_data

# To avoid warnings -- and that words do not get plotted, first increase the max.overlaps for the entire session:
options(ggrepel.max.overlaps = 1000)

# Plot
plot_projection <- textPlot(
  projection_results,
  min_freq_words_plot = 1,
  plot_n_word_extreme = 10,
  plot_n_word_frequency = 5,
  plot_n_words_middle = 5,
  y_axes = FALSE,
  p_alpha = 0.05,
  p_adjust_method = "fdr",
  title_top = "Harmony Words Responses (Supervised Dimension Projection)",
  x_axes_label = "Low to High Harmony in Life Scale Score",
  y_axes_label = "",
  bivariate_color_codes = c("#FFFFFF", "#FFFFFF", "#FFFFFF",
                            "#E07f6a", "#EAEAEA", "#85DB8E",
                            "#FFFFFF", "#FFFFFF", "#FFFFFF"
  )
)
# View plot

plot_projection$final_plot


# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Plot
plot_projection_2D <- textPlot(
  projection_results,
  min_freq_words_plot = 1,
  plot_n_word_extreme = 10,
  plot_n_word_frequency = 5,
  plot_n_words_middle = 5,
  y_axes = TRUE, # Change to TRUE/FALSE
  p_alpha = 0.05,
  p_adjust_method = "fdr",
  title_top = "Harmony Words Responses (Supervised Dimension Projection)",
  x_axes_label = "Low vs. High Harmony in Life Scale Score",
  y_axes_label = "Low vs.High Age",
  bivariate_color_codes = c("#E07f6b", "#60A1F7", "#85DB8D",
                            "#FF0000", "#EAEAEA", "#5dc688",
                            "#E07f6a", "#60A1F7", "#85DB8E"
  )
)
# View plot
plot_projection_2D$final_plot

# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# Computing words' centrality (semantic similarity) score to the aggregated embedding of all words
centrality_results <- textCentrality(
  words = word_norms$satisfactionnorm,
  word_embeddings = word_norms_embeddings$texts$satisfactionnorm,
  word_types_embeddings = word_norms_embeddings$word_types
)

options(ggrepel.max.overlaps = 1000)
centrality_plot <- textCentralityPlot(
  word_data = centrality_results,
  min_freq_words_test = 2,
  plot_n_word_extreme = 10,
  plot_n_word_frequency = 5,
  plot_n_words_middle = 5,
  title_top = "Satisfaction with life word norm: Semantic Centrality Plot",
  x_axes_label = "Satisfaction with Life Semantic Centrality"
)

centrality_plot$final_plot

# OUTPUT

# # # # # # # # # # # # # # # # # # # # # # # # # # # #


# Supplementary

# PCA results to be plotted help(textPCA)
textPCA_results <- textPCA(
  words = Language_based_assessment_data_8$satisfactionwords,
  word_types_embeddings = harmony_words_embeddings$word_types
)


# Plotting the PCA results
plot_PCA <- textPCAPlot(
  word_data = textPCA_results,
  min_freq_words_test = 2,
  plot_n_word_extreme = 5,
  plot_n_word_frequency = 5,
  plot_n_words_middle = 5
)
plot_PCA$final_plot

Any scripts or data that you put into this service are public.

text documentation built on Aug. 9, 2023, 5:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

text
Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

Psychological Methods: the Text Tutorial
In text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

Try the text package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

text Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

Psychological Methods: the Text Tutorial In text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

Try the text package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

text
Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning

Psychological Methods: the Text Tutorial
In text: Analyses of Text using Transformers Models from HuggingFace, Natural Language Processing and Machine Learning