R/7_2_textTopicsAnalysis.R

Defines functions textTopicsWordcloud textTopicsTest

Documented in textTopicsTest textTopicsWordcloud

#' Wrapper for topicsTest function from the topics package
#' @param model (list) The trained model
#' @param x_variable (string) The x variable name to be predicted, and to be plotted (only needed for regression or correlation)
#' @param y_variable (string) The y variable name to be predicted, and to be plotted (only needed for regression or correlation)
# @param group_var (string) The variable to group by (only needed for t-test)
#' @param controls (vector) The control variables (not supported yet)
#' @param test_method (string) The test method to use, either "correlation","t-test", "linear_regression","logistic_regression", or "ridge_regression"
# @param p_alpha (numeric) Threshold of p value set by the user for visualising significant topics
#' @param p_adjust_method (character) Method to adjust/correct p-values for multiple comparisons
#' (default = "none"; see also "holm", "hochberg", "hommel", "bonferroni", "BH", "BY",  "fdr").
# @param load_dir (string) The directory to load the test from, if NULL, the test will not be loaded
# @param save_dir (string) The directory to save the test, if NULL, the test will not be saved
#' @param ... Parameter settings from topicsTest in the topics-package.
#' @return A list of the test results, test method, and prediction variable
#' @importFrom topics topicsTest
#' @export
textTopicsTest <- function(
    model,
    x_variable = NULL,
    y_variable = NULL,
    controls = c(),
    test_method = "default",
    p_adjust_method = "fdr",
    ...) {

  # Extract components from the model
  data <- model$train_data %>% tibble::as_tibble()
  preds <- model$preds %>% tibble::as_tibble()
  seed <- model$seed
#  save_dir <- model$save_dir
  ngrams <- model$ngrams
  model_summary <- model$model

  # Adding dummy for prevalence and coherence to fit the topics package
  model_summary$summary$prevalence <- NA
  model_summary$summary$coherence <- NA

  # Call topicsTest from the topics package
  results <- topics::topicsTest(
    data = data,
    model = model_summary,
    preds = preds,
    ngrams = ngrams,
    x_variable = x_variable,
    y_variable = y_variable,
#    group_var = group_var,
    controls = controls,
    test_method = test_method,
    p_adjust_method = p_adjust_method,
    seed = seed
    #, ...
  )
  results
  return(results)
}

#' Plot word clouds
#'
#' This function create word clouds and topic fugures
#' @param model (list) A trained topics model. For examples from topicsModel(). Should be NULL if plotting ngrams.
#' @param ngrams (list) The output from the the topicsGram() function . Should be NULL if plotting topics.
#' @param test (list) The test results; if plotting according to dimension(s) include the object from topicsTest() function.
# @param p_threshold (integer) The p-value threshold to use for significance testing.
# @param color_scheme (string 'default' or vector) The color scheme.
#
# For plots not including a test, the color_scheme should in clude 2 colours (1 gradient pair), such as:
#
# c("lightgray", "darkblue)
#
# For 1 dimensional plots of n-grams it should contain 4 colours (2 gradient pairs), such as:
#
# c(
# "#EAEAEA", "darkred", # negative ngrams colors
#
# "#EAEAEA", "darkgreen" # positve ngrams colors)
#
#
#
# For 1-dimension plots of topics, it should contain 6 colours (3 gradient pairs), such as
#
#  c(
# "#EAEAEA", "darkred",     # negative topics colors
#
# "#EAEAEA", "darkgray",     # colours of topics not significantly associated
#
# "#EAEAEA", "darkgreen"     # positve topics colors)
#
#
#
#  For 2-dimensional plots of topics, the color scheme should contain 18 colours (9 gradient pairs), such as:
#
#  c(
#   "lightgray", "#398CF9",     # quadrant 1 (upper left corner)
#
#   "lightgray", "#60A1F7",     # quadrant 2
#
#   "lightgray", "#5dc688",     # quadrant 3 (upper right corner)
#
#   "lightgray", "#e07f6a",     # quadrant 4
#
#   "lightgray", "darkgray",     # quadrant 5 (middle square)
#
#   "lightgray", "#40DD52",     # quadrant 6
#
#   "lightgray", "#FF0000",     # quadrant 7 (bottom left corner)
#
#   "lightgray", "#EA7467",     # quadrant 8
#
#   "lightgray", "#85DB8E")     # quadrant 9 (bottom right corner)
#
#
# @param scale_size (logical) Whether to scale the size of the words.
# @param plot_topics_idx (vector)  The index or indeces of the topics to plot
# (e.g., look in the model-object for the indices; can for example, be c(1, 3:5) to plot topic t_1, t_3, t_4 and t_5) (optional).
#' @param save_dir (string) The directory to save the plots.
# @param figure_format (string) Set the figure format, e.g., ".svg", or ".png".
# @param width (integer) The width of the topic (units = "in").
# @param height (integer) The width of the topic (units = "in").
# @param max_size (integer) The max size of the words.
#' @param seed (integer) The seed to set for reproducibility; need to be the same seed number as in in
# @param scatter_legend_dot_size (integer) The size of dots in the scatter legend.
# @param scatter_legend_bg_dot_size (integer) The size of background dots in the scatter legend.
# @param scatter_legend_n (numeric or vector) A vector determining the number of dots to emphasis in each quadrant of the scatter legend.
# For example: c(1,1,1,1,0,1,1,1,1) result in one dot in each quadrant except for the middle quadrant.
# @param scatter_legend_method (string) The method to filter topics to be emphasised in the scatter legend.
# Can be either "mean", "max_x", or "max_y"
# @param scatter_legend_specified_topics (vector) Specify which topic(s) to be emphasised in the scatter legend.
# For example c("t_1", "t_2"). If set, scatter_legend_method will have no effect.
# @param scatter_legend_topic_n (boolean) Allow showing the topic number or not in the scatter legend
# @param grid_legend_title The title of grid topic plot.
# @param grid_legend_title_size The size of the title of the plot.
# @param grid_legend_title_color The color of the legend title.
# @param grid_legend_x_axes_label The label of the x axes.
# @param grid_legend_y_axes_label The label of the y axes.
# @param grid_legend_number_color The color in the text in the legend.
# @param grid_legend_number_size The color in the text in the legend.
#' @param ... Parameters from the topicsPlot() function in the topics pacakge.
#' @return The function saves figures in the save_dir.
#' @importFrom topics topicsPlot
#' @export
textTopicsWordcloud <- function(
    model = NULL,
    ngrams = NULL,
    test = NULL,
    save_dir,
    seed = 2024,
    ...) {


  plots <- topics::topicsPlot(
    model = model,
    ngrams = NULL,
    test = test,
    save_dir = save_dir,
    seed = seed
#    scatter_legend_dot_size = scatter_legend_dot_size,
#    scatter_legend_bg_dot_size = scatter_legend_bg_dot_size,
#    scatter_legend_n = scatter_legend_n,
#    scatter_legend_method = scatter_legend_method,
#    scatter_legend_specified_topics = scatter_legend_specified_topics,
#    scatter_legend_topic_n = scatter_legend_topic_n,
#    grid_legend_title = grid_legend_title,
#    grid_legend_title_size = grid_legend_title_size,
#    grid_legend_title_color = grid_legend_title_color,
#    grid_legend_x_axes_label = grid_legend_x_axes_label,
#    grid_legend_y_axes_label = grid_legend_y_axes_label,
#    grid_legend_number_color = grid_legend_number_color,
#    grid_legend_number_size = grid_legend_number_size,
    #, ...
  )

  completion_text <- paste0("The plots (p< p_alpha) are saved in ", save_dir, "/seed_", seed, "/wordclouds")
  message(colourise(completion_text, "green"))

  return(plots)
}
OscarKjell/text documentation built on April 3, 2025, 3:07 p.m.