#' @title Returns 'similar' texts to user given strings
#' @description Use to return the texts that are semantically similar to one or several
#' user-defined strings.
#' @param texts The texts given by the user to classify later.
#' @param texts_id The ids of the texts, used to output a nice clean format.
#' @param strings A vector of strings to be fed into the model.
#' @param num_texts The number of texts to be returned for each user-given string.
#' @param Term_count_min GloVe parameter.
#' @param Skip_gram_window GloVe parameter.
#' @param Word_vectors_size GloVe parameter.
#' @param X_max GloVe parameter.
#' @param N_iter GloVe parameter.
#' @param xprt_txt_vctrs Defaults to TRUE. Returns a list with the text vectors computed by the
#' GloVe model so they can be reused later with more strings.
#' @return A list with dataframes of similar texts for each string. If \code{xprt_txt_vctrs = TRUE},
#' an additional list is added to the end of the list containing the text vectors of the model, to
#' be used by the \code{similar_texts_lite()} function.
#' @export
#' @examples
#' similar_texts(texts = df$text,
#' texts_id = df$text_id,
#' strings = mystrgs,
#' num_texts = 5,
#' Term_count_min = 5,
#' Skip_gram_window = 10,
#' Word_vectors_size = 100,
#' X_max = 10,
#' N_iter = 8,
#' xprt_txt_vctrs = TRUE
#' )
#'
similar_texts = function(texts, texts_id, strings, num_texts, Term_count_min, Skip_gram_window, Word_vectors_size,
                         X_max, N_iter, xprt_txt_vctrs = TRUE){
  # Notes -----------------------
  # - Averages the word vectors of each tokenized text into one vector per
  #   text, running the per-text steps in parallel on the local machine.
  # - NOTE(review): `texts_tokens`, `vocab_trimmed` and `word_vectors` are
  #   neither arguments nor created in this body, so they must already exist
  #   in an enclosing environment when the function is called -- confirm where
  #   they come from. None of the arguments is used by the code in this body.

  # Parallelize on the local computer, but hand the caller's future plan back
  # on exit so this call does not permanently switch the session's backend.
  old_plan = future::plan(future::multisession)
  on.exit(future::plan(old_plan), add = TRUE)

  # Keep only the tokens present in the trimmed vocabulary. `%in%` never
  # returns NA, so plain logical indexing is enough.
  texts_tokens_clean = future.apply::future_lapply(
    texts_tokens,
    function(tokens) tokens[tokens %in% vocab_trimmed$term])

  # One matrix per text, rows selected by that text's tokens; drop = FALSE
  # keeps a matrix even when a single token is left.
  texts_by_ndim_list = future.apply::future_lapply(
    texts_tokens_clean,
    function(tokens) word_vectors[tokens, , drop = FALSE])

  # Average each text across its terms (column means of its matrix).
  texts_by_ndim_averaged = future.apply::future_lapply(
    texts_by_ndim_list,
    colMeans)

  # The per-text averaged vectors are the function's value, returned invisibly.
  invisible(texts_by_ndim_averaged)
}
similar_texts_lite = function(texts, texts_id, string, num_texts,
                              Term_count_min, Skip_gram_window,
                              Word_vectors_size, X_max, N_iter) {
  # Notes -----------------------
  # - Runs sequentially on purpose: it is only meant to handle one single
  #   combination of parameters, so no parallel backend is set up.
  # - No computation is present in this body; the function evaluates to NULL.
  NULL
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.