#' df_to_fixed_dictionary_tf_idf_matrix
#'
#' Calculates the TF-IDF matrix for a data.frame using a fixed dictionary.
#'
#' Each row of `df` is treated as one document. The text in `text_col` is
#' converted to character and handed, together with `dictionary` and
#' `num_docs`, to `string_to_fixed_dictionary_tf_idf_vector()`; the vector it
#' returns becomes the corresponding row of the result.
#'
#' @param df A data.frame that contains the text to process
#' @param text_col The name of the column in `df` that contains the text
#' @param dictionary A vector of terms that will be used to calculate the
#'   TF-IDF; it also supplies the column names of the result
#' @param num_docs The total number of documents in the text corpus (passed
#'   through unchanged to `string_to_fixed_dictionary_tf_idf_vector()`)
#' @return A matrix with one row per row of `df` and one column per term in
#'   `dictionary`, containing the TF-IDF of each term for each document. A
#'   `df` with zero rows yields a matrix with zero rows.
#' @examples
#' df <- data.frame(text = c("This is a sentence", "This is another sentence"))
#' dictionary <- c("sentence", "this", "is")
#' num_docs <- 2
#' df_to_fixed_dictionary_tf_idf_matrix(df, "text", dictionary, num_docs)
#'
#' @export
df_to_fixed_dictionary_tf_idf_matrix <- function(df, text_col, dictionary, num_docs) {
  # Preallocate the result (filled with NA) so rows can be assigned in place
  n_rows <- nrow(df)
  tf_idf_matrix <- matrix(ncol = length(dictionary), nrow = n_rows)
  colnames(tf_idf_matrix) <- dictionary

  # seq_len() rather than 1:n_rows: for an empty df, 1:0 would iterate over
  # c(1, 0) and index rows that do not exist
  for (i in seq_len(n_rows)) {
    string <- as.character(df[i, text_col])
    tf_idf_matrix[i, ] <- string_to_fixed_dictionary_tf_idf_vector(
      string, dictionary, num_docs
    )
  }

  tf_idf_matrix
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.