# Functions for naming topics based on word frequency or tf-idf
#' Collapse String List
#'
#' Collapses a list of strings but limits the number of strings
#' collapsed to max_n
#'
#' @param max_n An integer. Maximum number of strings to collapse.
#' @param len An integer. Length over which to use only the first max_n strings.
#' @return A string which is a comma separated list of collapsed strings.
str_collapse <- function(str_list, len, max_n = 2) {
ifelse(stringr::str_length(paste(str_list, collapse = " ")) <= len,
paste(str_list, collapse = " "),
paste(str_list[1:max_n], collapse=" "))
}
test_str_collapse <- function(){
letters[1:26]
out <- str_collapse(list(letters[1:26]), 2, 17)
stringr::str_length(out) == 2
}
# Capitalize a string
str_tocap <- function(str_list){
first <- purrr::map_chr(str_list, ~ stringr::str_sub(.x, 1, 1))
purrr::map2(toupper(first), stringr::str_sub(str_list, 2), paste0)
}
#' Create Topic Names Based on an LDA Model
#'
#'
#' @param model The LDA model which will have names created for its
#' topics
#' @return A tibble with the single column `topic_name`
get_topic_names <- function(model){
class(model) <- "LDA"
lda_clust <- get_lda_clusters(list(model)) %>%
.$clust %>%
unlist
set_clust <- tibble(set_num = names(lda_clust),
topic_id = lda_clust) %>%
left_join(sets_df) %>%
arrange(topic_id) %>%
mutate(topic_id = forcats::fct_inorder(factor(topic_id)))
# Tidy tokenization and remove stop words
title_words <- set_clust %>% select(topic_id, name) %>%
tidytext::unnest_tokens(word, name) %>%
anti_join(tidytext::stop_words) %>%
count(topic_id, word, sort = TRUE) %>%
ungroup
total_words <- title_words %>%
group_by(topic_id) %>%
summarize(total = sum(n))
title_words <- left_join(title_words, total_words) %>%
tidytext::bind_tf_idf(word, topic_id, n)
plot_words <- title_words %>%
arrange(desc(tf_idf)) %>%
mutate(word = factor(word, levels = rev(unique(word))))
top3 <- plot_words %>%
group_by(topic_id) %>%
top_n(3, tf_idf) %>%
arrange(topic_id, desc(tf_idf)) %>%
ungroup
tnames <- top3 %>% split(., top3$topic_id) %>%
purrr::map_chr(~str_collapse(.x$word, len = 26)) %>%
str_tocap %>%
# map_chr( ~stringr::str_sub(.x, 1, 32)) %>%
tibble(tname = ., id = names(.)) %>%
mutate(topic_name = paste0(id, " ", tname)) %>%
mutate(topic = as.numeric(id)) %>%
select(topic, topic_name)
tnames
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.