This package is for text analysis, including preprocessing, topic modelling, semantic networks, and sentiment analysis. Its functions cover both analysis and graphical visualization.
## Setup ---------------------------------------------------------------
## Install ComTxt from GitHub (install_github() comes from devtools),
## then attach it together with the packages used in this walkthrough.
library(devtools)
install_github("BeaJJ/ComTxt")

library(ComTxt)
library(dplyr)
library(quanteda)
library(tidyr)
library(scales)
library(igraph)
library(topicmodels)
library(maps)
library(purrr)
library(mallet)
library(stopwords)
library(ggplot2)
Replace "example_df" with the name of your own data. The multi_name argument lists other names that users might write for the country: for example, I found that users mainly wrote "Croatia", but some people also wrote "republic of croatia". You can check the details in the help page of the geo_preprocess function.
## Geographic preprocessing --------------------------------------------
## put all different names of your country
## check the value of multi_name
?geo_preprocess

## example_df must already exist in the workspace; it is overwritten with
## the result of geo_preprocess().
example_df <- geo_preprocess(
  example_df,
  location_country = "spain",          ## "finland" ## "croatia"
  multi_name = c("españa", "spagna")   ## c("Suomi") c("republic of croatia")
)

geo_map(example_df, location_country = "Spain")
geo_table(example_df)
Please check the documentation of the function: you can choose ud_lang and the stopwords according to your language. Select your own language for preprocessing.
## Language-specific preprocessing -------------------------------------
## Inspect which language codes occur, then split the data by language.
unique(example_df$lang)
example_lang.1 <- example_df[example_df$lang == "es", ]
example_lang.2 <- example_df[example_df$lang == "ca", ]

?twitter_preprocess

## preprocess : lemmatization, stopwords, urls, emoticons
stop_words <- c(stopwords::stopwords("es", source = "stopwords-iso"))
df_lang.1 <- twitter_preprocess(example_lang.1, ud_lang = "spanish", stop_words)

## stopwords
## stop_words is deliberately reassigned for the second language.
stop_words <- c(stopwords::stopwords("ca", source = "stopwords-iso"))
df_lang.2 <- twitter_preprocess(example_lang.2, ud_lang = "catalan", stop_words)

## Stack the per-language results into one data set for the later steps.
df_pre <- rbind(df_lang.1, df_lang.2)
## Keyword network -----------------------------------------------------
## df_pre is the preprocessed data; "cultura" is passed as the keyword
## to remove.
keyword_network(df_pre, keyword_remove = "cultura", top_n = 40)
keyword_table(df_pre, keyword_remove = "cultura", top_n = 10)

## select your keyword to zoom in
keyword_network_hub(df_pre, hub = "libro", top_n = 40)

## NOTE(review): presumably opens an interactive Shiny view — confirm
## with ?shiny_keyword.
shiny_keyword(df_pre)
## Hashtag network -----------------------------------------------------
## "#cultura" is passed as the hashtag to remove.
hashtags_network(df_pre, hashtag_remove = "#cultura", top_n = 40)
hashtags_table(df_pre, hashtag_remove = "#cultura", top_n = 10)

## cine #historia #literatura #música
## select your own key hashtags from table
hashtags_network_hub(df_pre, hub = "#arte", top_n = 10)
Top n users based on their number of followers, friends, and tweets.
## Users and mentions --------------------------------------------------
## top_n sets how many entries each call is asked for.
user_table(df_pre, top_n = 10)
mention_table(df_pre, top_n = 10)
mention_network(df_pre, top_n = 20)

## NOTE(review): presumably opens an interactive Shiny view — confirm
## with ?shiny_mention.
shiny_mention(df_pre)
n_topic is the number of topics (K) that you select.
## Topic modeling------------------------------------------------------
?topic_number
## NOTE(review): presumably helps choose the number of topics K used
## below — confirm with the help page.
topic_number(df_pre)
### mallet topic modeling
?twitter_topic
## n_topic = 7 is this example's choice; replace it with your own value.
df_topics <- twitter_topic(df_pre, n_topic = 7)
### data frame words topic
?topic_getwords
topic_getwords(df_topics, n_words = 100)

### draw word clouds for each topic
### 3*3 = 9 spots (depending on your topic numbers)
?topic_wordcloud
topic_wordcloud(df_topics, maximum_n = 200, cloud_scale = 2.5)

### draw top n words in each topic
?topic_wordplot
topic_wordplot(df_topics, num_words = 10)

### draw topic radar map based on year distribution
?topic_radarmap
topic_radarmap(df_pre, df_topics)

### NOTE(review): argument order here is (df_topics, df_pre), the reverse
### of topic_radarmap() above — confirm against ?shiny_topic.
?shiny_topic
shiny_topic(df_topics, df_pre)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.