# Chunk setup: keep source code out of the rendered output and attach
# quotefinder (presumably the provider of the qf_* helpers called below).
knitr::opts_chunk$set(echo = FALSE)
library("quotefinder")

# Read the serialized object stored in "twitter_token.rds". It is not
# referenced again in this script; presumably the qf_* helpers pick it up
# from the global environment -- TODO confirm.
twitter_token <- readRDS("twitter_token.rds")

# Top-level call, so the current time is printed when the script runs.
Sys.time()

# One id per entry of "tweets_by_user": list the directory, keep only the
# file-name part of each path, then strip the extension.
all_users_id <- fs::path_ext_remove(
  fs::path_file(
    fs::dir_ls("tweets_by_user")
  )
)

# NOTE(review): the list id is a bare numeric literal. Ids of this magnitude
# are above 2^53, where not every integer is exactly representable as a
# double -- confirm whether the helper also accepts a character id.
qf_bind_rows_tweets(
  users = all_users_id,
  lists = 1184769179712397312
)

# Helpers called for their side effects only; no arguments, return values
# are not stored.
qf_create_language_list()
qf_create_hashtag_list()
qf_create_trending_hashtag_list()
# Download the MEP reference table only when no local copy exists yet, then
# load it from disk. An existing "all_MEPs_full.csv" is reused as-is.
if (!fs::file_exists("all_MEPs_full.csv")) {
  download.file(
    "https://raw.githubusercontent.com/giocomai/edjnetquotefinder/master/vignettes/all_MEPs_full.csv",
    "all_MEPs_full.csv"
  )
}
all_MEPs_full <- readr::read_csv("all_MEPs_full.csv")
# Load the processed tweets, newest first, keep the columns used downstream
# (exposing `created_at` under the name `time`), and attach the MEP metadata
# by `user_id`.
# NOTE(review): `user_id` is coerced to double before the join, presumably to
# match the type of `user_id` in all_MEPs_full. Ids above 2^53 are not all
# exactly representable as doubles -- confirm the ids in use are safe.
all_MEPs_tweets_df <- readr::read_rds(
  fs::path("tweets_processed", "tweets_all.rds")
) %>%
  dplyr::arrange(dplyr::desc(created_at)) %>%
  dplyr::mutate(user_id = as.numeric(user_id)) %>%
  dplyr::select(
    screen_name,
    date,
    time = created_at,
    text,
    status_id,
    retweet_count,
    favorite_count,
    lang,
    hashtags,
    urls_expanded_url,
    user_id
  ) %>%
  dplyr::left_join(
    # Drop the metadata's own screen_name so the tweets' screen_name is the
    # only column of that name after the join.
    y = dplyr::select(all_MEPs_full, -screen_name),
    by = "user_id"
  )
## clean text
# Build `clean_text` from `text` by deleting @mentions, #hashtags, http links
# and the retweet marker "RT ".
# The marker is anchored with \\b so that only a standalone "RT " is removed;
# unanchored, the pattern also ate the tail of upper-case words
# (e.g. "SUPPORT us" became "SUPPOus").
all_MEPs_tweets_df$clean_text <- stringr::str_replace_all(
  string = all_MEPs_tweets_df$text,
  pattern = stringr::regex(
    pattern = "@\\w+|#[[:alnum:]]+|http[[:graph:]]+|\\bRT ",
    ignore_case = FALSE
  ),
  replacement = ""
)

# Drop rows whose cleaned text contains "retweeted" (case-insensitive,
# literal match).
all_MEPs_tweets_df <- all_MEPs_tweets_df %>%
  dplyr::filter(
    !stringr::str_detect(
      string = clean_text,
      pattern = stringr::fixed("retweeted", ignore_case = TRUE)
    )
  )

# Resolve the HTML entities left in the text: "&amp;" becomes "&", while
# "&gt;" and "&lt;" are replaced by a space (no angle brackets are
# reintroduced). The replacements run in this order.
all_MEPs_tweets_df$clean_text <- all_MEPs_tweets_df$clean_text %>%
  stringr::str_replace_all(pattern = stringr::fixed("&amp;"), replacement = "&") %>%
  stringr::str_replace_all(pattern = stringr::fixed("&gt;"), replacement = " ") %>%
  stringr::str_replace_all(pattern = stringr::fixed("&lt;"), replacement = " ")

# Add two HTML anchor columns:
#   Link      - "Source" link to the tweet, built from screen_name and
#               status_id.
#   full_name - fullName wrapped in a link to officialURL.
# Both are built inside mutate() so every name is looked up as a column of
# the data frame first. `officialURL` was previously a bare name outside any
# data frame context, which fails with "object not found" unless a global of
# that name happens to exist. It is presumably a column brought in by the
# all_MEPs_full join, like fullName -- TODO confirm.
all_MEPs_tweets_df <- all_MEPs_tweets_df %>%
  dplyr::mutate(
    Link = paste0(
      "<a href='https://twitter.com/", screen_name,
      "/status/", status_id,
      "'  target='_blank'>Source</a>"
    ),
    full_name = paste0(
      "<a href='", officialURL,
      "'  target='_blank'>", fullName, "</a>"
    )
  )
# Persist the enriched tweet table as "tweets_processed/dataset.rds".
# Uses `file =` rather than the deprecated `path =`, matching the
# readr::read_rds(file = ...) call earlier in this script.
readr::write_rds(
  x = all_MEPs_tweets_df,
  file = fs::path("tweets_processed", "dataset.rds")
)


# giocomai/edjnetquotefinder documentation built on Feb. 11, 2022, 12:51 p.m.