#' Write a skeleton Twitter credentials file
#'
#' Creates a JSON template at `path` holding the four placeholder OAuth
#' fields (consumer key/secret, access token/secret) for the user to fill
#' in. An existing file is never overwritten.
#'
#' @param path Where to write the template JSON file.
#' @export
create_twitter_credentials_template <- function(path = "~/Desktop/twittercred.json"){
  # Never clobber an existing credentials file.
  if(file.exists(path)){
    return(invisible(NULL))
  }
  placeholders <- list(consumer_key = "consumer_key",
                       consumer_secret = "consumer_secret",
                       access_token = "access_token",
                       access_secret = "access_secret")
  # auto_unbox = TRUE serializes each field as a JSON scalar, not a
  # length-1 array.
  template_json <- jsonlite::toJSON(placeholders, auto_unbox = TRUE)
  cat(template_json, file = path)
}
#' Fetch all available tweets for a user, with on-disk caching
#'
#' Pages backwards through a user's timeline (or a search for mentions of
#' the user) in chunks of 200 tweets, accumulating results in a data.table.
#' Each page is checkpointed to an .rda backup so an interrupted run loses
#' at most one page.
#'
#' @param loadCache If TRUE and a cached backup exists, return it instead
#'   of querying Twitter (warns and falls through to a live fetch if no
#'   backup is found).
#' @param cred_path Path to the JSON credentials file (see
#'   \code{create_twitter_credentials_template}).
#' @param username Twitter screen name to fetch.
#' @param query_type Either "timeline" (the user's own tweets) or "search"
#'   (tweets mentioning the user).
#' @return A data.table of tweets.
#' @import data.table
#' @export
fetchAllTweets <- function(loadCache = TRUE, cred_path = "~/Desktop/twittercred.json",
                           username = "NUMBER26", query_type = "timeline"){
  backup_path <- sprintf("inst/tweetBackup_%s_%s.rda", username, query_type)
  if(loadCache){
    if(file.exists(backup_path)){
      return(readRDS(backup_path))
    } else {
      warning("No cached data available.")
    }
  }
  n26db <- data.table()
  cred <- jsonlite::fromJSON(cred_path)
  twitteR::setup_twitter_oauth(cred$consumer_key, cred$consumer_secret,
                               cred$access_token, cred$access_secret)
  # Wrap the twitteR calls so an API failure yields NULL instead of an
  # error. (dplyr::failwith is defunct in current dplyr; base tryCatch is
  # the portable replacement.)
  userTimelineSafe <- function(...) tryCatch(twitteR::userTimeline(...), error = function(e) NULL)
  searchSafe <- function(...) tryCatch(twitteR::searchTwitter(...), error = function(e) NULL)
  twListToDFSafe <- function(x) tryCatch(twitteR::twListToDF(x), error = function(e) NULL)
  for(i in seq_len(50)){
    # Page backwards: request tweets no newer than the oldest one seen so
    # far. Take the first id in case several tweets tie on `created`
    # (maxID must be a single value).
    if(nrow(n26db) > 0){
      maxID <- n26db[created == min(created), id][1]
    } else {
      maxID <- NULL
    }
    if(query_type == "timeline"){
      raw <- userTimelineSafe(username, n = 200, maxID = maxID)
    } else if(query_type == "search"){
      raw <- searchSafe(paste0("@", username), n = 200, maxID = maxID)
    } else {
      # The original silently reused a stale `n26tweets` here; fail loudly.
      stop("Unknown query_type: ", query_type, call. = FALSE)
    }
    n26tweets <- twListToDFSafe(raw)
    # NULL must be tested before nrow(): nrow(NULL) is NULL, so the
    # original `nrow(x) <= 1 | is.null(x)` could produce a zero-length
    # condition and error inside if().
    if(is.null(n26tweets) || nrow(n26tweets) <= 1){
      break
    }
    n26db <- unique(rbind(n26db, data.table(n26tweets)))
    saveRDS(n26db, file = backup_path)  # checkpoint after every page
  }
  return(n26db)
}
#' Identify the language of each tweet
#'
#' Runs two independent detectors (textcat and cldr) and keeps a label only
#' where both agree; disagreements and any language other than English or
#' German become NA.
#'
#' @param text_vect Character vector of tweet texts.
#' @return Character vector the same length as `text_vect`, containing
#'   "english", "german", or NA.
#' @export
identifyTweetLanguage <- function(text_vect){
  text_vect <- iconv(text_vect, "UTF-8", "latin1")
  data(TC_char_profiles, package = "textcat")
  lang_1 <- tolower(textcat::textcat(text_vect))
  # Give each detector's NAs a distinct sentinel so an NA from one detector
  # never "agrees" with an NA from the other.
  lang_1[is.na(lang_1)] <- "NA_1"
  # The original additionally piped lang_2 through replace(NA, "NA_2"),
  # which is a no-op (an NA index selects nothing); the explicit
  # assignment below is the working replacement, so the dead call is gone.
  lang_2 <- tolower(cldr::detectLanguage(text_vect)$detectedLanguage)
  lang_2[is.na(lang_2)] <- "NA_2"
  lang <- lang_1
  lang[lang_1 != lang_2] <- NA                    # keep only where both agree
  lang[!lang %in% c("english", "german")] <- NA   # restrict to supported languages
  return(lang)
}
#' Build a document-term matrix from tweet texts
#'
#' Cleans tweet texts (retweet headers, URLs, @mentions, digits, stopwords,
#' punctuation, excess whitespace), lower-cases them inside a tm corpus,
#' and returns a DocumentTermMatrix suitable for topic modelling.
#'
#' @param text_vect Character vector of tweet texts.
#' @param lang Stopword language passed to tm::stopwords().
#' @return A tm::DocumentTermMatrix.
#' @export
getTopics <- function(text_vect, lang = "german"){
  text_vect <- iconv(text_vect, "UTF-8", "latin1")
  # Strip Twitter-specific noise before building the corpus.
  text_vect <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", '', text_vect)  # retweet headers
  text_vect <- gsub("http[^[:blank:]]+", '', text_vect)            # URLs
  text_vect <- gsub("@\\w+", '', text_vect)                        # @mentions
  text_vect <- gsub("[ \t]{2,}", '', text_vect)                    # runs of blanks/tabs
  text_vect <- gsub("^\\s+|\\s+$", '', text_vect)                  # trim ends
  text_vect <- gsub('\\d+', '', text_vect)                         # digits
  text_vect <- tm::removeWords(text_vect, tm::stopwords(lang))
  text_vect <- tm::removeWords(text_vect, c("hey", "gibt", "number26"))
  text_vect <- tm::removePunctuation(text_vect)
  text_vect <- tm::stripWhitespace(text_vect)
  tweetCorpus <- tm::VCorpus(tm::VectorSource(text_vect))
  # Lower-casing happens inside the corpus, so the stopword/punctuation
  # passes are applied once more to catch tokens that only match after
  # tolower().
  corpus_clean <- tm::tm_map(tweetCorpus, tm::content_transformer(tolower))
  corpus_clean <- tm::tm_map(corpus_clean, tm::removeWords, tm::stopwords(lang))
  corpus_clean <- tm::tm_map(corpus_clean, tm::removeWords, c("hey", "gibt", "number26"))
  corpus_clean <- tm::tm_map(corpus_clean, tm::removePunctuation)
  corpus_clean <- tm::tm_map(corpus_clean, tm::stripWhitespace)
  tm::DocumentTermMatrix(corpus_clean)
}
#' Bulk-lookup tweets or users by ID
#'
#' Splits the IDs into chunks of 100 (the Twitter bulk-lookup limit) and
#' fetches as many chunks as the current API rate limit allows.
#'
#' @param twitterID Vector of status or user IDs; NAs are dropped.
#' @param resource Either "/statuses/lookup" or "/users/lookup".
#' @param cred_path Path to the JSON credentials file.
#' @return A flat list of twitteR status/user objects.
#' @export
twitterLookup <- function(twitterID, resource = "/statuses/lookup", cred_path = "~/Desktop/twittercred.json"){
  cred <- jsonlite::fromJSON(cred_path)
  twitteR::setup_twitter_oauth(cred$consumer_key, cred$consumer_secret,
                               cred$access_token, cred$access_secret)
  twitterData <- twitterID[!is.na(twitterID)]
  # Twitter's bulk endpoints accept at most 100 IDs per request.
  twitterData <- split(twitterData, ceiling(seq_along(twitterData) / 100))
  ratelimit <- twitteR::getCurRateLimitInfo()
  ratelimit <- as.numeric(ratelimit[ratelimit$resource == resource, ]$remaining)
  # seq_len() instead of 1:min(...): when the remaining rate limit is 0 the
  # original iterated over c(1, 0), firing one request and then erroring on
  # twitterData[[0]].
  for(i in seq_len(min(length(twitterData), ratelimit))){
    if(resource == "/statuses/lookup"){
      twitterData[[i]] <- twitteR::lookup_statuses(twitterData[[i]])
    } else if(resource == "/users/lookup"){
      twitterData[[i]] <- twitteR::lookupUsers(twitterData[[i]])
    }
    print(sprintf("Pulling next 100 objects, %s round(s) completed.", i))
  }
  # Flatten the list of per-chunk lists into a single list of objects.
  twitterData <- unlist(twitterData, recursive = FALSE)
  return(twitterData)
}
#' Add derived metadata columns to a tweet table
#'
#' Deduplicates the tweets, then appends a detected-language column and a
#' tweet-length column measured after stripping @mentions.
#'
#' @param tweets A data.frame/data.table of tweets with a `text` column.
#' @return A data.table with added `lang` and `tw_length` columns.
#' @export
addMeta <- function(tweets){
  tweets <- unique(data.table(tweets))
  tweets$lang <- identifyTweetLanguage(text_vect = tweets$text)
  # Tweet length once @mentions (and surrounding spaces) are removed.
  text_wo_mentions <- gsub(" ?@[^[:space:]]+ ?", "", tweets$text)
  tweets$tw_length <- nchar(text_wo_mentions)
  return(tweets)
}
# Score tweet sentiment via the Sentiment140 bulk-classification API.
# Posts (text, id) pairs as JSON, decodes the returned polarity codes
# (0 = negative, 2 = neutral, 4 = positive) into an ordered factor, and
# merges the scores back onto the input by tweet id.
getSentiment <- function(tweets){
  tweets <- copy(tweets)  # avoid mutating the caller's data.table by reference
  response <- httr::POST("http://www.sentiment140.com/api/bulkClassifyJson?app",
                         body = list(data = tweets[, .(text, id)]), encode = "json")
  parsed <- jsonlite::fromJSON(httr::content(response, "text"))
  scores <- data.table(parsed$data[, c("id", "polarity")])
  scores[, polarity := factor(polarity, levels = c(0, 2, 4),
                              labels = c("negative", "neutral", "positive"),
                              ordered = TRUE)]
  tweets_scored <- merge(tweets, scores, by = "id")
  return(tweets_scored)
}
#' Build and export the tweet-conversation dataset
#'
#' Fetches the account's timeline tweets, looks up the tweets and users
#' they reply to, joins everything into one conversation table, computes
#' response times, and writes the CSV consumed by the wait-time app.
#'
#' @param loadCache Passed through to fetchAllTweets().
#' @return The merged conversation data.table (also written to disk).
#' @export
export_tweet_data <- function(loadCache = TRUE){
  timelineTweets <- fetchAllTweets(loadCache)
  timelineTweets %<>% addMeta()
  timelineConvTweets <- twitterLookup(timelineTweets$replyToSID)
  timelineConvDf <- timelineConvTweets %>% twitteR::twListToDF() %>% data.table()
  usersDf <- twitterLookup(timelineTweets[, unique(replyToUID)], resource = "/users/lookup")
  usersDf %<>% twitteR::twListToDF() %>% data.table()
  timelineConvDf <- merge(timelineConvDf, usersDf, by = "screenName", suffixes = c("", "_user"))
  timelineConvDf %<>% addMeta()
  convDF <- merge(timelineConvDf, timelineTweets, by.x = "id", by.y = "replyToSID",
                  suffixes = c("_usertweet", "_n26tweet"))
  convDF[, created_usertweet := as.POSIXct(created_usertweet)]
  convDF[, created_n26tweet := as.POSIXct(created_n26tweet)]
  # Use difftime() with explicit units: subtracting POSIXct values picks
  # units automatically (secs/mins/hours/days), so the original
  # `(a - b) / 60` divided by 60 regardless of scale and silently mixed
  # units across rows/runs. This always yields minutes.
  convDF[, response_time := as.numeric(difftime(created_n26tweet, created_usertweet,
                                                units = "mins"))]
  convDF[, Weekday := lubridate::wday(created_usertweet, label = TRUE)]
  convDF[, hour := lubridate::hour(created_usertweet)]
  # The original selected `hour` twice; each column is written once here.
  write.csv(convDF[, .(response_time, hour, Weekday, created_usertweet)],
            file = "~/Desktop/datascience/inst/waittimeapp/twitterDF.csv")
  return(convDF)
}
# NOTE(review): removed scraped web-page boilerplate that was pasted here
# ("Add the following code to your website..." / embed-snippet text) — it
# was not R code and broke parsing of this file.