R client for accessing Twitter's REST and stream APIs.
Check out the rtweet package documentation website.
To get the current released version from CRAN:
```r
## install rtweet from CRAN
install.packages("rtweet")

## load rtweet package
library(rtweet)
```
To get the current development version from GitHub:
```r
## install devtools package if it's not already
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

## install dev version of rtweet from github
devtools::install_github("mkearney/rtweet")

## load rtweet package
library(rtweet)
```
NEW: All you need is a Twitter account and rtweet and you're up and running! The first time you make an API request (e.g., `search_tweets()`, `stream_tweets()`, `get_followers()`), a browser window will open so you can authorize the app. And that's it! You're ready to start collecting and analyzing Twitter data!
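If you'd rather create and store a token yourself (for example, for non-interactive scripts), a minimal sketch using rtweet's `create_token()` might look like the following. The app name and key values are hypothetical placeholders from a Twitter app you create yourself, and the argument names reflect recent rtweet versions; see the authorization vignette below for the full walkthrough.

```r
## a minimal sketch of manual authorization, assuming you have created a
## Twitter app and copied its keys (all values below are placeholders)
library(rtweet)

token <- create_token(
  app             = "my_rtweet_app",        # hypothetical app name
  consumer_key    = "YOUR_CONSUMER_KEY",
  consumer_secret = "YOUR_CONSUMER_SECRET",
  access_token    = "YOUR_ACCESS_TOKEN",
  access_secret   = "YOUR_ACCESS_SECRET"
)

## subsequent rtweet calls in this and future sessions will use this token
```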
Search for up to 18,000 (non-retweeted) tweets containing the rstats hashtag.
```r
## search for 18,000 tweets using the rstats hashtag
rt <- search_tweets(
  "#rstats", n = 18000, include_rts = FALSE
)
```
Quickly visualize frequency of tweets over time using `ts_plot()`.
```r
## plot time series of tweets
ts_plot(rt, "3 hours") +
  ggplot2::theme_minimal() +
  ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) +
  ggplot2::labs(
    x = NULL, y = NULL,
    title = "Frequency of #rstats Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
```
Twitter rate limits cap the number of search results returned to 18,000 every 15 minutes. To request more than that, simply set `retryonratelimit = TRUE` and rtweet will wait out the rate limit resets for you.
```r
## search for 250,000 tweets containing the word data
rt <- search_tweets(
  "data", n = 250000, retryonratelimit = TRUE
)
```
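Before launching a large request you can also check how much of your rate limit remains. Here is a quick sketch using rtweet's `rate_limit()`; the `"search/tweets"` query string and the output columns shown are assumptions about how the endpoint is named and parsed.

```r
## check the remaining rate limit for the search endpoint
## ("search/tweets" is assumed to match the endpoint name used by rate_limit())
rl <- rate_limit(query = "search/tweets")

## inspect how many requests remain and when the limit resets
rl[, c("query", "limit", "remaining", "reset")]
```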
Search by geo-location---for example, find 10,000 tweets in the English language sent from the United States.
```r
## search for 10,000 tweets sent from the US
rt <- search_tweets(
  "lang:en", geocode = lookup_coords("usa"), n = 10000
)

## create lat/lng variables using all available tweet and profile geo-location data
rt <- lat_lng(rt)

## plot state boundaries
par(mar = c(0, 0, 0, 0))
maps::map("state", lwd = .25)

## plot lat and lng points onto state map
with(rt, points(lng, lat, pch = 20, cex = .75, col = rgb(0, .3, .7, .75)))
```
Randomly sample (approximately 1%) from the live stream of all tweets.
```r
## random sample for 30 seconds (default)
rt <- stream_tweets("")
```
Stream all geo-enabled tweets from London for 60 seconds.
```r
## stream tweets from london for 60 seconds
rt <- stream_tweets(lookup_coords("london, uk"), timeout = 60)
```
Stream all tweets mentioning realDonaldTrump or Trump for a week.
```r
## stream tweets mentioning realdonaldtrump or trump for a week
## (60 secs x 60 mins x 24 hours x 7 days)
stream_tweets(
  "realdonaldtrump,trump",
  timeout = 60 * 60 * 24 * 7,
  file_name = "tweetsabouttrump.json",
  parse = FALSE
)

## read in the data as a tidy tbl data frame
djt <- parse_stream("tweetsabouttrump.json")
```
Retrieve a list of all the accounts a user follows.
```r
## get user IDs of accounts followed by CNN
cnn_fds <- get_friends("cnn")

## lookup data on those accounts
cnn_fds_data <- lookup_users(cnn_fds$user_id)
```
Retrieve a list of the accounts following a user.
```r
## get user IDs of accounts following CNN
cnn_flw <- get_followers("cnn", n = 75000)

## lookup data on those accounts
cnn_flw_data <- lookup_users(cnn_flw$user_id)
```
Or if you really want ALL of their followers:
```r
## how many total followers does cnn have?
cnn <- lookup_users("cnn")

## get them all (this would take a little over 5 days)
cnn_flw <- get_followers(
  "cnn", n = cnn$followers_count, retryonratelimit = TRUE
)
```
Get the most recent 3,200 tweets from cnn, BBCWorld, and foxnews.
```r
## get the most recent 3,200 tweets from cnn, BBCWorld, and foxnews
tmls <- get_timelines(c("cnn", "BBCWorld", "foxnews"), n = 3200)

## plot the frequency of tweets for each user over time
tmls %>%
  dplyr::filter(created_at > "2017-10-29") %>%
  dplyr::group_by(screen_name) %>%
  ts_plot("days", trim = 1L) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    legend.title = ggplot2::element_blank(),
    legend.position = "bottom",
    plot.title = ggplot2::element_text(face = "bold")) +
  ggplot2::labs(
    x = NULL, y = NULL,
    title = "Frequency of Twitter statuses posted by news organization",
    subtitle = "Twitter status (tweet) counts aggregated by day from October/November 2017",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
```
Get the 3,000 most recently favorited statuses by JK Rowling.
```r
jkr <- get_favorites("jk_rowling", n = 3000)
```
Search for 1,000 users with the rstats hashtag in their profile bios.
```r
## search for users with #rstats in their profiles
usrs <- search_users("#rstats", n = 1000)
```
Discover what's currently trending in San Francisco.
```r
sf <- get_trends("san francisco")
```
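To see which locations Twitter supports for trend queries, rtweet also provides `trends_available()`. The sketch below is an illustration only; the column names `name`, `country`, and `woeid` are assumptions about the returned data frame.

```r
## list the locations for which Twitter provides trend data
trend_locs <- trends_available()

## peek at a few location names and their WOEIDs (column names assumed)
head(trend_locs[, c("name", "country", "woeid")])
```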
Authorizing API access:

```r
## authorizing API access
vignette("auth", package = "rtweet")
```
Quick overview of the rtweet package:
```r
## quick overview of rtweet functions
vignette("intro", package = "rtweet")
```
Working with the stream:

```r
## working with the stream
vignette("stream", package = "rtweet")
```
Communicating with Twitter's APIs relies on an internet connection, which can sometimes be inconsistent. With that said, if you encounter an obvious bug for which there is not already an active issue, please create a new issue on GitHub with all code used (preferably a reproducible example).