# R/swcovidbot.R
#
# Defines functions swcovidtwitter_token

# bot for retweeting #SWthruCovid, #SWcovid19, and #SWcovid
# maintained by: Johnny Sullivan, MSW
# email: johnny@jesullivan.com
# twitter: @johnnysullivan

# dependencies
library(dplyr)      # to work with data frame like objects, both in memory and out of memory
library(purrr)      # to work efficiently with functions and vectors
library(rtweet)     # to access Twitter API, pull tweets, and retweet


# This bot is based on code from epibot, available here: https://github.com/malcolmbarrett/epibot
# To search for Twitter data and retweet, this bot uses the rtweet package, available here: https://rtweet.info/


# twitter API keys and token -------------------------------------------------
# Build the Twitter API token used by every rtweet call in this script.
#
# The four credentials are read from the SWCOVIDBOT_* environment variables,
# so no secret is stored in the source. Sys.getenv() gives "" for a variable
# that is not set, which means missing credentials are handed on as empty
# strings instead of stopping here.
#
# Returns: the value of rtweet::create_token() for the app "swcovid_bot".
swcovidtwitter_token <- function() {
  api_key <- Sys.getenv("SWCOVIDBOT_CONSUMER_KEY")
  api_secret <- Sys.getenv("SWCOVIDBOT_CONSUMER_SECRET")
  user_token <- Sys.getenv("SWCOVIDBOT_ACCESS_TOKEN")
  user_secret <- Sys.getenv("SWCOVIDBOT_ACCESS_SECRET")

  # NOTE(review): set_renv = FALSE presumably stops rtweet from registering
  # this token as the default environment token -- confirm in the rtweet docs
  rtweet::create_token(
    "swcovid_bot",
    consumer_key = api_key,
    consumer_secret = api_secret,
    access_token = user_token,
    access_secret = user_secret,
    set_renv = FALSE
  )
}



# pull new tweets ------------------------------------------------------------

## search terms
## search query: the four hashtags joined with OR
swcovidtwitter <- "#swcovid19 OR #swcovid OR #swthrucovid OR #swthrucovid19"



## start without a since_id; when an earlier run saved its results, take the
## first stored status_id as since_id instead
since_id <- NULL
if (file.exists(file.path("data", "swcovid_search.rds"))) {
  previous_tweets <- readRDS(file.path("data", "swcovid_search.rds"))
  since_id <- previous_tweets$status_id[1]
}


## request up to 100,000 tweets matching the query, retweets excluded
swcovidtwitter_tweets <- search_tweets(
  swcovidtwitter,
  n = 1e5,
  include_rts = FALSE,
  retryonratelimit = TRUE,
  since_id = since_id,
  verbose = FALSE,
  token = swcovidtwitter_token()
)


## when anything came back, keep one row per status_id (all columns retained)
if (!is_empty(swcovidtwitter_tweets)) {
  swcovidtwitter_tweets <- swcovidtwitter_tweets %>%
    distinct(status_id, .keep_all = TRUE)
}



# select tweets to retweet ---------------------------------------------------
# don't retweet tweets from the account named 'SwCovid'
if (is_empty(swcovidtwitter_tweets)) {
  ## nothing was found: an empty result may not even have a `screen_name`
  ## column, so skip the selection instead of letting filter() fail.
  ## NULL is what the retweet step treats as "nothing to do".
  filtered_tweets <- swcovidtwitter_tweets
  tweets_to_retweet <- NULL
} else {
  ## compare in lower case on BOTH sides; a lower-cased screen name can never
  ## equal the mixed-case "SwCovid", which let the bot's own tweets through
  filtered_tweets <- swcovidtwitter_tweets %>%
    filter(tolower(screen_name) != "swcovid")

  if (nrow(filtered_tweets) > 5) {
    ## more than 5 candidates: draw 5 at random, newest first
    tweets_to_retweet <- filtered_tweets %>%
      sample_n(5) %>%
      arrange(desc(created_at)) %>%
      pull(status_id)

    ## flag every tweet whose hashtags include #swthrucovid (case-insensitive)
    thru <- filtered_tweets$hashtags %>%
      map_lgl(~ "swthrucovid" %in% tolower(.x))

    thru_ids <- filtered_tweets %>%
      filter(thru) %>%
      pull(status_id)

    ## #swthrucovid tweets are always retweeted, followed by the random draw;
    ## unique() removes IDs that appear in both
    tweets_to_retweet <- unique(c(thru_ids, tweets_to_retweet))
  } else {
    ## 5 or fewer candidates: retweet all of them
    tweets_to_retweet <- filtered_tweets$status_id
  }
}




# bind and save data ---------------------------------------------------------

## Merge the new search results with any previously saved ones, drop duplicate
## status IDs, and write the result to disk. Skipped when the search returned
## nothing, so an existing file is left untouched in that case.
if (!is_empty(swcovidtwitter_tweets)) {
  ## TRUE for the first occurrence of each status ID
  kp <- !duplicated(swcovidtwitter_tweets$status_id)
  ## take the users data before subsetting the tweets and keep the same rows,
  ## so the rows of users keep corresponding with the tweets
  users <- users_data(swcovidtwitter_tweets)[kp, ]
  swcovidtwitter_tweets <- swcovidtwitter_tweets[kp, ]
  ## restore as users attribute
  attr(swcovidtwitter_tweets, "users") <- users

  ## if there's already a search data file saved, then read it in,
  ## drop the duplicates then update the data
  if (file.exists(file.path("data", "swcovid_search.rds"))) {

    ## bind rows (for tweets AND users data); the new results are listed
    ## first -- presumably so the newest status_id stays in row 1, which is
    ## what the since_id lookup reads on the next run (verify that
    ## do_call_rbind keeps the list order)
    swcovidtwitter_tweets <- do_call_rbind(
      list(swcovidtwitter_tweets, readRDS(file.path("data", "swcovid_search.rds")))
    )

    ## determine whether each observation has a unique status ID
    kp <- !duplicated(swcovidtwitter_tweets$status_id)

    ## only keep rows (observations) with unique status IDs
    users <- users_data(swcovidtwitter_tweets)[kp, ]

    ## the rows of users should correspond with the tweets
    swcovidtwitter_tweets <- swcovidtwitter_tweets[kp, ]

    ## restore as users attribute
    attr(swcovidtwitter_tweets, "users") <- users
  }

  ## saveRDS() does not create missing directories, so make sure "data"
  ## exists before writing (it may be absent on a first run)
  if (!dir.exists("data")) {
    dir.create("data", recursive = TRUE)
  }

  ## save the data
  saveRDS(swcovidtwitter_tweets, file.path("data", "swcovid_search.rds"))

  ## save shareable data (only status_ids)
  saveRDS(swcovidtwitter_tweets[, "status_id"], file.path("data", "swcovid_search-ids.rds"))
}



# retweet the selected tweets: up to 5 random ones plus every #swthrucovid tweet (maybe reconsider this number later) ---------------
## Retweet every selected status ID, pausing between posts.
## length() is 0 for both NULL and a zero-length vector, so one check covers
## "nothing selected" in either form.
if (length(tweets_to_retweet) > 0) {
  ## build the token once and reuse it for the whole batch
  retweet_token <- swcovidtwitter_token()
  n_retweets <- length(tweets_to_retweet)

  walk(seq_len(n_retweets), function(i) {
    post_tweet(
      retweet_id = tweets_to_retweet[[i]],
      token = retweet_token
    )
    ## takes a 20 second rest between retweets in batch; nothing follows the
    ## last retweet, so there is no need to wait after it
    if (i < n_retweets) {
      Sys.sleep(20)
    }
  })
}
# johnnysullivan/swcovidbot documentation built on May 31, 2020, 2:40 p.m.