R/gathertweet_actions.R

Defines functions gathertweet_search gathertweet_update gathertweet_timeline gathertweet_favorites gathertweet_simplify isFALSE set_since_id save_tweets_or_exit

Documented in gathertweet_search

#' @title gathertweet actions
#'
#' @description
#' Runs `rtweet::search_tweets()` once for each element of `terms`. Parsed
#' results are row-bound and handed to `save_tweets_or_exit()`; un-parsed
#' results are written to `file` with `saveRDS()` as a list holding one
#' element per search term.
#'
#' @param terms Search queries; each element is sent as its own `q`.
#' @param file Path of the `.rds` file the tweets are saved in.
#' @param n Number of tweets requested per term; coerced with `as.integer()`.
#' @param since_id Resolved by `set_since_id()` (together with `max_id` and
#'   `file`) before being forwarded to `rtweet::search_tweets()`.
#' @param max_id,type,include_rts,geocode,token,retryonratelimit Forwarded
#'   unchanged to `rtweet::search_tweets()`.
#' @param no-parse When exactly `FALSE`, rtweet is called with `parse = TRUE`.
#'   When exactly `TRUE`, the raw results are saved without further
#'   processing.
#' @param quiet When exactly `FALSE`, rtweet is called with `verbose = TRUE`.
#' @param ... Not used.
#'
#' @return The gathered tweets: the row-bound search results, or the list of
#'   un-parsed results when `no-parse` is `TRUE`.
#' @export
gathertweet_search <- function(
  terms,
  file = "tweets.rds",
  n = 18000,
  max_id = NULL,
  since_id = "last",
  type = "recent",
  include_rts = FALSE,
  geocode = NULL,
  `no-parse` = FALSE,
  token = NULL,
  retryonratelimit = FALSE,
  quiet = FALSE,
  ...
) {
  log_info("Searching for \"{paste0(terms, collapse = '\", \"')}\"")

  since_id <- set_since_id(since_id, max_id, file)

  # The command-line style flags are negated once, up front.
  parse_results <- isFALSE(`no-parse`)
  be_verbose <- isFALSE(quiet)

  # One API search per term; every term shares the same settings.
  search_one <- function(term) {
    rtweet::search_tweets(
      q = term,
      n = as.integer(n),
      type = type,
      include_rts = include_rts,
      geocode = geocode,
      max_id = max_id,
      parse = parse_results,
      token = token,
      retryonratelimit = retryonratelimit,
      verbose = be_verbose,
      since_id = since_id
    )
  }
  tweets <- lapply(terms, search_one)

  if (!isTRUE(`no-parse`)) {
    tweets <- dplyr::bind_rows(tweets)
    save_tweets_or_exit(tweets, file)
  } else {
    # Raw results are written as-is, replacing whatever `file` held.
    log_info("Saving un-parsed tweets in {file}")
    saveRDS(tweets, file)
  }

  tweets
}

#' Update the tweets stored in a file
#'
#' Calls `update_tweets()` on `file` and writes the result back to the same
#' file with `save_tweets()`.
#'
#' @param file Path of the `.rds` file holding the tweets; it must exist.
#' @param no-parse When exactly `FALSE`, `parse = TRUE` is passed on to
#'   `update_tweets()`.
#' @param token Passed on to `update_tweets()`.
#' @param ... Not used.
#'
#' @return The value returned by `save_tweets()`.
#' @export
gathertweet_update <- function(
  file = "tweets.rds",
  `no-parse` = FALSE,
  token = NULL,
  ...
) {
  logger("Updating tweets in {file}")

  file_is_missing <- !file.exists(file)
  if (file_is_missing) {
    log_fatal("`{file}` does not exist")
  }

  # `parse` ends up in rtweet::lookup_statuses()
  tweets <- update_tweets(file = file, parse = isFALSE(`no-parse`), token = token)
  log_debug("Status lookup returned {nrow(tweets)} tweets")

  tweets <- save_tweets(tweets, file)
  log_debug("Total of {nrow(tweets)} tweets in {file}")

  tweets
}

#' Gather tweets from user timelines
#'
#' Requests timelines with `rtweet::get_timeline()` and stores the result in
#' `file`. Parsed results go through `save_tweets_or_exit()`; un-parsed
#' results are written directly with `saveRDS()`, the same way
#' `gathertweet_search()` handles them.
#'
#' @param users Passed to `rtweet::get_timeline()` as `user`.
#' @param file Path of the `.rds` file the tweets are saved in.
#' @param n Number of tweets requested; coerced with `as.integer()`. A warning
#'   is logged when it exceeds 3200, but the value is forwarded unchanged.
#' @param max_id,token Forwarded unchanged to `rtweet::get_timeline()`.
#' @param home,include_rts Forwarded as `isTRUE()` of the supplied value.
#' @param no-parse When exactly `FALSE`, rtweet is called with `parse = TRUE`;
#'   any other value requests un-parsed results.
#' @param ... Not used.
#'
#' @return The value returned by `rtweet::get_timeline()`.
#' @export
gathertweet_timeline <- function(
  users,
  file        = "tweets.rds",
  n           = 3200,
  max_id      = NULL,
  home        = TRUE,
  `no-parse`  = FALSE,
  token       = NULL,
  include_rts = FALSE,
  ...
) {
  log_info("Gathering tweets by {collapse(users)}")

  n <- as.integer(n)
  if (n > 3200) {
    log_warn("Twitter API for timelines returns a maximum of 3200 tweets per user")
  }

  parse <- isFALSE(`no-parse`)

  tweets <- rtweet::get_timeline(
    user        = users,
    n           = n,
    max_id      = max_id,
    home        = isTRUE(home),
    parse       = parse,
    check       = TRUE,
    token       = token,
    include_rts = isTRUE(include_rts)
  )

  if (parse) {
    save_tweets_or_exit(tweets, file)
  } else {
    # Un-parsed results are not a data frame, so save_tweets_or_exit() cannot
    # count, de-duplicate or sort them (its `nrow(tweets) == 0` check would be
    # a zero-length condition). Write them as-is; this replaces the contents
    # of `file`.
    log_info("Saving un-parsed tweets in {file}")
    saveRDS(tweets, file)
  }

  tweets
}

#' Gather tweets favorited by users
#'
#' Requests favorites with `rtweet::get_favorites()` and stores the result in
#' `file`. Parsed results go through `save_tweets_or_exit()`; un-parsed
#' results are written directly with `saveRDS()`, the same way
#' `gathertweet_search()` handles them.
#'
#' @param users Passed to `rtweet::get_favorites()` as `user`.
#' @param file Path of the `.rds` file the tweets are saved in.
#' @param n Number of tweets requested; coerced with `as.integer()` and capped
#'   at 3000 (with a logged warning).
#' @param max_id,token Forwarded unchanged to `rtweet::get_favorites()`.
#' @param since_id Resolved by `set_since_id()` (together with `max_id` and
#'   `file`) before being forwarded.
#' @param no-parse When exactly `FALSE`, rtweet is called with `parse = TRUE`;
#'   any other value requests un-parsed results.
#' @param ... Not used.
#'
#' @return The value returned by `rtweet::get_favorites()`.
#' @export
gathertweet_favorites <- function(
  users,
  file        = "tweets.rds",
  n           = 3000,
  max_id      = NULL,
  since_id    = NULL,
  `no-parse`  = FALSE,
  token       = NULL,
  ...
) {
  log_info("Gathering tweets favorited by {collapse(users)}")

  since_id <- set_since_id(since_id, max_id, file)
  n <- as.integer(n)
  if (n > 3000) {
    log_warn("Twitter API for favorites/list returns a maximum of 3000 tweets per user")
    # Keep `n` an integer after clamping.
    n <- 3000L
  }

  parse <- isFALSE(`no-parse`)

  tweets <- rtweet::get_favorites(
    user        = users,
    n           = n,
    max_id      = max_id,
    since_id    = since_id,
    parse       = parse,
    token       = token
  )

  if (parse) {
    save_tweets_or_exit(tweets, file)
  } else {
    # Un-parsed results are not a data frame, so save_tweets_or_exit() cannot
    # count, de-duplicate or sort them (its `nrow(tweets) == 0` check would be
    # a zero-length condition). Write them as-is; this replaces the contents
    # of `file`.
    log_info("Saving un-parsed tweets in {file}")
    saveRDS(tweets, file)
  }

  tweets
}

#' Simplify the tweets stored in a file
#'
#' Runs `simplify_tweets()` on `file` and saves the result with
#' `save_tweets()`.
#'
#' @param file Path of the `.rds` file holding the tweets; it must exist.
#' @param fields Passed to `simplify_tweets()` as `.fields`.
#' @param output Path to save the simplified tweets to. When `NULL`, the path
#'   is derived from `file` with `path_add(file, append = "_simplified")`.
#' @param ... Not used.
#'
#' @return The value returned by `save_tweets()`.
#' @export
gathertweet_simplify <- function(
  file = "tweets.rds",
  fields = NULL,
  output = NULL,
  ...
) {
  logger("Simplifying tweets in {file}")
  if (!file.exists(file)) {
    log_fatal("`{file}` does not exist")
  }
  tweets_simplified <- simplify_tweets(
    tweets = NULL,
    file = file,
    .fields = fields
  )
  log_debug("Simplified {nrow(tweets_simplified)} tweets")
  if (is.null(output)) {
    # path_add() is internal to this package, so it is called directly;
    # `:::` on the package's own namespace is unnecessary here.
    output <- path_add(file, append = "_simplified")
  }
  log_info("Saving simplified tweets to {output}")
  save_tweets(tweets_simplified, output)
}

# TRUE only for a single, non-missing logical value that is FALSE; anything
# else (other types, NULL, NA, longer vectors, TRUE) gives FALSE.
isFALSE <- function(x) {
  if (!is.logical(x) || length(x) != 1L) {
    return(FALSE)
  }
  !is.na(x) && !x
}

# Resolve the `since_id` lower bound to use for an API request.
#
# since_id: NULL or zero-length (no lower bound), "none" (no lower bound),
#   "last" (use the value of last_seen_tweet(file = file)), or any other
#   value, which is returned as given.
# max_id:   when not NULL, `since_id` is ignored and NULL is returned.
# file:     required when since_id is "last"; log_fatal() is called if it is
#   NULL in that case.
#
# Returns the resolved since_id, or NULL. Logs whichever bounds are in effect.
set_since_id <- function(since_id = NULL, max_id = NULL, file = NULL) {
  since_id <- if (!is.null(max_id) || length(since_id) == 0) {
    # An explicit max_id takes precedence over since_id. A missing since_id
    # (this function's default, and the default in gathertweet_favorites())
    # means no lower bound; comparing it with `==` would produce a zero-length
    # condition and make `if` fail.
    NULL
  } else if (isTRUE(since_id == "last")) {
    if (is.null(file)) {
      log_fatal("`file` must be provided for since_id = \"last\"")
    }
    last_seen_tweet(file = file)
  } else if (isTRUE(since_id == "none")) {
    NULL
  } else {
    since_id
  }

  if (!is.null(since_id)) log_info("Tweets from {since_id}")
  if (!is.null(max_id)) log_info("Tweets up to {max_id}")
  since_id
}


# Save freshly gathered tweets to `file`, or call exit() when there are none.
#
# tweets: data frame of tweets with a `status_id` column.
# file:   path handed to save_tweets().
#
# Rows are reduced to the first occurrence of each status_id and ordered by
# status_id before being passed to save_tweets(). The counts before and after
# saving are logged.
save_tweets_or_exit <- function(tweets, file) {
  nothing_gathered <- nrow(tweets) == 0
  if (nothing_gathered) {
    log_info("---- No new tweets. ----")
    exit()
  }

  first_occurrence <- !duplicated(tweets$status_id)
  tweets <- tweets[first_occurrence, ]

  id_order <- order(tweets$status_id)
  tweets <- tweets[id_order, ]

  log_info("Gathered {nrow(tweets)} tweets")

  # save_tweets() returns the tweets now in `file`; that is what gets counted.
  tweets <- save_tweets(tweets, file)
  log_info("Total of {nrow(tweets)} tweets in {file}")
}
gadenbuie/gathertweet documentation built on March 15, 2020, 12:32 p.m.