# inst/gathertweet.R

#! /usr/bin/env Rscript

# Usage -----------------------------------------------------------------------
# docopt usage text: this string is both the --help output and the
# specification that docopt() parses the command line against, so option
# names and [default: ...] markers here are functional, not just prose.
doc <- 'Gather tweets from the command line

Usage:
  gathertweet search [--file=<file>] [options] [--] <terms>...
  gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>]
  gathertweet timeline [options] [--] <users>...
  gathertweet favorites [options] [--] <users>...
  gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...]

Options:
  -h --help             Show this screen.
  --file <file>         Name of RDS file where tweets are stored
                        [default: tweets.rds]
  --no-parse            Disable parsing of the results
  --token <token>       See {rtweet} for more information
  --retryonratelimit    Wait and retry when rate limited (only relevant when n
                        exceeds 18000 tweets)
  --quiet               Disable printing of {rtweet} processing messages
  --polite              Only allow one process (search|update) to run at a time
  --backup              Create a backup of existing tweet file
  --backup-dir <dir>    Location for backups [default: backups]
  --debug-args          Debug input arguments
  --and-simplify        Create additional simplified tweet set.
                        Run `gathertweet simplify` manually for more control.
search:
  <terms>  Search terms. Individual search terms are queried separately,
           but duplicated tweets are removed from the stored results.
           Each search term counts against the 15 minute rate limit of 180
           searches, which can be avoided by manually joining search terms
           into a single query. NOTE: Wrap queries with spaces in
           \'single quotes\': only use double quotes within single quotes.
  --type <type>         Type of search results: "recent", "mixed", or "popular"
                        [default: recent]
  --geocode <geocode>   Geographical limiter of the template
                        "latitude,longitude,radius"
  --since_id <since_id> Return results with an ID greater than (newer than) or
                        equal to since_id, automatically extracted from the
                        existing tweets <file>, if it exists, and ignored when
                        <max_id> is set. Use "none" for all available tweets,
                        or "last" for the maximum seen status_id in existing
                        tweets. [default: last]

search and timeline:
  -n, --n <n>        Number of tweets to return [default: 18000]
  --include_rts      Logical indicating whether retweets should be included
                     (default is to exclude RTs)
  --max_id <max_id>  Return tweets with an ID less (older) than or equal to
                     <max_id>

timeline and favorites:
  <users>  A list of users as user names, IDs, or a mixture of both,
           separated by spaces.

timeline:
  --home   If included, returns home-timeline instead of user-timeline.

simplify:
  <fields>  Tweet fields that should be included. By default includes:
            `status_id`, `created_at`, `user_id`, `screen_name`, `text`,
            `favorite_count`, `retweet_count`, `is_quote`, `hashtags`,
            `mentions_screen_name`, `profile_url`, `profile_image_url`,
            `media_url`, `urls_url`, `urls_expanded_url`.
  --output <output>  Output file, default is input file with `_simplified`
                     appended to name.
'

# Parse the command line against the usage text stored in `doc`.
library(docopt)
opts <- docopt(
  doc,
  version = paste("gathertweet version", packageVersion("gathertweet"))
)

# Quit the R session without saving the workspace; `value` is the exit status.
exit <- function(value = 0) {
  q(save = "no", status = value)
}

# --debug-args: show the parsed arguments, save them to args.rds and quit
# before any action runs.
if (opts[["--debug-args"]]) {
  str(opts)
  saveRDS(opts, file = "args.rds")
  exit()
}

# Run the gathertweet action selected on the command line.
#
# Takes no arguments: it reads the parsed docopt arguments from the
# script-level `opts` list and forwards that whole list, via do.call(), to the
# gathertweet_*() function matching the chosen action. Called for its side
# effects only.
#
# The names `action`, `valid_actions`, `collapse` and `opts` appear inside
# "{...}" placeholders in the message strings below, so they must keep these
# exact names and stay in this function's frame.
do_gathertweet <- function() {
  library(gathertweet)
  collapse <- function(..., sep = ", ") paste(..., collapse = sep)

  # Stop unless at least one <users> value was supplied (shared by the
  # timeline and favorites actions).
  require_users <- function() {
    if (!length(opts$users)) {
      stop(
        "Please provide a list of users as user names, user IDs, ",
        "or a mixture of both.",
        call. = FALSE
      )
    }
  }

  # Which action was called?
  valid_actions <- c("search", "update", "simplify", "timeline", "favorites")
  action <- names(Filter(isTRUE, opts[valid_actions]))
  if (!length(action)) {
    log_fatal("Please specify a valid action: {collapse(valid_actions)}")
  }

  if (isTRUE(opts$polite)) {
    # The lock file name is derived from the target file and the action flags.
    # NOTE(review): the digest covers search/update/simplify but not
    # timeline/favorites, so those two actions share one lock name for a given
    # file -- confirm this is intended.
    lockfile <- paste0(
      ".gathertweet_",
      digest::digest(opts[c("file", "search", "update", "simplify")]),
      ".lock"
    )
    # timeout = 0: do not wait for a lock held by another process.
    lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0)
    gathertweet:::stopifnot_locked(
      lck,
      "Another gathertweet {action} process is currently running for {opts$file}"
    )
    # The lock is released exactly once, by this exit handler, on both the
    # success and the error path. Releasing it manually as well would let the
    # handler's second unlink() remove a lock file that another process
    # created in between.
    # NOTE(review): filelock's documentation appears to recommend leaving lock
    # files in place rather than deleting them -- confirm unlink() is wanted.
    on.exit({
      filelock::unlock(lck)
      unlink(lockfile)
    }, add = TRUE)
  }

  log_info("---- gathertweet {action} start ----")

  if (isTRUE(opts$backup)) {
    backup_tweets(opts$file, backup_dir = opts[["backup-dir"]])
  }

  # Also simplify if --and-simplify flag is called. This assignment makes a
  # function-local copy of `opts`; that copy is what the do.call()s below see.
  if (isTRUE(opts[["--and-simplify"]])) {
    opts$simplify <- TRUE
  }

  # Gather ------------------------------------------------------------------
  if (isTRUE(opts$search)) {
    do.call("gathertweet_search", opts)
  } else if (isTRUE(opts$update)) {
    do.call("gathertweet_update", opts)
  } else if (isTRUE(opts$timeline)) {
    require_users()
    do.call("gathertweet_timeline", opts)
  } else if (isTRUE(opts$favorites)) {
    require_users()
    do.call("gathertweet_favorites", opts)
  }

  # Simplify ----------------------------------------------------------------
  if (isTRUE(opts$simplify)) {
    do.call("gathertweet_simplify", opts)
  }

  log_info("---- gathertweet {action} complete ----")
}

do_gathertweet()
# gadenbuie/gathertweet documentation built on March 15, 2020, 12:32 p.m.