R/comm.R

## twitter API has multiple methods of handling paging issues, not to mention the search API
## has a completely different interface.  Trying to manage all of these below using one unified
## approach to actually sending the data back & receiving response and then providing multiple
## mechanisms to page
tw_from_response = function(response) {
  ## Will provide some basic error checking, as well as suppress
  ## warnings that always seem to come out of fromJSON, even
  ## in good cases.
  out <- try(suppressWarnings(fromJSON(content(response, as="text", encoding="UTF-8"))), silent=TRUE)
  if (inherits(out, "try-error")) {
    stop("Error: Malformed response from server, was not JSON.\n",
         "The most likely cause of this error is Twitter returning a character which\n",
         "can't be properly parsed by R. Generally the only remedy is to wait long\n",
         "enough for the offending character to disappear from searches (e.g. if\n",
         "using searchTwitter()).")
  }

  return(out)
}

doAPICall = function(cmd, params=NULL, method="GET", retryCount=5,
                     retryOnRateLimit=0, debug=FALSE, ...) {
  if (debug) {
    browser()
  }

  if (!is.numeric(retryOnRateLimit)) {
    stop("retryOnRateLimit must be a number")
  }

  if (!is.numeric(retryCount)) {
    stop("retryCount must be a number")
  }

  recall_func = function(retryCount, rateLimitCount) {
    return(doAPICall(cmd, params=params, method=method, retryCount=retryCount,
                     retryOnRateLimit=rateLimitCount, ...))
  }
  url = getAPIStr(cmd)
  if (method == "POST") {
    out = try(POST(url, config(token=get_oauth_sig()), body=params), silent=TRUE)
  } else {
    if (is.null(params)) {
      query = NULL
    } else {
      query = lapply(params, function(x) URLencode(as.character(x)))
    }
    out = GET(url, query=query, config(token=get_oauth_sig()))
  }

  httr_status = out$status
  http_message = http_status(out)$message

  if (httr_status %in% c(500, 502)) {
    print(http_message)
    print(paste("This error is likely transient, retrying up to", retryCount, "more times ..."))
    ## These are typically fail whales or similar such things
    Sys.sleep(1)
    return(recall_func(retryCount - 1, rateLimitCount=retryOnRateLimit))
  } else if (httr_status == 429) {
    if (retryOnRateLimit > 0) {
      ## We're rate limited. Wait a while and try again
      newRateLimit = retryOnRateLimit - 1
      print(paste("Rate limited .... blocking for a minute and retrying up to", newRateLimit, "times ..."))
      Sys.sleep(60)
      return(recall_func(retryCount, newRateLimit))
    } else {
      ## FIXME: very experimental - the idea is that if we're rate limited,
      ## just give a warning and return. This should result in rate limited
      ## operations returning the partial result
      warning("Rate limit encountered & retry limit reached - returning partial results")
      return(NULL)
    }
  } else if (httr_status == 401) {
    stop("OAuth authentication error:\nThis most likely means that you have incorrectly called setup_twitter_oauth()'")
  } else {
    ## Generic catch-all for any other errors
    stop_for_status(out)
  }

  json = tw_from_response(out, ...)

  if (length(json[["errors"]]) > 0) {
    stop(json[["errors"]][[1]][["message"]])
  }

  out = json
}

setRefClass('twAPIInterface',
            fields = list(
              maxResults = 'integer'
              ),
            methods = list(
              initialize=function(...) {
                maxResults <<- 100L
                callSuper(...)
                .self
              },
              tw_from_response = tw_from_response,
              doAPICall = doAPICall
              )
            )


tint <- getRefClass('twAPIInterface')
tint$accessors(names(tint$fields()))
twInterfaceObj <- tint$new()


doPagedAPICall = function(cmd, num, params=NULL, method='GET', ...) {
  if (num <= 0)
    stop('num must be positive')
  else
    num <- as.integer(num)

  maxResults <- twInterfaceObj$getMaxResults()
  page <- 1
  total <- num
  count <- ifelse(num < maxResults, num, maxResults)
  jsonList <- list()
  params[['count']] <- count
  while (total > 0) {
    params[['page']] <- page
    results = twInterfaceObj$doAPICall(cmd, params, method, ...)
    if (is.null(results)) {
      return(jsonList)
    }

    jsonList <- c(jsonList, results)

    total <- total - count
    page <- page + 1
  }
  jLen <- length(jsonList)
  if ((jLen > 0) && (jLen > num))
    jsonList <- jsonList[1:num]
  jsonList
}

doCursorAPICall = function(cmd, type, num=NULL, params=NULL, method='GET', ...) {
  cursor <- -1
  if (!is.null(num)) {
    if (num <= 0)
      stop("num must be positive")
    else
      num <- as.integer(num)
  }
  vals <- character()
  while(cursor != 0) {
    params[['cursor']] <- cursor
    curResults <- twInterfaceObj$doAPICall(cmd, params, method, ...)
    if (is.null(curResults)) {
      return(vals)
    }
    vals <- c(vals, curResults[[type]])
    if ((!is.null(num)) && (length(vals) >= num))
      break
    cursor <- curResults[['next_cursor_str']]
  }
  if ((!is.null(num)) && (length(vals) > num))
    vals <- vals[1:num]
  vals
}

doRppAPICall = function(cmd, num, params, ...) {
  if (! 'q' %in% names(params))
    stop("parameter 'q' must be supplied")
  maxResults <- twInterfaceObj$getMaxResults()
  params[['count']] <- ifelse(num < maxResults, num, maxResults)

  curDiff <- num
  jsonList <- list()
  ids = list()
  while (curDiff > 0) {
    fromJSON <- twInterfaceObj$doAPICall(cmd, params, 'GET', ...)
    if (is.null(fromJSON)) {
      return(jsonList)
    }
    newList <- fromJSON$statuses

    curIds = sapply(newList, function(x) x[["id"]])
    dups = which(ids %in% ids)
    if (length(dups) > 0) {
      curIds = curIds[-dups]
      newList = newList[-dups]
    }

    if (length(curIds) == 0) {
      break
    }

    jsonList <- c(jsonList, newList)
    curDiff <- num - length(jsonList)
    if ((curDiff > 0)) { #&& (length(newList) == params[["count"]])) {
      params[["max_id"]] = as.character(as.integer64(min(curIds)) - 1)
    } else {
      break
    }
  }

  if (length(jsonList) > num) {
    jsonList = jsonList[seq_len(num)]
  }

  if (length(jsonList) < num) {
    warning(num, " tweets were requested but the API can only return ", length(jsonList))
  }

  return(jsonList)
}

twitterDateToPOSIX <- function(dateStr) {
  ## In typical twitter fashion, there are multiple ways that they
  ## spit dates back at us.  First, let's take a look at unix
  ## epoch time, and then try a few data string formats
  dateInt <- suppressWarnings(as.numeric(dateStr))

  ## Locale must be set to something american-y in order to properly
  ## parse the Twitter dates. Get the current LC_TIME, reset it on
  ## exit and then change the locale
  curLocale <- Sys.getlocale("LC_TIME")
  on.exit(Sys.setlocale("LC_TIME", curLocale), add=TRUE)
  Sys.setlocale("LC_TIME", "C")

  if (!is.na(dateInt)) {
    posDate <- as.POSIXct(dateInt, tz='UTC', origin='1970-01-01')
  } else {
    posDate <- as.POSIXct(dateStr, tz='UTC',
                          format="%a %b %d %H:%M:%S +0000 %Y")
    ## try again if necessary
    if (is.na(posDate))
      posDate <- as.POSIXct(dateStr, tz='UTC',
                            format="%a, %d %b %Y %H:%M:%S +0000")
  }
  ## might still be NA, but we tried
  return(posDate)
}

Try the twitteR package in your browser

Any scripts or data that you put into this service are public.

twitteR documentation built on May 2, 2019, 6:46 a.m.