R/utils.R

Defines functions vtcn_silent vtcn_cat vtcn_msg msg err_msg f_verbose prog_bar build_chunks clean_results add_tweets_results lst_tweets_results lst_counts_results rm_dt_tail check_fmt_datetime resp_rate_limit ids_from_urls req_auth_header get_ua get_version

# get package version
get_version <- function() {
  if ("voson.tcn" %in% loadedNamespaces()) {
    return(utils::packageVersion("voson.tcn"))
  }
  "_"
}

# build user-agent string
get_ua <- function() {
  paste0("voson.tcn v", get_version(), " (R package)")
}

# httr request header
req_auth_header <- function(token) {
  httr::add_headers(.headers = c(
    Authorization = paste0("Bearer ", token),
    "User-Agent" = get_ua()
  ))
}

# extract tweet ids from tweet urls
ids_from_urls <- function(x) {
  if (is.null(x)) return(x)

  url_regex <- "^https://twitter\\.com/[A-Za-z0-9_]+?/status/(\\d+)/{0,1}$"
  x <- stringr::str_match(tolower(as.character(x)),
    paste0(url_regex, "|", "^(\\d+)$"))

  dplyr::coalesce(x[, 2], x[, 3])
}

# api rate-limit
resp_rate_limit <- function(headers, endpoint = NULL, sleep = FALSE, verbose = TRUE) {
  msg <- f_verbose(verbose)

  reset <- as.numeric(headers$`x-rate-limit-reset`)
  msg(
    paste0(
      ifelse(is.null(endpoint), "", paste0(endpoint, " ")),
      "remaining: ",
      headers$`x-rate-limit-remaining`,
      "/",
      headers$`x-rate-limit-limit`,
      " reset: ",
      as.POSIXct(reset, origin = "1970-01-01"),
      " (UNIX ",
      reset,
      ")"
    )
  )

  if (sleep) {
    wait_time <- reset - as.numeric(as.POSIXct(Sys.time()))

    # should be approx 15 mins, but if wait time > 30 mins signal exit
    if (wait_time <= 0 | wait_time > (15*60)*2) {
      return(FALSE)
    }

    msg(paste0("waiting ", round((wait_time/60), digits = 1), " mins for reset"))
    Sys.sleep(wait_time + 2) # add 2 sec buffer
  }

  TRUE
}

# check ISO 8601 format string
check_fmt_datetime <- function(dt) {
  dt_pattern <- "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z$"
  if (!stringr::str_detect(toupper(dt), dt_pattern)) {
    return(FALSE)
  }
  TRUE
}

# remove trailing .000Z from twitter datetime if present
rm_dt_tail <- function(x) {
  stringr::str_replace(x, "\\.000Z$", "Z")
}

# counts results list
lst_counts_results <- function(...) {
  list(counts = NULL, errors = NULL, meta = NULL, ...)
}

# tweets results list
lst_tweets_results <- function(...) {
  list(tweets = NULL, users = NULL, errors = NULL, meta = NULL, ...)
}

# append dataframes in tweet results list
add_tweets_results <- function(x, y) {
  x$tweets <- dplyr::bind_rows(x$tweets, y$tweets)
  x$users <- dplyr::bind_rows(x$users, y$users)
  x$errors <- dplyr::bind_rows(x$errors, y$errors)
  x$meta <- dplyr::bind_rows(x$meta, y$meta)
  x
}

# reformat results data and remove duplicates
# attempts to retain most recent version if duplicated
clean_results <- function(results) {
  if (!is.null(results$tweets) && nrow(results$tweets)) {
    results$tweets <- results$tweets |>
      dplyr::rename(tweet_id = .data$id) |>
      dplyr::arrange(dplyr::desc(.data$timestamp)) |>
      dplyr::distinct(.data$tweet_id, .keep_all = TRUE)
  }

  if (!is.null(results$users) && nrow(results$users)) {
    results$users <- results$users |>
      dplyr::rename_with(~ paste0("profile.", .x)) |>
      dplyr::rename(user_id = .data$profile.id,
                    timestamp = .data$profile.timestamp) |>
      dplyr::arrange(
        dplyr::desc(.data$timestamp),
        dplyr::desc(as.integer(.data$profile.public_metrics.tweet_count)) # imperfect
      ) |>
      dplyr::distinct(.data$user_id, .keep_all = TRUE)
  }

  results
}

# build conversation id search queries based on ids and endpoint
build_chunks <- function(ids, endpoint = "recent") {
  q_size <- 512
  if (endpoint == "all") q_size <- 1024

  # remove trailing "or" and encode "or" separators then add chunk
  add_chunk <- function(chunks, x) {
    chunk_qs <- sub("\\sOR\\s$", "", x)
    chunk_qs <- gsub("\\sOR\\s", "%20OR%20", chunk_qs)
    c(chunks, chunk_qs)
  }

  chunks <- c()
  qs <- ""
  n_chr_tally <- 0
  for (x in ids) {
    q <- paste0("conversation_id:", x)

    if ((n_chr_tally + nchar(q)) > q_size) {
      chunks <- add_chunk(chunks, qs)

      qs <- ""
      n_chr_tally <- 0
    }

    qs <- paste0(qs, q, " OR ")
    n_chr_tally <- nchar(qs)
  }

  if (nchar(qs) > 0) chunks <- add_chunk(chunks, qs)

  chunks
}

# progress bar format
prog_bar <- function(n_iter, desc) {
  progress::progress_bar$new(
    format = paste0("downloading [:bar] :percent", desc),
    total = n_iter,
    complete = "=",
    incomplete = "-",
    current = ">",
    width = 60,
    clear = FALSE
  )
}

# assign message function for verbose output
f_verbose <- function(x) {
  opt_msg <- getOption("voson.msg")
  opt_f <- vtcn_msg
  if (!is.null(opt_msg) && !is.na(opt_msg)) {
    if (is.logical(opt_msg)) {
      if (opt_msg == FALSE) {
        opt_f <- vtcn_cat
      }
    }
  }

  f <- vtcn_silent
  if (!is.null(x)) {
    if (is.logical(x)) {
      if (x) f <- opt_f
    }
  }

  f
}

# errors
err_msg <- function(x) {
  opt_msg <- getOption("voson.msg")
  if (!is.null(opt_msg) && !is.na(opt_msg)) {
    if (is.logical(opt_msg)) {
      if (opt_msg == FALSE) {
        vtcn_cat(x)
        return()
      }
    }
  }
  invisible(vtcn_msg(x))
}

# base output function
msg <- function(x) vtcn_msg(x)

# message output
vtcn_msg <- function(x) message(x) # appendLF = FALSE

# cat output
vtcn_cat <- function(x) {
  cat(x)
  flush.console()
  Sys.sleep(0.05)
}

# silent output
vtcn_silent <- function(x) return()

Try the voson.tcn package in your browser

Any scripts or data that you put into this service are public.

voson.tcn documentation built on Aug. 30, 2022, 5:07 p.m.