R/qnews_unshorten_urls.R

Defines functions qnews_unshorten_urls

Documented in qnews_unshorten_urls

#' Unshorten URLs -- mostly from Twitter.
#'
#' Resolves each shortened URL by sending an HTTP HEAD request and reading the
#' URL reported by the response. URLs are processed in batches of 20, and the
#' batches are distributed over a local cluster of worker processes while a
#' progress bar is shown.
#'
#' @name qnews_unshorten_urls
#' @param x A character vector of shortened URLs.
#' @param cores Maximum number of parallel worker processes to start.
#' @return A character vector with one element per URL in `x`, in input
#'   order: the resolved URL, `"timeout"` when the HEAD request timed out, or
#'   `NA` when the URL could not be resolved at all. An empty `x` yields
#'   `character(0)`.
#'
#'
#' @export
#' @rdname qnews_unshorten_urls
#'
qnews_unshorten_urls <- function(x, cores = 3) {

  # Nothing to resolve: skip the cost of starting worker processes.
  if (length(x) == 0) {
    return(character(0))
  }

  batches <- split(x, ceiling(seq_along(x) / 20))

  ###
  # Resolve one batch of URLs. Always returns exactly one string per input so
  # results stay aligned with `x` after the batches are flattened.
  get_url <- function(url) {

    s_HEAD <- purrr::safely(httr::HEAD)
    s_GET <- purrr::safely(httr::GET)

    resolve_one <- function(u) {
      res <- s_HEAD(u,
                    httr::user_agent('qnews-r-package'),
                    httr::timeout(5))

      # Inspect the error *message*; testing only the length of the grepl()
      # result would flag every failure as a timeout.
      timed_out <- !is.null(res$error) &&
        grepl('Timeout was reached',
              conditionMessage(res$error),
              fixed = TRUE)
      if (timed_out) {
        return('timeout')
      }

      y <- res$result$url

      # HEAD failed for a reason other than a timeout: retry with GET, again
      # guarded so a single bad URL cannot abort the whole batch.
      if (is.null(y)) {
        y <- s_GET(u,
                   httr::user_agent('qnews-r-package'),
                   httr::timeout(5))$result$url
      }

      if (is.null(y)) NA_character_ else y
    }

    vapply(url, resolve_one, character(1), USE.NAMES = FALSE)
  }

  ###
  # Never start more workers than there are batches to hand out.
  n_workers <- max(1L, min(cores, length(batches)))
  clust <- parallel::makeCluster(n_workers)
  # Release the workers even if a batch errors or the call is interrupted.
  on.exit(parallel::stopCluster(clust), add = TRUE)

  # `batches` reaches the workers as the `X` argument, so no separate
  # clusterExport() is required.
  new_links <- pbapply::pblapply(cl = clust,
                                 X = batches,
                                 FUN = get_url)

  unlist(new_links)
}
jaytimm/quicknews documentation built on Aug. 23, 2023, 12:09 a.m.