R/get_everything.R

## Declare "." as a global variable so R CMD check does not flag the
## magrittr pipeline used below:
if (getRversion() >= "2.15.1") utils::globalVariables(c("."))
#' Get articles from the newsapi.org "everything" endpoint
#'
#' This endpoint suits article discovery and analysis, but it can also be used to retrieve articles for display.
#' @param q Keywords to search for.
#' @param sources A comma-separated string of identifiers for the news sources or blogs you want headlines from. Use the newsapi.org sources index.
#' @param domains A comma-separated string of domains (e.g. bbc.co.uk, techcrunch.com, engadget.com) to restrict the search to.
#' @param from A date and optional time for the oldest article allowed, in ISO 8601 format (e.g. 2018-01-12).
#' @param to A date and optional time for the newest article allowed, in ISO 8601 format.
#' @param language The 2-letter ISO-639-1 code of the language you want to get headlines for.
#' @param sort_by The order to sort the articles in. Possible options: relevancy, popularity, publishedAt.
#' @param apiKey String. Your newsapi.org API key; defaults to newsapi_key().
#' @param endpoint String. The API endpoint to query; defaults to "everything".
#'
#' @return A data frame of articles.
#'
#' @importFrom curl has_internet
#' @importFrom httr http_error GET content status_code
#' @importFrom jsonlite fromJSON
#' @importFrom purrr map
#' @importFrom plyr rbind.fill
#' @importFrom magrittr %>%
#' @importFrom utils globalVariables
#'
#' @examples
#' \dontrun{get_everything(q = 'bitcoin', language = 'en')}
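#' # A date-bounded query with explicit sorting (illustrative values; assumes
#' # a valid newsapi.org key is returned by newsapi_key()):
#' \dontrun{get_everything(q = 'bitcoin', language = 'en',
#'                         from = '2019-01-01', to = '2019-01-31',
#'                         sort_by = 'publishedAt')}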
#' @export
get_everything <- function(q, sources, domains, language, from, to, sort_by,
                           apiKey = newsapi_key(), endpoint = "everything") {

  # Capture the function's arguments so create_url() can build the request URL
  argg <- as.list(environment())

  if (curl::has_internet()) {

    # At least one search/filter argument must be supplied
    if (missing(q) && missing(sources) && missing(domains) && missing(language)) {
      stop("Please pass at least one of the following arguments: q, sources, domains or language",
           call. = FALSE)
    }

    if (!missing(language)) {
      if (!all(language %in% c("ar", "en", "de", "es", "fr", "he", "it", "nl",
                               "no", "pt", "ru", "sv", "ud"))) {
        stop("language must be one of: ar, de, en, es, fr, he, it, nl, no, pt, ru, sv, ud",
             call. = FALSE)
      }
    }
    if (!(missing(from) | missing(to))) {
      if (!(is_date(from) & is_date(to))) {
        stop("Please provide a valid date format, e.g. 2018-01-12", call. = FALSE)
      }
      if (from > to) {
        stop("The from date has to be less than or equal to the to date", call. = FALSE)
      }
    }

    if (!missing(sort_by)) {
      if (!all(sort_by %in% c("relevancy", "popularity", "publishedAt"))) {
        stop("sort_by must be one of: relevancy, popularity or publishedAt", call. = FALSE)
      }
    }

    # Build the request URL from the arguments and query the API
    resp <- httr::GET(create_url(endpoint, argg))
    if (httr::http_error(resp)) {
      stop("Newsapi request failed ", httr::status_code(resp), call. = FALSE)
    } else {
      cat("downloading...\n")
      source_list <- jsonlite::fromJSON(httr::content(resp, "text"))
      cat("finished downloading...\n")
      source_list <- source_list["articles"]
    }
  } else {
    stop("No local internet connection available", call. = FALSE)
  }


  # Split the nested 'source' field off the articles and bind everything into
  # a single data frame
  source_df <- lapply(source_list, function(x) {
    source <- x$source
    x$source <- NULL
    list(x, source)
  }) %>%
    purrr::map(~data.frame(.)) %>%
    do.call(plyr::rbind.fill, .)

  return(source_df)
}
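
## Minimal usage sketch (kept as comments so it is not run when the file is
## sourced); assumes a valid newsapi.org key is available via newsapi_key(),
## and the 'title' column name is assumed from the API's article fields:
# articles <- get_everything(domains = 'bbc.co.uk', language = 'en')
# nrow(articles)        # number of articles returned
# head(articles$title)  # article headlines (assumed column name)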