Nothing
#' Get resources of newsapi.org
#'
#' \code{get_everything} returns articles from large and small news
#' sources and blogs. This includes news as well as other regular articles.
#' You can search for multiple \code{sources}, different \code{language},
#' or use your own keywords. Articles can be sorted by the earliest date
#' \code{publishedAt}, \code{relevancy}, or \code{popularity}. To automatically
#' download all results, use \code{get_everything_all()}.\cr\cr
#' Please check that the \code{api_key} is available. You can provide an explicit
#' definition of the key or use \code{set_api_key()}. \cr\cr
#' Valid languages for \code{language} are provided in the dataset
#' \code{terms_language}.
#'
#' @param query Character string that contains the search term for the API's
#' database. API supports advanced search parameters, see 'details'.
#' Passing a search term is compulsory.
#' @param sources Character vector with IDs of the news outlets
#' you want to focus on (e.g., c("usa-today", "spiegel-online")).
#' @param domains Character vector with domains that you want
#' to restrict your search to (e.g. c("bbc.com", "nytimes.com")).
#' @param exclude_domains Similar usage as with 'domains'. Will exclude these
#' domains from your search.
#' @param from Character string with start date of your search. Needs to conform
#' to one of the following lubridate order strings:
#' \code{"ymdHMs, ymdHMsz, ymd"}. See help for lubridate::parse_date_time.
#' If from is not specified, NewsAPI defaults to the oldest available date
#' (depends on your paid/unpaid plan from newsapi.org).
#' @param to Character string that marks the end date of your search. Needs to conform
#' to one of the following lubridate order strings:
#' \code{"ymdHMs, ymdHMsz, ymd"}. See help for lubridate::parse_date_time.
#' If \code{to} is not specified,
#' NewsAPI defaults to the most recent article available.
#' @param language Specifies the language of the articles of your search. Must
#' be in ISO shortcut format (e.g., "de", "en"). See list of all
#' languages using \code{newsanchor::terms_language}. Default
#' is all languages.
#' @param sort_by Character string that specifies the sorting variable of your article
#' results. Accepts three options: "publishedAt", "relevancy",
#' "popularity". Default is "publishedAt".
#' @param page Specifies the page number of your results that is returned. Must
#' be numeric. Default is first page. If you want to get all results
#' at once, use \code{get_everything_all} from 'newsanchor'.
#' @param page_size The number of articles per page that are returned.
#' Maximum is 100 (also default).
#' @param api_key Character string with the API key you get from newsapi.org.
#' Passing it is compulsory. Alternatively, the key can be
#' provided via the environment variable \code{NEWS_API_KEY}
#' (see \code{set_api_key()}).
#'
#' @details Advanced search (see also www.newsapi.org): Surround entire phrases
#' with quotes (") for exact matches. Prepend words/phrases that must
#' appear with "+" symbol (e.g., +bitcoin). Prepend words that must not
#' appear with "-" symbol (e.g., -bitcoin). You can also use AND, OR,
#' NOT keywords (optionally grouped with parentheses, e.g., 'crypto AND
#' (ethereum OR litecoin) NOT bitcoin'). \cr
#' @examples
#' \dontrun{
#' df <- get_everything(query = "stuttgart", language = "de")
#' df <- get_everything(query = "mannheim", from = "2019-01-02 12:00:00")
#' }
#' @importFrom httr content GET build_url parse_url add_headers
#' @importFrom jsonlite fromJSON
#' @return List with two dataframes:\cr
#' 1) Data frame with \code{metadata}\cr
#' 2) Data frame with \code{results_df}
#' @export
get_everything <- function(query,
                           sources = NULL,
                           domains = NULL,
                           exclude_domains = NULL,
                           from = NULL,
                           to = NULL,
                           language = NULL,
                           sort_by = "publishedAt",
                           page = 1,
                           page_size = 100,
                           api_key = Sys.getenv("NEWS_API_KEY")) {
  # Validates the search arguments, converts them into the query parameters of
  # the "https://newsapi.org/v2/everything" endpoint and hands them to the
  # package helpers (defined elsewhere) that build the URL, issue the request
  # and extract metadata and articles from the response.

  # Initial proceedings -----------------------------------------------------
  # Ways of sorting the articles that are accepted for `sort_by`
  sortings <- c("publishedAt", "relevancy", "popularity")
  # Date-time formats accepted for `from` and `to`
  lubridate_orders <- c("ymdHMs", "ymdHMsz", "ymd")

  # Parses a single user-supplied date(-time) and returns it formatted as
  # "%Y-%m-%dT%H:%M:%S". `label` ("From"/"To") only prefixes the error message.
  format_search_date <- function(value, label) {
    parsed <- lubridate::parse_date_time(value, lubridate_orders, quiet = TRUE)
    # The length guard keeps the scalar `if` from failing with an unrelated
    # error when an empty or multi-element value is passed.
    if (length(parsed) != 1 || is.na(parsed)) {
      stop(
        paste0(
          label,
          " argument needs conform to one of the following lubridate orders: ",
          paste(lubridate_orders, collapse = ", "),
          ". See help for lubridate::parse_date_time. ",
          "If in doubt, use %Y-%m-%d for a date or %Y-%m-%d %H:%M:%S for datetime."
        ),
        call. = FALSE
      )
    }
    format(parsed, "%Y-%m-%dT%H:%M:%S")
  }

  # Errors and warnings -----------------------------------------------------
  # Make sure that any search term is passed
  if (missing(query)) {
    stop("You need to specify at least some content that you search for.",
         call. = FALSE)
  }

  # page_size must be a single, non-missing number that is <= 100
  if (!is.numeric(page_size) || length(page_size) != 1 || is.na(page_size)) {
    stop("You need to insert numeric values for the number of texts per page.",
         call. = FALSE)
  }
  if (page_size > 100) {
    stop("Page size cannot exceed 100 articles per page.", call. = FALSE)
  }

  # page must be a single, non-missing number
  if (!is.numeric(page) || length(page) != 1 || is.na(page)) {
    stop("Page should be a number.", call. = FALSE)
  }

  # Error if language indicated does not match the ones provided by the API
  if (!is.null(language)) {
    if (length(language) > 1) {
      stop("You cannot specify more than one language.", call. = FALSE)
    }
    stop_if_invalid_language(language)
  }

  # Error if selected sorting does not match the ones provided by the API
  # (the length check also rejects NULL and multi-element input)
  if (length(sort_by) != 1 || !sort_by %in% sortings) {
    stop("Sortings can be only by 'publishedAt', 'relevancy', or 'popularity'.",
         call. = FALSE)
  }

  # Bind together various search parameters as comma-separated strings as
  # required by API
  # Parameter: sources (plus limit to maximum of 20 sources)
  if (!is.null(sources)) {
    if (length(sources) > 20) {
      stop("You cannot specify more than 20 sources.", call. = FALSE)
    }
    # Validation is called only for its side effect (raising an error)
    for (source_id in sources) {
      stop_if_invalid_source(source_id)
    }
    sources <- collapse_to_comma_separated(sources)
  }
  # Parameter: domains
  if (!is.null(domains)) {
    domains <- collapse_to_comma_separated(domains)
  }
  # Parameter: exclude_domains
  if (!is.null(exclude_domains)) {
    exclude_domains <- collapse_to_comma_separated(exclude_domains)
  }

  # Make sure an API key is provided. NULL, NA and non-scalar values are
  # rejected with the same message as an empty string.
  if (is.null(api_key) || length(api_key) != 1 || is.na(api_key) ||
      !nzchar(api_key)) {
    stop(
      paste0(
        "You did not specify your API key as an argument or as a global variable.",
        " See documentation for further info."
      ),
      call. = FALSE
    )
  }

  # Parse dates (only when supplied; NULL is passed on unchanged)
  if (!is.null(from)) {
    from <- format_search_date(from, "From")
  }
  if (!is.null(to)) {
    to <- format_search_date(to, "To")
  }

  # Accessing the API -------------------------------------------------------
  # Build URL
  query_params <- list(
    q = query,
    language = language,
    sources = sources,
    domains = domains,
    excludeDomains = exclude_domains,
    from = from,
    to = to,
    sortBy = sort_by,
    pageSize = page_size,
    page = page
  )
  url <- build_newsanchor_url("https://newsapi.org/v2/everything",
                              query_params)
  response <- make_newsanchor_get_request(url = url, api_key = api_key)

  # Extract meta data and results from response -----------------------------
  content_parsed <- parse_newsanchor_content(response)
  # A response without a total count is treated as having zero results
  if (is.null(content_parsed$totalResults)) {
    content_parsed$totalResults <- 0
  }
  metadata <- extract_newsanchor_metadata(response, content_parsed,
                                          page, page_size)
  results_df <- extract_newsanchor_articles(metadata = metadata,
                                            content_parsed = content_parsed)

  # Return results ----------------------------------------------------------
  list(metadata = metadata,
       results_df = results_df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.