Nothing
#' @importFrom httr http_error content modify_url parse_url
m_scrape <- function(bow, query=NULL, params=NULL, accept="html", content=NULL, verbose=FALSE) { # nolint
if(!inherits(bow, "polite"))
stop("Please be polite: bow then scrape!")
stopifnot(is.list(query) || is.null(query))
if(!is.null(query))
bow$url <- httr::modify_url(bow$url, query=query)
else{
if(!is.null(params)){
warning("Argument `params` is deprecated. Please, use `query` passing parameters as a named list.", call.=FALSE)
params <- gsub("^.*\\?", "", params) # this will delete url prefix if any
query <- httr::parse_url(paste0("http://www.example.com/q?", params))$query
bow$url <- httr::modify_url(bow$url, query=query)
}
}
if(!is_scrapable(bow)){
warning("No scraping allowed here!", call. = FALSE)
return(NULL)
}
if(substr(accept,1,1)!="." && !grepl("/", accept)){
accept <- paste0(".", accept)
}
accept_type <- httr::accept(accept)
bow$config <- c(bow$config, accept_type)
response <- httr_get_ltd(bow$url, bow$config, bow$handle, bow$times, verbose)
if(httr::http_error(response)){
warning(httr::http_status(response)$message, " ", bow$url, call. = FALSE)
return(NULL)
}
content <- content %otherwise% response$headers$`content-type`
res <- tryCatch(
{
httr::content(response, type = content)
},
error=function(cond){
warning(paste0("<polite session> Encountered an error while parsing content.\n",
"There seems to be a mismatch of content type or encoding or both.\n",
"The server says it is serving:", response$headers$`content-type`, "\n",
"Here's the text of the error generated by httr::content(): \n", cond, "\n",
"Returning raw vector which can be parsed with rawToChar(). Good luck!\n"), call. = FALSE)
return(httr::content(response, as = "raw"))
}
)
res
}
#' Scrape the content of authorized page/API
#'
#' @param bow host introduction object of class `polite`, `session` created by `bow()` or `nod()`
#' @param query named list of parameters to be appended to URL in the format `list(param1=valA, param2=valB)`
#' @param params deprecated. Use `query` argument above.
#' @param accept character value of expected data type to be returned by host (e.g. `html`, `json`, `xml`, `csv`, `txt`, etc.)
#' @param content MIME type (aka internet media type) used to override the content type returned by the server.
#' See http://en.wikipedia.org/wiki/Internet_media_type for a list of common types. You can add the `charset` parameter to override the server's default encoding
#' @param verbose extra feedback from the function. Defaults to `FALSE`
#'
#' @return Object of class `httr::response` which can be further processed by functions in `rvest` package
#'
#' @examples
#' \donttest{
#' library(rvest)
#' bow("https://en.wikipedia.org/wiki/List_of_cognitive_biases") %>%
#' scrape(content="text/html; charset=UTF-8") %>%
#' html_nodes(".wikitable") %>%
#' html_table()
#'}
#'
#' @export
scrape <- memoise::memoise(m_scrape)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.