Defines functions scrapeR

Documented in scrapeR

#' ScrapeR - scrape web information with scrapeR
#'
#' Takes the url and xpath and scrapes HTML table elements from a website.
#' This function is specifically designed to work with HTML tables and xpath
#' links through to direct HTML elements. It can be used on any URL where an
#' xpath can be obtained through the URL and HTML inspection process.
#'
#' The page is fetched with `httr::GET()`, parsed with `xml2::read_html()`,
#' and the first table matched by `xpath` is extracted. Runs of whitespace in
#' every cell are collapsed to a single space, so all returned columns are
#' character vectors.
#'
#' @param url Website address to connect to
#' @param xpath Xpath obtained through inspecting the individual HTML elements
#' @param ... Additional arguments. Accepted so existing calls keep working;
#'   they are not used by the function body.
#' @importFrom purrr pluck
#' @importFrom rvest html_nodes html_table html_text
#' @importFrom magrittr %>%
#' @importFrom xml2 read_html
#' @return A data frame holding the first HTML table matched by `xpath`, with
#'   whitespace in each cell normalised. If no table is matched, or if the
#'   request or parsing fails, a message advising the user to check the url
#'   and xpath is emitted and `NULL` is returned invisibly.
#' @export
scrapeR <- function(url, xpath, ...) {
  tryCatch(
    expr = {
      # NOTE(review): certificate verification is disabled here. That is kept
      # for backward compatibility, but it exposes the request to
      # man-in-the-middle tampering -- confirm it is still required.
      content <- httr::GET(
        url,
        config = httr::config(ssl_verifypeer = FALSE)
      )
      website <- xml2::read_html(content)

      # html_table() on a node set yields a list of tables; keep the first.
      # pluck() returns NULL (length 0) when nothing matched the xpath.
      tables <- rvest::html_table(rvest::html_nodes(website, xpath = xpath))
      simple_lookup_table_scraped <- purrr::pluck(tables, 1)

      if (length(simple_lookup_table_scraped) > 0) {
        # Clean column by column. apply() would collapse a one-row table to a
        # plain vector (giving a transposed result) and would pad
        # non-character columns via as.matrix() before the regex runs.
        cleaned <- lapply(
          simple_lookup_table_scraped,
          function(column) gsub("\\s+", " ", column)
        )
        as.data.frame(
          cleaned,
          check.names = FALSE,
          fix.empty.names = FALSE,
          stringsAsFactors = FALSE
        )
      } else {
        # message() returns NULL invisibly, which becomes the function result.
        message(paste0("HTML Table does not exist for this URL.\n",
                       "Check the URL online at:\n", url, ".", "\n"))
      }
    },
    error = function(e) {
      # Any failure (bad url, no connection, unparsable page) is reported as
      # a message rather than propagated; the caller receives NULL.
      message(paste("Please make sure url or xpath are correctly specified and make sure you are connected to the internet.", as.character(e)))
    }
  )
}




Try the NHSDataDictionaRy package in your browser

Any scripts or data that you put into this service are public.

NHSDataDictionaRy documentation built on July 9, 2021, 5:08 p.m.