
#' linkScrapeR
#' This function scrapes all hyperlinks from a specific web page.
#' Once the links have been scraped they are output into a tibble for exploration.
#' This can be used on any website to pull back the hyperlink content of a web page.
#' @param url The website URL from which to detect active anchor (href) hyperlink tags and extract them into a tibble
#' @param SSL_needed Default: FALSE. Boolean indicating whether the site's SSL certificate should be verified when making the request
#' @importFrom rvest html_nodes html_table html_text html_attr
#' @importFrom magrittr %>%
#' @importFrom xml2 read_html
#' @importFrom httr GET config
#' @importFrom tibble tibble
#' @return A tibble (class data.frame) with all active hyperlinks on the web page at the URL (uniform resource locator) passed to the function.
#' \itemize{
#'   \item link_name - the visible text of the link
#'   \item url - the full url of the active href tag from the HTML
#' }
#' @examples linkScrapeR("https://www.datadictionary.nhs.uk/", FALSE)
#' @export

linkScrapeR <- function(url, SSL_needed = FALSE){
  tryCatch(
    expr = {
      # Request the page, optionally skipping SSL certificate verification
      content <- url %>%
        httr::GET(config = httr::config(ssl_verifypeer = SSL_needed))
      read_con <- xml2::read_html(content)

      # Extract the href attribute of every anchor tag
      url_ <- read_con %>%
        rvest::html_nodes("a") %>%
        rvest::html_attr("href")

      # Extract the visible text of every anchor tag
      link_return <- read_con %>%
        rvest::html_nodes("a") %>%
        rvest::html_text()

      # Return the link text and link target as a tibble
      return(tibble::tibble(
        link_name = link_return,
        url = url_))
    },
    error = function(e){
      message(paste("There has been an issue with the return.\n",
                    "Please check the url passed to the function, or set the SSL_needed parameter to FALSE, as the site's SSL certificate may have expired. Additionally, please make sure you are connected to the internet."))
    }
  )
}



