# File: R/scrape_html.R

# Defines functions: search_lens, bib2html, doi2html, read_html

# This script is for functions that scrape/read html as the first part of the doi2txt workflow
# Please document/annotate new functions as you write them

# wrapper function to combine all subfunctions
# Placeholder: the body is not implemented yet, so every call returns NULL
# and none of the arguments (doi, title, year, author) are used.
# NOTE(review): shares its name with xml2::read_html(); confirm this does not
# cause masking problems if xml2/rvest is attached alongside this package.
read_html <- function(doi = NULL, title = NULL, year = NULL, author = NULL) {
  # TODO: combine the doi- and bibliography-based lookups here.
  NULL
}


# Functions for scraping html from a doi ####

# Retrieve the html for an article identified by a doi or a url.
#
# Arguments:
#   doi  identifier handed to get_url() when `url` is not supplied.
#   url  address handed to parse_html(); when given, `doi` is not used.
#
# Value:
#   The result of parse_html() when it is a character object; otherwise NA.
#   NA is also returned (with a warning) when neither argument is supplied
#   or when parse_html() signals an error.
doi2html <- function(doi = NULL, url = NULL) {
  # Scalar test, so use the short-circuiting `&&` rather than elementwise `&`.
  if (is.null(doi) && is.null(url)) {
    # Signal through the condition system only; do not also print to stdout.
    warning("Either a doi or a url must be supplied; returning NA.",
            call. = FALSE)
    return(NA)
  }

  if (is.null(url)) {
    url <- get_url(doi)
  }

  # A failed parse is reported as a warning and mapped to NA so that callers
  # looping over many records can keep going.
  x <- tryCatch(
    parse_html(url = url),
    error = function(e) {
      warning("Could not parse html; returning NA. ", conditionMessage(e),
              call. = FALSE)
      NA
    }
  )

  # is.character() always yields a single TRUE/FALSE, unlike comparing
  # class(x) to a string, which has length > 1 for multi-class objects.
  if (!is.character(x)) {
    x <- NA
  }
  x
}

# Functions for looking up doi or scraping html from bibliographic data ####

# Placeholder: the body is not implemented yet, so every call returns NULL
# and none of the arguments (title, year, author) are used.
bib2html <- function(title = NULL, year = NULL, author = NULL) {
  # TODO: look up a doi or html from the bibliographic fields.
  NULL
}

# Placeholder: the body is not implemented yet, so every call returns NULL
# and `title` is never evaluated.
search_lens <- function(title) {
  # TODO: implement the title search.
  NULL
}

# Subfunctions that address various common scraping issues encountered ####
# ESHackathon/doi2txt documentation built on Dec. 17, 2021, 5:39 p.m.