#' Get site html.
#'
#' Downloads a web page and extracts the name and text of every paragraph
#' and heading node (`p`, `h1`, `h2`, `h3`) found in it.
#'
#' @name get_site
#' @param x A URL to fetch. The page is requested with a single
#'   `httr::GET()` call (60 second timeout), so supply one URL per call.
#' @return A data frame with columns `url`, `type` and `text`. On success
#'   `type` holds the node names and `text` the node texts. If the request
#'   or the HTML parsing raises an error, `type` and `text` are empty
#'   strings. If no matching nodes are found, or any extracted text is not
#'   valid UTF-8, `type` and `text` are both `"no dice"`.
#'
#'
#' @export
#' @rdname get_site
#'
get_site <- function(x) {
  # NULL marks a failed download/parse. A string sentinel would have to be
  # compared with `==` against the parsed document object on success.
  site <- tryCatch(
    xml2::read_html(httr::GET(x, httr::timeout(60))),
    error = function(e) NULL
  )

  # Failure: return the placeholder row(s) directly (and visibly) rather
  # than as the value of an assignment.
  if (is.null(site)) {
    return(data.frame(url = x, type = "", text = ""))
  }

  nodes <- rvest::html_nodes(site, "p,h1,h2,h3")

  if (length(nodes) == 0) {
    node_type <- "no dice"
    node_text <- "no dice"
  } else {
    node_type <- rvest::html_name(nodes)
    node_text <- rvest::html_text(nodes)
  }

  # A single invalid string discards the whole page's content.
  if (any(!validUTF8(node_text))) {
    node_type <- "no dice"
    node_text <- "no dice"
  }

  data.frame(url = x, type = node_type, text = node_text)
}
# get_meta <- function(x, url = url) {
#
# feats <- rvest::html_nodes(x, 'meta')
# feats <- rvest::html_attrs(feats)
#
# feats <- Filter(function(x) length(x) != 1 , feats)
#
# feats0 <- lapply(feats, function(x){
#
# if(length(x) > 2) {
# x <- tail(x, 2)
# }
#
# names(x) <- c('content', 'value')
# x1 <- data.frame(x)
# x1$type <- rownames(x1)
# return(x1)})
#
# feats1 <- data.table::rbindlist(feats0, idcol = 'id')
# feats1$doc_id <- url
# feats2 <- data.table::dcast(feats1, doc_id + id~type, value.var = 'x')
# feats2
# }
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.