# Chunk defaults for this vignette: merge each chunk's source and output
# into a single block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# Attach the packages used throughout this walkthrough. Each call sits on
# its own line: R needs a newline (or `;`) between top-level statements.
library(rvest)
library(dplyr)
library(xml2)
# Download and parse the article page once; the later chunks all query
# this parsed document.
rv_doc <- rvest::read_html(
  "https://www.churchofjesuschrist.org/study/liahona/2020/11/15cook?lang=eng"
)

# Print the nested tag structure of the `.body-block` element(s) to see
# what is available to select.
rv_doc %>%
  html_elements(".body-block") %>%
  xml2::html_structure()
Explore node 1:
# First child node under the `.body-block` selection.
rv_doc %>%
  html_elements(".body-block") %>%
  xml2::xml_child(1)
Explore node 2:
# Second child node under the `.body-block` selection.
rv_doc %>%
  html_elements(".body-block") %>%
  xml2::xml_child(2)
# Three separate views of the `.body-block` content. Each is its own
# top-level statement so that each result prints independently.

# All child nodes of `.body-block`, via xml_contents().
rv_doc %>%
  html_elements(".body-block") %>%
  xml_contents()

# Every <p> element located anywhere inside `.body-block`.
rv_doc %>%
  html_elements(".body-block p")

# Child elements of `.body-block`, via html_children().
rv_doc %>%
  html_elements(".body-block") %>%
  html_children()
# Every <header> element in the document.
rv_doc %>%
  html_elements("header")

# Text of the <header> elements found inside `.body` elements, extracted
# with html_text2().
rv_doc %>%
  html_elements(".body") %>%
  html_elements("header") %>%
  html_text2()
Get specific paragraph by id:
# `#p5` is a CSS id selector: it matches the element whose id is "p5".
rv_doc %>%
  html_elements("#p5")
Get multiple things at the same time (headers and paragraphs):
# A comma-separated CSS selector matches either pattern, so <h2> headings
# and <p> paragraphs under `.body-block` come back in one node set.
rv_doc %>%
  html_elements(".body-block h2, .body-block p")
# `id` attribute of every <h2> heading inside `.body-block`.
header_ids <- rv_doc %>%
  html_elements(".body-block h2") %>%
  html_attr("id")

# NOTE(review): despite the name, this does not collect id attributes the
# way `header_ids` does; it looks up `#p1` within each selected paragraph.
# Confirm whether `html_attr("id")` was intended.
p_ids <- rv_doc %>%
  html_elements(".body-block p") %>%
  html_element("#p1")

# Child nodes of `.body-block`, kept for the exploration steps that follow.
xm_contents <- rv_doc %>%
  html_elements(".body-block") %>%
  xml_contents()
# Take the first child of `.body-block`, list its contents, and search
# those nodes for <p> elements. The disabled html_children() step stays
# on its own line so the comment cannot swallow the rest of the pipeline.
rv_doc %>%
  html_elements(".body-block") %>%
  # html_children() %>%
  xml_child(1) %>%
  xml_contents() %>%
  html_elements("p")
# Text of the first child taken from the saved `xm_contents` node set.
xm_contents %>%
  xml_child(1) %>%
  html_text()
Scrape metadata for the URL:
# All <meta> tags located inside the document's <head>.
rv_doc %>%
  html_elements("head") %>%
  html_elements("meta")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.