cas_extract_script | R Documentation |
Extracts scripts from an html page
cas_extract_script(
html_document,
script_type = NULL,
match = NULL,
accessors = NULL,
remove_from_script = NULL
)
html_document |
An HTML document parsed with `rvest::read_html()` (see the example below). |
script_type |
Defaults to NULL. Type of script. Common script types
include `application/ld+json`. |
match |
Defaults to NULL. If given, used to filter extracted scripts.
Must be a named vector in the format ``c(`@type` = "NewsArticle")``. |
accessors |
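Defaults to NULL. If given, a vector of accessors passed to
the extraction step (presumably `purrr::pluck()`) to reach a possibly nested element, e.g. `c("publisher", "logo", "url")`. |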
remove_from_script |
Defaults to NULL. If given, it is removed from the script after the script has been extracted but before the JSON is processed. |
May return a list or a character vector. If no match is found, returns `NA_character_`.
## Not run:
if (interactive()) {
url <- "https://www.digi24.ro/stiri/externe/casa-alba-pune-capat-isteriei-globale-nu-exista-indicii-ca-obiectele-zburatoare-doborate-de-rachetele-sua-ar-fi-extraterestre-2250863"
html_document <- rvest::read_html(x = url)
cas_extract_script(
html_document = html_document,
script_type = "application/ld+json"
)
# get date published
cas_extract_script(
html_document = html_document,
script_type = "application/ld+json",
match = c(`@type` = "NewsArticle"),
accessors = "datePublished"
)
# get title
cas_extract_script(
html_document = html_document,
script_type = "application/ld+json",
match = c(`@type` = "NewsArticle"),
accessors = "headline"
)
# get nested element, e.g. url of the logo of the publisher
cas_extract_script(
html_document = html_document,
script_type = "application/ld+json",
match = c(`@type` = "NewsArticle"),
accessors = c("publisher", "logo", "url")
)
}
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.