rss_parse <- function(response, list, clean_tags, parse_dates) {
# spec here: https://validator.w3.org/feed/docs/rss2.html
res <- read_xml(response, options = "HUGE")
geocheck(res)
channel <- xml_find_first(res, "//*[name()='channel']")
# meta data. Necessary: title, link, description
metadata <- tibble(
feed_title = xml_find_first(channel, "//*[name()='title']") %>% xml_text(),
feed_link = xml_find_first(channel, "//*[name()='link']") %>% xml_text(),
feed_description = xml_find_first(channel, "//*[name()='description']") %>%
xml_text()
)
# optional metadata: language, copyright, managingEditor, webMaster, pubDate,
# lastBuildDate; category, generator, docs, cloud, link, managingEditor,
# podcast:guid, podcast:license, podcast:locked, podcast:funding,
# podcast:location, podcast:trailer, ttl, image, textInput,
# skipHours, skipDays
meta_optional <- tibble(
feed_language = safe_run(channel, "first", "//*[name()='language']"),
feed_managing_editor = safe_run(channel,
"first", "//*[name()='managingEditor']"),
feed_web_master = safe_run(channel, "first", "//*[name()='webMaster']"),
feed_pub_date = safe_run(channel, "first", "//*[name()='pubDate']"),
feed_last_build_date = safe_run(channel,
"first", "//*[name()='lastBuildDate']"),
feed_category = list(category = safe_run(
channel, "first", "//*[name()='category']"
)),
feed_generator = safe_run(channel, "first", "//*[name()='generator']"),
feed_docs = safe_run(channel, "first", "//*[name()='docs']"),
feed_managingEditor = safe_run(channel, "first", "//*[name()='managingEditor']"),
feed_webMaster = safe_run(channel, "first", "//*[name()='webMaster']"),
feed_guid = safe_run(channel, "first", "//*[name()='podcast:guid']"),
feed_license = safe_run(channel, "first", "//*[name()='podcast:license']"),
feed_locked = safe_run(channel, "first", "//*[name()='podcast:locked']"),
feed_funding = safe_run(channel, "first", "//*[name()='podcast:funding']"),
feed_location = safe_run(channel, "first", "//*[name()='podcast:location']"),
feed_trailer = safe_run(channel, "first", "//*[name()='podcast:trailer']"),
feed_ttl = safe_run(channel, "first", "//*[name()='ttl']")
)
meta <- bind_cols(metadata, meta_optional)
# entries
# necessary: title or description
res_entry <- xml_find_all(channel, "//*[name()='item']") %>% as_list()
res_entry_xml <- xml_find_all(channel, "//*[name()='item']")
entries <- tibble(
item_title = map(res_entry, "title", .default = def) %>% unlist(),
item_link = map(res_entry, "link", .default = def) %>% unlist(),
item_description = map(res_entry, "description", .default = def) %>%
replace_null() %>% discard(safe_check_comment) %>% unlist(),
item_pub_date = map(res_entry, "pubDate", .default = def) %>% unlist(),
item_guid = map(res_entry, "guid", .default = def) %>% unlist(),
item_author = map(res_entry, "author", .default = def),
item_season = map(res_entry, "podcast:season", .default = def),
item_episode = map(res_entry, "podcast:episode", .default = def),
item_enclosure = map(res_entry, "enclosure", .default = def),
item_category = map(res_entry_xml, ~ {
xml_find_all(.x, "category") %>% map(xml_text)
}),
item_comments = map(res_entry, "comments", .default = def) %>% unlist(),
item_duration = map(res_entry, "duration", .default = def) %>% unlist()
)
# clean up
meta <- clean_up(meta, "rss", clean_tags, parse_dates)
entries <- clean_up(entries, "rss", clean_tags, parse_dates)
if (isTRUE(list)) {
out <- list(meta = meta, entries = entries) # nocov
return(out) # nocov
} else {
if (!has_name(meta, "feed_title")) {
meta$feed_title <- NA_character_ # nocov
}
entries$feed_title <- meta$feed_title
out <- suppressMessages(safe_join(meta, entries))
if (is.null(out$error)) {
out <- out$result
if (all(is.na(out$feed_title))) out <- out %>% select(-feed_title) # nocov
return(out)
} else {
# nocov start
meta$tmp <- "temp"
entries$tmp <- "temp"
out <- suppressMessages(full_join(meta, entries))
out <- out %>% select(-tmp)
return(out)
# nocov end
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.