# R/qnews_parse_rss.R

# Defines functions qnews_parse_rss

# Documented in qnews_parse_rss

#' Get URLs/metadata for articles per RSS feed url
#'
#' Reads an RSS document with `xml2::read_xml()` and returns one row per
#' `<item>` element found in it.
#'
#' @details
#' Each item title has its trailing `" - <suffix>"` segment removed. When the
#' channel title contains "Google News", that suffix is used as the
#' per-article `source`; otherwise the channel title is used as `source` for
#' every row.
#'
#' The date is taken from the `"%d %b %Y"` portion of `pubDate`. Parsing of
#' the month abbreviation (`%b`) follows the current locale.
#'
#' @name qnews_parse_rss
#' @param x Character string handed to `xml2::read_xml()`: a feed URL, a file
#'   path, or literal XML.
#' @return A data frame with columns `date` (class `Date`), `source`, `title`
#'   and `link`, one row per `<item>`. Fields missing from an item are `NA`.
#'   If the feed cannot be read or parsed, a single `NA` is returned instead.
#'
#'
#' @export
#' @rdname qnews_parse_rss
#'
qnews_parse_rss <- function(x) {

  # NULL is an unambiguous failure marker; it avoids comparing a parsed
  # document against a sentinel string.
  doc <- tryCatch(
    xml2::read_xml(x),
    error = function(e) NULL
  )

  if (is.null(doc)) {
    return(NA)
  }

  # Query each field relative to its own <item>, so an item that lacks a
  # field yields NA instead of shifting or shortening the column.
  items <- xml2::xml_find_all(doc, "//item")
  item_field <- function(tag) {
    xml2::xml_text(xml2::xml_find_first(items, paste0("./", tag)))
  }

  raw_title <- item_field("title")
  link <- item_field("link")
  pub_date <- item_field("pubDate")

  # Strip only the last " - " segment, the same separator used for `source`
  # below, so headlines that themselves contain " - " are not truncated.
  title <- sub("^(.*) - .*$", "\\1", raw_title)

  # xml_find_first() gives NA text when the channel has no title, which keeps
  # the condition below a length-one logical.
  channel_title <- xml2::xml_text(
    xml2::xml_find_first(doc, "//channel/title")
  )
  is_google_news <- !is.na(channel_title) &&
    grepl("Google News", channel_title, fixed = TRUE)

  if (is_google_news) {
    source <- sub("^.* - ", "", raw_title)
  } else {
    # Repeat to one value per item so a feed with zero items still builds a
    # valid (zero-row) data frame.
    source <- rep(channel_title, length(items))
  }

  # Reduce e.g. "Mon, 21 Aug 2023 10:00:00 GMT" to "21 Aug 2023".
  date <- gsub("^.+, ", "", pub_date)
  date <- gsub(" [0-9]*:.+$", "", date)
  date <- as.Date(date, "%d %b %Y")

  data.frame(date, source, title, link)
}
# jaytimm/quicknews documentation built on Aug. 23, 2023, 12:09 a.m.